From 6a4b58707903cb0901966ac8dad5d6ec7f2d432f Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 27 Oct 2008 15:10:14 +0000
Subject: uwb: target reservations shouldn't get streams

The reservation owner should decide the stream index to use based on
what reservations it's created.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index bae16204..e4facae 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -285,7 +285,8 @@ void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
 	switch (new_state) {
 	case UWB_RSV_STATE_NONE:
 		uwb_drp_avail_release(rsv->rc, &rsv->mas);
-		uwb_rsv_put_stream(rsv);
+		if (uwb_rsv_is_owner(rsv))
+			uwb_rsv_put_stream(rsv);
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_NONE);
 		uwb_rsv_callback(rsv);
 		break;
@@ -532,7 +533,6 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc,
 	rsv->target.dev  = &rc->uwb_dev;
 	rsv->type        = uwb_ie_drp_type(drp_ie);
 	rsv->stream      = uwb_ie_drp_stream_index(drp_ie);
-	set_bit(rsv->stream, rsv->owner->streams);
 	uwb_drp_ie_to_bm(&rsv->mas, drp_ie);
 
 	/*
-- 
cgit v0.10.2


From b09ac64b7b2d93efab3998033588f5cb0e470ccf Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 27 Oct 2008 15:14:03 +0000
Subject: wusb: release mutex in the error path of whci-hcd's whc_do_gencmd()

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/host/whci/hw.c b/drivers/usb/host/whci/hw.c
index ac86e59..d498e72 100644
--- a/drivers/usb/host/whci/hw.c
+++ b/drivers/usb/host/whci/hw.c
@@ -50,6 +50,7 @@ int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len)
 	unsigned long flags;
 	dma_addr_t dma_addr;
 	int t;
+	int ret = 0;
 
 	mutex_lock(&whc->mutex);
 
@@ -61,7 +62,8 @@ int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len)
 		dev_err(&whc->umc->dev, "generic command timeout (%04x/%04x)\n",
 			le_readl(whc->base + WUSBGENCMDSTS),
 			le_readl(whc->base + WUSBGENCMDPARAMS));
-		return -ETIMEDOUT;
+		ret = -ETIMEDOUT;
+		goto out;
 	}
 
 	if (addr) {
@@ -80,8 +82,8 @@ int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len)
 		  whc->base + WUSBGENCMDSTS);
 
 	spin_unlock_irqrestore(&whc->lock, flags);
-
+out:
 	mutex_unlock(&whc->mutex);
 
-	return 0;
+	return ret;
 }
-- 
cgit v0.10.2


From cae1c11414912bf77a62aebd65ced321f0b9da51 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 27 Oct 2008 15:22:46 +0000
Subject: uwb: reference count reservations

Reference counting the struct uwb_rsv's is safer and easier to get right than
the transferring ownership of the structures from the PAL to reservation
manager.

This fixes an oops in the debug PAL after a reservation timed out.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/wusbcore/reservation.c b/drivers/usb/wusbcore/reservation.c
index fc63e77..7b6525d 100644
--- a/drivers/usb/wusbcore/reservation.c
+++ b/drivers/usb/wusbcore/reservation.c
@@ -59,7 +59,6 @@ static void wusbhc_rsv_complete_cb(struct uwb_rsv *rsv)
 	case UWB_RSV_STATE_NONE:
 		dev_dbg(dev, "removed reservation\n");
 		wusbhc_bwa_set(wusbhc, 0, NULL);
-		wusbhc->rsv = NULL;
 		break;
 	default:
 		dev_dbg(dev, "unexpected reservation state: %d\n", rsv->state);
@@ -105,11 +104,11 @@ int wusbhc_rsv_establish(struct wusbhc *wusbhc)
 
 
 /**
- * wusbhc_rsv_terminate - terminate any cluster reservation
+ * wusbhc_rsv_terminate - terminate the cluster reservation
  * @wusbhc: the WUSB host whose reservation is to be terminated
  */
 void wusbhc_rsv_terminate(struct wusbhc *wusbhc)
 {
-	if (wusbhc->rsv)
-		uwb_rsv_terminate(wusbhc->rsv);
+	uwb_rsv_terminate(wusbhc->rsv);
+	uwb_rsv_destroy(wusbhc->rsv);
 }
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index e4facae..bcc41a4 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -82,6 +82,23 @@ static void uwb_rsv_dump(struct uwb_rsv *rsv)
 	dev_dbg(dev, "rsv %s -> %s: %s\n", owner, target, uwb_rsv_state_str(rsv->state));
 }
 
+static void uwb_rsv_release(struct kref *kref)
+{
+	struct uwb_rsv *rsv = container_of(kref, struct uwb_rsv, kref);
+
+	kfree(rsv);
+}
+
+static void uwb_rsv_get(struct uwb_rsv *rsv)
+{
+	kref_get(&rsv->kref);
+}
+
+static void uwb_rsv_put(struct uwb_rsv *rsv)
+{
+	kref_put(&rsv->kref, uwb_rsv_release);
+}
+
 /*
  * Get a free stream index for a reservation.
  *
@@ -325,6 +342,7 @@ static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc)
 
 	INIT_LIST_HEAD(&rsv->rc_node);
 	INIT_LIST_HEAD(&rsv->pal_node);
+	kref_init(&rsv->kref);
 	init_timer(&rsv->timer);
 	rsv->timer.function = uwb_rsv_timer;
 	rsv->timer.data     = (unsigned long)rsv;
@@ -334,14 +352,6 @@ static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc)
 	return rsv;
 }
 
-static void uwb_rsv_free(struct uwb_rsv *rsv)
-{
-	uwb_dev_put(rsv->owner);
-	if (rsv->target.type == UWB_RSV_TARGET_DEV)
-		uwb_dev_put(rsv->target.dev);
-	kfree(rsv);
-}
-
 /**
  * uwb_rsv_create - allocate and initialize a UWB reservation structure
  * @rc: the radio controller
@@ -375,23 +385,23 @@ void uwb_rsv_remove(struct uwb_rsv *rsv)
 	if (rsv->state != UWB_RSV_STATE_NONE)
 		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
 	del_timer_sync(&rsv->timer);
-	list_del(&rsv->rc_node);
-	uwb_rsv_free(rsv);
+	uwb_dev_put(rsv->owner);
+	if (rsv->target.type == UWB_RSV_TARGET_DEV)
+		uwb_dev_put(rsv->target.dev);
+
+	list_del_init(&rsv->rc_node);
+	uwb_rsv_put(rsv);
 }
 
 /**
  * uwb_rsv_destroy - free a UWB reservation structure
  * @rsv: the reservation to free
  *
- * The reservation will be terminated if it is pending or established.
+ * The reservation must already be terminated.
  */
 void uwb_rsv_destroy(struct uwb_rsv *rsv)
 {
-	struct uwb_rc *rc = rsv->rc;
-
-	mutex_lock(&rc->rsvs_mutex);
-	uwb_rsv_remove(rsv);
-	mutex_unlock(&rc->rsvs_mutex);
+	uwb_rsv_put(rsv);
 }
 EXPORT_SYMBOL_GPL(uwb_rsv_destroy);
 
@@ -423,6 +433,7 @@ int uwb_rsv_establish(struct uwb_rsv *rsv)
 		goto out;
 	}
 
+	uwb_rsv_get(rsv);
 	list_add_tail(&rsv->rc_node, &rc->reservations);
 	rsv->owner = &rc->uwb_dev;
 	uwb_dev_get(rsv->owner);
@@ -478,9 +489,14 @@ EXPORT_SYMBOL_GPL(uwb_rsv_terminate);
  *
  * Reservation requests from peers are denied unless a PAL accepts it
  * by calling this function.
+ *
+ * The PAL call uwb_rsv_destroy() for all accepted reservations before
+ * calling uwb_pal_unregister().
  */
 void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv)
 {
+	uwb_rsv_get(rsv);
+
 	rsv->callback = cb;
 	rsv->pal_priv = pal_priv;
 	rsv->state    = UWB_RSV_STATE_T_ACCEPTED;
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 6d232c3..6db641e 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -104,6 +104,11 @@ static void uwb_dbg_rsv_cb(struct uwb_rsv *rsv)
 
 	dev_dbg(dev, "debug: rsv %s -> %s: %s\n",
 		owner, target, uwb_rsv_state_str(rsv->state));
+
+	if (rsv->state == UWB_RSV_STATE_NONE) {
+		list_del(&rsv->pal_node);
+		uwb_rsv_destroy(rsv);
+	}
 }
 
 static int cmd_rsv_establish(struct uwb_rc *rc,
@@ -153,11 +158,11 @@ static int cmd_rsv_terminate(struct uwb_rc *rc,
 			found = rsv;
 			break;
 		}
+		i++;
 	}
 	if (!found)
 		return -EINVAL;
 
-	list_del(&found->pal_node);
 	uwb_rsv_terminate(found);
 
 	return 0;
@@ -287,8 +292,10 @@ static void uwb_dbg_new_rsv(struct uwb_rsv *rsv)
 {
 	struct uwb_rc *rc = rsv->rc;
 
-	if (rc->dbg->accept)
+	if (rc->dbg->accept) {
+		list_add_tail(&rsv->pal_node, &rc->dbg->rsvs);
 		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, NULL);
+	}
 }
 
 /**
@@ -336,7 +343,7 @@ void uwb_dbg_del_rc(struct uwb_rc *rc)
 		return;
 
 	list_for_each_entry_safe(rsv, t, &rc->dbg->rsvs, pal_node) {
-		uwb_rsv_destroy(rsv);
+		uwb_rsv_terminate(rsv);
 	}
 
 	uwb_pal_unregister(rc, &rc->dbg->pal);
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index f9ccbd9..010ee70 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -201,6 +201,7 @@ struct uwb_rsv {
 	struct uwb_rc *rc;
 	struct list_head rc_node;
 	struct list_head pal_node;
+	struct kref kref;
 
 	struct uwb_dev *owner;
 	struct uwb_rsv_target target;
-- 
cgit v0.10.2


From d409f3bf47c5e5ae10601d079204e263bc176bcf Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 27 Oct 2008 15:30:12 +0000
Subject: wusb: disable verification of the key generation algorithms

Verifing the key generation algorithms could take too long on a freshly
booted system (due to lack of entropy) so disable the test unless a module
parameter (debug_crypto_verify) is specified.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c
index c36c438..0ca8603 100644
--- a/drivers/usb/wusbcore/crypto.c
+++ b/drivers/usb/wusbcore/crypto.c
@@ -54,6 +54,10 @@
 #define D_LOCAL 0
 #include <linux/uwb/debug.h>
 
+static int debug_crypto_verify = 0;
+
+module_param(debug_crypto_verify, int, 0);
+MODULE_PARM_DESC(debug_crypto_verify, "verify the key generation algorithms");
 
 /*
  * Block of data, as understood by AES-CCM
@@ -526,10 +530,13 @@ int wusb_crypto_init(void)
 {
 	int result;
 
-	result = wusb_key_derive_verify();
-	if (result < 0)
-		return result;
-	return wusb_oob_mic_verify();
+	if (debug_crypto_verify) {
+		result = wusb_key_derive_verify();
+		if (result < 0)
+			return result;
+		return wusb_oob_mic_verify();
+	}
+	return 0;
 }
 
 void wusb_crypto_exit(void)
-- 
cgit v0.10.2


From 4d2bea4ca0adb4cebfbf89d34869c74081c42577 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 27 Oct 2008 15:42:31 +0000
Subject: wusb: do a proper channel stop

When stopping the WUSB channel the host should send Channel Stop IEs giving
the WUSB Channel Time of the last MMC.  Both WHCI and HWA hosts provide a
channel stop command for this.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
index 64be4d8..0e18989 100644
--- a/drivers/usb/host/hwa-hc.c
+++ b/drivers/usb/host/hwa-hc.c
@@ -171,11 +171,6 @@ static int hwahc_op_start(struct usb_hcd *usb_hcd)
 	if (result < 0)
 		goto error_set_cluster_id;
 
-	result = wa_nep_arm(&hwahc->wa, GFP_KERNEL);
-	if (result < 0) {
-		dev_err(dev, "cannot listen to notifications: %d\n", result);
-		goto error_stop;
-	}
 	usb_hcd->uses_new_polling = 1;
 	usb_hcd->poll_rh = 1;
 	usb_hcd->state = HC_STATE_RUNNING;
@@ -185,8 +180,6 @@ out:
 	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
 	return result;
 
-error_stop:
-	__wa_stop(&hwahc->wa);
 error_set_cluster_id:
 	wusb_cluster_id_put(wusbhc->cluster_id);
 error_cluster_id_get:
@@ -194,39 +187,6 @@ error_cluster_id_get:
 
 }
 
-/*
- * FIXME: break this function up
- */
-static int __hwahc_op_wusbhc_start(struct wusbhc *wusbhc)
-{
-	int result;
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct device *dev = &hwahc->wa.usb_iface->dev;
-
-	/* Set up a Host Info WUSB Information Element */
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
-	result = -ENOSPC;
-
-	result = __wa_set_feature(&hwahc->wa, WA_ENABLE);
-	if (result < 0) {
-		dev_err(dev, "error commanding HC to start: %d\n", result);
-		goto error_stop;
-	}
-	result = __wa_wait_status(&hwahc->wa, WA_ENABLE, WA_ENABLE);
-	if (result < 0) {
-		dev_err(dev, "error waiting for HC to start: %d\n", result);
-		goto error_stop;
-	}
-	result = 0;
-out:
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
-	return result;
-
-error_stop:
-	result = __wa_clear_feature(&hwahc->wa, WA_ENABLE);
-	goto out;
-}
-
 static int hwahc_op_suspend(struct usb_hcd *usb_hcd, pm_message_t msg)
 {
 	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
@@ -246,18 +206,6 @@ static int hwahc_op_resume(struct usb_hcd *usb_hcd)
 	return -ENOSYS;
 }
 
-static void __hwahc_op_wusbhc_stop(struct wusbhc *wusbhc)
-{
-	int result;
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct device *dev = &hwahc->wa.usb_iface->dev;
-
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
-	/* Nothing for now */
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
-	return;
-}
-
 /*
  * No need to abort pipes, as when this is called, all the children
  * has been disconnected and that has done it [through
@@ -275,8 +223,6 @@ static void hwahc_op_stop(struct usb_hcd *usb_hcd)
 	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
 	mutex_lock(&wusbhc->mutex);
 	wusbhc_stop(wusbhc);
-	wa_nep_disarm(&hwahc->wa);
-	result = __wa_stop(&hwahc->wa);
 	wusb_cluster_id_put(wusbhc->cluster_id);
 	mutex_unlock(&wusbhc->mutex);
 	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
@@ -325,6 +271,54 @@ static void hwahc_op_endpoint_disable(struct usb_hcd *usb_hcd,
 	rpipe_ep_disable(&hwahc->wa, ep);
 }
 
+static int __hwahc_op_wusbhc_start(struct wusbhc *wusbhc)
+{
+	int result;
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct device *dev = &hwahc->wa.usb_iface->dev;
+
+	result = __wa_set_feature(&hwahc->wa, WA_ENABLE);
+	if (result < 0) {
+		dev_err(dev, "error commanding HC to start: %d\n", result);
+		goto error_stop;
+	}
+	result = __wa_wait_status(&hwahc->wa, WA_ENABLE, WA_ENABLE);
+	if (result < 0) {
+		dev_err(dev, "error waiting for HC to start: %d\n", result);
+		goto error_stop;
+	}
+	result = wa_nep_arm(&hwahc->wa, GFP_KERNEL);
+	if (result < 0) {
+		dev_err(dev, "cannot listen to notifications: %d\n", result);
+		goto error_stop;
+	}
+	return result;
+
+error_stop:
+	__wa_clear_feature(&hwahc->wa, WA_ENABLE);
+	return result;
+}
+
+static void __hwahc_op_wusbhc_stop(struct wusbhc *wusbhc, int delay)
+{
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
+	int ret;
+
+	ret = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			      WUSB_REQ_CHAN_STOP,
+			      USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			      delay * 1000,
+			      iface_no,
+			      NULL, 0, 1000 /* FIXME: arbitrary */);
+	if (ret == 0)
+		msleep(delay);
+
+	wa_nep_disarm(&hwahc->wa);
+	__wa_stop(&hwahc->wa);
+}
+
 /*
  * Set the UWB MAS allocation for the WUSB cluster
  *
diff --git a/drivers/usb/host/whci/whcd.h b/drivers/usb/host/whci/whcd.h
index 1d2a53b..1bbb8cb 100644
--- a/drivers/usb/host/whci/whcd.h
+++ b/drivers/usb/host/whci/whcd.h
@@ -136,7 +136,7 @@ int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len);
 
 /* wusb.c */
 int whc_wusbhc_start(struct wusbhc *wusbhc);
-void whc_wusbhc_stop(struct wusbhc *wusbhc);
+void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay);
 int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
 		  u8 handle, struct wuie_hdr *wuie);
 int whc_mmcie_rm(struct wusbhc *wusbhc, u8 handle);
diff --git a/drivers/usb/host/whci/whci-hc.h b/drivers/usb/host/whci/whci-hc.h
index bff1eb7..51df7e3 100644
--- a/drivers/usb/host/whci/whci-hc.h
+++ b/drivers/usb/host/whci/whci-hc.h
@@ -410,6 +410,8 @@ struct dn_buf_entry {
 #  define WUSBDNTSCTRL_SLOTS(s)    ((s) << 0)
 
 #define WUSBTIME             0x68
+#  define WUSBTIME_CHANNEL_TIME_MASK 0x00ffffff
+
 #define WUSBBPST             0x6c
 #define WUSBDIBUPDATED       0x70
 
diff --git a/drivers/usb/host/whci/wusb.c b/drivers/usb/host/whci/wusb.c
index 66e4ddc..2befd47 100644
--- a/drivers/usb/host/whci/wusb.c
+++ b/drivers/usb/host/whci/wusb.c
@@ -64,8 +64,9 @@ static int whc_update_di(struct whc *whc, int idx)
 }
 
 /*
- * WHCI starts and stops MMCs based on there being a valid GTK so
- * these need only start/stop the asynchronous and periodic schedules.
+ * WHCI starts MMCs based on there being a valid GTK so these need
+ * only start/stop the asynchronous and periodic schedules and send a
+ * channel stop command.
  */
 
 int whc_wusbhc_start(struct wusbhc *wusbhc)
@@ -78,12 +79,20 @@ int whc_wusbhc_start(struct wusbhc *wusbhc)
 	return 0;
 }
 
-void whc_wusbhc_stop(struct wusbhc *wusbhc)
+void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay)
 {
 	struct whc *whc = wusbhc_to_whc(wusbhc);
+	u32 stop_time, now_time;
+	int ret;
 
 	pzl_stop(whc);
 	asl_stop(whc);
+
+	now_time = le_readl(whc->base + WUSBTIME) & WUSBTIME_CHANNEL_TIME_MASK;
+	stop_time = (now_time + ((delay * 8) << 7)) & 0x00ffffff;
+	ret = whc_do_gencmd(whc, WUSBGENCMDSTS_CHAN_STOP, stop_time, NULL, 0);
+	if (ret == 0)
+		msleep(delay);
 }
 
 int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
diff --git a/drivers/usb/wusbcore/mmc.c b/drivers/usb/wusbcore/mmc.c
index cfa77a0..af2aee0 100644
--- a/drivers/usb/wusbcore/mmc.c
+++ b/drivers/usb/wusbcore/mmc.c
@@ -250,18 +250,14 @@ error_alloc:
  * wusbhc_stop - stop transmitting MMCs
  * @wusbhc: the HC to stop
  *
- * Send a Host Disconnect IE, wait, remove all the MMCs (stop sending MMCs).
- *
- * If we can't allocate a Host Stop IE, screw it, we don't notify the
- * devices we are disconnecting...
+ * Stops the WUSB channel and removes the cluster reservation.
  */
 void wusbhc_stop(struct wusbhc *wusbhc)
 {
 	if (wusbhc->active) {
 		wusbhc->active = 0;
-		wusbhc->stop(wusbhc);
+		wusbhc->stop(wusbhc, WUSB_CHANNEL_STOP_DELAY_MS);
 		wusbhc_sec_stop(wusbhc);
-		__wusbhc_host_disconnect_ie(wusbhc);
 		wusbhc_devconnect_stop(wusbhc);
 		wusbhc_rsv_terminate(wusbhc);
 	}
diff --git a/drivers/usb/wusbcore/wusbhc.h b/drivers/usb/wusbcore/wusbhc.h
index d0c1324..b9bdf5a 100644
--- a/drivers/usb/wusbcore/wusbhc.h
+++ b/drivers/usb/wusbcore/wusbhc.h
@@ -64,6 +64,13 @@
 #include <linux/uwb.h>
 #include <linux/usb/wusb.h>
 
+/*
+ * Time from a WUSB channel stop request to the last transmitted MMC.
+ *
+ * This needs to be > 4.096 ms in case no MMCs can be transmitted in
+ * zone 0.
+ */
+#define WUSB_CHANNEL_STOP_DELAY_MS 8
 
 /**
  * Wireless USB device
@@ -198,21 +205,18 @@ struct wusb_port {
  * @mmcies_max	   Max number of Information Elements this HC can send
  *                 in its MMC. Read-only.
  *
+ * @start          Start the WUSB channel.
+ *
+ * @stop           Stop the WUSB channel after the specified number of
+ *                 milliseconds.  Channel Stop IEs should be transmitted
+ *                 as required by [WUSB] 4.16.2.1.
+ *
  * @mmcie_add	   HC specific operation (WHCI or HWA) for adding an
  *                 MMCIE.
  *
  * @mmcie_rm	   HC specific operation (WHCI or HWA) for removing an
  *                 MMCIE.
  *
- * @enc_types	   Array which describes the encryptions methods
- *                 supported by the host as described in WUSB1.0 --
- *                 one entry per supported method. As of WUSB1.0 there
- *                 is only four methods, we make space for eight just in
- *                 case they decide to add some more (and pray they do
- *                 it in sequential order). if 'enc_types[enc_method]
- *                 != 0', then it is supported by the host. enc_method
- *                 is USB_ENC_TYPE*.
- *
  * @set_ptk:       Set the PTK and enable encryption for a device. Or, if
  *                 the supplied key is NULL, disable encryption for that
  *                 device.
@@ -269,7 +273,7 @@ struct wusbhc {
 	u8 mmcies_max;
 	/* FIXME: make wusbhc_ops? */
 	int (*start)(struct wusbhc *wusbhc);
-	void (*stop)(struct wusbhc *wusbhc);
+	void (*stop)(struct wusbhc *wusbhc, int delay);
 	int (*mmcie_add)(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
 			 u8 handle, struct wuie_hdr *wuie);
 	int (*mmcie_rm)(struct wusbhc *wusbhc, u8 handle);
diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h
index a102561..fb7c359 100644
--- a/include/linux/usb/wusb-wa.h
+++ b/include/linux/usb/wusb-wa.h
@@ -51,6 +51,7 @@ enum {
 	WUSB_REQ_GET_TIME       = 25,
 	WUSB_REQ_SET_STREAM_IDX = 26,
 	WUSB_REQ_SET_WUSB_MAS   = 27,
+	WUSB_REQ_CHAN_STOP      = 28,
 };
 
 
-- 
cgit v0.10.2


From 1cde7f68ced8d10a20dd2370e9d1d22ab3c1ea5c Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 27 Oct 2008 16:48:09 +0000
Subject: uwb: order IEs by element ID

ECMA-368 requires that IEs in a beacon must be sorted by element ID.  Most
hardware uses the ordering in the Set IE URC command so get the ordering
right on the host.

Also refactor the IE management code:
  - use uwb_ie_next() instead of uwb_ie_for_each().
  - remove unnecessary functions.
  - API is now only uwb_rc_ie_add() and uwb_rc_ie_rm().

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
index 46b18ee..ad82398 100644
--- a/drivers/uwb/beacon.c
+++ b/drivers/uwb/beacon.c
@@ -349,22 +349,22 @@ ssize_t uwb_bce_print_IEs(struct uwb_dev *uwb_dev, struct uwb_beca_e *bce,
 	ssize_t result = 0;
 	struct uwb_rc_evt_beacon *be;
 	struct uwb_beacon_frame *bf;
-	struct uwb_buf_ctx ctx = {
-		.buf = buf,
-		.bytes = 0,
-		.size = size
-	};
+	int ies_len;
+	struct uwb_ie_hdr *ies;
 
 	mutex_lock(&bce->mutex);
+
 	be = bce->be;
-	if (be == NULL)
-		goto out;
-	bf = (void *) be->BeaconInfo;
-	uwb_ie_for_each(uwb_dev, uwb_ie_dump_hex, &ctx,
-			bf->IEData, be->wBeaconInfoLength - sizeof(*bf));
-	result = ctx.bytes;
-out:
+	if (be) {
+		bf = (struct uwb_beacon_frame *)bce->be->BeaconInfo;
+		ies_len = be->wBeaconInfoLength - sizeof(struct uwb_beacon_frame);
+		ies = (struct uwb_ie_hdr *)bf->IEData;
+
+		result = uwb_ie_dump_hex(ies, ies_len, buf, size);
+	}
+
 	mutex_unlock(&bce->mutex);
+
 	return result;
 }
 
diff --git a/drivers/uwb/ie.c b/drivers/uwb/ie.c
index cf6f3d1..ab97668 100644
--- a/drivers/uwb/ie.c
+++ b/drivers/uwb/ie.c
@@ -25,8 +25,6 @@
  */
 
 #include "uwb-internal.h"
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 /**
  * uwb_ie_next - get the next IE in a buffer
@@ -61,6 +59,42 @@ struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len)
 EXPORT_SYMBOL_GPL(uwb_ie_next);
 
 /**
+ * uwb_ie_dump_hex - print IEs to a character buffer
+ * @ies: the IEs to print.
+ * @len: length of all the IEs.
+ * @buf: the destination buffer.
+ * @size: size of @buf.
+ *
+ * Returns the number of characters written.
+ */
+int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len,
+		    char *buf, size_t size)
+{
+	void *ptr;
+	const struct uwb_ie_hdr *ie;
+	int r = 0;
+	u8 *d;
+
+	ptr = (void *)ies;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &len);
+		if (!ie)
+			break;
+
+		r += scnprintf(buf + r, size - r, "%02x %02x",
+			       (unsigned)ie->element_id,
+			       (unsigned)ie->length);
+		d = (uint8_t *)ie + sizeof(struct uwb_ie_hdr);
+		while (d != ptr && r < size)
+			r += scnprintf(buf + r, size - r, " %02x", (unsigned)*d++);
+		if (r < size)
+			buf[r++] = '\n';
+	};
+
+	return r;
+}
+
+/**
  * Get the IEs that a radio controller is sending in its beacon
  *
  * @uwb_rc:  UWB Radio Controller
@@ -70,6 +104,7 @@ EXPORT_SYMBOL_GPL(uwb_ie_next);
  * anything. Once done with the iedata buffer, call
  * uwb_rc_ie_release(iedata). Don't call kfree on it.
  */
+static
 ssize_t uwb_rc_get_ie(struct uwb_rc *uwb_rc, struct uwb_rc_evt_get_ie **pget_ie)
 {
 	ssize_t result;
@@ -78,148 +113,35 @@ ssize_t uwb_rc_get_ie(struct uwb_rc *uwb_rc, struct uwb_rc_evt_get_ie **pget_ie)
 	struct uwb_rceb *reply = NULL;
 	struct uwb_rc_evt_get_ie *get_ie;
 
-	d_fnstart(3, dev, "(%p, %p)\n", uwb_rc, pget_ie);
-	result = -ENOMEM;
 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
 	if (cmd == NULL)
-		goto error_kzalloc;
+		return -ENOMEM;
+
 	cmd->bCommandType = UWB_RC_CET_GENERAL;
 	cmd->wCommand = cpu_to_le16(UWB_RC_CMD_GET_IE);
 	result = uwb_rc_vcmd(uwb_rc, "GET_IE", cmd, sizeof(*cmd),
 			     UWB_RC_CET_GENERAL, UWB_RC_CMD_GET_IE,
 			     &reply);
+	kfree(cmd);
 	if (result < 0)
-		goto error_cmd;
+		return result;
+
 	get_ie = container_of(reply, struct uwb_rc_evt_get_ie, rceb);
 	if (result < sizeof(*get_ie)) {
 		dev_err(dev, "not enough data returned for decoding GET IE "
 			"(%zu bytes received vs %zu needed)\n",
 			result, sizeof(*get_ie));
-		result = -EINVAL;
+		return -EINVAL;
 	} else if (result < sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength)) {
 		dev_err(dev, "not enough data returned for decoding GET IE "
 			"payload (%zu bytes received vs %zu needed)\n", result,
 			sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength));
-		result = -EINVAL;
-	} else
-		*pget_ie = get_ie;
-error_cmd:
-	kfree(cmd);
-error_kzalloc:
-	d_fnend(3, dev, "(%p, %p) = %d\n", uwb_rc, pget_ie, (int)result);
-	return result;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_get_ie);
-
-
-/*
- * Given a pointer to an IE, print it in ASCII/hex followed by a new line
- *
- * @ie_hdr: pointer to the IE header. Length is in there, and it is
- *          guaranteed that the ie_hdr->length bytes following it are
- *          safely accesible.
- *
- * @_data: context data passed from uwb_ie_for_each(), an struct output_ctx
- */
-int uwb_ie_dump_hex(struct uwb_dev *uwb_dev, const struct uwb_ie_hdr *ie_hdr,
-		    size_t offset, void *_ctx)
-{
-	struct uwb_buf_ctx *ctx = _ctx;
-	const u8 *pl = (void *)(ie_hdr + 1);
-	u8 pl_itr;
-
-	ctx->bytes += scnprintf(ctx->buf + ctx->bytes, ctx->size - ctx->bytes,
-				"%02x %02x ", (unsigned) ie_hdr->element_id,
-				(unsigned) ie_hdr->length);
-	pl_itr = 0;
-	while (pl_itr < ie_hdr->length && ctx->bytes < ctx->size)
-		ctx->bytes += scnprintf(ctx->buf + ctx->bytes,
-					ctx->size - ctx->bytes,
-					"%02x ", (unsigned) pl[pl_itr++]);
-	if (ctx->bytes < ctx->size)
-		ctx->buf[ctx->bytes++] = '\n';
-	return 0;
-}
-EXPORT_SYMBOL_GPL(uwb_ie_dump_hex);
-
-
-/**
- * Verify that a pointer in a buffer points to valid IE
- *
- * @start: pointer to start of buffer in which IE appears
- * @itr:   pointer to IE inside buffer that will be verified
- * @top:   pointer to end of buffer
- *
- * @returns: 0 if IE is valid, <0 otherwise
- *
- * Verification involves checking that the buffer can contain a
- * header and the amount of data reported in the IE header can be found in
- * the buffer.
- */
-static
-int uwb_rc_ie_verify(struct uwb_dev *uwb_dev, const void *start,
-		     const void *itr, const void *top)
-{
-	struct device *dev = &uwb_dev->dev;
-	const struct uwb_ie_hdr *ie_hdr;
-
-	if (top - itr < sizeof(*ie_hdr)) {
-		dev_err(dev, "Bad IE: no data to decode header "
-			"(%zu bytes left vs %zu needed) at offset %zu\n",
-			top - itr, sizeof(*ie_hdr), itr - start);
-		return -EINVAL;
-	}
-	ie_hdr = itr;
-	itr += sizeof(*ie_hdr);
-	if (top - itr < ie_hdr->length) {
-		dev_err(dev, "Bad IE: not enough data for payload "
-			"(%zu bytes left vs %zu needed) at offset %zu\n",
-			top - itr, (size_t)ie_hdr->length,
-			(void *)ie_hdr - start);
 		return -EINVAL;
 	}
-	return 0;
-}
 
-
-/**
- * Walk a buffer filled with consecutive IE's a buffer
- *
- * @uwb_dev: UWB device this IEs belong to (for err messages mainly)
- *
- * @fn: function to call with each IE; if it returns 0, we keep
- *      traversing the buffer. If it returns !0, we'll stop and return
- *      that value.
- *
- * @data: pointer passed to @fn
- *
- * @buf: buffer where the consecutive IEs are located
- *
- * @size: size of @buf
- *
- * Each IE is checked for basic correctness (there is space left for
- * the header and the payload). If that test is failed, we stop
- * processing. For every good IE, @fn is called.
- */
-ssize_t uwb_ie_for_each(struct uwb_dev *uwb_dev, uwb_ie_f fn, void *data,
-			const void *buf, size_t size)
-{
-	ssize_t result = 0;
-	const struct uwb_ie_hdr *ie_hdr;
-	const void *itr = buf, *top = itr + size;
-
-	while (itr < top) {
-		if (uwb_rc_ie_verify(uwb_dev, buf, itr, top) != 0)
-			break;
-		ie_hdr = itr;
-		itr += sizeof(*ie_hdr) + ie_hdr->length;
-		result = fn(uwb_dev, ie_hdr, itr - buf, data);
-		if (result != 0)
-			break;
-	}
+	*pget_ie = get_ie;
 	return result;
 }
-EXPORT_SYMBOL_GPL(uwb_ie_for_each);
 
 
 /**
@@ -256,70 +178,6 @@ error_cmd:
 	return result;
 }
 
-/**
- * Determine by IE id if IE is host settable
- * WUSB 1.0 [8.6.2.8 Table 8.85]
- *
- * EXCEPTION:
- * All but UWB_IE_WLP appears in Table 8.85 from WUSB 1.0. Setting this IE
- * is required for the WLP substack to perform association with its WSS so
- * we hope that the WUSB spec will be changed to reflect this.
- */
-static
-int uwb_rc_ie_is_host_settable(enum uwb_ie element_id)
-{
-	if (element_id == UWB_PCA_AVAILABILITY ||
-	    element_id == UWB_BP_SWITCH_IE ||
-	    element_id == UWB_MAC_CAPABILITIES_IE ||
-	    element_id == UWB_PHY_CAPABILITIES_IE ||
-	    element_id == UWB_APP_SPEC_PROBE_IE ||
-	    element_id == UWB_IDENTIFICATION_IE ||
-	    element_id == UWB_MASTER_KEY_ID_IE ||
-	    element_id == UWB_IE_WLP ||
-	    element_id == UWB_APP_SPEC_IE)
-		return 1;
-	return 0;
-}
-
-
-/**
- * Extract Host Settable IEs from IE
- *
- * @ie_data: pointer to buffer containing all IEs
- * @size:    size of buffer
- *
- * @returns: length of buffer that only includes host settable IEs
- *
- * Given a buffer of IEs we move all Host Settable IEs to front of buffer
- * by overwriting the IEs that are not Host Settable.
- * Buffer length is adjusted accordingly.
- */
-static
-ssize_t uwb_rc_parse_host_settable_ie(struct uwb_dev *uwb_dev,
-				      void *ie_data, size_t size)
-{
-	size_t new_len = size;
-	struct uwb_ie_hdr *ie_hdr;
-	size_t ie_length;
-	void *itr = ie_data, *top = itr + size;
-
-	while (itr < top) {
-		if (uwb_rc_ie_verify(uwb_dev, ie_data, itr, top) != 0)
-			break;
-		ie_hdr = itr;
-		ie_length = sizeof(*ie_hdr) + ie_hdr->length;
-		if (uwb_rc_ie_is_host_settable(ie_hdr->element_id)) {
-			itr += ie_length;
-		} else {
-			memmove(itr, itr + ie_length, top - (itr + ie_length));
-			new_len -= ie_length;
-			top -= ie_length;
-		}
-	}
-	return new_len;
-}
-
-
 /* Cleanup the whole IE management subsystem */
 void uwb_rc_ie_init(struct uwb_rc *uwb_rc)
 {
@@ -328,49 +186,34 @@ void uwb_rc_ie_init(struct uwb_rc *uwb_rc)
 
 
 /**
- * Set up cache for host settable IEs currently being transmitted
+ * uwb_rc_ie_setup - setup a radio controller's IE manager
+ * @uwb_rc: the radio controller.
  *
- * First we just call GET-IE to get the current IEs being transmitted
- * (or we workaround and pretend we did) and (because the format is
- * the same) reuse that as the IE cache (with the command prefix, as
- * explained in 'struct uwb_rc').
+ * The current set of IEs are obtained from the hardware with a GET-IE
+ * command (since the radio controller is not yet beaconing this will
+ * be just the hardware's MAC and PHY Capability IEs).
  *
- * @returns: size of cache created
+ * Returns 0 on success; -ve on an error.
  */
-ssize_t uwb_rc_ie_setup(struct uwb_rc *uwb_rc)
+int uwb_rc_ie_setup(struct uwb_rc *uwb_rc)
 {
-	struct device *dev = &uwb_rc->uwb_dev.dev;
-	ssize_t result;
-	size_t capacity;
-	struct uwb_rc_evt_get_ie *ie_info;
+	struct uwb_rc_evt_get_ie *ie_info = NULL;
+	int capacity;
+
+	capacity = uwb_rc_get_ie(uwb_rc, &ie_info);
+	if (capacity < 0)
+		return capacity;
 
-	d_fnstart(3, dev, "(%p)\n", uwb_rc);
 	mutex_lock(&uwb_rc->ies_mutex);
-	result = uwb_rc_get_ie(uwb_rc, &ie_info);
-	if (result < 0)
-		goto error_get_ie;
-	capacity = result;
-	d_printf(5, dev, "Got IEs %zu bytes (%zu long at %p)\n", result,
-		 (size_t)le16_to_cpu(ie_info->wIELength), ie_info);
-
-	/* Remove IEs that host should not set. */
-	result = uwb_rc_parse_host_settable_ie(&uwb_rc->uwb_dev,
-			ie_info->IEData, le16_to_cpu(ie_info->wIELength));
-	if (result < 0)
-		goto error_parse;
-	d_printf(5, dev, "purged non-settable IEs to %zu bytes\n", result);
-	uwb_rc->ies = (void *) ie_info;
+
+	uwb_rc->ies = (struct uwb_rc_cmd_set_ie *)ie_info;
 	uwb_rc->ies->rccb.bCommandType = UWB_RC_CET_GENERAL;
 	uwb_rc->ies->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_SET_IE);
 	uwb_rc->ies_capacity = capacity;
-	d_printf(5, dev, "IE cache at %p %zu bytes, %zu capacity\n",
-		 ie_info, result, capacity);
-	result = 0;
-error_parse:
-error_get_ie:
+
 	mutex_unlock(&uwb_rc->ies_mutex);
-	d_fnend(3, dev, "(%p) = %zu\n", uwb_rc, result);
-	return result;
+
+	return 0;
 }
 
 
@@ -383,26 +226,47 @@ void uwb_rc_ie_release(struct uwb_rc *uwb_rc)
 }
 
 
-static
-int __acc_size(struct uwb_dev *uwb_dev, const struct uwb_ie_hdr *ie_hdr,
-	       size_t offset, void *_ctx)
+static int uwb_rc_ie_add_one(struct uwb_rc *rc, const struct uwb_ie_hdr *new_ie)
 {
-	size_t *acc_size = _ctx;
-	*acc_size += sizeof(*ie_hdr) + ie_hdr->length;
-	d_printf(6, &uwb_dev->dev, "new acc size %zu\n", *acc_size);
+	struct uwb_rc_cmd_set_ie *new_ies;
+	void *ptr, *prev_ie;
+	struct uwb_ie_hdr *ie;
+	size_t length, new_ie_len, new_capacity, size, prev_size;
+
+	length = le16_to_cpu(rc->ies->wIELength);
+	new_ie_len = sizeof(struct uwb_ie_hdr) + new_ie->length;
+	new_capacity = sizeof(struct uwb_rc_cmd_set_ie) + length + new_ie_len;
+
+	if (new_capacity > rc->ies_capacity) {
+		new_ies = krealloc(rc->ies, new_capacity, GFP_KERNEL);
+		if (!new_ies)
+			return -ENOMEM;
+		rc->ies = new_ies;
+	}
+
+	ptr = rc->ies->IEData;
+	size = length;
+	for (;;) {
+		prev_ie = ptr;
+		prev_size = size;
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie || ie->element_id > new_ie->element_id)
+			break;
+	}
+
+	memmove(prev_ie + new_ie_len, prev_ie, prev_size);
+	memcpy(prev_ie, new_ie, new_ie_len);
+	rc->ies->wIELength = cpu_to_le16(length + new_ie_len);
+
 	return 0;
 }
 
-
 /**
- * Add a new IE to IEs currently being transmitted by device
- *
+ * uwb_rc_ie_add - add new IEs to the radio controller's beacon
+ * @uwb_rc: the radio controller.
  * @ies: the buffer containing the new IE or IEs to be added to
- *       the device's beacon. The buffer will be verified for
- *       consistence (meaning the headers should be right) and
- *       consistent with the buffer size.
- * @size: size of @ies (in bytes, total buffer size)
- * @returns: 0 if ok, <0 errno code on error
+ *       the device's beacon.
+ * @size: length of all the IEs.
  *
  * According to WHCI 0.95 [4.13.6] the driver will only receive the RCEB
  * after the device sent the first beacon that includes the IEs specified
@@ -411,66 +275,40 @@ int __acc_size(struct uwb_dev *uwb_dev, const struct uwb_ie_hdr *ie_hdr,
  * we start beaconing.
  *
  * Setting an IE on the device will overwrite all current IEs in device. So
- * we take the current IEs being transmitted by the device, append the
+ * we take the current IEs being transmitted by the device, insert the
  * new one, and call SET IE with all the IEs needed.
  *
- * The local IE cache will only be updated with the new IE if SET IE
- * completed successfully.
+ * Returns 0 on success; or -ENOMEM.
  */
 int uwb_rc_ie_add(struct uwb_rc *uwb_rc,
 		  const struct uwb_ie_hdr *ies, size_t size)
 {
 	int result = 0;
-	struct device *dev = &uwb_rc->uwb_dev.dev;
-	struct uwb_rc_cmd_set_ie *new_ies;
-	size_t ies_size, total_size, acc_size = 0;
-
-	if (uwb_rc->ies == NULL)
-		return -ESHUTDOWN;
-	uwb_ie_for_each(&uwb_rc->uwb_dev, __acc_size, &acc_size, ies, size);
-	if (acc_size != size) {
-		dev_err(dev, "BUG: bad IEs, misconstructed headers "
-			"[%zu bytes reported vs %zu calculated]\n",
-			size, acc_size);
-		WARN_ON(1);
-		return -EINVAL;
-	}
+	void *ptr;
+	const struct uwb_ie_hdr *ie;
+
 	mutex_lock(&uwb_rc->ies_mutex);
-	ies_size = le16_to_cpu(uwb_rc->ies->wIELength);
-	total_size = sizeof(*uwb_rc->ies) + ies_size;
-	if (total_size + size > uwb_rc->ies_capacity) {
-		d_printf(4, dev, "Reallocating IE cache from %p capacity %zu "
-			 "to capacity %zu\n", uwb_rc->ies, uwb_rc->ies_capacity,
-			 total_size + size);
-		new_ies = kzalloc(total_size + size, GFP_KERNEL);
-		if (new_ies == NULL) {
-			dev_err(dev, "No memory for adding new IE\n");
-			result = -ENOMEM;
-			goto error_alloc;
-		}
-		memcpy(new_ies, uwb_rc->ies, total_size);
-		uwb_rc->ies_capacity = total_size + size;
-		kfree(uwb_rc->ies);
-		uwb_rc->ies = new_ies;
-		d_printf(4, dev, "New IE cache at %p capacity %zu\n",
-			 uwb_rc->ies, uwb_rc->ies_capacity);
+
+	ptr = (void *)ies;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie)
+			break;
+
+		result = uwb_rc_ie_add_one(uwb_rc, ie);
+		if (result < 0)
+			break;
 	}
-	memcpy((void *)uwb_rc->ies + total_size, ies, size);
-	uwb_rc->ies->wIELength = cpu_to_le16(ies_size + size);
-	if (uwb_rc->beaconing != -1) {
-		result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
-		if (result < 0) {
-			dev_err(dev, "Cannot set new IE on device: %d\n",
-				result);
-			uwb_rc->ies->wIELength = cpu_to_le16(ies_size);
+	if (result >= 0) {
+		if (size == 0) {
+			if (uwb_rc->beaconing != -1)
+				result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
 		} else
-			result = 0;
+			result = -EINVAL;
 	}
-	d_printf(4, dev, "IEs now occupy %hu bytes of %zu capacity at %p\n",
-		 le16_to_cpu(uwb_rc->ies->wIELength), uwb_rc->ies_capacity,
-		 uwb_rc->ies);
-error_alloc:
+
 	mutex_unlock(&uwb_rc->ies_mutex);
+
 	return result;
 }
 EXPORT_SYMBOL_GPL(uwb_rc_ie_add);
@@ -489,53 +327,52 @@ EXPORT_SYMBOL_GPL(uwb_rc_ie_add);
  * beacon. We don't reallocate, we just mark the size smaller.
  */
 static
-int uwb_rc_ie_cache_rm(struct uwb_rc *uwb_rc, enum uwb_ie to_remove)
+void uwb_rc_ie_cache_rm(struct uwb_rc *uwb_rc, enum uwb_ie to_remove)
 {
-	struct uwb_ie_hdr *ie_hdr;
-	size_t new_len = le16_to_cpu(uwb_rc->ies->wIELength);
-	void *itr = uwb_rc->ies->IEData;
-	void *top = itr + new_len;
-
-	while (itr < top) {
-		ie_hdr = itr;
-		if (ie_hdr->element_id != to_remove) {
-			itr += sizeof(*ie_hdr) + ie_hdr->length;
-		} else {
-			int ie_length;
-			ie_length = sizeof(*ie_hdr) + ie_hdr->length;
-			if (top - itr != ie_length)
-				memmove(itr, itr + ie_length, top - itr + ie_length);
-			top -= ie_length;
-			new_len -= ie_length;
+	struct uwb_ie_hdr *ie;
+	size_t len = le16_to_cpu(uwb_rc->ies->wIELength);
+	void *ptr;
+	size_t size;
+
+	ptr = uwb_rc->ies->IEData;
+	size = len;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie)
+			break;
+		if (ie->element_id == to_remove) {
+			len -= sizeof(struct uwb_ie_hdr) + ie->length;
+			memmove(ie, ptr, size);
+			ptr = ie;
 		}
 	}
-	uwb_rc->ies->wIELength = cpu_to_le16(new_len);
-	return 0;
+	uwb_rc->ies->wIELength = cpu_to_le16(len);
 }
 
 
 /**
- * Remove an IE currently being transmitted by device
+ * uwb_rc_ie_rm - remove an IE from the radio controller's beacon
+ * @uwb_rc: the radio controller.
+ * @element_id: the element ID of the IE to remove.
  *
- * @element_id: id of IE to be removed from device's beacon
+ * Only IEs previously added with uwb_rc_ie_add() may be removed.
+ *
+ * Returns 0 on success; or -ve the SET-IE command to the radio
+ * controller failed.
  */
 int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id)
 {
-	struct device *dev = &uwb_rc->uwb_dev.dev;
-	int result;
+	int result = 0;
 
-	if (uwb_rc->ies == NULL)
-		return -ESHUTDOWN;
 	mutex_lock(&uwb_rc->ies_mutex);
-	result = uwb_rc_ie_cache_rm(uwb_rc, element_id);
-	if (result < 0)
-		dev_err(dev, "Cannot remove IE from cache.\n");
-	if (uwb_rc->beaconing != -1) {
+
+	uwb_rc_ie_cache_rm(uwb_rc, element_id);
+
+	if (uwb_rc->beaconing != -1)
 		result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
-		if (result < 0)
-			dev_err(dev, "Cannot set new IE on device.\n");
-	}
+
 	mutex_unlock(&uwb_rc->ies_mutex);
+
 	return result;
 }
 EXPORT_SYMBOL_GPL(uwb_rc_ie_rm);
diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c
index ee5772f..1129e87 100644
--- a/drivers/uwb/lc-rc.c
+++ b/drivers/uwb/lc-rc.c
@@ -468,28 +468,3 @@ void uwb_rc_put(struct uwb_rc *rc)
 	__uwb_rc_put(rc);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_put);
-
-/*
- *
- *
- */
-ssize_t uwb_rc_print_IEs(struct uwb_rc *uwb_rc, char *buf, size_t size)
-{
-	ssize_t result;
-	struct uwb_rc_evt_get_ie *ie_info;
-	struct uwb_buf_ctx ctx;
-
-	result = uwb_rc_get_ie(uwb_rc, &ie_info);
-	if (result < 0)
-		goto error_get_ie;
-	ctx.buf = buf;
-	ctx.size = size;
-	ctx.bytes = 0;
-	uwb_ie_for_each(&uwb_rc->uwb_dev, uwb_ie_dump_hex, &ctx,
-			ie_info->IEData, result - sizeof(*ie_info));
-	result = ctx.bytes;
-	kfree(ie_info);
-error_get_ie:
-	return result;
-}
-
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index 2ad307d..983ebc4 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -66,14 +66,14 @@ extern int uwb_rc_scan(struct uwb_rc *rc,
 		       unsigned channel, enum uwb_scan_type type,
 		       unsigned bpst_offset);
 extern int uwb_rc_send_all_drp_ie(struct uwb_rc *rc);
-extern ssize_t uwb_rc_print_IEs(struct uwb_rc *rc, char *, size_t);
-extern void uwb_rc_ie_init(struct uwb_rc *);
-extern void uwb_rc_ie_init(struct uwb_rc *);
-extern ssize_t uwb_rc_ie_setup(struct uwb_rc *);
-extern void uwb_rc_ie_release(struct uwb_rc *);
-extern int uwb_rc_ie_add(struct uwb_rc *,
-			 const struct uwb_ie_hdr *, size_t);
-extern int uwb_rc_ie_rm(struct uwb_rc *, enum uwb_ie);
+
+void uwb_rc_ie_init(struct uwb_rc *);
+int uwb_rc_ie_setup(struct uwb_rc *);
+void uwb_rc_ie_release(struct uwb_rc *);
+int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len,
+		    char *buf, size_t size);
+int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *);
+
 
 extern const char *uwb_rc_strerror(unsigned code);
 
diff --git a/drivers/uwb/wlp/wlp-internal.h b/drivers/uwb/wlp/wlp-internal.h
index 1c94fab..3e8d5de 100644
--- a/drivers/uwb/wlp/wlp-internal.h
+++ b/drivers/uwb/wlp/wlp-internal.h
@@ -42,10 +42,6 @@ enum wlp_wss_connect {
 extern struct kobj_type wss_ktype;
 extern struct attribute_group wss_attr_group;
 
-extern int uwb_rc_ie_add(struct uwb_rc *, const struct uwb_ie_hdr *, size_t);
-extern int uwb_rc_ie_rm(struct uwb_rc *, enum uwb_ie);
-
-
 /* This should be changed to a dynamic array where entries are sorted
  * by eth_addr and search is done in a binary form
  *
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index 010ee70..6d93f54 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -444,7 +444,6 @@ ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name,
 		    struct uwb_rccb *cmd, size_t cmd_size,
 		    u8 expected_type, u16 expected_event,
 		    struct uwb_rceb **preply);
-ssize_t uwb_rc_get_ie(struct uwb_rc *, struct uwb_rc_evt_get_ie **);
 int uwb_bg_joined(struct uwb_rc *rc);
 
 size_t __uwb_addr_print(char *, size_t, const unsigned char *, int);
@@ -653,22 +652,9 @@ static inline int edc_inc(struct edc *err_hist, u16 max_err, u16 timeframe)
 
 /* Information Element handling */
 
-/* For representing the state of writing to a buffer when iterating */
-struct uwb_buf_ctx {
-	char *buf;
-	size_t bytes, size;
-};
-
-typedef int (*uwb_ie_f)(struct uwb_dev *, const struct uwb_ie_hdr *,
-			size_t, void *);
 struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len);
-ssize_t uwb_ie_for_each(struct uwb_dev *uwb_dev, uwb_ie_f fn, void *data,
-			const void *buf, size_t size);
-int uwb_ie_dump_hex(struct uwb_dev *, const struct uwb_ie_hdr *,
-		    size_t, void *);
-int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *);
-struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len);
-
+int uwb_rc_ie_add(struct uwb_rc *uwb_rc, const struct uwb_ie_hdr *ies, size_t size);
+int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id);
 
 /*
  * Transmission statistics
-- 
cgit v0.10.2


From 4656d5de9555e263c5b4c0462b5af7e7bded1b42 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 27 Oct 2008 17:12:33 +0000
Subject: wusb: reset WUSB devices with SetAddress(0)

Using a Reset Device IE to reset a WUSB device is too heavyweight as it
causes the devcie to disconnect (which the USB stack does not expect and
cannot handle).  Instead, do a SetAddress(0); SetAddress(AuthAddr) for
authenticated devices.

Unauthenticated devices will not be reset and the stack will have to rely
on the device timing out after TrustTimeout and disconnecting.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c
index f45d777..c01c7a8 100644
--- a/drivers/usb/wusbcore/devconnect.c
+++ b/drivers/usb/wusbcore/devconnect.c
@@ -57,9 +57,6 @@
  *                              Called by notif.c:wusb_handle_dn_connect()
  *                              when a DN_Connect is received.
  *
- *   wusbhc_devconnect_auth()   Called by rh.c:wusbhc_rh_port_reset() when
- *                              doing the device connect sequence.
- *
  *     wusb_devconnect_acked()  Ack done, release resources.
  *
  *   wusb_handle_dn_alive()     Called by notif.c:wusb_handle_dn()
@@ -69,9 +66,6 @@
  *                              process a disconenct request from a
  *                              device.
  *
- *   wusb_dev_reset()           Called by rh.c:wusbhc_rh_port_reset() when
- *                              resetting a device.
- *
  *   __wusb_dev_disable()       Called by rh.c:wusbhc_rh_clear_port_feat() when
  *                              disabling a port.
  *
@@ -366,12 +360,10 @@ void wusbhc_devconnect_ack(struct wusbhc *wusbhc, struct wusb_dn_connect *dnc,
 	port->wusb_dev = wusb_dev;
 	port->status |= USB_PORT_STAT_CONNECTION;
 	port->change |= USB_PORT_STAT_C_CONNECTION;
-	port->reset_count = 0;
 	/* Now the port status changed to connected; khubd will
 	 * pick the change up and try to reset the port to bring it to
 	 * the enabled state--so this process returns up to the stack
-	 * and it calls back into wusbhc_rh_port_reset() who will call
-	 * devconnect_auth().
+	 * and it calls back into wusbhc_rh_port_reset().
 	 */
 error_unlock:
 	mutex_unlock(&wusbhc->mutex);
@@ -413,9 +405,6 @@ static void __wusbhc_dev_disconnect(struct wusbhc *wusbhc,
 		wusb_dev_put(wusb_dev);
 	}
 	port->wusb_dev = NULL;
-	/* don't reset the reset_count to zero or wusbhc_rh_port_reset will get
-	 * confused! We only reset to zero when we connect a new device.
-	 */
 
 	/* After a device disconnects, change the GTK (see [WUSB]
 	 * section 6.2.11.2). */
@@ -429,39 +418,6 @@ static void __wusbhc_dev_disconnect(struct wusbhc *wusbhc,
 }
 
 /*
- * Authenticate a device into the WUSB Cluster
- *
- * Called from the Root Hub code (rh.c:wusbhc_rh_port_reset()) when
- * asking for a reset on a port that is not enabled (ie: first connect
- * on the port).
- *
- * Performs the 4way handshake to allow the device to comunicate w/ the
- * WUSB Cluster securely; once done, issue a request to the device for
- * it to change to address 0.
- *
- * This mimics the reset step of Wired USB that once resetting a
- * device, leaves the port in enabled state and the dev with the
- * default address (0).
- *
- * WUSB1.0[7.1.2]
- *
- * @port_idx: port where the change happened--This is the index into
- *            the wusbhc port array, not the USB port number.
- */
-int wusbhc_devconnect_auth(struct wusbhc *wusbhc, u8 port_idx)
-{
-	struct device *dev = wusbhc->dev;
-	struct wusb_port *port = wusb_port_by_idx(wusbhc, port_idx);
-
-	d_fnstart(3, dev, "(%p, %u)\n", wusbhc, port_idx);
-	port->status &= ~USB_PORT_STAT_RESET;
-	port->status |= USB_PORT_STAT_ENABLE;
-	port->change |= USB_PORT_STAT_C_RESET | USB_PORT_STAT_C_ENABLE;
-	d_fnend(3, dev, "(%p, %u) = 0\n", wusbhc, port_idx);
-	return 0;
-}
-
-/*
  * Refresh the list of keep alives to emit in the MMC
  *
  * Some devices don't respond to keep alives unless they've been
@@ -662,60 +618,6 @@ static void wusbhc_handle_dn_disconnect(struct wusbhc *wusbhc, struct wusb_dev *
 }
 
 /*
- * Reset a WUSB device on a HWA
- *
- * @wusbhc
- * @port_idx   Index of the port where the device is
- *
- * In Wireless USB, a reset is more or less equivalent to a full
- * disconnect; so we just do a full disconnect and send the device a
- * Device Reset IE (WUSB1.0[7.5.11]) giving it a few millisecs (6 MMCs).
- *
- * @wusbhc should be refcounted and unlocked
- */
-int wusbhc_dev_reset(struct wusbhc *wusbhc, u8 port_idx)
-{
-	int result;
-	struct device *dev = wusbhc->dev;
-	struct wusb_dev *wusb_dev;
-	struct wuie_reset *ie;
-
-	d_fnstart(3, dev, "(%p, %u)\n", wusbhc, port_idx);
-	mutex_lock(&wusbhc->mutex);
-	result = 0;
-	wusb_dev = wusb_port_by_idx(wusbhc, port_idx)->wusb_dev;
-	if (wusb_dev == NULL) {
-		/* reset no device? ignore */
-		dev_dbg(dev, "RESET: no device at port %u, ignoring\n",
-			port_idx);
-		goto error_unlock;
-	}
-	result = -ENOMEM;
-	ie = kzalloc(sizeof(*ie), GFP_KERNEL);
-	if (ie == NULL)
-		goto error_unlock;
-	ie->hdr.bLength = sizeof(ie->hdr) + sizeof(ie->CDID);
-	ie->hdr.bIEIdentifier = WUIE_ID_RESET_DEVICE;
-	ie->CDID = wusb_dev->cdid;
-	result = wusbhc_mmcie_set(wusbhc, 0xff, 6, &ie->hdr);
-	if (result < 0) {
-		dev_err(dev, "RESET: cant's set MMC: %d\n", result);
-		goto error_kfree;
-	}
-	__wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc, port_idx));
-
-	/* 120ms, hopefully 6 MMCs (FIXME) */
-	msleep(120);
-	wusbhc_mmcie_rm(wusbhc, &ie->hdr);
-error_kfree:
-	kfree(ie);
-error_unlock:
-	mutex_unlock(&wusbhc->mutex);
-	d_fnend(3, dev, "(%p, %u) = %d\n", wusbhc, port_idx, result);
-	return result;
-}
-
-/*
  * Handle a Device Notification coming a host
  *
  * The Device Notification comes from a host (HWA, DWA or WHCI)
diff --git a/drivers/usb/wusbcore/rh.c b/drivers/usb/wusbcore/rh.c
index 267a643..1c73319 100644
--- a/drivers/usb/wusbcore/rh.c
+++ b/drivers/usb/wusbcore/rh.c
@@ -77,13 +77,17 @@
 /*
  * Reset a fake port
  *
- * This can be called to reset a port from any other state or to reset
- * it when connecting. In Wireless USB they are different; when doing
- * a new connect that involves going over the authentication. When
- * just reseting, its a different story.
+ * Using a Reset Device IE is too heavyweight as it causes the device
+ * to enter the UnConnected state and leave the cluster, this can mean
+ * that when the device reconnects it is connected to a different fake
+ * port.
  *
- * The Linux USB stack resets a port twice before it considers it
- * enabled, so we have to detect and ignore that.
+ * Instead, reset authenticated devices with a SetAddress(0), followed
+ * by a SetAddresss(AuthAddr).
+ *
+ * For unauthenticated devices just pretend to reset but do nothing.
+ * If the device initialization continues to fail it will eventually
+ * time out after TrustTimeout and enter the UnConnected state.
  *
  * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
  *
@@ -97,20 +101,20 @@ static int wusbhc_rh_port_reset(struct wusbhc *wusbhc, u8 port_idx)
 {
 	int result = 0;
 	struct wusb_port *port = wusb_port_by_idx(wusbhc, port_idx);
+	struct wusb_dev *wusb_dev = port->wusb_dev;
+
+	port->status |= USB_PORT_STAT_RESET;
+	port->change |= USB_PORT_STAT_C_RESET;
 
-	d_fnstart(3, wusbhc->dev, "(wusbhc %p port_idx %u)\n",
-		  wusbhc, port_idx);
-	if (port->reset_count == 0) {
-		wusbhc_devconnect_auth(wusbhc, port_idx);
-		port->reset_count++;
-	} else if (port->reset_count == 1)
-		/* see header */
-		d_printf(2, wusbhc->dev, "Ignoring second reset on port_idx "
-			"%u\n", port_idx);
+	if (wusb_dev->addr & WUSB_DEV_ADDR_UNAUTH)
+		result = 0;
 	else
-		result = wusbhc_dev_reset(wusbhc, port_idx);
-	d_fnend(3, wusbhc->dev, "(wusbhc %p port_idx %u) = %d\n",
-		wusbhc, port_idx, result);
+		result = wusb_dev_update_address(wusbhc, wusb_dev);
+
+	port->status &= ~USB_PORT_STAT_RESET;
+	port->status |= USB_PORT_STAT_ENABLE;
+	port->change |= USB_PORT_STAT_C_RESET | USB_PORT_STAT_C_ENABLE;	
+
 	return result;
 }
 
diff --git a/drivers/usb/wusbcore/security.c b/drivers/usb/wusbcore/security.c
index a101cad..ac00640 100644
--- a/drivers/usb/wusbcore/security.c
+++ b/drivers/usb/wusbcore/security.c
@@ -338,8 +338,7 @@ static void hs_printk(unsigned level, struct device *dev,
  * Before the device's address (as known by it) was usb_dev->devnum |
  * 0x80 (unauthenticated address). With this we update it to usb_dev->devnum.
  */
-static int wusb_dev_update_address(struct wusbhc *wusbhc,
-				   struct wusb_dev *wusb_dev)
+int wusb_dev_update_address(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
 {
 	int result = -ENOMEM;
 	struct usb_device *usb_dev = wusb_dev->usb_dev;
diff --git a/drivers/usb/wusbcore/wusbhc.h b/drivers/usb/wusbcore/wusbhc.h
index b9bdf5a..8fef934 100644
--- a/drivers/usb/wusbcore/wusbhc.h
+++ b/drivers/usb/wusbcore/wusbhc.h
@@ -154,7 +154,6 @@ struct wusb_port {
 	u16 status;
 	u16 change;
 	struct wusb_dev *wusb_dev;	/* connected device's info */
-	unsigned reset_count;
 	u32 ptk_tkid;
 };
 
@@ -387,10 +386,8 @@ extern void wusbhc_devconnect_destroy(struct wusbhc *);
 extern int wusbhc_devconnect_start(struct wusbhc *wusbhc,
 				   const struct wusb_ckhdid *chid);
 extern void wusbhc_devconnect_stop(struct wusbhc *wusbhc);
-extern int wusbhc_devconnect_auth(struct wusbhc *, u8);
 extern void wusbhc_handle_dn(struct wusbhc *, u8 srcaddr,
 			     struct wusb_dn_hdr *dn_hdr, size_t size);
-extern int wusbhc_dev_reset(struct wusbhc *wusbhc, u8 port);
 extern void __wusbhc_dev_disable(struct wusbhc *wusbhc, u8 port);
 extern int wusb_usb_ncb(struct notifier_block *nb, unsigned long val,
 			void *priv);
@@ -436,6 +433,7 @@ extern void wusb_dev_sec_rm(struct wusb_dev *) ;
 extern int wusb_dev_4way_handshake(struct wusbhc *, struct wusb_dev *,
 				   struct wusb_ckhdid *ck);
 void wusbhc_gtk_rekey(struct wusbhc *wusbhc);
+int wusb_dev_update_address(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev);
 
 
 /* WUSB Cluster ID handling */
-- 
cgit v0.10.2


From 7bc914942295b1ea63635b9c1e93b023bd7d3767 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Fri, 31 Oct 2008 22:49:54 +0800
Subject: uwb: remove unused #include <version.h>

The file(s) below do not use LINUX_VERSION_CODE nor KERNEL_VERSION.
  drivers/uwb/drp-ie.c
  drivers/uwb/hwa-rc.c
  drivers/uwb/i1480/dfu/usb.c
  drivers/uwb/i1480/i1480u-wlp/lc.c
  drivers/uwb/i1480/i1480u-wlp/sysfs.c
  drivers/uwb/rsv.c
  drivers/uwb/whc-rc.c

This patch removes the said #include <version.h>.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/drp-ie.c b/drivers/uwb/drp-ie.c
index 882724c..75491d4 100644
--- a/drivers/uwb/drp-ie.c
+++ b/drivers/uwb/drp-ie.c
@@ -16,7 +16,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/random.h>
 #include <linux/uwb.h>
diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c
index 3d26fa0..18009c9 100644
--- a/drivers/uwb/hwa-rc.c
+++ b/drivers/uwb/hwa-rc.c
@@ -51,7 +51,6 @@
  *
  *
  */
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/usb.h>
diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c
index 98eeeff..b7ea525 100644
--- a/drivers/uwb/i1480/dfu/usb.c
+++ b/drivers/uwb/i1480/dfu/usb.c
@@ -35,7 +35,6 @@
  * the functions are i1480_usb_NAME().
  */
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/usb.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
diff --git a/drivers/uwb/i1480/i1480u-wlp/lc.c b/drivers/uwb/i1480/i1480u-wlp/lc.c
index 737d60c..384306c 100644
--- a/drivers/uwb/i1480/i1480u-wlp/lc.c
+++ b/drivers/uwb/i1480/i1480u-wlp/lc.c
@@ -55,7 +55,6 @@
  *                          is being removed.
  *         i1480u_rm()
  */
-#include <linux/version.h>
 #include <linux/if_arp.h>
 #include <linux/etherdevice.h>
 #include <linux/uwb/debug.h>
diff --git a/drivers/uwb/i1480/i1480u-wlp/sysfs.c b/drivers/uwb/i1480/i1480u-wlp/sysfs.c
index a1d8ca6..a92a787 100644
--- a/drivers/uwb/i1480/i1480u-wlp/sysfs.c
+++ b/drivers/uwb/i1480/i1480u-wlp/sysfs.c
@@ -226,7 +226,6 @@ ssize_t wlp_tx_inflight_store(struct i1480u_tx_inflight *inflight,
  * (CLASS_DEVICE_ATTR or DEVICE_ATTR) and i1480u_ATTR_NAME produces a
  * class_device_attr_NAME or device_attr_NAME (for group registration).
  */
-#include <linux/version.h>
 
 #define i1480u_SHOW(name, fn, param)				\
 static ssize_t i1480u_show_##name(struct device *dev,		\
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index bcc41a4..ce00946 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -15,7 +15,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/uwb.h>
 
diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c
index 1711dea..6c454ea 100644
--- a/drivers/uwb/whc-rc.c
+++ b/drivers/uwb/whc-rc.c
@@ -39,7 +39,6 @@
  * them to the hw and transfer the replies/notifications back to the
  * UWB stack through the UWB daemon (UWBD).
  */
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-- 
cgit v0.10.2


From ae9eba0e2744f1aa15cdc97cd39277a84723ae23 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 30 Oct 2008 20:06:16 +0100
Subject: uwb: struct device - replace bus_id with dev_name(), dev_set_name()

Cc: David Vrabel <david.vrabel@csr.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-Off-By: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/umc-dev.c b/drivers/uwb/umc-dev.c
index aa44e1c..53207e1 100644
--- a/drivers/uwb/umc-dev.c
+++ b/drivers/uwb/umc-dev.c
@@ -31,8 +31,7 @@ struct umc_dev *umc_device_create(struct device *parent, int n)
 
 	umc = kzalloc(sizeof(struct umc_dev), GFP_KERNEL);
 	if (umc) {
-		snprintf(umc->dev.bus_id, sizeof(umc->dev.bus_id), "%s-%d",
-			 parent->bus_id, n);
+		dev_set_name(&umc->dev, "%s-%d", dev_name(parent), n);
 		umc->dev.parent  = parent;
 		umc->dev.bus     = &umc_bus_type;
 		umc->dev.release = umc_device_release;
diff --git a/drivers/uwb/whci.c b/drivers/uwb/whci.c
index 3df2388..e626467 100644
--- a/drivers/uwb/whci.c
+++ b/drivers/uwb/whci.c
@@ -111,7 +111,7 @@ static int whci_add_cap(struct whci_card *card, int n)
 		+ UWBCAPDATA_TO_OFFSET(capdata);
 	umc->resource.end    = umc->resource.start
 		+ (n == 0 ? 0x20 : UWBCAPDATA_TO_SIZE(capdata)) - 1;
-	umc->resource.name   = umc->dev.bus_id;
+	umc->resource.name   = dev_name(&umc->dev);
 	umc->resource.flags  = card->pci->resource[bar].flags;
 	umc->resource.parent = &card->pci->resource[bar];
 	umc->irq             = card->pci->irq;
diff --git a/include/linux/uwb/debug.h b/include/linux/uwb/debug.h
index a86a73f..67a2405 100644
--- a/include/linux/uwb/debug.h
+++ b/include/linux/uwb/debug.h
@@ -60,7 +60,7 @@ do {									\
 				snprintf(__head, sizeof(__head),	\
 					 "%s %s: ",			\
 					 dev_driver_string(__dev),	\
-					 __dev->bus_id);		\
+					 dev_name(__dev));		\
 		}							\
 		printk(KERN_ERR "%s%s" _tag ": " f, __head,		\
 			__func__, ## a);				\
-- 
cgit v0.10.2


From f88518d122f1b007f47a46aff37ca2885126a923 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Fri, 31 Oct 2008 22:49:58 +0800
Subject: wusb: remove unused #include <version.h>

The file(s) below do not use LINUX_VERSION_CODE nor KERNEL_VERSION.
  drivers/usb/host/hwa-hc.c
  drivers/usb/host/whci/hcd.c
  drivers/usb/host/whci/int.c
  drivers/usb/host/whci/wusb.c
  drivers/usb/wusbcore/cbaf.c

This patch removes the said #include <version.h>.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
index 0e18989..2827353 100644
--- a/drivers/usb/host/hwa-hc.c
+++ b/drivers/usb/host/hwa-hc.c
@@ -54,7 +54,6 @@
  *                      DWA).
  */
 #include <linux/kernel.h>
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/workqueue.h>
diff --git a/drivers/usb/host/whci/hcd.c b/drivers/usb/host/whci/hcd.c
index ef3ad4d..de1e072 100644
--- a/drivers/usb/host/whci/hcd.c
+++ b/drivers/usb/host/whci/hcd.c
@@ -15,7 +15,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/uwb/umc.h>
diff --git a/drivers/usb/host/whci/int.c b/drivers/usb/host/whci/int.c
index fce0117..6aae700 100644
--- a/drivers/usb/host/whci/int.c
+++ b/drivers/usb/host/whci/int.c
@@ -15,7 +15,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/uwb/umc.h>
diff --git a/drivers/usb/host/whci/wusb.c b/drivers/usb/host/whci/wusb.c
index 2befd47..540021a 100644
--- a/drivers/usb/host/whci/wusb.c
+++ b/drivers/usb/host/whci/wusb.c
@@ -15,7 +15,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/uwb/umc.h>
diff --git a/drivers/usb/wusbcore/cbaf.c b/drivers/usb/wusbcore/cbaf.c
index ab4788d..1335cbe 100644
--- a/drivers/usb/wusbcore/cbaf.c
+++ b/drivers/usb/wusbcore/cbaf.c
@@ -88,7 +88,6 @@
  */
 #include <linux/module.h>
 #include <linux/ctype.h>
-#include <linux/version.h>
 #include <linux/usb.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
-- 
cgit v0.10.2


From c5995bd2819dc577d0b32b26be0836d16c977e24 Mon Sep 17 00:00:00 2001
From: Stefano Panella <stefano.panella@csr.com>
Date: Tue, 4 Nov 2008 14:06:31 +0000
Subject: uwb: infrastructure for handling Relinquish Request IEs

The structures and event handler needed to handle Relinish Request IEs
received from neighbors.  Nothing is done with these IEs yet.

Signed-off-by: Stefano Panella <stefano.panella@csr.com>
Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile
index 257e690..2b99c3e 100644
--- a/drivers/uwb/Makefile
+++ b/drivers/uwb/Makefile
@@ -13,6 +13,7 @@ uwb-objs :=		\
 	drp-ie.o	\
 	est.o		\
 	ie.o		\
+	ie-rcv.o	\
 	lc-dev.o	\
 	lc-rc.o		\
 	neh.o		\
diff --git a/drivers/uwb/ie-rcv.c b/drivers/uwb/ie-rcv.c
new file mode 100644
index 0000000..917e6d7
--- /dev/null
+++ b/drivers/uwb/ie-rcv.c
@@ -0,0 +1,55 @@
+/*
+ * Ultra Wide Band
+ * IE Received notification handling.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitmap.h>
+#include "uwb-internal.h"
+
+/*
+ * Process an incoming IE Received notification.
+ */
+int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *evt)
+{
+	int result = -EINVAL;
+	struct device *dev = &evt->rc->uwb_dev.dev;
+	struct uwb_rc_evt_ie_rcv *iercv;
+	size_t iesize;
+
+	/* Is there enough data to decode it? */
+	if (evt->notif.size < sizeof(*iercv)) {
+		dev_err(dev, "IE Received notification: Not enough data to "
+			"decode (%zu vs %zu bytes needed)\n",
+			evt->notif.size, sizeof(*iercv));
+		goto error;
+	}
+	iercv = container_of(evt->notif.rceb, struct uwb_rc_evt_ie_rcv, rceb);
+	iesize = le16_to_cpu(iercv->wIELength);
+
+	dev_dbg(dev, "IE received, element ID=%d\n", iercv->IEData[0]);
+
+	if (iercv->IEData[0] == UWB_RELINQUISH_REQUEST_IE) {
+		dev_warn(dev, "unhandled Relinquish Request IE\n");
+	}
+
+	return 0;
+error:
+	return result;
+}
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index 983ebc4..031e8a8 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -167,6 +167,7 @@ extern void uwbd_event_queue(struct uwb_event *);
 void uwbd_flush(struct uwb_rc *rc);
 
 /* UWB event handlers */
+extern int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *);
 extern int uwbd_evt_handle_rc_beacon(struct uwb_event *);
 extern int uwbd_evt_handle_rc_beacon_size(struct uwb_event *);
 extern int uwbd_evt_handle_rc_bpoie_change(struct uwb_event *);
diff --git a/drivers/uwb/uwbd.c b/drivers/uwb/uwbd.c
index 7890841..f751135 100644
--- a/drivers/uwb/uwbd.c
+++ b/drivers/uwb/uwbd.c
@@ -104,6 +104,10 @@ struct uwbd_event {
 /** Table of handlers for and properties of the UWBD Radio Control Events */
 static
 struct uwbd_event uwbd_events[] = {
+	[UWB_RC_EVT_IE_RCV] = {
+		.handler = uwbd_evt_handle_rc_ie_rcv,
+		.name = "IE_RECEIVED"
+	},
 	[UWB_RC_EVT_BEACON] = {
 		.handler = uwbd_evt_handle_rc_beacon,
 		.name = "BEACON_RECEIVED"
diff --git a/include/linux/uwb/spec.h b/include/linux/uwb/spec.h
index 198c15f..a30436e 100644
--- a/include/linux/uwb/spec.h
+++ b/include/linux/uwb/spec.h
@@ -200,6 +200,12 @@ enum uwb_drp_reason {
 	UWB_DRP_REASON_MODIFIED,
 };
 
+/** Relinquish Request Reason Codes ([ECMA-368] table 113) */
+enum uwb_relinquish_req_reason {
+	UWB_RELINQUISH_REQ_REASON_NON_SPECIFIC = 0,
+	UWB_RELINQUISH_REQ_REASON_OVER_ALLOCATION,
+};
+
 /**
  *  DRP Notification Reason Codes (WHCI 0.95 [3.1.4.9])
  */
@@ -252,6 +258,7 @@ enum uwb_ie {
 	UWB_APP_SPEC_PROBE_IE = 15,
 	UWB_IDENTIFICATION_IE = 19,
 	UWB_MASTER_KEY_ID_IE = 20,
+	UWB_RELINQUISH_REQUEST_IE = 21,
 	UWB_IE_WLP = 250, /* WiMedia Logical Link Control Protocol WLP 0.99 */
 	UWB_APP_SPEC_IE = 255,
 };
@@ -365,6 +372,27 @@ struct uwb_ie_drp_avail {
 	DECLARE_BITMAP(bmp, UWB_NUM_MAS);
 } __attribute__((packed));
 
+/* Relinqish Request IE ([ECMA-368] section 16.8.19). */
+struct uwb_relinquish_request_ie {
+        struct uwb_ie_hdr       hdr;
+        __le16                  relinquish_req_control;
+        struct uwb_dev_addr     dev_addr;
+        struct uwb_drp_alloc    allocs[];
+} __attribute__((packed));
+
+static inline int uwb_ie_relinquish_req_reason_code(struct uwb_relinquish_request_ie *ie)
+{
+	return (le16_to_cpu(ie->relinquish_req_control) >> 0) & 0xf;
+}
+
+static inline void uwb_ie_relinquish_req_set_reason_code(struct uwb_relinquish_request_ie *ie,
+							 int reason_code)
+{
+	u16 ctrl = le16_to_cpu(ie->relinquish_req_control);
+	ctrl = (ctrl & ~(0xf << 0)) | (reason_code << 0);
+	ie->relinquish_req_control = cpu_to_le16(ctrl);
+}
+
 /**
  * The Vendor ID is set to an OUI that indicates the vendor of the device.
  * ECMA-368 [16.8.10]
-- 
cgit v0.10.2


From 6d5a681dfb583b2f1eefe7cd5505419ca2d4d6c8 Mon Sep 17 00:00:00 2001
From: Stefano Panella <stefano.panella@csr.com>
Date: Tue, 4 Nov 2008 14:24:57 +0000
Subject: uwb: add commands to add/remove IEs to the debug interface

Add the commands UWB_DBG_CMD_IE_ADD and UWB_DBG_CMD_IE_RM to the debug
interface and make them call uwb_rc_ie_add() and uwb_rc_ie_rm().

Signed-off-by: Stefano Panella <stefano.panella@csr.com>
Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 6db641e..88e6ac7 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -168,6 +168,18 @@ static int cmd_rsv_terminate(struct uwb_rc *rc,
 	return 0;
 }
 
+static int cmd_ie_add(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_add)
+{
+	return uwb_rc_ie_add(rc,
+			     (const struct uwb_ie_hdr *) ie_to_add->data,
+			     ie_to_add->len);
+}
+
+static int cmd_ie_rm(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_rm)
+{
+	return uwb_rc_ie_rm(rc, ie_to_rm->data[0]);
+}
+
 static int command_open(struct inode *inode, struct file *file)
 {
 	file->private_data = inode->i_private;
@@ -195,6 +207,12 @@ static ssize_t command_write(struct file *file, const char __user *buf,
 	case UWB_DBG_CMD_RSV_TERMINATE:
 		ret = cmd_rsv_terminate(rc, &cmd.rsv_terminate);
 		break;
+	case UWB_DBG_CMD_IE_ADD:
+		ret = cmd_ie_add(rc, &cmd.ie_add);
+		break;
+	case UWB_DBG_CMD_IE_RM:
+		ret = cmd_ie_rm(rc, &cmd.ie_rm);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -332,7 +350,7 @@ void uwb_dbg_add_rc(struct uwb_rc *rc)
 }
 
 /**
- * uwb_dbg_add_rc - remove a radio controller's debug interface
+ * uwb_dbg_del_rc - remove a radio controller's debug interface
  * @rc: the radio controller
  */
 void uwb_dbg_del_rc(struct uwb_rc *rc)
diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h
index 1141f41..6a16566 100644
--- a/include/linux/uwb/debug-cmd.h
+++ b/include/linux/uwb/debug-cmd.h
@@ -32,6 +32,8 @@
 enum uwb_dbg_cmd_type {
 	UWB_DBG_CMD_RSV_ESTABLISH = 1,
 	UWB_DBG_CMD_RSV_TERMINATE = 2,
+	UWB_DBG_CMD_IE_ADD = 3,
+	UWB_DBG_CMD_IE_RM = 4,
 };
 
 struct uwb_dbg_cmd_rsv_establish {
@@ -46,11 +48,18 @@ struct uwb_dbg_cmd_rsv_terminate {
 	int index;
 };
 
+struct uwb_dbg_cmd_ie {
+	__u8 data[128];
+	int len;
+};
+
 struct uwb_dbg_cmd {
 	__u32 type;
 	union {
 		struct uwb_dbg_cmd_rsv_establish rsv_establish;
 		struct uwb_dbg_cmd_rsv_terminate rsv_terminate;
+		struct uwb_dbg_cmd_ie ie_add;
+		struct uwb_dbg_cmd_ie ie_rm;
 	};
 };
 
-- 
cgit v0.10.2


From fec1a5932f16c0eb1b3f5ca2e18d81d860924088 Mon Sep 17 00:00:00 2001
From: Stefano Panella <stefano.panella@csr.com>
Date: Tue, 4 Nov 2008 15:39:08 +0000
Subject: uwb: per-radio controller event thread and beacon cache

Use an event thread per-radio controller so processing events from one
radio controller doesn't delay another.

A radio controller shouldn't have information on devices seen by a
different radio controller (they may be on different channels) so make the
beacon cache per-radio controller.

Signed-off-by: Stefano Panella <stefano.panella@csr.com>
Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
index ad82398..d9f2a8a 100644
--- a/drivers/uwb/beacon.c
+++ b/drivers/uwb/beacon.c
@@ -168,12 +168,6 @@ out_up:
  * FIXME: use something faster for search than a list
  */
 
-struct uwb_beca uwb_beca = {
-	.list = LIST_HEAD_INIT(uwb_beca.list),
-	.mutex = __MUTEX_INITIALIZER(uwb_beca.mutex)
-};
-
-
 void uwb_bce_kfree(struct kref *_bce)
 {
 	struct uwb_beca_e *bce = container_of(_bce, struct uwb_beca_e, refcnt);
@@ -185,10 +179,11 @@ void uwb_bce_kfree(struct kref *_bce)
 
 /* Find a beacon by dev addr in the cache */
 static
-struct uwb_beca_e *__uwb_beca_find_bydev(const struct uwb_dev_addr *dev_addr)
+struct uwb_beca_e *__uwb_beca_find_bydev(struct uwb_rc *rc,
+					 const struct uwb_dev_addr *dev_addr)
 {
 	struct uwb_beca_e *bce, *next;
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		d_printf(6, NULL, "looking for addr %02x:%02x in %02x:%02x\n",
 			 dev_addr->data[0], dev_addr->data[1],
 			 bce->dev_addr.data[0], bce->dev_addr.data[1]);
@@ -202,10 +197,11 @@ out:
 
 /* Find a beacon by dev addr in the cache */
 static
-struct uwb_beca_e *__uwb_beca_find_bymac(const struct uwb_mac_addr *mac_addr)
+struct uwb_beca_e *__uwb_beca_find_bymac(struct uwb_rc *rc, 
+					 const struct uwb_mac_addr *mac_addr)
 {
 	struct uwb_beca_e *bce, *next;
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		if (!memcmp(bce->mac_addr, mac_addr->data,
 			    sizeof(struct uwb_mac_addr)))
 			goto out;
@@ -229,11 +225,11 @@ struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
 	struct uwb_dev *found = NULL;
 	struct uwb_beca_e *bce;
 
-	mutex_lock(&uwb_beca.mutex);
-	bce = __uwb_beca_find_bydev(devaddr);
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bydev(rc, devaddr);
 	if (bce)
 		found = uwb_dev_try_get(rc, bce->uwb_dev);
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 
 	return found;
 }
@@ -249,11 +245,11 @@ struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc,
 	struct uwb_dev *found = NULL;
 	struct uwb_beca_e *bce;
 
-	mutex_lock(&uwb_beca.mutex);
-	bce = __uwb_beca_find_bymac(macaddr);
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bymac(rc, macaddr);
 	if (bce)
 		found = uwb_dev_try_get(rc, bce->uwb_dev);
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 
 	return found;
 }
@@ -274,7 +270,9 @@ static void uwb_beca_e_init(struct uwb_beca_e *bce)
  * @bf:         Beacon frame (part of b, really)
  * @ts_jiffies: Timestamp (in jiffies) when the beacon was received
  */
-struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *be,
+static
+struct uwb_beca_e *__uwb_beca_add(struct uwb_rc *rc,
+				  struct uwb_rc_evt_beacon *be,
 				  struct uwb_beacon_frame *bf,
 				  unsigned long ts_jiffies)
 {
@@ -286,7 +284,7 @@ struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *be,
 	uwb_beca_e_init(bce);
 	bce->ts_jiffies = ts_jiffies;
 	bce->uwb_dev = NULL;
-	list_add(&bce->node, &uwb_beca.list);
+	list_add(&bce->node, &rc->uwb_beca.list);
 	return bce;
 }
 
@@ -295,13 +293,13 @@ struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *be,
  *
  * Remove associated devicest too.
  */
-void uwb_beca_purge(void)
+void uwb_beca_purge(struct uwb_rc *rc)
 {
 	struct uwb_beca_e *bce, *next;
 	unsigned long expires;
 
-	mutex_lock(&uwb_beca.mutex);
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
+	mutex_lock(&rc->uwb_beca.mutex);
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		expires = bce->ts_jiffies + msecs_to_jiffies(beacon_timeout_ms);
 		if (time_after(jiffies, expires)) {
 			uwbd_dev_offair(bce);
@@ -309,19 +307,20 @@ void uwb_beca_purge(void)
 			uwb_bce_put(bce);
 		}
 	}
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 }
 
 /* Clean up the whole beacon cache. Called on shutdown */
-void uwb_beca_release(void)
+void uwb_beca_release(struct uwb_rc *rc)
 {
 	struct uwb_beca_e *bce, *next;
-	mutex_lock(&uwb_beca.mutex);
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
+
+	mutex_lock(&rc->uwb_beca.mutex);
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		list_del(&bce->node);
 		uwb_bce_put(bce);
 	}
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 }
 
 static void uwb_beacon_print(struct uwb_rc *rc, struct uwb_rc_evt_beacon *be,
@@ -437,18 +436,18 @@ int uwbd_evt_handle_rc_beacon(struct uwb_event *evt)
 	if (uwb_mac_addr_bcast(&bf->Device_Identifier))
 		return 0;
 
-	mutex_lock(&uwb_beca.mutex);
-	bce = __uwb_beca_find_bymac(&bf->Device_Identifier);
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bymac(rc, &bf->Device_Identifier);
 	if (bce == NULL) {
 		/* Not in there, a new device is pinging */
 		uwb_beacon_print(evt->rc, be, bf);
-		bce = __uwb_beca_add(be, bf, evt->ts_jiffies);
+		bce = __uwb_beca_add(rc, be, bf, evt->ts_jiffies);
 		if (bce == NULL) {
-			mutex_unlock(&uwb_beca.mutex);
+			mutex_unlock(&rc->uwb_beca.mutex);
 			return -ENOMEM;
 		}
 	}
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 
 	mutex_lock(&bce->mutex);
 	/* purge old beacon data */
diff --git a/drivers/uwb/driver.c b/drivers/uwb/driver.c
index 521cdeb..f57c265 100644
--- a/drivers/uwb/driver.c
+++ b/drivers/uwb/driver.c
@@ -118,7 +118,6 @@ static int __init uwb_subsys_init(void)
 	result = class_register(&uwb_rc_class);
 	if (result < 0)
 		goto error_uwb_rc_class_register;
-	uwbd_start();
 	uwb_dbg_init();
 	return 0;
 
@@ -132,7 +131,6 @@ module_init(uwb_subsys_init);
 static void __exit uwb_subsys_exit(void)
 {
 	uwb_dbg_exit();
-	uwbd_stop();
 	class_unregister(&uwb_rc_class);
 	uwb_est_destroy();
 	return;
diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c
index 1129e87..38e3d57 100644
--- a/drivers/uwb/lc-rc.c
+++ b/drivers/uwb/lc-rc.c
@@ -36,8 +36,6 @@
 #include <linux/etherdevice.h>
 #include <linux/usb.h>
 
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
 #include "uwb-internal.h"
 
 static int uwb_rc_index_match(struct device *dev, void *data)
@@ -83,7 +81,6 @@ static void uwb_rc_sys_release(struct device *dev)
 
 	uwb_rc_neh_destroy(rc);
 	uwb_rc_ie_release(rc);
-	d_printf(1, dev, "freed uwb_rc %p\n", rc);
 	kfree(rc);
 }
 
@@ -100,6 +97,8 @@ void uwb_rc_init(struct uwb_rc *rc)
 	rc->scan_type = UWB_SCAN_DISABLED;
 	INIT_LIST_HEAD(&rc->notifs_chain.list);
 	mutex_init(&rc->notifs_chain.mutex);
+	INIT_LIST_HEAD(&rc->uwb_beca.list);
+	mutex_init(&rc->uwb_beca.mutex);
 	uwb_drp_avail_init(rc);
 	uwb_rc_ie_init(rc);
 	uwb_rsv_init(rc);
@@ -250,6 +249,12 @@ int uwb_rc_add(struct uwb_rc *rc, struct device *parent_dev, void *priv)
 
 	rc->priv = priv;
 
+	init_waitqueue_head(&rc->uwbd.wq);
+	INIT_LIST_HEAD(&rc->uwbd.event_list);
+	spin_lock_init(&rc->uwbd.event_list_lock);
+
+	uwbd_start(rc);
+
 	result = rc->start(rc);
 	if (result < 0)
 		goto error_rc_start;
@@ -284,7 +289,7 @@ error_sys_add:
 error_dev_add:
 error_rc_setup:
 	rc->stop(rc);
-	uwbd_flush(rc);
+	uwbd_stop(rc);
 error_rc_start:
 	return result;
 }
@@ -315,16 +320,18 @@ void uwb_rc_rm(struct uwb_rc *rc)
 	uwb_rc_reset(rc);
 
 	rc->stop(rc);
-	uwbd_flush(rc);
+
+	uwbd_stop(rc);
 
 	uwb_dev_lock(&rc->uwb_dev);
 	rc->priv = NULL;
 	rc->cmd = NULL;
 	uwb_dev_unlock(&rc->uwb_dev);
-	mutex_lock(&uwb_beca.mutex);
+	mutex_lock(&rc->uwb_beca.mutex);
 	uwb_dev_for_each(rc, uwb_dev_offair_helper, NULL);
 	__uwb_rc_sys_rm(rc);
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
+ 	uwb_beca_release(rc);
 	uwb_dev_rm(&rc->uwb_dev);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_rm);
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index 031e8a8..4c24496 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -160,8 +160,8 @@ struct uwb_event {
 	};
 };
 
-extern void uwbd_start(void);
-extern void uwbd_stop(void);
+extern void uwbd_start(struct uwb_rc *rc);
+extern void uwbd_stop(struct uwb_rc *rc);
 extern struct uwb_event *uwb_event_alloc(size_t, gfp_t gfp_mask);
 extern void uwbd_event_queue(struct uwb_event *);
 void uwbd_flush(struct uwb_rc *rc);
@@ -194,15 +194,6 @@ int uwbd_evt_handle_rc_dev_addr_conflict(struct uwb_event *evt);
 
 extern unsigned long beacon_timeout_ms;
 
-/** Beacon cache list */
-struct uwb_beca {
-	struct list_head list;
-	size_t entries;
-	struct mutex mutex;
-};
-
-extern struct uwb_beca uwb_beca;
-
 /**
  * Beacon cache entry
  *
@@ -229,9 +220,6 @@ struct uwb_beca_e {
 struct uwb_beacon_frame;
 extern ssize_t uwb_bce_print_IEs(struct uwb_dev *, struct uwb_beca_e *,
 				 char *, size_t);
-extern struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *,
-					 struct uwb_beacon_frame *,
-					 unsigned long);
 
 extern void uwb_bce_kfree(struct kref *_bce);
 static inline void uwb_bce_get(struct uwb_beca_e *bce)
@@ -242,8 +230,8 @@ static inline void uwb_bce_put(struct uwb_beca_e *bce)
 {
 	kref_put(&bce->refcnt, uwb_bce_kfree);
 }
-extern void uwb_beca_purge(void);
-extern void uwb_beca_release(void);
+extern void uwb_beca_purge(struct uwb_rc *rc);
+extern void uwb_beca_release(struct uwb_rc *rc);
 
 struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
 				       const struct uwb_dev_addr *devaddr);
diff --git a/drivers/uwb/uwbd.c b/drivers/uwb/uwbd.c
index f751135..ec42ce9 100644
--- a/drivers/uwb/uwbd.c
+++ b/drivers/uwb/uwbd.c
@@ -170,8 +170,6 @@ static const struct uwbd_event uwbd_message_handlers[] = {
 	},
 };
 
-static DEFINE_MUTEX(uwbd_event_mutex);
-
 /**
  * Handle an URC event passed to the UWB Daemon
  *
@@ -235,19 +233,10 @@ static void uwbd_event_handle_message(struct uwb_event *evt)
 		return;
 	}
 
-	/* If this is a reset event we need to drop the
-	 * uwbd_event_mutex or it deadlocks when the reset handler
-	 * attempts to flush the uwbd events. */
-	if (evt->message == UWB_EVT_MSG_RESET)
-		mutex_unlock(&uwbd_event_mutex);
-
 	result = uwbd_message_handlers[evt->message].handler(evt);
 	if (result < 0)
 		dev_err(&rc->uwb_dev.dev, "UWBD: '%s' message failed: %d\n",
 			uwbd_message_handlers[evt->message].name, result);
-
-	if (evt->message == UWB_EVT_MSG_RESET)
-		mutex_lock(&uwbd_event_mutex);
 }
 
 static void uwbd_event_handle(struct uwb_event *evt)
@@ -275,20 +264,6 @@ static void uwbd_event_handle(struct uwb_event *evt)
 
 	__uwb_rc_put(rc);	/* for the __uwb_rc_get() in uwb_rc_notif_cb() */
 }
-/* The UWB Daemon */
-
-
-/** Daemon's PID: used to decide if we can queue or not */
-static int uwbd_pid;
-/** Daemon's task struct for managing the kthread */
-static struct task_struct *uwbd_task;
-/** Daemon's waitqueue for waiting for new events */
-static DECLARE_WAIT_QUEUE_HEAD(uwbd_wq);
-/** Daemon's list of events; we queue/dequeue here */
-static struct list_head uwbd_event_list = LIST_HEAD_INIT(uwbd_event_list);
-/** Daemon's list lock to protect concurent access */
-static DEFINE_SPINLOCK(uwbd_event_list_lock);
-
 
 /**
  * UWB Daemon
@@ -302,65 +277,58 @@ static DEFINE_SPINLOCK(uwbd_event_list_lock);
  * FIXME: should change so we don't have a 1HZ timer all the time, but
  *        only if there are devices.
  */
-static int uwbd(void *unused)
+static int uwbd(void *param)
 {
+	struct uwb_rc *rc = param;
 	unsigned long flags;
-	struct list_head list = LIST_HEAD_INIT(list);
-	struct uwb_event *evt, *nxt;
+	struct uwb_event *evt;
 	int should_stop = 0;
+
 	while (1) {
 		wait_event_interruptible_timeout(
-			uwbd_wq,
-			!list_empty(&uwbd_event_list)
+			rc->uwbd.wq,
+			!list_empty(&rc->uwbd.event_list)
 			  || (should_stop = kthread_should_stop()),
 			HZ);
 		if (should_stop)
 			break;
 		try_to_freeze();
 
-		mutex_lock(&uwbd_event_mutex);
-		spin_lock_irqsave(&uwbd_event_list_lock, flags);
-		list_splice_init(&uwbd_event_list, &list);
-		spin_unlock_irqrestore(&uwbd_event_list_lock, flags);
-		list_for_each_entry_safe(evt, nxt, &list, list_node) {
+		spin_lock_irqsave(&rc->uwbd.event_list_lock, flags);
+		if (!list_empty(&rc->uwbd.event_list)) {
+			evt = list_first_entry(&rc->uwbd.event_list, struct uwb_event, list_node);
 			list_del(&evt->list_node);
+		} else
+			evt = NULL;
+		spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags);
+
+		if (evt) {
 			uwbd_event_handle(evt);
 			kfree(evt);
 		}
-		mutex_unlock(&uwbd_event_mutex);
 
-		uwb_beca_purge();	/* Purge devices that left */
+		uwb_beca_purge(rc);	/* Purge devices that left */
 	}
 	return 0;
 }
 
 
 /** Start the UWB daemon */
-void uwbd_start(void)
+void uwbd_start(struct uwb_rc *rc)
 {
-	uwbd_task = kthread_run(uwbd, NULL, "uwbd");
-	if (uwbd_task == NULL)
+	rc->uwbd.task = kthread_run(uwbd, rc, "uwbd");
+	if (rc->uwbd.task == NULL)
 		printk(KERN_ERR "UWB: Cannot start management daemon; "
 		       "UWB won't work\n");
 	else
-		uwbd_pid = uwbd_task->pid;
+		rc->uwbd.pid = rc->uwbd.task->pid;
 }
 
 /* Stop the UWB daemon and free any unprocessed events */
-void uwbd_stop(void)
+void uwbd_stop(struct uwb_rc *rc)
 {
-	unsigned long flags;
-	struct uwb_event *evt, *nxt;
-	kthread_stop(uwbd_task);
-	spin_lock_irqsave(&uwbd_event_list_lock, flags);
-	uwbd_pid = 0;
-	list_for_each_entry_safe(evt, nxt, &uwbd_event_list, list_node) {
-		if (evt->type == UWB_EVT_TYPE_NOTIF)
-			kfree(evt->notif.rceb);
-		kfree(evt);
-	}
-	spin_unlock_irqrestore(&uwbd_event_list_lock, flags);
-	uwb_beca_release();
+	kthread_stop(rc->uwbd.task);
+	uwbd_flush(rc);
 }
 
 /*
@@ -377,18 +345,20 @@ void uwbd_stop(void)
  */
 void uwbd_event_queue(struct uwb_event *evt)
 {
+	struct uwb_rc *rc = evt->rc;
 	unsigned long flags;
-	spin_lock_irqsave(&uwbd_event_list_lock, flags);
-	if (uwbd_pid != 0) {
-		list_add(&evt->list_node, &uwbd_event_list);
-		wake_up_all(&uwbd_wq);
+
+	spin_lock_irqsave(&rc->uwbd.event_list_lock, flags);
+	if (rc->uwbd.pid != 0) {
+		list_add(&evt->list_node, &rc->uwbd.event_list);
+		wake_up_all(&rc->uwbd.wq);
 	} else {
 		__uwb_rc_put(evt->rc);
 		if (evt->type == UWB_EVT_TYPE_NOTIF)
 			kfree(evt->notif.rceb);
 		kfree(evt);
 	}
-	spin_unlock_irqrestore(&uwbd_event_list_lock, flags);
+	spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags);
 	return;
 }
 
@@ -396,10 +366,8 @@ void uwbd_flush(struct uwb_rc *rc)
 {
 	struct uwb_event *evt, *nxt;
 
-	mutex_lock(&uwbd_event_mutex);
-
-	spin_lock_irq(&uwbd_event_list_lock);
-	list_for_each_entry_safe(evt, nxt, &uwbd_event_list, list_node) {
+	spin_lock_irq(&rc->uwbd.event_list_lock);
+	list_for_each_entry_safe(evt, nxt, &rc->uwbd.event_list, list_node) {
 		if (evt->rc == rc) {
 			__uwb_rc_put(rc);
 			list_del(&evt->list_node);
@@ -408,7 +376,5 @@ void uwbd_flush(struct uwb_rc *rc)
 			kfree(evt);
 		}
 	}
-	spin_unlock_irq(&uwbd_event_list_lock);
-
-	mutex_unlock(&uwbd_event_mutex);
+	spin_unlock_irq(&rc->uwbd.event_list_lock);
 }
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index 6d93f54..881f0c5 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -30,6 +30,7 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/timer.h>
+#include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/uwb/spec.h>
 
@@ -86,6 +87,22 @@ struct uwb_notifs_chain {
 	struct mutex mutex;
 };
 
+/* Beacon cache list */
+struct uwb_beca {
+	struct list_head list;
+	size_t entries;
+	struct mutex mutex;
+};
+
+/* Event handling thread. */
+struct uwbd {
+	int pid;
+	struct task_struct *task;
+	wait_queue_head_t wq;
+	struct list_head event_list;
+	spinlock_t event_list_lock;
+};
+
 /**
  * struct uwb_mas_bm - a bitmap of all MAS in a superframe
  * @bm: a bitmap of length #UWB_NUM_MAS
@@ -342,6 +359,9 @@ struct uwb_rc {
 	enum uwb_scan_type scan_type:3;
 	unsigned ready:1;
 	struct uwb_notifs_chain notifs_chain;
+	struct uwb_beca uwb_beca;
+
+	struct uwbd uwbd;
 
 	struct uwb_drp_avail drp_avail;
 	struct list_head reservations;
-- 
cgit v0.10.2


From 307ba6dd73254fe7d2ce27db64ffd90e1bb3c6c0 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Fri, 7 Nov 2008 17:37:33 +0000
Subject: uwb: don't unbind the radio controller driver when resetting

Use pre_reset and post_reset methods to avoid unbinding the radio
controller driver after a uwb_rc_reset_all() call.  This avoids a
deadlock in uwb_rc_rm() when waiting for the uwb event thread to stop.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c
index 18009c9..158e98d 100644
--- a/drivers/uwb/hwa-rc.c
+++ b/drivers/uwb/hwa-rc.c
@@ -881,6 +881,24 @@ static void hwarc_disconnect(struct usb_interface *iface)
 	uwb_rc_put(uwb_rc);	/* when creating the device, refcount = 1 */
 }
 
+static int hwarc_pre_reset(struct usb_interface *iface)
+{
+	struct hwarc *hwarc = usb_get_intfdata(iface);
+	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
+
+	uwb_rc_pre_reset(uwb_rc);
+	return 0;
+}
+
+static int hwarc_post_reset(struct usb_interface *iface)
+{
+	struct hwarc *hwarc = usb_get_intfdata(iface);
+	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
+
+	uwb_rc_post_reset(uwb_rc);
+	return 0;
+}
+
 /** USB device ID's that we handle */
 static struct usb_device_id hwarc_id_table[] = {
 	/* D-Link DUB-1210 */
@@ -897,9 +915,11 @@ MODULE_DEVICE_TABLE(usb, hwarc_id_table);
 
 static struct usb_driver hwarc_driver = {
 	.name =		"hwa-rc",
+	.id_table =	hwarc_id_table,
 	.probe =	hwarc_probe,
 	.disconnect =	hwarc_disconnect,
-	.id_table =	hwarc_id_table,
+	.pre_reset =    hwarc_pre_reset,
+	.post_reset =   hwarc_post_reset,
 };
 
 static int __init hwarc_driver_init(void)
diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c
index 8de856f..e39b3209 100644
--- a/drivers/uwb/reset.c
+++ b/drivers/uwb/reset.c
@@ -323,17 +323,16 @@ int uwbd_msg_handle_reset(struct uwb_event *evt)
 	struct uwb_rc *rc = evt->rc;
 	int ret;
 
-	/* Need to prevent the RC hardware module going away while in
-	   the rc->reset() call. */
-	if (!try_module_get(rc->owner))
-		return 0;
-
 	dev_info(&rc->uwb_dev.dev, "resetting radio controller\n");
 	ret = rc->reset(rc);
-	if (ret)
+	if (ret) {
 		dev_err(&rc->uwb_dev.dev, "failed to reset hardware: %d\n", ret);
-
-	module_put(rc->owner);
+		goto error;
+	}
+	return 0;
+error:
+	/* Nothing can be done except try the reset again. */
+	uwb_rc_reset_all(rc);
 	return ret;
 }
 
@@ -360,3 +359,37 @@ void uwb_rc_reset_all(struct uwb_rc *rc)
 	uwbd_event_queue(evt);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_reset_all);
+
+void uwb_rc_pre_reset(struct uwb_rc *rc)
+{
+	rc->stop(rc);
+	uwbd_flush(rc);
+
+	mutex_lock(&rc->uwb_dev.mutex);
+	rc->beaconing = -1;
+	rc->scanning = -1;
+	mutex_unlock(&rc->uwb_dev.mutex);
+
+	uwb_rsv_remove_all(rc);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_pre_reset);
+
+void uwb_rc_post_reset(struct uwb_rc *rc)
+{
+	int ret;
+
+	ret = rc->start(rc);
+	if (ret)
+		goto error;
+	ret = uwb_rc_mac_addr_set(rc, &rc->uwb_dev.mac_addr);
+	if (ret)
+		goto error;
+	ret = uwb_rc_dev_addr_set(rc, &rc->uwb_dev.dev_addr);
+	if (ret)
+		goto error;
+	return;
+error:
+	/* Nothing can be done except try the reset again. */
+	uwb_rc_reset_all(rc);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_post_reset);
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index ce00946..3d76efe 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -659,6 +659,25 @@ static void uwb_rsv_timer(unsigned long arg)
 	uwb_rsv_sched_update(rsv->rc);
 }
 
+/**
+ * uwb_rsv_remove_all - remove all reservations
+ * @rc: the radio controller
+ *
+ * A DRP IE update is not done.
+ */
+void uwb_rsv_remove_all(struct uwb_rc *rc)
+{
+	struct uwb_rsv *rsv, *t;
+
+	mutex_lock(&rc->rsvs_mutex);
+	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
+		uwb_rsv_remove(rsv);
+	}
+	mutex_unlock(&rc->rsvs_mutex);
+
+	cancel_work_sync(&rc->rsv_update_work);
+}
+
 void uwb_rsv_init(struct uwb_rc *rc)
 {
 	INIT_LIST_HEAD(&rc->reservations);
@@ -682,14 +701,6 @@ int uwb_rsv_setup(struct uwb_rc *rc)
 
 void uwb_rsv_cleanup(struct uwb_rc *rc)
 {
-	struct uwb_rsv *rsv, *t;
-
-	mutex_lock(&rc->rsvs_mutex);
-	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
-		uwb_rsv_remove(rsv);
-	}
-	mutex_unlock(&rc->rsvs_mutex);
-
-	cancel_work_sync(&rc->rsv_update_work);
+	uwb_rsv_remove_all(rc);
 	destroy_workqueue(rc->rsv_workq);
 }
diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c
index 2d8d62d..5ad3616 100644
--- a/drivers/uwb/umc-bus.c
+++ b/drivers/uwb/umc-bus.c
@@ -11,23 +11,48 @@
 #include <linux/uwb/umc.h>
 #include <linux/pci.h>
 
-static int umc_bus_unbind_helper(struct device *dev, void *data)
+static int umc_bus_pre_reset_helper(struct device *dev, void *data)
 {
-	struct device *parent = data;
+	int ret = 0;
 
-	if (dev->parent == parent && dev->driver)
-		device_release_driver(dev);
-	return 0;
+	if (dev->driver) {
+		struct umc_dev *umc = to_umc_dev(dev);
+		struct umc_driver *umc_drv = to_umc_driver(dev->driver);
+
+		if (umc_drv->pre_reset)
+			ret = umc_drv->pre_reset(umc);
+		else
+			device_release_driver(dev);
+	}
+	return ret;
+}
+
+static int umc_bus_post_reset_helper(struct device *dev, void *data)
+{
+	int ret = 0;
+
+	if (dev->driver) {
+		struct umc_dev *umc = to_umc_dev(dev);
+		struct umc_driver *umc_drv = to_umc_driver(dev->driver);
+
+		if (umc_drv->post_reset)
+			ret = umc_drv->post_reset(umc);
+	} else
+		ret = device_attach(dev);
+
+	return ret;
 }
 
 /**
  * umc_controller_reset - reset the whole UMC controller
  * @umc: the UMC device for the radio controller.
  *
- * Drivers will be unbound from all UMC devices belonging to the
- * controller and then the radio controller will be rebound.  The
- * radio controller is expected to do a full hardware reset when it is
- * probed.
+ * Drivers or all capabilities of the controller will have their
+ * pre_reset methods called or be unbound from their device.  Then all
+ * post_reset methods will be called or the drivers will be rebound.
+ *
+ * Radio controllers must provide pre_reset and post_reset methods and
+ * reset the hardware in their start method.
  *
  * If this is called while a probe() or remove() is in progress it
  * will return -EAGAIN and not perform the reset.
@@ -35,14 +60,13 @@ static int umc_bus_unbind_helper(struct device *dev, void *data)
 int umc_controller_reset(struct umc_dev *umc)
 {
 	struct device *parent = umc->dev.parent;
-	int ret;
+	int ret = 0;
 
-	if (down_trylock(&parent->sem))
+	if(down_trylock(&parent->sem))
 		return -EAGAIN;
-	bus_for_each_dev(&umc_bus_type, NULL, parent, umc_bus_unbind_helper);
-	ret = device_attach(&umc->dev);
-	if (ret == 1)
-		ret = 0;
+	ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper);
+	if (ret >= 0)
+		device_for_each_child(parent, parent, umc_bus_post_reset_helper);
 	up(&parent->sem);
 
 	return ret;
@@ -75,10 +99,10 @@ static int umc_bus_rescan_helper(struct device *dev, void *data)
 	if (!dev->driver)
 		ret = device_attach(dev);
 
-	return ret < 0 ? ret : 0;
+	return ret;
 }
 
-static void umc_bus_rescan(void)
+static void umc_bus_rescan(struct device *parent)
 {
 	int err;
 
@@ -86,7 +110,7 @@ static void umc_bus_rescan(void)
 	 * We can't use bus_rescan_devices() here as it deadlocks when
 	 * it tries to retake the dev->parent semaphore.
 	 */
-	err = bus_for_each_dev(&umc_bus_type, NULL, NULL, umc_bus_rescan_helper);
+	err = device_for_each_child(parent, NULL, umc_bus_rescan_helper);
 	if (err < 0)
 		printk(KERN_WARNING "%s: rescan of bus failed: %d\n",
 		       KBUILD_MODNAME, err);
@@ -120,7 +144,7 @@ static int umc_device_probe(struct device *dev)
 	if (err)
 		put_device(dev);
 	else
-		umc_bus_rescan();
+		umc_bus_rescan(dev->parent);
 
 	return err;
 }
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index 4c24496..af95541 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -248,6 +248,7 @@ extern struct device_attribute dev_attr_scan;
 void uwb_rsv_init(struct uwb_rc *rc);
 int uwb_rsv_setup(struct uwb_rc *rc);
 void uwb_rsv_cleanup(struct uwb_rc *rc);
+void uwb_rsv_remove_all(struct uwb_rc *rc);
 
 void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state);
 void uwb_rsv_remove(struct uwb_rsv *rsv);
diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c
index 6c454ea..e0d6693 100644
--- a/drivers/uwb/whc-rc.c
+++ b/drivers/uwb/whc-rc.c
@@ -394,7 +394,7 @@ void whcrc_stop_rc(struct uwb_rc *rc)
 
 	le_writel(0, whcrc->rc_base + URCCMD);
 	whci_wait_for(&umc_dev->dev, whcrc->rc_base + URCSTS,
-		      URCSTS_HALTED, 0, 40, "URCSTS.HALTED");
+		      URCSTS_HALTED, URCSTS_HALTED, 100, "URCSTS.HALTED");
 }
 
 static void whcrc_init(struct whcrc *whcrc)
@@ -488,6 +488,24 @@ static void whcrc_remove(struct umc_dev *umc_dev)
 	d_printf(1, &umc_dev->dev, "freed whcrc %p\n", whcrc);
 }
 
+static int whcrc_pre_reset(struct umc_dev *umc)
+{
+	struct whcrc *whcrc = umc_get_drvdata(umc);
+	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
+
+	uwb_rc_pre_reset(uwb_rc);
+	return 0;
+}
+
+static int whcrc_post_reset(struct umc_dev *umc)
+{
+	struct whcrc *whcrc = umc_get_drvdata(umc);
+	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
+
+	uwb_rc_post_reset(uwb_rc);
+	return 0;
+}
+
 /* PCI device ID's that we handle [so it gets loaded] */
 static struct pci_device_id whcrc_id_table[] = {
 	{ PCI_DEVICE_CLASS(PCI_CLASS_WIRELESS_WHCI, ~0) },
@@ -496,10 +514,12 @@ static struct pci_device_id whcrc_id_table[] = {
 MODULE_DEVICE_TABLE(pci, whcrc_id_table);
 
 static struct umc_driver whcrc_driver = {
-	.name   = "whc-rc",
-	.cap_id = UMC_CAP_ID_WHCI_RC,
-	.probe  = whcrc_probe,
-	.remove = whcrc_remove,
+	.name       = "whc-rc",
+	.cap_id     = UMC_CAP_ID_WHCI_RC,
+	.probe      = whcrc_probe,
+	.remove     = whcrc_remove,
+	.pre_reset  = whcrc_pre_reset,
+	.post_reset = whcrc_post_reset,
 };
 
 static int __init whcrc_driver_init(void)
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index 881f0c5..c485484 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -540,6 +540,8 @@ void uwb_rc_rm(struct uwb_rc *);
 void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t);
 void uwb_rc_neh_error(struct uwb_rc *, int);
 void uwb_rc_reset_all(struct uwb_rc *rc);
+void uwb_rc_pre_reset(struct uwb_rc *rc);
+void uwb_rc_post_reset(struct uwb_rc *rc);
 
 /**
  * uwb_rsv_is_owner - is the owner of this reservation the RC?
diff --git a/include/linux/uwb/umc.h b/include/linux/uwb/umc.h
index 36a39e3..4b4fc0f 100644
--- a/include/linux/uwb/umc.h
+++ b/include/linux/uwb/umc.h
@@ -89,6 +89,8 @@ struct umc_driver {
 	void (*remove)(struct umc_dev *);
 	int  (*suspend)(struct umc_dev *, pm_message_t state);
 	int  (*resume)(struct umc_dev *);
+	int  (*pre_reset)(struct umc_dev *);
+	int  (*post_reset)(struct umc_dev *);
 
 	struct device_driver driver;
 };
-- 
cgit v0.10.2


From 58be81ed301d96045bca2b85f3b838910efcfde4 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Fri, 7 Nov 2008 18:19:19 +0000
Subject: uwb: fix races between events and neh timers

Always use del_timer_sync() before freeing nehs.  Destroy all nehs after
stopping the radio controller and before cleaning up the reservation
manager.  Handle the timer running after an event has removed the neh.

This fixes various oopses that may occur if a radio controller is removed
while a neh timer is still active.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c
index 38e3d57..f00633d 100644
--- a/drivers/uwb/lc-rc.c
+++ b/drivers/uwb/lc-rc.c
@@ -79,7 +79,6 @@ static void uwb_rc_sys_release(struct device *dev)
 	struct uwb_dev *uwb_dev = container_of(dev, struct uwb_dev, dev);
 	struct uwb_rc *rc = container_of(uwb_dev, struct uwb_rc, uwb_dev);
 
-	uwb_rc_neh_destroy(rc);
 	uwb_rc_ie_release(rc);
 	kfree(rc);
 }
@@ -311,7 +310,7 @@ void uwb_rc_rm(struct uwb_rc *rc)
 	rc->ready = 0;
 
 	uwb_dbg_del_rc(rc);
-	uwb_rsv_cleanup(rc);
+	uwb_rsv_remove_all(rc);
 	uwb_rc_ie_rm(rc, UWB_IDENTIFICATION_IE);
 	if (rc->beaconing >= 0)
 		uwb_rc_beacon(rc, -1, 0);
@@ -322,6 +321,7 @@ void uwb_rc_rm(struct uwb_rc *rc)
 	rc->stop(rc);
 
 	uwbd_stop(rc);
+	uwb_rc_neh_destroy(rc);
 
 	uwb_dev_lock(&rc->uwb_dev);
 	rc->priv = NULL;
@@ -331,6 +331,7 @@ void uwb_rc_rm(struct uwb_rc *rc)
 	uwb_dev_for_each(rc, uwb_dev_offair_helper, NULL);
 	__uwb_rc_sys_rm(rc);
 	mutex_unlock(&rc->uwb_beca.mutex);
+	uwb_rsv_cleanup(rc);
  	uwb_beca_release(rc);
 	uwb_dev_rm(&rc->uwb_dev);
 }
diff --git a/drivers/uwb/neh.c b/drivers/uwb/neh.c
index 9b4eb64..48b4ece 100644
--- a/drivers/uwb/neh.c
+++ b/drivers/uwb/neh.c
@@ -254,7 +254,6 @@ error_kzalloc:
 
 static void __uwb_rc_neh_rm(struct uwb_rc *rc, struct uwb_rc_neh *neh)
 {
-	del_timer(&neh->timer);
 	__uwb_rc_ctx_put(rc, neh);
 	list_del(&neh->list_node);
 }
@@ -275,6 +274,7 @@ void uwb_rc_neh_rm(struct uwb_rc *rc, struct uwb_rc_neh *neh)
 	__uwb_rc_neh_rm(rc, neh);
 	spin_unlock_irqrestore(&rc->neh_lock, flags);
 
+	del_timer_sync(&neh->timer);
 	uwb_rc_neh_put(neh);
 }
 
@@ -438,9 +438,10 @@ static void uwb_rc_neh_grok_event(struct uwb_rc *rc, struct uwb_rceb *rceb, size
 				rceb->bEventContext, size);
 	} else {
 		neh = uwb_rc_neh_lookup(rc, rceb);
-		if (neh)
+		if (neh) {
+			del_timer_sync(&neh->timer);
 			uwb_rc_neh_cb(neh, rceb, size);
-		else
+		} else
 			dev_warn(dev, "event 0x%02x/%04x/%02x (%zu bytes): nobody cared\n",
 				 rceb->bEventType, le16_to_cpu(rceb->wEvent),
 				 rceb->bEventContext, size);
@@ -562,16 +563,22 @@ EXPORT_SYMBOL_GPL(uwb_rc_neh_grok);
  */
 void uwb_rc_neh_error(struct uwb_rc *rc, int error)
 {
-	struct uwb_rc_neh *neh, *next;
+	struct uwb_rc_neh *neh;
 	unsigned long flags;
 
-	BUG_ON(error >= 0);
-	spin_lock_irqsave(&rc->neh_lock, flags);
-	list_for_each_entry_safe(neh, next, &rc->neh_list, list_node) {
+	for (;;) {
+		spin_lock_irqsave(&rc->neh_lock, flags);
+		if (list_empty(&rc->neh_list)) {
+			spin_unlock_irqrestore(&rc->neh_lock, flags);
+			break;
+		}
+		neh = list_first_entry(&rc->neh_list, struct uwb_rc_neh, list_node);
 		__uwb_rc_neh_rm(rc, neh);
+		spin_unlock_irqrestore(&rc->neh_lock, flags);
+
+		del_timer_sync(&neh->timer);
 		uwb_rc_neh_cb(neh, NULL, error);
 	}
-	spin_unlock_irqrestore(&rc->neh_lock, flags);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_neh_error);
 
@@ -583,10 +590,14 @@ static void uwb_rc_neh_timer(unsigned long arg)
 	unsigned long flags;
 
 	spin_lock_irqsave(&rc->neh_lock, flags);
-	__uwb_rc_neh_rm(rc, neh);
+	if (neh->context)
+		__uwb_rc_neh_rm(rc, neh);
+	else
+		neh = NULL;
 	spin_unlock_irqrestore(&rc->neh_lock, flags);
 
-	uwb_rc_neh_cb(neh, NULL, -ETIMEDOUT);
+	if (neh)
+		uwb_rc_neh_cb(neh, NULL, -ETIMEDOUT);
 }
 
 /** Initializes the @rc's neh subsystem
@@ -605,12 +616,19 @@ void uwb_rc_neh_create(struct uwb_rc *rc)
 void uwb_rc_neh_destroy(struct uwb_rc *rc)
 {
 	unsigned long flags;
-	struct uwb_rc_neh *neh, *next;
+	struct uwb_rc_neh *neh;
 
-	spin_lock_irqsave(&rc->neh_lock, flags);
-	list_for_each_entry_safe(neh, next, &rc->neh_list, list_node) {
+	for (;;) {
+		spin_lock_irqsave(&rc->neh_lock, flags);
+		if (list_empty(&rc->neh_list)) {
+			spin_unlock_irqrestore(&rc->neh_lock, flags);
+			break;
+		}
+		neh = list_first_entry(&rc->neh_list, struct uwb_rc_neh, list_node);
 		__uwb_rc_neh_rm(rc, neh);
+		spin_unlock_irqrestore(&rc->neh_lock, flags);
+
+		del_timer_sync(&neh->timer);
 		uwb_rc_neh_put(neh);
 	}
-	spin_unlock_irqrestore(&rc->neh_lock, flags);
 }
-- 
cgit v0.10.2


From ebeb0406f153db51ab2d4771faf2342bd6ca14dd Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Wed, 12 Nov 2008 07:48:00 +0900
Subject: fat: drop negative dentry on rename() path

Drop the negative dentry on rename() path, in order to make sure to
use the case sensitive name which is specified by user if this is for
creation.

For it, this uses newly added LOOKUP_RENAME_TARGET like LOOKUP_CREATE.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index bf326d4..8ae32e3 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -78,7 +78,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
 	 * for creation.
 	 */
 	if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
-		if (nd->flags & LOOKUP_CREATE)
+		if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
 			return 0;
 	}
 
-- 
cgit v0.10.2


From 985eafcc5480b0d98419b96869f2560abb2764c7 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Wed, 12 Nov 2008 07:48:01 +0900
Subject: fat: fix duplicate addition of ->llseek handler

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 67e0583..3a7f603 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -841,7 +841,6 @@ const struct file_operations fat_dir_operations = {
 	.compat_ioctl	= fat_compat_dir_ioctl,
 #endif
 	.fsync		= file_fsync,
-	.llseek		= generic_file_llseek,
 };
 
 static int fat_get_short_entry(struct inode *dir, loff_t *pos,
-- 
cgit v0.10.2


From 5a6bb10393eb9a1985e97af12f0cb2906bcbf1af Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 12 Nov 2008 07:48:01 +0900
Subject: fat: make sure to set d_ops in fat_get_parent

fat_get_parent needs to setup the dentry operations, otherwise we might
lose them when the NFS server needs to reconnect out of cache inodes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index bdd8fb7..37a8af1 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -749,6 +749,8 @@ static struct dentry *fat_get_parent(struct dentry *child)
 	brelse(bh);
 
 	parent = d_obtain_alias(inode);
+	if (!IS_ERR(parent))
+		parent->d_op = sb->s_root->d_op;
 out:
 	unlock_super(sb);
 
-- 
cgit v0.10.2


From e17be2b2a95b43fe0d5878adf330701bb7a77115 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 17 Nov 2008 15:24:14 +0000
Subject: uwb: add pal parameter to new reservation callback

The pal parameter allows PALs to retrieve their PAL-specific data
structure.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index 3d76efe..935d5b5 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -558,7 +558,7 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc,
 	spin_lock(&rc->pal_lock);
 	list_for_each_entry(pal, &rc->pals, node) {
 		if (pal->new_rsv)
-			pal->new_rsv(rsv);
+			pal->new_rsv(pal, rsv);
 		if (rsv->state == UWB_RSV_STATE_T_ACCEPTED)
 			break;
 	}
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 88e6ac7..217ebaa 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -306,13 +306,13 @@ static struct file_operations drp_avail_fops = {
 	.owner   = THIS_MODULE,
 };
 
-static void uwb_dbg_new_rsv(struct uwb_rsv *rsv)
+static void uwb_dbg_new_rsv(struct uwb_pal *pal, struct uwb_rsv *rsv)
 {
-	struct uwb_rc *rc = rsv->rc;
+	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
 
-	if (rc->dbg->accept) {
-		list_add_tail(&rsv->pal_node, &rc->dbg->rsvs);
-		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, NULL);
+	if (dbg->accept) {
+		list_add_tail(&rsv->pal_node, &dbg->rsvs);
+		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, dbg);
 	}
 }
 
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index c485484..effd979 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -405,7 +405,7 @@ struct uwb_pal {
 	struct list_head node;
 	const char *name;
 	struct device *device;
-	void (*new_rsv)(struct uwb_rsv *rsv);
+	void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv);
 };
 
 void uwb_pal_init(struct uwb_pal *pal);
-- 
cgit v0.10.2


From 6fae35f9cea92793a98b2d9ab21235e5ae035581 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 17 Nov 2008 15:53:42 +0000
Subject: uwb: add basic radio manager

The UWB radio manager coordinates the use of the radio between the
PALs that may be using it.  PALs request use of the radio with
uwb_radio_start() and the radio manager will start beaconing if its
not already doing so.  When the last PAL has called uwb_radio_stop()
beaconing will be stopped.

In the future, the radio manager will have a more sophisticated channel
selection algorithm, probably following the Channel Selection Policy
from the WiMedia Alliance when it is finalized.  For now, channel 9
(BG1, TFC1) is selected.

The user may override the channel selected by the radio manager and may
force the radio to stop beaconing.

The WUSB Host Controller PAL makes use of this and there are two new
debug PAL commands that can be used for testing.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/Documentation/ABI/testing/sysfs-class-uwb_rc b/Documentation/ABI/testing/sysfs-class-uwb_rc
index a0d18db..6a5fd07 100644
--- a/Documentation/ABI/testing/sysfs-class-uwb_rc
+++ b/Documentation/ABI/testing/sysfs-class-uwb_rc
@@ -32,14 +32,16 @@ Contact:        linux-usb@vger.kernel.org
 Description:
                 Write:
 
-                <channel> [<bpst offset>]
+                <channel>
 
-                to start beaconing on a specific channel, or stop
-                beaconing if <channel> is -1.  Valid channels depends
-                on the radio controller's supported band groups.
+                to force a specific channel to be used when beaconing,
+                or, if <channel> is -1, to prohibit beaconing.  If
+                <channel> is 0, then the default channel selection
+                algorithm will be used.  Valid channels depends on the
+                radio controller's supported band groups.
 
-                <bpst offset> may be used to try and join a specific
-                beacon group if more than one was found during a scan.
+                Reading returns the currently active channel, or -1 if
+                the radio controller is not beaconing.
 
 What:           /sys/class/uwb_rc/uwbN/scan
 Date:           July 2008
diff --git a/Documentation/usb/wusb-cbaf b/Documentation/usb/wusb-cbaf
index 2e78b70..426ddaa 100644
--- a/Documentation/usb/wusb-cbaf
+++ b/Documentation/usb/wusb-cbaf
@@ -80,12 +80,6 @@ case $1 in
     start)
         for dev in ${2:-$hdevs}
           do
-          uwb_rc=$(readlink -f $dev/uwb_rc)
-          if cat $uwb_rc/beacon | grep -q -- "-1"
-              then
-              echo 13 0 > $uwb_rc/beacon
-              echo I: started beaconing on ch 13 on $(basename $uwb_rc) >&2
-          fi
           echo $host_CHID > $dev/wusb_chid
           echo I: started host $(basename $dev) >&2
         done
@@ -95,9 +89,6 @@ case $1 in
           do
           echo 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > $dev/wusb_chid
           echo I: stopped host $(basename $dev) >&2
-          uwb_rc=$(readlink -f $dev/uwb_rc)
-          echo -1 | cat > $uwb_rc/beacon
-          echo I: stopped beaconing on $(basename $uwb_rc) >&2
         done
         ;;
     set-chid)
diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
index 2827353..2a4d36f 100644
--- a/drivers/usb/host/hwa-hc.c
+++ b/drivers/usb/host/hwa-hc.c
@@ -221,7 +221,6 @@ static void hwahc_op_stop(struct usb_hcd *usb_hcd)
 
 	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
 	mutex_lock(&wusbhc->mutex);
-	wusbhc_stop(wusbhc);
 	wusb_cluster_id_put(wusbhc->cluster_id);
 	mutex_unlock(&wusbhc->mutex);
 	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
diff --git a/drivers/usb/host/whci/hcd.c b/drivers/usb/host/whci/hcd.c
index de1e072..f599f89 100644
--- a/drivers/usb/host/whci/hcd.c
+++ b/drivers/usb/host/whci/hcd.c
@@ -91,8 +91,6 @@ static void whc_stop(struct usb_hcd *usb_hcd)
 
 	mutex_lock(&wusbhc->mutex);
 
-	wusbhc_stop(wusbhc);
-
 	/* stop HC */
 	le_writel(0, whc->base + WUSBINTR);
 	whc_write_wusbcmd(whc, WUSBCMD_RUN, 0);
diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c
index c01c7a8..08a1ec9 100644
--- a/drivers/usb/wusbcore/devconnect.c
+++ b/drivers/usb/wusbcore/devconnect.c
@@ -1124,8 +1124,7 @@ void wusbhc_devconnect_destroy(struct wusbhc *wusbhc)
  * FIXME: This also enables the keep alives but this is not necessary
  * until there are connected and authenticated devices.
  */
-int wusbhc_devconnect_start(struct wusbhc *wusbhc,
-			    const struct wusb_ckhdid *chid)
+int wusbhc_devconnect_start(struct wusbhc *wusbhc)
 {
 	struct device *dev = wusbhc->dev;
 	struct wuie_host_info *hi;
@@ -1138,7 +1137,7 @@ int wusbhc_devconnect_start(struct wusbhc *wusbhc,
 	hi->hdr.bLength       = sizeof(*hi);
 	hi->hdr.bIEIdentifier = WUIE_ID_HOST_INFO;
 	hi->attributes        = cpu_to_le16((wusbhc->rsv->stream << 3) | WUIE_HI_CAP_ALL);
-	hi->CHID              = *chid;
+	hi->CHID              = wusbhc->chid;
 	result = wusbhc_mmcie_set(wusbhc, 0, 0, &hi->hdr);
 	if (result < 0) {
 		dev_err(dev, "Cannot add Host Info MMCIE: %d\n", result);
diff --git a/drivers/usb/wusbcore/mmc.c b/drivers/usb/wusbcore/mmc.c
index af2aee0..5463ece 100644
--- a/drivers/usb/wusbcore/mmc.c
+++ b/drivers/usb/wusbcore/mmc.c
@@ -162,12 +162,11 @@ EXPORT_SYMBOL_GPL(wusbhc_mmcie_rm);
 /*
  * wusbhc_start - start transmitting MMCs and accepting connections
  * @wusbhc: the HC to start
- * @chid: the CHID to use for this host
  *
  * Establishes a cluster reservation, enables device connections, and
  * starts MMCs with appropriate DNTS parameters.
  */
-int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid)
+int wusbhc_start(struct wusbhc *wusbhc)
 {
 	int result;
 	struct device *dev = wusbhc->dev;
@@ -181,7 +180,7 @@ int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid)
 		goto error_rsv_establish;
 	}
 
-	result = wusbhc_devconnect_start(wusbhc, chid);
+	result = wusbhc_devconnect_start(wusbhc);
 	if (result < 0) {
 		dev_err(dev, "error enabling device connections: %d\n", result);
 		goto error_devconnect_start;
@@ -219,34 +218,6 @@ error_rsv_establish:
 }
 
 /*
- * Disconnect all from the WUSB Channel
- *
- * Send a Host Disconnect IE in the MMC, wait, don't send it any more
- */
-static int __wusbhc_host_disconnect_ie(struct wusbhc *wusbhc)
-{
-	int result = -ENOMEM;
-	struct wuie_host_disconnect *host_disconnect_ie;
-	might_sleep();
-	host_disconnect_ie = kmalloc(sizeof(*host_disconnect_ie), GFP_KERNEL);
-	if (host_disconnect_ie == NULL)
-		goto error_alloc;
-	host_disconnect_ie->hdr.bLength       = sizeof(*host_disconnect_ie);
-	host_disconnect_ie->hdr.bIEIdentifier = WUIE_ID_HOST_DISCONNECT;
-	result = wusbhc_mmcie_set(wusbhc, 0, 0, &host_disconnect_ie->hdr);
-	if (result < 0)
-		goto error_mmcie_set;
-
-	/* WUSB1.0[8.5.3.1 & 7.5.2] */
-	msleep(100);
-	wusbhc_mmcie_rm(wusbhc, &host_disconnect_ie->hdr);
-error_mmcie_set:
-	kfree(host_disconnect_ie);
-error_alloc:
-	return result;
-}
-
-/*
  * wusbhc_stop - stop transmitting MMCs
  * @wusbhc: the HC to stop
  *
@@ -265,29 +236,6 @@ void wusbhc_stop(struct wusbhc *wusbhc)
 EXPORT_SYMBOL_GPL(wusbhc_stop);
 
 /*
- * Change the CHID in a WUSB Channel
- *
- * If it is just a new CHID, send a Host Disconnect IE and then change
- * the CHID IE.
- */
-static int __wusbhc_chid_change(struct wusbhc *wusbhc,
-				const struct wusb_ckhdid *chid)
-{
-	int result = -ENOSYS;
-	struct device *dev = wusbhc->dev;
-	dev_err(dev, "%s() not implemented yet\n", __func__);
-	return result;
-
-	BUG_ON(wusbhc->wuie_host_info == NULL);
-	__wusbhc_host_disconnect_ie(wusbhc);
-	wusbhc->wuie_host_info->CHID = *chid;
-	result = wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->wuie_host_info->hdr);
-	if (result < 0)
-		dev_err(dev, "Can't update Host Info WUSB IE: %d\n", result);
-	return result;
-}
-
-/*
  * Set/reset/update a new CHID
  *
  * Depending on the previous state of the MMCs, start, stop or change
@@ -302,16 +250,19 @@ int wusbhc_chid_set(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid)
 		chid = NULL;
 
 	mutex_lock(&wusbhc->mutex);
-	if (wusbhc->active) {
-		if (chid)
-			result = __wusbhc_chid_change(wusbhc, chid);
-		else
-			wusbhc_stop(wusbhc);
-	} else {
-		if (chid)
-			wusbhc_start(wusbhc, chid);
+	if (chid) {
+		if (wusbhc->active) {
+			mutex_unlock(&wusbhc->mutex);
+			return -EBUSY;
+		}
+		wusbhc->chid = *chid;
 	}
 	mutex_unlock(&wusbhc->mutex);
+
+	if (chid)
+		result = uwb_radio_start(&wusbhc->pal);
+	else
+		uwb_radio_stop(&wusbhc->pal);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wusbhc_chid_set);
diff --git a/drivers/usb/wusbcore/pal.c b/drivers/usb/wusbcore/pal.c
index 7cc51e9..d0b172c 100644
--- a/drivers/usb/wusbcore/pal.c
+++ b/drivers/usb/wusbcore/pal.c
@@ -18,6 +18,16 @@
  */
 #include "wusbhc.h"
 
+static void wusbhc_channel_changed(struct uwb_pal *pal, int channel)
+{
+	struct wusbhc *wusbhc = container_of(pal, struct wusbhc, pal);
+
+	if (channel < 0)
+		wusbhc_stop(wusbhc);
+	else
+		wusbhc_start(wusbhc);
+}
+
 /**
  * wusbhc_pal_register - register the WUSB HC as a UWB PAL
  * @wusbhc: the WUSB HC
@@ -28,8 +38,10 @@ int wusbhc_pal_register(struct wusbhc *wusbhc)
 
 	wusbhc->pal.name   = "wusbhc";
 	wusbhc->pal.device = wusbhc->usb_hcd.self.controller;
+	wusbhc->pal.rc     = wusbhc->uwb_rc;
+	wusbhc->pal.channel_changed = wusbhc_channel_changed;
 
-	return uwb_pal_register(wusbhc->uwb_rc, &wusbhc->pal);
+	return uwb_pal_register(&wusbhc->pal);
 }
 
 /**
@@ -38,5 +50,5 @@ int wusbhc_pal_register(struct wusbhc *wusbhc)
  */
 void wusbhc_pal_unregister(struct wusbhc *wusbhc)
 {
-	uwb_pal_unregister(wusbhc->uwb_rc, &wusbhc->pal);
+	uwb_pal_unregister(&wusbhc->pal);
 }
diff --git a/drivers/usb/wusbcore/wusbhc.h b/drivers/usb/wusbcore/wusbhc.h
index 8fef934..797c245 100644
--- a/drivers/usb/wusbcore/wusbhc.h
+++ b/drivers/usb/wusbcore/wusbhc.h
@@ -252,7 +252,8 @@ struct wusbhc {
 	struct uwb_pal pal;
 
 	unsigned trust_timeout;			/* in jiffies */
-	struct wuie_host_info *wuie_host_info;	/* Includes CHID */
+	struct wusb_ckhdid chid;
+	struct wuie_host_info *wuie_host_info;
 
 	struct mutex mutex;			/* locks everything else */
 	u16 cluster_id;				/* Wireless USB Cluster ID */
@@ -376,15 +377,14 @@ static inline void wusbhc_put(struct wusbhc *wusbhc)
 	usb_put_hcd(&wusbhc->usb_hcd);
 }
 
-int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid);
+int wusbhc_start(struct wusbhc *wusbhc);
 void wusbhc_stop(struct wusbhc *wusbhc);
 extern int wusbhc_chid_set(struct wusbhc *, const struct wusb_ckhdid *);
 
 /* Device connect handling */
 extern int wusbhc_devconnect_create(struct wusbhc *);
 extern void wusbhc_devconnect_destroy(struct wusbhc *);
-extern int wusbhc_devconnect_start(struct wusbhc *wusbhc,
-				   const struct wusb_ckhdid *chid);
+extern int wusbhc_devconnect_start(struct wusbhc *wusbhc);
 extern void wusbhc_devconnect_stop(struct wusbhc *wusbhc);
 extern void wusbhc_handle_dn(struct wusbhc *, u8 srcaddr,
 			     struct wusb_dn_hdr *dn_hdr, size_t size);
diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile
index 2b99c3e..ce21a95 100644
--- a/drivers/uwb/Makefile
+++ b/drivers/uwb/Makefile
@@ -18,6 +18,7 @@ uwb-objs :=		\
 	lc-rc.o		\
 	neh.o		\
 	pal.o		\
+	radio.o		\
 	reset.o		\
 	rsv.o		\
 	scan.o		\
diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
index d9f2a8a..2479560 100644
--- a/drivers/uwb/beacon.c
+++ b/drivers/uwb/beacon.c
@@ -119,7 +119,6 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset)
 	int result;
 	struct device *dev = &rc->uwb_dev.dev;
 
-	mutex_lock(&rc->uwb_dev.mutex);
 	if (channel < 0)
 		channel = -1;
 	if (channel == -1)
@@ -128,7 +127,7 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset)
 		/* channel >= 0...dah */
 		result = uwb_rc_start_beacon(rc, bpst_offset, channel);
 		if (result < 0)
-			goto out_up;
+			return result;
 		if (le16_to_cpu(rc->ies->wIELength) > 0) {
 			result = uwb_rc_set_ie(rc, rc->ies);
 			if (result < 0) {
@@ -137,19 +136,14 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset)
 				result = uwb_rc_stop_beacon(rc);
 				channel = -1;
 				bpst_offset = 0;
-			} else
-				result = 0;
+			}
 		}
 	}
 
-	if (result < 0)
-		goto out_up;
-	rc->beaconing = channel;
-
-	uwb_notify(rc, NULL, uwb_bg_joined(rc) ? UWB_NOTIF_BG_JOIN : UWB_NOTIF_BG_LEAVE);
-
-out_up:
-	mutex_unlock(&rc->uwb_dev.mutex);
+	if (result >= 0) {
+		rc->beaconing = channel;
+		uwb_notify(rc, NULL, uwb_bg_joined(rc) ? UWB_NOTIF_BG_JOIN : UWB_NOTIF_BG_LEAVE);
+	}
 	return result;
 }
 
@@ -618,9 +612,6 @@ static ssize_t uwb_rc_beacon_show(struct device *dev,
 
 /*
  * Start beaconing on the specified channel, or stop beaconing.
- *
- * The BPST offset of when to start searching for a beacon group to
- * join may be specified.
  */
 static ssize_t uwb_rc_beacon_store(struct device *dev,
 				   struct device_attribute *attr,
@@ -629,12 +620,11 @@ static ssize_t uwb_rc_beacon_store(struct device *dev,
 	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
 	struct uwb_rc *rc = uwb_dev->rc;
 	int channel;
-	unsigned bpst_offset = 0;
 	ssize_t result = -EINVAL;
 
-	result = sscanf(buf, "%d %u\n", &channel, &bpst_offset);
+	result = sscanf(buf, "%d", &channel);
 	if (result >= 1)
-		result = uwb_rc_beacon(rc, channel, bpst_offset);
+		result = uwb_radio_force_channel(rc, channel);
 
 	return result < 0 ? result : size;
 }
diff --git a/drivers/uwb/drp.c b/drivers/uwb/drp.c
index c0b1e5e..fe32814 100644
--- a/drivers/uwb/drp.c
+++ b/drivers/uwb/drp.c
@@ -37,14 +37,13 @@
  *
  * A DRP Availability IE is appended.
  *
- * rc->uwb_dev.mutex is held
+ * rc->rsvs_mutex is held
  *
  * FIXME We currently ignore the returned value indicating the remaining space
  * in beacon. This could be used to deny reservation requests earlier if
  * determined that they would cause the beacon space to be exceeded.
  */
-static
-int uwb_rc_gen_send_drp_ie(struct uwb_rc *rc)
+int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
 {
 	int result;
 	struct device *dev = &rc->uwb_dev.dev;
@@ -102,25 +101,6 @@ error_cmd:
 	kfree(cmd);
 error:
 	return result;
-
-}
-/**
- * Send all DRP IEs associated with this host
- *
- * @returns:    >= 0 number of bytes still available in the beacon
- *              < 0 errno code on error.
- *
- * As per the protocol we obtain the host controller device lock to access
- * bandwidth structures.
- */
-int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
-{
-	int result;
-
-	mutex_lock(&rc->uwb_dev.mutex);
-	result = uwb_rc_gen_send_drp_ie(rc);
-	mutex_unlock(&rc->uwb_dev.mutex);
-	return result;
 }
 
 void uwb_drp_handle_timeout(struct uwb_rsv *rsv)
diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c
index f00633d..9cf21e6 100644
--- a/drivers/uwb/lc-rc.c
+++ b/drivers/uwb/lc-rc.c
@@ -189,9 +189,9 @@ static int uwb_rc_setup(struct uwb_rc *rc)
 	int result;
 	struct device *dev = &rc->uwb_dev.dev;
 
-	result = uwb_rc_reset(rc);
+	result = uwb_radio_setup(rc);
 	if (result < 0) {
-		dev_err(dev, "cannot reset UWB radio: %d\n", result);
+		dev_err(dev, "cannot setup UWB radio: %d\n", result);
 		goto error;
 	}
 	result = uwb_rc_mac_addr_setup(rc);
@@ -311,12 +311,7 @@ void uwb_rc_rm(struct uwb_rc *rc)
 
 	uwb_dbg_del_rc(rc);
 	uwb_rsv_remove_all(rc);
-	uwb_rc_ie_rm(rc, UWB_IDENTIFICATION_IE);
-	if (rc->beaconing >= 0)
-		uwb_rc_beacon(rc, -1, 0);
-	if (rc->scan_type != UWB_SCAN_DISABLED)
-		uwb_rc_scan(rc, rc->scanning, UWB_SCAN_DISABLED, 0);
-	uwb_rc_reset(rc);
+	uwb_radio_shutdown(rc);
 
 	rc->stop(rc);
 
diff --git a/drivers/uwb/pal.c b/drivers/uwb/pal.c
index 1afb38e..6057651 100644
--- a/drivers/uwb/pal.c
+++ b/drivers/uwb/pal.c
@@ -32,13 +32,13 @@ EXPORT_SYMBOL_GPL(uwb_pal_init);
 
 /**
  * uwb_pal_register - register a UWB PAL
- * @rc: the radio controller the PAL will be using
  * @pal: the PAL
  *
  * The PAL must be initialized with uwb_pal_init().
  */
-int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal)
+int uwb_pal_register(struct uwb_pal *pal)
 {
+	struct uwb_rc *rc = pal->rc;
 	int ret;
 
 	if (pal->device) {
@@ -54,9 +54,9 @@ int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal)
 		}
 	}
 
-	spin_lock(&rc->pal_lock);
+	mutex_lock(&rc->uwb_dev.mutex);
 	list_add(&pal->node, &rc->pals);
-	spin_unlock(&rc->pal_lock);
+	mutex_unlock(&rc->uwb_dev.mutex);
 
 	return 0;
 }
@@ -64,14 +64,17 @@ EXPORT_SYMBOL_GPL(uwb_pal_register);
 
 /**
  * uwb_pal_register - unregister a UWB PAL
- * @rc: the radio controller the PAL was using
  * @pal: the PAL
  */
-void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal)
+void uwb_pal_unregister(struct uwb_pal *pal)
 {
-	spin_lock(&rc->pal_lock);
+	struct uwb_rc *rc = pal->rc;
+
+	uwb_radio_stop(pal);
+
+	mutex_lock(&rc->uwb_dev.mutex);
 	list_del(&pal->node);
-	spin_unlock(&rc->pal_lock);
+	mutex_unlock(&rc->uwb_dev.mutex);
 
 	if (pal->device) {
 		sysfs_remove_link(&rc->uwb_dev.dev.kobj, pal->name);
@@ -86,6 +89,5 @@ EXPORT_SYMBOL_GPL(uwb_pal_unregister);
  */
 void uwb_rc_pal_init(struct uwb_rc *rc)
 {
-	spin_lock_init(&rc->pal_lock);
 	INIT_LIST_HEAD(&rc->pals);
 }
diff --git a/drivers/uwb/radio.c b/drivers/uwb/radio.c
new file mode 100644
index 0000000..f0d5549
--- /dev/null
+++ b/drivers/uwb/radio.c
@@ -0,0 +1,202 @@
+/*
+ * UWB radio (channel) management.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/uwb.h>
+
+#include "uwb-internal.h"
+
+
+static int uwb_radio_select_channel(struct uwb_rc *rc)
+{
+	/*
+	 * Default to channel 9 (BG1, TFC1) unless the user has
+	 * selected a specific channel or there are no active PALs.
+	 */
+	if (rc->active_pals == 0)
+		return -1;
+	if (rc->beaconing_forced)
+		return rc->beaconing_forced;
+	return 9;
+}
+
+
+/*
+ * Notify all active PALs that the channel has changed.
+ */
+static void uwb_radio_channel_changed(struct uwb_rc *rc, int channel)
+{
+	struct uwb_pal *pal;
+
+	list_for_each_entry(pal, &rc->pals, node) {
+		if (pal->channel && channel != pal->channel) {
+			pal->channel = channel;
+			if (pal->channel_changed)
+				pal->channel_changed(pal, pal->channel);
+		}
+	}
+}
+
+/*
+ * Change to a new channel and notify any active PALs of the new
+ * channel.
+ *
+ * When stopping the radio, PALs need to be notified first so they can
+ * terminate any active reservations.
+ */
+static int uwb_radio_change_channel(struct uwb_rc *rc, int channel)
+{
+	int ret = 0;
+
+	if (channel == -1)
+		uwb_radio_channel_changed(rc, channel);
+
+	if (channel != rc->beaconing) {
+		if (rc->beaconing != -1 && channel != -1) {
+			/*
+			 * FIXME: should signal the channel change
+			 * with a Channel Change IE.
+			 */
+			ret = uwb_radio_change_channel(rc, -1);
+			if (ret < 0)
+				return ret;
+		}
+		ret = uwb_rc_beacon(rc, channel, 0);
+	}
+
+	if (channel != -1)
+		uwb_radio_channel_changed(rc, rc->beaconing);
+
+	return ret;
+}
+
+/**
+ * uwb_radio_start - request that the radio be started
+ * @pal: the PAL making the request.
+ *
+ * If the radio is not already active, aa suitable channel is selected
+ * and beacons are started.
+ */
+int uwb_radio_start(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+	int ret = 0;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	if (!pal->channel) {
+		pal->channel = -1;
+		rc->active_pals++;
+		ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+	}
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(uwb_radio_start);
+
+/**
+ * uwb_radio_stop - request tha the radio be stopped.
+ * @pal: the PAL making the request.
+ *
+ * Stops the radio if no other PAL is making use of it.
+ */
+void uwb_radio_stop(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	if (pal->channel) {
+		rc->active_pals--;
+		uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+		pal->channel = 0;
+	}
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+}
+EXPORT_SYMBOL_GPL(uwb_radio_stop);
+
+/*
+ * uwb_radio_force_channel - force a specific channel to be used
+ * @rc: the radio controller.
+ * @channel: the channel to use; -1 to force the radio to stop; 0 to
+ *   use the default channel selection algorithm.
+ */
+int uwb_radio_force_channel(struct uwb_rc *rc, int channel)
+{
+	int ret = 0;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	rc->beaconing_forced = channel;
+	ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return ret;
+}
+
+/*
+ * uwb_radio_setup - setup the radio manager
+ * @rc: the radio controller.
+ *
+ * The radio controller is reset to ensure it's in a known state
+ * before it's used.
+ */
+int uwb_radio_setup(struct uwb_rc *rc)
+{
+	return uwb_rc_reset(rc);
+}
+
+/*
+ * uwb_radio_reset_state - reset any radio manager state
+ * @rc: the radio controller.
+ *
+ * All internal radio manager state is reset to values corresponding
+ * to a reset radio controller.
+ */
+void uwb_radio_reset_state(struct uwb_rc *rc)
+{
+	struct uwb_pal *pal;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	list_for_each_entry(pal, &rc->pals, node) {
+		if (pal->channel) {
+			pal->channel = -1;
+			if (pal->channel_changed)
+				pal->channel_changed(pal, -1);
+		}
+	}
+
+	rc->beaconing = -1;
+	rc->scanning = -1;
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+}
+
+/*
+ * uwb_radio_shutdown - shutdown the radio manager
+ * @rc: the radio controller.
+ *
+ * The radio controller is reset.
+ */
+void uwb_radio_shutdown(struct uwb_rc *rc)
+{
+	uwb_radio_reset_state(rc);
+	uwb_rc_reset(rc);
+}
diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c
index e39b3209..ce8283c 100644
--- a/drivers/uwb/reset.c
+++ b/drivers/uwb/reset.c
@@ -365,11 +365,7 @@ void uwb_rc_pre_reset(struct uwb_rc *rc)
 	rc->stop(rc);
 	uwbd_flush(rc);
 
-	mutex_lock(&rc->uwb_dev.mutex);
-	rc->beaconing = -1;
-	rc->scanning = -1;
-	mutex_unlock(&rc->uwb_dev.mutex);
-
+	uwb_radio_reset_state(rc);
 	uwb_rsv_remove_all(rc);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_pre_reset);
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index 935d5b5..1cd84f9 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -555,14 +555,14 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc,
 	 * deny the request.
 	 */
 	rsv->state = UWB_RSV_STATE_T_DENIED;
-	spin_lock(&rc->pal_lock);
+	mutex_lock(&rc->uwb_dev.mutex);
 	list_for_each_entry(pal, &rc->pals, node) {
 		if (pal->new_rsv)
 			pal->new_rsv(pal, rsv);
 		if (rsv->state == UWB_RSV_STATE_T_ACCEPTED)
 			break;
 	}
-	spin_unlock(&rc->pal_lock);
+	mutex_unlock(&rc->uwb_dev.mutex);
 
 	list_add_tail(&rsv->rc_node, &rc->reservations);
 	state = rsv->state;
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 217ebaa..0e58071a 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -192,7 +192,7 @@ static ssize_t command_write(struct file *file, const char __user *buf,
 {
 	struct uwb_rc *rc = file->private_data;
 	struct uwb_dbg_cmd cmd;
-	int ret;
+	int ret = 0;
 
 	if (len != sizeof(struct uwb_dbg_cmd))
 		return -EINVAL;
@@ -213,6 +213,12 @@ static ssize_t command_write(struct file *file, const char __user *buf,
 	case UWB_DBG_CMD_IE_RM:
 		ret = cmd_ie_rm(rc, &cmd.ie_rm);
 		break;
+	case UWB_DBG_CMD_RADIO_START:
+		ret = uwb_radio_start(&rc->dbg->pal);
+		break;
+	case UWB_DBG_CMD_RADIO_STOP:
+		uwb_radio_stop(&rc->dbg->pal);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -306,6 +312,17 @@ static struct file_operations drp_avail_fops = {
 	.owner   = THIS_MODULE,
 };
 
+static void uwb_dbg_channel_changed(struct uwb_pal *pal, int channel)
+{
+	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
+	struct device *dev = &pal->rc->uwb_dev.dev;
+
+	if (channel > 0)
+		dev_info(dev, "debug: channel %d started\n", channel);
+	else
+		dev_info(dev, "debug: channel stopped\n");
+}
+
 static void uwb_dbg_new_rsv(struct uwb_pal *pal, struct uwb_rsv *rsv)
 {
 	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
@@ -329,8 +346,11 @@ void uwb_dbg_add_rc(struct uwb_rc *rc)
 	INIT_LIST_HEAD(&rc->dbg->rsvs);
 
 	uwb_pal_init(&rc->dbg->pal);
+	rc->dbg->pal.rc = rc;
+	rc->dbg->pal.channel_changed = uwb_dbg_channel_changed;
 	rc->dbg->pal.new_rsv = uwb_dbg_new_rsv;
-	uwb_pal_register(rc, &rc->dbg->pal);
+	uwb_pal_register(&rc->dbg->pal);
+
 	if (root_dir) {
 		rc->dbg->root_d = debugfs_create_dir(dev_name(&rc->uwb_dev.dev),
 						     root_dir);
@@ -364,7 +384,7 @@ void uwb_dbg_del_rc(struct uwb_rc *rc)
 		uwb_rsv_terminate(rsv);
 	}
 
-	uwb_pal_unregister(rc, &rc->dbg->pal);
+	uwb_pal_unregister(&rc->dbg->pal);
 
 	if (root_dir) {
 		debugfs_remove(rc->dbg->drp_avail_f);
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index af95541..9c0cdb4 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -238,6 +238,11 @@ struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
 struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc,
 				       const struct uwb_mac_addr *macaddr);
 
+int uwb_radio_setup(struct uwb_rc *rc);
+void uwb_radio_reset_state(struct uwb_rc *rc);
+void uwb_radio_shutdown(struct uwb_rc *rc);
+int uwb_radio_force_channel(struct uwb_rc *rc, int channel);
+
 /* -- UWB Sysfs representation */
 extern struct class uwb_rc_class;
 extern struct device_attribute dev_attr_mac_address;
diff --git a/drivers/uwb/wlp/wlp-lc.c b/drivers/uwb/wlp/wlp-lc.c
index 0799402..7e5eb49 100644
--- a/drivers/uwb/wlp/wlp-lc.c
+++ b/drivers/uwb/wlp/wlp-lc.c
@@ -543,7 +543,8 @@ int wlp_setup(struct wlp *wlp, struct uwb_rc *rc)
 	uwb_notifs_register(rc, &wlp->uwb_notifs_handler);
 
 	uwb_pal_init(&wlp->pal);
-	result = uwb_pal_register(rc, &wlp->pal);
+	wlp->pal.rc = rc;
+	result = uwb_pal_register(&wlp->pal);
 	if (result < 0)
 		uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler);
 
@@ -557,7 +558,7 @@ void wlp_remove(struct wlp *wlp)
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	d_fnstart(6, dev, "wlp %p\n", wlp);
 	wlp_neighbors_release(wlp);
-	uwb_pal_unregister(wlp->rc, &wlp->pal);
+	uwb_pal_unregister(&wlp->pal);
 	uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler);
 	wlp_eda_release(&wlp->eda);
 	mutex_lock(&wlp->mutex);
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index effd979..7d3ebf0 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -355,6 +355,7 @@ struct uwb_rc {
 	u8 ctx_roll;
 
 	int beaconing;			/* Beaconing state [channel number] */
+	int beaconing_forced;
 	int scanning;
 	enum uwb_scan_type scan_type:3;
 	unsigned ready:1;
@@ -373,8 +374,8 @@ struct uwb_rc {
 	struct uwb_rc_cmd_set_ie *ies;
 	size_t ies_capacity;
 
-	spinlock_t pal_lock;
 	struct list_head pals;
+	int active_pals;
 
 	struct uwb_dbg *dbg;
 };
@@ -382,11 +383,17 @@ struct uwb_rc {
 
 /**
  * struct uwb_pal - a UWB PAL
- * @name:    descriptive name for this PAL (wushc, wlp, etc.).
+ * @name:    descriptive name for this PAL (wusbhc, wlp, etc.).
  * @device:  a device for the PAL.  Used to link the PAL and the radio
  *           controller in sysfs.
+ * @rc:      the radio controller the PAL uses.
+ * @channel_changed: called when the channel used by the radio changes.
+ *           A channel of -1 means the channel has been stopped.
  * @new_rsv: called when a peer requests a reservation (may be NULL if
  *           the PAL cannot accept reservation requests).
+ * @channel: channel being used by the PAL; 0 if the PAL isn't using
+ *           the radio; -1 if the PAL wishes to use the radio but
+ *           cannot.
  *
  * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB
  * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP).
@@ -405,12 +412,20 @@ struct uwb_pal {
 	struct list_head node;
 	const char *name;
 	struct device *device;
+	struct uwb_rc *rc;
+
+	void (*channel_changed)(struct uwb_pal *pal, int channel);
 	void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv);
+
+	int channel;
 };
 
 void uwb_pal_init(struct uwb_pal *pal);
-int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal);
-void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal);
+int uwb_pal_register(struct uwb_pal *pal);
+void uwb_pal_unregister(struct uwb_pal *pal);
+
+int uwb_radio_start(struct uwb_pal *pal);
+void uwb_radio_stop(struct uwb_pal *pal);
 
 /*
  * General public API
diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h
index 6a16566..07efbe1 100644
--- a/include/linux/uwb/debug-cmd.h
+++ b/include/linux/uwb/debug-cmd.h
@@ -34,6 +34,8 @@ enum uwb_dbg_cmd_type {
 	UWB_DBG_CMD_RSV_TERMINATE = 2,
 	UWB_DBG_CMD_IE_ADD = 3,
 	UWB_DBG_CMD_IE_RM = 4,
+	UWB_DBG_CMD_RADIO_START = 5,
+	UWB_DBG_CMD_RADIO_STOP = 6,
 };
 
 struct uwb_dbg_cmd_rsv_establish {
-- 
cgit v0.10.2


From e8e1594c8126b1b773988fa2e3bfec76cff88336 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 17 Nov 2008 16:16:51 +0000
Subject: wlp: start/stop radio on network interface up/down

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/i1480/i1480u-wlp/lc.c b/drivers/uwb/i1480/i1480u-wlp/lc.c
index 384306c..488b2e3 100644
--- a/drivers/uwb/i1480/i1480u-wlp/lc.c
+++ b/drivers/uwb/i1480/i1480u-wlp/lc.c
@@ -206,7 +206,7 @@ int i1480u_add(struct i1480u *i1480u, struct usb_interface *iface)
 	wlp->fill_device_info = i1480u_fill_device_info;
 	wlp->stop_queue = i1480u_stop_queue;
 	wlp->start_queue = i1480u_start_queue;
-	result = wlp_setup(wlp, rc);
+	result = wlp_setup(wlp, rc, net_dev);
 	if (result < 0) {
 		dev_err(&iface->dev, "Cannot setup WLP\n");
 		goto error_wlp_setup;
diff --git a/drivers/uwb/i1480/i1480u-wlp/netdev.c b/drivers/uwb/i1480/i1480u-wlp/netdev.c
index 8802ac4..2eafb97 100644
--- a/drivers/uwb/i1480/i1480u-wlp/netdev.c
+++ b/drivers/uwb/i1480/i1480u-wlp/netdev.c
@@ -207,6 +207,11 @@ int i1480u_open(struct net_device *net_dev)
 	result = i1480u_rx_setup(i1480u);		/* Alloc RX stuff */
 	if (result < 0)
 		goto error_rx_setup;
+
+	result = uwb_radio_start(&wlp->pal);
+	if (result < 0)
+		goto error_radio_start;
+
 	netif_wake_queue(net_dev);
 #ifdef i1480u_FLOW_CONTROL
 	result = usb_submit_urb(i1480u->notif_urb, GFP_KERNEL);;
@@ -215,25 +220,20 @@ int i1480u_open(struct net_device *net_dev)
 		goto error_notif_urb_submit;
 	}
 #endif
-	i1480u->uwb_notifs_handler.cb = i1480u_uwb_notifs_cb;
-	i1480u->uwb_notifs_handler.data = i1480u;
-	if (uwb_bg_joined(rc))
-		netif_carrier_on(net_dev);
-	else
-		netif_carrier_off(net_dev);
-	uwb_notifs_register(rc, &i1480u->uwb_notifs_handler);
 	/* Interface is up with an address, now we can create WSS */
 	result = wlp_wss_setup(net_dev, &wlp->wss);
 	if (result < 0) {
 		dev_err(dev, "Can't create WSS: %d. \n", result);
-		goto error_notif_deregister;
+		goto error_wss_setup;
 	}
 	return 0;
-error_notif_deregister:
-	uwb_notifs_deregister(rc, &i1480u->uwb_notifs_handler);
+error_wss_setup:
 #ifdef i1480u_FLOW_CONTROL
+	usb_kill_urb(i1480u->notif_urb);
 error_notif_urb_submit:
 #endif
+	uwb_radio_stop(&wlp->pal);
+error_radio_start:
 	netif_stop_queue(net_dev);
 	i1480u_rx_release(i1480u);
 error_rx_setup:
@@ -248,16 +248,15 @@ int i1480u_stop(struct net_device *net_dev)
 {
 	struct i1480u *i1480u = netdev_priv(net_dev);
 	struct wlp *wlp = &i1480u->wlp;
-	struct uwb_rc *rc = wlp->rc;
 
 	BUG_ON(wlp->rc == NULL);
 	wlp_wss_remove(&wlp->wss);
-	uwb_notifs_deregister(rc, &i1480u->uwb_notifs_handler);
 	netif_carrier_off(net_dev);
 #ifdef i1480u_FLOW_CONTROL
 	usb_kill_urb(i1480u->notif_urb);
 #endif
 	netif_stop_queue(net_dev);
+	uwb_radio_stop(&wlp->pal);
 	i1480u_rx_release(i1480u);
 	i1480u_tx_release(i1480u);
 	return 0;
@@ -303,34 +302,6 @@ int i1480u_change_mtu(struct net_device *net_dev, int mtu)
 	return 0;
 }
 
-
-/**
- * Callback function to handle events from UWB
- * When we see other devices we know the carrier is ok,
- * if we are the only device in the beacon group we set the carrier
- * state to off.
- * */
-void i1480u_uwb_notifs_cb(void *data, struct uwb_dev *uwb_dev,
-			  enum uwb_notifs event)
-{
-	struct i1480u *i1480u = data;
-	struct net_device *net_dev = i1480u->net_dev;
-	struct device *dev = &i1480u->usb_iface->dev;
-	switch (event) {
-	case UWB_NOTIF_BG_JOIN:
-		netif_carrier_on(net_dev);
-		dev_info(dev, "Link is up\n");
-		break;
-	case UWB_NOTIF_BG_LEAVE:
-		netif_carrier_off(net_dev);
-		dev_info(dev, "Link is down\n");
-		break;
-	default:
-		dev_err(dev, "don't know how to handle event %d from uwb\n",
-				event);
-	}
-}
-
 /**
  * Stop the network queue
  *
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 0e58071a..e02fb83 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -33,8 +33,6 @@
 #include <linux/seq_file.h>
 
 #include <linux/uwb/debug-cmd.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 #include "uwb-internal.h"
 
@@ -314,7 +312,6 @@ static struct file_operations drp_avail_fops = {
 
 static void uwb_dbg_channel_changed(struct uwb_pal *pal, int channel)
 {
-	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
 	struct device *dev = &pal->rc->uwb_dev.dev;
 
 	if (channel > 0)
diff --git a/drivers/uwb/wlp/wlp-lc.c b/drivers/uwb/wlp/wlp-lc.c
index 7e5eb49..e531093 100644
--- a/drivers/uwb/wlp/wlp-lc.c
+++ b/drivers/uwb/wlp/wlp-lc.c
@@ -526,7 +526,17 @@ void wlp_uwb_notifs_cb(void *_wlp, struct uwb_dev *uwb_dev,
 	}
 }
 
-int wlp_setup(struct wlp *wlp, struct uwb_rc *rc)
+static void wlp_channel_changed(struct uwb_pal *pal, int channel)
+{
+	struct wlp *wlp = container_of(pal, struct wlp, pal);
+
+	if (channel < 0)
+		netif_carrier_off(wlp->ndev);
+	else
+		netif_carrier_on(wlp->ndev);
+}
+
+int wlp_setup(struct wlp *wlp, struct uwb_rc *rc, struct net_device *ndev)
 {
 	struct device *dev = &rc->uwb_dev.dev;
 	int result;
@@ -537,6 +547,7 @@ int wlp_setup(struct wlp *wlp, struct uwb_rc *rc)
 	BUG_ON(wlp->stop_queue == NULL);
 	BUG_ON(wlp->start_queue == NULL);
 	wlp->rc = rc;
+	wlp->ndev = ndev;
 	wlp_eda_init(&wlp->eda);/* Set up address cache */
 	wlp->uwb_notifs_handler.cb = wlp_uwb_notifs_cb;
 	wlp->uwb_notifs_handler.data = wlp;
@@ -544,6 +555,7 @@ int wlp_setup(struct wlp *wlp, struct uwb_rc *rc)
 
 	uwb_pal_init(&wlp->pal);
 	wlp->pal.rc = rc;
+	wlp->pal.channel_changed = wlp_channel_changed;
 	result = uwb_pal_register(&wlp->pal);
 	if (result < 0)
 		uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler);
diff --git a/include/linux/wlp.h b/include/linux/wlp.h
index 033545e..ac95ce6 100644
--- a/include/linux/wlp.h
+++ b/include/linux/wlp.h
@@ -646,6 +646,7 @@ struct wlp_wss {
 struct wlp {
 	struct mutex mutex;
 	struct uwb_rc *rc;		/* UWB radio controller */
+	struct net_device *ndev;
 	struct uwb_pal pal;
 	struct wlp_eda eda;
 	struct wlp_uuid uuid;
@@ -675,7 +676,7 @@ struct wlp_wss_attribute {
 static struct wlp_wss_attribute wss_attr_##_name = __ATTR(_name, _mode,	\
 							  _show, _store)
 
-extern int wlp_setup(struct wlp *, struct uwb_rc *);
+extern int wlp_setup(struct wlp *, struct uwb_rc *, struct net_device *ndev);
 extern void wlp_remove(struct wlp *);
 extern ssize_t wlp_neighborhood_show(struct wlp *, char *);
 extern int wlp_wss_setup(struct net_device *, struct wlp_wss *);
-- 
cgit v0.10.2


From 0996e6382482ce9014787693d3884e9468153a5c Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 17 Nov 2008 16:23:22 +0000
Subject: uwb: remove unused beacon group join/leave events

The UWB_NOTIF_BG_JOIN/UWB_NOTIF_BG_LEAVE events have been
superceeded by the channel_changed callback in struct uwb_pal.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
index 2479560..d9c60cb 100644
--- a/drivers/uwb/beacon.c
+++ b/drivers/uwb/beacon.c
@@ -140,10 +140,8 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset)
 		}
 	}
 
-	if (result >= 0) {
+	if (result >= 0)
 		rc->beaconing = channel;
-		uwb_notify(rc, NULL, uwb_bg_joined(rc) ? UWB_NOTIF_BG_JOIN : UWB_NOTIF_BG_LEAVE);
-	}
 	return result;
 }
 
@@ -581,19 +579,6 @@ error:
 	return result;
 }
 
-/**
- * uwb_bg_joined - is the RC in a beacon group?
- * @rc: the radio controller
- *
- * Returns true if the radio controller is in a beacon group (even if
- * it's the sole member).
- */
-int uwb_bg_joined(struct uwb_rc *rc)
-{
-	return rc->beaconing != -1;
-}
-EXPORT_SYMBOL_GPL(uwb_bg_joined);
-
 /*
  * Print beaconing state.
  */
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index 7d3ebf0..1719709 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -479,7 +479,6 @@ ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name,
 		    struct uwb_rccb *cmd, size_t cmd_size,
 		    u8 expected_type, u16 expected_event,
 		    struct uwb_rceb **preply);
-int uwb_bg_joined(struct uwb_rc *rc);
 
 size_t __uwb_addr_print(char *, size_t, const unsigned char *, int);
 
@@ -568,7 +567,9 @@ static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv)
 }
 
 /**
- * Events generated by UWB that can be passed to any listeners
+ * enum uwb_notifs - UWB events that can be passed to any listeners
+ * @UWB_NOTIF_ONAIR: a new neighbour has joined the beacon group.
+ * @UWB_NOTIF_OFFAIR: a neighbour has left the beacon group.
  *
  * Higher layers can register callback functions with the radio
  * controller using uwb_notifs_register(). The radio controller
@@ -576,8 +577,6 @@ static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv)
  * nodes when an event occurs.
  */
 enum uwb_notifs {
-	UWB_NOTIF_BG_JOIN = 0,	/* radio controller joined a beacon group */
-	UWB_NOTIF_BG_LEAVE = 1,	/* radio controller left a beacon group */
 	UWB_NOTIF_ONAIR,
 	UWB_NOTIF_OFFAIR,
 };
-- 
cgit v0.10.2


From b7af349b175af45f9d87b3bf3f0a221e1831ed39 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@nokia.com>
Date: Fri, 21 Nov 2008 13:39:45 -0800
Subject: i2c-omap: Do not use interruptible wait call in omap_i2c_xfer_msg

If there is a signal pending and wait_for_completion_interruptible_timeout
terminates with -ERESTARTSYS, we return and disable the i2c clocks in
omap_i2c_xfer.

If we terminate before sending last i2c message with a stop condition, the
bus remains busy and we are not able to send new messages into bus with
successive omap_i2c_xfer calls. Therefore a pending signal is not caught
here and we return only because of timeout or i2c error.

Signed-off-by: Jarkko Nikula <jarkko.nikula@nokia.com>
Signed-off-by: Juha Yrjola <juha.yrjola@solidboot.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 608038d..17476ec 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -328,8 +328,12 @@ static int omap_i2c_xfer_msg(struct i2c_adapter *adap,
 		w |= OMAP_I2C_CON_STP;
 	omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, w);
 
-	r = wait_for_completion_interruptible_timeout(&dev->cmd_complete,
-						      OMAP_I2C_TIMEOUT);
+	/*
+	 * REVISIT: We should abort the transfer on signals, but the bus goes
+	 * into arbitration and we're currently unable to recover from it.
+	 */
+	r = wait_for_completion_timeout(&dev->cmd_complete,
+					OMAP_I2C_TIMEOUT);
 	dev->buf_len = 0;
 	if (r < 0)
 		return r;
-- 
cgit v0.10.2


From 0cbbcffdf5f30ef60d918549014684eada4f5b3f Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Fri, 21 Nov 2008 13:39:45 -0800
Subject: i2c-omap: Close suspected race between omap_i2c_idle() and
 omap_i2c_isr()

omap_i2c_idle() sets an internal flag, "dev->idle", instructing its
ISR to decline interrupts.  It sets this flag before it actually masks
the interrupts on the I2C controller.  This is problematic, since an
I2C interrupt could arrive after dev->idle is set, but before the
interrupt source is masked.  When this happens, Linux disables the I2C
controller's IRQ, causing all future transactions on the bus to fail.

Symptoms, happening on about 7% of boots:

   irq 56: nobody cared (try booting with the "irqpoll" option)
   <warning traceback here>
   Disabling IRQ #56
   i2c_omap i2c_omap.1: controller timed out

In omap_i2c_idle(), this patch sets dev->idle only after the interrupt
mask write to the I2C controller has left the ARM write buffer.
That's probably the major offender.  For additional prophylaxis, in
omap_i2c_unidle(), the patch clears the dev->idle flag before
interrupts are enabled, rather than afterwards.

The patch has survived twenty-two reboots on the 3430SDP here without
wedging I2C1.  Not absolutely dispositive, but promising!

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 17476ec..5ca0e00 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -181,22 +181,26 @@ static void omap_i2c_unidle(struct omap_i2c_dev *dev)
 	if (dev->iclk != NULL)
 		clk_enable(dev->iclk);
 	clk_enable(dev->fclk);
+	dev->idle = 0;
 	if (dev->iestate)
 		omap_i2c_write_reg(dev, OMAP_I2C_IE_REG, dev->iestate);
-	dev->idle = 0;
 }
 
 static void omap_i2c_idle(struct omap_i2c_dev *dev)
 {
 	u16 iv;
 
-	dev->idle = 1;
 	dev->iestate = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
 	omap_i2c_write_reg(dev, OMAP_I2C_IE_REG, 0);
-	if (dev->rev1)
+	if (dev->rev1) {
 		iv = omap_i2c_read_reg(dev, OMAP_I2C_IV_REG);	/* Read clears */
-	else
+	} else {
 		omap_i2c_write_reg(dev, OMAP_I2C_STAT_REG, dev->iestate);
+
+		/* Flush posted write before the dev->idle store occurs */
+		omap_i2c_read_reg(dev, OMAP_I2C_STAT_REG);
+	}
+	dev->idle = 1;
 	clk_disable(dev->fclk);
 	if (dev->iclk != NULL)
 		clk_disable(dev->iclk);
-- 
cgit v0.10.2


From 4574eb6892a13bc91aac8676457d46798935d653 Mon Sep 17 00:00:00 2001
From: Syed Mohammed Khasim <x0khasim@ti.com>
Date: Fri, 21 Nov 2008 13:39:45 -0800
Subject: i2c-omap: Add high-speed support to omap-i2c

Omap2430 has additional support for high-speed I2C.

This patch moves I2C speed parameter (from module) to platform data.
Also added basic High Speed support based on I2C bus speed.

This patch is tested for high speed I2C (with TWL4030 Keypad) and works as
expected.

Also change the 2430 i2chs_fck names to use the standard naming.

Cc: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Syed Mohammed Khasim  <x0khasim@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/clock24xx.h b/arch/arm/mach-omap2/clock24xx.h
index 242a19d..e5fc5bf 100644
--- a/arch/arm/mach-omap2/clock24xx.h
+++ b/arch/arm/mach-omap2/clock24xx.h
@@ -2321,7 +2321,7 @@ static struct clk i2c2_fck = {
 };
 
 static struct clk i2chs2_fck = {
-	.name		= "i2chs_fck",
+	.name		= "i2c_fck",
 	.id		= 2,
 	.parent		= &func_96m_ck,
 	.flags		= CLOCK_IN_OMAP243X,
@@ -2354,7 +2354,7 @@ static struct clk i2c1_fck = {
 };
 
 static struct clk i2chs1_fck = {
-	.name		= "i2chs_fck",
+	.name		= "i2c_fck",
 	.id		= 1,
 	.parent		= &func_96m_ck,
 	.flags		= CLOCK_IN_OMAP243X,
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 5ca0e00..b0aa0f8 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -83,6 +83,7 @@
 /* I2C Configuration Register (OMAP_I2C_CON): */
 #define OMAP_I2C_CON_EN		(1 << 15)	/* I2C module enable */
 #define OMAP_I2C_CON_BE		(1 << 14)	/* Big endian mode */
+#define OMAP_I2C_CON_OPMODE	(1 << 12)	/* High Speed support */
 #define OMAP_I2C_CON_STB	(1 << 11)	/* Start byte mode (master) */
 #define OMAP_I2C_CON_MST	(1 << 10)	/* Master/slave mode */
 #define OMAP_I2C_CON_TRX	(1 << 9)	/* TX/RX mode (master only) */
@@ -91,6 +92,10 @@
 #define OMAP_I2C_CON_STP	(1 << 1)	/* Stop cond (master only) */
 #define OMAP_I2C_CON_STT	(1 << 0)	/* Start condition (master) */
 
+/* I2C SCL time value when Master */
+#define OMAP_I2C_SCLL_HSSCLL	8
+#define OMAP_I2C_SCLH_HSSCLH	8
+
 /* I2C System Test Register (OMAP_I2C_SYSTEST): */
 #ifdef DEBUG
 #define OMAP_I2C_SYSTEST_ST_EN		(1 << 15)	/* System test enable */
@@ -109,12 +114,6 @@
 /* I2C System Configuration Register (OMAP_I2C_SYSC): */
 #define OMAP_I2C_SYSC_SRST		(1 << 1)	/* Soft Reset */
 
-/* REVISIT: Use platform_data instead of module parameters */
-/* Fast Mode = 400 kHz, Standard = 100 kHz */
-static int clock = 100; /* Default: 100 kHz */
-module_param(clock, int, 0);
-MODULE_PARM_DESC(clock, "Set I2C clock in kHz: 400=fast mode (default == 100)");
-
 struct omap_i2c_dev {
 	struct device		*dev;
 	void __iomem		*base;		/* virtual */
@@ -123,6 +122,7 @@ struct omap_i2c_dev {
 	struct clk		*fclk;		/* Functional clock */
 	struct completion	cmd_complete;
 	struct resource		*ioarea;
+	u32			speed;		/* Speed of bus in Khz */
 	u16			cmd_err;
 	u8			*buf;
 	size_t			buf_len;
@@ -208,9 +208,11 @@ static void omap_i2c_idle(struct omap_i2c_dev *dev)
 
 static int omap_i2c_init(struct omap_i2c_dev *dev)
 {
-	u16 psc = 0;
+	u16 psc = 0, scll = 0, sclh = 0;
+	u16 fsscll = 0, fssclh = 0, hsscll = 0, hssclh = 0;
 	unsigned long fclk_rate = 12000000;
 	unsigned long timeout;
+	unsigned long internal_clk = 0;
 
 	if (!dev->rev1) {
 		omap_i2c_write_reg(dev, OMAP_I2C_SYSC_REG, OMAP_I2C_SYSC_SRST);
@@ -253,18 +255,47 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
 			psc = fclk_rate / 12000000;
 	}
 
+	if (cpu_is_omap2430()) {
+
+		/* HSI2C controller internal clk rate should be 19.2 Mhz */
+		internal_clk = 19200;
+		fclk_rate = clk_get_rate(dev->fclk) / 1000;
+
+		/* Compute prescaler divisor */
+		psc = fclk_rate / internal_clk;
+		psc = psc - 1;
+
+		/* If configured for High Speed */
+		if (dev->speed > 400) {
+			/* For first phase of HS mode */
+			fsscll = internal_clk / (400 * 2) - 6;
+			fssclh = internal_clk / (400 * 2) - 6;
+
+			/* For second phase of HS mode */
+			hsscll = fclk_rate / (dev->speed * 2) - 6;
+			hssclh = fclk_rate / (dev->speed * 2) - 6;
+		} else {
+			/* To handle F/S modes */
+			fsscll = internal_clk / (dev->speed * 2) - 6;
+			fssclh = internal_clk / (dev->speed * 2) - 6;
+		}
+		scll = (hsscll << OMAP_I2C_SCLL_HSSCLL) | fsscll;
+		sclh = (hssclh << OMAP_I2C_SCLH_HSSCLH) | fssclh;
+	} else {
+		/* Program desired operating rate */
+		fclk_rate /= (psc + 1) * 1000;
+		if (psc > 2)
+			psc = 2;
+		scll = fclk_rate / (dev->speed * 2) - 7 + psc;
+		sclh = fclk_rate / (dev->speed * 2) - 7 + psc;
+	}
+
 	/* Setup clock prescaler to obtain approx 12MHz I2C module clock: */
 	omap_i2c_write_reg(dev, OMAP_I2C_PSC_REG, psc);
 
-	/* Program desired operating rate */
-	fclk_rate /= (psc + 1) * 1000;
-	if (psc > 2)
-		psc = 2;
-
-	omap_i2c_write_reg(dev, OMAP_I2C_SCLL_REG,
-			   fclk_rate / (clock * 2) - 7 + psc);
-	omap_i2c_write_reg(dev, OMAP_I2C_SCLH_REG,
-			   fclk_rate / (clock * 2) - 7 + psc);
+	/* SCL low and high time values */
+	omap_i2c_write_reg(dev, OMAP_I2C_SCLL_REG, scll);
+	omap_i2c_write_reg(dev, OMAP_I2C_SCLH_REG, sclh);
 
 	/* Take the I2C module out of reset: */
 	omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, OMAP_I2C_CON_EN);
@@ -324,6 +355,11 @@ static int omap_i2c_xfer_msg(struct i2c_adapter *adap,
 	dev->cmd_err = 0;
 
 	w = OMAP_I2C_CON_EN | OMAP_I2C_CON_MST | OMAP_I2C_CON_STT;
+
+	/* High speed configuration */
+	if (dev->speed > 400)
+		w |= OMAP_I2C_CON_OPMODE;
+
 	if (msg->flags & I2C_M_TEN)
 		w |= OMAP_I2C_CON_XA;
 	if (!(msg->flags & I2C_M_RD))
@@ -564,6 +600,7 @@ omap_i2c_probe(struct platform_device *pdev)
 	struct i2c_adapter	*adap;
 	struct resource		*mem, *irq, *ioarea;
 	int r;
+	u32 *speed = NULL;
 
 	/* NOTE: driver uses the static register mapping */
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -584,17 +621,18 @@ omap_i2c_probe(struct platform_device *pdev)
 		return -EBUSY;
 	}
 
-	if (clock > 200)
-		clock = 400;	/* Fast mode */
-	else
-		clock = 100;	/* Standard mode */
-
 	dev = kzalloc(sizeof(struct omap_i2c_dev), GFP_KERNEL);
 	if (!dev) {
 		r = -ENOMEM;
 		goto err_release_region;
 	}
 
+	if (pdev->dev.platform_data != NULL)
+		speed = (u32 *) pdev->dev.platform_data;
+	else
+		*speed = 100; /* Defualt speed */
+
+	dev->speed = *speed;
 	dev->dev = &pdev->dev;
 	dev->irq = irq->start;
 	dev->base = ioremap(mem->start, mem->end - mem->start + 1);
@@ -625,7 +663,7 @@ omap_i2c_probe(struct platform_device *pdev)
 	}
 	r = omap_i2c_read_reg(dev, OMAP_I2C_REV_REG) & 0xff;
 	dev_info(dev->dev, "bus %d rev%d.%d at %d kHz\n",
-		 pdev->id, r >> 4, r & 0xf, clock);
+		 pdev->id, r >> 4, r & 0xf, dev->speed);
 
 	adap = &dev->adapter;
 	i2c_set_adapdata(adap, dev);
-- 
cgit v0.10.2


From b6ee52c39999b2f3bcd9e26f0edf1f07599cf40e Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Fri, 21 Nov 2008 13:39:46 -0800
Subject: i2c-omap: FIFO handling support and broken hw workaround for i2c-omap

Based on an earlier patch from Nishant Menon:

- Transfers can use FIFO on FIFO capable devices
- Prevents errors for HSI2C if FIFO is not used
- Implemented errenous handling of STT-STP handling on SDP2430

Also merged in is a fix from Jaron Marini to fix occasional i2c
hang if OMAP_I2C_CON_STT remains asserted.

Signed-off-by: Jason P Marini <jason.marini@gmail.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index b0aa0f8..9ae4b74 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -55,8 +55,11 @@
 #define OMAP_I2C_SCLL_REG		0x34
 #define OMAP_I2C_SCLH_REG		0x38
 #define OMAP_I2C_SYSTEST_REG		0x3c
+#define OMAP_I2C_BUFSTAT_REG		0x40
 
 /* I2C Interrupt Enable Register (OMAP_I2C_IE): */
+#define OMAP_I2C_IE_XDR		(1 << 14)	/* TX Buffer drain int enable */
+#define OMAP_I2C_IE_RDR		(1 << 13)	/* RX Buffer drain int enable */
 #define OMAP_I2C_IE_XRDY	(1 << 4)	/* TX data ready int enable */
 #define OMAP_I2C_IE_RRDY	(1 << 3)	/* RX data ready int enable */
 #define OMAP_I2C_IE_ARDY	(1 << 2)	/* Access ready int enable */
@@ -64,7 +67,8 @@
 #define OMAP_I2C_IE_AL		(1 << 0)	/* Arbitration lost int ena */
 
 /* I2C Status Register (OMAP_I2C_STAT): */
-#define OMAP_I2C_STAT_SBD	(1 << 15)	/* Single byte data */
+#define OMAP_I2C_STAT_XDR	(1 << 14)	/* TX Buffer draining */
+#define OMAP_I2C_STAT_RDR	(1 << 13)	/* RX Buffer draining */
 #define OMAP_I2C_STAT_BB	(1 << 12)	/* Bus busy */
 #define OMAP_I2C_STAT_ROVR	(1 << 11)	/* Receive overrun */
 #define OMAP_I2C_STAT_XUDF	(1 << 10)	/* Transmit underflow */
@@ -78,12 +82,14 @@
 
 /* I2C Buffer Configuration Register (OMAP_I2C_BUF): */
 #define OMAP_I2C_BUF_RDMA_EN	(1 << 15)	/* RX DMA channel enable */
+#define OMAP_I2C_BUF_RXFIF_CLR	(1 << 14)	/* RX FIFO Clear */
 #define OMAP_I2C_BUF_XDMA_EN	(1 << 7)	/* TX DMA channel enable */
+#define OMAP_I2C_BUF_TXFIF_CLR	(1 << 6)	/* TX FIFO Clear */
 
 /* I2C Configuration Register (OMAP_I2C_CON): */
 #define OMAP_I2C_CON_EN		(1 << 15)	/* I2C module enable */
 #define OMAP_I2C_CON_BE		(1 << 14)	/* Big endian mode */
-#define OMAP_I2C_CON_OPMODE	(1 << 12)	/* High Speed support */
+#define OMAP_I2C_CON_OPMODE_HS	(1 << 12)	/* High Speed support */
 #define OMAP_I2C_CON_STB	(1 << 11)	/* Start byte mode (master) */
 #define OMAP_I2C_CON_MST	(1 << 10)	/* Master/slave mode */
 #define OMAP_I2C_CON_TRX	(1 << 9)	/* TX/RX mode (master only) */
@@ -127,7 +133,12 @@ struct omap_i2c_dev {
 	u8			*buf;
 	size_t			buf_len;
 	struct i2c_adapter	adapter;
+	u8			fifo_size;	/* use as flag and value
+						 * fifo_size==0 implies no fifo
+						 * if set, should be trsh+1
+						 */
 	unsigned		rev1:1;
+	unsigned		b_hw:1;		/* bad h/w fixes */
 	unsigned		idle:1;
 	u16			iestate;	/* Saved interrupt register */
 };
@@ -297,6 +308,14 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
 	omap_i2c_write_reg(dev, OMAP_I2C_SCLL_REG, scll);
 	omap_i2c_write_reg(dev, OMAP_I2C_SCLH_REG, sclh);
 
+	if (dev->fifo_size)
+		/* Note: setup required fifo size - 1 */
+		omap_i2c_write_reg(dev, OMAP_I2C_BUF_REG,
+					(dev->fifo_size - 1) << 8 | /* RTRSH */
+					OMAP_I2C_BUF_RXFIF_CLR |
+					(dev->fifo_size - 1) | /* XTRSH */
+					OMAP_I2C_BUF_TXFIF_CLR);
+
 	/* Take the I2C module out of reset: */
 	omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, OMAP_I2C_CON_EN);
 
@@ -304,7 +323,8 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
 	omap_i2c_write_reg(dev, OMAP_I2C_IE_REG,
 			   (OMAP_I2C_IE_XRDY | OMAP_I2C_IE_RRDY |
 			    OMAP_I2C_IE_ARDY | OMAP_I2C_IE_NACK |
-			    OMAP_I2C_IE_AL));
+			    OMAP_I2C_IE_AL)  | ((dev->fifo_size) ?
+				(OMAP_I2C_IE_RDR | OMAP_I2C_IE_XDR) : 0));
 	return 0;
 }
 
@@ -351,6 +371,11 @@ static int omap_i2c_xfer_msg(struct i2c_adapter *adap,
 
 	omap_i2c_write_reg(dev, OMAP_I2C_CNT_REG, dev->buf_len);
 
+	/* Clear the FIFO Buffers */
+	w = omap_i2c_read_reg(dev, OMAP_I2C_BUF_REG);
+	w |= OMAP_I2C_BUF_RXFIF_CLR | OMAP_I2C_BUF_TXFIF_CLR;
+	omap_i2c_write_reg(dev, OMAP_I2C_BUF_REG, w);
+
 	init_completion(&dev->cmd_complete);
 	dev->cmd_err = 0;
 
@@ -358,17 +383,40 @@ static int omap_i2c_xfer_msg(struct i2c_adapter *adap,
 
 	/* High speed configuration */
 	if (dev->speed > 400)
-		w |= OMAP_I2C_CON_OPMODE;
+		w |= OMAP_I2C_CON_OPMODE_HS;
 
 	if (msg->flags & I2C_M_TEN)
 		w |= OMAP_I2C_CON_XA;
 	if (!(msg->flags & I2C_M_RD))
 		w |= OMAP_I2C_CON_TRX;
-	if (stop)
+	if (!dev->b_hw && stop)
 		w |= OMAP_I2C_CON_STP;
 	omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, w);
 
 	/*
+	 * Don't write stt and stp together on some hardware.
+	 */
+	if (dev->b_hw && stop) {
+		unsigned long delay = jiffies + OMAP_I2C_TIMEOUT;
+		u16 con = omap_i2c_read_reg(dev, OMAP_I2C_CON_REG);
+		while (con & OMAP_I2C_CON_STT) {
+			con = omap_i2c_read_reg(dev, OMAP_I2C_CON_REG);
+
+			/* Let the user know if i2c is in a bad state */
+			if (time_after(jiffies, delay)) {
+				dev_err(dev->dev, "controller timed out "
+				"waiting for start condition to finish\n");
+				return -ETIMEDOUT;
+			}
+			cpu_relax();
+		}
+
+		w |= OMAP_I2C_CON_STP;
+		w &= ~OMAP_I2C_CON_STT;
+		omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, w);
+	}
+
+	/*
 	 * REVISIT: We should abort the transfer on signals, but the bus goes
 	 * into arbitration and we're currently unable to recover from it.
 	 */
@@ -516,7 +564,7 @@ omap_i2c_isr(int this_irq, void *dev_id)
 	struct omap_i2c_dev *dev = dev_id;
 	u16 bits;
 	u16 stat, w;
-	int count = 0;
+	int err, count = 0;
 
 	if (dev->idle)
 		return IRQ_NONE;
@@ -531,39 +579,94 @@ omap_i2c_isr(int this_irq, void *dev_id)
 
 		omap_i2c_write_reg(dev, OMAP_I2C_STAT_REG, stat);
 
-		if (stat & OMAP_I2C_STAT_ARDY) {
-			omap_i2c_complete_cmd(dev, 0);
-			continue;
+		err = 0;
+		if (stat & OMAP_I2C_STAT_NACK) {
+			err |= OMAP_I2C_STAT_NACK;
+			omap_i2c_write_reg(dev, OMAP_I2C_CON_REG,
+					   OMAP_I2C_CON_STP);
 		}
-		if (stat & OMAP_I2C_STAT_RRDY) {
-			w = omap_i2c_read_reg(dev, OMAP_I2C_DATA_REG);
-			if (dev->buf_len) {
-				*dev->buf++ = w;
-				dev->buf_len--;
+		if (stat & OMAP_I2C_STAT_AL) {
+			dev_err(dev->dev, "Arbitration lost\n");
+			err |= OMAP_I2C_STAT_AL;
+		}
+		if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK |
+					OMAP_I2C_STAT_AL))
+			omap_i2c_complete_cmd(dev, err);
+		if (stat & (OMAP_I2C_STAT_RRDY | OMAP_I2C_STAT_RDR)) {
+			u8 num_bytes = 1;
+			if (dev->fifo_size) {
+				if (stat & OMAP_I2C_STAT_RRDY)
+					num_bytes = dev->fifo_size;
+				else
+					num_bytes = omap_i2c_read_reg(dev,
+							OMAP_I2C_BUFSTAT_REG);
+			}
+			while (num_bytes) {
+				num_bytes--;
+				w = omap_i2c_read_reg(dev, OMAP_I2C_DATA_REG);
 				if (dev->buf_len) {
-					*dev->buf++ = w >> 8;
+					*dev->buf++ = w;
 					dev->buf_len--;
+					/* Data reg from 2430 is 8 bit wide */
+					if (!cpu_is_omap2430()) {
+						if (dev->buf_len) {
+							*dev->buf++ = w >> 8;
+							dev->buf_len--;
+						}
+					}
+				} else {
+					if (stat & OMAP_I2C_STAT_RRDY)
+						dev_err(dev->dev,
+							"RRDY IRQ while no data"
+								" requested\n");
+					if (stat & OMAP_I2C_STAT_RDR)
+						dev_err(dev->dev,
+							"RDR IRQ while no data"
+								" requested\n");
+					break;
 				}
-			} else
-				dev_err(dev->dev, "RRDY IRQ while no data "
-						"requested\n");
-			omap_i2c_ack_stat(dev, OMAP_I2C_STAT_RRDY);
+			}
+			omap_i2c_ack_stat(dev,
+				stat & (OMAP_I2C_STAT_RRDY | OMAP_I2C_STAT_RDR));
 			continue;
 		}
-		if (stat & OMAP_I2C_STAT_XRDY) {
-			w = 0;
-			if (dev->buf_len) {
-				w = *dev->buf++;
-				dev->buf_len--;
+		if (stat & (OMAP_I2C_STAT_XRDY | OMAP_I2C_STAT_XDR)) {
+			u8 num_bytes = 1;
+			if (dev->fifo_size) {
+				if (stat & OMAP_I2C_STAT_XRDY)
+					num_bytes = dev->fifo_size;
+				else
+					num_bytes = omap_i2c_read_reg(dev,
+							OMAP_I2C_BUFSTAT_REG);
+			}
+			while (num_bytes) {
+				num_bytes--;
+				w = 0;
 				if (dev->buf_len) {
-					w |= *dev->buf++ << 8;
+					w = *dev->buf++;
 					dev->buf_len--;
+					/* Data reg from  2430 is 8 bit wide */
+					if (!cpu_is_omap2430()) {
+						if (dev->buf_len) {
+							w |= *dev->buf++ << 8;
+							dev->buf_len--;
+						}
+					}
+				} else {
+					if (stat & OMAP_I2C_STAT_XRDY)
+						dev_err(dev->dev,
+							"XRDY IRQ while no "
+							"data to send\n");
+					if (stat & OMAP_I2C_STAT_XDR)
+						dev_err(dev->dev,
+							"XDR IRQ while no "
+							"data to send\n");
+					break;
 				}
-			} else
-				dev_err(dev->dev, "XRDY IRQ while no "
-					"data to send\n");
-			omap_i2c_write_reg(dev, OMAP_I2C_DATA_REG, w);
-			omap_i2c_ack_stat(dev, OMAP_I2C_STAT_XRDY);
+				omap_i2c_write_reg(dev, OMAP_I2C_DATA_REG, w);
+			}
+			omap_i2c_ack_stat(dev,
+				stat & (OMAP_I2C_STAT_XRDY | OMAP_I2C_STAT_XDR));
 			continue;
 		}
 		if (stat & OMAP_I2C_STAT_ROVR) {
@@ -571,18 +674,9 @@ omap_i2c_isr(int this_irq, void *dev_id)
 			dev->cmd_err |= OMAP_I2C_STAT_ROVR;
 		}
 		if (stat & OMAP_I2C_STAT_XUDF) {
-			dev_err(dev->dev, "Transmit overflow\n");
+			dev_err(dev->dev, "Transmit underflow\n");
 			dev->cmd_err |= OMAP_I2C_STAT_XUDF;
 		}
-		if (stat & OMAP_I2C_STAT_NACK) {
-			omap_i2c_complete_cmd(dev, OMAP_I2C_STAT_NACK);
-			omap_i2c_write_reg(dev, OMAP_I2C_CON_REG,
-					   OMAP_I2C_CON_STP);
-		}
-		if (stat & OMAP_I2C_STAT_AL) {
-			dev_err(dev->dev, "Arbitration lost\n");
-			omap_i2c_complete_cmd(dev, OMAP_I2C_STAT_AL);
-		}
 	}
 
 	return count ? IRQ_HANDLED : IRQ_NONE;
@@ -651,6 +745,22 @@ omap_i2c_probe(struct platform_device *pdev)
 	if (cpu_is_omap15xx())
 		dev->rev1 = omap_i2c_read_reg(dev, OMAP_I2C_REV_REG) < 0x20;
 
+	if (cpu_is_omap2430()) {
+		u16 s;
+
+		/* Set up the fifo size - Get total size */
+		s = (omap_i2c_read_reg(dev, OMAP_I2C_BUFSTAT_REG) >> 14) & 0x3;
+		dev->fifo_size = 0x8 << s;
+
+		/*
+		 * Set up notification threshold as half the total available
+		 * size. This is to ensure that we can handle the status on int
+		 * call back latencies.
+		 */
+		dev->fifo_size = (dev->fifo_size / 2);
+		dev->b_hw = 1; /* Enable hardware fixes */
+	}
+
 	/* reset ASAP, clearing any IRQs */
 	omap_i2c_init(dev);
 
-- 
cgit v0.10.2


From 3d522fb41ead214d9d9236ec184271633e1cfc2f Mon Sep 17 00:00:00 2001
From: Chandra shekhar <x0044955@ti.com>
Date: Fri, 21 Nov 2008 13:39:46 -0800
Subject: i2c-omap: Add support for omap34xx

Add support for omap34xx

Signed-off-by: chandra shekhar <x0044955@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 9ae4b74..3f7726a 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -156,7 +156,7 @@ static inline u16 omap_i2c_read_reg(struct omap_i2c_dev *i2c_dev, int reg)
 
 static int omap_i2c_get_clocks(struct omap_i2c_dev *dev)
 {
-	if (cpu_is_omap16xx() || cpu_is_omap24xx()) {
+	if (cpu_is_omap16xx() || cpu_class_is_omap2()) {
 		dev->iclk = clk_get(dev->dev, "i2c_ick");
 		if (IS_ERR(dev->iclk)) {
 			dev->iclk = NULL;
@@ -266,7 +266,7 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
 			psc = fclk_rate / 12000000;
 	}
 
-	if (cpu_is_omap2430()) {
+	if (cpu_is_omap2430() || cpu_is_omap34xx()) {
 
 		/* HSI2C controller internal clk rate should be 19.2 Mhz */
 		internal_clk = 19200;
@@ -608,7 +608,8 @@ omap_i2c_isr(int this_irq, void *dev_id)
 					*dev->buf++ = w;
 					dev->buf_len--;
 					/* Data reg from 2430 is 8 bit wide */
-					if (!cpu_is_omap2430()) {
+					if (!cpu_is_omap2430() &&
+							!cpu_is_omap34xx()) {
 						if (dev->buf_len) {
 							*dev->buf++ = w >> 8;
 							dev->buf_len--;
@@ -646,7 +647,8 @@ omap_i2c_isr(int this_irq, void *dev_id)
 					w = *dev->buf++;
 					dev->buf_len--;
 					/* Data reg from  2430 is 8 bit wide */
-					if (!cpu_is_omap2430()) {
+					if (!cpu_is_omap2430() &&
+							!cpu_is_omap34xx()) {
 						if (dev->buf_len) {
 							w |= *dev->buf++ << 8;
 							dev->buf_len--;
@@ -694,7 +696,7 @@ omap_i2c_probe(struct platform_device *pdev)
 	struct i2c_adapter	*adap;
 	struct resource		*mem, *irq, *ioarea;
 	int r;
-	u32 *speed = NULL;
+	u32 speed = 0;
 
 	/* NOTE: driver uses the static register mapping */
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -722,11 +724,11 @@ omap_i2c_probe(struct platform_device *pdev)
 	}
 
 	if (pdev->dev.platform_data != NULL)
-		speed = (u32 *) pdev->dev.platform_data;
+		speed = *(u32 *)pdev->dev.platform_data;
 	else
-		*speed = 100; /* Defualt speed */
+		speed = 100;	/* Defualt speed */
 
-	dev->speed = *speed;
+	dev->speed = speed;
 	dev->dev = &pdev->dev;
 	dev->irq = irq->start;
 	dev->base = ioremap(mem->start, mem->end - mem->start + 1);
@@ -745,7 +747,7 @@ omap_i2c_probe(struct platform_device *pdev)
 	if (cpu_is_omap15xx())
 		dev->rev1 = omap_i2c_read_reg(dev, OMAP_I2C_REV_REG) < 0x20;
 
-	if (cpu_is_omap2430()) {
+	if (cpu_is_omap2430() || cpu_is_omap34xx()) {
 		u16 s;
 
 		/* Set up the fifo size - Get total size */
-- 
cgit v0.10.2


From 510be9c9ad852dc902fd926ec8e03b67e62d8915 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Fri, 21 Nov 2008 13:39:46 -0800
Subject: i2c-omap: Mark init-only functions as __init

Mark functions called only at init time as __init.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 3f7726a..61d2e5a 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -154,7 +154,7 @@ static inline u16 omap_i2c_read_reg(struct omap_i2c_dev *i2c_dev, int reg)
 	return __raw_readw(i2c_dev->base + reg);
 }
 
-static int omap_i2c_get_clocks(struct omap_i2c_dev *dev)
+static int __init omap_i2c_get_clocks(struct omap_i2c_dev *dev)
 {
 	if (cpu_is_omap16xx() || cpu_class_is_omap2()) {
 		dev->iclk = clk_get(dev->dev, "i2c_ick");
@@ -689,7 +689,7 @@ static const struct i2c_algorithm omap_i2c_algo = {
 	.functionality	= omap_i2c_func,
 };
 
-static int
+static int __init
 omap_i2c_probe(struct platform_device *pdev)
 {
 	struct omap_i2c_dev	*dev;
-- 
cgit v0.10.2


From 43469d8e2aaecc61403d1527dda7441fba8c0e50 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Fri, 21 Nov 2008 13:39:47 -0800
Subject: i2c-omap: Don't compile in OMAP15xx I2C ISR for non-OMAP15xx builds

Skip compiling OMAP15xx I2C ISR for non-OMAP15xx builds.  Saves 400 bytes
of text for most OMAP builds.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 61d2e5a..195c3d1 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -503,6 +503,9 @@ omap_i2c_ack_stat(struct omap_i2c_dev *dev, u16 stat)
 	omap_i2c_write_reg(dev, OMAP_I2C_STAT_REG, stat);
 }
 
+/* rev1 devices are apparently only on some 15xx */
+#ifdef CONFIG_ARCH_OMAP15XX
+
 static irqreturn_t
 omap_i2c_rev1_isr(int this_irq, void *dev_id)
 {
@@ -557,6 +560,9 @@ omap_i2c_rev1_isr(int this_irq, void *dev_id)
 
 	return IRQ_HANDLED;
 }
+#else
+#define omap_i2c_rev1_isr		0
+#endif
 
 static irqreturn_t
 omap_i2c_isr(int this_irq, void *dev_id)
-- 
cgit v0.10.2


From c1a473bde4c06e8e6996ce3a33121b7a9a86b4b9 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 21 Nov 2008 13:39:47 -0800
Subject: i2c-omap: Clean-up i2c-omap

Minor sparse, checkpatch and formatting clean-up. Also update copyrights.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 195c3d1..4aeebad 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -2,13 +2,16 @@
  * TI OMAP I2C master mode driver
  *
  * Copyright (C) 2003 MontaVista Software, Inc.
- * Copyright (C) 2004 Texas Instruments.
- *
- * Updated to work with multiple I2C interfaces on 24xx by
- * Tony Lindgren <tony@atomide.com> and Imre Deak <imre.deak@nokia.com>
  * Copyright (C) 2005 Nokia Corporation
+ * Copyright (C) 2004 - 2007 Texas Instruments.
  *
- * Cleaned up by Juha Yrjölä <juha.yrjola@nokia.com>
+ * Originally written by MontaVista Software, Inc.
+ * Additional contributions by:
+ *	Tony Lindgren <tony@atomide.com>
+ *	Imre Deak <imre.deak@nokia.com>
+ *	Juha Yrjölä <juha.yrjola@solidboot.com>
+ *	Syed Khasim <x0khasim@ti.com>
+ *	Nishant Menon <nm@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -33,8 +36,7 @@
 #include <linux/completion.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
-
-#include <asm/io.h>
+#include <linux/io.h>
 
 /* timeout waiting for the controller to respond */
 #define OMAP_I2C_TIMEOUT (msecs_to_jiffies(1000))
@@ -204,7 +206,7 @@ static void omap_i2c_idle(struct omap_i2c_dev *dev)
 	dev->iestate = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
 	omap_i2c_write_reg(dev, OMAP_I2C_IE_REG, 0);
 	if (dev->rev1) {
-		iv = omap_i2c_read_reg(dev, OMAP_I2C_IV_REG);	/* Read clears */
+		iv = omap_i2c_read_reg(dev, OMAP_I2C_IV_REG); /* Read clears */
 	} else {
 		omap_i2c_write_reg(dev, OMAP_I2C_STAT_REG, dev->iestate);
 
@@ -321,9 +323,9 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
 
 	/* Enable interrupts */
 	omap_i2c_write_reg(dev, OMAP_I2C_IE_REG,
-			   (OMAP_I2C_IE_XRDY | OMAP_I2C_IE_RRDY |
-			    OMAP_I2C_IE_ARDY | OMAP_I2C_IE_NACK |
-			    OMAP_I2C_IE_AL)  | ((dev->fifo_size) ?
+			(OMAP_I2C_IE_XRDY | OMAP_I2C_IE_RRDY |
+			OMAP_I2C_IE_ARDY | OMAP_I2C_IE_NACK |
+			OMAP_I2C_IE_AL)  | ((dev->fifo_size) ?
 				(OMAP_I2C_IE_RDR | OMAP_I2C_IE_XDR) : 0));
 	return 0;
 }
@@ -389,8 +391,10 @@ static int omap_i2c_xfer_msg(struct i2c_adapter *adap,
 		w |= OMAP_I2C_CON_XA;
 	if (!(msg->flags & I2C_M_RD))
 		w |= OMAP_I2C_CON_TRX;
+
 	if (!dev->b_hw && stop)
 		w |= OMAP_I2C_CON_STP;
+
 	omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, w);
 
 	/*
@@ -468,7 +472,8 @@ omap_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 
 	omap_i2c_unidle(dev);
 
-	if ((r = omap_i2c_wait_for_bb(dev)) < 0)
+	r = omap_i2c_wait_for_bb(dev);
+	if (r < 0)
 		goto out;
 
 	for (i = 0; i < num; i++) {
@@ -561,7 +566,7 @@ omap_i2c_rev1_isr(int this_irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 #else
-#define omap_i2c_rev1_isr		0
+#define omap_i2c_rev1_isr		NULL
 #endif
 
 static irqreturn_t
-- 
cgit v0.10.2


From 3831f154418e058616129942e8175dc4c7e4a1d8 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Fri, 21 Nov 2008 13:39:47 -0800
Subject: i2c-omap: fix I2C timeouts due to recursive omap_i2c_{un,}idle()

omap_i2c_unidle() and omap_i2c_idle() are called recursively during
omap_i2c_probe().  This is evidently unexpected and will wipe
out the I2C interrupt enable register the second time that
omap_i2c_idle() is called consecutively.  Any I2C transactions
following a probe of a bus with at least one device on it will then
time out.

Fix by moving omap_i2c_idle() further up in omap_i2c_probe().  Ensure
the I2C controller is marked as idle before the probe starts.  Also
attempt to catch future reappearances of this bug early in development
by warning in omap_i2c_{un,}idle() when they are called recursively.

Problem reported by David Brownell <david-b@pacbell.net>.

Tested on 3430SDP and 2430SDP.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Cc: David Brownell <david-b@pacbell.net>
Cc: Richard Woodruff <r-woodruff2@ti.com>
Acked-by; Steve Sakoman <steve@sakoman.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 4aeebad..40a1e4b 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -191,6 +191,8 @@ static void omap_i2c_put_clocks(struct omap_i2c_dev *dev)
 
 static void omap_i2c_unidle(struct omap_i2c_dev *dev)
 {
+	WARN_ON(!dev->idle);
+
 	if (dev->iclk != NULL)
 		clk_enable(dev->iclk);
 	clk_enable(dev->fclk);
@@ -203,6 +205,8 @@ static void omap_i2c_idle(struct omap_i2c_dev *dev)
 {
 	u16 iv;
 
+	WARN_ON(dev->idle);
+
 	dev->iestate = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
 	omap_i2c_write_reg(dev, OMAP_I2C_IE_REG, 0);
 	if (dev->rev1) {
@@ -740,6 +744,7 @@ omap_i2c_probe(struct platform_device *pdev)
 		speed = 100;	/* Defualt speed */
 
 	dev->speed = speed;
+	dev->idle = 1;
 	dev->dev = &pdev->dev;
 	dev->irq = irq->start;
 	dev->base = ioremap(mem->start, mem->end - mem->start + 1);
@@ -788,6 +793,8 @@ omap_i2c_probe(struct platform_device *pdev)
 	dev_info(dev->dev, "bus %d rev%d.%d at %d kHz\n",
 		 pdev->id, r >> 4, r & 0xf, dev->speed);
 
+	omap_i2c_idle(dev);
+
 	adap = &dev->adapter;
 	i2c_set_adapdata(adap, dev);
 	adap->owner = THIS_MODULE;
@@ -804,8 +811,6 @@ omap_i2c_probe(struct platform_device *pdev)
 		goto err_free_irq;
 	}
 
-	omap_i2c_idle(dev);
-
 	return 0;
 
 err_free_irq:
-- 
cgit v0.10.2


From 9c76b878eb3f837ff98b37aa254e6cc7942e946b Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Fri, 21 Nov 2008 13:39:55 -0800
Subject: i2c-omap: convert 'rev1' flag to generic 'rev' u8

i2c-omap discriminates only between "revision 1" or "greater than
revision 1."  A following patch introduces code that must also
discriminate between rev2.x, rev3.6, and rev3.12 controllers.  Support
this by storing the full revision data from the I2C_REV register, rather
than just a single bit.

The revision definitions may need to be extended for other ES levels
that aren't currently available here.  rev3.6 is what's present on the
2430SDP here (unknown ES revision); rev3.12 is used on the 3430ES2
here.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 40a1e4b..3ac510d 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -38,6 +38,13 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 
+/* I2C controller revisions */
+#define OMAP_I2C_REV_2			0x20
+
+/* I2C controller revisions present on specific hardware */
+#define OMAP_I2C_REV_ON_2430		0x36
+#define OMAP_I2C_REV_ON_3430		0x3C
+
 /* timeout waiting for the controller to respond */
 #define OMAP_I2C_TIMEOUT (msecs_to_jiffies(1000))
 
@@ -139,7 +146,7 @@ struct omap_i2c_dev {
 						 * fifo_size==0 implies no fifo
 						 * if set, should be trsh+1
 						 */
-	unsigned		rev1:1;
+	u8			rev;
 	unsigned		b_hw:1;		/* bad h/w fixes */
 	unsigned		idle:1;
 	u16			iestate;	/* Saved interrupt register */
@@ -209,7 +216,7 @@ static void omap_i2c_idle(struct omap_i2c_dev *dev)
 
 	dev->iestate = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
 	omap_i2c_write_reg(dev, OMAP_I2C_IE_REG, 0);
-	if (dev->rev1) {
+	if (dev->rev < OMAP_I2C_REV_2) {
 		iv = omap_i2c_read_reg(dev, OMAP_I2C_IV_REG); /* Read clears */
 	} else {
 		omap_i2c_write_reg(dev, OMAP_I2C_STAT_REG, dev->iestate);
@@ -231,7 +238,7 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
 	unsigned long timeout;
 	unsigned long internal_clk = 0;
 
-	if (!dev->rev1) {
+	if (dev->rev >= OMAP_I2C_REV_2) {
 		omap_i2c_write_reg(dev, OMAP_I2C_SYSC_REG, OMAP_I2C_SYSC_SRST);
 		/* For some reason we need to set the EN bit before the
 		 * reset done bit gets set. */
@@ -710,6 +717,7 @@ omap_i2c_probe(struct platform_device *pdev)
 	struct omap_i2c_dev	*dev;
 	struct i2c_adapter	*adap;
 	struct resource		*mem, *irq, *ioarea;
+	void *isr;
 	int r;
 	u32 speed = 0;
 
@@ -760,8 +768,7 @@ omap_i2c_probe(struct platform_device *pdev)
 
 	omap_i2c_unidle(dev);
 
-	if (cpu_is_omap15xx())
-		dev->rev1 = omap_i2c_read_reg(dev, OMAP_I2C_REV_REG) < 0x20;
+	dev->rev = omap_i2c_read_reg(dev, OMAP_I2C_REV_REG) & 0xff;
 
 	if (cpu_is_omap2430() || cpu_is_omap34xx()) {
 		u16 s;
@@ -782,16 +789,16 @@ omap_i2c_probe(struct platform_device *pdev)
 	/* reset ASAP, clearing any IRQs */
 	omap_i2c_init(dev);
 
-	r = request_irq(dev->irq, dev->rev1 ? omap_i2c_rev1_isr : omap_i2c_isr,
-			0, pdev->name, dev);
+	isr = (dev->rev < OMAP_I2C_REV_2) ? omap_i2c_rev1_isr : omap_i2c_isr;
+	r = request_irq(dev->irq, isr, 0, pdev->name, dev);
 
 	if (r) {
 		dev_err(dev->dev, "failure requesting irq %i\n", dev->irq);
 		goto err_unuse_clocks;
 	}
-	r = omap_i2c_read_reg(dev, OMAP_I2C_REV_REG) & 0xff;
+
 	dev_info(dev->dev, "bus %d rev%d.%d at %d kHz\n",
-		 pdev->id, r >> 4, r & 0xf, dev->speed);
+		 pdev->id, dev->rev >> 4, dev->rev & 0xf, dev->speed);
 
 	omap_i2c_idle(dev);
 
-- 
cgit v0.10.2


From fdd07fe6f6fe54250d8b1126b42ebdc72d938f05 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Fri, 21 Nov 2008 13:39:55 -0800
Subject: i2c-omap: reprogram OCP_SYSCONFIG register after reset

The I2C controller clears its OCP_SYSCONFIG register after an OCP soft reset.
Reprogram OCP_SYSCONFIG for maximum power savings on rev3.6 controllers
and beyond.  On 2430, this involves setting the module AUTOIDLE bit.
On 3430, this includes module AUTOIDLE, wakeup enable, slave smart-idle,
and considers only the module functional clock state for idle-ack.

Boot-tested on 2430SDP and 3430SDP.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Cc: Richard Woodruff <r-woodruff2@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 3ac510d..b202354 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -123,11 +123,19 @@
 #define OMAP_I2C_SYSTEST_SDA_O		(1 << 0)	/* SDA line drive out */
 #endif
 
-/* I2C System Status register (OMAP_I2C_SYSS): */
-#define OMAP_I2C_SYSS_RDONE		(1 << 0)	/* Reset Done */
+/* OCP_SYSSTATUS bit definitions */
+#define SYSS_RESETDONE_MASK		(1 << 0)
+
+/* OCP_SYSCONFIG bit definitions */
+#define SYSC_CLOCKACTIVITY_MASK		(0x3 << 8)
+#define SYSC_SIDLEMODE_MASK		(0x3 << 3)
+#define SYSC_ENAWAKEUP_MASK		(1 << 2)
+#define SYSC_SOFTRESET_MASK		(1 << 1)
+#define SYSC_AUTOIDLE_MASK		(1 << 0)
+
+#define SYSC_IDLEMODE_SMART		0x2
+#define SYSC_CLOCKACTIVITY_FCLK		0x2
 
-/* I2C System Configuration Register (OMAP_I2C_SYSC): */
-#define OMAP_I2C_SYSC_SRST		(1 << 1)	/* Soft Reset */
 
 struct omap_i2c_dev {
 	struct device		*dev;
@@ -239,13 +247,13 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
 	unsigned long internal_clk = 0;
 
 	if (dev->rev >= OMAP_I2C_REV_2) {
-		omap_i2c_write_reg(dev, OMAP_I2C_SYSC_REG, OMAP_I2C_SYSC_SRST);
+		omap_i2c_write_reg(dev, OMAP_I2C_SYSC_REG, SYSC_SOFTRESET_MASK);
 		/* For some reason we need to set the EN bit before the
 		 * reset done bit gets set. */
 		timeout = jiffies + OMAP_I2C_TIMEOUT;
 		omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, OMAP_I2C_CON_EN);
 		while (!(omap_i2c_read_reg(dev, OMAP_I2C_SYSS_REG) &
-			 OMAP_I2C_SYSS_RDONE)) {
+			 SYSS_RESETDONE_MASK)) {
 			if (time_after(jiffies, timeout)) {
 				dev_warn(dev->dev, "timeout waiting "
 						"for controller reset\n");
@@ -253,6 +261,26 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
 			}
 			msleep(1);
 		}
+
+		/* SYSC register is cleared by the reset; rewrite it */
+		if (dev->rev == OMAP_I2C_REV_ON_2430) {
+
+			omap_i2c_write_reg(dev, OMAP_I2C_SYSC_REG,
+					   SYSC_AUTOIDLE_MASK);
+
+		} else if (dev->rev >= OMAP_I2C_REV_ON_3430) {
+			u32 v;
+
+			v = SYSC_AUTOIDLE_MASK;
+			v |= SYSC_ENAWAKEUP_MASK;
+			v |= (SYSC_IDLEMODE_SMART <<
+			      __ffs(SYSC_SIDLEMODE_MASK));
+			v |= (SYSC_CLOCKACTIVITY_FCLK <<
+			      __ffs(SYSC_CLOCKACTIVITY_MASK));
+
+			omap_i2c_write_reg(dev, OMAP_I2C_SYSC_REG, v);
+
+		}
 	}
 	omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, 0);
 
-- 
cgit v0.10.2


From 5043e9e737c4909e7f187d479227df46f5a2dd53 Mon Sep 17 00:00:00 2001
From: Kalle Jokiniemi <ext-kalle.jokiniemi@nokia.com>
Date: Fri, 21 Nov 2008 13:39:55 -0800
Subject: i2c-omap: Enable I2C wakeups for 34xx

I2C_WE registers were not configured, which caused huge delays in
I2C operations while cpu idle was enabled and omap entered WFI.

This patch enables all I2C wakeup sources.

Signed-off-by: Kalle Jokiniemi <ext-kalle.jokiniemi@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index b202354..96f3bed 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -52,6 +52,8 @@
 #define OMAP_I2C_IE_REG			0x04
 #define OMAP_I2C_STAT_REG		0x08
 #define OMAP_I2C_IV_REG			0x0c
+/* For OMAP3 I2C_IV has changed to I2C_WE (wakeup enable) */
+#define OMAP_I2C_WE_REG			0x0c
 #define OMAP_I2C_SYSS_REG		0x10
 #define OMAP_I2C_BUF_REG		0x14
 #define OMAP_I2C_CNT_REG		0x18
@@ -89,6 +91,24 @@
 #define OMAP_I2C_STAT_NACK	(1 << 1)	/* No ack interrupt enable */
 #define OMAP_I2C_STAT_AL	(1 << 0)	/* Arbitration lost int ena */
 
+/* I2C WE wakeup enable register */
+#define OMAP_I2C_WE_XDR_WE	(1 << 14)	/* TX drain wakup */
+#define OMAP_I2C_WE_RDR_WE	(1 << 13)	/* RX drain wakeup */
+#define OMAP_I2C_WE_AAS_WE	(1 << 9)	/* Address as slave wakeup*/
+#define OMAP_I2C_WE_BF_WE	(1 << 8)	/* Bus free wakeup */
+#define OMAP_I2C_WE_STC_WE	(1 << 6)	/* Start condition wakeup */
+#define OMAP_I2C_WE_GC_WE	(1 << 5)	/* General call wakeup */
+#define OMAP_I2C_WE_DRDY_WE	(1 << 3)	/* TX/RX data ready wakeup */
+#define OMAP_I2C_WE_ARDY_WE	(1 << 2)	/* Reg access ready wakeup */
+#define OMAP_I2C_WE_NACK_WE	(1 << 1)	/* No acknowledgment wakeup */
+#define OMAP_I2C_WE_AL_WE	(1 << 0)	/* Arbitration lost wakeup */
+
+#define OMAP_I2C_WE_ALL		(OMAP_I2C_WE_XDR_WE | OMAP_I2C_WE_RDR_WE | \
+				OMAP_I2C_WE_AAS_WE | OMAP_I2C_WE_BF_WE | \
+				OMAP_I2C_WE_STC_WE | OMAP_I2C_WE_GC_WE | \
+				OMAP_I2C_WE_DRDY_WE | OMAP_I2C_WE_ARDY_WE | \
+				OMAP_I2C_WE_NACK_WE | OMAP_I2C_WE_AL_WE)
+
 /* I2C Buffer Configuration Register (OMAP_I2C_BUF): */
 #define OMAP_I2C_BUF_RDMA_EN	(1 << 15)	/* RX DMA channel enable */
 #define OMAP_I2C_BUF_RXFIF_CLR	(1 << 14)	/* RX FIFO Clear */
@@ -279,6 +299,13 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
 			      __ffs(SYSC_CLOCKACTIVITY_MASK));
 
 			omap_i2c_write_reg(dev, OMAP_I2C_SYSC_REG, v);
+			/*
+			 * Enabling all wakup sources to stop I2C freezing on
+			 * WFI instruction.
+			 * REVISIT: Some wkup sources might not be needed.
+			 */
+			omap_i2c_write_reg(dev, OMAP_I2C_WE_REG,
+							OMAP_I2C_WE_ALL);
 
 		}
 	}
-- 
cgit v0.10.2


From ea6f18ed5a1531caf678374f30a0990c9e6742f3 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 25 Nov 2008 02:35:02 +1030
Subject: sched: reduce stack size requirements in kernel/sched.c

Impact: cleanup

  * use node_to_cpumask_ptr in place of node_to_cpumask to reduce stack
    requirements in sched.c

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index bb82765..dd22cec 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6110,8 +6110,9 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 
 	do {
 		/* On same node? */
-		mask = node_to_cpumask(cpu_to_node(dead_cpu));
-		cpus_and(mask, mask, p->cpus_allowed);
+		node_to_cpumask_ptr(pnodemask, cpu_to_node(dead_cpu));
+
+		cpus_and(mask, *pnodemask, p->cpus_allowed);
 		dest_cpu = any_online_cpu(mask);
 
 		/* On any allowed CPU? */
@@ -7098,9 +7099,9 @@ static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
 				 struct sched_group **sg, cpumask_t *nodemask)
 {
 	int group;
+	node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
 
-	*nodemask = node_to_cpumask(cpu_to_node(cpu));
-	cpus_and(*nodemask, *nodemask, *cpu_map);
+	cpus_and(*nodemask, *pnodemask, *cpu_map);
 	group = first_cpu(*nodemask);
 
 	if (sg)
@@ -7150,9 +7151,9 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
 
 		for (i = 0; i < nr_node_ids; i++) {
 			struct sched_group *oldsg, *sg = sched_group_nodes[i];
+			node_to_cpumask_ptr(pnodemask, i);
 
-			*nodemask = node_to_cpumask(i);
-			cpus_and(*nodemask, *nodemask, *cpu_map);
+			cpus_and(*nodemask, *pnodemask, *cpu_map);
 			if (cpus_empty(*nodemask))
 				continue;
 
-- 
cgit v0.10.2


From abcd083a1a658d2bc1f7fced02632bfe03918002 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:02 +1030
Subject: sched: convert sched.c from for_each_cpu_mask to for_each_cpu.

Impact: trivial API conversion

This is a simple conversion, but note that for_each_cpu() terminates
with i >= nr_cpu_ids, not i == NR_CPUS like for_each_cpu_mask() did.

I don't convert all of them: sd->span changes in a later patch, so
change those iterators there rather than here.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index dd22cec..e59978e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2061,7 +2061,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
 
-		for_each_cpu_mask_nr(i, group->cpumask) {
+		for_each_cpu(i, &group->cpumask) {
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
 				load = source_load(i, load_idx);
@@ -2103,7 +2103,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
 	/* Traverse only the allowed CPUs */
 	cpus_and(*tmp, group->cpumask, p->cpus_allowed);
 
-	for_each_cpu_mask_nr(i, *tmp) {
+	for_each_cpu(i, tmp) {
 		load = weighted_cpuload(i);
 
 		if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -3121,7 +3121,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		max_cpu_load = 0;
 		min_cpu_load = ~0UL;
 
-		for_each_cpu_mask_nr(i, group->cpumask) {
+		for_each_cpu(i, &group->cpumask) {
 			struct rq *rq;
 
 			if (!cpu_isset(i, *cpus))
@@ -3400,7 +3400,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 	unsigned long max_load = 0;
 	int i;
 
-	for_each_cpu_mask_nr(i, group->cpumask) {
+	for_each_cpu(i, &group->cpumask) {
 		unsigned long wl;
 
 		if (!cpu_isset(i, *cpus))
@@ -3942,7 +3942,7 @@ static void run_rebalance_domains(struct softirq_action *h)
 		int balance_cpu;
 
 		cpu_clear(this_cpu, cpus);
-		for_each_cpu_mask_nr(balance_cpu, cpus) {
+		for_each_cpu(balance_cpu, &cpus) {
 			/*
 			 * If this cpu gets work to do, stop the load balancing
 			 * work being done for other cpus. Next load
@@ -6906,7 +6906,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
 
 	cpus_clear(*covered);
 
-	for_each_cpu_mask_nr(i, *span) {
+	for_each_cpu(i, span) {
 		struct sched_group *sg;
 		int group = group_fn(i, cpu_map, &sg, tmpmask);
 		int j;
@@ -6917,7 +6917,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
 		cpus_clear(sg->cpumask);
 		sg->__cpu_power = 0;
 
-		for_each_cpu_mask_nr(j, *span) {
+		for_each_cpu(j, span) {
 			if (group_fn(j, cpu_map, NULL, tmpmask) != group)
 				continue;
 
@@ -7117,7 +7117,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 	if (!sg)
 		return;
 	do {
-		for_each_cpu_mask_nr(j, sg->cpumask) {
+		for_each_cpu(j, &sg->cpumask) {
 			struct sched_domain *sd;
 
 			sd = &per_cpu(phys_domains, j);
@@ -7142,7 +7142,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
 {
 	int cpu, i;
 
-	for_each_cpu_mask_nr(cpu, *cpu_map) {
+	for_each_cpu(cpu, cpu_map) {
 		struct sched_group **sched_group_nodes
 			= sched_group_nodes_bycpu[cpu];
 
@@ -7396,7 +7396,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	/*
 	 * Set up domains for cpus specified by the cpu_map.
 	 */
-	for_each_cpu_mask_nr(i, *cpu_map) {
+	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd = NULL, *p;
 		SCHED_CPUMASK_VAR(nodemask, allmasks);
 
@@ -7463,7 +7463,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 #ifdef CONFIG_SCHED_SMT
 	/* Set up CPU (sibling) groups */
-	for_each_cpu_mask_nr(i, *cpu_map) {
+	for_each_cpu(i, cpu_map) {
 		SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
 		SCHED_CPUMASK_VAR(send_covered, allmasks);
 
@@ -7480,7 +7480,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 #ifdef CONFIG_SCHED_MC
 	/* Set up multi-core groups */
-	for_each_cpu_mask_nr(i, *cpu_map) {
+	for_each_cpu(i, cpu_map) {
 		SCHED_CPUMASK_VAR(this_core_map, allmasks);
 		SCHED_CPUMASK_VAR(send_covered, allmasks);
 
@@ -7547,7 +7547,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 			goto error;
 		}
 		sched_group_nodes[i] = sg;
-		for_each_cpu_mask_nr(j, *nodemask) {
+		for_each_cpu(j, nodemask) {
 			struct sched_domain *sd;
 
 			sd = &per_cpu(node_domains, j);
@@ -7593,21 +7593,21 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 	/* Calculate CPU power for physical packages and nodes */
 #ifdef CONFIG_SCHED_SMT
-	for_each_cpu_mask_nr(i, *cpu_map) {
+	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd = &per_cpu(cpu_domains, i);
 
 		init_sched_groups_power(i, sd);
 	}
 #endif
 #ifdef CONFIG_SCHED_MC
-	for_each_cpu_mask_nr(i, *cpu_map) {
+	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd = &per_cpu(core_domains, i);
 
 		init_sched_groups_power(i, sd);
 	}
 #endif
 
-	for_each_cpu_mask_nr(i, *cpu_map) {
+	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd = &per_cpu(phys_domains, i);
 
 		init_sched_groups_power(i, sd);
@@ -7627,7 +7627,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
 
 	/* Attach the domains */
-	for_each_cpu_mask_nr(i, *cpu_map) {
+	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
 		sd = &per_cpu(cpu_domains, i);
@@ -7709,7 +7709,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	cpumask_t tmpmask;
 	int i;
 
-	for_each_cpu_mask_nr(i, *cpu_map)
+	for_each_cpu(i, cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
 	arch_destroy_sched_domains(cpu_map, &tmpmask);
-- 
cgit v0.10.2


From 3404c8d97c2d3eb87b1bf4aadad957bfb5235b14 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:03 +1030
Subject: sched: get rid of boutique sched.c allocations, use cpumask_var_t.

Impact: use new general API

Using lots of allocs rather than one big alloc is less efficient, but
who cares for this setup function?

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index e59978e..0dc9d57 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7263,48 +7263,6 @@ SD_INIT_FUNC(CPU)
  SD_INIT_FUNC(MC)
 #endif
 
-/*
- * To minimize stack usage kmalloc room for cpumasks and share the
- * space as the usage in build_sched_domains() dictates.  Used only
- * if the amount of space is significant.
- */
-struct allmasks {
-	cpumask_t tmpmask;			/* make this one first */
-	union {
-		cpumask_t nodemask;
-		cpumask_t this_sibling_map;
-		cpumask_t this_core_map;
-	};
-	cpumask_t send_covered;
-
-#ifdef CONFIG_NUMA
-	cpumask_t domainspan;
-	cpumask_t covered;
-	cpumask_t notcovered;
-#endif
-};
-
-#if	NR_CPUS > 128
-#define SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
-static inline void sched_cpumask_alloc(struct allmasks **masks)
-{
-	*masks = kmalloc(sizeof(**masks), GFP_KERNEL);
-}
-static inline void sched_cpumask_free(struct allmasks *masks)
-{
-	kfree(masks);
-}
-#else
-#define SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
-static inline void sched_cpumask_alloc(struct allmasks **masks)
-{ }
-static inline void sched_cpumask_free(struct allmasks *masks)
-{ }
-#endif
-
-#define	SCHED_CPUMASK_VAR(v, a) 	cpumask_t *v = (cpumask_t *) \
-			((unsigned long)(a) + offsetof(struct allmasks, v))
-
 static int default_relax_domain_level = -1;
 
 static int __init setup_relax_domain_level(char *str)
@@ -7347,14 +7305,35 @@ static void set_domain_attribute(struct sched_domain *sd,
 static int __build_sched_domains(const cpumask_t *cpu_map,
 				 struct sched_domain_attr *attr)
 {
-	int i;
+	int i, err = -ENOMEM;
 	struct root_domain *rd;
-	SCHED_CPUMASK_DECLARE(allmasks);
-	cpumask_t *tmpmask;
+	cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered,
+		tmpmask;
 #ifdef CONFIG_NUMA
+	cpumask_var_t domainspan, covered, notcovered;
 	struct sched_group **sched_group_nodes = NULL;
 	int sd_allnodes = 0;
 
+	if (!alloc_cpumask_var(&domainspan, GFP_KERNEL))
+		goto out;
+	if (!alloc_cpumask_var(&covered, GFP_KERNEL))
+		goto free_domainspan;
+	if (!alloc_cpumask_var(&notcovered, GFP_KERNEL))
+		goto free_covered;
+#endif
+
+	if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
+		goto free_notcovered;
+	if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL))
+		goto free_nodemask;
+	if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL))
+		goto free_this_sibling_map;
+	if (!alloc_cpumask_var(&send_covered, GFP_KERNEL))
+		goto free_this_core_map;
+	if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
+		goto free_send_covered;
+
+#ifdef CONFIG_NUMA
 	/*
 	 * Allocate the per-node list of sched groups
 	 */
@@ -7362,33 +7341,16 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 				    GFP_KERNEL);
 	if (!sched_group_nodes) {
 		printk(KERN_WARNING "Can not alloc sched group node list\n");
-		return -ENOMEM;
+		goto free_tmpmask;
 	}
 #endif
 
 	rd = alloc_rootdomain();
 	if (!rd) {
 		printk(KERN_WARNING "Cannot alloc root domain\n");
-#ifdef CONFIG_NUMA
-		kfree(sched_group_nodes);
-#endif
-		return -ENOMEM;
+		goto free_sched_groups;
 	}
 
-	/* get space for all scratch cpumask variables */
-	sched_cpumask_alloc(&allmasks);
-	if (!allmasks) {
-		printk(KERN_WARNING "Cannot alloc cpumask array\n");
-		kfree(rd);
-#ifdef CONFIG_NUMA
-		kfree(sched_group_nodes);
-#endif
-		return -ENOMEM;
-	}
-
-	tmpmask = (cpumask_t *)allmasks;
-
-
 #ifdef CONFIG_NUMA
 	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
 #endif
@@ -7398,7 +7360,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	 */
 	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd = NULL, *p;
-		SCHED_CPUMASK_VAR(nodemask, allmasks);
 
 		*nodemask = node_to_cpumask(cpu_to_node(i));
 		cpus_and(*nodemask, *nodemask, *cpu_map);
@@ -7464,9 +7425,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #ifdef CONFIG_SCHED_SMT
 	/* Set up CPU (sibling) groups */
 	for_each_cpu(i, cpu_map) {
-		SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
-
 		*this_sibling_map = per_cpu(cpu_sibling_map, i);
 		cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
 		if (i != first_cpu(*this_sibling_map))
@@ -7481,9 +7439,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #ifdef CONFIG_SCHED_MC
 	/* Set up multi-core groups */
 	for_each_cpu(i, cpu_map) {
-		SCHED_CPUMASK_VAR(this_core_map, allmasks);
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
-
 		*this_core_map = cpu_coregroup_map(i);
 		cpus_and(*this_core_map, *this_core_map, *cpu_map);
 		if (i != first_cpu(*this_core_map))
@@ -7497,9 +7452,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 	/* Set up physical groups */
 	for (i = 0; i < nr_node_ids; i++) {
-		SCHED_CPUMASK_VAR(nodemask, allmasks);
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
-
 		*nodemask = node_to_cpumask(i);
 		cpus_and(*nodemask, *nodemask, *cpu_map);
 		if (cpus_empty(*nodemask))
@@ -7513,8 +7465,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #ifdef CONFIG_NUMA
 	/* Set up node groups */
 	if (sd_allnodes) {
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
-
 		init_sched_build_groups(cpu_map, cpu_map,
 					&cpu_to_allnodes_group,
 					send_covered, tmpmask);
@@ -7523,9 +7473,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	for (i = 0; i < nr_node_ids; i++) {
 		/* Set up node groups */
 		struct sched_group *sg, *prev;
-		SCHED_CPUMASK_VAR(nodemask, allmasks);
-		SCHED_CPUMASK_VAR(domainspan, allmasks);
-		SCHED_CPUMASK_VAR(covered, allmasks);
 		int j;
 
 		*nodemask = node_to_cpumask(i);
@@ -7560,7 +7507,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		prev = sg;
 
 		for (j = 0; j < nr_node_ids; j++) {
-			SCHED_CPUMASK_VAR(notcovered, allmasks);
 			int n = (i + j) % nr_node_ids;
 			node_to_cpumask_ptr(pnodemask, n);
 
@@ -7639,15 +7585,40 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		cpu_attach_domain(sd, rd, i);
 	}
 
-	sched_cpumask_free(allmasks);
-	return 0;
+	err = 0;
+
+free_tmpmask:
+	free_cpumask_var(tmpmask);
+free_send_covered:
+	free_cpumask_var(send_covered);
+free_this_core_map:
+	free_cpumask_var(this_core_map);
+free_this_sibling_map:
+	free_cpumask_var(this_sibling_map);
+free_nodemask:
+	free_cpumask_var(nodemask);
+free_notcovered:
+#ifdef CONFIG_NUMA
+	free_cpumask_var(notcovered);
+free_covered:
+	free_cpumask_var(covered);
+free_domainspan:
+	free_cpumask_var(domainspan);
+out:
+#endif
+	return err;
+
+free_sched_groups:
+#ifdef CONFIG_NUMA
+	kfree(sched_group_nodes);
+#endif
+	goto free_tmpmask;
 
 #ifdef CONFIG_NUMA
 error:
 	free_sched_groups(cpu_map, tmpmask);
-	sched_cpumask_free(allmasks);
 	kfree(rd);
-	return -ENOMEM;
+	goto free_tmpmask;
 #endif
 }
 
-- 
cgit v0.10.2


From 1e5ce4f4a755ee498bd9217dae26143afa0d8f31 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:03 +1030
Subject: sched: remove any_online_cpu()

Impact: use new API

any_online_cpu() is a good name, but it takes a cpumask_t, not a
pointer.

There are several places where any_online_cpu() doesn't really want a
mask arg at all.  Replace all callers with cpumask_any() and
cpumask_any_and().

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 0dc9d57..a2de33d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5964,7 +5964,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
 	if (cpu_isset(task_cpu(p), *new_mask))
 		goto out;
 
-	if (migrate_task(p, any_online_cpu(*new_mask), &req)) {
+	if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
 		/* Need help from migration thread: drop lock and wait. */
 		task_rq_unlock(rq, &flags);
 		wake_up_process(rq->migration_thread);
@@ -6113,11 +6113,12 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 		node_to_cpumask_ptr(pnodemask, cpu_to_node(dead_cpu));
 
 		cpus_and(mask, *pnodemask, p->cpus_allowed);
-		dest_cpu = any_online_cpu(mask);
+		dest_cpu = cpumask_any_and(cpu_online_mask, &mask);
 
 		/* On any allowed CPU? */
 		if (dest_cpu >= nr_cpu_ids)
-			dest_cpu = any_online_cpu(p->cpus_allowed);
+			dest_cpu = cpumask_any_and(cpu_online_mask,
+						   &p->cpus_allowed);
 
 		/* No more Mr. Nice Guy. */
 		if (dest_cpu >= nr_cpu_ids) {
@@ -6133,7 +6134,8 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 			 */
 			rq = task_rq_lock(p, &flags);
 			p->cpus_allowed = cpus_allowed;
-			dest_cpu = any_online_cpu(p->cpus_allowed);
+			dest_cpu = cpumask_any_and(cpu_online_mask,
+						    &p->cpus_allowed);
 			task_rq_unlock(rq, &flags);
 
 			/*
@@ -6159,7 +6161,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
  */
 static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
-	struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR));
+	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -6524,7 +6526,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 			break;
 		/* Unbind it from offline cpu so it can run. Fall thru. */
 		kthread_bind(cpu_rq(cpu)->migration_thread,
-			     any_online_cpu(cpu_online_map));
+			     cpumask_any(cpu_online_mask));
 		kthread_stop(cpu_rq(cpu)->migration_thread);
 		cpu_rq(cpu)->migration_thread = NULL;
 		break;
-- 
cgit v0.10.2


From 758b2cdc6f6a22c702bd8f2344382fb1270b2161 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:04 +1030
Subject: sched: wrap sched_group and sched_domain cpumask accesses.

Impact: trivial wrap of member accesses

This eases the transition in the next patch.

We also get rid of a temporary cpumask in find_idlest_cpu() thanks to
for_each_cpu_and, and sched_balance_self() due to getting weight before
setting sd to NULL.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4ce5c60..2b95aa9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -786,6 +786,11 @@ struct sched_group {
 	u32 reciprocal_cpu_power;
 };
 
+static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
+{
+	return &sg->cpumask;
+}
+
 enum sched_domain_level {
 	SD_LV_NONE = 0,
 	SD_LV_SIBLING,
@@ -866,6 +871,11 @@ struct sched_domain {
 #endif
 };
 
+static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
+{
+	return &sd->span;
+}
+
 extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 				    struct sched_domain_attr *dattr_new);
 extern int arch_reinit_sched_domains(void);
diff --git a/kernel/sched.c b/kernel/sched.c
index a2de33d..575f38a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1501,7 +1501,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	struct sched_domain *sd = data;
 	int i;
 
-	for_each_cpu_mask(i, sd->span) {
+	for_each_cpu(i, sched_domain_span(sd)) {
 		/*
 		 * If there are currently no tasks on the cpu pretend there
 		 * is one of average load so that when a new task gets to
@@ -1522,7 +1522,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
 		shares = tg->shares;
 
-	for_each_cpu_mask(i, sd->span)
+	for_each_cpu(i, sched_domain_span(sd))
 		update_group_shares_cpu(tg, i, shares, rq_weight);
 
 	return 0;
@@ -2053,15 +2053,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 		int i;
 
 		/* Skip over this group if it has no CPUs allowed */
-		if (!cpus_intersects(group->cpumask, p->cpus_allowed))
+		if (!cpumask_intersects(sched_group_cpus(group),
+					&p->cpus_allowed))
 			continue;
 
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpumask_test_cpu(this_cpu,
+					       sched_group_cpus(group));
 
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
 
-		for_each_cpu(i, &group->cpumask) {
+		for_each_cpu(i, sched_group_cpus(group)) {
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
 				load = source_load(i, load_idx);
@@ -2093,17 +2095,14 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
  * find_idlest_cpu - find the idlest cpu among the cpus in group.
  */
 static int
-find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
-		cpumask_t *tmp)
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 {
 	unsigned long load, min_load = ULONG_MAX;
 	int idlest = -1;
 	int i;
 
 	/* Traverse only the allowed CPUs */
-	cpus_and(*tmp, group->cpumask, p->cpus_allowed);
-
-	for_each_cpu(i, tmp) {
+	for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
 		load = weighted_cpuload(i);
 
 		if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -2145,7 +2144,6 @@ static int sched_balance_self(int cpu, int flag)
 		update_shares(sd);
 
 	while (sd) {
-		cpumask_t span, tmpmask;
 		struct sched_group *group;
 		int new_cpu, weight;
 
@@ -2154,14 +2152,13 @@ static int sched_balance_self(int cpu, int flag)
 			continue;
 		}
 
-		span = sd->span;
 		group = find_idlest_group(sd, t, cpu);
 		if (!group) {
 			sd = sd->child;
 			continue;
 		}
 
-		new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask);
+		new_cpu = find_idlest_cpu(group, t, cpu);
 		if (new_cpu == -1 || new_cpu == cpu) {
 			/* Now try balancing at a lower domain level of cpu */
 			sd = sd->child;
@@ -2170,10 +2167,10 @@ static int sched_balance_self(int cpu, int flag)
 
 		/* Now try balancing at a lower domain level of new_cpu */
 		cpu = new_cpu;
+		weight = cpumask_weight(sched_domain_span(sd));
 		sd = NULL;
-		weight = cpus_weight(span);
 		for_each_domain(cpu, tmp) {
-			if (weight <= cpus_weight(tmp->span))
+			if (weight <= cpumask_weight(sched_domain_span(tmp)))
 				break;
 			if (tmp->flags & flag)
 				sd = tmp;
@@ -2218,7 +2215,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 		cpu = task_cpu(p);
 
 		for_each_domain(this_cpu, sd) {
-			if (cpu_isset(cpu, sd->span)) {
+			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 				update_shares(sd);
 				break;
 			}
@@ -2266,7 +2263,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 	else {
 		struct sched_domain *sd;
 		for_each_domain(this_cpu, sd) {
-			if (cpu_isset(cpu, sd->span)) {
+			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 				schedstat_inc(sd, ttwu_wake_remote);
 				break;
 			}
@@ -3109,10 +3106,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		unsigned long sum_avg_load_per_task;
 		unsigned long avg_load_per_task;
 
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpumask_test_cpu(this_cpu,
+					       sched_group_cpus(group));
 
 		if (local_group)
-			balance_cpu = first_cpu(group->cpumask);
+			balance_cpu = cpumask_first(sched_group_cpus(group));
 
 		/* Tally up the load of all CPUs in the group */
 		sum_weighted_load = sum_nr_running = avg_load = 0;
@@ -3121,13 +3119,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		max_cpu_load = 0;
 		min_cpu_load = ~0UL;
 
-		for_each_cpu(i, &group->cpumask) {
-			struct rq *rq;
-
-			if (!cpu_isset(i, *cpus))
-				continue;
-
-			rq = cpu_rq(i);
+		for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+			struct rq *rq = cpu_rq(i);
 
 			if (*sd_idle && rq->nr_running)
 				*sd_idle = 0;
@@ -3238,8 +3231,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		 */
 		if ((sum_nr_running < min_nr_running) ||
 		    (sum_nr_running == min_nr_running &&
-		     first_cpu(group->cpumask) <
-		     first_cpu(group_min->cpumask))) {
+		     cpumask_first(sched_group_cpus(group)) <
+		     cpumask_first(sched_group_cpus(group_min)))) {
 			group_min = group;
 			min_nr_running = sum_nr_running;
 			min_load_per_task = sum_weighted_load /
@@ -3254,8 +3247,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		if (sum_nr_running <= group_capacity - 1) {
 			if (sum_nr_running > leader_nr_running ||
 			    (sum_nr_running == leader_nr_running &&
-			     first_cpu(group->cpumask) >
-			      first_cpu(group_leader->cpumask))) {
+			     cpumask_first(sched_group_cpus(group)) >
+			     cpumask_first(sched_group_cpus(group_leader)))) {
 				group_leader = group;
 				leader_nr_running = sum_nr_running;
 			}
@@ -3400,7 +3393,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 	unsigned long max_load = 0;
 	int i;
 
-	for_each_cpu(i, &group->cpumask) {
+	for_each_cpu(i, sched_group_cpus(group)) {
 		unsigned long wl;
 
 		if (!cpu_isset(i, *cpus))
@@ -3746,7 +3739,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 	/* Search for an sd spanning us and the target CPU. */
 	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
-		    cpu_isset(busiest_cpu, sd->span))
+		    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
 				break;
 	}
 
@@ -6618,7 +6611,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 	struct sched_group *group = sd->groups;
 	char str[256];
 
-	cpulist_scnprintf(str, sizeof(str), sd->span);
+	cpulist_scnprintf(str, sizeof(str), *sched_domain_span(sd));
 	cpus_clear(*groupmask);
 
 	printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
@@ -6633,11 +6626,11 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
 	printk(KERN_CONT "span %s level %s\n", str, sd->name);
 
-	if (!cpu_isset(cpu, sd->span)) {
+	if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 		printk(KERN_ERR "ERROR: domain->span does not contain "
 				"CPU%d\n", cpu);
 	}
-	if (!cpu_isset(cpu, group->cpumask)) {
+	if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
 		printk(KERN_ERR "ERROR: domain->groups does not contain"
 				" CPU%d\n", cpu);
 	}
@@ -6657,31 +6650,32 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		if (!cpus_weight(group->cpumask)) {
+		if (!cpumask_weight(sched_group_cpus(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: empty group\n");
 			break;
 		}
 
-		if (cpus_intersects(*groupmask, group->cpumask)) {
+		if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: repeated CPUs\n");
 			break;
 		}
 
-		cpus_or(*groupmask, *groupmask, group->cpumask);
+		cpumask_or(groupmask, groupmask, sched_group_cpus(group));
 
-		cpulist_scnprintf(str, sizeof(str), group->cpumask);
+		cpulist_scnprintf(str, sizeof(str), *sched_group_cpus(group));
 		printk(KERN_CONT " %s", str);
 
 		group = group->next;
 	} while (group != sd->groups);
 	printk(KERN_CONT "\n");
 
-	if (!cpus_equal(sd->span, *groupmask))
+	if (!cpumask_equal(sched_domain_span(sd), groupmask))
 		printk(KERN_ERR "ERROR: groups don't span domain->span\n");
 
-	if (sd->parent && !cpus_subset(*groupmask, sd->parent->span))
+	if (sd->parent &&
+	    !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
 		printk(KERN_ERR "ERROR: parent span is not a superset "
 			"of domain->span\n");
 	return 0;
@@ -6721,7 +6715,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 
 static int sd_degenerate(struct sched_domain *sd)
 {
-	if (cpus_weight(sd->span) == 1)
+	if (cpumask_weight(sched_domain_span(sd)) == 1)
 		return 1;
 
 	/* Following flags need at least 2 groups */
@@ -6752,7 +6746,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	if (sd_degenerate(parent))
 		return 1;
 
-	if (!cpus_equal(sd->span, parent->span))
+	if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
 		return 0;
 
 	/* Does parent contain flags not in child? */
@@ -6913,10 +6907,10 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
 		int group = group_fn(i, cpu_map, &sg, tmpmask);
 		int j;
 
-		if (cpu_isset(i, *covered))
+		if (cpumask_test_cpu(i, covered))
 			continue;
 
-		cpus_clear(sg->cpumask);
+		cpumask_clear(sched_group_cpus(sg));
 		sg->__cpu_power = 0;
 
 		for_each_cpu(j, span) {
@@ -6924,7 +6918,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
 				continue;
 
 			cpu_set(j, *covered);
-			cpu_set(j, sg->cpumask);
+			cpumask_set_cpu(j, sched_group_cpus(sg));
 		}
 		if (!first)
 			first = sg;
@@ -7119,11 +7113,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 	if (!sg)
 		return;
 	do {
-		for_each_cpu(j, &sg->cpumask) {
+		for_each_cpu(j, sched_group_cpus(sg)) {
 			struct sched_domain *sd;
 
 			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
+			if (j != cpumask_first(sched_group_cpus(sd->groups))) {
 				/*
 				 * Only add "power" once for each
 				 * physical package.
@@ -7200,7 +7194,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 
 	WARN_ON(!sd || !sd->groups);
 
-	if (cpu != first_cpu(sd->groups->cpumask))
+	if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
 		return;
 
 	child = sd->child;
@@ -7372,7 +7366,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 			sd = &per_cpu(allnodes_domains, i);
 			SD_INIT(sd, ALLNODES);
 			set_domain_attribute(sd, attr);
-			sd->span = *cpu_map;
+			cpumask_copy(sched_domain_span(sd), cpu_map);
 			cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
 			p = sd;
 			sd_allnodes = 1;
@@ -7382,18 +7376,19 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		sd = &per_cpu(node_domains, i);
 		SD_INIT(sd, NODE);
 		set_domain_attribute(sd, attr);
-		sched_domain_node_span(cpu_to_node(i), &sd->span);
+		sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
 		sd->parent = p;
 		if (p)
 			p->child = sd;
-		cpus_and(sd->span, sd->span, *cpu_map);
+		cpumask_and(sched_domain_span(sd),
+			    sched_domain_span(sd), cpu_map);
 #endif
 
 		p = sd;
 		sd = &per_cpu(phys_domains, i);
 		SD_INIT(sd, CPU);
 		set_domain_attribute(sd, attr);
-		sd->span = *nodemask;
+		cpumask_copy(sched_domain_span(sd), nodemask);
 		sd->parent = p;
 		if (p)
 			p->child = sd;
@@ -7404,8 +7399,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		sd = &per_cpu(core_domains, i);
 		SD_INIT(sd, MC);
 		set_domain_attribute(sd, attr);
-		sd->span = cpu_coregroup_map(i);
-		cpus_and(sd->span, sd->span, *cpu_map);
+		*sched_domain_span(sd) = cpu_coregroup_map(i);
+		cpumask_and(sched_domain_span(sd),
+			    sched_domain_span(sd), cpu_map);
 		sd->parent = p;
 		p->child = sd;
 		cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7416,8 +7412,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		sd = &per_cpu(cpu_domains, i);
 		SD_INIT(sd, SIBLING);
 		set_domain_attribute(sd, attr);
-		sd->span = per_cpu(cpu_sibling_map, i);
-		cpus_and(sd->span, sd->span, *cpu_map);
+		cpumask_and(sched_domain_span(sd),
+			    &per_cpu(cpu_sibling_map, i), cpu_map);
 		sd->parent = p;
 		p->child = sd;
 		cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7503,7 +7499,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 			sd->groups = sg;
 		}
 		sg->__cpu_power = 0;
-		sg->cpumask = *nodemask;
+		cpumask_copy(sched_group_cpus(sg), nodemask);
 		sg->next = sg;
 		cpus_or(*covered, *covered, *nodemask);
 		prev = sg;
@@ -7530,7 +7526,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 				goto error;
 			}
 			sg->__cpu_power = 0;
-			sg->cpumask = *tmpmask;
+			cpumask_copy(sched_group_cpus(sg), tmpmask);
 			sg->next = prev->next;
 			cpus_or(*covered, *covered, *tmpmask);
 			prev->next = sg;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 98345e4..bba0040 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1024,7 +1024,6 @@ static void yield_task_fair(struct rq *rq)
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
 static int wake_idle(int cpu, struct task_struct *p)
 {
-	cpumask_t tmp;
 	struct sched_domain *sd;
 	int i;
 
@@ -1044,10 +1043,9 @@ static int wake_idle(int cpu, struct task_struct *p)
 		if ((sd->flags & SD_WAKE_IDLE)
 		    || ((sd->flags & SD_WAKE_IDLE_FAR)
 			&& !task_hot(p, task_rq(p)->clock, sd))) {
-			cpus_and(tmp, sd->span, p->cpus_allowed);
-			cpus_and(tmp, tmp, cpu_active_map);
-			for_each_cpu_mask_nr(i, tmp) {
-				if (idle_cpu(i)) {
+			for_each_cpu_and(i, sched_domain_span(sd),
+					 &p->cpus_allowed) {
+				if (cpu_active(i) && idle_cpu(i)) {
 					if (i != task_cpu(p)) {
 						schedstat_inc(p,
 						       se.nr_wakeups_idle);
@@ -1240,7 +1238,7 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 	 * this_cpu and prev_cpu are present in:
 	 */
 	for_each_domain(this_cpu, sd) {
-		if (cpu_isset(prev_cpu, sd->span)) {
+		if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
 			this_sd = sd;
 			break;
 		}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 2bdd444..4cd813a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1017,7 +1017,8 @@ static int find_lowest_rq(struct task_struct *task)
 			cpumask_t domain_mask;
 			int       best_cpu;
 
-			cpus_and(domain_mask, sd->span, *lowest_mask);
+			cpumask_and(&domain_mask, sched_domain_span(sd),
+				    lowest_mask);
 
 			best_cpu = pick_optimal_cpu(this_cpu,
 						    &domain_mask);
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 7dbf72a..ce34083 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -42,7 +42,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
 		for_each_domain(cpu, sd) {
 			enum cpu_idle_type itype;
 
-			cpumask_scnprintf(mask_str, mask_len, sd->span);
+			cpumask_scnprintf(mask_str, mask_len,
+					  *sched_domain_span(sd));
 			seq_printf(seq, "domain%d %s", dcount++, mask_str);
 			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
 					itype++) {
-- 
cgit v0.10.2


From 6c99e9ad47d9c082bd096f42fb49e397b05d58a8 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:04 +1030
Subject: sched: convert struct sched_group/sched_domain cpumask_ts to variable
 bitmaps

Impact: (future) size reduction for large NR_CPUS.

We move the 'cpumask' member of sched_group to the end, so when we
kmalloc it we can do a minimal allocation: saves space for small
nr_cpu_ids but big CONFIG_NR_CPUS.  Similar trick for 'span' in
sched_domain.

This isn't quite as good as converting to a cpumask_var_t, as some
sched_groups are actually static, but it's safer: we don't have to
figure out where to call alloc_cpumask_var/free_cpumask_var.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b95aa9..c5be6c6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -771,7 +771,6 @@ enum cpu_idle_type {
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
-	cpumask_t cpumask;
 
 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
@@ -784,11 +783,13 @@ struct sched_group {
 	 * (see include/linux/reciprocal_div.h)
 	 */
 	u32 reciprocal_cpu_power;
+
+	unsigned long cpumask[];
 };
 
 static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
 {
-	return &sg->cpumask;
+	return to_cpumask(sg->cpumask);
 }
 
 enum sched_domain_level {
@@ -814,7 +815,6 @@ struct sched_domain {
 	struct sched_domain *parent;	/* top domain must be null terminated */
 	struct sched_domain *child;	/* bottom domain must be null terminated */
 	struct sched_group *groups;	/* the balancing groups of the domain */
-	cpumask_t span;			/* span of all CPUs in this domain */
 	unsigned long min_interval;	/* Minimum balance interval ms */
 	unsigned long max_interval;	/* Maximum balance interval ms */
 	unsigned int busy_factor;	/* less balancing by factor if busy */
@@ -869,11 +869,14 @@ struct sched_domain {
 #ifdef CONFIG_SCHED_DEBUG
 	char *name;
 #endif
+
+	/* span of all CPUs in this domain */
+	unsigned long span[];
 };
 
 static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 {
-	return &sd->span;
+	return to_cpumask(sd->span);
 }
 
 extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
diff --git a/kernel/sched.c b/kernel/sched.c
index 575f38a..6b9606a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7006,18 +7006,33 @@ static void sched_domain_node_span(int node, cpumask_t *span)
 int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 
 /*
+ * The cpus mask in sched_group and sched_domain hangs off the end.
+ * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
+ * for nr_cpu_ids < CONFIG_NR_CPUS.
+ */
+struct static_sched_group {
+	struct sched_group sg;
+	DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
+};
+
+struct static_sched_domain {
+	struct sched_domain sd;
+	DECLARE_BITMAP(span, CONFIG_NR_CPUS);
+};
+
+/*
  * SMT sched-domains:
  */
 #ifdef CONFIG_SCHED_SMT
-static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_cpus);
+static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
 
 static int
 cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
 		 cpumask_t *unused)
 {
 	if (sg)
-		*sg = &per_cpu(sched_group_cpus, cpu);
+		*sg = &per_cpu(sched_group_cpus, cpu).sg;
 	return cpu;
 }
 #endif /* CONFIG_SCHED_SMT */
@@ -7026,8 +7041,8 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
  * multi-core sched-domains:
  */
 #ifdef CONFIG_SCHED_MC
-static DEFINE_PER_CPU(struct sched_domain, core_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_core);
+static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
 #endif /* CONFIG_SCHED_MC */
 
 #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
@@ -7041,7 +7056,7 @@ cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
 	cpus_and(*mask, *mask, *cpu_map);
 	group = first_cpu(*mask);
 	if (sg)
-		*sg = &per_cpu(sched_group_core, group);
+		*sg = &per_cpu(sched_group_core, group).sg;
 	return group;
 }
 #elif defined(CONFIG_SCHED_MC)
@@ -7050,13 +7065,13 @@ cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
 		  cpumask_t *unused)
 {
 	if (sg)
-		*sg = &per_cpu(sched_group_core, cpu);
+		*sg = &per_cpu(sched_group_core, cpu).sg;
 	return cpu;
 }
 #endif
 
-static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_phys);
+static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
 
 static int
 cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
@@ -7075,7 +7090,7 @@ cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
 	group = cpu;
 #endif
 	if (sg)
-		*sg = &per_cpu(sched_group_phys, group);
+		*sg = &per_cpu(sched_group_phys, group).sg;
 	return group;
 }
 
@@ -7089,7 +7104,7 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains);
 static struct sched_group ***sched_group_nodes_bycpu;
 
 static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
 
 static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
 				 struct sched_group **sg, cpumask_t *nodemask)
@@ -7101,7 +7116,7 @@ static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
 	group = first_cpu(*nodemask);
 
 	if (sg)
-		*sg = &per_cpu(sched_group_allnodes, group);
+		*sg = &per_cpu(sched_group_allnodes, group).sg;
 	return group;
 }
 
@@ -7116,7 +7131,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 		for_each_cpu(j, sched_group_cpus(sg)) {
 			struct sched_domain *sd;
 
-			sd = &per_cpu(phys_domains, j);
+			sd = &per_cpu(phys_domains, j).sd;
 			if (j != cpumask_first(sched_group_cpus(sd->groups))) {
 				/*
 				 * Only add "power" once for each
@@ -7385,7 +7400,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
 
 		p = sd;
-		sd = &per_cpu(phys_domains, i);
+		sd = &per_cpu(phys_domains, i).sd;
 		SD_INIT(sd, CPU);
 		set_domain_attribute(sd, attr);
 		cpumask_copy(sched_domain_span(sd), nodemask);
@@ -7396,7 +7411,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 #ifdef CONFIG_SCHED_MC
 		p = sd;
-		sd = &per_cpu(core_domains, i);
+		sd = &per_cpu(core_domains, i).sd;
 		SD_INIT(sd, MC);
 		set_domain_attribute(sd, attr);
 		*sched_domain_span(sd) = cpu_coregroup_map(i);
@@ -7409,7 +7424,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 #ifdef CONFIG_SCHED_SMT
 		p = sd;
-		sd = &per_cpu(cpu_domains, i);
+		sd = &per_cpu(cpu_domains, i).sd;
 		SD_INIT(sd, SIBLING);
 		set_domain_attribute(sd, attr);
 		cpumask_and(sched_domain_span(sd),
@@ -7485,7 +7500,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		sched_domain_node_span(i, domainspan);
 		cpus_and(*domainspan, *domainspan, *cpu_map);
 
-		sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i);
+		sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
+				  GFP_KERNEL, i);
 		if (!sg) {
 			printk(KERN_WARNING "Can not alloc domain group for "
 				"node %d\n", i);
@@ -7518,7 +7534,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 			if (cpus_empty(*tmpmask))
 				continue;
 
-			sg = kmalloc_node(sizeof(struct sched_group),
+			sg = kmalloc_node(sizeof(struct sched_group) +
+					  cpumask_size(),
 					  GFP_KERNEL, i);
 			if (!sg) {
 				printk(KERN_WARNING
@@ -7538,21 +7555,21 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	/* Calculate CPU power for physical packages and nodes */
 #ifdef CONFIG_SCHED_SMT
 	for_each_cpu(i, cpu_map) {
-		struct sched_domain *sd = &per_cpu(cpu_domains, i);
+		struct sched_domain *sd = &per_cpu(cpu_domains, i).sd;
 
 		init_sched_groups_power(i, sd);
 	}
 #endif
 #ifdef CONFIG_SCHED_MC
 	for_each_cpu(i, cpu_map) {
-		struct sched_domain *sd = &per_cpu(core_domains, i);
+		struct sched_domain *sd = &per_cpu(core_domains, i).sd;
 
 		init_sched_groups_power(i, sd);
 	}
 #endif
 
 	for_each_cpu(i, cpu_map) {
-		struct sched_domain *sd = &per_cpu(phys_domains, i);
+		struct sched_domain *sd = &per_cpu(phys_domains, i).sd;
 
 		init_sched_groups_power(i, sd);
 	}
@@ -7574,11 +7591,11 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i);
+		sd = &per_cpu(cpu_domains, i).sd;
 #elif defined(CONFIG_SCHED_MC)
-		sd = &per_cpu(core_domains, i);
+		sd = &per_cpu(core_domains, i).sd;
 #else
-		sd = &per_cpu(phys_domains, i);
+		sd = &per_cpu(phys_domains, i).sd;
 #endif
 		cpu_attach_domain(sd, rd, i);
 	}
-- 
cgit v0.10.2


From 6a7b3dc3440f7b5a9b67594af01ed562cdeb41e4 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:04 +1030
Subject: sched: convert nohz_cpu_mask to cpumask_var_t.

Impact: (future) size reduction for large NR_CPUS.

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space for small nr_cpu_ids but big CONFIG_NR_CPUS.  cpumask_var_t
is just a struct cpumask for !CONFIG_CPUMASK_OFFSTACK.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c5be6c6..1e33e2c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -249,7 +249,7 @@ extern void init_idle_bootup_task(struct task_struct *idle);
 extern int runqueue_is_locked(void);
 extern void task_rq_unlock_wait(struct task_struct *p);
 
-extern cpumask_t nohz_cpu_mask;
+extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern int select_nohz_load_balancer(int cpu);
 #else
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index e503a00..c03ca3e 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -393,7 +393,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 		 * unnecessarily.
 		 */
 		smp_mb();
-		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+		cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask);
 
 		rcp->signaled = 0;
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index 6b9606a..2723d7a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5870,9 +5870,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
  * indicates which cpus entered this state. This is used
  * in the rcu update to wait only for active cpus. For system
  * which do not switch off the HZ timer nohz_cpu_mask should
- * always be CPU_MASK_NONE.
+ * always be CPU_BITS_NONE.
  */
-cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
+cpumask_var_t nohz_cpu_mask;
 
 /*
  * Increase the granularity value when there are more CPUs,
@@ -8274,6 +8274,9 @@ void __init sched_init(void)
 	 */
 	current->sched_class = &fair_sched_class;
 
+	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
+	alloc_bootmem_cpumask_var(&nohz_cpu_mask);
+
 	scheduler_running = 1;
 }
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 342fc9c..70f872c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -144,7 +144,7 @@ void tick_nohz_update_jiffies(void)
 	if (!ts->tick_stopped)
 		return;
 
-	cpu_clear(cpu, nohz_cpu_mask);
+	cpumask_clear_cpu(cpu, nohz_cpu_mask);
 	now = ktime_get();
 	ts->idle_waketime = now;
 
@@ -283,7 +283,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	if ((long)delta_jiffies >= 1) {
 
 		if (delta_jiffies > 1)
-			cpu_set(cpu, nohz_cpu_mask);
+			cpumask_set_cpu(cpu, nohz_cpu_mask);
 		/*
 		 * nohz_stop_sched_tick can be called several times before
 		 * the nohz_restart_sched_tick is called. This happens when
@@ -296,7 +296,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 				/*
 				 * sched tick not stopped!
 				 */
-				cpu_clear(cpu, nohz_cpu_mask);
+				cpumask_clear_cpu(cpu, nohz_cpu_mask);
 				goto out;
 			}
 
@@ -354,7 +354,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 		 * softirq.
 		 */
 		tick_do_update_jiffies64(ktime_get());
-		cpu_clear(cpu, nohz_cpu_mask);
+		cpumask_clear_cpu(cpu, nohz_cpu_mask);
 	}
 	raise_softirq_irqoff(TIMER_SOFTIRQ);
 out:
@@ -432,7 +432,7 @@ void tick_nohz_restart_sched_tick(void)
 	select_nohz_load_balancer(0);
 	now = ktime_get();
 	tick_do_update_jiffies64(now);
-	cpu_clear(cpu, nohz_cpu_mask);
+	cpumask_clear_cpu(cpu, nohz_cpu_mask);
 
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
-- 
cgit v0.10.2


From c6c4927b22a3514c6660f0e72c78716226bd3cc8 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:05 +1030
Subject: sched: convert struct root_domain to cpumask_var_t.

Impact: (future) size reduction for large NR_CPUS.

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space for small nr_cpu_ids but big CONFIG_NR_CPUS.  cpumask_var_t
is just a struct cpumask for !CONFIG_CPUMASK_OFFSTACK.

def_root_domain is static, and so its masks are initialized with
alloc_bootmem_cpumask_var.  After that, alloc_cpumask_var is used.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 2723d7a..93309c3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -487,14 +487,14 @@ struct rt_rq {
  */
 struct root_domain {
 	atomic_t refcount;
-	cpumask_t span;
-	cpumask_t online;
+	cpumask_var_t span;
+	cpumask_var_t online;
 
 	/*
 	 * The "RT overload" flag: it gets set if a CPU has more than
 	 * one runnable RT task.
 	 */
-	cpumask_t rto_mask;
+	cpumask_var_t rto_mask;
 	atomic_t rto_count;
 #ifdef CONFIG_SMP
 	struct cpupri cpupri;
@@ -6444,7 +6444,7 @@ static void set_rq_online(struct rq *rq)
 	if (!rq->online) {
 		const struct sched_class *class;
 
-		cpu_set(rq->cpu, rq->rd->online);
+		cpumask_set_cpu(rq->cpu, rq->rd->online);
 		rq->online = 1;
 
 		for_each_class(class) {
@@ -6464,7 +6464,7 @@ static void set_rq_offline(struct rq *rq)
 				class->rq_offline(rq);
 		}
 
-		cpu_clear(rq->cpu, rq->rd->online);
+		cpumask_clear_cpu(rq->cpu, rq->rd->online);
 		rq->online = 0;
 	}
 }
@@ -6505,7 +6505,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		rq = cpu_rq(cpu);
 		spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
-			BUG_ON(!cpu_isset(cpu, rq->rd->span));
+			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 
 			set_rq_online(rq);
 		}
@@ -6567,7 +6567,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		rq = cpu_rq(cpu);
 		spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
-			BUG_ON(!cpu_isset(cpu, rq->rd->span));
+			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 			set_rq_offline(rq);
 		}
 		spin_unlock_irqrestore(&rq->lock, flags);
@@ -6768,6 +6768,14 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	return 1;
 }
 
+static void free_rootdomain(struct root_domain *rd)
+{
+	free_cpumask_var(rd->rto_mask);
+	free_cpumask_var(rd->online);
+	free_cpumask_var(rd->span);
+	kfree(rd);
+}
+
 static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 {
 	unsigned long flags;
@@ -6777,38 +6785,60 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 	if (rq->rd) {
 		struct root_domain *old_rd = rq->rd;
 
-		if (cpu_isset(rq->cpu, old_rd->online))
+		if (cpumask_test_cpu(rq->cpu, old_rd->online))
 			set_rq_offline(rq);
 
-		cpu_clear(rq->cpu, old_rd->span);
+		cpumask_clear_cpu(rq->cpu, old_rd->span);
 
 		if (atomic_dec_and_test(&old_rd->refcount))
-			kfree(old_rd);
+			free_rootdomain(old_rd);
 	}
 
 	atomic_inc(&rd->refcount);
 	rq->rd = rd;
 
-	cpu_set(rq->cpu, rd->span);
-	if (cpu_isset(rq->cpu, cpu_online_map))
+	cpumask_set_cpu(rq->cpu, rd->span);
+	if (cpumask_test_cpu(rq->cpu, cpu_online_mask))
 		set_rq_online(rq);
 
 	spin_unlock_irqrestore(&rq->lock, flags);
 }
 
-static void init_rootdomain(struct root_domain *rd)
+static int init_rootdomain(struct root_domain *rd, bool bootmem)
 {
 	memset(rd, 0, sizeof(*rd));
 
-	cpus_clear(rd->span);
-	cpus_clear(rd->online);
+	if (bootmem) {
+		alloc_bootmem_cpumask_var(&def_root_domain.span);
+		alloc_bootmem_cpumask_var(&def_root_domain.online);
+		alloc_bootmem_cpumask_var(&def_root_domain.rto_mask);
+		cpupri_init(&rd->cpupri);
+		return 0;
+	}
+
+	if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
+		goto free_rd;
+	if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
+		goto free_span;
+	if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
+		goto free_online;
 
 	cpupri_init(&rd->cpupri);
+	return 0;
+
+free_online:
+	free_cpumask_var(rd->online);
+free_span:
+	free_cpumask_var(rd->span);
+free_rd:
+	kfree(rd);
+	return -ENOMEM;
 }
 
 static void init_defrootdomain(void)
 {
-	init_rootdomain(&def_root_domain);
+	init_rootdomain(&def_root_domain, true);
+
 	atomic_set(&def_root_domain.refcount, 1);
 }
 
@@ -6820,7 +6850,10 @@ static struct root_domain *alloc_rootdomain(void)
 	if (!rd)
 		return NULL;
 
-	init_rootdomain(rd);
+	if (init_rootdomain(rd, false) != 0) {
+		kfree(rd);
+		return NULL;
+	}
 
 	return rd;
 }
@@ -7632,7 +7665,7 @@ free_sched_groups:
 #ifdef CONFIG_NUMA
 error:
 	free_sched_groups(cpu_map, tmpmask);
-	kfree(rd);
+	free_rootdomain(rd);
 	goto free_tmpmask;
 #endif
 }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 4cd813a..820fc42 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -15,7 +15,7 @@ static inline void rt_set_overload(struct rq *rq)
 	if (!rq->online)
 		return;
 
-	cpu_set(rq->cpu, rq->rd->rto_mask);
+	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
 	/*
 	 * Make sure the mask is visible before we set
 	 * the overload count. That is checked to determine
@@ -34,7 +34,7 @@ static inline void rt_clear_overload(struct rq *rq)
 
 	/* the order here really doesn't matter */
 	atomic_dec(&rq->rd->rto_count);
-	cpu_clear(rq->cpu, rq->rd->rto_mask);
+	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
 }
 
 static void update_rt_migration(struct rq *rq)
@@ -139,14 +139,14 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
 }
 
 #ifdef CONFIG_SMP
-static inline cpumask_t sched_rt_period_mask(void)
+static inline const struct cpumask *sched_rt_period_mask(void)
 {
 	return cpu_rq(smp_processor_id())->rd->span;
 }
 #else
-static inline cpumask_t sched_rt_period_mask(void)
+static inline const struct cpumask *sched_rt_period_mask(void)
 {
-	return cpu_online_map;
+	return cpu_online_mask;
 }
 #endif
 
@@ -212,9 +212,9 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
 	return rt_rq->rt_throttled;
 }
 
-static inline cpumask_t sched_rt_period_mask(void)
+static inline const struct cpumask *sched_rt_period_mask(void)
 {
-	return cpu_online_map;
+	return cpu_online_mask;
 }
 
 static inline
@@ -241,11 +241,11 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
 	int i, weight, more = 0;
 	u64 rt_period;
 
-	weight = cpus_weight(rd->span);
+	weight = cpumask_weight(rd->span);
 
 	spin_lock(&rt_b->rt_runtime_lock);
 	rt_period = ktime_to_ns(rt_b->rt_period);
-	for_each_cpu_mask_nr(i, rd->span) {
+	for_each_cpu(i, rd->span) {
 		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
 		s64 diff;
 
@@ -324,7 +324,7 @@ static void __disable_runtime(struct rq *rq)
 		/*
 		 * Greedy reclaim, take back as much as we can.
 		 */
-		for_each_cpu_mask(i, rd->span) {
+		for_each_cpu(i, rd->span) {
 			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
 			s64 diff;
 
@@ -429,13 +429,13 @@ static inline int balance_runtime(struct rt_rq *rt_rq)
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 {
 	int i, idle = 1;
-	cpumask_t span;
+	const struct cpumask *span;
 
 	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return 1;
 
 	span = sched_rt_period_mask();
-	for_each_cpu_mask(i, span) {
+	for_each_cpu(i, span) {
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
 		struct rq *rq = rq_of_rt_rq(rt_rq);
@@ -1181,7 +1181,7 @@ static int pull_rt_task(struct rq *this_rq)
 
 	next = pick_next_task_rt(this_rq);
 
-	for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) {
+	for_each_cpu(cpu, this_rq->rd->rto_mask) {
 		if (this_cpu == cpu)
 			continue;
 
-- 
cgit v0.10.2


From 7d1e6a9b95e3edeac91888bc683ae62f18519432 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:09 +1030
Subject: sched: convert nohz struct to cpumask_var_t.

Impact: (future) size reduction for large NR_CPUS.

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space for small nr_cpu_ids but big CONFIG_NR_CPUS.  cpumask_var_t
is just a struct cpumask for !CONFIG_CPUMASK_OFFSTACK.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 93309c3..2f8ea99 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3758,10 +3758,9 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 #ifdef CONFIG_NO_HZ
 static struct {
 	atomic_t load_balancer;
-	cpumask_t cpu_mask;
+	cpumask_var_t cpu_mask;
 } nohz ____cacheline_aligned = {
 	.load_balancer = ATOMIC_INIT(-1),
-	.cpu_mask = CPU_MASK_NONE,
 };
 
 /*
@@ -3789,7 +3788,7 @@ int select_nohz_load_balancer(int stop_tick)
 	int cpu = smp_processor_id();
 
 	if (stop_tick) {
-		cpu_set(cpu, nohz.cpu_mask);
+		cpumask_set_cpu(cpu, nohz.cpu_mask);
 		cpu_rq(cpu)->in_nohz_recently = 1;
 
 		/*
@@ -3803,7 +3802,7 @@ int select_nohz_load_balancer(int stop_tick)
 		}
 
 		/* time for ilb owner also to sleep */
-		if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) {
+		if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
 			if (atomic_read(&nohz.load_balancer) == cpu)
 				atomic_set(&nohz.load_balancer, -1);
 			return 0;
@@ -3816,10 +3815,10 @@ int select_nohz_load_balancer(int stop_tick)
 		} else if (atomic_read(&nohz.load_balancer) == cpu)
 			return 1;
 	} else {
-		if (!cpu_isset(cpu, nohz.cpu_mask))
+		if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
 			return 0;
 
-		cpu_clear(cpu, nohz.cpu_mask);
+		cpumask_clear_cpu(cpu, nohz.cpu_mask);
 
 		if (atomic_read(&nohz.load_balancer) == cpu)
 			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
@@ -3930,12 +3929,13 @@ static void run_rebalance_domains(struct softirq_action *h)
 	 */
 	if (this_rq->idle_at_tick &&
 	    atomic_read(&nohz.load_balancer) == this_cpu) {
-		cpumask_t cpus = nohz.cpu_mask;
 		struct rq *rq;
 		int balance_cpu;
 
-		cpu_clear(this_cpu, cpus);
-		for_each_cpu(balance_cpu, &cpus) {
+		for_each_cpu(balance_cpu, nohz.cpu_mask) {
+			if (balance_cpu == this_cpu)
+				continue;
+
 			/*
 			 * If this cpu gets work to do, stop the load balancing
 			 * work being done for other cpus. Next load
@@ -3973,7 +3973,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
 		rq->in_nohz_recently = 0;
 
 		if (atomic_read(&nohz.load_balancer) == cpu) {
-			cpu_clear(cpu, nohz.cpu_mask);
+			cpumask_clear_cpu(cpu, nohz.cpu_mask);
 			atomic_set(&nohz.load_balancer, -1);
 		}
 
@@ -3986,7 +3986,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
 			 * TBD: Traverse the sched domains and nominate
 			 * the nearest cpu in the nohz.cpu_mask.
 			 */
-			int ilb = first_cpu(nohz.cpu_mask);
+			int ilb = cpumask_first(nohz.cpu_mask);
 
 			if (ilb < nr_cpu_ids)
 				resched_cpu(ilb);
@@ -3998,7 +3998,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
 	 * cpus with ticks stopped, is it time for that to stop?
 	 */
 	if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu &&
-	    cpus_weight(nohz.cpu_mask) == num_online_cpus()) {
+	    cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
 		resched_cpu(cpu);
 		return;
 	}
@@ -4008,7 +4008,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
 	 * someone else, then no need raise the SCHED_SOFTIRQ
 	 */
 	if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu &&
-	    cpu_isset(cpu, nohz.cpu_mask))
+	    cpumask_test_cpu(cpu, nohz.cpu_mask))
 		return;
 #endif
 	if (time_after_eq(jiffies, rq->next_balance))
@@ -8309,6 +8309,9 @@ void __init sched_init(void)
 
 	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
 	alloc_bootmem_cpumask_var(&nohz_cpu_mask);
+#ifdef CONFIG_NO_HZ
+	alloc_bootmem_cpumask_var(&nohz.cpu_mask);
+#endif
 
 	scheduler_running = 1;
 }
-- 
cgit v0.10.2


From 4d2732c63e0c05cfef2a74868d08eace922dfc3e Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:10 +1030
Subject: sched: convert idle_balance() to cpumask_var_t.

Impact: stack usage reduction

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space in the stack.  cpumask_var_t is just a struct cpumask for
!CONFIG_CPUMASK_OFFSTACK.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 2f8ea99..154a95f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3676,7 +3676,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 	struct sched_domain *sd;
 	int pulled_task = -1;
 	unsigned long next_balance = jiffies + HZ;
-	cpumask_t tmpmask;
+	cpumask_var_t tmpmask;
+
+	if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
+		return;
 
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
@@ -3687,7 +3690,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 		if (sd->flags & SD_BALANCE_NEWIDLE)
 			/* If we've pulled tasks over stop searching: */
 			pulled_task = load_balance_newidle(this_cpu, this_rq,
-							   sd, &tmpmask);
+							   sd, tmpmask);
 
 		interval = msecs_to_jiffies(sd->balance_interval);
 		if (time_after(next_balance, sd->last_balance + interval))
@@ -3702,6 +3705,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 		 */
 		this_rq->next_balance = next_balance;
 	}
+	free_cpumask_var(tmpmask);
 }
 
 /*
-- 
cgit v0.10.2


From a0e902452da16b79d7c9230630ed8a595d14fa85 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:11 +1030
Subject: sched: convert rebalance_domains() to cpumask_var_t.

Impact: stack usage reduction

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space in the stack.  cpumask_var_t is just a struct cpumask for
!CONFIG_CPUMASK_OFFSTACK.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 154a95f..67383e7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3850,7 +3850,11 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
 	int need_serialize;
-	cpumask_t tmp;
+	cpumask_var_t tmp;
+
+	/* Fails alloc?  Rebalancing probably not a priority right now. */
+	if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
+		return;
 
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3875,7 +3879,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		}
 
 		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) {
+			if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
 				/*
 				 * We've pulled tasks over so either we're no
 				 * longer idle, or one of our SMT siblings is
@@ -3909,6 +3913,8 @@ out:
 	 */
 	if (likely(update_next_balance))
 		rq->next_balance = next_balance;
+
+	free_cpumask_var(tmp);
 }
 
 /*
-- 
cgit v0.10.2


From f17c860760927c2a8e41a021eab3317e4415e962 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:11 +1030
Subject: sched: convert sys_sched_getaffinity() to cpumask_var_t.

Impact: stack usage reduction

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space in the stack.  cpumask_var_t is just a struct cpumask for
!CONFIG_CPUMASK_OFFSTACK.

Some jiggling here to make sure we always exit at the bottom (so we hit
the free_cpumask_var there).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 67383e7..6deff24 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5499,19 +5499,24 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
 				      unsigned long __user *user_mask_ptr)
 {
 	int ret;
-	cpumask_t mask;
+	cpumask_var_t mask;
 
-	if (len < sizeof(cpumask_t))
+	if (len < cpumask_size())
 		return -EINVAL;
 
-	ret = sched_getaffinity(pid, &mask);
-	if (ret < 0)
-		return ret;
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
 
-	if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t)))
-		return -EFAULT;
+	ret = sched_getaffinity(pid, mask);
+	if (ret == 0) {
+		if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
+			ret = -EFAULT;
+		else
+			ret = cpumask_size();
+	}
+	free_cpumask_var(mask);
 
-	return sizeof(cpumask_t);
+	return ret;
 }
 
 /**
-- 
cgit v0.10.2


From e76bd8d9850c2296a7e8e24c9dce9b5e6b55fe2f Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:11 +1030
Subject: sched: avoid stack var in move_task_off_dead_cpu

Impact: stack usage reduction

With some care, we can avoid needing a temporary cpumask (we can't
really allocate here, since we can't fail).

This version calls cpuset_cpus_allowed_locked() with the task_rq_lock
held.  I'm fairly sure this works, but there might be a deadlock
hiding.

And of course, we can't get rid of the last cpumask on stack until we
can use cpumask_of_node instead of node_to_cpumask.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 6deff24..f7dee20 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6112,52 +6112,46 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	unsigned long flags;
-	cpumask_t mask;
 	struct rq *rq;
 	int dest_cpu;
+	/* FIXME: Use cpumask_of_node here. */
+	cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
+	const struct cpumask *nodemask = &_nodemask;
+
+again:
+	/* Look for allowed, online CPU in same node. */
+	for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
+		if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+			goto move;
+
+	/* Any allowed, online CPU? */
+	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
+	if (dest_cpu < nr_cpu_ids)
+		goto move;
+
+	/* No more Mr. Nice Guy. */
+	if (dest_cpu >= nr_cpu_ids) {
+		rq = task_rq_lock(p, &flags);
+		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
+		dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
+		task_rq_unlock(rq, &flags);
 
-	do {
-		/* On same node? */
-		node_to_cpumask_ptr(pnodemask, cpu_to_node(dead_cpu));
-
-		cpus_and(mask, *pnodemask, p->cpus_allowed);
-		dest_cpu = cpumask_any_and(cpu_online_mask, &mask);
-
-		/* On any allowed CPU? */
-		if (dest_cpu >= nr_cpu_ids)
-			dest_cpu = cpumask_any_and(cpu_online_mask,
-						   &p->cpus_allowed);
-
-		/* No more Mr. Nice Guy. */
-		if (dest_cpu >= nr_cpu_ids) {
-			cpumask_t cpus_allowed;
-
-			cpuset_cpus_allowed_locked(p, &cpus_allowed);
-			/*
-			 * Try to stay on the same cpuset, where the
-			 * current cpuset may be a subset of all cpus.
-			 * The cpuset_cpus_allowed_locked() variant of
-			 * cpuset_cpus_allowed() will not block. It must be
-			 * called within calls to cpuset_lock/cpuset_unlock.
-			 */
-			rq = task_rq_lock(p, &flags);
-			p->cpus_allowed = cpus_allowed;
-			dest_cpu = cpumask_any_and(cpu_online_mask,
-						    &p->cpus_allowed);
-			task_rq_unlock(rq, &flags);
-
-			/*
-			 * Don't tell them about moving exiting tasks or
-			 * kernel threads (both mm NULL), since they never
-			 * leave kernel.
-			 */
-			if (p->mm && printk_ratelimit()) {
-				printk(KERN_INFO "process %d (%s) no "
-				       "longer affine to cpu%d\n",
-					task_pid_nr(p), p->comm, dead_cpu);
-			}
+		/*
+		 * Don't tell them about moving exiting tasks or
+		 * kernel threads (both mm NULL), since they never
+		 * leave kernel.
+		 */
+		if (p->mm && printk_ratelimit()) {
+			printk(KERN_INFO "process %d (%s) no "
+			       "longer affine to cpu%d\n",
+			       task_pid_nr(p), p->comm, dead_cpu);
 		}
-	} while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
+	}
+
+move:
+	/* It can have affinity changed while we were choosing. */
+	if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
+		goto again;
 }
 
 /*
-- 
cgit v0.10.2


From 5a16f3d30ca4e3f166d691220c003066a14e32b5 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:11 +1030
Subject: sched: convert struct (sys_)sched_setaffinity() to cpumask_var_t.

Impact: stack usage reduction

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space on the stack.  cpumask_var_t is just a struct cpumask for
!CONFIG_CPUMASK_OFFSTACK.

Note the removal of the initializer of new_mask: since the first thing
we did was "cpus_and(new_mask, new_mask, cpus_allowed)" I just changed
that to "cpumask_and(new_mask, in_mask, cpus_allowed);".

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index f7dee20..2d4ff91 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5378,8 +5378,7 @@ out_unlock:
 
 long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
 {
-	cpumask_t cpus_allowed;
-	cpumask_t new_mask = *in_mask;
+	cpumask_var_t cpus_allowed, new_mask;
 	struct task_struct *p;
 	int retval;
 
@@ -5401,6 +5400,14 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
+	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+		retval = -ENOMEM;
+		goto out_put_task;
+	}
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
+		retval = -ENOMEM;
+		goto out_free_cpus_allowed;
+	}
 	retval = -EPERM;
 	if ((current->euid != p->euid) && (current->euid != p->uid) &&
 			!capable(CAP_SYS_NICE))
@@ -5410,24 +5417,28 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
 	if (retval)
 		goto out_unlock;
 
-	cpuset_cpus_allowed(p, &cpus_allowed);
-	cpus_and(new_mask, new_mask, cpus_allowed);
+	cpuset_cpus_allowed(p, cpus_allowed);
+	cpumask_and(new_mask, in_mask, cpus_allowed);
  again:
-	retval = set_cpus_allowed_ptr(p, &new_mask);
+	retval = set_cpus_allowed_ptr(p, new_mask);
 
 	if (!retval) {
-		cpuset_cpus_allowed(p, &cpus_allowed);
-		if (!cpus_subset(new_mask, cpus_allowed)) {
+		cpuset_cpus_allowed(p, cpus_allowed);
+		if (!cpumask_subset(new_mask, cpus_allowed)) {
 			/*
 			 * We must have raced with a concurrent cpuset
 			 * update. Just reset the cpus_allowed to the
 			 * cpuset's cpus_allowed
 			 */
-			new_mask = cpus_allowed;
+			cpumask_copy(new_mask, cpus_allowed);
 			goto again;
 		}
 	}
 out_unlock:
+	free_cpumask_var(new_mask);
+out_free_cpus_allowed:
+	free_cpumask_var(cpus_allowed);
+out_put_task:
 	put_task_struct(p);
 	put_online_cpus();
 	return retval;
@@ -5453,14 +5464,17 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
 asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
 				      unsigned long __user *user_mask_ptr)
 {
-	cpumask_t new_mask;
+	cpumask_var_t new_mask;
 	int retval;
 
-	retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask);
-	if (retval)
-		return retval;
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+		return -ENOMEM;
 
-	return sched_setaffinity(pid, &new_mask);
+	retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
+	if (retval == 0)
+		retval = sched_setaffinity(pid, new_mask);
+	free_cpumask_var(new_mask);
+	return retval;
 }
 
 long sched_getaffinity(pid_t pid, cpumask_t *mask)
-- 
cgit v0.10.2


From d5dd3db1dce73cdd5c45c5a3498c51bd21b8864b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:12 +1030
Subject: sched: convert sched_domain_debug to cpumask_var_t.

Impact: stack usage reduction

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
stack space.  cpumask_var_t is just a struct cpumask for
!CONFIG_CPUMASK_OFFSTACK.

In this case, we always alloced, but we don't need to any more.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 2d4ff91..24012c2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6706,7 +6706,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
 static void sched_domain_debug(struct sched_domain *sd, int cpu)
 {
-	cpumask_t *groupmask;
+	cpumask_var_t groupmask;
 	int level = 0;
 
 	if (!sd) {
@@ -6716,8 +6716,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 
 	printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
 
-	groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
-	if (!groupmask) {
+	if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) {
 		printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
 		return;
 	}
@@ -6730,7 +6729,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 		if (!sd)
 			break;
 	}
-	kfree(groupmask);
+	free_cpumask_var(groupmask);
 }
 #else /* !CONFIG_SCHED_DEBUG */
 # define sched_domain_debug(sd, cpu) do { } while (0)
-- 
cgit v0.10.2


From dcc30a35f71bcf51f1e9b336dc5e41923071509a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:12 +1030
Subject: sched: convert cpu_isolated_map to cpumask_var_t.

Impact: stack usage reduction, (future) size reduction, cleanup

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space for small nr_cpu_ids but big CONFIG_NR_CPUS.  cpumask_var_t
is just a struct cpumask for !CONFIG_CPUMASK_OFFSTACK.

We can also use cpulist_parse() instead of doing it manually in
isolated_cpu_setup.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 24012c2..526618f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6917,19 +6917,12 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 }
 
 /* cpus with isolated domains */
-static cpumask_t cpu_isolated_map = CPU_MASK_NONE;
+static cpumask_var_t cpu_isolated_map;
 
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
-	static int __initdata ints[NR_CPUS];
-	int i;
-
-	str = get_options(str, ARRAY_SIZE(ints), ints);
-	cpus_clear(cpu_isolated_map);
-	for (i = 1; i <= ints[0]; i++)
-		if (ints[i] < NR_CPUS)
-			cpu_set(ints[i], cpu_isolated_map);
+	cpulist_parse(str, *cpu_isolated_map);
 	return 1;
 }
 
@@ -7727,7 +7720,7 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map)
 	doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
 	if (!doms_cur)
 		doms_cur = &fallback_doms;
-	cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
+	cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
 	dattr_cur = NULL;
 	err = build_sched_domains(doms_cur);
 	register_sched_domain_sysctl();
@@ -7826,7 +7819,7 @@ match1:
 	if (doms_new == NULL) {
 		ndoms_cur = 0;
 		doms_new = &fallback_doms;
-		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+		cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
 		WARN_ON_ONCE(dattr_new);
 	}
 
@@ -7985,7 +7978,9 @@ static int update_runtime(struct notifier_block *nfb,
 
 void __init sched_init_smp(void)
 {
-	cpumask_t non_isolated_cpus;
+	cpumask_var_t non_isolated_cpus;
+
+	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
 
 #if defined(CONFIG_NUMA)
 	sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
@@ -7994,10 +7989,10 @@ void __init sched_init_smp(void)
 #endif
 	get_online_cpus();
 	mutex_lock(&sched_domains_mutex);
-	arch_init_sched_domains(&cpu_online_map);
-	cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
-	if (cpus_empty(non_isolated_cpus))
-		cpu_set(smp_processor_id(), non_isolated_cpus);
+	arch_init_sched_domains(cpu_online_mask);
+	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
+	if (cpumask_empty(non_isolated_cpus))
+		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
 	mutex_unlock(&sched_domains_mutex);
 	put_online_cpus();
 
@@ -8012,9 +8007,10 @@ void __init sched_init_smp(void)
 	init_hrtick();
 
 	/* Move init over to a non-isolated CPU */
-	if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0)
+	if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
 		BUG();
 	sched_init_granularity();
+	free_cpumask_var(non_isolated_cpus);
 }
 #else
 void __init sched_init_smp(void)
@@ -8334,6 +8330,7 @@ void __init sched_init(void)
 #ifdef CONFIG_NO_HZ
 	alloc_bootmem_cpumask_var(&nohz.cpu_mask);
 #endif
+	alloc_bootmem_cpumask_var(&cpu_isolated_map);
 
 	scheduler_running = 1;
 }
-- 
cgit v0.10.2


From 4212823fb459eacc8098dd420bb68ebb9917989d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:12 +1030
Subject: sched: convert falback_doms to cpumask_var_t.

Impact: (future) size reduction for large NR_CPUS.

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space for small nr_cpu_ids but big CONFIG_NR_CPUS.  cpumask_var_t
is just a struct cpumask for !CONFIG_CPUMASK_OFFSTACK.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 526618f..42588ad 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7697,10 +7697,10 @@ static struct sched_domain_attr *dattr_cur;
 
 /*
  * Special case: If a kmalloc of a doms_cur partition (array of
- * cpumask_t) fails, then fallback to a single sched domain,
- * as determined by the single cpumask_t fallback_doms.
+ * cpumask) fails, then fallback to a single sched domain,
+ * as determined by the single cpumask fallback_doms.
  */
-static cpumask_t fallback_doms;
+static cpumask_var_t fallback_doms;
 
 void __attribute__((weak)) arch_update_cpu_topology(void)
 {
@@ -7719,7 +7719,7 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map)
 	ndoms_cur = 1;
 	doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
 	if (!doms_cur)
-		doms_cur = &fallback_doms;
+		doms_cur = fallback_doms;
 	cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
 	dattr_cur = NULL;
 	err = build_sched_domains(doms_cur);
@@ -7818,7 +7818,7 @@ match1:
 
 	if (doms_new == NULL) {
 		ndoms_cur = 0;
-		doms_new = &fallback_doms;
+		doms_new = fallback_doms;
 		cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
 		WARN_ON_ONCE(dattr_new);
 	}
@@ -7838,7 +7838,7 @@ match2:
 	}
 
 	/* Remember the new sched domains */
-	if (doms_cur != &fallback_doms)
+	if (doms_cur != fallback_doms)
 		kfree(doms_cur);
 	kfree(dattr_cur);	/* kfree(NULL) is safe */
 	doms_cur = doms_new;
@@ -8011,6 +8011,8 @@ void __init sched_init_smp(void)
 		BUG();
 	sched_init_granularity();
 	free_cpumask_var(non_isolated_cpus);
+
+	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 }
 #else
 void __init sched_init_smp(void)
-- 
cgit v0.10.2


From 68e74568fbe5854952355e942acca51f138096d9 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:13 +1030
Subject: sched: convert struct cpupri_vec cpumask_var_t.

Impact: stack usage reduction, (future) size reduction for large NR_CPUS.

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space for small nr_cpu_ids but big CONFIG_NR_CPUS.

The fact cpupro_init is called both before and after the slab is
available makes for an ugly parameter unfortunately.

We also use cpumask_any_and to get rid of a temporary in cpupri_find.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 42588ad..94fa333 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6792,6 +6792,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 
 static void free_rootdomain(struct root_domain *rd)
 {
+	cpupri_cleanup(&rd->cpupri);
+
 	free_cpumask_var(rd->rto_mask);
 	free_cpumask_var(rd->online);
 	free_cpumask_var(rd->span);
@@ -6834,7 +6836,7 @@ static int init_rootdomain(struct root_domain *rd, bool bootmem)
 		alloc_bootmem_cpumask_var(&def_root_domain.span);
 		alloc_bootmem_cpumask_var(&def_root_domain.online);
 		alloc_bootmem_cpumask_var(&def_root_domain.rto_mask);
-		cpupri_init(&rd->cpupri);
+		cpupri_init(&rd->cpupri, true);
 		return 0;
 	}
 
@@ -6845,9 +6847,12 @@ static int init_rootdomain(struct root_domain *rd, bool bootmem)
 	if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
 		goto free_online;
 
-	cpupri_init(&rd->cpupri);
+	if (cpupri_init(&rd->cpupri, false) != 0)
+		goto free_rto_mask;
 	return 0;
 
+free_rto_mask:
+	free_cpumask_var(rd->rto_mask);
 free_online:
 	free_cpumask_var(rd->online);
 free_span:
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 52154fe..018b7be 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -67,24 +67,21 @@ static int convert_prio(int prio)
  * Returns: (int)bool - CPUs were found
  */
 int cpupri_find(struct cpupri *cp, struct task_struct *p,
-		cpumask_t *lowest_mask)
+		struct cpumask *lowest_mask)
 {
 	int                  idx      = 0;
 	int                  task_pri = convert_prio(p->prio);
 
 	for_each_cpupri_active(cp->pri_active, idx) {
 		struct cpupri_vec *vec  = &cp->pri_to_cpu[idx];
-		cpumask_t mask;
 
 		if (idx >= task_pri)
 			break;
 
-		cpus_and(mask, p->cpus_allowed, vec->mask);
-
-		if (cpus_empty(mask))
+		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
 			continue;
 
-		*lowest_mask = mask;
+		cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
 		return 1;
 	}
 
@@ -126,7 +123,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 		vec->count--;
 		if (!vec->count)
 			clear_bit(oldpri, cp->pri_active);
-		cpu_clear(cpu, vec->mask);
+		cpumask_clear_cpu(cpu, vec->mask);
 
 		spin_unlock_irqrestore(&vec->lock, flags);
 	}
@@ -136,7 +133,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 
 		spin_lock_irqsave(&vec->lock, flags);
 
-		cpu_set(cpu, vec->mask);
+		cpumask_set_cpu(cpu, vec->mask);
 		vec->count++;
 		if (vec->count == 1)
 			set_bit(newpri, cp->pri_active);
@@ -150,10 +147,11 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 /**
  * cpupri_init - initialize the cpupri structure
  * @cp: The cpupri context
+ * @bootmem: true if allocations need to use bootmem
  *
- * Returns: (void)
+ * Returns: -ENOMEM if memory fails.
  */
-void cpupri_init(struct cpupri *cp)
+int cpupri_init(struct cpupri *cp, bool bootmem)
 {
 	int i;
 
@@ -164,11 +162,30 @@ void cpupri_init(struct cpupri *cp)
 
 		spin_lock_init(&vec->lock);
 		vec->count = 0;
-		cpus_clear(vec->mask);
+		if (bootmem)
+			alloc_bootmem_cpumask_var(&vec->mask);
+		else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL))
+			goto cleanup;
 	}
 
 	for_each_possible_cpu(i)
 		cp->cpu_to_pri[i] = CPUPRI_INVALID;
+	return 0;
+
+cleanup:
+	for (i--; i >= 0; i--)
+		free_cpumask_var(cp->pri_to_cpu[i].mask);
+	return -ENOMEM;
 }
 
+/**
+ * cpupri_cleanup - clean up the cpupri structure
+ * @cp: The cpupri context
+ */
+void cpupri_cleanup(struct cpupri *cp)
+{
+	int i;
 
+	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
+		free_cpumask_var(cp->pri_to_cpu[i].mask);
+}
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
index f25811b0..642a94e 100644
--- a/kernel/sched_cpupri.h
+++ b/kernel/sched_cpupri.h
@@ -14,7 +14,7 @@
 struct cpupri_vec {
 	spinlock_t lock;
 	int        count;
-	cpumask_t  mask;
+	cpumask_var_t mask;
 };
 
 struct cpupri {
@@ -27,7 +27,8 @@ struct cpupri {
 int  cpupri_find(struct cpupri *cp,
 		 struct task_struct *p, cpumask_t *lowest_mask);
 void cpupri_set(struct cpupri *cp, int cpu, int pri);
-void cpupri_init(struct cpupri *cp);
+int cpupri_init(struct cpupri *cp, bool bootmem);
+void cpupri_cleanup(struct cpupri *cp);
 #else
 #define cpupri_set(cp, cpu, pri) do { } while (0)
 #define cpupri_init() do { } while (0)
-- 
cgit v0.10.2


From 24600ce89a819a8f2fb4fd69fd777218a82ade20 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:13 +1030
Subject: sched: convert check_preempt_equal_prio to cpumask_var_t.

Impact: stack reduction for large NR_CPUS

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
stack space.

We simply return if the allocation fails: since we don't use it we
could just pass NULL to cpupri_find and have it handle that.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 820fc42..1fa1362 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -805,17 +805,20 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
 
 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 {
-	cpumask_t mask;
+	cpumask_var_t mask;
 
 	if (rq->curr->rt.nr_cpus_allowed == 1)
 		return;
 
-	if (p->rt.nr_cpus_allowed != 1
-	    && cpupri_find(&rq->rd->cpupri, p, &mask))
+	if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
 		return;
 
-	if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
-		return;
+	if (p->rt.nr_cpus_allowed != 1
+	    && cpupri_find(&rq->rd->cpupri, p, mask))
+		goto free;
+
+	if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask))
+		goto free;
 
 	/*
 	 * There appears to be other cpus that can accept
@@ -824,6 +827,8 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 	 */
 	requeue_task_rt(rq, p, 1);
 	resched_task(rq->curr);
+free:
+	free_cpumask_var(mask);
 }
 
 #endif /* CONFIG_SMP */
-- 
cgit v0.10.2


From 0e3900e6d3b04c44737ebc505604dcd8ed30e354 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:13 +1030
Subject: sched: convert local_cpu_mask to cpumask_var_t.

Impact: (future) size reduction for large NR_CPUS.

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space for small nr_cpu_ids but big CONFIG_NR_CPUS.  cpumask_var_t
is just a struct cpumask for !CONFIG_CPUMASK_OFFSTACK.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 94fa333..f2be618 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8018,6 +8018,7 @@ void __init sched_init_smp(void)
 	free_cpumask_var(non_isolated_cpus);
 
 	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
+	init_sched_rt_class();
 }
 #else
 void __init sched_init_smp(void)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1fa1362..1f0e99d 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -962,7 +962,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
 	return next;
 }
 
-static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
+static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
 static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 {
@@ -982,7 +982,7 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 static int find_lowest_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
-	cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
+	cpumask_t *lowest_mask = __get_cpu_var(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu      = task_cpu(task);
 
@@ -1551,3 +1551,12 @@ static void print_rt_stats(struct seq_file *m, int cpu)
 	rcu_read_unlock();
 }
 #endif /* CONFIG_SCHED_DEBUG */
+
+/* Note that this is never called for !SMP, but that's OK. */
+static inline void init_sched_rt_class(void)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i)
+		alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL);
+}
-- 
cgit v0.10.2


From 96f874e26428ab5d2db681c100210c254775e154 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:14 +1030
Subject: sched: convert remaining old-style cpumask operators

Impact: Trivial API conversion

  NR_CPUS -> nr_cpu_ids
  cpumask_t -> struct cpumask
  sizeof(cpumask_t) -> cpumask_size()
  cpumask_a = cpumask_b -> cpumask_copy(&cpumask_a, &cpumask_b)

  cpu_set() -> cpumask_set_cpu()
  first_cpu() -> cpumask_first()
  cpumask_of_cpu() -> cpumask_of()
  cpus_* -> cpumask_*

There are some FIXMEs where we all archs to complete infrastructure
(patches have been sent):

  cpu_coregroup_map -> cpu_coregroup_mask
  node_to_cpumask* -> cpumask_of_node

There is also one FIXME where we pass an array of cpumasks to
partition_sched_domains(): this implies knowing the definition of
'struct cpumask' and the size of a cpumask.  This will be fixed in a
future patch.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1e33e2c..4b7b018 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -879,7 +879,7 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 	return to_cpumask(sd->span);
 }
 
-extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 				    struct sched_domain_attr *dattr_new);
 extern int arch_reinit_sched_domains(void);
 
@@ -888,7 +888,7 @@ extern int arch_reinit_sched_domains(void);
 struct sched_domain_attr;
 
 static inline void
-partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 			struct sched_domain_attr *dattr_new)
 {
 }
@@ -970,7 +970,7 @@ struct sched_class {
 	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
 
 	void (*set_cpus_allowed)(struct task_struct *p,
-				 const cpumask_t *newmask);
+				 const struct cpumask *newmask);
 
 	void (*rq_online)(struct rq *rq);
 	void (*rq_offline)(struct rq *rq);
@@ -1612,12 +1612,12 @@ extern cputime_t task_gtime(struct task_struct *p);
 
 #ifdef CONFIG_SMP
 extern int set_cpus_allowed_ptr(struct task_struct *p,
-				const cpumask_t *new_mask);
+				const struct cpumask *new_mask);
 #else
 static inline int set_cpus_allowed_ptr(struct task_struct *p,
-				       const cpumask_t *new_mask)
+				       const struct cpumask *new_mask)
 {
-	if (!cpu_isset(0, *new_mask))
+	if (!cpumask_test_cpu(0, new_mask))
 		return -EINVAL;
 	return 0;
 }
@@ -2230,8 +2230,8 @@ __trace_special(void *__tr, void *__data,
 }
 #endif
 
-extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
-extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
+extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
+extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
 extern int sched_mc_power_savings, sched_smt_power_savings;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index f2be618..eba6a15 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2829,7 +2829,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 	struct rq *rq;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpu_isset(dest_cpu, p->cpus_allowed)
+	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
 	    || unlikely(!cpu_active(dest_cpu)))
 		goto out;
 
@@ -2895,7 +2895,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
 	 * 3) are cache-hot on their current CPU.
 	 */
-	if (!cpu_isset(this_cpu, p->cpus_allowed)) {
+	if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
 		schedstat_inc(p, se.nr_failed_migrations_affine);
 		return 0;
 	}
@@ -3070,7 +3070,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
 		   unsigned long *imbalance, enum cpu_idle_type idle,
-		   int *sd_idle, const cpumask_t *cpus, int *balance)
+		   int *sd_idle, const struct cpumask *cpus, int *balance)
 {
 	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -3387,7 +3387,7 @@ ret:
  */
 static struct rq *
 find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
-		   unsigned long imbalance, const cpumask_t *cpus)
+		   unsigned long imbalance, const struct cpumask *cpus)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -3396,7 +3396,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 	for_each_cpu(i, sched_group_cpus(group)) {
 		unsigned long wl;
 
-		if (!cpu_isset(i, *cpus))
+		if (!cpumask_test_cpu(i, cpus))
 			continue;
 
 		rq = cpu_rq(i);
@@ -3426,7 +3426,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *balance, cpumask_t *cpus)
+			int *balance, struct cpumask *cpus)
 {
 	int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
@@ -3434,7 +3434,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	struct rq *busiest;
 	unsigned long flags;
 
-	cpus_setall(*cpus);
+	cpumask_setall(cpus);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -3494,8 +3494,8 @@ redo:
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
 		if (unlikely(all_pinned)) {
-			cpu_clear(cpu_of(busiest), *cpus);
-			if (!cpus_empty(*cpus))
+			cpumask_clear_cpu(cpu_of(busiest), cpus);
+			if (!cpumask_empty(cpus))
 				goto redo;
 			goto out_balanced;
 		}
@@ -3512,7 +3512,8 @@ redo:
 			/* don't kick the migration_thread, if the curr
 			 * task on busiest cpu can't be moved to this_cpu
 			 */
-			if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+			if (!cpumask_test_cpu(this_cpu,
+					      &busiest->curr->cpus_allowed)) {
 				spin_unlock_irqrestore(&busiest->lock, flags);
 				all_pinned = 1;
 				goto out_one_pinned;
@@ -3587,7 +3588,7 @@ out:
  */
 static int
 load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-			cpumask_t *cpus)
+			struct cpumask *cpus)
 {
 	struct sched_group *group;
 	struct rq *busiest = NULL;
@@ -3596,7 +3597,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
 	int sd_idle = 0;
 	int all_pinned = 0;
 
-	cpus_setall(*cpus);
+	cpumask_setall(cpus);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -3640,8 +3641,8 @@ redo:
 		double_unlock_balance(this_rq, busiest);
 
 		if (unlikely(all_pinned)) {
-			cpu_clear(cpu_of(busiest), *cpus);
-			if (!cpus_empty(*cpus))
+			cpumask_clear_cpu(cpu_of(busiest), cpus);
+			if (!cpumask_empty(cpus))
 				goto redo;
 		}
 	}
@@ -5376,7 +5377,7 @@ out_unlock:
 	return retval;
 }
 
-long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 {
 	cpumask_var_t cpus_allowed, new_mask;
 	struct task_struct *p;
@@ -5445,13 +5446,13 @@ out_put_task:
 }
 
 static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
-			     cpumask_t *new_mask)
+			     struct cpumask *new_mask)
 {
-	if (len < sizeof(cpumask_t)) {
-		memset(new_mask, 0, sizeof(cpumask_t));
-	} else if (len > sizeof(cpumask_t)) {
-		len = sizeof(cpumask_t);
-	}
+	if (len < cpumask_size())
+		cpumask_clear(new_mask);
+	else if (len > cpumask_size())
+		len = cpumask_size();
+
 	return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
 }
 
@@ -5477,7 +5478,7 @@ asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
 	return retval;
 }
 
-long sched_getaffinity(pid_t pid, cpumask_t *mask)
+long sched_getaffinity(pid_t pid, struct cpumask *mask)
 {
 	struct task_struct *p;
 	int retval;
@@ -5494,7 +5495,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
 	if (retval)
 		goto out_unlock;
 
-	cpus_and(*mask, p->cpus_allowed, cpu_online_map);
+	cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
 
 out_unlock:
 	read_unlock(&tasklist_lock);
@@ -5872,7 +5873,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	idle->se.exec_start = sched_clock();
 
 	idle->prio = idle->normal_prio = MAX_PRIO;
-	idle->cpus_allowed = cpumask_of_cpu(cpu);
+	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
 	__set_task_cpu(idle, cpu);
 
 	rq->curr = rq->idle = idle;
@@ -5956,7 +5957,7 @@ static inline void sched_init_granularity(void)
  * task must not exit() & deallocate itself prematurely. The
  * call is not atomic; no spinlocks may be held.
  */
-int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
 	struct migration_req req;
 	unsigned long flags;
@@ -5964,13 +5965,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
 	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpus_intersects(*new_mask, cpu_online_map)) {
+	if (!cpumask_intersects(new_mask, cpu_online_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
-		     !cpus_equal(p->cpus_allowed, *new_mask))) {
+		     !cpumask_equal(&p->cpus_allowed, new_mask))) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -5978,12 +5979,12 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
 	if (p->sched_class->set_cpus_allowed)
 		p->sched_class->set_cpus_allowed(p, new_mask);
 	else {
-		p->cpus_allowed = *new_mask;
-		p->rt.nr_cpus_allowed = cpus_weight(*new_mask);
+		cpumask_copy(&p->cpus_allowed, new_mask);
+		p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
 	}
 
 	/* Can the task run on the task's current CPU? If so, we're done */
-	if (cpu_isset(task_cpu(p), *new_mask))
+	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
 	if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
@@ -6028,7 +6029,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	if (task_cpu(p) != src_cpu)
 		goto done;
 	/* Affinity changed (again). */
-	if (!cpu_isset(dest_cpu, p->cpus_allowed))
+	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
 		goto fail;
 
 	on_rq = p->se.on_rq;
@@ -6629,13 +6630,13 @@ early_initcall(migration_init);
 #ifdef CONFIG_SCHED_DEBUG
 
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
-				  cpumask_t *groupmask)
+				  struct cpumask *groupmask)
 {
 	struct sched_group *group = sd->groups;
 	char str[256];
 
 	cpulist_scnprintf(str, sizeof(str), *sched_domain_span(sd));
-	cpus_clear(*groupmask);
+	cpumask_clear(groupmask);
 
 	printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
 
@@ -6936,24 +6937,25 @@ __setup("isolcpus=", isolated_cpu_setup);
 /*
  * init_sched_build_groups takes the cpumask we wish to span, and a pointer
  * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS
- * (due to the fact that we keep track of groups covered with a cpumask_t).
+ * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
+ * (due to the fact that we keep track of groups covered with a struct cpumask).
  *
  * init_sched_build_groups will build a circular linked list of the groups
  * covered by the given span, and will set each group's ->cpumask correctly,
  * and ->cpu_power to 0.
  */
 static void
-init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
-			int (*group_fn)(int cpu, const cpumask_t *cpu_map,
+init_sched_build_groups(const struct cpumask *span,
+			const struct cpumask *cpu_map,
+			int (*group_fn)(int cpu, const struct cpumask *cpu_map,
 					struct sched_group **sg,
-					cpumask_t *tmpmask),
-			cpumask_t *covered, cpumask_t *tmpmask)
+					struct cpumask *tmpmask),
+			struct cpumask *covered, struct cpumask *tmpmask)
 {
 	struct sched_group *first = NULL, *last = NULL;
 	int i;
 
-	cpus_clear(*covered);
+	cpumask_clear(covered);
 
 	for_each_cpu(i, span) {
 		struct sched_group *sg;
@@ -6970,7 +6972,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
 			if (group_fn(j, cpu_map, NULL, tmpmask) != group)
 				continue;
 
-			cpu_set(j, *covered);
+			cpumask_set_cpu(j, covered);
 			cpumask_set_cpu(j, sched_group_cpus(sg));
 		}
 		if (!first)
@@ -7035,9 +7037,10 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
  * should be one that prevents unnecessary balancing, but also spreads tasks
  * out optimally.
  */
-static void sched_domain_node_span(int node, cpumask_t *span)
+static void sched_domain_node_span(int node, struct cpumask *span)
 {
 	nodemask_t used_nodes;
+	/* FIXME: use cpumask_of_node() */
 	node_to_cpumask_ptr(nodemask, node);
 	int i;
 
@@ -7081,8 +7084,8 @@ static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
 static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
 
 static int
-cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		 cpumask_t *unused)
+cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
+		 struct sched_group **sg, struct cpumask *unused)
 {
 	if (sg)
 		*sg = &per_cpu(sched_group_cpus, cpu).sg;
@@ -7100,22 +7103,21 @@ static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
 
 #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
 static int
-cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		  cpumask_t *mask)
+cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
+		  struct sched_group **sg, struct cpumask *mask)
 {
 	int group;
 
-	*mask = per_cpu(cpu_sibling_map, cpu);
-	cpus_and(*mask, *mask, *cpu_map);
-	group = first_cpu(*mask);
+	cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+	group = cpumask_first(mask);
 	if (sg)
 		*sg = &per_cpu(sched_group_core, group).sg;
 	return group;
 }
 #elif defined(CONFIG_SCHED_MC)
 static int
-cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		  cpumask_t *unused)
+cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
+		  struct sched_group **sg, struct cpumask *unused)
 {
 	if (sg)
 		*sg = &per_cpu(sched_group_core, cpu).sg;
@@ -7127,18 +7129,18 @@ static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
 static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
 
 static int
-cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		  cpumask_t *mask)
+cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
+		  struct sched_group **sg, struct cpumask *mask)
 {
 	int group;
 #ifdef CONFIG_SCHED_MC
+	/* FIXME: Use cpu_coregroup_mask. */
 	*mask = cpu_coregroup_map(cpu);
 	cpus_and(*mask, *mask, *cpu_map);
-	group = first_cpu(*mask);
+	group = cpumask_first(mask);
 #elif defined(CONFIG_SCHED_SMT)
-	*mask = per_cpu(cpu_sibling_map, cpu);
-	cpus_and(*mask, *mask, *cpu_map);
-	group = first_cpu(*mask);
+	cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+	group = cpumask_first(mask);
 #else
 	group = cpu;
 #endif
@@ -7159,14 +7161,16 @@ static struct sched_group ***sched_group_nodes_bycpu;
 static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
 static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
 
-static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
-				 struct sched_group **sg, cpumask_t *nodemask)
+static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
+				 struct sched_group **sg,
+				 struct cpumask *nodemask)
 {
 	int group;
+	/* FIXME: use cpumask_of_node */
 	node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
 
-	cpus_and(*nodemask, *pnodemask, *cpu_map);
-	group = first_cpu(*nodemask);
+	cpumask_and(nodemask, pnodemask, cpu_map);
+	group = cpumask_first(nodemask);
 
 	if (sg)
 		*sg = &per_cpu(sched_group_allnodes, group).sg;
@@ -7202,7 +7206,8 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 
 #ifdef CONFIG_NUMA
 /* Free memory allocated for various sched_group structures */
-static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
+static void free_sched_groups(const struct cpumask *cpu_map,
+			      struct cpumask *nodemask)
 {
 	int cpu, i;
 
@@ -7215,10 +7220,11 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
 
 		for (i = 0; i < nr_node_ids; i++) {
 			struct sched_group *oldsg, *sg = sched_group_nodes[i];
+			/* FIXME: Use cpumask_of_node */
 			node_to_cpumask_ptr(pnodemask, i);
 
 			cpus_and(*nodemask, *pnodemask, *cpu_map);
-			if (cpus_empty(*nodemask))
+			if (cpumask_empty(nodemask))
 				continue;
 
 			if (sg == NULL)
@@ -7236,7 +7242,8 @@ next_sg:
 	}
 }
 #else /* !CONFIG_NUMA */
-static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
+static void free_sched_groups(const struct cpumask *cpu_map,
+			      struct cpumask *nodemask)
 {
 }
 #endif /* CONFIG_NUMA */
@@ -7366,7 +7373,7 @@ static void set_domain_attribute(struct sched_domain *sd,
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
-static int __build_sched_domains(const cpumask_t *cpu_map,
+static int __build_sched_domains(const struct cpumask *cpu_map,
 				 struct sched_domain_attr *attr)
 {
 	int i, err = -ENOMEM;
@@ -7416,7 +7423,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	}
 
 #ifdef CONFIG_NUMA
-	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
+	sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes;
 #endif
 
 	/*
@@ -7425,12 +7432,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd = NULL, *p;
 
+		/* FIXME: use cpumask_of_node */
 		*nodemask = node_to_cpumask(cpu_to_node(i));
 		cpus_and(*nodemask, *nodemask, *cpu_map);
 
 #ifdef CONFIG_NUMA
-		if (cpus_weight(*cpu_map) >
-				SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) {
+		if (cpumask_weight(cpu_map) >
+				SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
 			sd = &per_cpu(allnodes_domains, i);
 			SD_INIT(sd, ALLNODES);
 			set_domain_attribute(sd, attr);
@@ -7491,9 +7499,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #ifdef CONFIG_SCHED_SMT
 	/* Set up CPU (sibling) groups */
 	for_each_cpu(i, cpu_map) {
-		*this_sibling_map = per_cpu(cpu_sibling_map, i);
-		cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
-		if (i != first_cpu(*this_sibling_map))
+		cpumask_and(this_sibling_map,
+			    &per_cpu(cpu_sibling_map, i), cpu_map);
+		if (i != cpumask_first(this_sibling_map))
 			continue;
 
 		init_sched_build_groups(this_sibling_map, cpu_map,
@@ -7505,9 +7513,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #ifdef CONFIG_SCHED_MC
 	/* Set up multi-core groups */
 	for_each_cpu(i, cpu_map) {
+		/* FIXME: Use cpu_coregroup_mask */
 		*this_core_map = cpu_coregroup_map(i);
 		cpus_and(*this_core_map, *this_core_map, *cpu_map);
-		if (i != first_cpu(*this_core_map))
+		if (i != cpumask_first(this_core_map))
 			continue;
 
 		init_sched_build_groups(this_core_map, cpu_map,
@@ -7518,9 +7527,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 	/* Set up physical groups */
 	for (i = 0; i < nr_node_ids; i++) {
+		/* FIXME: Use cpumask_of_node */
 		*nodemask = node_to_cpumask(i);
 		cpus_and(*nodemask, *nodemask, *cpu_map);
-		if (cpus_empty(*nodemask))
+		if (cpumask_empty(nodemask))
 			continue;
 
 		init_sched_build_groups(nodemask, cpu_map,
@@ -7541,17 +7551,18 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		struct sched_group *sg, *prev;
 		int j;
 
+		/* FIXME: Use cpumask_of_node */
 		*nodemask = node_to_cpumask(i);
-		cpus_clear(*covered);
+		cpumask_clear(covered);
 
 		cpus_and(*nodemask, *nodemask, *cpu_map);
-		if (cpus_empty(*nodemask)) {
+		if (cpumask_empty(nodemask)) {
 			sched_group_nodes[i] = NULL;
 			continue;
 		}
 
 		sched_domain_node_span(i, domainspan);
-		cpus_and(*domainspan, *domainspan, *cpu_map);
+		cpumask_and(domainspan, domainspan, cpu_map);
 
 		sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
 				  GFP_KERNEL, i);
@@ -7570,21 +7581,22 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		sg->__cpu_power = 0;
 		cpumask_copy(sched_group_cpus(sg), nodemask);
 		sg->next = sg;
-		cpus_or(*covered, *covered, *nodemask);
+		cpumask_or(covered, covered, nodemask);
 		prev = sg;
 
 		for (j = 0; j < nr_node_ids; j++) {
 			int n = (i + j) % nr_node_ids;
+			/* FIXME: Use cpumask_of_node */
 			node_to_cpumask_ptr(pnodemask, n);
 
-			cpus_complement(*notcovered, *covered);
-			cpus_and(*tmpmask, *notcovered, *cpu_map);
-			cpus_and(*tmpmask, *tmpmask, *domainspan);
-			if (cpus_empty(*tmpmask))
+			cpumask_complement(notcovered, covered);
+			cpumask_and(tmpmask, notcovered, cpu_map);
+			cpumask_and(tmpmask, tmpmask, domainspan);
+			if (cpumask_empty(tmpmask))
 				break;
 
-			cpus_and(*tmpmask, *tmpmask, *pnodemask);
-			if (cpus_empty(*tmpmask))
+			cpumask_and(tmpmask, tmpmask, pnodemask);
+			if (cpumask_empty(tmpmask))
 				continue;
 
 			sg = kmalloc_node(sizeof(struct sched_group) +
@@ -7598,7 +7610,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 			sg->__cpu_power = 0;
 			cpumask_copy(sched_group_cpus(sg), tmpmask);
 			sg->next = prev->next;
-			cpus_or(*covered, *covered, *tmpmask);
+			cpumask_or(covered, covered, tmpmask);
 			prev->next = sg;
 			prev = sg;
 		}
@@ -7634,7 +7646,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	if (sd_allnodes) {
 		struct sched_group *sg;
 
-		cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg,
+		cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
 								tmpmask);
 		init_numa_sched_groups_power(sg);
 	}
@@ -7690,12 +7702,12 @@ error:
 #endif
 }
 
-static int build_sched_domains(const cpumask_t *cpu_map)
+static int build_sched_domains(const struct cpumask *cpu_map)
 {
 	return __build_sched_domains(cpu_map, NULL);
 }
 
-static cpumask_t *doms_cur;	/* current sched domains */
+static struct cpumask *doms_cur;	/* current sched domains */
 static int ndoms_cur;		/* number of sched domains in 'doms_cur' */
 static struct sched_domain_attr *dattr_cur;
 				/* attribues of custom domains in 'doms_cur' */
@@ -7716,13 +7728,13 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
  * For now this just excludes isolated cpus, but could be used to
  * exclude other special cases in the future.
  */
-static int arch_init_sched_domains(const cpumask_t *cpu_map)
+static int arch_init_sched_domains(const struct cpumask *cpu_map)
 {
 	int err;
 
 	arch_update_cpu_topology();
 	ndoms_cur = 1;
-	doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+	doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
 	if (!doms_cur)
 		doms_cur = fallback_doms;
 	cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
@@ -7733,8 +7745,8 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map)
 	return err;
 }
 
-static void arch_destroy_sched_domains(const cpumask_t *cpu_map,
-				       cpumask_t *tmpmask)
+static void arch_destroy_sched_domains(const struct cpumask *cpu_map,
+				       struct cpumask *tmpmask)
 {
 	free_sched_groups(cpu_map, tmpmask);
 }
@@ -7743,15 +7755,16 @@ static void arch_destroy_sched_domains(const cpumask_t *cpu_map,
  * Detach sched domains from a group of cpus specified in cpu_map
  * These cpus will now be attached to the NULL domain
  */
-static void detach_destroy_domains(const cpumask_t *cpu_map)
+static void detach_destroy_domains(const struct cpumask *cpu_map)
 {
-	cpumask_t tmpmask;
+	/* Save because hotplug lock held. */
+	static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
 	int i;
 
 	for_each_cpu(i, cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
-	arch_destroy_sched_domains(cpu_map, &tmpmask);
+	arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
 }
 
 /* handle null as "default" */
@@ -7776,7 +7789,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * doms_new[] to the current sched domain partitioning, doms_cur[].
  * It destroys each deleted domain and builds each new domain.
  *
- * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'.
+ * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
  * The masks don't intersect (don't overlap.) We should setup one
  * sched domain for each mask. CPUs not in any of the cpumasks will
  * not be load balanced. If the same cpumask appears both in the
@@ -7790,13 +7803,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * the single partition 'fallback_doms', it also forces the domains
  * to be rebuilt.
  *
- * If doms_new == NULL it will be replaced with cpu_online_map.
+ * If doms_new == NULL it will be replaced with cpu_online_mask.
  * ndoms_new == 0 is a special case for destroying existing domains,
  * and it will not create the default domain.
  *
  * Call with hotplug lock held
  */
-void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+/* FIXME: Change to struct cpumask *doms_new[] */
+void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 			     struct sched_domain_attr *dattr_new)
 {
 	int i, j, n;
@@ -7811,7 +7825,7 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
 		for (j = 0; j < n; j++) {
-			if (cpus_equal(doms_cur[i], doms_new[j])
+			if (cpumask_equal(&doms_cur[i], &doms_new[j])
 			    && dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
 		}
@@ -7831,7 +7845,7 @@ match1:
 	/* Build new domains */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < ndoms_cur; j++) {
-			if (cpus_equal(doms_new[i], doms_cur[j])
+			if (cpumask_equal(&doms_new[i], &doms_cur[j])
 			    && dattrs_equal(dattr_new, i, dattr_cur, j))
 				goto match2;
 		}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index bba0040..08ffffd 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1017,7 +1017,7 @@ static void yield_task_fair(struct rq *rq)
  * search starts with cpus closest then further out as needed,
  * so we always favor a closer, idle cpu.
  * Domains may include CPUs that are not usable for migration,
- * hence we need to mask them out (cpu_active_map)
+ * hence we need to mask them out (cpu_active_mask)
  *
  * Returns the CPU we should wake onto.
  */
@@ -1244,7 +1244,7 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 		}
 	}
 
-	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
+	if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
 		goto out;
 
 	/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1f0e99d..fb39645 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -923,7 +923,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
+	    (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
 	    (p->rt.nr_cpus_allowed > 1))
 		return 1;
 	return 0;
@@ -982,7 +982,7 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 static int find_lowest_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
-	cpumask_t *lowest_mask = __get_cpu_var(local_cpu_mask);
+	struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu      = task_cpu(task);
 
@@ -997,7 +997,7 @@ static int find_lowest_rq(struct task_struct *task)
 	 * I guess we might want to change cpupri_find() to ignore those
 	 * in the first place.
 	 */
-	cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
+	cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
 
 	/*
 	 * At this point we have built a mask of cpus representing the
@@ -1007,7 +1007,7 @@ static int find_lowest_rq(struct task_struct *task)
 	 * We prioritize the last cpu that the task executed on since
 	 * it is most likely cache-hot in that location.
 	 */
-	if (cpu_isset(cpu, *lowest_mask))
+	if (cpumask_test_cpu(cpu, lowest_mask))
 		return cpu;
 
 	/*
@@ -1064,8 +1064,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 			 * Also make sure that it wasn't scheduled on its rq.
 			 */
 			if (unlikely(task_rq(task) != rq ||
-				     !cpu_isset(lowest_rq->cpu,
-						task->cpus_allowed) ||
+				     !cpumask_test_cpu(lowest_rq->cpu,
+						       &task->cpus_allowed) ||
 				     task_running(rq, task) ||
 				     !task->se.on_rq)) {
 
@@ -1315,9 +1315,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 }
 
 static void set_cpus_allowed_rt(struct task_struct *p,
-				const cpumask_t *new_mask)
+				const struct cpumask *new_mask)
 {
-	int weight = cpus_weight(*new_mask);
+	int weight = cpumask_weight(new_mask);
 
 	BUG_ON(!rt_task(p));
 
@@ -1338,7 +1338,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 		update_rt_migration(rq);
 	}
 
-	p->cpus_allowed    = *new_mask;
+	cpumask_copy(&p->cpus_allowed, new_mask);
 	p->rt.nr_cpus_allowed = weight;
 }
 
-- 
cgit v0.10.2


From 56968d0c1a920eb165c06318f5c458724e1df0af Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Tue, 25 Nov 2008 14:23:40 +0000
Subject: wusb: whci-hcd shouldn't do ASL/PZL updates while channel is inactive

ASL/PZL updates while the WUSB channel is inactive (i.e., the PZL and
ASL are stopped) may not complete.  This causes hangs when removing the
whci-hcd module if a device is still connected (removing the device
does an endpoint_disable which results in an ASL update to remove the
qset).

If the WUSB channel is inactive the update can simply be skipped as the
WHC doesn't care about the state of the ASL/PZL.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/host/whci/asl.c b/drivers/usb/host/whci/asl.c
index 4d7078e..ba99a7a 100644
--- a/drivers/usb/host/whci/asl.c
+++ b/drivers/usb/host/whci/asl.c
@@ -179,11 +179,26 @@ void asl_stop(struct whc *whc)
 		      1000, "stop ASL");
 }
 
+/**
+ * asl_update - request an ASL update and wait for the hardware to be synced
+ * @whc: the WHCI HC
+ * @wusbcmd: WUSBCMD value to start the update.
+ *
+ * If the WUSB HC is inactive (i.e., the ASL is stopped) then the
+ * update must be skipped as the hardware may not respond to update
+ * requests.
+ */
 void asl_update(struct whc *whc, uint32_t wusbcmd)
 {
-	whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
-	wait_event(whc->async_list_wq,
-		   (le_readl(whc->base + WUSBCMD) & WUSBCMD_ASYNC_UPDATED) == 0);
+	struct wusbhc *wusbhc = &whc->wusbhc;
+
+	mutex_lock(&wusbhc->mutex);
+	if (wusbhc->active) {
+		whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
+		wait_event(whc->async_list_wq,
+			   (le_readl(whc->base + WUSBCMD) & WUSBCMD_ASYNC_UPDATED) == 0);
+	}
+	mutex_unlock(&wusbhc->mutex);
 }
 
 /**
diff --git a/drivers/usb/host/whci/pzl.c b/drivers/usb/host/whci/pzl.c
index 8d62df0..34d3a0a 100644
--- a/drivers/usb/host/whci/pzl.c
+++ b/drivers/usb/host/whci/pzl.c
@@ -195,11 +195,26 @@ void pzl_stop(struct whc *whc)
 		      1000, "stop PZL");
 }
 
+/**
+ * pzl_update - request a PZL update and wait for the hardware to be synced
+ * @whc: the WHCI HC
+ * @wusbcmd: WUSBCMD value to start the update.
+ *
+ * If the WUSB HC is inactive (i.e., the PZL is stopped) then the
+ * update must be skipped as the hardware may not respond to update
+ * requests.
+ */
 void pzl_update(struct whc *whc, uint32_t wusbcmd)
 {
-	whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
-	wait_event(whc->periodic_list_wq,
-		   (le_readl(whc->base + WUSBCMD) & WUSBCMD_PERIODIC_UPDATED) == 0);
+	struct wusbhc *wusbhc = &whc->wusbhc;
+
+	mutex_lock(&wusbhc->mutex);
+	if (wusbhc->active) {
+		whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
+		wait_event(whc->periodic_list_wq,
+			   (le_readl(whc->base + WUSBCMD) & WUSBCMD_PERIODIC_UPDATED) == 0);
+	}
+	mutex_unlock(&wusbhc->mutex);
 }
 
 static void update_pzl_hw_view(struct whc *whc)
diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c
index 08a1ec9..26cbc89 100644
--- a/drivers/usb/wusbcore/devconnect.c
+++ b/drivers/usb/wusbcore/devconnect.c
@@ -484,21 +484,15 @@ static void __wusbhc_keep_alive(struct wusbhc *wusbhc)
  */
 static void wusbhc_keep_alive_run(struct work_struct *ws)
 {
-	struct delayed_work *dw =
-		container_of(ws, struct delayed_work, work);
-	struct wusbhc *wusbhc =
-		container_of(dw, struct wusbhc, keep_alive_timer);
-
-	d_fnstart(5, wusbhc->dev, "(wusbhc %p)\n", wusbhc);
-	if (wusbhc->active) {
-		mutex_lock(&wusbhc->mutex);
-		__wusbhc_keep_alive(wusbhc);
-		mutex_unlock(&wusbhc->mutex);
-		queue_delayed_work(wusbd, &wusbhc->keep_alive_timer,
-				   (wusbhc->trust_timeout * CONFIG_HZ)/1000/2);
-	}
-	d_fnend(5, wusbhc->dev, "(wusbhc %p) = void\n", wusbhc);
-	return;
+	struct delayed_work *dw = container_of(ws, struct delayed_work, work);
+	struct wusbhc *wusbhc =	container_of(dw, struct wusbhc, keep_alive_timer);
+
+	mutex_lock(&wusbhc->mutex);
+	__wusbhc_keep_alive(wusbhc);
+	mutex_unlock(&wusbhc->mutex);
+
+	queue_delayed_work(wusbd, &wusbhc->keep_alive_timer,
+			   msecs_to_jiffies(wusbhc->trust_timeout / 2));
 }
 
 /*
diff --git a/drivers/usb/wusbcore/mmc.c b/drivers/usb/wusbcore/mmc.c
index 5463ece..3b52161 100644
--- a/drivers/usb/wusbcore/mmc.c
+++ b/drivers/usb/wusbcore/mmc.c
@@ -159,6 +159,27 @@ found:
 }
 EXPORT_SYMBOL_GPL(wusbhc_mmcie_rm);
 
+static int wusbhc_mmc_start(struct wusbhc *wusbhc)
+{
+	int ret;
+
+	mutex_lock(&wusbhc->mutex);
+	ret = wusbhc->start(wusbhc);
+	if (ret >= 0)
+		wusbhc->active = 1;
+	mutex_unlock(&wusbhc->mutex);
+
+	return ret;
+}
+
+static void wusbhc_mmc_stop(struct wusbhc *wusbhc)
+{
+	mutex_lock(&wusbhc->mutex);
+	wusbhc->active = 0;
+	wusbhc->stop(wusbhc, WUSB_CHANNEL_STOP_DELAY_MS);
+	mutex_unlock(&wusbhc->mutex);
+}
+
 /*
  * wusbhc_start - start transmitting MMCs and accepting connections
  * @wusbhc: the HC to start
@@ -198,12 +219,12 @@ int wusbhc_start(struct wusbhc *wusbhc)
 		dev_err(dev, "Cannot set DNTS parameters: %d\n", result);
 		goto error_set_num_dnts;
 	}
-	result = wusbhc->start(wusbhc);
+	result = wusbhc_mmc_start(wusbhc);
 	if (result < 0) {
 		dev_err(dev, "error starting wusbch: %d\n", result);
 		goto error_wusbhc_start;
 	}
-	wusbhc->active = 1;
+
 	return 0;
 
 error_wusbhc_start:
@@ -225,15 +246,11 @@ error_rsv_establish:
  */
 void wusbhc_stop(struct wusbhc *wusbhc)
 {
-	if (wusbhc->active) {
-		wusbhc->active = 0;
-		wusbhc->stop(wusbhc, WUSB_CHANNEL_STOP_DELAY_MS);
-		wusbhc_sec_stop(wusbhc);
-		wusbhc_devconnect_stop(wusbhc);
-		wusbhc_rsv_terminate(wusbhc);
-	}
+	wusbhc_mmc_stop(wusbhc);
+	wusbhc_sec_stop(wusbhc);
+	wusbhc_devconnect_stop(wusbhc);
+	wusbhc_rsv_terminate(wusbhc);
 }
-EXPORT_SYMBOL_GPL(wusbhc_stop);
 
 /*
  * Set/reset/update a new CHID
-- 
cgit v0.10.2


From 5a4e1a795d7c5b47e94067a72db09f8cfb52bcff Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Tue, 25 Nov 2008 14:34:47 +0000
Subject: uwb: clean up whci_wait_for() timeout error message

All callers of whci_wait_for() should get consistant error message if a
timeout occurs.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c
index e0d6693..5f00386 100644
--- a/drivers/uwb/whc-rc.c
+++ b/drivers/uwb/whc-rc.c
@@ -332,47 +332,23 @@ void whcrc_release_rc_umc(struct whcrc *whcrc)
 static int whcrc_start_rc(struct uwb_rc *rc)
 {
 	struct whcrc *whcrc = rc->priv;
-	int result = 0;
 	struct device *dev = &whcrc->umc_dev->dev;
-	unsigned long start, duration;
 
 	/* Reset the thing */
 	le_writel(URCCMD_RESET, whcrc->rc_base + URCCMD);
-	if (d_test(3))
-		start = jiffies;
 	if (whci_wait_for(dev, whcrc->rc_base + URCCMD, URCCMD_RESET, 0,
-			  5000, "device to reset at init") < 0) {
-		result = -EBUSY;
-		goto error;
-	} else if (d_test(3)) {
-		duration = jiffies - start;
-		if (duration > msecs_to_jiffies(40))
-			dev_err(dev, "Device took %ums to "
-				     "reset. MAX expected: 40ms\n",
-				     jiffies_to_msecs(duration));
-	}
+			  5000, "hardware reset") < 0)
+		return -EBUSY;
 
 	/* Set the event buffer, start the controller (enable IRQs later) */
 	le_writel(0, whcrc->rc_base + URCINTR);
 	le_writel(URCCMD_RS, whcrc->rc_base + URCCMD);
-	result = -ETIMEDOUT;
-	if (d_test(3))
-		start = jiffies;
 	if (whci_wait_for(dev, whcrc->rc_base + URCSTS, URCSTS_HALTED, 0,
-			  5000, "device to start") < 0)
-		goto error;
-	if (d_test(3)) {
-		duration = jiffies - start;
-		if (duration > msecs_to_jiffies(40))
-			dev_err(dev, "Device took %ums to start. "
-				     "MAX expected: 40ms\n",
-				     jiffies_to_msecs(duration));
-	}
+			  5000, "radio controller start") < 0)
+		return -ETIMEDOUT;
 	whcrc_enable_events(whcrc);
-	result = 0;
 	le_writel(URCINTR_EN_ALL, whcrc->rc_base + URCINTR);
-error:
-	return result;
+	return 0;
 }
 
 
@@ -394,7 +370,7 @@ void whcrc_stop_rc(struct uwb_rc *rc)
 
 	le_writel(0, whcrc->rc_base + URCCMD);
 	whci_wait_for(&umc_dev->dev, whcrc->rc_base + URCSTS,
-		      URCSTS_HALTED, URCSTS_HALTED, 100, "URCSTS.HALTED");
+		      URCSTS_HALTED, URCSTS_HALTED, 100, "radio controller stop");
 }
 
 static void whcrc_init(struct whcrc *whcrc)
diff --git a/drivers/uwb/whci.c b/drivers/uwb/whci.c
index e626467..1f8964e 100644
--- a/drivers/uwb/whci.c
+++ b/drivers/uwb/whci.c
@@ -67,11 +67,11 @@ int whci_wait_for(struct device *dev, u32 __iomem *reg, u32 mask, u32 result,
 		val = le_readl(reg);
 		if ((val & mask) == result)
 			break;
-		msleep(10);
 		if (t >= max_ms) {
-			dev_err(dev, "timed out waiting for %s ", tag);
+			dev_err(dev, "%s timed out\n", tag);
 			return -ETIMEDOUT;
 		}
+		msleep(10);
 		t += 10;
 	}
 	return 0;
-- 
cgit v0.10.2


From bf4d83f66476086c6b50dc52aac00d71ad70494e Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 09:57:51 +1030
Subject: sched: convert nohz struct to cpumask_var_t, fix

Impact: build fix

Fix the !CONFIG_SMP case.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index eba6a15..1aa840a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8349,10 +8349,12 @@ void __init sched_init(void)
 
 	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
 	alloc_bootmem_cpumask_var(&nohz_cpu_mask);
+#ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ
 	alloc_bootmem_cpumask_var(&nohz.cpu_mask);
 #endif
 	alloc_bootmem_cpumask_var(&cpu_isolated_map);
+#endif /* SMP */
 
 	scheduler_running = 1;
 }
-- 
cgit v0.10.2


From 3d8cbdf8650f44d95333ca645d950832a0653f35 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 09:58:41 +1030
Subject: sched: convert local_cpu_mask to cpumask_var_t, fix

Impact: build fix for !CONFIG_SMP

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index fb39645..94aab72 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1381,6 +1381,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
 	if (!rq->rt.rt_nr_running)
 		pull_rt_task(rq);
 }
+
+static inline void init_sched_rt_class(void)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i)
+		alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1552,11 +1560,3 @@ static void print_rt_stats(struct seq_file *m, int cpu)
 }
 #endif /* CONFIG_SCHED_DEBUG */
 
-/* Note that this is never called for !SMP, but that's OK. */
-static inline void init_sched_rt_class(void)
-{
-	unsigned int i;
-
-	for_each_possible_cpu(i)
-		alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL);
-}
-- 
cgit v0.10.2


From 1224e376f2a7e3c7ab19ef37099a78597978a696 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 09:59:20 +1030
Subject: sched: avoid stack var in move_task_off_dead_cpu, fix

Impact: locking fix

We can't call cpuset_cpus_allowed_locked() with the rq lock held.
However, the rq lock merely protects us from (1) cpu_online_mask changing
and (2) someone else changing p->cpus_allowed.

The first can't happen because we're being called from a cpu hotplug
notifier.  The second doesn't really matter: we are forcing the task off
a CPU it was affine to, so we're not doing very well anyway.

So we remove the rq lock from this path, and all is good.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 1aa840a..3f5bfdc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6126,8 +6126,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
  */
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
-	unsigned long flags;
-	struct rq *rq;
 	int dest_cpu;
 	/* FIXME: Use cpumask_of_node here. */
 	cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
@@ -6146,10 +6144,8 @@ again:
 
 	/* No more Mr. Nice Guy. */
 	if (dest_cpu >= nr_cpu_ids) {
-		rq = task_rq_lock(p, &flags);
 		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
 		dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
-		task_rq_unlock(rq, &flags);
 
 		/*
 		 * Don't tell them about moving exiting tasks or
-- 
cgit v0.10.2


From e4b49580f70380a4216ff8220c8f48a95e21c238 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Wed, 26 Nov 2008 12:47:05 +0000
Subject: uwb: fix oops in debug PAL's reservation callback

Initialize pal_priv for reservations created by the debug PAL.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index e02fb83..ec1b7a4 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -122,7 +122,7 @@ static int cmd_rsv_establish(struct uwb_rc *rc,
 	if (target == NULL)
 		return -ENODEV;
 
-	rsv = uwb_rsv_create(rc, uwb_dbg_rsv_cb, NULL);
+	rsv = uwb_rsv_create(rc, uwb_dbg_rsv_cb, rc->dbg);
 	if (rsv == NULL) {
 		uwb_dev_put(target);
 		return -ENOMEM;
-- 
cgit v0.10.2


From 1c39194878c09bd88ffc9c9d4c2f01c3397c7aed Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 26 Nov 2008 14:13:42 +0100
Subject: sched: convert struct root_domain to cpumask_var_t, fix

Mathieu Desnoyers reported this build failure on powerpc:

 kernel/sched.c: In function 'sd_init_NODE':
 kernel/sched.c:7319: error: non-static initialization of a flexible array member
 kernel/sched.c:7319: error: (near initialization for '(anonymous)')

this happens because .span changed to cpumask_var_t, hence
the static CPU_MASK_NONE initializers in the SD_*_INIT
templates are not type-correct anymore.

Remove them, as they default to empty anyway.

Also remove them from IA64, MIPS and SH.

Reported-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 35bcb64..a3cc9f6 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -55,7 +55,6 @@
 void build_cpu_to_node_map(void);
 
 #define SD_CPU_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.child			= NULL,			\
 	.groups			= NULL,			\
@@ -80,7 +79,6 @@ void build_cpu_to_node_map(void);
 
 /* sched_domains SD_NODE_INIT for IA64 NUMA machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.child			= NULL,			\
 	.groups			= NULL,			\
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 7785bec..1fb959f 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -37,7 +37,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
 
 /* sched_domains SD_NODE_INIT for SGI IP27 machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.child			= NULL,			\
 	.groups			= NULL,			\
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c32da6f..373fca3 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -48,7 +48,6 @@ static inline int pcibus_to_node(struct pci_bus *bus)
 
 /* sched_domains SD_NODE_INIT for PPC64 machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.child			= NULL,			\
 	.groups			= NULL,			\
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index 95f0085..279d9cc 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -5,7 +5,6 @@
 
 /* sched_domains SD_NODE_INIT for sh machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.child			= NULL,			\
 	.groups			= NULL,			\
-- 
cgit v0.10.2


From dcc7461eef7341e84e2f7274f904ce01a43b2506 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Wed, 26 Nov 2008 13:36:59 +0000
Subject: wusb: add debug files for ASL, PZL and DI to the whci-hcd driver

Add asl, pzl and di debugfs files to uwb/uwbN/wusbhc for WHCI host
controller.  These dump the current ASL, PZL and DI buffer.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/host/whci/Kbuild b/drivers/usb/host/whci/Kbuild
index 26a3871..11e5040 100644
--- a/drivers/usb/host/whci/Kbuild
+++ b/drivers/usb/host/whci/Kbuild
@@ -2,6 +2,7 @@ obj-$(CONFIG_USB_WHCI_HCD) += whci-hcd.o
 
 whci-hcd-y := \
 	asl.o	\
+	debug.o \
 	hcd.o 	\
 	hw.o	\
 	init.o	\
diff --git a/drivers/usb/host/whci/asl.c b/drivers/usb/host/whci/asl.c
index ba99a7a..577c0d2 100644
--- a/drivers/usb/host/whci/asl.c
+++ b/drivers/usb/host/whci/asl.c
@@ -19,32 +19,11 @@
 #include <linux/dma-mapping.h>
 #include <linux/uwb/umc.h>
 #include <linux/usb.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 #include "../../wusbcore/wusbhc.h"
 
 #include "whcd.h"
 
-#if D_LOCAL >= 4
-static void dump_asl(struct whc *whc, const char *tag)
-{
-	struct device *dev = &whc->umc->dev;
-	struct whc_qset *qset;
-
-	d_printf(4, dev, "ASL %s\n", tag);
-
-	list_for_each_entry(qset, &whc->async_list, list_node) {
-		dump_qset(qset, dev);
-	}
-}
-#else
-static inline void dump_asl(struct whc *whc, const char *tag)
-{
-}
-#endif
-
-
 static void qset_get_next_prev(struct whc *whc, struct whc_qset *qset,
 			       struct whc_qset **next, struct whc_qset **prev)
 {
@@ -217,8 +196,6 @@ void scan_async_work(struct work_struct *work)
 
 	spin_lock_irq(&whc->lock);
 
-	dump_asl(whc, "before processing");
-
 	/*
 	 * Transerve the software list backwards so new qsets can be
 	 * safely inserted into the ASL without making it non-circular.
@@ -232,8 +209,6 @@ void scan_async_work(struct work_struct *work)
 		update |= process_qset(whc, qset);
 	}
 
-	dump_asl(whc, "after processing");
-
 	spin_unlock_irq(&whc->lock);
 
 	if (update) {
diff --git a/drivers/usb/host/whci/debug.c b/drivers/usb/host/whci/debug.c
new file mode 100644
index 0000000..cf2d459
--- /dev/null
+++ b/drivers/usb/host/whci/debug.c
@@ -0,0 +1,189 @@
+/*
+ * Wireless Host Controller (WHC) debug.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "../../wusbcore/wusbhc.h"
+
+#include "whcd.h"
+
+struct whc_dbg {
+	struct dentry *di_f;
+	struct dentry *asl_f;
+	struct dentry *pzl_f;
+};
+
+void qset_print(struct seq_file *s, struct whc_qset *qset)
+{
+	struct whc_std *std;
+	struct urb *urb = NULL;
+	int i;
+
+	seq_printf(s, "qset %08x\n", (u32)qset->qset_dma);
+	seq_printf(s, "  -> %08x\n", (u32)qset->qh.link);
+	seq_printf(s, "  info: %08x %08x %08x\n",
+		qset->qh.info1, qset->qh.info2,  qset->qh.info3);
+	seq_printf(s, "  sts: %04x errs: %d\n", qset->qh.status, qset->qh.err_count);
+	seq_printf(s, "  TD: sts: %08x opts: %08x\n",
+		qset->qh.overlay.qtd.status, qset->qh.overlay.qtd.options);
+
+	for (i = 0; i < WHCI_QSET_TD_MAX; i++) {
+		seq_printf(s, "  %c%c TD[%d]: sts: %08x opts: %08x ptr: %08x\n",
+			i == qset->td_start ? 'S' : ' ',
+			i == qset->td_end ? 'E' : ' ',
+			i, qset->qtd[i].status, qset->qtd[i].options,
+			(u32)qset->qtd[i].page_list_ptr);
+	}
+	seq_printf(s, "  ntds: %d\n", qset->ntds);
+	list_for_each_entry(std, &qset->stds, list_node) {
+		if (urb != std->urb) {
+			urb = std->urb;
+			seq_printf(s, "  urb %p transferred: %d bytes\n", urb,
+				urb->actual_length);
+		}
+		if (std->qtd)
+			seq_printf(s, "    sTD[%td]: %zu bytes @ %08x\n",
+				std->qtd - &qset->qtd[0],
+				std->len, std->num_pointers ?
+				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
+		else
+			seq_printf(s, "    sTD[-]: %zd bytes @ %08x\n",
+				std->len, std->num_pointers ?
+				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
+	}
+}
+
+static int di_print(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	char buf[72];
+	int d;
+
+	for (d = 0; d < whc->n_devices; d++) {
+		struct di_buf_entry *di = &whc->di_buf[d];
+
+		bitmap_scnprintf(buf, sizeof(buf),
+				 (unsigned long *)di->availability_info, UWB_NUM_MAS);
+
+		seq_printf(s, "DI[%d]\n", d);
+		seq_printf(s, "  availability: %s\n", buf);
+		seq_printf(s, "  %c%c key idx: %d dev addr: %d\n",
+			   (di->addr_sec_info & WHC_DI_SECURE) ? 'S' : ' ',
+			   (di->addr_sec_info & WHC_DI_DISABLE) ? 'D' : ' ',
+			   (di->addr_sec_info & WHC_DI_KEY_IDX_MASK) >> 8,
+			   (di->addr_sec_info & WHC_DI_DEV_ADDR_MASK));
+	}
+	return 0;
+}
+
+static int asl_print(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	struct whc_qset *qset;
+
+	list_for_each_entry(qset, &whc->async_list, list_node) {
+		qset_print(s, qset);
+	}
+
+	return 0;
+}
+
+static int pzl_print(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	struct whc_qset *qset;
+	int period;
+
+	for (period = 0; period < 5; period++) {
+		seq_printf(s, "Period %d\n", period);
+		list_for_each_entry(qset, &whc->periodic_list[period], list_node) {
+			qset_print(s, qset);
+		}
+	}
+	return 0;
+}
+
+static int di_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, di_print, inode->i_private);
+}
+
+static int asl_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, asl_print, inode->i_private);
+}
+
+static int pzl_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pzl_print, inode->i_private);
+}
+
+static struct file_operations di_fops = {
+	.open    = di_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+	.owner   = THIS_MODULE,
+};
+
+static struct file_operations asl_fops = {
+	.open    = asl_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+	.owner   = THIS_MODULE,
+};
+
+static struct file_operations pzl_fops = {
+	.open    = pzl_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+	.owner   = THIS_MODULE,
+};
+
+void whc_dbg_init(struct whc *whc)
+{
+	if (whc->wusbhc.pal.debugfs_dir == NULL)
+		return;
+
+	whc->dbg = kzalloc(sizeof(struct whc_dbg), GFP_KERNEL);
+	if (whc->dbg == NULL)
+		return;
+
+	whc->dbg->di_f = debugfs_create_file("di", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &di_fops);
+	whc->dbg->asl_f = debugfs_create_file("asl", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &asl_fops);
+	whc->dbg->pzl_f = debugfs_create_file("pzl", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &pzl_fops);
+}
+
+void whc_dbg_clean_up(struct whc *whc)
+{
+	if (whc->dbg) {
+		debugfs_remove(whc->dbg->pzl_f);
+		debugfs_remove(whc->dbg->asl_f);
+		debugfs_remove(whc->dbg->di_f);
+		kfree(whc->dbg);
+	}
+}
diff --git a/drivers/usb/host/whci/hcd.c b/drivers/usb/host/whci/hcd.c
index f599f89..1569afd 100644
--- a/drivers/usb/host/whci/hcd.c
+++ b/drivers/usb/host/whci/hcd.c
@@ -273,6 +273,8 @@ static int whc_probe(struct umc_dev *umc)
 		goto error_wusbhc_b_create;
 	}
 
+	whc_dbg_init(whc);
+
 	return 0;
 
 error_wusbhc_b_create:
@@ -296,6 +298,7 @@ static void whc_remove(struct umc_dev *umc)
 	struct whc *whc = wusbhc_to_whc(wusbhc);
 
 	if (usb_hcd) {
+		whc_dbg_clean_up(whc);
 		wusbhc_b_destroy(wusbhc);
 		usb_remove_hcd(usb_hcd);
 		wusbhc_destroy(wusbhc);
diff --git a/drivers/usb/host/whci/pzl.c b/drivers/usb/host/whci/pzl.c
index 34d3a0a..2ae5abf 100644
--- a/drivers/usb/host/whci/pzl.c
+++ b/drivers/usb/host/whci/pzl.c
@@ -19,35 +19,11 @@
 #include <linux/dma-mapping.h>
 #include <linux/uwb/umc.h>
 #include <linux/usb.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 #include "../../wusbcore/wusbhc.h"
 
 #include "whcd.h"
 
-#if D_LOCAL >= 4
-static void dump_pzl(struct whc *whc, const char *tag)
-{
-	struct device *dev = &whc->umc->dev;
-	struct whc_qset *qset;
-	int period = 0;
-
-	d_printf(4, dev, "PZL %s\n", tag);
-
-	for (period = 0; period < 5; period++) {
-		d_printf(4, dev, "Period %d\n", period);
-		list_for_each_entry(qset, &whc->periodic_list[period], list_node) {
-			dump_qset(qset, dev);
-		}
-	}
-}
-#else
-static inline void dump_pzl(struct whc *whc, const char *tag)
-{
-}
-#endif
-
 static void update_pzl_pointers(struct whc *whc, int period, u64 addr)
 {
 	switch (period) {
@@ -250,8 +226,6 @@ void scan_periodic_work(struct work_struct *work)
 
 	spin_lock_irq(&whc->lock);
 
-	dump_pzl(whc, "before processing");
-
 	for (period = 4; period >= 0; period--) {
 		list_for_each_entry_safe(qset, t, &whc->periodic_list[period], list_node) {
 			if (!qset->in_hw_list)
@@ -263,8 +237,6 @@ void scan_periodic_work(struct work_struct *work)
 	if (update & (WHC_UPDATE_ADDED | WHC_UPDATE_REMOVED))
 		update_pzl_hw_view(whc);
 
-	dump_pzl(whc, "after processing");
-
 	spin_unlock_irq(&whc->lock);
 
 	if (update) {
diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c
index 0420037..7be7431 100644
--- a/drivers/usb/host/whci/qset.c
+++ b/drivers/usb/host/whci/qset.c
@@ -24,46 +24,6 @@
 
 #include "whcd.h"
 
-void dump_qset(struct whc_qset *qset, struct device *dev)
-{
-	struct whc_std *std;
-	struct urb *urb = NULL;
-	int i;
-
-	dev_dbg(dev, "qset %08x\n", (u32)qset->qset_dma);
-	dev_dbg(dev, "  -> %08x\n", (u32)qset->qh.link);
-	dev_dbg(dev, "  info: %08x %08x %08x\n",
-		qset->qh.info1, qset->qh.info2,  qset->qh.info3);
-	dev_dbg(dev, "  sts: %04x errs: %d\n", qset->qh.status, qset->qh.err_count);
-	dev_dbg(dev, "  TD: sts: %08x opts: %08x\n",
-		qset->qh.overlay.qtd.status, qset->qh.overlay.qtd.options);
-
-	for (i = 0; i < WHCI_QSET_TD_MAX; i++) {
-		dev_dbg(dev, "  %c%c TD[%d]: sts: %08x opts: %08x ptr: %08x\n",
-			i == qset->td_start ? 'S' : ' ',
-			i == qset->td_end ? 'E' : ' ',
-			i, qset->qtd[i].status, qset->qtd[i].options,
-			(u32)qset->qtd[i].page_list_ptr);
-	}
-	dev_dbg(dev, "  ntds: %d\n", qset->ntds);
-	list_for_each_entry(std, &qset->stds, list_node) {
-		if (urb != std->urb) {
-			urb = std->urb;
-			dev_dbg(dev, "  urb %p transferred: %d bytes\n", urb,
-				urb->actual_length);
-		}
-		if (std->qtd)
-			dev_dbg(dev, "    sTD[%td]: %zu bytes @ %08x\n",
-				std->qtd - &qset->qtd[0],
-				std->len, std->num_pointers ?
-				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
-		else
-			dev_dbg(dev, "    sTD[-]: %zd bytes @ %08x\n",
-				std->len, std->num_pointers ?
-				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
-	}
-}
-
 struct whc_qset *qset_alloc(struct whc *whc, gfp_t mem_flags)
 {
 	struct whc_qset *qset;
diff --git a/drivers/usb/host/whci/whcd.h b/drivers/usb/host/whci/whcd.h
index 1bbb8cb..0f3540f 100644
--- a/drivers/usb/host/whci/whcd.h
+++ b/drivers/usb/host/whci/whcd.h
@@ -21,6 +21,7 @@
 #define __WHCD_H
 
 #include <linux/uwb/whci.h>
+#include <linux/uwb/umc.h>
 #include <linux/workqueue.h>
 
 #include "whci-hc.h"
@@ -28,6 +29,7 @@
 /* Generic command timeout. */
 #define WHC_GENCMD_TIMEOUT_MS 100
 
+struct whc_dbg;
 
 struct whc {
 	struct wusbhc wusbhc;
@@ -69,6 +71,8 @@ struct whc {
 	struct list_head periodic_removed_list;
 	wait_queue_head_t periodic_list_wq;
 	struct work_struct periodic_work;
+
+	struct whc_dbg *dbg;
 };
 
 #define wusbhc_to_whc(w) (container_of((w), struct whc, wusbhc))
@@ -190,8 +194,11 @@ void process_inactive_qtd(struct whc *whc, struct whc_qset *qset,
 				 struct whc_qtd *qtd);
 enum whc_update qset_add_qtds(struct whc *whc, struct whc_qset *qset);
 void qset_remove_complete(struct whc *whc, struct whc_qset *qset);
-void dump_qset(struct whc_qset *qset, struct device *dev);
 void pzl_update(struct whc *whc, uint32_t wusbcmd);
 void asl_update(struct whc *whc, uint32_t wusbcmd);
 
+/* debug.c */
+void whc_dbg_init(struct whc *whc);
+void whc_dbg_clean_up(struct whc *whc);
+
 #endif /* #ifndef __WHCD_H */
diff --git a/drivers/usb/host/whci/wusb.c b/drivers/usb/host/whci/wusb.c
index 540021a..f24efde 100644
--- a/drivers/usb/host/whci/wusb.c
+++ b/drivers/usb/host/whci/wusb.c
@@ -18,43 +18,16 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/uwb/umc.h>
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
 
 #include "../../wusbcore/wusbhc.h"
 
 #include "whcd.h"
 
-#if D_LOCAL >= 1
-static void dump_di(struct whc *whc, int idx)
-{
-	struct di_buf_entry *di = &whc->di_buf[idx];
-	struct device *dev = &whc->umc->dev;
-	char buf[128];
-
-	bitmap_scnprintf(buf, sizeof(buf), (unsigned long *)di->availability_info, UWB_NUM_MAS);
-
-	d_printf(1, dev, "DI[%d]\n", idx);
-	d_printf(1, dev, "  availability: %s\n", buf);
-	d_printf(1, dev, "  %c%c key idx: %d dev addr: %d\n",
-		 (di->addr_sec_info & WHC_DI_SECURE) ? 'S' : ' ',
-		 (di->addr_sec_info & WHC_DI_DISABLE) ? 'D' : ' ',
-		 (di->addr_sec_info & WHC_DI_KEY_IDX_MASK) >> 8,
-		 (di->addr_sec_info & WHC_DI_DEV_ADDR_MASK));
-}
-#else
-static inline void dump_di(struct whc *whc, int idx)
-{
-}
-#endif
-
 static int whc_update_di(struct whc *whc, int idx)
 {
 	int offset = idx / 32;
 	u32 bit = 1 << (idx % 32);
 
-	dump_di(whc, idx);
-
 	le_writel(bit, whc->base + WUSBDIBUPDATED + offset);
 
 	return whci_wait_for(&whc->umc->dev,
diff --git a/drivers/uwb/pal.c b/drivers/uwb/pal.c
index 6057651..99a19c1 100644
--- a/drivers/uwb/pal.c
+++ b/drivers/uwb/pal.c
@@ -16,6 +16,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/kernel.h>
+#include <linux/debugfs.h>
 #include <linux/uwb.h>
 
 #include "uwb-internal.h"
@@ -54,6 +55,8 @@ int uwb_pal_register(struct uwb_pal *pal)
 		}
 	}
 
+	pal->debugfs_dir = uwb_dbg_create_pal_dir(pal);
+
 	mutex_lock(&rc->uwb_dev.mutex);
 	list_add(&pal->node, &rc->pals);
 	mutex_unlock(&rc->uwb_dev.mutex);
@@ -76,6 +79,8 @@ void uwb_pal_unregister(struct uwb_pal *pal)
 	list_del(&pal->node);
 	mutex_unlock(&rc->uwb_dev.mutex);
 
+	debugfs_remove(pal->debugfs_dir);
+
 	if (pal->device) {
 		sysfs_remove_link(&rc->uwb_dev.dev.kobj, pal->name);
 		sysfs_remove_link(&pal->device->kobj, "uwb_rc");
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index ec1b7a4..a6debb9 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -407,3 +407,16 @@ void uwb_dbg_exit(void)
 {
 	debugfs_remove(root_dir);
 }
+
+/**
+ * uwb_dbg_create_pal_dir - create a debugfs directory for a PAL
+ * @pal: The PAL.
+ */
+struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+
+	if (root_dir && rc->dbg && rc->dbg->root_d && pal->name)
+		return debugfs_create_dir(pal->name, rc->dbg->root_d);
+	return NULL;
+}
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index 9c0cdb4..f0f21f4 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -284,8 +284,7 @@ void uwb_dbg_init(void);
 void uwb_dbg_exit(void);
 void uwb_dbg_add_rc(struct uwb_rc *rc);
 void uwb_dbg_del_rc(struct uwb_rc *rc);
-
-/* Workarounds for version specific stuff */
+struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal);
 
 static inline void uwb_dev_lock(struct uwb_dev *uwb_dev)
 {
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index 1719709..d7ed520 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -394,6 +394,8 @@ struct uwb_rc {
  * @channel: channel being used by the PAL; 0 if the PAL isn't using
  *           the radio; -1 if the PAL wishes to use the radio but
  *           cannot.
+ * @debugfs_dir: a debugfs directory which the PAL can use for its own
+ *           debugfs files.
  *
  * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB
  * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP).
@@ -418,6 +420,7 @@ struct uwb_pal {
 	void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv);
 
 	int channel;
+	struct dentry *debugfs_dir;
 };
 
 void uwb_pal_init(struct uwb_pal *pal);
-- 
cgit v0.10.2


From 99ba04053a3712498327bd147c22a9877100a904 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Thu, 27 Nov 2008 17:23:49 +0100
Subject: mmc: at91_mci: reorder timer setup and mmc_add_host() call

As said in function comment mmc_add_host() requires that:
"The host must be prepared to start servicing requests
before this function completes."

During this function, at91_mci_request() can be invoqued
without timer beeing setup leading to a kernel Oops.
This has been reported inserting this driver as a module.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Reported-by: Wu Xuan <wux@landicorp.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c
index 1f8b5b3..e556d42 100644
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c
@@ -1088,6 +1088,8 @@ static int __init at91_mci_probe(struct platform_device *pdev)
 		goto fail0;
 	}
 
+	setup_timer(&host->timer, at91_timeout_timer, (unsigned long)host);
+
 	platform_set_drvdata(pdev, mmc);
 
 	/*
@@ -1101,8 +1103,6 @@ static int __init at91_mci_probe(struct platform_device *pdev)
 
 	mmc_add_host(mmc);
 
-	setup_timer(&host->timer, at91_timeout_timer, (unsigned long)host);
-
 	/*
 	 * monitor card insertion/removal if we can
 	 */
-- 
cgit v0.10.2


From 98444d3dd975653a4a970ecc0dfc30918da92f60 Mon Sep 17 00:00:00 2001
From: Sascha Sommer <saschasommer@freenet.de>
Date: Sat, 29 Nov 2008 07:51:19 +0100
Subject: sdricoh_cs: Add support for Bay Controller devices

Some Ricoh SD card readers seems to advertise themselves slightly differently.
This patches the driver to will recognise an additional product id, and it
appears to work perfectly.

  % pccardctl info
  PRODID_1="RICOH"
  PRODID_2="Bay Controller"
  PRODID_3=""
  PRODID_4=""
  MANFID=0000,0000

Signed-off-by: Charles Lowe <aquasync@gmail.com>
Acked-by: Sascha Sommer <saschasommer@freenet.de>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/host/sdricoh_cs.c b/drivers/mmc/host/sdricoh_cs.c
index 1df44d9..4358633 100644
--- a/drivers/mmc/host/sdricoh_cs.c
+++ b/drivers/mmc/host/sdricoh_cs.c
@@ -82,6 +82,8 @@ static struct pcmcia_device_id pcmcia_ids[] = {
 	/* vendor and device strings followed by their crc32 hashes */
 	PCMCIA_DEVICE_PROD_ID12("RICOH", "Bay1Controller", 0xd9f522ed,
 				0xc3901202),
+	PCMCIA_DEVICE_PROD_ID12("RICOH", "Bay Controller", 0xd9f522ed,
+				0xace80909),
 	PCMCIA_DEVICE_NULL,
 };
 
-- 
cgit v0.10.2


From 062e4fee4400f283307cf8ac1b7931c939010229 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 26 Oct 2008 16:58:25 +0200
Subject: UBIFS: slight compression optimization

If data does not compress, it is better to leave it uncompressed
because we'll read it faster then. So do not compress data if we
save less than 64 bytes.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index a0ada59..6414d50 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -119,10 +119,10 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
 	}
 
 	/*
-	 * Presently, we just require that compression results in less data,
-	 * rather than any defined minimum compression ratio or amount.
+	 * If the data compressed only slightly, it is better to leave it
+	 * uncompressed to improve read speed.
 	 */
-	if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8))
+	if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF)
 		goto no_compr;
 
 	return;
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 0b37804..b25fc36 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -51,6 +51,13 @@
  */
 #define UBIFS_MIN_COMPR_LEN 128
 
+/*
+ * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes
+ * shorter than uncompressed data length, UBIFS preferes to leave this data
+ * node uncompress, because it'll be read faster.
+ */
+#define UBIFS_MIN_COMPRESS_DIFF 64
+
 /* Root inode number */
 #define UBIFS_ROOT_INO 1
 
-- 
cgit v0.10.2


From a1dc080c27ec8ea7ca1c8a9b499362a71ebff792 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sat, 1 Nov 2008 14:20:50 +0200
Subject: UBIFS: use bit-fields to store compression type

Save a 4 bytes of RAM per 'struct inode' by stroring inode
compression type in bit-filed, instead of using 'int'.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index d80b2ae..21b4103 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2021,6 +2021,14 @@ static int __init ubifs_init(void)
 	BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64);
 
 	/*
+	 * We use 2 bit wide bit-fields to store compression type, which should
+	 * be amended if more compressors are added. The bit-fields are:
+	 * @compr_type in 'struct ubifs_inode' and @default_compr in
+	 * 'struct ubifs_info'.
+	 */
+	BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4);
+
+	/*
 	 * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to
 	 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
 	 */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 46b1725..4d76aba 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -386,12 +386,12 @@ struct ubifs_inode {
 	unsigned int dirty:1;
 	unsigned int xattr:1;
 	unsigned int bulk_read:1;
+	unsigned int compr_type:2;
 	struct mutex ui_mutex;
 	spinlock_t ui_lock;
 	loff_t synced_i_size;
 	loff_t ui_size;
 	int flags;
-	int compr_type;
 	pgoff_t last_page_read;
 	pgoff_t read_in_a_row;
 	int data_len;
@@ -946,6 +946,7 @@ struct ubifs_mount_opts {
  * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
  *                   recovery)
  * @bulk_read: enable bulk-reads
+ * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
  *
  * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
  *             @calc_idx_sz
@@ -986,7 +987,6 @@ struct ubifs_mount_opts {
  * @main_lebs: count of LEBs in the main area
  * @main_first: first LEB of the main area
  * @main_bytes: main area size in bytes
- * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
  *
  * @key_hash_type: type of the key hash
  * @key_hash: direntry key hash function
@@ -1196,6 +1196,7 @@ struct ubifs_info {
 	unsigned int big_lpt:1;
 	unsigned int no_chk_data_crc:1;
 	unsigned int bulk_read:1;
+	unsigned int default_compr:2;
 
 	struct mutex tnc_mutex;
 	struct ubifs_zbranch zroot;
@@ -1237,7 +1238,6 @@ struct ubifs_info {
 	int main_lebs;
 	int main_first;
 	long long main_bytes;
-	int default_compr;
 
 	uint8_t key_hash_type;
 	uint32_t (*key_hash)(const char *str, int len);
-- 
cgit v0.10.2


From 553dea4dd531562688ba01c641c7f8fc7abaaf8c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sat, 1 Nov 2008 14:57:49 +0200
Subject: UBIFS: introduce compression mount options

It is very handy to be able to change default UBIFS compressor
via mount options. Introduce -o compr=<name> mount option support.
Currently only "none", "lzo" and "zlib" compressors are supported.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt
index dd84ea3..2d0db54 100644
--- a/Documentation/filesystems/ubifs.txt
+++ b/Documentation/filesystems/ubifs.txt
@@ -95,6 +95,9 @@ no_chk_data_crc		skip checking of CRCs on data nodes in order to
 			of this option is that corruption of the contents
 			of a file can go unnoticed.
 chk_data_crc (*)	do not skip checking CRCs on data nodes
+compr=none              override defoult comressor and set it to "none"
+compr=lzo               override defoult comressor and set it to "lzo"
+compr=zlib              override defoult comressor and set it to "zlib"
 
 
 Quick usage instructions
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 6414d50..4afb3ea 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -33,7 +33,7 @@
 /* Fake description object for the "none" compressor */
 static struct ubifs_compressor none_compr = {
 	.compr_type = UBIFS_COMPR_NONE,
-	.name = "no compression",
+	.name = "none",
 	.capi_name = "",
 };
 
@@ -43,13 +43,13 @@ static DEFINE_MUTEX(lzo_mutex);
 static struct ubifs_compressor lzo_compr = {
 	.compr_type = UBIFS_COMPR_LZO,
 	.comp_mutex = &lzo_mutex,
-	.name = "LZO",
+	.name = "lzo",
 	.capi_name = "lzo",
 };
 #else
 static struct ubifs_compressor lzo_compr = {
 	.compr_type = UBIFS_COMPR_LZO,
-	.name = "LZO",
+	.name = "lzo",
 };
 #endif
 
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 0f39235..c5da201 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -179,8 +179,11 @@ static int create_default_filesystem(struct ubifs_info *c)
 	sup->fanout        = cpu_to_le32(DEFAULT_FANOUT);
 	sup->lsave_cnt     = cpu_to_le32(c->lsave_cnt);
 	sup->fmt_version   = cpu_to_le32(UBIFS_FORMAT_VERSION);
-	sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
 	sup->time_gran     = cpu_to_le32(DEFAULT_TIME_GRAN);
+	if (c->mount_opts.override_compr)
+		sup->default_compr = cpu_to_le16(c->mount_opts.compr_type);
+	else
+		sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
 
 	generate_random_uuid(sup->uuid);
 
@@ -582,16 +585,15 @@ int ubifs_read_superblock(struct ubifs_info *c)
 	c->jhead_cnt     = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT;
 	c->fanout        = le32_to_cpu(sup->fanout);
 	c->lsave_cnt     = le32_to_cpu(sup->lsave_cnt);
-	c->default_compr = le16_to_cpu(sup->default_compr);
 	c->rp_size       = le64_to_cpu(sup->rp_size);
 	c->rp_uid        = le32_to_cpu(sup->rp_uid);
 	c->rp_gid        = le32_to_cpu(sup->rp_gid);
 	sup_flags        = le32_to_cpu(sup->flags);
+	if (!c->mount_opts.override_compr)
+		c->default_compr = le16_to_cpu(sup->default_compr);
 
 	c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
-
 	memcpy(&c->uuid, &sup->uuid, 16);
-
 	c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
 
 	/* Automatically increase file system size to the maximum size */
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 21b4103..fc81022 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -417,6 +417,11 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
 	else if (c->mount_opts.chk_data_crc == 1)
 		seq_printf(s, ",no_chk_data_crc");
 
+	if (c->mount_opts.override_compr) {
+		seq_printf(s, ",compr=");
+		seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type));
+	}
+
 	return 0;
 }
 
@@ -878,6 +883,7 @@ static int check_volume_empty(struct ubifs_info *c)
  * Opt_no_bulk_read: disable bulk-reads
  * Opt_chk_data_crc: check CRCs when reading data nodes
  * Opt_no_chk_data_crc: do not check CRCs when reading data nodes
+ * Opt_override_compr: override default compressor
  * Opt_err: just end of array marker
  */
 enum {
@@ -887,6 +893,7 @@ enum {
 	Opt_no_bulk_read,
 	Opt_chk_data_crc,
 	Opt_no_chk_data_crc,
+	Opt_override_compr,
 	Opt_err,
 };
 
@@ -897,6 +904,7 @@ static const match_table_t tokens = {
 	{Opt_no_bulk_read, "no_bulk_read"},
 	{Opt_chk_data_crc, "chk_data_crc"},
 	{Opt_no_chk_data_crc, "no_chk_data_crc"},
+	{Opt_override_compr, "compr=%s"},
 	{Opt_err, NULL},
 };
 
@@ -950,6 +958,28 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
 			c->mount_opts.chk_data_crc = 1;
 			c->no_chk_data_crc = 1;
 			break;
+		case Opt_override_compr:
+		{
+			char *name = match_strdup(&args[0]);
+
+			if (!name)
+				return -ENOMEM;
+			if (!strcmp(name, "none"))
+				c->mount_opts.compr_type = UBIFS_COMPR_NONE;
+			else if (!strcmp(name, "lzo"))
+				c->mount_opts.compr_type = UBIFS_COMPR_LZO;
+			else if (!strcmp(name, "zlib"))
+				c->mount_opts.compr_type = UBIFS_COMPR_ZLIB;
+			else {
+				ubifs_err("unknown compressor \"%s\"", name);
+				kfree(name);
+				return -EINVAL;
+			}
+			kfree(name);
+			c->mount_opts.override_compr = 1;
+			c->default_compr = c->mount_opts.compr_type;
+			break;
+		}
 		default:
 			ubifs_err("unrecognized mount option \"%s\" "
 				  "or missing value", p);
@@ -1100,13 +1130,13 @@ static int mount_ubifs(struct ubifs_info *c)
 		goto out_free;
 
 	/*
-	 * Make sure the compressor which is set as the default on in the
-	 * superblock was actually compiled in.
+	 * Make sure the compressor which is set as default in the superblock
+	 * or overriden by mount options is actually compiled in.
 	 */
 	if (!ubifs_compr_present(c->default_compr)) {
-		ubifs_warn("'%s' compressor is set by superblock, but not "
-			   "compiled in", ubifs_compr_name(c->default_compr));
-		c->default_compr = UBIFS_COMPR_NONE;
+		ubifs_err("'compressor \"%s\" is not compiled in",
+			  ubifs_compr_name(c->default_compr));
+		goto out_free;
 	}
 
 	dbg_failure_mode_registration(c);
@@ -2023,8 +2053,8 @@ static int __init ubifs_init(void)
 	/*
 	 * We use 2 bit wide bit-fields to store compression type, which should
 	 * be amended if more compressors are added. The bit-fields are:
-	 * @compr_type in 'struct ubifs_inode' and @default_compr in
-	 * 'struct ubifs_info'.
+	 * @compr_type in 'struct ubifs_inode', @default_compr in
+	 * 'struct ubifs_info' and @compr_type in 'struct ubifs_mount_opts'.
 	 */
 	BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4);
 
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 4d76aba..16840e0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -893,13 +893,21 @@ struct ubifs_orphan {
 /**
  * struct ubifs_mount_opts - UBIFS-specific mount options information.
  * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
- * @bulk_read: enable bulk-reads
- * @chk_data_crc: check CRCs when reading data nodes
+ * @bulk_read: enable/disable bulk-reads (%0 default, %1 disabe, %2 enable)
+ * @chk_data_crc: enable/disable CRC data checking when reading data nodes
+ *                (%0 default, %1 disabe, %2 enable)
+ * @override_compr: override default compressor (%0 - do not override and use
+ *                  superblock compressor, %1 - override and use compressor
+ *                  specified in @compr_type)
+ * @compr_type: compressor type to override the superblock compressor with
+ *              (%UBIFS_COMPR_NONE, etc)
  */
 struct ubifs_mount_opts {
 	unsigned int unmount_mode:2;
 	unsigned int bulk_read:2;
 	unsigned int chk_data_crc:2;
+	unsigned int override_compr:1;
+	unsigned int compr_type:2;
 };
 
 /**
-- 
cgit v0.10.2


From 5dd7cbc083f3a91fa7454125fe992826701b67bc Mon Sep 17 00:00:00 2001
From: Kukkonen Mika <mika.kukkonen@nokia.com>
Date: Tue, 2 Dec 2008 11:32:49 +0200
Subject: UBIFS: avoid unnecessary checks

I have a habit of compiling kernel with
EXTRA_CFLAGS="-Wextra -Wno-unused -Wno-sign-compare -Wno-missing-field-initializers"
and so fs/ubifs/key.h give lots (~10) of these every time:

CC      fs/ubifs/tnc_misc.o
In file included from fs/ubifs/ubifs.h:1725,
from fs/ubifs/tnc_misc.c:30:
fs/ubifs/key.h: In function 'key_r5_hash':
fs/ubifs/key.h:64: warning: comparison of unsigned expression >= 0 is always true
fs/ubifs/key.h: In function 'key_test_hash':
fs/ubifs/key.h:81: warning: comparison of unsigned expression >= 0 is always true

This patch fixes the warnings.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 3f1f16b..efb3430 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -38,6 +38,22 @@
 #define __UBIFS_KEY_H__
 
 /**
+ * key_mask_hash - mask a valid hash value.
+ * @val: value to be masked
+ *
+ * We use hash values as offset in directories, so values %0 and %1 are
+ * reserved for "." and "..". %2 is reserved for "end of readdir" marker. This
+ * function makes sure the reserved values are not used.
+ */
+static inline uint32_t key_mask_hash(uint32_t hash)
+{
+	hash &= UBIFS_S_KEY_HASH_MASK;
+	if (unlikely(hash <= 2))
+		hash += 3;
+	return hash;
+}
+
+/**
  * key_r5_hash - R5 hash function (borrowed from reiserfs).
  * @s: direntry name
  * @len: name length
@@ -54,16 +70,7 @@ static inline uint32_t key_r5_hash(const char *s, int len)
 		str++;
 	}
 
-	a &= UBIFS_S_KEY_HASH_MASK;
-
-	/*
-	 * We use hash values as offset in directories, so values %0 and %1 are
-	 * reserved for "." and "..". %2 is reserved for "end of readdir"
-	 * marker.
-	 */
-	if (unlikely(a >= 0 && a <= 2))
-		a += 3;
-	return a;
+	return key_mask_hash(a);
 }
 
 /**
@@ -77,10 +84,7 @@ static inline uint32_t key_test_hash(const char *str, int len)
 
 	len = min_t(uint32_t, len, 4);
 	memcpy(&a, str, len);
-	a &= UBIFS_S_KEY_HASH_MASK;
-	if (unlikely(a >= 0 && a <= 2))
-		a += 3;
-	return a;
+	return key_mask_hash(a);
 }
 
 /**
-- 
cgit v0.10.2


From 17c2f9f85c896b48a5d74a9155d99ec5b241a0e6 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 17 Oct 2008 13:31:39 +0300
Subject: UBIFS: separate debugging fields out

Introduce a new data structure which contains all debugging
stuff inside. This is cleaner than having debugging stuff
directly in 'c'.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index b49884c..f3a7945 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -470,12 +470,12 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
 {
 	struct ubifs_idx_node *idx;
 	int lnum, offs, len, err = 0;
+	struct ubifs_debug_info *d = c->dbg;
 
-	c->old_zroot = *zroot;
-
-	lnum = c->old_zroot.lnum;
-	offs = c->old_zroot.offs;
-	len = c->old_zroot.len;
+	d->old_zroot = *zroot;
+	lnum = d->old_zroot.lnum;
+	offs = d->old_zroot.offs;
+	len = d->old_zroot.len;
 
 	idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
 	if (!idx)
@@ -485,8 +485,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
 	if (err)
 		goto out;
 
-	c->old_zroot_level = le16_to_cpu(idx->level);
-	c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum);
+	d->old_zroot_level = le16_to_cpu(idx->level);
+	d->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum);
 out:
 	kfree(idx);
 	return err;
@@ -509,6 +509,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
 {
 	int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
 	int first = 1, iip;
+	struct ubifs_debug_info *d = c->dbg;
 	union ubifs_key lower_key, upper_key, l_key, u_key;
 	unsigned long long uninitialized_var(last_sqnum);
 	struct ubifs_idx_node *idx;
@@ -525,9 +526,9 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
 	     UBIFS_IDX_NODE_SZ;
 
 	/* Start at the old zroot */
-	lnum = c->old_zroot.lnum;
-	offs = c->old_zroot.offs;
-	len = c->old_zroot.len;
+	lnum = d->old_zroot.lnum;
+	offs = d->old_zroot.offs;
+	len = d->old_zroot.len;
 	iip = 0;
 
 	/*
@@ -560,11 +561,11 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
 		if (first) {
 			first = 0;
 			/* Check root level and sqnum */
-			if (le16_to_cpu(idx->level) != c->old_zroot_level) {
+			if (le16_to_cpu(idx->level) != d->old_zroot_level) {
 				err = 2;
 				goto out_dump;
 			}
-			if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) {
+			if (le64_to_cpu(idx->ch.sqnum) != d->old_zroot_sqnum) {
 				err = 3;
 				goto out_dump;
 			}
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 510ffa0..0332a85 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -705,7 +705,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
 
 	printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum);
 
-	sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
+	sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
 	if (IS_ERR(sleb)) {
 		ubifs_err("scan error %d", (int)PTR_ERR(sleb));
 		return;
@@ -2097,7 +2097,7 @@ static int simple_rand(void)
 	return (next >> 16) & 32767;
 }
 
-void dbg_failure_mode_registration(struct ubifs_info *c)
+static void failure_mode_init(struct ubifs_info *c)
 {
 	struct failure_mode_info *fmi;
 
@@ -2112,7 +2112,7 @@ void dbg_failure_mode_registration(struct ubifs_info *c)
 	spin_unlock(&fmi_lock);
 }
 
-void dbg_failure_mode_deregistration(struct ubifs_info *c)
+static void failure_mode_exit(struct ubifs_info *c)
 {
 	struct failure_mode_info *fmi, *tmp;
 
@@ -2146,42 +2146,44 @@ static int in_failure_mode(struct ubi_volume_desc *desc)
 	struct ubifs_info *c = dbg_find_info(desc);
 
 	if (c && dbg_failure_mode)
-		return c->failure_mode;
+		return c->dbg->failure_mode;
 	return 0;
 }
 
 static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
 {
 	struct ubifs_info *c = dbg_find_info(desc);
+	struct ubifs_debug_info *d;
 
 	if (!c || !dbg_failure_mode)
 		return 0;
-	if (c->failure_mode)
+	d = c->dbg;
+	if (d->failure_mode)
 		return 1;
-	if (!c->fail_cnt) {
+	if (!d->fail_cnt) {
 		/* First call - decide delay to failure */
 		if (chance(1, 2)) {
 			unsigned int delay = 1 << (simple_rand() >> 11);
 
 			if (chance(1, 2)) {
-				c->fail_delay = 1;
-				c->fail_timeout = jiffies +
+				d->fail_delay = 1;
+				d->fail_timeout = jiffies +
 						  msecs_to_jiffies(delay);
 				dbg_rcvry("failing after %ums", delay);
 			} else {
-				c->fail_delay = 2;
-				c->fail_cnt_max = delay;
+				d->fail_delay = 2;
+				d->fail_cnt_max = delay;
 				dbg_rcvry("failing after %u calls", delay);
 			}
 		}
-		c->fail_cnt += 1;
+		d->fail_cnt += 1;
 	}
 	/* Determine if failure delay has expired */
-	if (c->fail_delay == 1) {
-		if (time_before(jiffies, c->fail_timeout))
+	if (d->fail_delay == 1) {
+		if (time_before(jiffies, d->fail_timeout))
 			return 0;
-	} else if (c->fail_delay == 2)
-		if (c->fail_cnt++ < c->fail_cnt_max)
+	} else if (d->fail_delay == 2)
+		if (d->fail_cnt++ < d->fail_cnt_max)
 			return 0;
 	if (lnum == UBIFS_SB_LNUM) {
 		if (write) {
@@ -2239,7 +2241,7 @@ static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
 		dbg_rcvry("failing in bud LEB %d commit not running", lnum);
 	}
 	ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum);
-	c->failure_mode = 1;
+	d->failure_mode = 1;
 	dump_stack();
 	return 1;
 }
@@ -2344,4 +2346,41 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
 	return 0;
 }
 
+/**
+ * ubifs_debugging_init - initialize UBIFS debugging.
+ * @c: UBIFS file-system description object
+ *
+ * This function initializes debugging-related data for the file system.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubifs_debugging_init(struct ubifs_info *c)
+{
+	c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL);
+	if (!c->dbg)
+		return -ENOMEM;
+
+	c->dbg->buf = vmalloc(c->leb_size);
+	if (!c->dbg->buf)
+		goto out;
+
+	failure_mode_init(c);
+	return 0;
+
+out:
+	kfree(c->dbg);
+	return -ENOMEM;
+}
+
+/**
+ * ubifs_debugging_exit - free debugging data.
+ * @c: UBIFS file-system description object
+ */
+void ubifs_debugging_exit(struct ubifs_info *c)
+{
+	failure_mode_exit(c);
+	vfree(c->dbg->buf);
+	kfree(c->dbg);
+}
+
 #endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 33d6b95..d6ea136 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -25,7 +25,43 @@
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
-#define UBIFS_DBG(op) op
+/**
+ * ubifs_debug_info - per-FS debugging information.
+ * @buf: a buffer of LEB size, used for various purposes
+ * @old_zroot: old index root - used by 'dbg_check_old_index()'
+ * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
+ * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
+ * @failure_mode: failure mode for recovery testing
+ * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
+ * @fail_timeout: time in jiffies when delay of failure mode expires
+ * @fail_cnt: current number of calls to failure mode I/O functions
+ * @fail_cnt_max: number of calls by which to delay failure mode
+ * @chk_lpt_sz: used by LPT tree size checker
+ * @chk_lpt_sz2: used by LPT tree size checker
+ * @chk_lpt_wastage: used by LPT tree size checker
+ * @chk_lpt_lebs: used by LPT tree size checker
+ * @new_nhead_offs: used by LPT tree size checker
+ * @new_ihead_lnum: used by debugging to check ihead_lnum
+ * @new_ihead_offs: used by debugging to check ihead_offs
+ */
+struct ubifs_debug_info {
+	void *buf;
+	struct ubifs_zbranch old_zroot;
+	int old_zroot_level;
+	unsigned long long old_zroot_sqnum;
+	int failure_mode;
+	int fail_delay;
+	unsigned long fail_timeout;
+	unsigned int fail_cnt;
+	unsigned int fail_cnt_max;
+	long long chk_lpt_sz;
+	long long chk_lpt_sz2;
+	long long chk_lpt_wastage;
+	int chk_lpt_lebs;
+	int new_nhead_offs;
+	int new_ihead_lnum;
+	int new_ihead_offs;
+};
 
 #define ubifs_assert(expr) do {                                                \
 	if (unlikely(!(expr))) {                                               \
@@ -211,6 +247,9 @@ extern unsigned int ubifs_msg_flags;
 extern unsigned int ubifs_chk_flags;
 extern unsigned int ubifs_tst_flags;
 
+int ubifs_debugging_init(struct ubifs_info *c);
+void ubifs_debugging_exit(struct ubifs_info *c);
+
 /* Dump functions */
 
 const char *dbg_ntype(int type);
@@ -274,9 +313,6 @@ int dbg_force_in_the_gaps(void);
 
 #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
 
-void dbg_failure_mode_registration(struct ubifs_info *c);
-void dbg_failure_mode_deregistration(struct ubifs_info *c);
-
 #ifndef UBIFS_DBG_PRESERVE_UBI
 
 #define ubi_leb_read   dbg_leb_read
@@ -320,8 +356,6 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
 
 #else /* !CONFIG_UBIFS_FS_DEBUG */
 
-#define UBIFS_DBG(op)
-
 /* Use "if (0)" to make compiler check arguments even if debugging is off */
 #define ubifs_assert(expr)  do {                                               \
 	if (0 && (expr))                                                       \
@@ -360,6 +394,9 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
 #define DBGKEY(key)  ((char *)(key))
 #define DBGKEY1(key) ((char *)(key))
 
+#define ubifs_debugging_init(c)               0
+#define ubifs_debugging_exit(c)               ({})
+
 #define dbg_ntype(type)                       ""
 #define dbg_cstate(cmt_state)                 ""
 #define dbg_get_key_dump(c, key)              ({})
@@ -396,8 +433,6 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
 #define dbg_force_in_the_gaps_enabled              0
 #define dbg_force_in_the_gaps()                    0
 #define dbg_failure_mode                           0
-#define dbg_failure_mode_registration(c)           ({})
-#define dbg_failure_mode_deregistration(c)         ({})
 
 #endif /* !CONFIG_UBIFS_FS_DEBUG */
 
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index f27176e..10ba663 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1088,7 +1088,7 @@ static int scan_check_cb(struct ubifs_info *c,
 		}
 	}
 
-	sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
+	sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
 	if (IS_ERR(sleb)) {
 		/*
 		 * After an unclean unmount, empty and freeable LEBs
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index a41434b..1aefab9 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1602,7 +1602,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
 {
 	int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
 	int ret;
-	void *buf = c->dbg_buf;
+	void *buf = c->dbg->buf;
 
 	dbg_lp("LEB %d", lnum);
 	err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
@@ -1731,15 +1731,16 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
  */
 int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 {
+	struct ubifs_debug_info *d = c->dbg;
 	long long chk_lpt_sz, lpt_sz;
 	int err = 0;
 
 	switch (action) {
 	case 0:
-		c->chk_lpt_sz = 0;
-		c->chk_lpt_sz2 = 0;
-		c->chk_lpt_lebs = 0;
-		c->chk_lpt_wastage = 0;
+		d->chk_lpt_sz = 0;
+		d->chk_lpt_sz2 = 0;
+		d->chk_lpt_lebs = 0;
+		d->chk_lpt_wastage = 0;
 		if (c->dirty_pn_cnt > c->pnode_cnt) {
 			dbg_err("dirty pnodes %d exceed max %d",
 				c->dirty_pn_cnt, c->pnode_cnt);
@@ -1752,35 +1753,35 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 		}
 		return err;
 	case 1:
-		c->chk_lpt_sz += len;
+		d->chk_lpt_sz += len;
 		return 0;
 	case 2:
-		c->chk_lpt_sz += len;
-		c->chk_lpt_wastage += len;
-		c->chk_lpt_lebs += 1;
+		d->chk_lpt_sz += len;
+		d->chk_lpt_wastage += len;
+		d->chk_lpt_lebs += 1;
 		return 0;
 	case 3:
 		chk_lpt_sz = c->leb_size;
-		chk_lpt_sz *= c->chk_lpt_lebs;
+		chk_lpt_sz *= d->chk_lpt_lebs;
 		chk_lpt_sz += len - c->nhead_offs;
-		if (c->chk_lpt_sz != chk_lpt_sz) {
+		if (d->chk_lpt_sz != chk_lpt_sz) {
 			dbg_err("LPT wrote %lld but space used was %lld",
-				c->chk_lpt_sz, chk_lpt_sz);
+				d->chk_lpt_sz, chk_lpt_sz);
 			err = -EINVAL;
 		}
-		if (c->chk_lpt_sz > c->lpt_sz) {
+		if (d->chk_lpt_sz > c->lpt_sz) {
 			dbg_err("LPT wrote %lld but lpt_sz is %lld",
-				c->chk_lpt_sz, c->lpt_sz);
+				d->chk_lpt_sz, c->lpt_sz);
 			err = -EINVAL;
 		}
-		if (c->chk_lpt_sz2 && c->chk_lpt_sz != c->chk_lpt_sz2) {
+		if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) {
 			dbg_err("LPT layout size %lld but wrote %lld",
-				c->chk_lpt_sz, c->chk_lpt_sz2);
+				d->chk_lpt_sz, d->chk_lpt_sz2);
 			err = -EINVAL;
 		}
-		if (c->chk_lpt_sz2 && c->new_nhead_offs != len) {
+		if (d->chk_lpt_sz2 && d->new_nhead_offs != len) {
 			dbg_err("LPT new nhead offs: expected %d was %d",
-				c->new_nhead_offs, len);
+				d->new_nhead_offs, len);
 			err = -EINVAL;
 		}
 		lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
@@ -1788,22 +1789,22 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 		lpt_sz += c->ltab_sz;
 		if (c->big_lpt)
 			lpt_sz += c->lsave_sz;
-		if (c->chk_lpt_sz - c->chk_lpt_wastage > lpt_sz) {
+		if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) {
 			dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
-				c->chk_lpt_sz, c->chk_lpt_wastage, lpt_sz);
+				d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz);
 			err = -EINVAL;
 		}
 		if (err)
 			dbg_dump_lpt_info(c);
-		c->chk_lpt_sz2 = c->chk_lpt_sz;
-		c->chk_lpt_sz = 0;
-		c->chk_lpt_wastage = 0;
-		c->chk_lpt_lebs = 0;
-		c->new_nhead_offs = len;
+		d->chk_lpt_sz2 = d->chk_lpt_sz;
+		d->chk_lpt_sz = 0;
+		d->chk_lpt_wastage = 0;
+		d->chk_lpt_lebs = 0;
+		d->new_nhead_offs = len;
 		return err;
 	case 4:
-		c->chk_lpt_sz += len;
-		c->chk_lpt_wastage += len;
+		d->chk_lpt_sz += len;
+		d->chk_lpt_wastage += len;
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 9bd5a43..9e6f403 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -899,7 +899,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
 	for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
 		struct ubifs_scan_leb *sleb;
 
-		sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
+		sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
 		if (IS_ERR(sleb)) {
 			err = PTR_ERR(sleb);
 			break;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index fc81022..ad44822 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1069,11 +1069,9 @@ static int mount_ubifs(struct ubifs_info *c)
 	if (err)
 		return err;
 
-#ifdef CONFIG_UBIFS_FS_DEBUG
-	c->dbg_buf = vmalloc(c->leb_size);
-	if (!c->dbg_buf)
-		return -ENOMEM;
-#endif
+	err = ubifs_debugging_init(c);
+	if (err)
+		return err;
 
 	err = check_volume_empty(c);
 	if (err)
@@ -1139,18 +1137,16 @@ static int mount_ubifs(struct ubifs_info *c)
 		goto out_free;
 	}
 
-	dbg_failure_mode_registration(c);
-
 	err = init_constants_late(c);
 	if (err)
-		goto out_dereg;
+		goto out_free;
 
 	sz = ALIGN(c->max_idx_node_sz, c->min_io_size);
 	sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size);
 	c->cbuf = kmalloc(sz, GFP_NOFS);
 	if (!c->cbuf) {
 		err = -ENOMEM;
-		goto out_dereg;
+		goto out_free;
 	}
 
 	sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
@@ -1350,14 +1346,12 @@ out_wbufs:
 	free_wbufs(c);
 out_cbuf:
 	kfree(c->cbuf);
-out_dereg:
-	dbg_failure_mode_deregistration(c);
 out_free:
 	kfree(c->bu.buf);
 	vfree(c->ileb_buf);
 	vfree(c->sbuf);
 	kfree(c->bottom_up_buf);
-	UBIFS_DBG(vfree(c->dbg_buf));
+	ubifs_debugging_exit(c);
 	return err;
 }
 
@@ -1394,8 +1388,7 @@ static void ubifs_umount(struct ubifs_info *c)
 	vfree(c->ileb_buf);
 	vfree(c->sbuf);
 	kfree(c->bottom_up_buf);
-	UBIFS_DBG(vfree(c->dbg_buf));
-	dbg_failure_mode_deregistration(c);
+	ubifs_debugging_exit(c);
 }
 
 /**
@@ -1879,7 +1872,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_iput;
 
 	mutex_unlock(&c->umount_mutex);
-
 	return 0;
 
 out_iput:
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 8ac76b1..3c0af45 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -553,8 +553,8 @@ static int layout_in_empty_space(struct ubifs_info *c)
 	}
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
-	c->new_ihead_lnum = lnum;
-	c->new_ihead_offs = buf_offs;
+	c->dbg->new_ihead_lnum = lnum;
+	c->dbg->new_ihead_offs = buf_offs;
 #endif
 
 	return 0;
@@ -1002,7 +1002,8 @@ static int write_index(struct ubifs_info *c)
 	}
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
-	if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) {
+	if (lnum != c->dbg->new_ihead_lnum ||
+	    buf_offs != c->dbg->new_ihead_offs) {
 		ubifs_err("inconsistent ihead");
 		return -EINVAL;
 	}
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 16840e0..7e090a5 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -910,6 +910,8 @@ struct ubifs_mount_opts {
 	unsigned int compr_type:2;
 };
 
+struct ubifs_debug_info;
+
 /**
  * struct ubifs_info - UBIFS file-system description data structure
  * (per-superblock).
@@ -972,8 +974,6 @@ struct ubifs_mount_opts {
  * @ileb_nxt: next pre-allocated index LEBs
  * @old_idx: tree of index nodes obsoleted since the last commit start
  * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c
- * @new_ihead_lnum: used by debugging to check ihead_lnum
- * @new_ihead_offs: used by debugging to check ihead_offs
  *
  * @mst_node: master node
  * @mst_offs: offset of valid master node
@@ -1157,15 +1157,7 @@ struct ubifs_mount_opts {
  * @always_chk_crc: always check CRCs (while mounting and remounting rw)
  * @mount_opts: UBIFS-specific mount options
  *
- * @dbg_buf: a buffer of LEB size used for debugging purposes
- * @old_zroot: old index root - used by 'dbg_check_old_index()'
- * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
- * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
- * @failure_mode: failure mode for recovery testing
- * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
- * @fail_timeout: time in jiffies when delay of failure mode expires
- * @fail_cnt: current number of calls to failure mode I/O functions
- * @fail_cnt_max: number of calls by which to delay failure mode
+ * @dbg: debugging-related information
  */
 struct ubifs_info {
 	struct super_block *vfs_sb;
@@ -1221,10 +1213,6 @@ struct ubifs_info {
 	int ileb_nxt;
 	struct rb_root old_idx;
 	int *bottom_up_buf;
-#ifdef CONFIG_UBIFS_FS_DEBUG
-	int new_ihead_lnum;
-	int new_ihead_offs;
-#endif
 
 	struct ubifs_mst_node *mst_node;
 	int mst_offs;
@@ -1399,21 +1387,7 @@ struct ubifs_info {
 	struct ubifs_mount_opts mount_opts;
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
-	void *dbg_buf;
-	struct ubifs_zbranch old_zroot;
-	int old_zroot_level;
-	unsigned long long old_zroot_sqnum;
-	int failure_mode;
-	int fail_delay;
-	unsigned long fail_timeout;
-	unsigned int fail_cnt;
-	unsigned int fail_cnt_max;
-	long long chk_lpt_sz;
-	long long chk_lpt_sz2;
-	long long chk_lpt_wastage;
-	int chk_lpt_lebs;
-	int new_nhead_lnum;
-	int new_nhead_offs;
+	struct ubifs_debug_info *dbg;
 #endif
 };
 
-- 
cgit v0.10.2


From 552ff3179d1e93a3e982357544c059f3e9a5516e Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 23 Oct 2008 11:49:28 +0300
Subject: UBIFS: add debugfs support

We need to have a possibility to see various UBIFS variables
and ask UBIFS to dump various information. Debugfs is what
we need.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 0332a85..5684277 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -32,6 +32,7 @@
 #include "ubifs.h"
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/debugfs.h>
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
@@ -988,22 +989,20 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
 	err = 1;
 	key_read(c, &dent1->key, &key);
 	if (keys_cmp(c, &zbr1->key, &key)) {
-		dbg_err("1st entry at %d:%d has key %s", zbr1->lnum,
-			zbr1->offs, DBGKEY(&key));
-		dbg_err("but it should have key %s according to tnc",
-			DBGKEY(&zbr1->key));
-			dbg_dump_node(c, dent1);
-			goto out_free;
+		ubifs_err("1st entry at %d:%d has key %s", zbr1->lnum,
+			  zbr1->offs, DBGKEY(&key));
+		ubifs_err("but it should have key %s according to tnc",
+			  DBGKEY(&zbr1->key)); dbg_dump_node(c, dent1);
+		goto out_free;
 	}
 
 	key_read(c, &dent2->key, &key);
 	if (keys_cmp(c, &zbr2->key, &key)) {
-		dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum,
-			zbr1->offs, DBGKEY(&key));
-		dbg_err("but it should have key %s according to tnc",
-			DBGKEY(&zbr2->key));
-			dbg_dump_node(c, dent2);
-			goto out_free;
+		ubifs_err("2nd entry at %d:%d has key %s", zbr1->lnum,
+			  zbr1->offs, DBGKEY(&key));
+		ubifs_err("but it should have key %s according to tnc",
+			  DBGKEY(&zbr2->key)); dbg_dump_node(c, dent2);
+		goto out_free;
 	}
 
 	nlen1 = le16_to_cpu(dent1->nlen);
@@ -1015,14 +1014,14 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
 		goto out_free;
 	}
 	if (cmp == 0 && nlen1 == nlen2)
-		dbg_err("2 xent/dent nodes with the same name");
+		ubifs_err("2 xent/dent nodes with the same name");
 	else
-		dbg_err("bad order of colliding key %s",
+		ubifs_err("bad order of colliding key %s",
 			DBGKEY(&key));
 
-	dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
+	ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
 	dbg_dump_node(c, dent1);
-	dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs);
+	ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs);
 	dbg_dump_node(c, dent2);
 
 out_free:
@@ -2103,7 +2102,7 @@ static void failure_mode_init(struct ubifs_info *c)
 
 	fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS);
 	if (!fmi) {
-		dbg_err("Failed to register failure mode - no memory");
+		ubifs_err("Failed to register failure mode - no memory");
 		return;
 	}
 	fmi->c = c;
@@ -2383,4 +2382,144 @@ void ubifs_debugging_exit(struct ubifs_info *c)
 	kfree(c->dbg);
 }
 
+/*
+ * Root directory for UBIFS stuff in debugfs. Contains sub-directories which
+ * contain the stuff specific to particular file-system mounts.
+ */
+static struct dentry *debugfs_rootdir;
+
+/**
+ * dbg_debugfs_init - initialize debugfs file-system.
+ *
+ * UBIFS uses debugfs file-system to expose various debugging knobs to
+ * user-space. This function creates "ubifs" directory in the debugfs
+ * file-system. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int dbg_debugfs_init(void)
+{
+	debugfs_rootdir = debugfs_create_dir("ubifs", NULL);
+	if (IS_ERR(debugfs_rootdir)) {
+		int err = PTR_ERR(debugfs_rootdir);
+		ubifs_err("cannot create \"ubifs\" debugfs directory, "
+			  "error %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system.
+ */
+void dbg_debugfs_exit(void)
+{
+	debugfs_remove(debugfs_rootdir);
+}
+
+static int open_debugfs_file(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
+				  size_t count, loff_t *ppos)
+{
+	struct ubifs_info *c = file->private_data;
+	struct ubifs_debug_info *d = c->dbg;
+
+	if (file->f_path.dentry == d->dump_lprops)
+		dbg_dump_lprops(c);
+	else if (file->f_path.dentry == d->dump_budg) {
+		spin_lock(&c->space_lock);
+		dbg_dump_budg(c);
+		spin_unlock(&c->space_lock);
+	} else if (file->f_path.dentry == d->dump_budg) {
+		mutex_lock(&c->tnc_mutex);
+		dbg_dump_tnc(c);
+		mutex_unlock(&c->tnc_mutex);
+	} else
+		return -EINVAL;
+
+	*ppos += count;
+	return count;
+}
+
+static const struct file_operations debugfs_fops = {
+	.open = open_debugfs_file,
+	.write = write_debugfs_file,
+	.owner = THIS_MODULE,
+};
+
+/**
+ * dbg_debugfs_init_fs - initialize debugfs for UBIFS instance.
+ * @c: UBIFS file-system description object
+ *
+ * This function creates all debugfs files for this instance of UBIFS. Returns
+ * zero in case of success and a negative error code in case of failure.
+ *
+ * Note, the only reason we have not merged this function with the
+ * 'ubifs_debugging_init()' function is because it is better to initialize
+ * debugfs interfaces at the very end of the mount process, and remove them at
+ * the very beginning of the mount process.
+ */
+int dbg_debugfs_init_fs(struct ubifs_info *c)
+{
+	int err;
+	const char *fname;
+	struct dentry *dent;
+	struct ubifs_debug_info *d = c->dbg;
+
+	sprintf(d->debugfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
+	d->debugfs_dir = debugfs_create_dir(d->debugfs_dir_name,
+					      debugfs_rootdir);
+	if (IS_ERR(d->debugfs_dir)) {
+		err = PTR_ERR(d->debugfs_dir);
+		ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
+			  d->debugfs_dir_name, err);
+		goto out;
+	}
+
+	fname = "dump_lprops";
+	dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
+				   &debugfs_fops);
+	if (IS_ERR(dent))
+		goto out_remove;
+	d->dump_lprops = dent;
+
+	fname = "dump_budg";
+	dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
+				   &debugfs_fops);
+	if (IS_ERR(dent))
+		goto out_remove;
+	d->dump_budg = dent;
+
+	fname = "dump_tnc";
+	dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
+				   &debugfs_fops);
+	if (IS_ERR(dent))
+		goto out_remove;
+	d->dump_tnc = dent;
+
+	return 0;
+
+out_remove:
+	err = PTR_ERR(dent);
+	ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
+		  fname, err);
+	debugfs_remove_recursive(d->debugfs_dir);
+out:
+	return err;
+}
+
+/**
+ * dbg_debugfs_exit_fs - remove all debugfs files.
+ * @c: UBIFS file-system description object
+ */
+void dbg_debugfs_exit_fs(struct ubifs_info *c)
+{
+	debugfs_remove_recursive(c->dbg->debugfs_dir);
+}
+
 #endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index d6ea136..a6b70f8 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -43,6 +43,13 @@
  * @new_nhead_offs: used by LPT tree size checker
  * @new_ihead_lnum: used by debugging to check ihead_lnum
  * @new_ihead_offs: used by debugging to check ihead_offs
+ *
+ * debugfs_dir_name: name of debugfs directory containing this file-system's
+ *                   files
+ * debugfs_dir: direntry object of the file-system debugfs directory
+ * dump_lprops: "dump lprops" debugfs knob
+ * dump_budg: "dump budgeting information" debugfs knob
+ * dump_tnc: "dump TNC" debugfs knob
  */
 struct ubifs_debug_info {
 	void *buf;
@@ -61,6 +68,12 @@ struct ubifs_debug_info {
 	int new_nhead_offs;
 	int new_ihead_lnum;
 	int new_ihead_offs;
+
+	char debugfs_dir_name[100];
+	struct dentry *debugfs_dir;
+	struct dentry *dump_lprops;
+	struct dentry *dump_budg;
+	struct dentry *dump_tnc;
 };
 
 #define ubifs_assert(expr) do {                                                \
@@ -251,7 +264,6 @@ int ubifs_debugging_init(struct ubifs_info *c);
 void ubifs_debugging_exit(struct ubifs_info *c);
 
 /* Dump functions */
-
 const char *dbg_ntype(int type);
 const char *dbg_cstate(int cmt_state);
 const char *dbg_get_key_dump(const struct ubifs_info *c,
@@ -274,7 +286,6 @@ void dbg_dump_tnc(struct ubifs_info *c);
 void dbg_dump_index(struct ubifs_info *c);
 
 /* Checking helper functions */
-
 typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
 				 struct ubifs_zbranch *zbr, void *priv);
 typedef int (*dbg_znode_callback)(struct ubifs_info *c,
@@ -354,6 +365,12 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
 	return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN);
 }
 
+/* Debugfs-related stuff */
+int dbg_debugfs_init(void);
+void dbg_debugfs_exit(void);
+int dbg_debugfs_init_fs(struct ubifs_info *c);
+void dbg_debugfs_exit_fs(struct ubifs_info *c);
+
 #else /* !CONFIG_UBIFS_FS_DEBUG */
 
 /* Use "if (0)" to make compiler check arguments even if debugging is off */
@@ -434,6 +451,10 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
 #define dbg_force_in_the_gaps()                    0
 #define dbg_failure_mode                           0
 
-#endif /* !CONFIG_UBIFS_FS_DEBUG */
+#define dbg_debugfs_init()                         0
+#define dbg_debugfs_exit()
+#define dbg_debugfs_init_fs(c)                     0
+#define dbg_debugfs_exit_fs(c)                     0
 
+#endif /* !CONFIG_UBIFS_FS_DEBUG */
 #endif /* !__UBIFS_DEBUG_H__ */
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ad44822..2dbaa4f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1258,6 +1258,10 @@ static int mount_ubifs(struct ubifs_info *c)
 		}
 	}
 
+	err = dbg_debugfs_init_fs(c);
+	if (err)
+		goto out_infos;
+
 	err = dbg_check_filesystem(c);
 	if (err)
 		goto out_infos;
@@ -1369,6 +1373,7 @@ static void ubifs_umount(struct ubifs_info *c)
 	dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num,
 		c->vi.vol_id);
 
+	dbg_debugfs_exit_fs(c);
 	spin_lock(&ubifs_infos_lock);
 	list_del(&c->infos_list);
 	spin_unlock(&ubifs_infos_lock);
@@ -2079,11 +2084,17 @@ static int __init ubifs_init(void)
 
 	err = ubifs_compressors_init();
 	if (err)
+		goto out_shrinker;
+
+	err = dbg_debugfs_init();
+	if (err)
 		goto out_compr;
 
 	return 0;
 
 out_compr:
+	ubifs_compressors_exit();
+out_shrinker:
 	unregister_shrinker(&ubifs_shrinker_info);
 	kmem_cache_destroy(ubifs_inode_slab);
 out_reg:
@@ -2098,6 +2109,7 @@ static void __exit ubifs_exit(void)
 	ubifs_assert(list_empty(&ubifs_infos));
 	ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0);
 
+	dbg_debugfs_exit();
 	ubifs_compressors_exit();
 	unregister_shrinker(&ubifs_shrinker_info);
 	kmem_cache_destroy(ubifs_inode_slab);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 7e090a5..4cf28e8 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1158,6 +1158,7 @@ struct ubifs_debug_info;
  * @mount_opts: UBIFS-specific mount options
  *
  * @dbg: debugging-related information
+ * @dfs: debugfs support-related information
  */
 struct ubifs_info {
 	struct super_block *vfs_sb;
-- 
cgit v0.10.2


From 45e12d901fee57bccf90f6940155724954e1aac7 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 31 Oct 2008 11:42:18 +0200
Subject: UBIFS: run debugging checks only if they are enabled

Do not forget to check whether lpt debugging is enabled before
running the check functions. This commit also makes some spelling
fixes.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index db8bd0e..93c181c 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -36,7 +36,7 @@
  * can be written into a single eraseblock. In that case, garbage collection
  * consists of just writing the whole table, which therefore makes all other
  * eraseblocks reusable. In the case of the big model, dirty eraseblocks are
- * selected for garbage collection, which consists are marking the nodes in
+ * selected for garbage collection, which consists of marking the clean nodes in
  * that LEB as dirty, and then only the dirty nodes are written out. Also, in
  * the case of the big model, a table of LEB numbers is saved so that the entire
  * LPT does not to be scanned looking for empty eraseblocks when UBIFS is first
@@ -156,7 +156,6 @@ int ubifs_calc_lpt_geom(struct ubifs_info *c)
 	}
 
 	c->check_lpt_free = c->big_lpt;
-
 	return 0;
 }
 
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 1aefab9..7bbf035 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1604,6 +1604,9 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
 	int ret;
 	void *buf = c->dbg->buf;
 
+	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+		return 0;
+
 	dbg_lp("LEB %d", lnum);
 	err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
 	if (err) {
@@ -1704,6 +1707,9 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
 	long long free = 0;
 	int i;
 
+	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+		return 0;
+
 	for (i = 0; i < c->lpt_lebs; i++) {
 		if (c->ltab[i].tgc || c->ltab[i].cmt)
 			continue;
@@ -1735,6 +1741,9 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 	long long chk_lpt_sz, lpt_sz;
 	int err = 0;
 
+	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+		return 0;
+
 	switch (action) {
 	case 0:
 		d->chk_lpt_sz = 0;
-- 
cgit v0.10.2


From 787845bdeadd368eedeace92d5bf53f5aa1450ba Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 31 Oct 2008 12:17:42 +0200
Subject: UBIFS: dump stack in LPT check functions

It is useful to know how we got to the checking function when
hunting the bugs.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 7bbf035..c5c07f9 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -320,6 +320,7 @@ no_space:
 	dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, "
 		"done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
 	dbg_dump_lpt_info(c);
+	dump_stack();
 	return err;
 }
 
@@ -548,6 +549,7 @@ no_space:
 	dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab "
 	        "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
 	dbg_dump_lpt_info(c);
+	dump_stack();
 	return err;
 }
 
@@ -1722,6 +1724,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
 		dbg_err("LPT space error: free %lld lpt_sz %lld",
 			free, c->lpt_sz);
 		dbg_dump_lpt_info(c);
+		dump_stack();
 		return -EINVAL;
 	}
 	return 0;
@@ -1803,8 +1806,10 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 				d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz);
 			err = -EINVAL;
 		}
-		if (err)
+		if (err) {
 			dbg_dump_lpt_info(c);
+			dump_stack();
+		}
 		d->chk_lpt_sz2 = d->chk_lpt_sz;
 		d->chk_lpt_sz = 0;
 		d->chk_lpt_wastage = 0;
-- 
cgit v0.10.2


From 2ba5f7ae8165b3f575dd3a7d8bb18f421fab8273 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 31 Oct 2008 17:32:30 +0200
Subject: UBIFS: introduce LPT dump function

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 5684277..934db18 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -646,7 +646,8 @@ void dbg_dump_lprops(struct ubifs_info *c)
 	struct ubifs_lprops lp;
 	struct ubifs_lp_stats lst;
 
-	printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid);
+	printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n",
+	       current->pid);
 	ubifs_get_lp_stats(c, &lst);
 	dbg_dump_lstats(&lst);
 
@@ -657,6 +658,8 @@ void dbg_dump_lprops(struct ubifs_info *c)
 
 		dbg_dump_lprop(c, &lp);
 	}
+	printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n",
+	       current->pid);
 }
 
 void dbg_dump_lpt_info(struct ubifs_info *c)
@@ -664,6 +667,7 @@ void dbg_dump_lpt_info(struct ubifs_info *c)
 	int i;
 
 	spin_lock(&dbg_lock);
+	printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid);
 	printk(KERN_DEBUG "\tlpt_sz:        %lld\n", c->lpt_sz);
 	printk(KERN_DEBUG "\tpnode_sz:      %d\n", c->pnode_sz);
 	printk(KERN_DEBUG "\tnnode_sz:      %d\n", c->nnode_sz);
@@ -704,8 +708,8 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
 	if (dbg_failure_mode)
 		return;
 
-	printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum);
-
+	printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
+	       current->pid, lnum);
 	sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
 	if (IS_ERR(sleb)) {
 		ubifs_err("scan error %d", (int)PTR_ERR(sleb));
@@ -722,6 +726,8 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
 		dbg_dump_node(c, snod->node);
 	}
 
+	printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
+	       current->pid, lnum);
 	ubifs_scan_destroy(sleb);
 	return;
 }
@@ -769,7 +775,7 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
 {
 	int i;
 
-	printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n",
+	printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n",
 	       current->pid, cat, heap->cnt);
 	for (i = 0; i < heap->cnt; i++) {
 		struct ubifs_lprops *lprops = heap->arr[i];
@@ -778,6 +784,7 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
 		       "flags %d\n", i, lprops->lnum, lprops->hpos,
 		       lprops->free, lprops->dirty, lprops->flags);
 	}
+	printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid);
 }
 
 void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
@@ -785,7 +792,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
 {
 	int i;
 
-	printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid);
+	printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid);
 	printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
 	       (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
 	printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
@@ -804,7 +811,7 @@ void dbg_dump_tnc(struct ubifs_info *c)
 	int level;
 
 	printk(KERN_DEBUG "\n");
-	printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid);
+	printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid);
 	znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
 	level = znode->level;
 	printk(KERN_DEBUG "== Level %d ==\n", level);
@@ -816,8 +823,7 @@ void dbg_dump_tnc(struct ubifs_info *c)
 		dbg_dump_znode(c, znode);
 		znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
 	}
-
-	printk(KERN_DEBUG "\n");
+	printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid);
 }
 
 static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
@@ -992,7 +998,8 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
 		ubifs_err("1st entry at %d:%d has key %s", zbr1->lnum,
 			  zbr1->offs, DBGKEY(&key));
 		ubifs_err("but it should have key %s according to tnc",
-			  DBGKEY(&zbr1->key)); dbg_dump_node(c, dent1);
+			  DBGKEY(&zbr1->key));
+		dbg_dump_node(c, dent1);
 		goto out_free;
 	}
 
@@ -1001,7 +1008,8 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
 		ubifs_err("2nd entry at %d:%d has key %s", zbr1->lnum,
 			  zbr1->offs, DBGKEY(&key));
 		ubifs_err("but it should have key %s according to tnc",
-			  DBGKEY(&zbr2->key)); dbg_dump_node(c, dent2);
+			  DBGKEY(&zbr2->key));
+		dbg_dump_node(c, dent2);
 		goto out_free;
 	}
 
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index a6b70f8..9820d69 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -270,6 +270,8 @@ const char *dbg_get_key_dump(const struct ubifs_info *c,
 			     const union ubifs_key *key);
 void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
 void dbg_dump_node(const struct ubifs_info *c, const void *node);
+void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
+		       int offs);
 void dbg_dump_budget_req(const struct ubifs_budget_req *req);
 void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
 void dbg_dump_budg(struct ubifs_info *c);
@@ -284,6 +286,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
 		    struct ubifs_nnode *parent, int iip);
 void dbg_dump_tnc(struct ubifs_info *c);
 void dbg_dump_index(struct ubifs_info *c);
+void dbg_dump_lpt_lebs(const struct ubifs_info *c);
 
 /* Checking helper functions */
 typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
@@ -411,26 +414,28 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
 #define DBGKEY(key)  ((char *)(key))
 #define DBGKEY1(key) ((char *)(key))
 
-#define ubifs_debugging_init(c)               0
-#define ubifs_debugging_exit(c)               ({})
-
-#define dbg_ntype(type)                       ""
-#define dbg_cstate(cmt_state)                 ""
-#define dbg_get_key_dump(c, key)              ({})
-#define dbg_dump_inode(c, inode)              ({})
-#define dbg_dump_node(c, node)                ({})
-#define dbg_dump_budget_req(req)              ({})
-#define dbg_dump_lstats(lst)                  ({})
-#define dbg_dump_budg(c)                      ({})
-#define dbg_dump_lprop(c, lp)                 ({})
-#define dbg_dump_lprops(c)                    ({})
-#define dbg_dump_lpt_info(c)                  ({})
-#define dbg_dump_leb(c, lnum)                 ({})
-#define dbg_dump_znode(c, znode)              ({})
-#define dbg_dump_heap(c, heap, cat)           ({})
-#define dbg_dump_pnode(c, pnode, parent, iip) ({})
-#define dbg_dump_tnc(c)                       ({})
-#define dbg_dump_index(c)                     ({})
+#define ubifs_debugging_init(c)                0
+#define ubifs_debugging_exit(c)                ({})
+
+#define dbg_ntype(type)                        ""
+#define dbg_cstate(cmt_state)                  ""
+#define dbg_get_key_dump(c, key)               ({})
+#define dbg_dump_inode(c, inode)               ({})
+#define dbg_dump_node(c, node)                 ({})
+#define dbg_dump_lpt_node(c, node, lnum, offs) ({})
+#define dbg_dump_budget_req(req)               ({})
+#define dbg_dump_lstats(lst)                   ({})
+#define dbg_dump_budg(c)                       ({})
+#define dbg_dump_lprop(c, lp)                  ({})
+#define dbg_dump_lprops(c)                     ({})
+#define dbg_dump_lpt_info(c)                   ({})
+#define dbg_dump_leb(c, lnum)                  ({})
+#define dbg_dump_znode(c, znode)               ({})
+#define dbg_dump_heap(c, heap, cat)            ({})
+#define dbg_dump_pnode(c, pnode, parent, iip)  ({})
+#define dbg_dump_tnc(c)                        ({})
+#define dbg_dump_index(c)                      ({})
+#define dbg_dump_lpt_lebs(c)                   ({})
 
 #define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
 #define dbg_old_index_check_init(c, zroot)         0
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 93c181c..6d91416 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -557,7 +557,7 @@ static int calc_nnode_num(int row, int col)
  * This function calculates and returns the nnode number based on the parent's
  * nnode number and the index in parent.
  */
-static int calc_nnode_num_from_parent(struct ubifs_info *c,
+static int calc_nnode_num_from_parent(const struct ubifs_info *c,
 				      struct ubifs_nnode *parent, int iip)
 {
 	int num, shft;
@@ -582,7 +582,7 @@ static int calc_nnode_num_from_parent(struct ubifs_info *c,
  * This function calculates and returns the pnode number based on the parent's
  * nnode number and the index in parent.
  */
-static int calc_pnode_num_from_parent(struct ubifs_info *c,
+static int calc_pnode_num_from_parent(const struct ubifs_info *c,
 				      struct ubifs_nnode *parent, int iip)
 {
 	int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0;
@@ -965,7 +965,7 @@ static int check_lpt_type(uint8_t **addr, int *pos, int type)
  *
  * This function returns %0 on success and a negative error code on failure.
  */
-static int unpack_pnode(struct ubifs_info *c, void *buf,
+static int unpack_pnode(const struct ubifs_info *c, void *buf,
 			struct ubifs_pnode *pnode)
 {
 	uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
@@ -995,15 +995,15 @@ static int unpack_pnode(struct ubifs_info *c, void *buf,
 }
 
 /**
- * unpack_nnode - unpack a nnode.
+ * ubifs_unpack_nnode - unpack a nnode.
  * @c: UBIFS file-system description object
  * @buf: buffer containing packed nnode to unpack
  * @nnode: nnode structure to fill
  *
  * This function returns %0 on success and a negative error code on failure.
  */
-static int unpack_nnode(struct ubifs_info *c, void *buf,
-			struct ubifs_nnode *nnode)
+int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
+		       struct ubifs_nnode *nnode)
 {
 	uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
 	int i, pos = 0, err;
@@ -1035,7 +1035,7 @@ static int unpack_nnode(struct ubifs_info *c, void *buf,
  *
  * This function returns %0 on success and a negative error code on failure.
  */
-static int unpack_ltab(struct ubifs_info *c, void *buf)
+static int unpack_ltab(const struct ubifs_info *c, void *buf)
 {
 	uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
 	int i, pos = 0, err;
@@ -1067,7 +1067,7 @@ static int unpack_ltab(struct ubifs_info *c, void *buf)
  *
  * This function returns %0 on success and a negative error code on failure.
  */
-static int unpack_lsave(struct ubifs_info *c, void *buf)
+static int unpack_lsave(const struct ubifs_info *c, void *buf)
 {
 	uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
 	int i, pos = 0, err;
@@ -1095,7 +1095,7 @@ static int unpack_lsave(struct ubifs_info *c, void *buf)
  *
  * This function returns %0 on success and a negative error code on failure.
  */
-static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode,
+static int validate_nnode(const struct ubifs_info *c, struct ubifs_nnode *nnode,
 			  struct ubifs_nnode *parent, int iip)
 {
 	int i, lvl, max_offs;
@@ -1139,7 +1139,7 @@ static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode,
  *
  * This function returns %0 on success and a negative error code on failure.
  */
-static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
+static int validate_pnode(const struct ubifs_info *c, struct ubifs_pnode *pnode,
 			  struct ubifs_nnode *parent, int iip)
 {
 	int i;
@@ -1173,7 +1173,8 @@ static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
  * This function calculates the LEB numbers for the LEB properties it contains
  * based on the pnode number.
  */
-static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode)
+static void set_pnode_lnum(const struct ubifs_info *c,
+			   struct ubifs_pnode *pnode)
 {
 	int i, lnum;
 
@@ -1226,7 +1227,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
 		err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz);
 		if (err)
 			goto out;
-		err = unpack_nnode(c, buf, nnode);
+		err = ubifs_unpack_nnode(c, buf, nnode);
 		if (err)
 			goto out;
 	}
@@ -1815,7 +1816,7 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c,
 			       c->nnode_sz);
 		if (err)
 			return ERR_PTR(err);
-		err = unpack_nnode(c, buf, nnode);
+		err = ubifs_unpack_nnode(c, buf, nnode);
 		if (err)
 			return ERR_PTR(err);
 	}
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index c5c07f9..da60b5a 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -320,6 +320,7 @@ no_space:
 	dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, "
 		"done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
 	dbg_dump_lpt_info(c);
+	dbg_dump_lpt_lebs(c);
 	dump_stack();
 	return err;
 }
@@ -549,6 +550,7 @@ no_space:
 	dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab "
 	        "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
 	dbg_dump_lpt_info(c);
+	dbg_dump_lpt_lebs(c);
 	dump_stack();
 	return err;
 }
@@ -1027,7 +1029,7 @@ static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num,
  * @c: UBIFS file-system description object
  * @node_type: LPT node type
  */
-static int get_lpt_node_len(struct ubifs_info *c, int node_type)
+static int get_lpt_node_len(const struct ubifs_info *c, int node_type)
 {
 	switch (node_type) {
 	case UBIFS_LPT_NNODE:
@@ -1048,7 +1050,7 @@ static int get_lpt_node_len(struct ubifs_info *c, int node_type)
  * @buf: buffer
  * @len: length of buffer
  */
-static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len)
+static int get_pad_len(const struct ubifs_info *c, uint8_t *buf, int len)
 {
 	int offs, pad_len;
 
@@ -1065,7 +1067,8 @@ static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len)
  * @buf: buffer
  * @node_num: node number is returned here
  */
-static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num)
+static int get_lpt_node_type(const struct ubifs_info *c, uint8_t *buf,
+			     int *node_num)
 {
 	uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
 	int pos = 0, node_type;
@@ -1083,7 +1086,7 @@ static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num)
  *
  * This function returns %1 if the buffer contains a node or %0 if it does not.
  */
-static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
+static int is_a_node(const struct ubifs_info *c, uint8_t *buf, int len)
 {
 	uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
 	int pos = 0, node_type, node_len;
@@ -1107,7 +1110,6 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
 	return 1;
 }
 
-
 /**
  * lpt_gc_lnum - garbage collect a LPT LEB.
  * @c: UBIFS file-system description object
@@ -1724,6 +1726,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
 		dbg_err("LPT space error: free %lld lpt_sz %lld",
 			free, c->lpt_sz);
 		dbg_dump_lpt_info(c);
+		dbg_dump_lpt_lebs(c);
 		dump_stack();
 		return -EINVAL;
 	}
@@ -1808,6 +1811,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 		}
 		if (err) {
 			dbg_dump_lpt_info(c);
+			dbg_dump_lpt_lebs(c);
 			dump_stack();
 		}
 		d->chk_lpt_sz2 = d->chk_lpt_sz;
@@ -1825,4 +1829,121 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 	}
 }
 
+/**
+ * dbg_dump_lpt_leb - dump an LPT LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to dump
+ *
+ * This function dumps an LEB from LPT area. Nodes in this area are very
+ * different to nodes in the main area (e.g., they do not have common headers,
+ * they do not have 8-byte alignments, etc), so we have a separate function to
+ * dump LPT area LEBs. Note, LPT has to be locked by the coller.
+ */
+static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
+{
+	int err, len = c->leb_size, node_type, node_num, node_len, offs;
+	void *buf = c->dbg->buf;
+
+	printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
+	       current->pid, lnum);
+	err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
+	if (err) {
+		ubifs_err("cannot read LEB %d, error %d", lnum, err);
+		return;
+	}
+	while (1) {
+		offs = c->leb_size - len;
+		if (!is_a_node(c, buf, len)) {
+			int pad_len;
+
+			pad_len = get_pad_len(c, buf, len);
+			if (pad_len) {
+				printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n",
+				       lnum, offs, pad_len);
+				buf += pad_len;
+				len -= pad_len;
+				continue;
+			}
+			if (len)
+				printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n",
+				       lnum, offs, len);
+			break;
+		}
+
+		node_type = get_lpt_node_type(c, buf, &node_num);
+		switch (node_type) {
+		case UBIFS_LPT_PNODE:
+		{
+			node_len = c->pnode_sz;
+			if (c->big_lpt)
+				printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n",
+				       lnum, offs, node_num);
+			else
+				printk(KERN_DEBUG "LEB %d:%d, pnode\n",
+				       lnum, offs);
+			break;
+		}
+		case UBIFS_LPT_NNODE:
+		{
+			int i;
+			struct ubifs_nnode nnode;
+
+			node_len = c->nnode_sz;
+			if (c->big_lpt)
+				printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ",
+				       lnum, offs, node_num);
+			else
+				printk(KERN_DEBUG "LEB %d:%d, nnode, ",
+				       lnum, offs);
+			err = ubifs_unpack_nnode(c, buf, &nnode);
+			for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+				printk("%d:%d", nnode.nbranch[i].lnum,
+				       nnode.nbranch[i].offs);
+				if (i != UBIFS_LPT_FANOUT - 1)
+					printk(", ");
+			}
+			printk("\n");
+			break;
+		}
+		case UBIFS_LPT_LTAB:
+			node_len = c->ltab_sz;
+			printk(KERN_DEBUG "LEB %d:%d, ltab\n",
+			       lnum, offs);
+			break;
+		case UBIFS_LPT_LSAVE:
+			node_len = c->lsave_sz;
+			printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs);
+			break;
+		default:
+			ubifs_err("LPT node type %d not recognized", node_type);
+			return;
+		}
+
+		buf += node_len;
+		len -= node_len;
+	}
+
+	printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
+	       current->pid, lnum);
+}
+
+/**
+ * dbg_dump_lpt_lebs - dump LPT lebs.
+ * @c: UBIFS file-system description object
+ *
+ * This function dumps all LPT LEBs. The caller has to make sure the LPT is
+ * locked.
+ */
+void dbg_dump_lpt_lebs(const struct ubifs_info *c)
+{
+	int i;
+
+	printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n",
+	       current->pid);
+	for (i = 0; i < c->lpt_lebs; i++)
+		dump_lpt_leb(c, i + c->lpt_first);
+	printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n",
+	       current->pid);
+}
+
 #endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 4cf28e8..e658b06 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1622,6 +1622,9 @@ void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty);
 void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode);
 uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits);
 struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght);
+/* Needed only in debugging code in lpt_commit.c */
+int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
+		       struct ubifs_nnode *nnode);
 
 /* lpt_commit.c */
 int ubifs_lpt_start_commit(struct ubifs_info *c);
-- 
cgit v0.10.2


From ed45819f315b5a8844b5bfce881a18e9f3a055e7 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 12 Nov 2008 10:14:10 +0200
Subject: UBI: fix warnings when debugging is enabled

The 'ubi_io_read_vid_hdr()' and 'ubi_io_read_ec_hdr()' function
have the 'verbose' argument which controls whether they should
print a warning if the VID/EC header was not found or was corrupted.
Some callers require the headers to be OK, and pass 1. Some allow
a corrupted/not present header, and pass 0.

       if (UBI_IO_DEBUG)
               verbose = 1;

And UBI_IO_DEBUG is 1 if CONFIG_MTD_UBI_DEBUG_MSG_BLD is true. So in
this case the warning is printed all the time. This confuses people.

Thus, do not print the messages as warnings if UBI_IO_DEBUG is true,
but print them as debugging messages instead.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 2fb64be..f60f7002 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -637,8 +637,6 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
 
 	dbg_io("read EC header from PEB %d", pnum);
 	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
-	if (UBI_IO_DEBUG)
-		verbose = 1;
 
 	err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE);
 	if (err) {
@@ -685,6 +683,9 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
 			if (verbose)
 				ubi_warn("no EC header found at PEB %d, "
 					 "only 0xFF bytes", pnum);
+			else if (UBI_IO_DEBUG)
+				dbg_msg("no EC header found at PEB %d, "
+					"only 0xFF bytes", pnum);
 			return UBI_IO_PEB_EMPTY;
 		}
 
@@ -696,7 +697,9 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
 			ubi_warn("bad magic number at PEB %d: %08x instead of "
 				 "%08x", pnum, magic, UBI_EC_HDR_MAGIC);
 			ubi_dbg_dump_ec_hdr(ec_hdr);
-		}
+		} else if (UBI_IO_DEBUG)
+			dbg_msg("bad magic number at PEB %d: %08x instead of "
+				"%08x", pnum, magic, UBI_EC_HDR_MAGIC);
 		return UBI_IO_BAD_EC_HDR;
 	}
 
@@ -708,7 +711,9 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
 			ubi_warn("bad EC header CRC at PEB %d, calculated "
 				 "%#08x, read %#08x", pnum, crc, hdr_crc);
 			ubi_dbg_dump_ec_hdr(ec_hdr);
-		}
+		} else if (UBI_IO_DEBUG)
+			dbg_msg("bad EC header CRC at PEB %d, calculated "
+				"%#08x, read %#08x", pnum, crc, hdr_crc);
 		return UBI_IO_BAD_EC_HDR;
 	}
 
@@ -912,8 +917,6 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum,
 
 	dbg_io("read VID header from PEB %d", pnum);
 	ubi_assert(pnum >= 0 &&  pnum < ubi->peb_count);
-	if (UBI_IO_DEBUG)
-		verbose = 1;
 
 	p = (char *)vid_hdr - ubi->vid_hdr_shift;
 	err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
@@ -960,6 +963,9 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum,
 			if (verbose)
 				ubi_warn("no VID header found at PEB %d, "
 					 "only 0xFF bytes", pnum);
+			else if (UBI_IO_DEBUG)
+				dbg_msg("no VID header found at PEB %d, "
+					"only 0xFF bytes", pnum);
 			return UBI_IO_PEB_FREE;
 		}
 
@@ -971,7 +977,9 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum,
 			ubi_warn("bad magic number at PEB %d: %08x instead of "
 				 "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
 			ubi_dbg_dump_vid_hdr(vid_hdr);
-		}
+		} else if (UBI_IO_DEBUG)
+			dbg_msg("bad magic number at PEB %d: %08x instead of "
+				"%08x", pnum, magic, UBI_VID_HDR_MAGIC);
 		return UBI_IO_BAD_VID_HDR;
 	}
 
@@ -983,7 +991,9 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum,
 			ubi_warn("bad CRC at PEB %d, calculated %#08x, "
 				 "read %#08x", pnum, crc, hdr_crc);
 			ubi_dbg_dump_vid_hdr(vid_hdr);
-		}
+		} else if (UBI_IO_DEBUG)
+			dbg_msg("bad CRC at PEB %d, calculated %#08x, "
+				"read %#08x", pnum, crc, hdr_crc);
 		return UBI_IO_BAD_VID_HDR;
 	}
 
-- 
cgit v0.10.2


From 995be04548f62c8e6b447410cd28b0666614b461 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 4 Dec 2008 17:04:18 +0300
Subject: UBIFS: fix section mismatch

This patch fixes the following section mismatch:

WARNING: fs/ubifs/ubifs.o(.init.text+0xec): Section mismatch in reference from the function init_module() to the function .exit.text:ubifs_compressors_exit()

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 4afb3ea..4c90ee2 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -244,7 +244,7 @@ out_lzo:
 /**
  * ubifs_compressors_exit - de-initialize UBIFS compressors.
  */
-void __exit ubifs_compressors_exit(void)
+void ubifs_compressors_exit(void)
 {
 	compr_exit(&lzo_compr);
 	compr_exit(&zlib_compr);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index e658b06..055c6b5 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1700,7 +1700,7 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 
 /* compressor.c */
 int __init ubifs_compressors_init(void);
-void __exit ubifs_compressors_exit(void);
+void ubifs_compressors_exit(void);
 void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
 		    int *compr_type);
 int ubifs_decompress(const void *buf, int len, void *out, int *out_len,
-- 
cgit v0.10.2


From 4df581f3dc6a91a63b9965ac8bdb47d8db294e37 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 4 Dec 2008 20:52:44 +0200
Subject: UBI: fix deadlock

We cannot call 'ubi_wl_get_peb()' with @ubi->buf_mutex locked,
because 'ubi_wl_get_peb()' may force erasure, which, in turn,
may call 'torture_peb()' which also locks the @ubi->buf_mutex
and deadlocks.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index d8966ba..2e4d6bf 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -504,12 +504,9 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
 	if (!vid_hdr)
 		return -ENOMEM;
 
-	mutex_lock(&ubi->buf_mutex);
-
 retry:
 	new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN);
 	if (new_pnum < 0) {
-		mutex_unlock(&ubi->buf_mutex);
 		ubi_free_vid_hdr(ubi, vid_hdr);
 		return new_pnum;
 	}
@@ -529,20 +526,23 @@ retry:
 		goto write_error;
 
 	data_size = offset + len;
+	mutex_lock(&ubi->buf_mutex);
 	memset(ubi->peb_buf1 + offset, 0xFF, len);
 
 	/* Read everything before the area where the write failure happened */
 	if (offset > 0) {
 		err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset);
 		if (err && err != UBI_IO_BITFLIPS)
-			goto out_put;
+			goto out_unlock;
 	}
 
 	memcpy(ubi->peb_buf1 + offset, buf, len);
 
 	err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size);
-	if (err)
+	if (err) {
+		mutex_unlock(&ubi->buf_mutex);
 		goto write_error;
+	}
 
 	mutex_unlock(&ubi->buf_mutex);
 	ubi_free_vid_hdr(ubi, vid_hdr);
@@ -553,8 +553,9 @@ retry:
 	ubi_msg("data was successfully recovered");
 	return 0;
 
-out_put:
+out_unlock:
 	mutex_unlock(&ubi->buf_mutex);
+out_put:
 	ubi_wl_put_peb(ubi, new_pnum, 1);
 	ubi_free_vid_hdr(ubi, vid_hdr);
 	return err;
@@ -567,7 +568,6 @@ write_error:
 	ubi_warn("failed to write to PEB %d", new_pnum);
 	ubi_wl_put_peb(ubi, new_pnum, 1);
 	if (++tries > UBI_IO_RETRIES) {
-		mutex_unlock(&ubi->buf_mutex);
 		ubi_free_vid_hdr(ubi, vid_hdr);
 		return err;
 	}
-- 
cgit v0.10.2


From 6a8f483f33a150a0269ad4612621eb6c245eb2cf Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 5 Dec 2008 12:23:48 +0200
Subject: UBI: some code re-structuring

Minor code re-structuring and commentaries fixes to improve readability.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index dcb6dac..667f5f4 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -359,19 +359,18 @@ static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
  * @ubi: UBI device description object
  * @e: the physical eraseblock to add
  * @pe: protection entry object to use
- * @abs_ec: absolute erase counter value when this physical eraseblock has
- * to be removed from the protection trees.
+ * @ec: for how many erase operations this PEB should be protected
  *
  * @wl->lock has to be locked.
  */
 static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e,
-			  struct ubi_wl_prot_entry *pe, int abs_ec)
+			  struct ubi_wl_prot_entry *pe, int ec)
 {
 	struct rb_node **p, *parent = NULL;
 	struct ubi_wl_prot_entry *pe1;
 
 	pe->e = e;
-	pe->abs_ec = ubi->abs_ec + abs_ec;
+	pe->abs_ec = ubi->abs_ec + ec;
 
 	p = &ubi->prot.pnum.rb_node;
 	while (*p) {
@@ -739,7 +738,7 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
 static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 				int cancel)
 {
-	int err, put = 0, scrubbing = 0, protect = 0;
+	int err, put = 0, scrubbing = 0;
 	struct ubi_wl_prot_entry *uninitialized_var(pe);
 	struct ubi_wl_entry *e1, *e2;
 	struct ubi_vid_hdr *vid_hdr;
@@ -864,17 +863,28 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 			goto out_error;
 		}
 
-		protect = 1;
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		spin_lock(&ubi->wl_lock);
+		prot_tree_add(ubi, e1, pe, U_PROTECTION);
+		ubi_assert(!ubi->move_to_put);
+		ubi->move_from = ubi->move_to = NULL;
+		ubi->wl_scheduled = 0;
+		spin_unlock(&ubi->wl_lock);
+
+		err = schedule_erase(ubi, e2, 0);
+		if (err)
+			goto out_error;
+		mutex_unlock(&ubi->move_mutex);
+		return 0;
 	}
 
+	/* The PEB has been successfully moved */
 	ubi_free_vid_hdr(ubi, vid_hdr);
-	if (scrubbing && !protect)
+	if (scrubbing)
 		ubi_msg("scrubbed PEB %d, data moved to PEB %d",
 			e1->pnum, e2->pnum);
 
 	spin_lock(&ubi->wl_lock);
-	if (protect)
-		prot_tree_add(ubi, e1, pe, protect);
 	if (!ubi->move_to_put)
 		wl_tree_add(e2, &ubi->used);
 	else
@@ -883,6 +893,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 	ubi->move_to_put = ubi->wl_scheduled = 0;
 	spin_unlock(&ubi->wl_lock);
 
+	err = schedule_erase(ubi, e1, 0);
+	if (err)
+		goto out_error;
+
 	if (put) {
 		/*
 		 * Well, the target PEB was put meanwhile, schedule it for
@@ -894,13 +908,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 			goto out_error;
 	}
 
-	if (!protect) {
-		err = schedule_erase(ubi, e1, 0);
-		if (err)
-			goto out_error;
-	}
-
-
 	dbg_wl("done");
 	mutex_unlock(&ubi->move_mutex);
 	return 0;
-- 
cgit v0.10.2


From 3c98b0a043f25fa44b289c2f35b9d6ad1d859ac9 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 5 Dec 2008 12:42:45 +0200
Subject: UBI: fix error path

Make sure the resources had not already been freed before
freeing them in the error path of the WL worker function.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 667f5f4..442099d 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -738,13 +738,12 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
 static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 				int cancel)
 {
-	int err, put = 0, scrubbing = 0;
+	int err, scrubbing = 0;
 	struct ubi_wl_prot_entry *uninitialized_var(pe);
 	struct ubi_wl_entry *e1, *e2;
 	struct ubi_vid_hdr *vid_hdr;
 
 	kfree(wrk);
-
 	if (cancel)
 		return 0;
 
@@ -864,6 +863,8 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 		}
 
 		ubi_free_vid_hdr(ubi, vid_hdr);
+		vid_hdr = NULL;
+
 		spin_lock(&ubi->wl_lock);
 		prot_tree_add(ubi, e1, pe, U_PROTECTION);
 		ubi_assert(!ubi->move_to_put);
@@ -871,6 +872,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 		ubi->wl_scheduled = 0;
 		spin_unlock(&ubi->wl_lock);
 
+		e1 = NULL;
 		err = schedule_erase(ubi, e2, 0);
 		if (err)
 			goto out_error;
@@ -880,24 +882,27 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 
 	/* The PEB has been successfully moved */
 	ubi_free_vid_hdr(ubi, vid_hdr);
+	vid_hdr = NULL;
 	if (scrubbing)
 		ubi_msg("scrubbed PEB %d, data moved to PEB %d",
 			e1->pnum, e2->pnum);
 
 	spin_lock(&ubi->wl_lock);
-	if (!ubi->move_to_put)
+	if (!ubi->move_to_put) {
 		wl_tree_add(e2, &ubi->used);
-	else
-		put = 1;
+		e2 = NULL;
+	}
 	ubi->move_from = ubi->move_to = NULL;
 	ubi->move_to_put = ubi->wl_scheduled = 0;
 	spin_unlock(&ubi->wl_lock);
 
 	err = schedule_erase(ubi, e1, 0);
-	if (err)
+	if (err) {
+		e1 = NULL;
 		goto out_error;
+	}
 
-	if (put) {
+	if (e2) {
 		/*
 		 * Well, the target PEB was put meanwhile, schedule it for
 		 * erasure.
@@ -919,6 +924,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 	 */
 out_not_moved:
 	ubi_free_vid_hdr(ubi, vid_hdr);
+	vid_hdr = NULL;
 	spin_lock(&ubi->wl_lock);
 	if (scrubbing)
 		wl_tree_add(e1, &ubi->scrub);
@@ -928,6 +934,7 @@ out_not_moved:
 	ubi->move_to_put = ubi->wl_scheduled = 0;
 	spin_unlock(&ubi->wl_lock);
 
+	e1 = NULL;
 	err = schedule_erase(ubi, e2, 0);
 	if (err)
 		goto out_error;
@@ -945,8 +952,10 @@ out_error:
 	ubi->move_to_put = ubi->wl_scheduled = 0;
 	spin_unlock(&ubi->wl_lock);
 
-	kmem_cache_free(ubi_wl_entry_slab, e1);
-	kmem_cache_free(ubi_wl_entry_slab, e2);
+	if (e1)
+		kmem_cache_free(ubi_wl_entry_slab, e1);
+	if (e2)
+		kmem_cache_free(ubi_wl_entry_slab, e2);
 	ubi_ro_mode(ubi);
 
 	mutex_unlock(&ubi->move_mutex);
-- 
cgit v0.10.2


From 6fa6f5bbc3a2ad833a3d4b798140602004f70f5a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 5 Dec 2008 13:37:02 +0200
Subject: UBI: handle write errors in WL worker

When a PEB is moved and a write error happens, UBI switches
to R/O mode, which is wrong, because we just copy the data
and may select a different PEB and re-try this. This patch
fixes WL worker's behavior.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 2e4d6bf..048a606 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -949,10 +949,14 @@ write_error:
  * This function copies logical eraseblock from physical eraseblock @from to
  * physical eraseblock @to. The @vid_hdr buffer may be changed by this
  * function. Returns:
- *   o %0  in case of success;
- *   o %1 if the operation was canceled and should be tried later (e.g.,
- *     because a bit-flip was detected at the target PEB);
- *   o %2 if the volume is being deleted and this LEB should not be moved.
+ *   o %0 in case of success;
+ *   o %1 if the operation was canceled because the volume is being deleted
+ *        or because the PEB was put meanwhile;
+ *   o %2 if the operation was canceled because there was a write error to the
+ *        target PEB;
+ *   o %-EAGAIN if the operation was canceled because a bit-flip was detected
+ *     in the target PEB;
+ *   o a negative error code in case of failure.
  */
 int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 		     struct ubi_vid_hdr *vid_hdr)
@@ -978,7 +982,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 	/*
 	 * Note, we may race with volume deletion, which means that the volume
 	 * this logical eraseblock belongs to might be being deleted. Since the
-	 * volume deletion unmaps all the volume's logical eraseblocks, it will
+	 * volume deletion un-maps all the volume's logical eraseblocks, it will
 	 * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish.
 	 */
 	vol = ubi->volumes[idx];
@@ -986,7 +990,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 		/* No need to do further work, cancel */
 		dbg_eba("volume %d is being removed, cancel", vol_id);
 		spin_unlock(&ubi->volumes_lock);
-		return 2;
+		return 1;
 	}
 	spin_unlock(&ubi->volumes_lock);
 
@@ -1023,7 +1027,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 
 	/*
 	 * OK, now the LEB is locked and we can safely start moving it. Since
-	 * this function utilizes thie @ubi->peb1_buf buffer which is shared
+	 * this function utilizes the @ubi->peb1_buf buffer which is shared
 	 * with some other functions, so lock the buffer by taking the
 	 * @ubi->buf_mutex.
 	 */
@@ -1068,8 +1072,11 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 	vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
 
 	err = ubi_io_write_vid_hdr(ubi, to, vid_hdr);
-	if (err)
+	if (err) {
+		if (err == -EIO)
+			err = 2;
 		goto out_unlock_buf;
+	}
 
 	cond_resched();
 
@@ -1079,14 +1086,17 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 		if (err != UBI_IO_BITFLIPS)
 			ubi_warn("cannot read VID header back from PEB %d", to);
 		else
-			err = 1;
+			err = -EAGAIN;
 		goto out_unlock_buf;
 	}
 
 	if (data_size > 0) {
 		err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size);
-		if (err)
+		if (err) {
+			if (err == -EIO)
+				err = 2;
 			goto out_unlock_buf;
+		}
 
 		cond_resched();
 
@@ -1101,15 +1111,16 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 				ubi_warn("cannot read data back from PEB %d",
 					 to);
 			else
-				err = 1;
+				err = -EAGAIN;
 			goto out_unlock_buf;
 		}
 
 		cond_resched();
 
 		if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) {
-			ubi_warn("read data back from PEB %d - it is different",
-				 to);
+			ubi_warn("read data back from PEB %d and it is "
+				 "different", to);
+			err = -EINVAL;
 			goto out_unlock_buf;
 		}
 	}
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 442099d..abf65ea 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -738,7 +738,7 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
 static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 				int cancel)
 {
-	int err, scrubbing = 0;
+	int err, scrubbing = 0, torture = 0;
 	struct ubi_wl_prot_entry *uninitialized_var(pe);
 	struct ubi_wl_entry *e1, *e2;
 	struct ubi_vid_hdr *vid_hdr;
@@ -842,20 +842,26 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 
 	err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
 	if (err) {
-
+		if (err == -EAGAIN)
+			goto out_not_moved;
 		if (err < 0)
 			goto out_error;
-		if (err == 1)
+		if (err == 2) {
+			/* Target PEB write error, torture it */
+			torture = 1;
 			goto out_not_moved;
+		}
 
 		/*
-		 * For some reason the LEB was not moved - it might be because
-		 * the volume is being deleted. We should prevent this PEB from
-		 * being selected for wear-levelling movement for some "time",
-		 * so put it to the protection tree.
+		 * The LEB has not been moved because the volume is being
+		 * deleted or the PEB has been put meanwhile. We should prevent
+		 * this PEB from being selected for wear-leveling movement
+		 * again, so put it to the protection tree.
 		 */
 
-		dbg_wl("cancelled moving PEB %d", e1->pnum);
+		dbg_wl("canceled moving PEB %d", e1->pnum);
+		ubi_assert(err == 1);
+
 		pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
 		if (!pe) {
 			err = -ENOMEM;
@@ -920,9 +926,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 	/*
 	 * For some reasons the LEB was not moved, might be an error, might be
 	 * something else. @e1 was not changed, so return it back. @e2 might
-	 * be changed, schedule it for erasure.
+	 * have been changed, schedule it for erasure.
 	 */
 out_not_moved:
+	dbg_wl("canceled moving PEB %d", e1->pnum);
 	ubi_free_vid_hdr(ubi, vid_hdr);
 	vid_hdr = NULL;
 	spin_lock(&ubi->wl_lock);
@@ -930,12 +937,13 @@ out_not_moved:
 		wl_tree_add(e1, &ubi->scrub);
 	else
 		wl_tree_add(e1, &ubi->used);
+	ubi_assert(!ubi->move_to_put);
 	ubi->move_from = ubi->move_to = NULL;
-	ubi->move_to_put = ubi->wl_scheduled = 0;
+	ubi->wl_scheduled = 0;
 	spin_unlock(&ubi->wl_lock);
 
 	e1 = NULL;
-	err = schedule_erase(ubi, e2, 0);
+	err = schedule_erase(ubi, e2, torture);
 	if (err)
 		goto out_error;
 
@@ -1324,7 +1332,7 @@ int ubi_wl_flush(struct ubi_device *ubi)
 	up_write(&ubi->work_sem);
 
 	/*
-	 * And in case last was the WL worker and it cancelled the LEB
+	 * And in case last was the WL worker and it canceled the LEB
 	 * movement, flush again.
 	 */
 	while (ubi->works_count) {
-- 
cgit v0.10.2


From 9963d1aad40946b1b6d34f9bee8d8a1b9032ae22 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 21 Nov 2008 21:07:16 +1030
Subject: [CPUFREQ] clean up speedstep-centrino and reduce cpumask_t usage

Impact: cleanup

1) The #ifdef CONFIG_HOTPLUG_CPU seems unnecessary these days.
2) The loop can simply skip over offline cpus, rather than creating a tmp mask.
3) set_mask is set to either a single cpu or all online cpus in a policy.
   Since it's just used for set_cpus_allowed(), any offline cpus in a policy
   don't matter, so we can just use cpumask_of_cpu() or the policy->cpus.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 3b5f064..f0ea6fa 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -459,9 +459,7 @@ static int centrino_verify (struct cpufreq_policy *policy)
  * Sets a new CPUFreq policy.
  */
 struct allmasks {
-	cpumask_t		online_policy_cpus;
 	cpumask_t		saved_mask;
-	cpumask_t		set_mask;
 	cpumask_t		covered_cpus;
 };
 
@@ -475,9 +473,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 	int			retval = 0;
 	unsigned int		j, k, first_cpu, tmp;
 	CPUMASK_ALLOC(allmasks);
-	CPUMASK_PTR(online_policy_cpus, allmasks);
 	CPUMASK_PTR(saved_mask, allmasks);
-	CPUMASK_PTR(set_mask, allmasks);
 	CPUMASK_PTR(covered_cpus, allmasks);
 
 	if (unlikely(allmasks == NULL))
@@ -497,30 +493,28 @@ static int centrino_target (struct cpufreq_policy *policy,
 		goto out;
 	}
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* cpufreq holds the hotplug lock, so we are safe from here on */
-	cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus);
-#else
-	*online_policy_cpus = policy->cpus;
-#endif
-
 	*saved_mask = current->cpus_allowed;
 	first_cpu = 1;
 	cpus_clear(*covered_cpus);
-	for_each_cpu_mask_nr(j, *online_policy_cpus) {
+	for_each_cpu_mask_nr(j, policy->cpus) {
+		const cpumask_t *mask;
+
+		/* cpufreq holds the hotplug lock, so we are safe here */
+		if (!cpu_online(j))
+			continue;
+
 		/*
 		 * Support for SMP systems.
 		 * Make sure we are running on CPU that wants to change freq
 		 */
-		cpus_clear(*set_mask);
 		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
-			cpus_or(*set_mask, *set_mask, *online_policy_cpus);
+			mask = &policy->cpus;
 		else
-			cpu_set(j, *set_mask);
+			mask = &cpumask_of_cpu(j);
 
-		set_cpus_allowed_ptr(current, set_mask);
+		set_cpus_allowed_ptr(current, mask);
 		preempt_disable();
-		if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) {
+		if (unlikely(!cpu_isset(smp_processor_id(), *mask))) {
 			dprintk("couldn't limit to CPUs in this domain\n");
 			retval = -EAGAIN;
 			if (first_cpu) {
@@ -548,7 +542,9 @@ static int centrino_target (struct cpufreq_policy *policy,
 			dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
 				target_freq, freqs.old, freqs.new, msr);
 
-			for_each_cpu_mask_nr(k, *online_policy_cpus) {
+			for_each_cpu_mask_nr(k, policy->cpus) {
+				if (!cpu_online(k))
+					continue;
 				freqs.cpu = k;
 				cpufreq_notify_transition(&freqs,
 					CPUFREQ_PRECHANGE);
@@ -571,7 +567,9 @@ static int centrino_target (struct cpufreq_policy *policy,
 		preempt_enable();
 	}
 
-	for_each_cpu_mask_nr(k, *online_policy_cpus) {
+	for_each_cpu_mask_nr(k, policy->cpus) {
+		if (!cpu_online(k))
+			continue;
 		freqs.cpu = k;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -584,18 +582,17 @@ static int centrino_target (struct cpufreq_policy *policy,
 		 * Best effort undo..
 		 */
 
-		if (!cpus_empty(*covered_cpus))
-			for_each_cpu_mask_nr(j, *covered_cpus) {
-				set_cpus_allowed_ptr(current,
-						     &cpumask_of_cpu(j));
-				wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
-			}
+		for_each_cpu_mask_nr(j, *covered_cpus) {
+			set_cpus_allowed_ptr(current, &cpumask_of_cpu(j));
+			wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+		}
 
 		tmp = freqs.new;
 		freqs.new = freqs.old;
 		freqs.old = tmp;
-		for_each_cpu_mask_nr(j, *online_policy_cpus) {
-			freqs.cpu = j;
+		for_each_cpu_mask_nr(j, policy->cpus) {
+			if (!cpu_online(j))
+				continue;
 			cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 			cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 		}
-- 
cgit v0.10.2


From 10db2e5cbda5b4e13d2e2f134b963bee2e129999 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Fri, 17 Oct 2008 22:52:04 +0200
Subject: [CPUFREQ] p4-clockmod: reduce noise

On those CPUs which are SpeedStep (EST) capable, we do not care at all if
p4-clockmod does not work, since a technically superior CPU frequency
management technology is to be used.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b8e05ee..ba3a94a 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -171,7 +171,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 	}
 
 	if (c->x86 != 0xF) {
-		printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n");
+		if (!cpu_has(c, X86_FEATURE_EST))
+			printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. "
+				"Please send an e-mail to <cpufreq@vger.kernel.org>\n");
 		return 0;
 	}
 
-- 
cgit v0.10.2


From e088e4c9cdb618675874becb91b2fd581ee707e6 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 25 Nov 2008 13:29:47 -0500
Subject: [CPUFREQ] Disable sysfs ui for p4-clockmod.

p4-clockmod has a long history of abuse.   It pretends to be a CPU
frequency scaling driver, even though it doesn't actually change
the CPU frequency, but instead just modulates the frequency with
wait-states.
The biggest misconception is that when running at the lower 'frequency'
p4-clockmod is saving power.  This isn't the case, as workloads running
slower take longer to complete, preventing the CPU from entering deep C states.

However p4-clockmod does have a purpose.  It can prevent overheating.
Having it hooked up to the cpufreq interfaces is the wrong way to achieve
cooling however. It should instead be hooked up to ACPI.

This diff introduces a means for a cpufreq driver to register with the
cpufreq core, but not present a sysfs interface.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index ba3a94a..0c43b22 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -276,6 +276,7 @@ static struct cpufreq_driver p4clockmod_driver = {
 	.name		= "p4-clockmod",
 	.owner		= THIS_MODULE,
 	.attr		= p4clockmod_attr,
+	.hide_interface	= 1,
 };
 
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 31d6f53..9044b91 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -754,6 +754,11 @@ static struct kobj_type ktype_cpufreq = {
 	.release	= cpufreq_sysfs_release,
 };
 
+static struct kobj_type ktype_empty_cpufreq = {
+	.sysfs_ops	= &sysfs_ops,
+	.release	= cpufreq_sysfs_release,
+};
+
 
 /**
  * cpufreq_add_dev - add a CPU device
@@ -876,26 +881,36 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
 
 	/* prepare interface data */
-	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj,
-				   "cpufreq");
-	if (ret)
-		goto err_out_driver_exit;
-
-	/* set up files for this cpu device */
-	drv_attr = cpufreq_driver->attr;
-	while ((drv_attr) && (*drv_attr)) {
-		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
+	if (!cpufreq_driver->hide_interface) {
+		ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
+					   &sys_dev->kobj, "cpufreq");
 		if (ret)
 			goto err_out_driver_exit;
-		drv_attr++;
-	}
-	if (cpufreq_driver->get) {
-		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
-		if (ret)
-			goto err_out_driver_exit;
-	}
-	if (cpufreq_driver->target) {
-		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
+
+		/* set up files for this cpu device */
+		drv_attr = cpufreq_driver->attr;
+		while ((drv_attr) && (*drv_attr)) {
+			ret = sysfs_create_file(&policy->kobj,
+						&((*drv_attr)->attr));
+			if (ret)
+				goto err_out_driver_exit;
+			drv_attr++;
+		}
+		if (cpufreq_driver->get) {
+			ret = sysfs_create_file(&policy->kobj,
+						&cpuinfo_cur_freq.attr);
+			if (ret)
+				goto err_out_driver_exit;
+		}
+		if (cpufreq_driver->target) {
+			ret = sysfs_create_file(&policy->kobj,
+						&scaling_cur_freq.attr);
+			if (ret)
+				goto err_out_driver_exit;
+		}
+	} else {
+		ret = kobject_init_and_add(&policy->kobj, &ktype_empty_cpufreq,
+					   &sys_dev->kobj, "cpufreq");
 		if (ret)
 			goto err_out_driver_exit;
 	}
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1ee608f..484b3ab 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -234,6 +234,7 @@ struct cpufreq_driver {
 	int	(*suspend)	(struct cpufreq_policy *policy, pm_message_t pmsg);
 	int	(*resume)	(struct cpufreq_policy *policy);
 	struct freq_attr	**attr;
+	bool			hide_interface;
 };
 
 /* flags */
-- 
cgit v0.10.2


From c60e19eb21d9a0fb0d78969884f32d88354abca9 Mon Sep 17 00:00:00 2001
From: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Date: Sat, 15 Nov 2008 17:02:46 -0200
Subject: [CPUFREQ] add to speedstep-lib additional fsb values for core
 processors

Add additional fsb values to pentium_core_get_frequency, from latest edition
(September 2008) of Intel 64 and IA-32 Architectures Software Develper's Manual,
Volume 3B: System Programming Guide, Part 2. Values added are to detect 800,
1067 and 1333 FSB types.

Signed-off-by: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index 98d4fdb..cdac7d6 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -139,6 +139,15 @@ static unsigned int pentium_core_get_frequency(void)
 	case 3:
 		fsb = 166667;
 		break;
+	case 2:
+		fsb = 200000;
+		break;
+	case 0:
+		fsb = 266667;
+		break;
+	case 4:
+		fsb = 333333;
+		break;
 	default:
 		printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
 	}
-- 
cgit v0.10.2


From 8529154ec3f3ac20344c65b7a040c604c7af7651 Mon Sep 17 00:00:00 2001
From: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Date: Sat, 15 Nov 2008 17:02:46 -0200
Subject: [CPUFREQ] Add Celeron Core support to p4-clockmod.

Add Celeron Core support to p4-clockmod.

Signed-off-by: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 0c43b22..beea446 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -160,6 +160,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 		switch (c->x86_model) {
 		case 0x0E: /* Core */
 		case 0x0F: /* Core Duo */
+		case 0x16: /* Celeron Core */
 			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
 			return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE);
 		case 0x0D: /* Pentium M (Dothan) */
-- 
cgit v0.10.2


From 187d9f4ed4fc089f1f25a875fb485e27626972f9 Mon Sep 17 00:00:00 2001
From: Mike Chan <mike@android.com>
Date: Thu, 4 Dec 2008 12:19:17 -0800
Subject: [CPUFREQ] Fix on resume, now preserves user policy min/max.

Previously driver resume would always set the current policy min/max with
the cpuinfo min/max, defined by user_policy.min/max. Resulting in a reset
of policy settings when policy.min/max != cpuinfo.min/max when coming out
of suspend. Now user_policy is saved as the policy instead of cpuinfo to
preserve what the user actually set.

Signed-off-by: Mike Chan <mike@android.com>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 9044b91..01dde80 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -827,8 +827,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 		dprintk("initialization failed\n");
 		goto err_out;
 	}
-	policy->user_policy.min = policy->cpuinfo.min_freq;
-	policy->user_policy.max = policy->cpuinfo.max_freq;
+	policy->user_policy.min = policy->min;
+	policy->user_policy.max = policy->max;
 
 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				     CPUFREQ_START, policy);
-- 
cgit v0.10.2


From 8e26e1d7bce73acf6f995a4d252610e46ee831a5 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 9 Dec 2008 14:41:44 +0200
Subject: UBI: document UBI ioctls

Update the ioctl-numbers.txt file, add UBI and DVB there
(because they use the same ioctl numbers).

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index b880ce5..8246991 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -97,6 +97,7 @@ Code	Seq#	Include File		Comments
 					<http://linux01.gwdg.de/~alatham/ppdd.html>
 'M'	all	linux/soundcard.h
 'N'	00-1F	drivers/usb/scanner.h
+'O'     00-02   include/mtd/ubi-user.h UBI
 'P'	all	linux/soundcard.h
 'Q'	all	linux/soundcard.h
 'R'	00-1F	linux/random.h
@@ -142,6 +143,9 @@ Code	Seq#	Include File		Comments
 'n'	00-7F	linux/ncp_fs.h
 'n'	E0-FF	video/matrox.h          matroxfb
 'o'	00-1F	fs/ocfs2/ocfs2_fs.h	OCFS2
+'o'     00-03   include/mtd/ubi-user.h  conflict! (OCFS2 and UBI overlaps)
+'o'     40-41   include/mtd/ubi-user.h  UBI
+'o'     01-A1   include/linux/dvb/*.h DVB
 'p'	00-0F	linux/phantom.h		conflict! (OpenHaptics needs this)
 'p'	00-3F	linux/mc146818rtc.h	conflict!
 'p'	40-7F	linux/nvram.h
-- 
cgit v0.10.2


From ad5942bad6addcf9697a74413b517d9724d803a4 Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Wed, 10 Dec 2008 10:42:54 +0100
Subject: UBI: return -ENOMEM upon failing vmalloc

Return with correct error code (-ENOMEM) from ubi_attach_mtd_dev() upon
failing vmalloc().

Signed-off-by: Stefan Roese <sr@denx.de>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index c7630a2..ba0bd3d 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -815,19 +815,20 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 	if (err)
 		goto out_free;
 
+	err = -ENOMEM;
 	ubi->peb_buf1 = vmalloc(ubi->peb_size);
 	if (!ubi->peb_buf1)
 		goto out_free;
 
 	ubi->peb_buf2 = vmalloc(ubi->peb_size);
 	if (!ubi->peb_buf2)
-		 goto out_free;
+		goto out_free;
 
 #ifdef CONFIG_MTD_UBI_DEBUG
 	mutex_init(&ubi->dbg_buf_mutex);
 	ubi->dbg_peb_buf = vmalloc(ubi->peb_size);
 	if (!ubi->dbg_peb_buf)
-		 goto out_free;
+		goto out_free;
 #endif
 
 	err = attach_by_scanning(ubi);
-- 
cgit v0.10.2


From 5b37717a23b8e40f6cf7ad85a26ddcf41c171e2c Mon Sep 17 00:00:00 2001
From: Stefano Panella <stefano.panella@csr.com>
Date: Fri, 12 Dec 2008 13:00:06 +0000
Subject: uwb: improved MAS allocator and reservation conflict handling

Greatly enhance the MAS allocator:
  - Handle row and column reservations.
  - Permit all the available MAS to be allocated.
  - Follows the WiMedia rules on MAS selection.

Take appropriate action when reservation conflicts are detected.
  - Correctly identify which reservation wins the conflict.
  - Protect alien BP reservations.
  - If an owned reservation loses, resize/move it.
  - Follow the backoff procedure before requesting additional MAS.

When reservations are terminated, move the remaining reservations (if
necessary) so they keep following the MAS allocation rules.

Signed-off-by: Stefano Panella <stefano.panella@csr.com>
Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/wusbcore/reservation.c b/drivers/usb/wusbcore/reservation.c
index 7b6525d..c37e4f8 100644
--- a/drivers/usb/wusbcore/reservation.c
+++ b/drivers/usb/wusbcore/reservation.c
@@ -48,13 +48,15 @@ static void wusbhc_rsv_complete_cb(struct uwb_rsv *rsv)
 {
 	struct wusbhc *wusbhc = rsv->pal_priv;
 	struct device *dev = wusbhc->dev;
+	struct uwb_mas_bm mas;
 	char buf[72];
 
 	switch (rsv->state) {
 	case UWB_RSV_STATE_O_ESTABLISHED:
-		bitmap_scnprintf(buf, sizeof(buf), rsv->mas.bm, UWB_NUM_MAS);
+		uwb_rsv_get_usable_mas(rsv, &mas);
+		bitmap_scnprintf(buf, sizeof(buf), mas.bm, UWB_NUM_MAS);
 		dev_dbg(dev, "established reservation: %s\n", buf);
-		wusbhc_bwa_set(wusbhc, rsv->stream, &rsv->mas);
+		wusbhc_bwa_set(wusbhc, rsv->stream, &mas);
 		break;
 	case UWB_RSV_STATE_NONE:
 		dev_dbg(dev, "removed reservation\n");
@@ -85,13 +87,12 @@ int wusbhc_rsv_establish(struct wusbhc *wusbhc)
 	bcid.data[0] = wusbhc->cluster_id;
 	bcid.data[1] = 0;
 
-	rsv->owner = &rc->uwb_dev;
 	rsv->target.type = UWB_RSV_TARGET_DEVADDR;
 	rsv->target.devaddr = bcid;
 	rsv->type = UWB_DRP_TYPE_PRIVATE;
-	rsv->max_mas = 256;
-	rsv->min_mas = 16;  /* one MAS per zone? */
-	rsv->sparsity = 16; /* at least one MAS in each zone? */
+	rsv->max_mas = 256; /* try to get as much as possible */
+	rsv->min_mas = 15;  /* one MAS per zone */
+	rsv->max_interval = 1; /* max latency is one zone */
 	rsv->is_multicast = true;
 
 	ret = uwb_rsv_establish(rsv);
diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile
index ce21a95..2f98d08 100644
--- a/drivers/uwb/Makefile
+++ b/drivers/uwb/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_UWB_I1480U)	+= i1480/
 
 uwb-objs :=		\
 	address.o	\
+	allocator.o	\
 	beacon.o	\
 	driver.o	\
 	drp.o		\
diff --git a/drivers/uwb/allocator.c b/drivers/uwb/allocator.c
new file mode 100644
index 0000000..c8185e6
--- /dev/null
+++ b/drivers/uwb/allocator.c
@@ -0,0 +1,386 @@
+/*
+ * UWB reservation management.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/uwb.h>
+
+#include "uwb-internal.h"
+
+static void uwb_rsv_fill_column_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int col, mas, safe_mas, unsafe_mas;
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_col_info *ci = ai->ci;
+	unsigned char c;
+
+	for (col = ci->csi.start_col; col < UWB_NUM_ZONES; col += ci->csi.interval) {
+    
+		safe_mas   = ci->csi.safe_mas_per_col;
+		unsafe_mas = ci->csi.unsafe_mas_per_col;
+    
+		for (mas = 0; mas < UWB_MAS_PER_ZONE; mas++ ) {
+			if (bm[col * UWB_MAS_PER_ZONE + mas] == 0) {
+	
+				if (safe_mas > 0) {
+					safe_mas--;
+					c = UWB_RSV_MAS_SAFE;
+				} else if (unsafe_mas > 0) {
+					unsafe_mas--;
+					c = UWB_RSV_MAS_UNSAFE;
+				} else {
+					break;
+				}
+				bm[col * UWB_MAS_PER_ZONE + mas] = c;
+			}
+		}
+	}
+}
+
+static void uwb_rsv_fill_row_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int mas, col, rows;
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_row_info *ri = &ai->ri;
+	unsigned char c;
+
+	rows = 1;
+	c = UWB_RSV_MAS_SAFE;
+	for (mas = UWB_MAS_PER_ZONE - 1; mas >= 0; mas--) {
+		if (ri->avail[mas] == 1) {
+      
+			if (rows > ri->used_rows) {
+				break;
+			} else if (rows > 7) {
+				c = UWB_RSV_MAS_UNSAFE;
+			}
+
+			for (col = 0; col < UWB_NUM_ZONES; col++) {
+				if (bm[col * UWB_NUM_ZONES + mas] != UWB_RSV_MAS_NOT_AVAIL) {
+					bm[col * UWB_NUM_ZONES + mas] = c;
+					if(c == UWB_RSV_MAS_SAFE)
+						ai->safe_allocated_mases++;
+					else
+						ai->unsafe_allocated_mases++;
+				}
+			}
+			rows++;
+		}
+	}
+	ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases;
+}
+
+/*
+ * Find the best column set for a given availability, interval, num safe mas and
+ * num unsafe mas.
+ *
+ * The different sets are tried in order as shown below, depending on the interval.
+ *
+ * interval = 16
+ *	deep = 0
+ *		set 1 ->  {  8 }
+ *	deep = 1
+ *		set 1 ->  {  4 }
+ *		set 2 ->  { 12 }
+ *	deep = 2
+ *		set 1 ->  {  2 }
+ *		set 2 ->  {  6 }
+ *		set 3 ->  { 10 }
+ *		set 4 ->  { 14 }
+ *	deep = 3
+ *		set 1 ->  {  1 }
+ *		set 2 ->  {  3 }
+ *		set 3 ->  {  5 }
+ *		set 4 ->  {  7 }
+ *		set 5 ->  {  9 }
+ *		set 6 ->  { 11 }
+ *		set 7 ->  { 13 }
+ *		set 8 ->  { 15 }
+ *
+ * interval = 8
+ *	deep = 0
+ *		set 1 ->  {  4  12 }
+ *	deep = 1
+ *		set 1 ->  {  2  10 }
+ *		set 2 ->  {  6  14 }
+ *	deep = 2
+ *		set 1 ->  {  1   9 }
+ *		set 2 ->  {  3  11 }
+ *		set 3 ->  {  5  13 }
+ *		set 4 ->  {  7  15 }
+ *
+ * interval = 4
+ *	deep = 0
+ *		set 1 ->  {  2   6  10  14 }
+ *	deep = 1
+ *		set 1 ->  {  1   5   9  13 }
+ *		set 2 ->  {  3   7  11  15 }
+ *
+ * interval = 2
+ *	deep = 0
+ *		set 1 ->  {  1   3   5   7   9  11  13  15 }
+ */
+static int uwb_rsv_find_best_column_set(struct uwb_rsv_alloc_info *ai, int interval, 
+					int num_safe_mas, int num_unsafe_mas)
+{
+	struct uwb_rsv_col_info *ci = ai->ci;
+	struct uwb_rsv_col_set_info *csi = &ci->csi;
+	struct uwb_rsv_col_set_info tmp_csi;
+	int deep, set, col, start_col_deep, col_start_set;
+	int start_col, max_mas_in_set, lowest_max_mas_in_deep;
+	int n_mas;
+	int found = UWB_RSV_ALLOC_NOT_FOUND; 
+
+	tmp_csi.start_col = 0;
+	start_col_deep = interval;
+	n_mas = num_unsafe_mas + num_safe_mas;
+
+	for (deep = 0; ((interval >> deep) & 0x1) == 0; deep++) {
+		start_col_deep /= 2;
+		col_start_set = 0;
+		lowest_max_mas_in_deep = UWB_MAS_PER_ZONE;
+
+		for (set = 1; set <= (1 << deep); set++) {
+			max_mas_in_set = 0;
+			start_col = start_col_deep + col_start_set;
+			for (col = start_col; col < UWB_NUM_ZONES; col += interval) {
+                
+				if (ci[col].max_avail_safe >= num_safe_mas &&
+				    ci[col].max_avail_unsafe >= n_mas) {
+					if (ci[col].highest_mas[n_mas] > max_mas_in_set)
+						max_mas_in_set = ci[col].highest_mas[n_mas];
+				} else {
+					max_mas_in_set = 0;
+					break;
+				}
+			}
+			if ((lowest_max_mas_in_deep > max_mas_in_set) && max_mas_in_set) {
+				lowest_max_mas_in_deep = max_mas_in_set;
+
+				tmp_csi.start_col = start_col;
+			}
+			col_start_set += (interval >> deep);
+		}
+
+		if (lowest_max_mas_in_deep < 8) {
+			csi->start_col = tmp_csi.start_col;
+			found = UWB_RSV_ALLOC_FOUND;
+			break;
+		} else if ((lowest_max_mas_in_deep > 8) && 
+			   (lowest_max_mas_in_deep != UWB_MAS_PER_ZONE) &&
+			   (found == UWB_RSV_ALLOC_NOT_FOUND)) {
+			csi->start_col = tmp_csi.start_col;
+			found = UWB_RSV_ALLOC_FOUND;
+		}
+	}
+
+	if (found == UWB_RSV_ALLOC_FOUND) {
+		csi->interval = interval;
+		csi->safe_mas_per_col = num_safe_mas;
+		csi->unsafe_mas_per_col = num_unsafe_mas;
+
+		ai->safe_allocated_mases = (UWB_NUM_ZONES / interval) * num_safe_mas;
+		ai->unsafe_allocated_mases = (UWB_NUM_ZONES / interval) * num_unsafe_mas;
+		ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases;
+		ai->interval = interval;		
+	}
+	return found;
+}
+
+static void get_row_descriptors(struct uwb_rsv_alloc_info *ai)
+{
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_row_info *ri = &ai->ri;
+	int col, mas;
+  
+	ri->free_rows = 16;
+	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) {
+		ri->avail[mas] = 1;
+		for (col = 1; col < UWB_NUM_ZONES; col++) {
+			if (bm[col * UWB_NUM_ZONES + mas] == UWB_RSV_MAS_NOT_AVAIL) {
+				ri->free_rows--;
+				ri->avail[mas]=0;
+				break;
+			}
+		}
+	}
+}
+
+static void uwb_rsv_fill_column_info(unsigned char *bm, int column, struct uwb_rsv_col_info *rci)
+{
+	int mas;
+	int block_count = 0, start_block = 0; 
+	int previous_avail = 0;
+	int available = 0;
+	int safe_mas_in_row[UWB_MAS_PER_ZONE] = {
+		8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1,
+	};
+
+	rci->max_avail_safe = 0;
+
+	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) {
+		if (!bm[column * UWB_NUM_ZONES + mas]) {
+			available++;
+			rci->max_avail_unsafe = available;
+
+			rci->highest_mas[available] = mas;
+
+			if (previous_avail) {
+				block_count++;
+				if ((block_count > safe_mas_in_row[start_block]) &&
+				    (!rci->max_avail_safe))
+					rci->max_avail_safe = available - 1;
+			} else {
+				previous_avail = 1;
+				start_block = mas;
+				block_count = 1;
+			}
+		} else {
+			previous_avail = 0;
+		}
+	}
+	if (!rci->max_avail_safe)
+		rci->max_avail_safe = rci->max_avail_unsafe;
+}
+
+static void get_column_descriptors(struct uwb_rsv_alloc_info *ai)
+{
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_col_info *ci = ai->ci;
+	int col;
+
+	for (col = 1; col < UWB_NUM_ZONES; col++) {
+		uwb_rsv_fill_column_info(bm, col, &ci[col]);
+	}
+}
+
+static int uwb_rsv_find_best_row_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int n_rows;
+	int max_rows = ai->max_mas / UWB_USABLE_MAS_PER_ROW;
+	int min_rows = ai->min_mas / UWB_USABLE_MAS_PER_ROW;
+	if (ai->min_mas % UWB_USABLE_MAS_PER_ROW)
+		min_rows++;
+	for (n_rows = max_rows; n_rows >= min_rows; n_rows--) {
+		if (n_rows <= ai->ri.free_rows) {
+			ai->ri.used_rows = n_rows;
+			ai->interval = 1; /* row reservation */
+			uwb_rsv_fill_row_alloc(ai);
+			return UWB_RSV_ALLOC_FOUND;
+		}
+	}  
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
+
+static int uwb_rsv_find_best_col_alloc(struct uwb_rsv_alloc_info *ai, int interval)
+{
+	int n_safe, n_unsafe, n_mas;  
+	int n_column = UWB_NUM_ZONES / interval;
+	int max_per_zone = ai->max_mas / n_column;
+	int min_per_zone = ai->min_mas / n_column;
+
+	if (ai->min_mas % n_column)
+		min_per_zone++;
+
+	if (min_per_zone > UWB_MAS_PER_ZONE) {
+		return UWB_RSV_ALLOC_NOT_FOUND;
+	}
+    
+	if (max_per_zone > UWB_MAS_PER_ZONE) {
+		max_per_zone = UWB_MAS_PER_ZONE;
+	}
+    
+	for (n_mas = max_per_zone; n_mas >= min_per_zone; n_mas--) {
+		if (uwb_rsv_find_best_column_set(ai, interval, 0, n_mas) == UWB_RSV_ALLOC_NOT_FOUND)
+			continue;
+		for (n_safe = n_mas; n_safe >= 0; n_safe--) {
+			n_unsafe = n_mas - n_safe;
+			if (uwb_rsv_find_best_column_set(ai, interval, n_safe, n_unsafe) == UWB_RSV_ALLOC_FOUND) {
+				uwb_rsv_fill_column_alloc(ai);
+				return UWB_RSV_ALLOC_FOUND;
+			}
+		}
+	}
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
+
+int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv, struct uwb_mas_bm *available, 
+				 struct uwb_mas_bm *result)
+{
+	struct uwb_rsv_alloc_info *ai;
+	int interval;
+	int bit_index;
+
+	ai = kzalloc(sizeof(struct uwb_rsv_alloc_info), GFP_KERNEL);
+	
+	ai->min_mas = rsv->min_mas;
+	ai->max_mas = rsv->max_mas;
+	ai->max_interval = rsv->max_interval;
+
+
+	/* fill the not available vector from the available bm */
+	for (bit_index = 0; bit_index < UWB_NUM_MAS; bit_index++) {
+		if (!test_bit(bit_index, available->bm))
+			ai->bm[bit_index] = UWB_RSV_MAS_NOT_AVAIL;
+	}
+
+	if (ai->max_interval == 1) {
+		get_row_descriptors(ai);
+		if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND)
+			goto alloc_found;
+		else
+			goto alloc_not_found;
+	}
+
+	get_column_descriptors(ai);
+        
+	for (interval = 16; interval >= 2; interval>>=1) {
+		if (interval > ai->max_interval)
+			continue;
+		if (uwb_rsv_find_best_col_alloc(ai, interval) == UWB_RSV_ALLOC_FOUND)
+			goto alloc_found;
+	}
+
+	/* try row reservation if no column is found */
+	get_row_descriptors(ai);
+	if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND)
+		goto alloc_found;
+	else
+		goto alloc_not_found;
+
+  alloc_found:
+	bitmap_zero(result->bm, UWB_NUM_MAS);
+	bitmap_zero(result->unsafe_bm, UWB_NUM_MAS);
+	/* fill the safe and unsafe bitmaps */
+	for (bit_index = 0; bit_index < UWB_NUM_MAS; bit_index++) {
+		if (ai->bm[bit_index] == UWB_RSV_MAS_SAFE)
+			set_bit(bit_index, result->bm);
+		else if (ai->bm[bit_index] == UWB_RSV_MAS_UNSAFE)
+			set_bit(bit_index, result->unsafe_bm);
+	}
+	bitmap_or(result->bm, result->bm, result->unsafe_bm, UWB_NUM_MAS);
+
+	result->safe   = ai->safe_allocated_mases;
+	result->unsafe = ai->unsafe_allocated_mases;
+	
+	kfree(ai);		
+	return UWB_RSV_ALLOC_FOUND;
+  
+  alloc_not_found:
+	kfree(ai);
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
diff --git a/drivers/uwb/drp-avail.c b/drivers/uwb/drp-avail.c
index 3febd85..40a540a 100644
--- a/drivers/uwb/drp-avail.c
+++ b/drivers/uwb/drp-avail.c
@@ -58,7 +58,7 @@ void uwb_drp_avail_init(struct uwb_rc *rc)
  *
  * avail = global & local & pending
  */
-static void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail)
+void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail)
 {
 	bitmap_and(avail->bm, rc->drp_avail.global, rc->drp_avail.local, UWB_NUM_MAS);
 	bitmap_and(avail->bm, avail->bm, rc->drp_avail.pending, UWB_NUM_MAS);
@@ -105,6 +105,7 @@ void uwb_drp_avail_release(struct uwb_rc *rc, struct uwb_mas_bm *mas)
 	bitmap_or(rc->drp_avail.local, rc->drp_avail.local, mas->bm, UWB_NUM_MAS);
 	bitmap_or(rc->drp_avail.pending, rc->drp_avail.pending, mas->bm, UWB_NUM_MAS);
 	rc->drp_avail.ie_valid = false;
+	uwb_rsv_handle_drp_avail_change(rc);
 }
 
 /**
@@ -280,6 +281,7 @@ int uwbd_evt_handle_rc_drp_avail(struct uwb_event *evt)
 	mutex_lock(&rc->rsvs_mutex);
 	bitmap_copy(rc->drp_avail.global, bmp, UWB_NUM_MAS);
 	rc->drp_avail.ie_valid = false;
+	uwb_rsv_handle_drp_avail_change(rc);
 	mutex_unlock(&rc->rsvs_mutex);
 
 	uwb_rsv_sched_update(rc);
diff --git a/drivers/uwb/drp-ie.c b/drivers/uwb/drp-ie.c
index 75491d4..2840d7b 100644
--- a/drivers/uwb/drp-ie.c
+++ b/drivers/uwb/drp-ie.c
@@ -22,6 +22,96 @@
 
 #include "uwb-internal.h"
 
+
+/*
+ * Return the reason code for a reservations's DRP IE.
+ */
+int uwb_rsv_reason_code(struct uwb_rsv *rsv)
+{
+	static const int reason_codes[] = {
+		[UWB_RSV_STATE_O_INITIATED]          = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_PENDING]            = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_MODIFIED]           = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_ESTABLISHED]        = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]        = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]     = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]      = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_ACCEPTED]           = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_CONFLICT]           = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_PENDING]            = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_DENIED]             = UWB_DRP_REASON_DENIED,
+		[UWB_RSV_STATE_T_RESIZED]            = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = UWB_DRP_REASON_DENIED,
+	};
+
+	return reason_codes[rsv->state];
+}
+
+/*
+ * Return the reason code for a reservations's companion DRP IE .
+ */
+int uwb_rsv_companion_reason_code(struct uwb_rsv *rsv)
+{
+	static const int companion_reason_codes[] = {
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = UWB_DRP_REASON_DENIED,
+	};
+
+	return companion_reason_codes[rsv->state];
+}
+
+/*
+ * Return the status bit for a reservations's DRP IE.
+ */
+int uwb_rsv_status(struct uwb_rsv *rsv)
+{
+	static const int statuses[] = {
+		[UWB_RSV_STATE_O_INITIATED]          = 0,
+		[UWB_RSV_STATE_O_PENDING]            = 0,
+		[UWB_RSV_STATE_O_MODIFIED]           = 1,
+		[UWB_RSV_STATE_O_ESTABLISHED]        = 1,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]        = 0,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]     = 1,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]      = 1,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = 1,
+		[UWB_RSV_STATE_T_ACCEPTED]           = 1,
+		[UWB_RSV_STATE_T_CONFLICT]           = 0,
+		[UWB_RSV_STATE_T_PENDING]            = 0,
+		[UWB_RSV_STATE_T_DENIED]             = 0,
+		[UWB_RSV_STATE_T_RESIZED]            = 1,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = 1,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = 1,
+
+	};
+
+	return statuses[rsv->state];
+}
+
+/*
+ * Return the status bit for a reservations's companion DRP IE .
+ */
+int uwb_rsv_companion_status(struct uwb_rsv *rsv)
+{
+	static const int companion_statuses[] = {
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = 0,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 0,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = 0,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = 0,
+	};
+
+	return companion_statuses[rsv->state];
+}
+
 /*
  * Allocate a DRP IE.
  *
@@ -33,16 +123,12 @@
 static struct uwb_ie_drp *uwb_drp_ie_alloc(void)
 {
 	struct uwb_ie_drp *drp_ie;
-	unsigned tiebreaker;
 
 	drp_ie = kzalloc(sizeof(struct uwb_ie_drp) +
 			UWB_NUM_ZONES * sizeof(struct uwb_drp_alloc),
 			GFP_KERNEL);
 	if (drp_ie) {
 		drp_ie->hdr.element_id = UWB_IE_DRP;
-
-		get_random_bytes(&tiebreaker, sizeof(unsigned));
-		uwb_ie_drp_set_tiebreaker(drp_ie, tiebreaker & 1);
 	}
 	return drp_ie;
 }
@@ -103,43 +189,17 @@ static void uwb_drp_ie_from_bm(struct uwb_ie_drp *drp_ie,
  */
 int uwb_drp_ie_update(struct uwb_rsv *rsv)
 {
-	struct device *dev = &rsv->rc->uwb_dev.dev;
 	struct uwb_ie_drp *drp_ie;
-	int reason_code, status;
+	struct uwb_rsv_move *mv;
+	int unsafe;
 
-	switch (rsv->state) {
-	case UWB_RSV_STATE_NONE:
+	if (rsv->state == UWB_RSV_STATE_NONE) {
 		kfree(rsv->drp_ie);
 		rsv->drp_ie = NULL;
 		return 0;
-	case UWB_RSV_STATE_O_INITIATED:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 0;
-		break;
-	case UWB_RSV_STATE_O_PENDING:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 0;
-		break;
-	case UWB_RSV_STATE_O_MODIFIED:
-		reason_code = UWB_DRP_REASON_MODIFIED;
-		status = 1;
-		break;
-	case UWB_RSV_STATE_O_ESTABLISHED:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 1;
-		break;
-	case UWB_RSV_STATE_T_ACCEPTED:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 1;
-		break;
-	case UWB_RSV_STATE_T_DENIED:
-		reason_code = UWB_DRP_REASON_DENIED;
-		status = 0;
-		break;
-	default:
-		dev_dbg(dev, "rsv with unhandled state (%d)\n", rsv->state);
-		return -EINVAL;
 	}
+	
+	unsafe = rsv->mas.unsafe ? 1 : 0;
 
 	if (rsv->drp_ie == NULL) {
 		rsv->drp_ie = uwb_drp_ie_alloc();
@@ -148,9 +208,11 @@ int uwb_drp_ie_update(struct uwb_rsv *rsv)
 	}
 	drp_ie = rsv->drp_ie;
 
+	uwb_ie_drp_set_unsafe(drp_ie,       unsafe);
+	uwb_ie_drp_set_tiebreaker(drp_ie,   rsv->tiebreaker);
 	uwb_ie_drp_set_owner(drp_ie,        uwb_rsv_is_owner(rsv));
-	uwb_ie_drp_set_status(drp_ie,       status);
-	uwb_ie_drp_set_reason_code(drp_ie,  reason_code);
+	uwb_ie_drp_set_status(drp_ie,       uwb_rsv_status(rsv));
+	uwb_ie_drp_set_reason_code(drp_ie,  uwb_rsv_reason_code(rsv));
 	uwb_ie_drp_set_stream_index(drp_ie, rsv->stream);
 	uwb_ie_drp_set_type(drp_ie,         rsv->type);
 
@@ -168,6 +230,27 @@ int uwb_drp_ie_update(struct uwb_rsv *rsv)
 
 	uwb_drp_ie_from_bm(drp_ie, &rsv->mas);
 
+	if (uwb_rsv_has_two_drp_ies(rsv)) {
+		mv = &rsv->mv; 
+		if (mv->companion_drp_ie == NULL) {
+			mv->companion_drp_ie = uwb_drp_ie_alloc();
+			if (mv->companion_drp_ie == NULL)
+				return -ENOMEM;
+		}
+		drp_ie = mv->companion_drp_ie;
+		
+		/* keep all the same configuration of the main drp_ie */
+		memcpy(drp_ie, rsv->drp_ie, sizeof(struct uwb_ie_drp));
+		
+
+		/* FIXME: handle properly the unsafe bit */
+		uwb_ie_drp_set_unsafe(drp_ie,       1);
+		uwb_ie_drp_set_status(drp_ie,       uwb_rsv_companion_status(rsv));
+		uwb_ie_drp_set_reason_code(drp_ie,  uwb_rsv_companion_reason_code(rsv));
+	
+		uwb_drp_ie_from_bm(drp_ie, &mv->companion_mas);
+	}
+
 	rsv->ie_valid = true;
 	return 0;
 }
@@ -218,6 +301,8 @@ void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie)
 	u8 zone;
 	u16 zone_mask;
 
+	bitmap_zero(bm->bm, UWB_NUM_MAS);
+
 	for (cnt = 0; cnt < numallocs; cnt++) {
 		alloc = &drp_ie->allocs[cnt];
 		zone_bm = le16_to_cpu(alloc->zone_bm);
@@ -229,3 +314,4 @@ void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie)
 		}
 	}
 }
+
diff --git a/drivers/uwb/drp.c b/drivers/uwb/drp.c
index fe32814..2b4f940 100644
--- a/drivers/uwb/drp.c
+++ b/drivers/uwb/drp.c
@@ -23,6 +23,59 @@
 #include <linux/delay.h>
 #include "uwb-internal.h"
 
+
+/* DRP Conflict Actions ([ECMA-368 2nd Edition] 17.4.6) */
+enum uwb_drp_conflict_action {
+	/* Reservation is mantained, no action needed */
+	UWB_DRP_CONFLICT_MANTAIN = 0,
+	
+	/* the device shall not transmit frames in conflicting MASs in
+	 * the following superframe. If the device is the reservation
+	 * target, it shall also set the Reason Code in its DRP IE to
+	 * Conflict in its beacon in the following superframe.
+	 */
+	UWB_DRP_CONFLICT_ACT1,
+	
+	/* the device shall not set the Reservation Status bit to ONE
+	 * and shall not transmit frames in conflicting MASs. If the
+	 * device is the reservation target, it shall also set the
+	 * Reason Code in its DRP IE to Conflict.
+	 */	
+	UWB_DRP_CONFLICT_ACT2,
+
+	/* the device shall not transmit frames in conflicting MASs in
+	 * the following superframe. It shall remove the conflicting
+	 * MASs from the reservation or set the Reservation Status to
+	 * ZERO in its beacon in the following superframe. If the
+	 * device is the reservation target, it shall also set the
+	 * Reason Code in its DRP IE to Conflict.
+	 */
+	UWB_DRP_CONFLICT_ACT3,
+};
+
+
+static void uwb_rc_set_drp_cmd_done(struct uwb_rc *rc, void *arg,
+				    struct uwb_rceb *reply, ssize_t reply_size)
+{
+	struct uwb_rc_evt_set_drp_ie *r = (struct uwb_rc_evt_set_drp_ie *)reply;
+
+	if (r != NULL) {
+		if (r->bResultCode != UWB_RC_RES_SUCCESS)
+			dev_err(&rc->uwb_dev.dev, "SET-DRP-IE failed: %s (%d)\n",
+				uwb_rc_strerror(r->bResultCode), r->bResultCode);
+	} else
+		dev_err(&rc->uwb_dev.dev, "SET-DRP-IE: timeout\n");
+
+	spin_lock(&rc->rsvs_lock);
+	if (rc->set_drp_ie_pending > 1) {
+		rc->set_drp_ie_pending = 0;
+		uwb_rsv_queue_update(rc);	
+	} else {
+		rc->set_drp_ie_pending = 0;	
+	}
+	spin_unlock(&rc->rsvs_lock);
+}
+
 /**
  * Construct and send the SET DRP IE
  *
@@ -46,18 +99,23 @@
 int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
 {
 	int result;
-	struct device *dev = &rc->uwb_dev.dev;
 	struct uwb_rc_cmd_set_drp_ie *cmd;
-	struct uwb_rc_evt_set_drp_ie reply;
 	struct uwb_rsv *rsv;
+	struct uwb_rsv_move *mv;
 	int num_bytes = 0;
 	u8 *IEDataptr;
 
 	result = -ENOMEM;
 	/* First traverse all reservations to determine memory needed. */
 	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		if (rsv->drp_ie != NULL)
+		if (rsv->drp_ie != NULL) {
 			num_bytes += rsv->drp_ie->hdr.length + 2;
+			if (uwb_rsv_has_two_drp_ies(rsv) &&
+				(rsv->mv.companion_drp_ie != NULL)) {
+				mv = &rsv->mv;
+				num_bytes += mv->companion_drp_ie->hdr.length + 2;	
+			}
+		}
 	}
 	num_bytes += sizeof(rc->drp_avail.ie);
 	cmd = kzalloc(sizeof(*cmd) + num_bytes, GFP_KERNEL);
@@ -68,109 +126,322 @@ int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
 	cmd->wIELength = num_bytes;
 	IEDataptr = (u8 *)&cmd->IEData[0];
 
+	/* FIXME: DRV avail IE is not always needed */
+	/* put DRP avail IE first */
+	memcpy(IEDataptr, &rc->drp_avail.ie, sizeof(rc->drp_avail.ie));
+	IEDataptr += sizeof(struct uwb_ie_drp_avail);
+
 	/* Next traverse all reservations to place IEs in allocated memory. */
 	list_for_each_entry(rsv, &rc->reservations, rc_node) {
 		if (rsv->drp_ie != NULL) {
 			memcpy(IEDataptr, rsv->drp_ie,
 			       rsv->drp_ie->hdr.length + 2);
 			IEDataptr += rsv->drp_ie->hdr.length + 2;
+			
+			if (uwb_rsv_has_two_drp_ies(rsv) &&
+				(rsv->mv.companion_drp_ie != NULL)) {
+				mv = &rsv->mv;
+				memcpy(IEDataptr, mv->companion_drp_ie,
+				       mv->companion_drp_ie->hdr.length + 2);
+				IEDataptr += mv->companion_drp_ie->hdr.length + 2;	
+			}
 		}
 	}
-	memcpy(IEDataptr, &rc->drp_avail.ie, sizeof(rc->drp_avail.ie));
 
-	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
-	reply.rceb.wEvent = UWB_RC_CMD_SET_DRP_IE;
-	result = uwb_rc_cmd(rc, "SET-DRP-IE", &cmd->rccb,
-			sizeof(*cmd) + num_bytes, &reply.rceb,
-			sizeof(reply));
-	if (result < 0)
-		goto error_cmd;
-	result = le16_to_cpu(reply.wRemainingSpace);
-	if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(&rc->uwb_dev.dev, "SET-DRP-IE: command execution "
-				"failed: %s (%d). RemainingSpace in beacon "
-				"= %d\n", uwb_rc_strerror(reply.bResultCode),
-				reply.bResultCode, result);
-		result = -EIO;
-	} else {
-		dev_dbg(dev, "SET-DRP-IE sent. RemainingSpace in beacon "
-			     "= %d.\n", result);
-		result = 0;
-	}
-error_cmd:
+	result = uwb_rc_cmd_async(rc, "SET-DRP-IE", &cmd->rccb, sizeof(*cmd) + num_bytes,
+				  UWB_RC_CET_GENERAL, UWB_RC_CMD_SET_DRP_IE,
+				  uwb_rc_set_drp_cmd_done, NULL);
+	
+	rc->set_drp_ie_pending = 1;
+
 	kfree(cmd);
 error:
 	return result;
 }
 
-void uwb_drp_handle_timeout(struct uwb_rsv *rsv)
+/*
+ * Evaluate the action to perform using conflict resolution rules
+ *
+ * Return a uwb_drp_conflict_action.
+ */
+static int evaluate_conflict_action(struct uwb_ie_drp *ext_drp_ie, int ext_beacon_slot,
+				    struct uwb_rsv *rsv, int our_status)
 {
-	struct device *dev = &rsv->rc->uwb_dev.dev;
+	int our_tie_breaker = rsv->tiebreaker;
+	int our_type        = rsv->type;
+	int our_beacon_slot = rsv->rc->uwb_dev.beacon_slot;
+
+	int ext_tie_breaker = uwb_ie_drp_tiebreaker(ext_drp_ie);
+	int ext_status      = uwb_ie_drp_status(ext_drp_ie);
+	int ext_type        = uwb_ie_drp_type(ext_drp_ie);
+	
+	
+	/* [ECMA-368 2nd Edition] 17.4.6 */
+	if (ext_type == UWB_DRP_TYPE_PCA && our_type == UWB_DRP_TYPE_PCA) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-1 */
+	if (our_type == UWB_DRP_TYPE_ALIEN_BP) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+	
+	/* [ECMA-368 2nd Edition] 17.4.6-2 */
+	if (ext_type == UWB_DRP_TYPE_ALIEN_BP) {
+		/* here we know our_type != UWB_DRP_TYPE_ALIEN_BP */
+		return UWB_DRP_CONFLICT_ACT1;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-3 */
+	if (our_status == 0 && ext_status == 1) {
+		return UWB_DRP_CONFLICT_ACT2;
+	}
 
-	dev_dbg(dev, "reservation timeout in state %s (%d)\n",
-		uwb_rsv_state_str(rsv->state), rsv->state);
+	/* [ECMA-368 2nd Edition] 17.4.6-4 */
+	if (our_status == 1 && ext_status == 0) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
 
-	switch (rsv->state) {
-	case UWB_RSV_STATE_O_INITIATED:
-		if (rsv->is_multicast) {
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-			return;
+	/* [ECMA-368 2nd Edition] 17.4.6-5a */
+	if (our_tie_breaker == ext_tie_breaker &&
+	    our_beacon_slot <  ext_beacon_slot) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-5b */
+	if (our_tie_breaker != ext_tie_breaker &&
+	    our_beacon_slot >  ext_beacon_slot) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+	
+	if (our_status == 0) {
+		if (our_tie_breaker == ext_tie_breaker) {
+			/* [ECMA-368 2nd Edition] 17.4.6-6a */
+			if (our_beacon_slot > ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT2;
+			}
+		} else  {
+			/* [ECMA-368 2nd Edition] 17.4.6-6b */
+			if (our_beacon_slot < ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT2;
+			}
 		}
-		break;
-	case UWB_RSV_STATE_O_ESTABLISHED:
-		if (rsv->is_multicast)
-			return;
-		break;
-	default:
-		break;
+	} else {
+		if (our_tie_breaker == ext_tie_breaker) {
+			/* [ECMA-368 2nd Edition] 17.4.6-7a */
+			if (our_beacon_slot > ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT3;
+			}
+		} else {
+			/* [ECMA-368 2nd Edition] 17.4.6-7b */
+			if (our_beacon_slot < ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT3;
+			}
+		}
+	}
+	return UWB_DRP_CONFLICT_MANTAIN;
+}
+
+static void handle_conflict_normal(struct uwb_ie_drp *drp_ie, 
+				   int ext_beacon_slot, 
+				   struct uwb_rsv *rsv, 
+				   struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_rsv_move *mv = &rsv->mv;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	int action;
+
+	action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, uwb_rsv_status(rsv));
+
+	if (uwb_rsv_is_owner(rsv)) {
+		switch(action) {
+		case UWB_DRP_CONFLICT_ACT2:
+			/* try move */
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_TO_BE_MOVED);
+			if (bow->can_reserve_extra_mases == false)
+				uwb_rsv_backoff_win_increment(rc);
+			
+			break;
+		case UWB_DRP_CONFLICT_ACT3:
+			uwb_rsv_backoff_win_increment(rc);
+			/* drop some mases with reason modified */
+			/* put in the companion the mases to be dropped */
+			bitmap_and(mv->companion_mas.bm, rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
+		default:
+			break;
+		}
+	} else {
+		switch(action) {
+		case UWB_DRP_CONFLICT_ACT2:
+		case UWB_DRP_CONFLICT_ACT3:
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);	
+		default:
+			break;
+		}
+
+	}
+	
+}
+
+static void handle_conflict_expanding(struct uwb_ie_drp *drp_ie, int ext_beacon_slot,
+				      struct uwb_rsv *rsv, bool companion_only,
+				      struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct uwb_rsv_move *mv = &rsv->mv;
+	int action;
+	
+	if (companion_only) {
+		/* status of companion is 0 at this point */
+		action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, 0);
+		if (uwb_rsv_is_owner(rsv)) {
+			switch(action) {
+			case UWB_DRP_CONFLICT_ACT2:
+			case UWB_DRP_CONFLICT_ACT3:
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				rsv->needs_release_companion_mas = false;
+				if (bow->can_reserve_extra_mases == false)
+					uwb_rsv_backoff_win_increment(rc);
+				uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+			}
+		} else { /* rsv is target */			
+			switch(action) {
+			case UWB_DRP_CONFLICT_ACT2:
+			case UWB_DRP_CONFLICT_ACT3:
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_CONFLICT);
+                                /* send_drp_avail_ie = true; */
+			}
+		}
+	} else { /* also base part of the reservation is conflicting */		
+		if (uwb_rsv_is_owner(rsv)) {
+			uwb_rsv_backoff_win_increment(rc);
+			/* remove companion part */
+			uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+
+			/* drop some mases with reason modified */
+
+			/* put in the companion the mases to be dropped */
+			bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
+		} else { /* it is a target rsv */
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
+                        /* send_drp_avail_ie = true; */
+		}
+	}
+}
+
+static void uwb_drp_handle_conflict_rsv(struct uwb_rc *rc, struct uwb_rsv *rsv,
+					struct uwb_rc_evt_drp *drp_evt, 
+					struct uwb_ie_drp *drp_ie,
+					struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rsv_move *mv;
+
+	/* check if the conflicting reservation has two drp_ies */
+	if (uwb_rsv_has_two_drp_ies(rsv)) {
+		mv = &rsv->mv;
+		if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) {
+			handle_conflict_expanding(drp_ie, drp_evt->beacon_slot_number,
+						  rsv, false, conflicting_mas);
+		} else {
+			if (bitmap_intersects(mv->companion_mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) {
+				handle_conflict_expanding(drp_ie, drp_evt->beacon_slot_number,
+							  rsv, true, conflicting_mas);	
+			}
+		}
+	} else if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) {
+		handle_conflict_normal(drp_ie, drp_evt->beacon_slot_number, rsv, conflicting_mas);
 	}
-	uwb_rsv_remove(rsv);
 }
 
+static void uwb_drp_handle_all_conflict_rsv(struct uwb_rc *rc,
+					    struct uwb_rc_evt_drp *drp_evt, 
+					    struct uwb_ie_drp *drp_ie,
+					    struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rsv *rsv;
+	
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie, conflicting_mas);	
+	}
+}
+	
 /*
  * Based on the DRP IE, transition a target reservation to a new
  * state.
  */
 static void uwb_drp_process_target(struct uwb_rc *rc, struct uwb_rsv *rsv,
-				   struct uwb_ie_drp *drp_ie)
+				   struct uwb_ie_drp *drp_ie, struct uwb_rc_evt_drp *drp_evt)
 {
 	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rsv_move *mv = &rsv->mv;
 	int status;
 	enum uwb_drp_reason reason_code;
-
+	struct uwb_mas_bm mas;
+	
 	status = uwb_ie_drp_status(drp_ie);
 	reason_code = uwb_ie_drp_reason_code(drp_ie);
+	uwb_drp_ie_to_bm(&mas, drp_ie);
 
-	if (status) {
-		switch (reason_code) {
-		case UWB_DRP_REASON_ACCEPTED:
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
-			break;
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
+	switch (reason_code) {
+	case UWB_DRP_REASON_ACCEPTED:
+
+		if (rsv->state == UWB_RSV_STATE_T_CONFLICT) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
 			break;
-		default:
-			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
-				 reason_code, status);
 		}
-	} else {
-		switch (reason_code) {
-		case UWB_DRP_REASON_ACCEPTED:
-			/* New reservations are handled in uwb_rsv_find(). */
-			break;
-		case UWB_DRP_REASON_DENIED:
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
-			break;
-		case UWB_DRP_REASON_CONFLICT:
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
+
+		if (rsv->state == UWB_RSV_STATE_T_EXPANDING_ACCEPTED) {
+			/* drp_ie is companion */
+			if (!bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS))
+				/* stroke companion */
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED);	
+		} else {
+			if (!bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS)) {
+				if (uwb_drp_avail_reserve_pending(rc, &mas) == -EBUSY) {
+					/* FIXME: there is a conflict, find
+					 * the conflicting reservations and
+					 * take a sensible action. Consider
+					 * that in drp_ie there is the
+					 * "neighbour" */
+					uwb_drp_handle_all_conflict_rsv(rc, drp_evt, drp_ie, &mas);
+				} else {
+					/* accept the extra reservation */
+					bitmap_copy(mv->companion_mas.bm, mas.bm, UWB_NUM_MAS);
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
+				}
+			} else {
+				if (status) {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
+				}
+			}
+			
+		}
+		break;
+
+	case UWB_DRP_REASON_MODIFIED:
+		/* check to see if we have already modified the reservation */
+		if (bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS)) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
 			break;
-		default:
-			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
-				 reason_code, status);
 		}
+
+		/* find if the owner wants to expand or reduce */
+		if (bitmap_subset(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) {
+			/* owner is reducing */
+			bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mas.bm, UWB_NUM_MAS);
+			uwb_drp_avail_release(rsv->rc, &mv->companion_mas);
+		}
+
+		bitmap_copy(rsv->mas.bm, mas.bm, UWB_NUM_MAS);
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_RESIZED);
+		break;
+	default:
+		dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
+			 reason_code, status);
 	}
 }
 
@@ -179,23 +450,60 @@ static void uwb_drp_process_target(struct uwb_rc *rc, struct uwb_rsv *rsv,
  * state.
  */
 static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv,
-				  struct uwb_ie_drp *drp_ie)
+				  struct uwb_dev *src, struct uwb_ie_drp *drp_ie,
+				  struct uwb_rc_evt_drp *drp_evt)
 {
 	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rsv_move *mv = &rsv->mv;
 	int status;
 	enum uwb_drp_reason reason_code;
+	struct uwb_mas_bm mas;
 
 	status = uwb_ie_drp_status(drp_ie);
 	reason_code = uwb_ie_drp_reason_code(drp_ie);
+	uwb_drp_ie_to_bm(&mas, drp_ie);
 
 	if (status) {
 		switch (reason_code) {
 		case UWB_DRP_REASON_ACCEPTED:
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-			break;
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
+			switch (rsv->state) {
+			case UWB_RSV_STATE_O_PENDING:
+			case UWB_RSV_STATE_O_INITIATED:
+			case UWB_RSV_STATE_O_ESTABLISHED:
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				break;
+			case UWB_RSV_STATE_O_MODIFIED:
+				if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				} else {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);	
+				}
+				break;
+				
+			case UWB_RSV_STATE_O_MOVE_REDUCING: /* shouldn' t be a problem */
+				if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				} else {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);	
+				}
+				break;
+			case UWB_RSV_STATE_O_MOVE_EXPANDING:
+				if (bitmap_equal(mas.bm, mv->companion_mas.bm, UWB_NUM_MAS)) {
+					/* Companion reservation accepted */
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+				} else {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+				}
+				break;
+			case UWB_RSV_STATE_O_MOVE_COMBINING:
+				if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS))
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+				else
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+				break;
+			default:
+				break;	
+			}
 			break;
 		default:
 			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
@@ -210,9 +518,10 @@ static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv,
 			uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
 			break;
 		case UWB_DRP_REASON_CONFLICT:
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
+			/* resolve the conflict */
+			bitmap_complement(mas.bm, src->last_availability_bm,
+					  UWB_NUM_MAS);
+			uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie, &mas);
 			break;
 		default:
 			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
@@ -221,12 +530,110 @@ static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv,
 	}
 }
 
+static void uwb_cnflt_alien_stroke_timer(struct uwb_cnflt_alien *cnflt)
+{
+	unsigned timeout_us = UWB_MAX_LOST_BEACONS * UWB_SUPERFRAME_LENGTH_US;
+	mod_timer(&cnflt->timer, jiffies + usecs_to_jiffies(timeout_us));
+}
+
+static void uwb_cnflt_update_work(struct work_struct *work)
+{
+	struct uwb_cnflt_alien *cnflt = container_of(work,
+						     struct uwb_cnflt_alien,
+						     cnflt_update_work);
+	struct uwb_cnflt_alien *c;
+	struct uwb_rc *rc = cnflt->rc;
+	
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+	
+	mutex_lock(&rc->rsvs_mutex);
+
+	list_del(&cnflt->rc_node);
+
+	/* update rc global conflicting alien bitmap */
+	bitmap_zero(rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS);
+
+	list_for_each_entry(c, &rc->cnflt_alien_list, rc_node) {
+		bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm, c->mas.bm, UWB_NUM_MAS);			
+	}
+	
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work, usecs_to_jiffies(delay_us));
+
+	kfree(cnflt);
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
+static void uwb_cnflt_timer(unsigned long arg)
+{
+	struct uwb_cnflt_alien *cnflt = (struct uwb_cnflt_alien *)arg;
+
+	queue_work(cnflt->rc->rsv_workq, &cnflt->cnflt_update_work);
+}
+
 /*
- * Process a received DRP IE, it's either for a reservation owned by
- * the RC or targeted at it (or it's for a WUSB cluster reservation).
+ * We have received an DRP_IE of type Alien BP and we need to make
+ * sure we do not transmit in conflicting MASs.
  */
-static void uwb_drp_process(struct uwb_rc *rc, struct uwb_dev *src,
-		     struct uwb_ie_drp *drp_ie)
+static void uwb_drp_handle_alien_drp(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_mas_bm mas;
+	struct uwb_cnflt_alien *cnflt;
+	char buf[72];
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+	
+	uwb_drp_ie_to_bm(&mas, drp_ie);
+	bitmap_scnprintf(buf, sizeof(buf), mas.bm, UWB_NUM_MAS);
+	
+	list_for_each_entry(cnflt, &rc->cnflt_alien_list, rc_node) {
+		if (bitmap_equal(cnflt->mas.bm, mas.bm, UWB_NUM_MAS)) {
+			/* Existing alien BP reservation conflicting
+			 * bitmap, just reset the timer */
+			uwb_cnflt_alien_stroke_timer(cnflt);
+			return;
+		}
+	}
+
+	/* New alien BP reservation conflicting bitmap */
+
+	/* alloc and initialize new uwb_cnflt_alien */
+	cnflt = kzalloc(sizeof(struct uwb_cnflt_alien), GFP_KERNEL);
+	if (!cnflt)
+		dev_err(dev, "failed to alloc uwb_cnflt_alien struct\n");
+	INIT_LIST_HEAD(&cnflt->rc_node);
+	init_timer(&cnflt->timer);
+	cnflt->timer.function = uwb_cnflt_timer;
+	cnflt->timer.data     = (unsigned long)cnflt;
+
+	cnflt->rc = rc;
+	INIT_WORK(&cnflt->cnflt_update_work, uwb_cnflt_update_work);
+	
+	bitmap_copy(cnflt->mas.bm, mas.bm, UWB_NUM_MAS);
+
+	list_add_tail(&cnflt->rc_node, &rc->cnflt_alien_list);
+
+	/* update rc global conflicting alien bitmap */
+	bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm, mas.bm, UWB_NUM_MAS);
+
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work, usecs_to_jiffies(delay_us));
+	
+	/* start the timer */
+	uwb_cnflt_alien_stroke_timer(cnflt);
+}
+
+static void uwb_drp_process_not_involved(struct uwb_rc *rc,
+					 struct uwb_rc_evt_drp *drp_evt, 
+					 struct uwb_ie_drp *drp_ie)
+{
+	struct uwb_mas_bm mas;
+	
+	uwb_drp_ie_to_bm(&mas, drp_ie);
+	uwb_drp_handle_all_conflict_rsv(rc, drp_evt, drp_ie, &mas);
+}
+
+static void uwb_drp_process_involved(struct uwb_rc *rc, struct uwb_dev *src,
+				     struct uwb_rc_evt_drp *drp_evt,
+				     struct uwb_ie_drp *drp_ie)
 {
 	struct uwb_rsv *rsv;
 
@@ -239,7 +646,7 @@ static void uwb_drp_process(struct uwb_rc *rc, struct uwb_dev *src,
 		 */
 		return;
 	}
-
+	
 	/*
 	 * Do nothing with DRP IEs for reservations that have been
 	 * terminated.
@@ -248,13 +655,43 @@ static void uwb_drp_process(struct uwb_rc *rc, struct uwb_dev *src,
 		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
 		return;
 	}
-
+			
 	if (uwb_ie_drp_owner(drp_ie))
-		uwb_drp_process_target(rc, rsv, drp_ie);
+		uwb_drp_process_target(rc, rsv, drp_ie, drp_evt);
+	else
+		uwb_drp_process_owner(rc, rsv, src, drp_ie, drp_evt);
+	
+}
+
+
+static bool uwb_drp_involves_us(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie)
+{
+	return uwb_dev_addr_cmp(&rc->uwb_dev.dev_addr, &drp_ie->dev_addr) == 0;
+}
+
+/*
+ * Process a received DRP IE.
+ */
+static void uwb_drp_process(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
+			    struct uwb_dev *src, struct uwb_ie_drp *drp_ie)
+{
+	if (uwb_ie_drp_type(drp_ie) == UWB_DRP_TYPE_ALIEN_BP)
+		uwb_drp_handle_alien_drp(rc, drp_ie);
+	else if (uwb_drp_involves_us(rc, drp_ie))
+		uwb_drp_process_involved(rc, src, drp_evt, drp_ie);
 	else
-		uwb_drp_process_owner(rc, rsv, drp_ie);
+		uwb_drp_process_not_involved(rc, drp_evt, drp_ie);
 }
 
+/*
+ * Process a received DRP Availability IE
+ */
+static void uwb_drp_availability_process(struct uwb_rc *rc, struct uwb_dev *src,
+					 struct uwb_ie_drp_avail *drp_availability_ie)
+{
+	bitmap_copy(src->last_availability_bm,
+		    drp_availability_ie->bmp, UWB_NUM_MAS);
+}
 
 /*
  * Process all the DRP IEs (both DRP IEs and the DRP Availability IE)
@@ -276,10 +713,10 @@ void uwb_drp_process_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
 
 		switch (ie_hdr->element_id) {
 		case UWB_IE_DRP_AVAILABILITY:
-			/* FIXME: does something need to be done with this? */
+			uwb_drp_availability_process(rc, src_dev, (struct uwb_ie_drp_avail *)ie_hdr);
 			break;
 		case UWB_IE_DRP:
-			uwb_drp_process(rc, src_dev, (struct uwb_ie_drp *)ie_hdr);
+			uwb_drp_process(rc, drp_evt, src_dev, (struct uwb_ie_drp *)ie_hdr);
 			break;
 		default:
 			dev_warn(dev, "unexpected IE in DRP notification\n");
@@ -292,55 +729,6 @@ void uwb_drp_process_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
 			 (int)ielen);
 }
 
-
-/*
- * Go through all the DRP IEs and find the ones that conflict with our
- * reservations.
- *
- * FIXME: must resolve the conflict according the the rules in
- * [ECMA-368].
- */
-static
-void uwb_drp_process_conflict_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
-				  size_t ielen, struct uwb_dev *src_dev)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_ie_hdr *ie_hdr;
-	struct uwb_ie_drp *drp_ie;
-	void *ptr;
-
-	ptr = drp_evt->ie_data;
-	for (;;) {
-		ie_hdr = uwb_ie_next(&ptr, &ielen);
-		if (!ie_hdr)
-			break;
-
-		drp_ie = container_of(ie_hdr, struct uwb_ie_drp, hdr);
-
-		/* FIXME: check if this DRP IE conflicts. */
-	}
-
-	if (ielen > 0)
-		dev_warn(dev, "%d octets remaining in DRP notification\n",
-			 (int)ielen);
-}
-
-
-/*
- * Terminate all reservations owned by, or targeted at, 'uwb_dev'.
- */
-static void uwb_drp_terminate_all(struct uwb_rc *rc, struct uwb_dev *uwb_dev)
-{
-	struct uwb_rsv *rsv;
-
-	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		if (rsv->owner == uwb_dev
-		    || (rsv->target.type == UWB_RSV_TARGET_DEV && rsv->target.dev == uwb_dev))
-			uwb_rsv_remove(rsv);
-	}
-}
-
-
 /**
  * uwbd_evt_handle_rc_drp - handle a DRP_IE event
  * @evt: the DRP_IE event from the radio controller
@@ -381,7 +769,6 @@ int uwbd_evt_handle_rc_drp(struct uwb_event *evt)
 	size_t ielength, bytes_left;
 	struct uwb_dev_addr src_addr;
 	struct uwb_dev *src_dev;
-	int reason;
 
 	/* Is there enough data to decode the event (and any IEs in
 	   its payload)? */
@@ -417,22 +804,8 @@ int uwbd_evt_handle_rc_drp(struct uwb_event *evt)
 
 	mutex_lock(&rc->rsvs_mutex);
 
-	reason = uwb_rc_evt_drp_reason(drp_evt);
-
-	switch (reason) {
-	case UWB_DRP_NOTIF_DRP_IE_RCVD:
-		uwb_drp_process_all(rc, drp_evt, ielength, src_dev);
-		break;
-	case UWB_DRP_NOTIF_CONFLICT:
-		uwb_drp_process_conflict_all(rc, drp_evt, ielength, src_dev);
-		break;
-	case UWB_DRP_NOTIF_TERMINATE:
-		uwb_drp_terminate_all(rc, src_dev);
-		break;
-	default:
-		dev_warn(dev, "ignored DRP event with reason code: %d\n", reason);
-		break;
-	}
+	/* We do not distinguish from the reason */
+	uwb_drp_process_all(rc, drp_evt, ielength, src_dev);
 
 	mutex_unlock(&rc->rsvs_mutex);
 
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index 1cd84f9..165aec6 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -17,20 +17,31 @@
  */
 #include <linux/kernel.h>
 #include <linux/uwb.h>
+#include <linux/random.h>
 
 #include "uwb-internal.h"
 
 static void uwb_rsv_timer(unsigned long arg);
 
 static const char *rsv_states[] = {
-	[UWB_RSV_STATE_NONE]          = "none",
-	[UWB_RSV_STATE_O_INITIATED]   = "initiated",
-	[UWB_RSV_STATE_O_PENDING]     = "pending",
-	[UWB_RSV_STATE_O_MODIFIED]    = "modified",
-	[UWB_RSV_STATE_O_ESTABLISHED] = "established",
-	[UWB_RSV_STATE_T_ACCEPTED]    = "accepted",
-	[UWB_RSV_STATE_T_DENIED]      = "denied",
-	[UWB_RSV_STATE_T_PENDING]     = "pending",
+	[UWB_RSV_STATE_NONE]                 = "none            ",
+	[UWB_RSV_STATE_O_INITIATED]          = "o initiated     ",
+	[UWB_RSV_STATE_O_PENDING]            = "o pending       ",
+	[UWB_RSV_STATE_O_MODIFIED]           = "o modified      ",
+	[UWB_RSV_STATE_O_ESTABLISHED]        = "o established   ",
+	[UWB_RSV_STATE_O_TO_BE_MOVED]        = "o to be moved   ",
+	[UWB_RSV_STATE_O_MOVE_EXPANDING]     = "o move expanding",
+	[UWB_RSV_STATE_O_MOVE_COMBINING]     = "o move combining",
+	[UWB_RSV_STATE_O_MOVE_REDUCING]      = "o move reducing ",
+	[UWB_RSV_STATE_T_ACCEPTED]           = "t accepted      ",
+	[UWB_RSV_STATE_T_CONFLICT]           = "t conflict      ",
+	[UWB_RSV_STATE_T_PENDING]            = "t pending       ",
+	[UWB_RSV_STATE_T_DENIED]             = "t denied        ",
+	[UWB_RSV_STATE_T_RESIZED]            = "t resized       ",
+	[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = "t expanding acc ",
+	[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = "t expanding conf",
+	[UWB_RSV_STATE_T_EXPANDING_PENDING]  = "t expanding pend",
+	[UWB_RSV_STATE_T_EXPANDING_DENIED]   = "t expanding den ",
 };
 
 static const char *rsv_types[] = {
@@ -41,6 +52,31 @@ static const char *rsv_types[] = {
 	[UWB_DRP_TYPE_PCA]      = "pca",
 };
 
+bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv)
+{
+	static const bool has_two_drp_ies[] = {
+		[UWB_RSV_STATE_O_INITIATED]               = false,
+		[UWB_RSV_STATE_O_PENDING]                 = false,
+		[UWB_RSV_STATE_O_MODIFIED]                = false,
+		[UWB_RSV_STATE_O_ESTABLISHED]             = false,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]             = false,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]          = false,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]           = false,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]          = true,
+		[UWB_RSV_STATE_T_ACCEPTED]                = false,
+		[UWB_RSV_STATE_T_CONFLICT]                = false,
+		[UWB_RSV_STATE_T_PENDING]                 = false,
+		[UWB_RSV_STATE_T_DENIED]                  = false,
+		[UWB_RSV_STATE_T_RESIZED]                 = false,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED]      = true,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT]      = true,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]       = true,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]        = true,
+	};
+
+	return has_two_drp_ies[rsv->state];
+}
+
 /**
  * uwb_rsv_state_str - return a string for a reservation state
  * @state: the reservation state.
@@ -65,7 +101,7 @@ const char *uwb_rsv_type_str(enum uwb_drp_type type)
 }
 EXPORT_SYMBOL_GPL(uwb_rsv_type_str);
 
-static void uwb_rsv_dump(struct uwb_rsv *rsv)
+void uwb_rsv_dump(char *text, struct uwb_rsv *rsv)
 {
 	struct device *dev = &rsv->rc->uwb_dev.dev;
 	struct uwb_dev_addr devaddr;
@@ -88,12 +124,12 @@ static void uwb_rsv_release(struct kref *kref)
 	kfree(rsv);
 }
 
-static void uwb_rsv_get(struct uwb_rsv *rsv)
+void uwb_rsv_get(struct uwb_rsv *rsv)
 {
 	kref_get(&rsv->kref);
 }
 
-static void uwb_rsv_put(struct uwb_rsv *rsv)
+void uwb_rsv_put(struct uwb_rsv *rsv)
 {
 	kref_put(&rsv->kref, uwb_rsv_release);
 }
@@ -108,6 +144,7 @@ static void uwb_rsv_put(struct uwb_rsv *rsv)
 static int uwb_rsv_get_stream(struct uwb_rsv *rsv)
 {
 	struct uwb_rc *rc = rsv->rc;
+	struct device *dev = &rc->uwb_dev.dev;
 	unsigned long *streams_bm;
 	int stream;
 
@@ -129,12 +166,15 @@ static int uwb_rsv_get_stream(struct uwb_rsv *rsv)
 	rsv->stream = stream;
 	set_bit(stream, streams_bm);
 
+	dev_dbg(dev, "get stream %d\n", rsv->stream);
+
 	return 0;
 }
 
 static void uwb_rsv_put_stream(struct uwb_rsv *rsv)
 {
 	struct uwb_rc *rc = rsv->rc;
+	struct device *dev = &rc->uwb_dev.dev;
 	unsigned long *streams_bm;
 
 	switch (rsv->target.type) {
@@ -149,86 +189,52 @@ static void uwb_rsv_put_stream(struct uwb_rsv *rsv)
 	}
 
 	clear_bit(rsv->stream, streams_bm);
+
+	dev_dbg(dev, "put stream %d\n", rsv->stream);
 }
 
-/*
- * Generate a MAS allocation with a single row component.
- */
-static void uwb_rsv_gen_alloc_row(struct uwb_mas_bm *mas,
-				  int first_mas, int mas_per_zone,
-				  int zs, int ze)
+void uwb_rsv_backoff_win_timer(unsigned long arg)
 {
-	struct uwb_mas_bm col;
-	int z;
+	struct uwb_drp_backoff_win *bow = (struct uwb_drp_backoff_win *)arg;
+	struct uwb_rc *rc = container_of(bow, struct uwb_rc, bow);
+	struct device *dev = &rc->uwb_dev.dev;
 
-	bitmap_zero(mas->bm, UWB_NUM_MAS);
-	bitmap_zero(col.bm, UWB_NUM_MAS);
-	bitmap_fill(col.bm, mas_per_zone);
-	bitmap_shift_left(col.bm, col.bm, first_mas + zs * UWB_MAS_PER_ZONE, UWB_NUM_MAS);
-
-	for (z = zs; z <= ze; z++) {
-		bitmap_or(mas->bm, mas->bm, col.bm, UWB_NUM_MAS);
-		bitmap_shift_left(col.bm, col.bm, UWB_MAS_PER_ZONE, UWB_NUM_MAS);
+	bow->can_reserve_extra_mases = true;
+	if (bow->total_expired <= 4) {
+		bow->total_expired++;
+	} else {
+		/* after 4 backoff window has expired we can exit from
+		 * the backoff procedure */
+		bow->total_expired = 0;
+		bow->window = UWB_DRP_BACKOFF_WIN_MIN >> 1;
 	}
+	dev_dbg(dev, "backoff_win_timer total_expired=%d, n=%d\n: ", bow->total_expired, bow->n);
+
+	/* try to relocate all the "to be moved" relocations */
+	uwb_rsv_handle_drp_avail_change(rc);
 }
 
-/*
- * Allocate some MAS for this reservation based on current local
- * availability, the reservation parameters (max_mas, min_mas,
- * sparsity), and the WiMedia rules for MAS allocations.
- *
- * Returns -EBUSY is insufficient free MAS are available.
- *
- * FIXME: to simplify this, only safe reservations with a single row
- * component in zones 1 to 15 are tried (zone 0 is skipped to avoid
- * problems with the MAS reserved for the BP).
- *
- * [ECMA-368] section B.2.
- */
-static int uwb_rsv_alloc_mas(struct uwb_rsv *rsv)
+void uwb_rsv_backoff_win_increment(struct uwb_rc *rc)
 {
-	static const int safe_mas_in_row[UWB_NUM_ZONES] = {
-		8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1,
-	};
-	int n, r;
-	struct uwb_mas_bm mas;
-	bool found = false;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct device *dev = &rc->uwb_dev.dev;
+	unsigned timeout_us;
 
-	/*
-	 * Search all valid safe allocations until either: too few MAS
-	 * are available; or the smallest allocation with sufficient
-	 * MAS is found.
-	 *
-	 * The top of the zones are preferred, so space for larger
-	 * allocations is available in the bottom of the zone (e.g., a
-	 * 15 MAS allocation should start in row 14 leaving space for
-	 * a 120 MAS allocation at row 0).
-	 */
-	for (n = safe_mas_in_row[0]; n >= 1; n--) {
-		int num_mas;
+	dev_dbg(dev, "backoff_win_increment: window=%d\n", bow->window);
 
-		num_mas = n * (UWB_NUM_ZONES - 1);
-		if (num_mas < rsv->min_mas)
-			break;
-		if (found && num_mas < rsv->max_mas)
-			break;
+	bow->can_reserve_extra_mases = false;
 
-		for (r = UWB_MAS_PER_ZONE-1;  r >= 0; r--) {
-			if (safe_mas_in_row[r] < n)
-				continue;
-			uwb_rsv_gen_alloc_row(&mas, r, n, 1, UWB_NUM_ZONES);
-			if (uwb_drp_avail_reserve_pending(rsv->rc, &mas) == 0) {
-				found = true;
-				break;
-			}
-		}
-	}
+	if((bow->window << 1) == UWB_DRP_BACKOFF_WIN_MAX)
+		return;
 
-	if (!found)
-		return -EBUSY;
+	bow->window <<= 1;
+	bow->n = random32() & (bow->window - 1);
+	dev_dbg(dev, "new_window=%d, n=%d\n: ", bow->window, bow->n);
 
-	bitmap_copy(rsv->mas.bm, mas.bm, UWB_NUM_MAS);
-	return 0;
+	/* reset the timer associated variables */
+	timeout_us = bow->n * UWB_SUPERFRAME_LENGTH_US;
+	bow->total_expired = 0;
+	mod_timer(&bow->timer, jiffies + usecs_to_jiffies(timeout_us));		
 }
 
 static void uwb_rsv_stroke_timer(struct uwb_rsv *rsv)
@@ -241,13 +247,16 @@ static void uwb_rsv_stroke_timer(struct uwb_rsv *rsv)
 	 * received.
 	 */
 	if (rsv->is_multicast) {
-		if (rsv->state == UWB_RSV_STATE_O_INITIATED)
+		if (rsv->state == UWB_RSV_STATE_O_INITIATED
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_EXPANDING
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_COMBINING
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING)
 			sframes = 1;
 		if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED)
 			sframes = 0;
+		
 	}
 
-	rsv->expired = false;
 	if (sframes > 0) {
 		/*
 		 * Add an additional 2 superframes to account for the
@@ -269,7 +278,7 @@ static void uwb_rsv_state_update(struct uwb_rsv *rsv,
 	rsv->state = new_state;
 	rsv->ie_valid = false;
 
-	uwb_rsv_dump(rsv);
+	uwb_rsv_dump("SU", rsv);
 
 	uwb_rsv_stroke_timer(rsv);
 	uwb_rsv_sched_update(rsv->rc);
@@ -283,10 +292,17 @@ static void uwb_rsv_callback(struct uwb_rsv *rsv)
 
 void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
 {
+	struct uwb_rsv_move *mv = &rsv->mv;
+
 	if (rsv->state == new_state) {
 		switch (rsv->state) {
 		case UWB_RSV_STATE_O_ESTABLISHED:
+		case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		case UWB_RSV_STATE_O_MOVE_COMBINING:
+		case UWB_RSV_STATE_O_MOVE_REDUCING:
 		case UWB_RSV_STATE_T_ACCEPTED:
+		case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		case UWB_RSV_STATE_T_RESIZED:
 		case UWB_RSV_STATE_NONE:
 			uwb_rsv_stroke_timer(rsv);
 			break;
@@ -298,11 +314,10 @@ void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
 		return;
 	}
 
+	uwb_rsv_dump("SC", rsv);
+
 	switch (new_state) {
 	case UWB_RSV_STATE_NONE:
-		uwb_drp_avail_release(rsv->rc, &rsv->mas);
-		if (uwb_rsv_is_owner(rsv))
-			uwb_rsv_put_stream(rsv);
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_NONE);
 		uwb_rsv_callback(rsv);
 		break;
@@ -312,12 +327,45 @@ void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
 	case UWB_RSV_STATE_O_PENDING:
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_PENDING);
 		break;
+	case UWB_RSV_STATE_O_MODIFIED:
+		/* in the companion there are the MASes to drop */
+		bitmap_andnot(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MODIFIED);
+		break;
 	case UWB_RSV_STATE_O_ESTABLISHED:
+		if (rsv->state == UWB_RSV_STATE_O_MODIFIED
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING) {
+			uwb_drp_avail_release(rsv->rc, &mv->companion_mas);
+			rsv->needs_release_companion_mas = false;
+		}
 		uwb_drp_avail_reserve(rsv->rc, &rsv->mas);
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_ESTABLISHED);
 		uwb_rsv_callback(rsv);
 		break;
+	case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		rsv->needs_release_companion_mas = true;
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+		break;
+	case UWB_RSV_STATE_O_MOVE_COMBINING:
+		rsv->needs_release_companion_mas = false;
+		uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas);
+		bitmap_or(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS);
+		rsv->mas.safe   += mv->companion_mas.safe;
+		rsv->mas.unsafe += mv->companion_mas.unsafe;
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+		break;
+	case UWB_RSV_STATE_O_MOVE_REDUCING:
+		bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS);
+		rsv->needs_release_companion_mas = true;
+		rsv->mas.safe   = mv->final_mas.safe;
+		rsv->mas.unsafe = mv->final_mas.unsafe;
+		bitmap_copy(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS);
+		bitmap_copy(rsv->mas.unsafe_bm, mv->final_mas.unsafe_bm, UWB_NUM_MAS);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+		break;
 	case UWB_RSV_STATE_T_ACCEPTED:
+	case UWB_RSV_STATE_T_RESIZED:
+		rsv->needs_release_companion_mas = false;
 		uwb_drp_avail_reserve(rsv->rc, &rsv->mas);
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_ACCEPTED);
 		uwb_rsv_callback(rsv);
@@ -325,12 +373,82 @@ void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
 	case UWB_RSV_STATE_T_DENIED:
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_DENIED);
 		break;
+	case UWB_RSV_STATE_T_CONFLICT:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_CONFLICT);
+		break;
+	case UWB_RSV_STATE_T_PENDING:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_PENDING);
+		break;
+	case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		rsv->needs_release_companion_mas = true;
+		uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
+		break;
 	default:
 		dev_err(&rsv->rc->uwb_dev.dev, "unhandled state: %s (%d)\n",
 			uwb_rsv_state_str(new_state), new_state);
 	}
 }
 
+static void uwb_rsv_handle_timeout_work(struct work_struct *work)
+{
+	struct uwb_rsv *rsv = container_of(work, struct uwb_rsv,
+					   handle_timeout_work);
+	struct uwb_rc *rc = rsv->rc;
+
+	mutex_lock(&rc->rsvs_mutex);
+
+	uwb_rsv_dump("TO", rsv);
+
+	switch (rsv->state) {
+	case UWB_RSV_STATE_O_INITIATED:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_COMBINING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_REDUCING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_ESTABLISHED:
+		if (rsv->is_multicast)
+			goto unlock;
+		break;
+	case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		/*
+		 * The time out could be for the main or of the
+		 * companion DRP, assume it's for the companion and
+		 * drop that first.  A further time out is required to
+		 * drop the main.
+		 */
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
+		uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+		goto unlock;
+	default:
+		break;
+	}
+
+	uwb_rsv_remove(rsv);
+
+unlock:
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
 static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc)
 {
 	struct uwb_rsv *rsv;
@@ -347,6 +465,7 @@ static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc)
 	rsv->timer.data     = (unsigned long)rsv;
 
 	rsv->rc = rc;
+	INIT_WORK(&rsv->handle_timeout_work, uwb_rsv_handle_timeout_work);
 
 	return rsv;
 }
@@ -381,8 +500,18 @@ EXPORT_SYMBOL_GPL(uwb_rsv_create);
 
 void uwb_rsv_remove(struct uwb_rsv *rsv)
 {
+	uwb_rsv_dump("RM", rsv);
+
 	if (rsv->state != UWB_RSV_STATE_NONE)
 		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
+
+	if (rsv->needs_release_companion_mas)
+		uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+	uwb_drp_avail_release(rsv->rc, &rsv->mas);
+
+	if (uwb_rsv_is_owner(rsv))
+		uwb_rsv_put_stream(rsv);
+	
 	del_timer_sync(&rsv->timer);
 	uwb_dev_put(rsv->owner);
 	if (rsv->target.type == UWB_RSV_TARGET_DEV)
@@ -409,7 +538,7 @@ EXPORT_SYMBOL_GPL(uwb_rsv_destroy);
  * @rsv: the reservation
  *
  * The PAL should fill in @rsv's owner, target, type, max_mas,
- * min_mas, sparsity and is_multicast fields.  If the target is a
+ * min_mas, max_interval and is_multicast fields.  If the target is a
  * uwb_dev it must be referenced.
  *
  * The reservation's callback will be called when the reservation is
@@ -418,16 +547,27 @@ EXPORT_SYMBOL_GPL(uwb_rsv_destroy);
 int uwb_rsv_establish(struct uwb_rsv *rsv)
 {
 	struct uwb_rc *rc = rsv->rc;
+	struct uwb_mas_bm available;
 	int ret;
 
 	mutex_lock(&rc->rsvs_mutex);
-
 	ret = uwb_rsv_get_stream(rsv);
 	if (ret)
 		goto out;
 
-	ret = uwb_rsv_alloc_mas(rsv);
-	if (ret) {
+	rsv->tiebreaker = random32() & 1;
+	/* get available mas bitmap */
+	uwb_drp_available(rc, &available);
+
+	ret = uwb_rsv_find_best_allocation(rsv, &available, &rsv->mas);
+	if (ret == UWB_RSV_ALLOC_NOT_FOUND) {
+		ret = -EBUSY;
+		uwb_rsv_put_stream(rsv);
+		goto out;
+	}
+
+	ret = uwb_drp_avail_reserve_pending(rc, &rsv->mas);
+	if (ret != 0) {
 		uwb_rsv_put_stream(rsv);
 		goto out;
 	}
@@ -448,16 +588,71 @@ EXPORT_SYMBOL_GPL(uwb_rsv_establish);
  * @rsv: the reservation to modify
  * @max_mas: new maximum MAS to reserve
  * @min_mas: new minimum MAS to reserve
- * @sparsity: new sparsity to use
+ * @max_interval: new max_interval to use
  *
  * FIXME: implement this once there are PALs that use it.
  */
-int uwb_rsv_modify(struct uwb_rsv *rsv, int max_mas, int min_mas, int sparsity)
+int uwb_rsv_modify(struct uwb_rsv *rsv, int max_mas, int min_mas, int max_interval)
 {
 	return -ENOSYS;
 }
 EXPORT_SYMBOL_GPL(uwb_rsv_modify);
 
+/*
+ * move an already established reservation (rc->rsvs_mutex must to be
+ * taken when tis function is called)
+ */
+int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rsv_move *mv;
+	int ret = 0;
+ 
+	if (bow->can_reserve_extra_mases == false)
+		return -EBUSY;
+
+	mv = &rsv->mv;
+
+	if (uwb_rsv_find_best_allocation(rsv, available, &mv->final_mas) == UWB_RSV_ALLOC_FOUND) {
+
+		if (!bitmap_equal(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS)) {
+			/* We want to move the reservation */
+			bitmap_andnot(mv->companion_mas.bm, mv->final_mas.bm, rsv->mas.bm, UWB_NUM_MAS);
+			uwb_drp_avail_reserve_pending(rc, &mv->companion_mas);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+		}
+	} else {
+		dev_dbg(dev, "new allocation not found\n");
+	}
+	
+	return ret;
+}
+
+/* It will try to move every reservation in state O_ESTABLISHED giving
+ * to the MAS allocator algorithm an availability that is the real one
+ * plus the allocation already established from the reservation. */
+void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc)
+{
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct uwb_rsv *rsv;
+	struct uwb_mas_bm mas;
+	
+	if (bow->can_reserve_extra_mases == false)
+		return;
+
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED ||
+		    rsv->state == UWB_RSV_STATE_O_TO_BE_MOVED) {
+			uwb_drp_available(rc, &mas);
+			bitmap_or(mas.bm, mas.bm, rsv->mas.bm, UWB_NUM_MAS);
+			uwb_rsv_try_move(rsv, &mas);
+		}
+	}
+	
+}
+
 /**
  * uwb_rsv_terminate - terminate an established reservation
  * @rsv: the reservation to terminate
@@ -546,6 +741,7 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc,
 	uwb_dev_get(rsv->owner);
 	rsv->target.type = UWB_RSV_TARGET_DEV;
 	rsv->target.dev  = &rc->uwb_dev;
+	uwb_dev_get(&rc->uwb_dev);
 	rsv->type        = uwb_ie_drp_type(drp_ie);
 	rsv->stream      = uwb_ie_drp_stream_index(drp_ie);
 	uwb_drp_ie_to_bm(&rsv->mas, drp_ie);
@@ -567,12 +763,34 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc,
 	list_add_tail(&rsv->rc_node, &rc->reservations);
 	state = rsv->state;
 	rsv->state = UWB_RSV_STATE_NONE;
-	uwb_rsv_set_state(rsv, state);
+
+	/* FIXME: do something sensible here */
+	if (state == UWB_RSV_STATE_T_ACCEPTED
+	    && uwb_drp_avail_reserve_pending(rc, &rsv->mas) == -EBUSY) {
+		/* FIXME: do something sensible here */
+	} else {
+		uwb_rsv_set_state(rsv, state);
+	}
 
 	return rsv;
 }
 
 /**
+ * uwb_rsv_get_usable_mas - get the bitmap of the usable MAS of a reservations
+ * @rsv: the reservation.
+ * @mas: returns the available MAS.
+ *
+ * The usable MAS of a reservation may be less than the negotiated MAS
+ * if alien BPs are present.
+ */
+void uwb_rsv_get_usable_mas(struct uwb_rsv *rsv, struct uwb_mas_bm *mas)
+{
+	bitmap_zero(mas->bm, UWB_NUM_MAS);
+	bitmap_andnot(mas->bm, rsv->mas.bm, rsv->rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS);
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_get_usable_mas);
+
+/**
  * uwb_rsv_find - find a reservation for a received DRP IE.
  * @rc: the radio controller
  * @src: source of the DRP IE
@@ -611,8 +829,6 @@ static bool uwb_rsv_update_all(struct uwb_rc *rc)
 	bool ie_updated = false;
 
 	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
-		if (rsv->expired)
-			uwb_drp_handle_timeout(rsv);
 		if (!rsv->ie_valid) {
 			uwb_drp_ie_update(rsv);
 			ie_updated = true;
@@ -622,9 +838,47 @@ static bool uwb_rsv_update_all(struct uwb_rc *rc)
 	return ie_updated;
 }
 
+void uwb_rsv_queue_update(struct uwb_rc *rc)
+{
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_update_work, usecs_to_jiffies(delay_us));
+}
+
+/**
+ * uwb_rsv_sched_update - schedule an update of the DRP IEs
+ * @rc: the radio controller.
+ *
+ * To improve performance and ensure correctness with [ECMA-368] the
+ * number of SET-DRP-IE commands that are done are limited.
+ *
+ * DRP IEs update come from two sources: DRP events from the hardware
+ * which all occur at the beginning of the superframe ('syncronous'
+ * events) and reservation establishment/termination requests from
+ * PALs or timers ('asynchronous' events).
+ *
+ * A delayed work ensures that all the synchronous events result in
+ * one SET-DRP-IE command.
+ *
+ * Additional logic (the set_drp_ie_pending and rsv_updated_postponed
+ * flags) will prevent an asynchrous event starting a SET-DRP-IE
+ * command if one is currently awaiting a response.
+ *
+ * FIXME: this does leave a window where an asynchrous event can delay
+ * the SET-DRP-IE for a synchronous event by one superframe.
+ */
 void uwb_rsv_sched_update(struct uwb_rc *rc)
 {
-	queue_work(rc->rsv_workq, &rc->rsv_update_work);
+	spin_lock(&rc->rsvs_lock);
+	if (!delayed_work_pending(&rc->rsv_update_work)) {
+		if (rc->set_drp_ie_pending > 0) {
+			rc->set_drp_ie_pending++;
+			goto unlock;
+		}
+		uwb_rsv_queue_update(rc);
+	}
+unlock:
+	spin_unlock(&rc->rsvs_lock);
 }
 
 /*
@@ -633,7 +887,8 @@ void uwb_rsv_sched_update(struct uwb_rc *rc)
  */
 static void uwb_rsv_update_work(struct work_struct *work)
 {
-	struct uwb_rc *rc = container_of(work, struct uwb_rc, rsv_update_work);
+	struct uwb_rc *rc = container_of(work, struct uwb_rc,
+					 rsv_update_work.work);
 	bool ie_updated;
 
 	mutex_lock(&rc->rsvs_mutex);
@@ -645,18 +900,34 @@ static void uwb_rsv_update_work(struct work_struct *work)
 		ie_updated = true;
 	}
 
-	if (ie_updated)
+	if (ie_updated && (rc->set_drp_ie_pending == 0))
 		uwb_rc_send_all_drp_ie(rc);
 
 	mutex_unlock(&rc->rsvs_mutex);
 }
 
+static void uwb_rsv_alien_bp_work(struct work_struct *work)
+{
+	struct uwb_rc *rc = container_of(work, struct uwb_rc,
+					 rsv_alien_bp_work.work);
+	struct uwb_rsv *rsv;
+
+	mutex_lock(&rc->rsvs_mutex);
+	
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		if (rsv->type != UWB_DRP_TYPE_ALIEN_BP) {
+			rsv->callback(rsv);
+		}
+	}
+
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
 static void uwb_rsv_timer(unsigned long arg)
 {
 	struct uwb_rsv *rsv = (struct uwb_rsv *)arg;
 
-	rsv->expired = true;
-	uwb_rsv_sched_update(rsv->rc);
+	queue_work(rsv->rc->rsv_workq, &rsv->handle_timeout_work);
 }
 
 /**
@@ -673,16 +944,27 @@ void uwb_rsv_remove_all(struct uwb_rc *rc)
 	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
 		uwb_rsv_remove(rsv);
 	}
+	/* Cancel any postponed update. */
+	rc->set_drp_ie_pending = 0;
 	mutex_unlock(&rc->rsvs_mutex);
 
-	cancel_work_sync(&rc->rsv_update_work);
+	cancel_delayed_work_sync(&rc->rsv_update_work);
 }
 
 void uwb_rsv_init(struct uwb_rc *rc)
 {
 	INIT_LIST_HEAD(&rc->reservations);
+	INIT_LIST_HEAD(&rc->cnflt_alien_list);
 	mutex_init(&rc->rsvs_mutex);
-	INIT_WORK(&rc->rsv_update_work, uwb_rsv_update_work);
+	spin_lock_init(&rc->rsvs_lock);
+	INIT_DELAYED_WORK(&rc->rsv_update_work, uwb_rsv_update_work);
+	INIT_DELAYED_WORK(&rc->rsv_alien_bp_work, uwb_rsv_alien_bp_work);
+	rc->bow.can_reserve_extra_mases = true;
+	rc->bow.total_expired = 0;
+	rc->bow.window = UWB_DRP_BACKOFF_WIN_MIN >> 1;
+	init_timer(&rc->bow.timer);
+	rc->bow.timer.function = uwb_rsv_backoff_win_timer;
+	rc->bow.timer.data     = (unsigned long)&rc->bow;
 
 	bitmap_complement(rc->uwb_dev.streams, rc->uwb_dev.streams, UWB_NUM_STREAMS);
 }
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index a6debb9..89b2e6a 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -82,29 +82,21 @@ struct uwb_dbg {
 	struct dentry *reservations_f;
 	struct dentry *accept_f;
 	struct dentry *drp_avail_f;
+	spinlock_t list_lock;
 };
 
 static struct dentry *root_dir;
 
 static void uwb_dbg_rsv_cb(struct uwb_rsv *rsv)
 {
-	struct uwb_rc *rc = rsv->rc;
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_dev_addr devaddr;
-	char owner[UWB_ADDR_STRSIZE], target[UWB_ADDR_STRSIZE];
-
-	uwb_dev_addr_print(owner, sizeof(owner), &rsv->owner->dev_addr);
-	if (rsv->target.type == UWB_RSV_TARGET_DEV)
-		devaddr = rsv->target.dev->dev_addr;
-	else
-		devaddr = rsv->target.devaddr;
-	uwb_dev_addr_print(target, sizeof(target), &devaddr);
+	struct uwb_dbg *dbg = rsv->pal_priv;
 
-	dev_dbg(dev, "debug: rsv %s -> %s: %s\n",
-		owner, target, uwb_rsv_state_str(rsv->state));
+	uwb_rsv_dump("debug", rsv);
 
 	if (rsv->state == UWB_RSV_STATE_NONE) {
+		spin_lock(&dbg->list_lock);
 		list_del(&rsv->pal_node);
+		spin_unlock(&dbg->list_lock);
 		uwb_rsv_destroy(rsv);
 	}
 }
@@ -128,20 +120,21 @@ static int cmd_rsv_establish(struct uwb_rc *rc,
 		return -ENOMEM;
 	}
 
-	rsv->owner       = &rc->uwb_dev;
-	rsv->target.type = UWB_RSV_TARGET_DEV;
-	rsv->target.dev  = target;
-	rsv->type        = cmd->type;
-	rsv->max_mas     = cmd->max_mas;
-	rsv->min_mas     = cmd->min_mas;
-	rsv->sparsity    = cmd->sparsity;
+	rsv->target.type  = UWB_RSV_TARGET_DEV;
+	rsv->target.dev   = target;
+	rsv->type         = cmd->type;
+	rsv->max_mas      = cmd->max_mas;
+	rsv->min_mas      = cmd->min_mas;
+	rsv->max_interval = cmd->max_interval;
 
 	ret = uwb_rsv_establish(rsv);
 	if (ret)
 		uwb_rsv_destroy(rsv);
-	else
+	else {
+		spin_lock(&(rc->dbg)->list_lock);
 		list_add_tail(&rsv->pal_node, &rc->dbg->rsvs);
-
+		spin_unlock(&(rc->dbg)->list_lock);
+	}
 	return ret;
 }
 
@@ -151,17 +144,24 @@ static int cmd_rsv_terminate(struct uwb_rc *rc,
 	struct uwb_rsv *rsv, *found = NULL;
 	int i = 0;
 
+	spin_lock(&(rc->dbg)->list_lock);
+
 	list_for_each_entry(rsv, &rc->dbg->rsvs, pal_node) {
 		if (i == cmd->index) {
 			found = rsv;
+			uwb_rsv_get(found);
 			break;
 		}
 		i++;
 	}
+
+	spin_unlock(&(rc->dbg)->list_lock);
+
 	if (!found)
 		return -EINVAL;
 
 	uwb_rsv_terminate(found);
+	uwb_rsv_put(found);
 
 	return 0;
 }
@@ -191,7 +191,7 @@ static ssize_t command_write(struct file *file, const char __user *buf,
 	struct uwb_rc *rc = file->private_data;
 	struct uwb_dbg_cmd cmd;
 	int ret = 0;
-
+	
 	if (len != sizeof(struct uwb_dbg_cmd))
 		return -EINVAL;
 
@@ -325,7 +325,9 @@ static void uwb_dbg_new_rsv(struct uwb_pal *pal, struct uwb_rsv *rsv)
 	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
 
 	if (dbg->accept) {
+		spin_lock(&dbg->list_lock);
 		list_add_tail(&rsv->pal_node, &dbg->rsvs);
+		spin_unlock(&dbg->list_lock);
 		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, dbg);
 	}
 }
@@ -341,6 +343,7 @@ void uwb_dbg_add_rc(struct uwb_rc *rc)
 		return;
 
 	INIT_LIST_HEAD(&rc->dbg->rsvs);
+	spin_lock_init(&(rc->dbg)->list_lock);
 
 	uwb_pal_init(&rc->dbg->pal);
 	rc->dbg->pal.rc = rc;
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index f0f21f4..d5bcfc1 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -92,6 +92,12 @@ extern const char *uwb_rc_strerror(unsigned code);
 
 struct uwb_rc_neh;
 
+extern int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name,
+			    struct uwb_rccb *cmd, size_t cmd_size,
+			    u8 expected_type, u16 expected_event,
+			    uwb_rc_cmd_cb_f cb, void *arg);
+
+
 void uwb_rc_neh_create(struct uwb_rc *rc);
 void uwb_rc_neh_destroy(struct uwb_rc *rc);
 
@@ -106,7 +112,69 @@ void uwb_rc_neh_put(struct uwb_rc_neh *neh);
 extern int uwb_est_create(void);
 extern void uwb_est_destroy(void);
 
+/*
+ * UWB conflicting alien reservations
+ */
+struct uwb_cnflt_alien {
+	struct uwb_rc *rc;
+	struct list_head rc_node;
+	struct uwb_mas_bm mas;
+	struct timer_list timer;
+	struct work_struct cnflt_update_work;
+};
+
+enum uwb_uwb_rsv_alloc_result {
+	UWB_RSV_ALLOC_FOUND = 0,
+	UWB_RSV_ALLOC_NOT_FOUND,
+};
+
+enum uwb_rsv_mas_status {
+	UWB_RSV_MAS_NOT_AVAIL = 1,
+	UWB_RSV_MAS_SAFE,
+	UWB_RSV_MAS_UNSAFE,
+};
+
+struct uwb_rsv_col_set_info {
+	unsigned char start_col;
+	unsigned char interval;
+	unsigned char safe_mas_per_col;
+	unsigned char unsafe_mas_per_col;
+};
+
+struct uwb_rsv_col_info {
+	unsigned char max_avail_safe;
+	unsigned char max_avail_unsafe;
+	unsigned char highest_mas[UWB_MAS_PER_ZONE];
+	struct uwb_rsv_col_set_info csi;
+};
+
+struct uwb_rsv_row_info {
+	unsigned char avail[UWB_MAS_PER_ZONE];
+	unsigned char free_rows;
+	unsigned char used_rows;
+};
+
+/*
+ * UWB find allocation
+ */
+struct uwb_rsv_alloc_info {
+	unsigned char bm[UWB_MAS_PER_ZONE * UWB_NUM_ZONES];
+	struct uwb_rsv_col_info ci[UWB_NUM_ZONES];
+	struct uwb_rsv_row_info ri;
+	struct uwb_mas_bm *not_available;
+	struct uwb_mas_bm *result;
+	int min_mas;
+	int max_mas;
+	int max_interval;
+	int total_allocated_mases;
+	int safe_allocated_mases;
+	int unsafe_allocated_mases;
+	int interval;
+};
 
+int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv, struct uwb_mas_bm *available, 
+				 struct uwb_mas_bm *result);
+void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc);
 /*
  * UWB Events & management daemon
  */
@@ -254,18 +322,28 @@ void uwb_rsv_init(struct uwb_rc *rc);
 int uwb_rsv_setup(struct uwb_rc *rc);
 void uwb_rsv_cleanup(struct uwb_rc *rc);
 void uwb_rsv_remove_all(struct uwb_rc *rc);
+void uwb_rsv_get(struct uwb_rsv *rsv);
+void uwb_rsv_put(struct uwb_rsv *rsv);
+bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv);
+void uwb_rsv_dump(char *text, struct uwb_rsv *rsv);
+int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available);
+void uwb_rsv_backoff_win_timer(unsigned long arg);
+void uwb_rsv_backoff_win_increment(struct uwb_rc *rc);
+int uwb_rsv_status(struct uwb_rsv *rsv);
+int uwb_rsv_companion_status(struct uwb_rsv *rsv);
 
 void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state);
 void uwb_rsv_remove(struct uwb_rsv *rsv);
 struct uwb_rsv *uwb_rsv_find(struct uwb_rc *rc, struct uwb_dev *src,
 			     struct uwb_ie_drp *drp_ie);
 void uwb_rsv_sched_update(struct uwb_rc *rc);
+void uwb_rsv_queue_update(struct uwb_rc *rc);
 
-void uwb_drp_handle_timeout(struct uwb_rsv *rsv);
 int uwb_drp_ie_update(struct uwb_rsv *rsv);
 void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie);
 
 void uwb_drp_avail_init(struct uwb_rc *rc);
+void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail);
 int  uwb_drp_avail_reserve_pending(struct uwb_rc *rc, struct uwb_mas_bm *mas);
 void uwb_drp_avail_reserve(struct uwb_rc *rc, struct uwb_mas_bm *mas);
 void uwb_drp_avail_release(struct uwb_rc *rc, struct uwb_mas_bm *mas);
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index d7ed520..c021289 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -67,6 +67,7 @@ struct uwb_dev {
 	struct uwb_dev_addr dev_addr;
 	int beacon_slot;
 	DECLARE_BITMAP(streams, UWB_NUM_STREAMS);
+	DECLARE_BITMAP(last_availability_bm, UWB_NUM_MAS);
 };
 #define to_uwb_dev(d) container_of(d, struct uwb_dev, dev)
 
@@ -109,6 +110,9 @@ struct uwbd {
  */
 struct uwb_mas_bm {
 	DECLARE_BITMAP(bm, UWB_NUM_MAS);
+	DECLARE_BITMAP(unsafe_bm, UWB_NUM_MAS);
+	int safe;
+	int unsafe;
 };
 
 /**
@@ -134,14 +138,24 @@ struct uwb_mas_bm {
  * FIXME: further target states TBD.
  */
 enum uwb_rsv_state {
-	UWB_RSV_STATE_NONE,
+	UWB_RSV_STATE_NONE = 0,
 	UWB_RSV_STATE_O_INITIATED,
 	UWB_RSV_STATE_O_PENDING,
 	UWB_RSV_STATE_O_MODIFIED,
 	UWB_RSV_STATE_O_ESTABLISHED,
+	UWB_RSV_STATE_O_TO_BE_MOVED,
+	UWB_RSV_STATE_O_MOVE_EXPANDING,
+	UWB_RSV_STATE_O_MOVE_COMBINING,
+	UWB_RSV_STATE_O_MOVE_REDUCING,
 	UWB_RSV_STATE_T_ACCEPTED,
 	UWB_RSV_STATE_T_DENIED,
+	UWB_RSV_STATE_T_CONFLICT,
 	UWB_RSV_STATE_T_PENDING,
+	UWB_RSV_STATE_T_EXPANDING_ACCEPTED,
+	UWB_RSV_STATE_T_EXPANDING_CONFLICT,
+	UWB_RSV_STATE_T_EXPANDING_PENDING,
+	UWB_RSV_STATE_T_EXPANDING_DENIED,
+	UWB_RSV_STATE_T_RESIZED,
 
 	UWB_RSV_STATE_LAST,
 };
@@ -166,6 +180,12 @@ struct uwb_rsv_target {
 	};
 };
 
+struct uwb_rsv_move {
+	struct uwb_mas_bm final_mas;
+	struct uwb_ie_drp *companion_drp_ie;
+	struct uwb_mas_bm companion_mas;
+};
+
 /*
  * Number of streams reserved for reservations targeted at DevAddrs.
  */
@@ -203,6 +223,7 @@ typedef void (*uwb_rsv_cb_f)(struct uwb_rsv *rsv);
  *
  * @status:         negotiation status
  * @stream:         stream index allocated for this reservation
+ * @tiebreaker:     conflict tiebreaker for this reservation
  * @mas:            reserved MAS
  * @drp_ie:         the DRP IE
  * @ie_valid:       true iff the DRP IE matches the reservation parameters
@@ -225,19 +246,22 @@ struct uwb_rsv {
 	enum uwb_drp_type type;
 	int max_mas;
 	int min_mas;
-	int sparsity;
+	int max_interval;
 	bool is_multicast;
 
 	uwb_rsv_cb_f callback;
 	void *pal_priv;
 
 	enum uwb_rsv_state state;
+	bool needs_release_companion_mas;
 	u8 stream;
+	u8 tiebreaker;
 	struct uwb_mas_bm mas;
 	struct uwb_ie_drp *drp_ie;
+	struct uwb_rsv_move mv;
 	bool ie_valid;
 	struct timer_list timer;
-	bool expired;
+	struct work_struct handle_timeout_work;
 };
 
 static const
@@ -279,6 +303,13 @@ struct uwb_drp_avail {
 	bool ie_valid;
 };
 
+struct uwb_drp_backoff_win {
+	u8 window;
+	u8 n;
+	int total_expired;
+	struct timer_list timer;
+	bool can_reserve_extra_mases;
+};
 
 const char *uwb_rsv_state_str(enum uwb_rsv_state state);
 const char *uwb_rsv_type_str(enum uwb_drp_type type);
@@ -294,6 +325,8 @@ void uwb_rsv_terminate(struct uwb_rsv *rsv);
 
 void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv);
 
+void uwb_rsv_get_usable_mas(struct uwb_rsv *orig_rsv, struct uwb_mas_bm *mas);
+
 /**
  * Radio Control Interface instance
  *
@@ -364,12 +397,18 @@ struct uwb_rc {
 
 	struct uwbd uwbd;
 
+	struct uwb_drp_backoff_win bow;
 	struct uwb_drp_avail drp_avail;
 	struct list_head reservations;
+	struct list_head cnflt_alien_list;
+	struct uwb_mas_bm cnflt_alien_bitmap;
 	struct mutex rsvs_mutex;
+	spinlock_t rsvs_lock;
 	struct workqueue_struct *rsv_workq;
-	struct work_struct rsv_update_work;
 
+	struct delayed_work rsv_update_work;
+	struct delayed_work rsv_alien_bp_work;
+	int set_drp_ie_pending;
 	struct mutex ies_mutex;
 	struct uwb_rc_cmd_set_ie *ies;
 	size_t ies_capacity;
diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h
index 07efbe1..8da004e 100644
--- a/include/linux/uwb/debug-cmd.h
+++ b/include/linux/uwb/debug-cmd.h
@@ -43,7 +43,7 @@ struct uwb_dbg_cmd_rsv_establish {
 	__u8  type;
 	__u16 max_mas;
 	__u16 min_mas;
-	__u8  sparsity;
+	__u8  max_interval;
 };
 
 struct uwb_dbg_cmd_rsv_terminate {
diff --git a/include/linux/uwb/spec.h b/include/linux/uwb/spec.h
index a30436e..b52e44f 100644
--- a/include/linux/uwb/spec.h
+++ b/include/linux/uwb/spec.h
@@ -59,6 +59,11 @@ enum { UWB_NUM_ZONES = 16 };
 #define UWB_MAS_PER_ZONE (UWB_NUM_MAS / UWB_NUM_ZONES)
 
 /*
+ * Number of MAS required before a row can be considered available.
+ */
+#define UWB_USABLE_MAS_PER_ROW (UWB_NUM_ZONES - 1)
+
+/*
  * Number of streams per DRP reservation between a pair of devices.
  *
  * [ECMA-368] section 16.8.6.
@@ -94,6 +99,26 @@ enum { UWB_BEACON_SLOT_LENGTH_US = 85 };
 enum { UWB_MAX_LOST_BEACONS = 3 };
 
 /*
+ * mDRPBackOffWinMin
+ *
+ * The minimum number of superframes to wait before trying to reserve
+ * extra MAS.
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_DRP_BACKOFF_WIN_MIN = 2 };
+
+/*
+ * mDRPBackOffWinMax
+ *
+ * The maximum number of superframes to wait before trying to reserve
+ * extra MAS.
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_DRP_BACKOFF_WIN_MAX = 16 };
+
+/*
  * Length of a superframe in microseconds.
  */
 #define UWB_SUPERFRAME_LENGTH_US (UWB_MAS_LENGTH_US * UWB_NUM_MAS)
-- 
cgit v0.10.2


From 671e470ed04865ca148b83f46319d14547481340 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Fri, 12 Dec 2008 13:23:24 +0000
Subject: uwb: fix oops when terminating an already terminated reservation

Calling uwb_rsv_terminate() on a reservation already in UWB_RSV_STATE_NONE
should do nothing.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index 165aec6..ec6eecb 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -669,7 +669,8 @@ void uwb_rsv_terminate(struct uwb_rsv *rsv)
 
 	mutex_lock(&rc->rsvs_mutex);
 
-	uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
+	if (rsv->state != UWB_RSV_STATE_NONE)
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
 
 	mutex_unlock(&rc->rsvs_mutex);
 }
-- 
cgit v0.10.2


From fe6e87a4b570d2e435709746ba550a7197016bd0 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Fri, 12 Dec 2008 13:25:21 +0000
Subject: wusb: fix oops when terminating a non-existant reservation

If a reservation was not established, do not try terminating it.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/wusbcore/reservation.c b/drivers/usb/wusbcore/reservation.c
index c37e4f8..4ed9736 100644
--- a/drivers/usb/wusbcore/reservation.c
+++ b/drivers/usb/wusbcore/reservation.c
@@ -110,6 +110,9 @@ int wusbhc_rsv_establish(struct wusbhc *wusbhc)
  */
 void wusbhc_rsv_terminate(struct wusbhc *wusbhc)
 {
-	uwb_rsv_terminate(wusbhc->rsv);
-	uwb_rsv_destroy(wusbhc->rsv);
+	if (wusbhc->rsv) {
+		uwb_rsv_terminate(wusbhc->rsv);
+		uwb_rsv_destroy(wusbhc->rsv);
+		wusbhc->rsv = NULL;
+	}
 }
-- 
cgit v0.10.2


From 02f11ee181baa562df23e105ba930902f0d0b1bf Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Fri, 12 Dec 2008 13:28:48 +0000
Subject: uwb: fix memory leak in uwb_rc_notif()

Don't leak memory in uwb_rc_notif() if certain non-standard events are
received.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/neh.c b/drivers/uwb/neh.c
index 48b4ece..6df18ed 100644
--- a/drivers/uwb/neh.c
+++ b/drivers/uwb/neh.c
@@ -349,7 +349,7 @@ struct uwb_rc_neh *uwb_rc_neh_lookup(struct uwb_rc *rc,
 }
 
 
-/**
+/*
  * Process notifications coming from the radio control interface
  *
  * @rc:    UWB Radio Control Interface descriptor
@@ -401,23 +401,6 @@ void uwb_rc_notif(struct uwb_rc *rc, struct uwb_rceb *rceb, ssize_t size)
 	uwb_evt->notif.size = size;
 	uwb_evt->notif.rceb = rceb;
 
-	switch (le16_to_cpu(rceb->wEvent)) {
-		/* Trap some vendor specific events
-		 *
-		 * FIXME: move this to handling in ptc-est, where we
-		 * register a NULL event handler for these two guys
-		 * using the Intel IDs.
-		 */
-	case 0x0103:
-		dev_info(dev, "FIXME: DEVICE ADD\n");
-		return;
-	case 0x0104:
-		dev_info(dev, "FIXME: DEVICE RM\n");
-		return;
-	default:
-		break;
-	}
-
 	uwbd_event_queue(uwb_evt);
 }
 
-- 
cgit v0.10.2


From 98a79d6a50181ca1ecf7400eda01d5dc1bc0dbf0 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:19:41 +1030
Subject: cpumask: centralize cpu_online_map and cpu_possible_map

Impact: cleanup

Each SMP arch defines these themselves.  Move them to a central
location.

Twists:
1) Some archs (m32, parisc, s390) set possible_map to all 1, so we add a
   CONFIG_INIT_ALL_POSSIBLE for this rather than break them.

2) mips and sparc32 '#define cpu_possible_map phys_cpu_present_map'.
   Those archs simply have phys_cpu_present_map replaced everywhere.

3) Alpha defined cpu_possible_map to cpu_present_map; this is tricky
   so I just manipulate them both in sync.

4) IA64, cris and m32r have gratuitous 'extern cpumask_t cpu_possible_map'
   declarations.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Grant Grundler <grundler@parisc-linux.org>
Tested-by: Tony Luck <tony.luck@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Mike Travis <travis@sgi.com>
Cc: ink@jurassic.park.msu.ru
Cc: rmk@arm.linux.org.uk
Cc: starvik@axis.com
Cc: tony.luck@intel.com
Cc: takata@linux-m32r.org
Cc: ralf@linux-mips.org
Cc: grundler@parisc-linux.org
Cc: paulus@samba.org
Cc: schwidefsky@de.ibm.com
Cc: lethal@linux-sh.org
Cc: wli@holomorphy.com
Cc: davem@davemloft.net
Cc: jdike@addtoit.com
Cc: mingo@redhat.com

diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h
index 544c69a..547e909 100644
--- a/arch/alpha/include/asm/smp.h
+++ b/arch/alpha/include/asm/smp.h
@@ -45,7 +45,6 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS];
 #define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 extern int smp_num_cpus;
-#define cpu_possible_map	cpu_present_map
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi(cpumask_t mask);
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 351407e..f238370 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -94,6 +94,7 @@ common_shutdown_1(void *generic_ptr)
 		flags |= 0x00040000UL; /* "remain halted" */
 		*pflags = flags;
 		cpu_clear(cpuid, cpu_present_map);
+		cpu_clear(cpuid, cpu_possible_map);
 		halt();
 	}
 #endif
@@ -120,6 +121,7 @@ common_shutdown_1(void *generic_ptr)
 #ifdef CONFIG_SMP
 	/* Wait for the secondaries to halt. */
 	cpu_clear(boot_cpuid, cpu_present_map);
+	cpu_clear(boot_cpuid, cpu_possible_map);
 	while (cpus_weight(cpu_present_map))
 		barrier();
 #endif
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index cf7da10..d953e51 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -70,11 +70,6 @@ enum ipi_message_type {
 /* Set to a secondary's cpuid when it comes online.  */
 static int smp_secondary_alive __devinitdata = 0;
 
-/* Which cpus ids came online.  */
-cpumask_t cpu_online_map;
-
-EXPORT_SYMBOL(cpu_online_map);
-
 int smp_num_probed;		/* Internal processor count */
 int smp_num_cpus = 1;		/* Number that came online.  */
 EXPORT_SYMBOL(smp_num_cpus);
@@ -440,6 +435,7 @@ setup_smp(void)
 				((char *)cpubase + i*hwrpb->processor_size);
 			if ((cpu->flags & 0x1cc) == 0x1cc) {
 				smp_num_probed++;
+				cpu_set(i, cpu_possible_map);
 				cpu_set(i, cpu_present_map);
 				cpu->pal_revision = boot_cpu_palrev;
 			}
@@ -473,6 +469,7 @@ smp_prepare_cpus(unsigned int max_cpus)
 
 	/* Nothing to do on a UP box, or when told not to.  */
 	if (smp_num_probed == 1 || max_cpus == 0) {
+		cpu_possible_map = cpumask_of_cpu(boot_cpuid);
 		cpu_present_map = cpumask_of_cpu(boot_cpuid);
 		printk(KERN_INFO "SMP mode deactivated.\n");
 		return;
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index e42a749..bd905c0 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -34,16 +34,6 @@
 #include <asm/ptrace.h>
 
 /*
- * bitmask of present and online CPUs.
- * The present bitmask indicates that the CPU is physically present.
- * The online bitmask indicates that the CPU is up and running.
- */
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-
-/*
  * as from 2.5, kernels no longer have an init_tasks structure
  * so we need some other way of telling a new secondary core
  * where to place its SVC stack
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 52e16c6..9dac173 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -29,11 +29,7 @@
 spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED};
 
 /* CPU masks */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
 cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
 EXPORT_SYMBOL(phys_cpu_present_map);
 
 /* Variables used during SMP boot */
diff --git a/arch/cris/include/asm/smp.h b/arch/cris/include/asm/smp.h
index dba33ab..c615a06 100644
--- a/arch/cris/include/asm/smp.h
+++ b/arch/cris/include/asm/smp.h
@@ -4,7 +4,6 @@
 #include <linux/cpumask.h>
 
 extern cpumask_t phys_cpu_present_map;
-extern cpumask_t cpu_possible_map;
 
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h
index 12d96e0..21c4023 100644
--- a/arch/ia64/include/asm/smp.h
+++ b/arch/ia64/include/asm/smp.h
@@ -57,7 +57,6 @@ extern struct smp_boot_data {
 
 extern char no_int_routing __devinitdata;
 
-extern cpumask_t cpu_online_map;
 extern cpumask_t cpu_core_map[NR_CPUS];
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 extern int smp_num_siblings;
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 1dcbb85..4ede6e5 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -131,12 +131,6 @@ struct task_struct *task_for_booting_cpu;
  */
 DEFINE_PER_CPU(int, cpu_state);
 
-/* Bitmasks of currently online, and possible CPUs */
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_possible_map);
-
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_core_map);
 DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index dbaed4a..17a6dab 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -10,6 +10,7 @@ config M32R
 	default y
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select INIT_ALL_POSSIBLE
 
 config SBUS
 	bool
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index 39cb6da..0f06b37 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -73,17 +73,11 @@ static unsigned int bsp_phys_id = -1;
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
 
-/* Bitmask of currently online CPUs */
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-
 cpumask_t cpu_bootout_map;
 cpumask_t cpu_bootin_map;
 static cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
 EXPORT_SYMBOL(cpu_callout_map);
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_possible_map);
 
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned;
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 0ff5b52..86557b5 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -38,9 +38,6 @@ extern int __cpu_logical_map[NR_CPUS];
 #define SMP_RESCHEDULE_YOURSELF	0x1	/* XXX braindead */
 #define SMP_CALL_FUNCTION	0x2
 
-extern cpumask_t phys_cpu_present_map;
-#define cpu_possible_map	phys_cpu_present_map
-
 extern void asmlinkage smp_bootstrap(void);
 
 /*
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index ca476c4..6789c1a 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -226,7 +226,7 @@ void __init cmp_smp_setup(void)
 
 	for (i = 1; i < NR_CPUS; i++) {
 		if (amon_cpu_avail(i)) {
-			cpu_set(i, phys_cpu_present_map);
+			cpu_set(i, cpu_possible_map);
 			__cpu_number_map[i]	= ++ncpu;
 			__cpu_logical_map[ncpu]	= i;
 		}
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 87a1816..6f7ee5a 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -70,7 +70,7 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0,
 		write_vpe_c0_vpeconf0(tmp);
 
 		/* Record this as available CPU */
-		cpu_set(tc, phys_cpu_present_map);
+		cpu_set(tc, cpu_possible_map);
 		__cpu_number_map[tc]	= ++ncpu;
 		__cpu_logical_map[ncpu]	= tc;
 	}
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 8bf88faf..3da9470 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -44,15 +44,10 @@
 #include <asm/mipsmtregs.h>
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-cpumask_t phys_cpu_present_map;		/* Bitmask of available CPUs */
 volatile cpumask_t cpu_callin_map;	/* Bitmask of started secondaries */
-cpumask_t cpu_online_map;		/* Bitmask of currently online CPUs */
 int __cpu_number_map[NR_CPUS];		/* Map physical to logical */
 int __cpu_logical_map[NR_CPUS];		/* Map logical to physical */
 
-EXPORT_SYMBOL(phys_cpu_present_map);
-EXPORT_SYMBOL(cpu_online_map);
-
 extern void cpu_idle(void);
 
 /* Number of TCs (or siblings in Intel speak) per CPU core */
@@ -195,7 +190,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 /* preload SMP state for boot cpu */
 void __devinit smp_prepare_boot_cpu(void)
 {
-	cpu_set(0, phys_cpu_present_map);
+	cpu_set(0, cpu_possible_map);
 	cpu_set(0, cpu_online_map);
 	cpu_set(0, cpu_callin_map);
 }
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 897fb2b..b6cca01 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -290,7 +290,7 @@ static void smtc_configure_tlb(void)
  * possibly leave some TCs/VPEs as "slave" processors.
  *
  * Use c0_MVPConf0 to find out how many TCs are available, setting up
- * phys_cpu_present_map and the logical/physical mappings.
+ * cpu_possible_map and the logical/physical mappings.
  */
 
 int __init smtc_build_cpu_map(int start_cpu_slot)
@@ -304,7 +304,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot)
 	 */
 	ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
 	for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) {
-		cpu_set(i, phys_cpu_present_map);
+		cpu_set(i, cpu_possible_map);
 		__cpu_number_map[i] = i;
 		__cpu_logical_map[i] = i;
 	}
@@ -521,7 +521,7 @@ void smtc_prepare_cpus(int cpus)
 	 * Pull any physically present but unused TCs out of circulation.
 	 */
 	while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) {
-		cpu_clear(tc, phys_cpu_present_map);
+		cpu_clear(tc, cpu_possible_map);
 		cpu_clear(tc, cpu_present_map);
 		tc++;
 	}
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index 3a7df64..f78c29b 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -141,7 +141,7 @@ static void __cpuinit yos_boot_secondary(int cpu, struct task_struct *idle)
 }
 
 /*
- * Detect available CPUs, populate phys_cpu_present_map before smp_init
+ * Detect available CPUs, populate cpu_possible_map before smp_init
  *
  * We don't want to start the secondary CPU yet nor do we have a nice probing
  * feature in PMON so we just assume presence of the secondary core.
@@ -150,10 +150,10 @@ static void __init yos_smp_setup(void)
 {
 	int i;
 
-	cpus_clear(phys_cpu_present_map);
+	cpus_clear(cpu_possible_map);
 
 	for (i = 0; i < 2; i++) {
-		cpu_set(i, phys_cpu_present_map);
+		cpu_set(i, cpu_possible_map);
 		__cpu_number_map[i]	= i;
 		__cpu_logical_map[i]	= i;
 	}
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index ba5cdeb..5b47d6b 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -76,7 +76,7 @@ static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest)
 			/* Only let it join in if it's marked enabled */
 			if ((acpu->cpu_info.flags & KLINFO_ENABLE) &&
 			    (tot_cpus_found != NR_CPUS)) {
-				cpu_set(cpuid, phys_cpu_present_map);
+				cpu_set(cpuid, cpu_possible_map);
 				alloc_cpupda(cpuid, tot_cpus_found);
 				cpus_found++;
 				tot_cpus_found++;
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index bd9eeb4..dddfda8 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -136,7 +136,7 @@ static void __cpuinit bcm1480_boot_secondary(int cpu, struct task_struct *idle)
 
 /*
  * Use CFE to find out how many CPUs are available, setting up
- * phys_cpu_present_map and the logical/physical mappings.
+ * cpu_possible_map and the logical/physical mappings.
  * XXXKW will the boot CPU ever not be physical 0?
  *
  * Common setup before any secondaries are started
@@ -145,14 +145,14 @@ static void __init bcm1480_smp_setup(void)
 {
 	int i, num;
 
-	cpus_clear(phys_cpu_present_map);
-	cpu_set(0, phys_cpu_present_map);
+	cpus_clear(cpu_possible_map);
+	cpu_set(0, cpu_possible_map);
 	__cpu_number_map[0] = 0;
 	__cpu_logical_map[0] = 0;
 
 	for (i = 1, num = 0; i < NR_CPUS; i++) {
 		if (cfe_cpu_stop(i) == 0) {
-			cpu_set(i, phys_cpu_present_map);
+			cpu_set(i, cpu_possible_map);
 			__cpu_number_map[i] = ++num;
 			__cpu_logical_map[num] = i;
 		}
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index 0734b93..5950a28 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -124,7 +124,7 @@ static void __cpuinit sb1250_boot_secondary(int cpu, struct task_struct *idle)
 
 /*
  * Use CFE to find out how many CPUs are available, setting up
- * phys_cpu_present_map and the logical/physical mappings.
+ * cpu_possible_map and the logical/physical mappings.
  * XXXKW will the boot CPU ever not be physical 0?
  *
  * Common setup before any secondaries are started
@@ -133,14 +133,14 @@ static void __init sb1250_smp_setup(void)
 {
 	int i, num;
 
-	cpus_clear(phys_cpu_present_map);
-	cpu_set(0, phys_cpu_present_map);
+	cpus_clear(cpu_possible_map);
+	cpu_set(0, cpu_possible_map);
 	__cpu_number_map[0] = 0;
 	__cpu_logical_map[0] = 0;
 
 	for (i = 1, num = 0; i < NR_CPUS; i++) {
 		if (cfe_cpu_stop(i) == 0) {
-			cpu_set(i, phys_cpu_present_map);
+			cpu_set(i, cpu_possible_map);
 			__cpu_number_map[i] = ++num;
 			__cpu_logical_map[num] = i;
 		}
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 644a70b..aacf11d 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -11,6 +11,7 @@ config PARISC
 	select HAVE_OPROFILE
 	select RTC_CLASS
 	select RTC_DRV_PARISC
+	select INIT_ALL_POSSIBLE
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
 	  in many of their workstations & servers (HP9000 700 and 800 series,
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index d47f397..80bc000 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -67,21 +67,6 @@ static volatile int cpu_now_booting __read_mostly = 0;	/* track which CPU is boo
 
 static int parisc_max_cpus __read_mostly = 1;
 
-/* online cpus are ones that we've managed to bring up completely
- * possible cpus are all valid cpu 
- * present cpus are all detected cpu
- *
- * On startup we bring up the "possible" cpus. Since we discover
- * CPUs later, we add them as hotplug, so the possible cpu mask is
- * empty in the beginning.
- */
-
-cpumask_t cpu_online_map   __read_mostly = CPU_MASK_NONE;	/* Bitmap of online CPUs */
-cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;	/* Bitmap of Present CPUs */
-
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
-
 DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED;
 
 enum ipi_message_type {
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ff9f701..d116556 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -60,13 +60,9 @@
 int smp_hw_index[NR_CPUS];
 struct thread_info *secondary_ti;
 
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-cpumask_t cpu_online_map = CPU_MASK_NONE;
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
 DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
 
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8116a33..b4aa586 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -75,6 +75,7 @@ config S390
 	select HAVE_KRETPROBES
 	select HAVE_KVM if 64BIT
 	select HAVE_ARCH_TRACEHOOK
+	select INIT_ALL_POSSIBLE
 
 source "init/Kconfig"
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index b559568..f03914b 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -52,12 +52,6 @@
 struct _lowcore *lowcore_ptr[NR_CPUS];
 EXPORT_SYMBOL(lowcore_ptr);
 
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_possible_map);
-
 static struct task_struct *current_set[NR_CPUS];
 
 static u8 smp_cpu_type;
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 3c5ad16..593937d 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -31,12 +31,6 @@
 int __cpu_number_map[NR_CPUS];		/* Map physical to logical */
 int __cpu_logical_map[NR_CPUS];		/* Map logical to physical */
 
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
-
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-
 static inline void __init smp_store_cpu_info(unsigned int cpu)
 {
 	struct sh_cpuinfo *c = cpu_data + cpu;
diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h
index a8180e5..8408d9d 100644
--- a/arch/sparc/include/asm/smp_32.h
+++ b/arch/sparc/include/asm/smp_32.h
@@ -29,8 +29,6 @@
  */
 
 extern unsigned char boot_cpu_id;
-extern cpumask_t phys_cpu_present_map;
-#define cpu_possible_map phys_cpu_present_map
 
 typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long,
 		       unsigned long, unsigned long);
diff --git a/arch/sparc/kernel/smp.c b/arch/sparc/kernel/smp.c
index e396c1f..1e5ac4e 100644
--- a/arch/sparc/kernel/smp.c
+++ b/arch/sparc/kernel/smp.c
@@ -39,8 +39,6 @@ volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,};
 unsigned char boot_cpu_id = 0;
 unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
 
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
 cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 
 /* The only guaranteed locking primitive available on all Sparc
@@ -334,7 +332,7 @@ void __init smp_setup_cpu_possible_map(void)
 	instance = 0;
 	while (!cpu_find_by_instance(instance, NULL, &mid)) {
 		if (mid < NR_CPUS) {
-			cpu_set(mid, phys_cpu_present_map);
+			cpu_set(mid, cpu_possible_map);
 			cpu_set(mid, cpu_present_map);
 		}
 		instance++;
@@ -354,7 +352,7 @@ void __init smp_prepare_boot_cpu(void)
 
 	current_thread_info()->cpu = cpuid;
 	cpu_set(cpuid, cpu_online_map);
-	cpu_set(cpuid, phys_cpu_present_map);
+	cpu_set(cpuid, cpu_possible_map);
 }
 
 int __cpuinit __cpu_up(unsigned int cpu)
diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c
index b0dfff8..32d11a5 100644
--- a/arch/sparc/kernel/sparc_ksyms.c
+++ b/arch/sparc/kernel/sparc_ksyms.c
@@ -113,10 +113,6 @@ EXPORT_PER_CPU_SYMBOL(__cpu_data);
 #ifdef CONFIG_SMP
 /* IRQ implementation. */
 EXPORT_SYMBOL(synchronize_irq);
-
-/* CPU online map and active count. */
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(phys_cpu_present_map);
 #endif
 
 EXPORT_SYMBOL(__udelay);
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index f500b06..a97b882 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -49,14 +49,10 @@
 
 int sparc64_multi_core __read_mostly;
 
-cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
-cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
 	{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
 
-EXPORT_SYMBOL(cpu_possible_map);
-EXPORT_SYMBOL(cpu_online_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_SYMBOL(cpu_core_map);
 
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 0457721..98351c7 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -25,13 +25,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 #include "irq_user.h"
 #include "os.h"
 
-/* CPU online map, set by smp_boot_cpus */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
-
 /* Per CPU bogomips and other parameters
  * The only piece used here is the ipi pipe, which is set before SMP is
  * started and never changed.
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7b10933..468c2f9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,14 +101,8 @@ EXPORT_SYMBOL(smp_num_siblings);
 /* Last level cache ID of each logical CPU */
 DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
 
-/* bitmap of online cpus */
-cpumask_t cpu_online_map __read_mostly;
-EXPORT_SYMBOL(cpu_online_map);
-
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 5214500..9c99018 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -63,11 +63,6 @@ static int voyager_extended_cpus = 1;
 /* Used for the invalidate map that's also checked in the spinlock */
 static volatile unsigned long smp_invalidate_needed;
 
-/* Bitmask of currently online CPUs - used by setup.c for
-   /proc/cpuinfo, visible externally but still physical */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-
 /* Bitmask of CPUs present in the system - exported by i386_syms.c, used
  * by scheduler but indexed physically */
 cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
@@ -218,8 +213,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 /* This is for the new dynamic CPU boot code */
 cpumask_t cpu_callin_map = CPU_MASK_NONE;
 cpumask_t cpu_callout_map = CPU_MASK_NONE;
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_possible_map);
 
 /* The per processor IRQ masks (these are usually kept in sync) */
 static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
diff --git a/include/asm-m32r/smp.h b/include/asm-m32r/smp.h
index c5dd669..b96a6d2 100644
--- a/include/asm-m32r/smp.h
+++ b/include/asm-m32r/smp.h
@@ -63,8 +63,6 @@ extern volatile int cpu_2_physid[NR_CPUS];
 #define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 extern cpumask_t cpu_callout_map;
-extern cpumask_t cpu_possible_map;
-extern cpumask_t cpu_present_map;
 
 static __inline__ int hard_smp_processor_id(void)
 {
diff --git a/init/Kconfig b/init/Kconfig
index f763762..7656623 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -916,6 +916,15 @@ config KMOD
 
 endif # MODULES
 
+config INIT_ALL_POSSIBLE
+	bool
+	help
+	  Back when each arch used to define their own cpu_online_map and
+	  cpu_possible_map, some of them chose to initialize cpu_possible_map
+	  with all 1s, and others with all 0s.  When they were centralised,
+	  it was better to provide this option than to break all the archs
+	  and have several arch maintainers persuing me down dark alleys.
+
 config STOP_MACHINE
 	bool
 	default y
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8ea32e8..bae131a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -24,19 +24,20 @@
 cpumask_t cpu_present_map __read_mostly;
 EXPORT_SYMBOL(cpu_present_map);
 
-#ifndef CONFIG_SMP
-
 /*
  * Represents all cpu's that are currently online.
  */
-cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
+cpumask_t cpu_online_map __read_mostly;
 EXPORT_SYMBOL(cpu_online_map);
 
+#ifdef CONFIG_INIT_ALL_POSSIBLE
 cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
+#else
+cpumask_t cpu_possible_map __read_mostly;
+#endif
 EXPORT_SYMBOL(cpu_possible_map);
 
-#else /* CONFIG_SMP */
-
+#ifdef CONFIG_SMP
 /* Serializes the updates to cpu_online_map, cpu_present_map */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 
-- 
cgit v0.10.2


From 29c0177e6a4ac094302bed54a1d4bbb6b740a9ef Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:25 +1030
Subject: cpumask: change cpumask_scnprintf, cpumask_parse_user, cpulist_parse,
 and cpulist_scnprintf to take pointers.

Impact: change calling convention of existing cpumask APIs

Most cpumask functions started with cpus_: these have been replaced by
cpumask_ ones which take struct cpumask pointers as expected.

These four functions don't have good replacement names; fortunately
they're rarely used, so we just change them over.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: mingo@redhat.com
Cc: tony.luck@intel.com
Cc: ralf@linux-mips.org
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: cl@linux-foundation.org
Cc: srostedt@redhat.com

diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index c75b914..a8d61a3 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -219,7 +219,7 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
 	cpumask_t shared_cpu_map;
 
 	cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
-	len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map);
+	len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
 	len += sprintf(buf+len, "\n");
 	return len;
 }
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index 6789c1a..f27beca 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -51,10 +51,10 @@ static int __init allowcpus(char *str)
 	int len;
 
 	cpus_clear(cpu_allow_map);
-	if (cpulist_parse(str, cpu_allow_map) == 0) {
+	if (cpulist_parse(str, &cpu_allow_map) == 0) {
 		cpu_set(0, cpu_allow_map);
 		cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map);
-		len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map);
+		len = cpulist_scnprintf(buf, sizeof(buf)-1, &cpu_possible_map);
 		buf[len] = '\0';
 		pr_debug("Allowable CPUs: %s\n", buf);
 		return 1;
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index e190477..64d2431 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -358,7 +358,7 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 	irq_server = get_irq_server(virq, 1);
 	if (irq_server == -1) {
 		char cpulist[128];
-		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
+		cpumask_scnprintf(cpulist, sizeof(cpulist), &cpumask);
 		printk(KERN_WARNING
 			"%s: No online cpus in the mask %s for irq %d\n",
 			__func__, cpulist, virq);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 3f46afb..43ea612 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -626,8 +626,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
 		cpumask_t *mask = &this_leaf->shared_cpu_map;
 
 		n = type?
-			cpulist_scnprintf(buf, len-2, *mask):
-			cpumask_scnprintf(buf, len-2, *mask);
+			cpulist_scnprintf(buf, len-2, mask) :
+			cpumask_scnprintf(buf, len-2, mask);
 		buf[n++] = '\n';
 		buf[n] = '\0';
 	}
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3..1c20842 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -282,7 +282,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
 	else
 		cpu_clear(cpu, *mask);
 
-	cpulist_scnprintf(buf, sizeof(buf), *mask);
+	cpulist_scnprintf(buf, sizeof(buf), mask);
 	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
 		enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
  }
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 64f5d54..4259072 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -109,7 +109,7 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL);
  */
 static ssize_t print_cpus_map(char *buf, cpumask_t *map)
 {
-	int n = cpulist_scnprintf(buf, PAGE_SIZE-2, *map);
+	int n = cpulist_scnprintf(buf, PAGE_SIZE-2, map);
 
 	buf[n++] = '\n';
 	buf[n] = '\0';
diff --git a/drivers/base/node.c b/drivers/base/node.c
index f520709..91636cd 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -30,8 +30,8 @@ static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf)
 	BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
 
 	len = type?
-		cpulist_scnprintf(buf, PAGE_SIZE-2, *mask):
-		cpumask_scnprintf(buf, PAGE_SIZE-2, *mask);
+		cpulist_scnprintf(buf, PAGE_SIZE-2, mask) :
+		cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
  	buf[len++] = '\n';
  	buf[len] = '\0';
 	return len;
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 199cd97..a8bc1cb 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -49,8 +49,8 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
 
 	if (len > 1) {
 		n = type?
-			cpulist_scnprintf(buf, len-2, *mask):
-			cpumask_scnprintf(buf, len-2, *mask);
+			cpulist_scnprintf(buf, len-2, mask) :
+			cpumask_scnprintf(buf, len-2, mask);
 		buf[n++] = '\n';
 		buf[n] = '\0';
 	}
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 5d72866..c884858 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -74,7 +74,7 @@ static ssize_t local_cpus_show(struct device *dev,
 	int len;
 
 	mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
-	len = cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
+	len = cpumask_scnprintf(buf, PAGE_SIZE-2, &mask);
 	buf[len++] = '\n';
 	buf[len] = '\0';
 	return len;
@@ -88,7 +88,7 @@ static ssize_t local_cpulist_show(struct device *dev,
 	int len;
 
 	mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
-	len = cpulist_scnprintf(buf, PAGE_SIZE-2, mask);
+	len = cpulist_scnprintf(buf, PAGE_SIZE-2, &mask);
 	buf[len++] = '\n';
 	buf[len] = '\0';
 	return len;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 003a9b3..5b3f593 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -55,8 +55,8 @@ static ssize_t pci_bus_show_cpuaffinity(struct device *dev,
 
 	cpumask = pcibus_to_cpumask(to_pci_bus(dev));
 	ret = type?
-		cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask):
-		cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask);
+		cpulist_scnprintf(buf, PAGE_SIZE-2, &cpumask) :
+		cpumask_scnprintf(buf, PAGE_SIZE-2, &cpumask);
 	buf[ret++] = '\n';
 	buf[ret] = '\0';
 	return ret;
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 21e1dd4..94a2ab8 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -339,36 +339,6 @@ extern cpumask_t cpu_mask_all;
 #endif
 #define	CPUMASK_PTR(v, m) 	cpumask_t *v = &(m->v)
 
-#define cpumask_scnprintf(buf, len, src) \
-			__cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
-static inline int __cpumask_scnprintf(char *buf, int len,
-					const cpumask_t *srcp, int nbits)
-{
-	return bitmap_scnprintf(buf, len, srcp->bits, nbits);
-}
-
-#define cpumask_parse_user(ubuf, ulen, dst) \
-			__cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS)
-static inline int __cpumask_parse_user(const char __user *buf, int len,
-					cpumask_t *dstp, int nbits)
-{
-	return bitmap_parse_user(buf, len, dstp->bits, nbits);
-}
-
-#define cpulist_scnprintf(buf, len, src) \
-			__cpulist_scnprintf((buf), (len), &(src), NR_CPUS)
-static inline int __cpulist_scnprintf(char *buf, int len,
-					const cpumask_t *srcp, int nbits)
-{
-	return bitmap_scnlistprintf(buf, len, srcp->bits, nbits);
-}
-
-#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS)
-static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits)
-{
-	return bitmap_parselist(buf, dstp->bits, nbits);
-}
-
 #define cpu_remap(oldbit, old, new) \
 		__cpu_remap((oldbit), &(old), &(new), NR_CPUS)
 static inline int __cpu_remap(int oldbit,
@@ -946,6 +916,63 @@ static inline void cpumask_copy(struct cpumask *dstp,
 #define cpumask_of(cpu) (get_cpu_mask(cpu))
 
 /**
+ * cpumask_scnprintf - print a cpumask into a string as comma-separated hex
+ * @buf: the buffer to sprintf into
+ * @len: the length of the buffer
+ * @srcp: the cpumask to print
+ *
+ * If len is zero, returns zero.  Otherwise returns the length of the
+ * (nul-terminated) @buf string.
+ */
+static inline int cpumask_scnprintf(char *buf, int len,
+				    const struct cpumask *srcp)
+{
+	return bitmap_scnprintf(buf, len, srcp->bits, nr_cpumask_bits);
+}
+
+/**
+ * cpumask_parse_user - extract a cpumask from a user string
+ * @buf: the buffer to extract from
+ * @len: the length of the buffer
+ * @dstp: the cpumask to set.
+ *
+ * Returns -errno, or 0 for success.
+ */
+static inline int cpumask_parse_user(const char __user *buf, int len,
+				     struct cpumask *dstp)
+{
+	return bitmap_parse_user(buf, len, dstp->bits, nr_cpumask_bits);
+}
+
+/**
+ * cpulist_scnprintf - print a cpumask into a string as comma-separated list
+ * @buf: the buffer to sprintf into
+ * @len: the length of the buffer
+ * @srcp: the cpumask to print
+ *
+ * If len is zero, returns zero.  Otherwise returns the length of the
+ * (nul-terminated) @buf string.
+ */
+static inline int cpulist_scnprintf(char *buf, int len,
+				    const struct cpumask *srcp)
+{
+	return bitmap_scnlistprintf(buf, len, srcp->bits, nr_cpumask_bits);
+}
+
+/**
+ * cpulist_parse_user - extract a cpumask from a user string of ranges
+ * @buf: the buffer to extract from
+ * @len: the length of the buffer
+ * @dstp: the cpumask to set.
+ *
+ * Returns -errno, or 0 for success.
+ */
+static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
+{
+	return bitmap_parselist(buf, dstp->bits, nr_cpumask_bits);
+}
+
+/**
  * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
  * @bitmap: the bitmap
  *
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 96c0ba1..39c1a4c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -896,7 +896,7 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
 	if (!*buf) {
 		cpus_clear(trialcs.cpus_allowed);
 	} else {
-		retval = cpulist_parse(buf, trialcs.cpus_allowed);
+		retval = cpulist_parse(buf, &trialcs.cpus_allowed);
 		if (retval < 0)
 			return retval;
 
@@ -1482,7 +1482,7 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 	mask = cs->cpus_allowed;
 	mutex_unlock(&callback_mutex);
 
-	return cpulist_scnprintf(page, PAGE_SIZE, mask);
+	return cpulist_scnprintf(page, PAGE_SIZE, &mask);
 }
 
 static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index d257e7d..f293349 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -47,7 +47,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
 	    irq_balancing_disabled(irq))
 		return -EIO;
 
-	err = cpumask_parse_user(buffer, count, new_value);
+	err = cpumask_parse_user(buffer, count, &new_value);
 	if (err)
 		return err;
 
@@ -95,7 +95,7 @@ static ssize_t default_affinity_write(struct file *file,
 	cpumask_t new_value;
 	int err;
 
-	err = cpumask_parse_user(buffer, count, new_value);
+	err = cpumask_parse_user(buffer, count, &new_value);
 	if (err)
 		return err;
 
diff --git a/kernel/profile.c b/kernel/profile.c
index dc41827..7d620df 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -442,7 +442,7 @@ void profile_tick(int type)
 static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
 			int count, int *eof, void *data)
 {
-	int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
+	int len = cpumask_scnprintf(page, count, (cpumask_t *)data);
 	if (count - len < 2)
 		return -EINVAL;
 	len += sprintf(page + len, "\n");
@@ -456,7 +456,7 @@ static int prof_cpu_mask_write_proc(struct file *file,
 	unsigned long full_count = count, err;
 	cpumask_t new_value;
 
-	err = cpumask_parse_user(buffer, count, new_value);
+	err = cpumask_parse_user(buffer, count, &new_value);
 	if (err)
 		return err;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index e4bb1dd..d2d16d1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6666,7 +6666,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 	struct sched_group *group = sd->groups;
 	char str[256];
 
-	cpulist_scnprintf(str, sizeof(str), sd->span);
+	cpulist_scnprintf(str, sizeof(str), &sd->span);
 	cpus_clear(*groupmask);
 
 	printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
@@ -6720,7 +6720,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
 		cpus_or(*groupmask, *groupmask, group->cpumask);
 
-		cpulist_scnprintf(str, sizeof(str), group->cpumask);
+		cpulist_scnprintf(str, sizeof(str), &group->cpumask);
 		printk(KERN_CONT " %s", str);
 
 		group = group->next;
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 7dbf72a..6beff1e 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -42,7 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
 		for_each_domain(cpu, sd) {
 			enum cpu_idle_type itype;
 
-			cpumask_scnprintf(mask_str, mask_len, sd->span);
+			cpumask_scnprintf(mask_str, mask_len, &sd->span);
 			seq_printf(seq, "domain%d %s", dcount++, mask_str);
 			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
 					itype++) {
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index bd6be76..6d7dc4e 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -352,7 +352,7 @@ static int parse(struct nlattr *na, cpumask_t *mask)
 	if (!data)
 		return -ENOMEM;
 	nla_strlcpy(data, na, len);
-	ret = cpulist_parse(data, *mask);
+	ret = cpulist_parse(data, mask);
 	kfree(data);
 	return ret;
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d86e325..d2e7547 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2126,7 +2126,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf,
 
 	mutex_lock(&tracing_cpumask_update_lock);
 
-	len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+	len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
 	if (count - len < 2) {
 		count = -EINVAL;
 		goto out_err;
@@ -2147,7 +2147,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 	int err, cpu;
 
 	mutex_lock(&tracing_cpumask_update_lock);
-	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
+	err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
 	if (err)
 		goto err_unlock;
 
diff --git a/mm/slub.c b/mm/slub.c
index a2cd47d..8e516e2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3626,7 +3626,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
 				len < PAGE_SIZE - 60) {
 			len += sprintf(buf + len, " cpus=");
 			len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
-					l->cpus);
+					&l->cpus);
 		}
 
 		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
-- 
cgit v0.10.2


From 0de26520c7cabf36e1de090ea8092f011a6106ce Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:26 +1030
Subject: cpumask: make irq_set_affinity() take a const struct cpumask

Impact: change existing irq_chip API

Not much point with gentle transition here: the struct irq_chip's
setaffinity method signature needs to change.

Fortunately, not widely used code, but hits a few architectures.

Note: In irq_select_affinity() I save a temporary in by mangling
irq_desc[irq].affinity directly.  Ingo, does this break anything?

(Folded in fix from KOSAKI Motohiro)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Grant Grundler <grundler@parisc-linux.org>
Acked-by: Ingo Molnar <mingo@redhat.com>
Cc: ralf@linux-mips.org
Cc: grundler@parisc-linux.org
Cc: jeremy@xensource.com
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index c626a82..d0f1620 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -55,7 +55,7 @@ int irq_select_affinity(unsigned int irq)
 	last_cpu = cpu;
 
 	irq_desc[irq].affinity = cpumask_of_cpu(cpu);
-	irq_desc[irq].chip->set_affinity(irq, cpumask_of_cpu(cpu));
+	irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu));
 	return 0;
 }
 #endif /* CONFIG_SMP */
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index c71b0fd..ab44c16 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -177,19 +177,19 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 }
 
 static void
-dp264_set_affinity(unsigned int irq, cpumask_t affinity)
+dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
-	cpu_set_irq_affinity(irq, affinity);
+	cpu_set_irq_affinity(irq, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
 }
 
 static void
-clipper_set_affinity(unsigned int irq, cpumask_t affinity)
+clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
-	cpu_set_irq_affinity(irq - 16, affinity);
+	cpu_set_irq_affinity(irq - 16, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
 }
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 52c91cc..27f840a 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -158,10 +158,10 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 }
 
 static void
-titan_set_irq_affinity(unsigned int irq, cpumask_t affinity)
+titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&titan_irq_lock);
-	titan_cpu_set_irq_affinity(irq - 16, affinity);
+	titan_cpu_set_irq_affinity(irq - 16, *affinity);
 	titan_update_irq_hw(titan_cached_irq_mask);
 	spin_unlock(&titan_irq_lock);
 }
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 7fc9860..c6884ba 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -109,11 +109,11 @@ static void gic_unmask_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, cpumask_t mask_val)
+static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
 {
 	void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
 	unsigned int shift = (irq % 4) * 8;
-	unsigned int cpu = first_cpu(mask_val);
+	unsigned int cpu = cpumask_first(mask_val);
 	u32 val;
 
 	spin_lock(&irq_controller_lock);
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 2f3eb79..7141cee 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -174,7 +174,7 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu)
 	pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu);
 
 	spin_lock_irq(&desc->lock);
-	desc->chip->set_affinity(irq, cpumask_of_cpu(cpu));
+	desc->chip->set_affinity(irq, cpumask_of(cpu));
 	spin_unlock_irq(&desc->lock);
 }
 
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 4de366e..6d6bd58 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -260,10 +260,10 @@ static void em_stop(void)
 static void em_route_irq(int irq, unsigned int cpu)
 {
 	struct irq_desc *desc = irq_desc + irq;
-	cpumask_t mask = cpumask_of_cpu(cpu);
+	const struct cpumask *mask = cpumask_of(cpu);
 
 	spin_lock_irq(&desc->lock);
-	desc->affinity = mask;
+	desc->affinity = *mask;
 	desc->chip->set_affinity(irq, mask);
 	spin_unlock_irq(&desc->lock);
 }
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index 173c141..295131f 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -325,11 +325,11 @@ static void end_crisv32_irq(unsigned int irq)
 {
 }
 
-void set_affinity_crisv32_irq(unsigned int irq, cpumask_t dest)
+void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&irq_lock, flags);
-	irq_allocations[irq - FIRST_IRQ].mask = dest;
+	irq_allocations[irq - FIRST_IRQ].mask = *dest;
 	spin_unlock_irqrestore(&irq_lock, flags);
 }
 
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index c2f58ff..cc0a318 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -22,7 +22,7 @@ hpsim_irq_noop (unsigned int irq)
 }
 
 static void
-hpsim_set_affinity_noop (unsigned int a, cpumask_t b)
+hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
 {
 }
 
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 5c4674a..c8adecd 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -330,25 +330,25 @@ unmask_irq (unsigned int irq)
 
 
 static void
-iosapic_set_affinity (unsigned int irq, cpumask_t mask)
+iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
 	u32 high32, low32;
-	int dest, rte_index;
+	int cpu, dest, rte_index;
 	int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
 	struct iosapic_rte_info *rte;
 	struct iosapic *iosapic;
 
 	irq &= (~IA64_IRQ_REDIRECTED);
 
-	cpus_and(mask, mask, cpu_online_map);
-	if (cpus_empty(mask))
+	cpu = cpumask_first_and(cpu_online_mask, mask);
+	if (cpu >= nr_cpu_ids)
 		return;
 
-	if (irq_prepare_move(irq, first_cpu(mask)))
+	if (irq_prepare_move(irq, cpu))
 		return;
 
-	dest = cpu_physical_id(first_cpu(mask));
+	dest = cpu_physical_id(cpu);
 
 	if (!iosapic_intr_info[irq].count)
 		return;			/* not an IOSAPIC interrupt */
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 7fd18f5..0b6db53 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -133,7 +133,6 @@ unsigned int vectors_in_migration[NR_IRQS];
  */
 static void migrate_irqs(void)
 {
-	cpumask_t	mask;
 	irq_desc_t *desc;
 	int 		irq, new_cpu;
 
@@ -152,15 +151,14 @@ static void migrate_irqs(void)
 		if (desc->status == IRQ_PER_CPU)
 			continue;
 
-		cpus_and(mask, irq_desc[irq].affinity, cpu_online_map);
-		if (any_online_cpu(mask) == NR_CPUS) {
+		if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask)
+		    >= nr_cpu_ids) {
 			/*
 			 * Save it for phase 2 processing
 			 */
 			vectors_in_migration[irq] = irq;
 
 			new_cpu = any_online_cpu(cpu_online_map);
-			mask = cpumask_of_cpu(new_cpu);
 
 			/*
 			 * Al three are essential, currently WARN_ON.. maybe panic?
@@ -168,7 +166,8 @@ static void migrate_irqs(void)
 			if (desc->chip && desc->chip->disable &&
 				desc->chip->enable && desc->chip->set_affinity) {
 				desc->chip->disable(irq);
-				desc->chip->set_affinity(irq, mask);
+				desc->chip->set_affinity(irq,
+							 cpumask_of(new_cpu));
 				desc->chip->enable(irq);
 			} else {
 				WARN_ON((!(desc->chip) || !(desc->chip->disable) ||
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 702a09c..8903393 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -49,11 +49,12 @@
 static struct irq_chip	ia64_msi_chip;
 
 #ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
+static void ia64_set_msi_irq_affinity(unsigned int irq,
+				      const cpumask_t *cpu_mask)
 {
 	struct msi_msg msg;
 	u32 addr, data;
-	int cpu = first_cpu(cpu_mask);
+	int cpu = first_cpu(*cpu_mask);
 
 	if (!cpu_online(cpu))
 		return;
@@ -166,12 +167,11 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg = irq_cfg + irq;
 	struct msi_msg msg;
-	int cpu = first_cpu(mask);
-
+	int cpu = cpumask_first(mask);
 
 	if (!cpu_online(cpu))
 		return;
@@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
 
 	dmar_msi_write(irq, &msg);
-	irq_desc[irq].affinity = mask;
+	irq_desc[irq].affinity = *mask;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 4ede6e5..1146399 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -682,7 +682,7 @@ int migrate_platform_irqs(unsigned int cpu)
 {
 	int new_cpei_cpu;
 	irq_desc_t *desc = NULL;
-	cpumask_t 	mask;
+	const struct cpumask *mask;
 	int 		retval = 0;
 
 	/*
@@ -695,7 +695,7 @@ int migrate_platform_irqs(unsigned int cpu)
 			 * Now re-target the CPEI to a different processor
 			 */
 			new_cpei_cpu = any_online_cpu(cpu_online_map);
-			mask = cpumask_of_cpu(new_cpei_cpu);
+			mask = cpumask_of(new_cpei_cpu);
 			set_cpei_target_cpu(new_cpei_cpu);
 			desc = irq_desc + ia64_cpe_irq;
 			/*
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 0c66dbd..66fd705 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -227,14 +227,14 @@ finish_up:
 	return new_irq_info;
 }
 
-static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
+static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
 	struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
 	nasid_t nasid;
 	int slice;
 
-	nasid = cpuid_to_nasid(first_cpu(mask));
-	slice = cpuid_to_slice(first_cpu(mask));
+	nasid = cpuid_to_nasid(cpumask_first(mask));
+	slice = cpuid_to_slice(cpumask_first(mask));
 
 	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
 				 sn_irq_lh[irq], list)
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 83f190f..ca553b0 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -151,7 +151,8 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
 }
 
 #ifdef CONFIG_SMP
-static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
+static void sn_set_msi_irq_affinity(unsigned int irq,
+				    const struct cpumask *cpu_mask)
 {
 	struct msi_msg msg;
 	int slice;
@@ -164,7 +165,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
 	struct sn_pcibus_provider *provider;
 	unsigned int cpu;
 
-	cpu = first_cpu(cpu_mask);
+	cpu = cpumask_first(cpu_mask);
 	sn_irq_info = sn_msi_info[irq].sn_irq_info;
 	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
 		return;
@@ -204,7 +205,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
 	msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
 
 	write_msi_msg(irq, &msg);
-	irq_desc[irq].affinity = cpu_mask;
+	irq_desc[irq].affinity = *cpu_mask;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index a58f0ee..abc62aa 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -49,7 +49,8 @@ static inline void smtc_im_ack_irq(unsigned int irq)
 #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
 #include <linux/cpumask.h>
 
-extern void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity);
+extern void plat_set_irq_affinity(unsigned int irq,
+				  const struct cpumask *affinity);
 extern void smtc_forward_irq(unsigned int irq);
 
 /*
diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c
index 0a57f86..d7e21bc 100644
--- a/arch/mips/kernel/cevt-bcm1480.c
+++ b/arch/mips/kernel/cevt-bcm1480.c
@@ -148,6 +148,6 @@ void __cpuinit sb1480_clockevent_init(void)
 	action->name	= name;
 	action->dev_id	= cd;
 
-	irq_set_affinity(irq, cpumask_of_cpu(cpu));
+	irq_set_affinity(irq, cpumask_of(cpu));
 	setup_irq(irq, action);
 }
diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c
index 63ac3ad..0f188cd 100644
--- a/arch/mips/kernel/cevt-sb1250.c
+++ b/arch/mips/kernel/cevt-sb1250.c
@@ -147,6 +147,6 @@ void __cpuinit sb1250_clockevent_init(void)
 	action->name	= name;
 	action->dev_id	= cd;
 
-	irq_set_affinity(irq, cpumask_of_cpu(cpu));
+	irq_set_affinity(irq, cpumask_of(cpu));
 	setup_irq(irq, action);
 }
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index f0a4bb1..494a49a 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq)
 
 static DEFINE_SPINLOCK(gic_lock);
 
-static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
+static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	cpumask_t	tmp = CPU_MASK_NONE;
 	unsigned long	flags;
@@ -164,7 +164,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
 	pr_debug(KERN_DEBUG "%s called\n", __func__);
 	irq -= _irqbase;
 
-	cpus_and(tmp, cpumask, cpu_online_map);
+	cpumask_and(&tmp, cpumask, cpu_online_mask);
 	if (cpus_empty(tmp))
 		return;
 
@@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
 		set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask);
 
 	}
-	irq_desc[irq].affinity = cpumask;
+	irq_desc[irq].affinity = *cpumask;
 	spin_unlock_irqrestore(&gic_lock, flags);
 
 }
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index f84a46a..aabd727 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -114,9 +114,9 @@ struct plat_smp_ops msmtc_smp_ops = {
  */
 
 
-void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity)
+void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 {
-	cpumask_t tmask = affinity;
+	cpumask_t tmask = *affinity;
 	int cpu = 0;
 	void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff);
 
@@ -139,7 +139,7 @@ void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 	 * be made to forward to an offline "CPU".
 	 */
 
-	for_each_cpu_mask(cpu, affinity) {
+	for_each_cpu(cpu, affinity) {
 		if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu))
 			cpu_clear(cpu, tmask);
 	}
diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index a35818e..12b465d 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c
@@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq);
 static void disable_bcm1480_irq(unsigned int irq);
 static void ack_bcm1480_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask);
+static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_PCI
@@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq)
 }
 
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask)
+static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on, k;
 	u64 cur_ints;
@@ -117,11 +117,11 @@ static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask)
 	unsigned long flags;
 	unsigned int irq_dirty;
 
-	if (cpus_weight(mask) != 1) {
+	if (cpumask_weight(mask) != 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
 		return;
 	}
-	i = first_cpu(mask);
+	i = cpumask_first(mask);
 
 	/* Convert logical CPU to physical CPU */
 	cpu = cpu_logical_map(i);
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index a515848..808ac29 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq);
 static void disable_sb1250_irq(unsigned int irq);
 static void ack_sb1250_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, cpumask_t mask);
+static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_SIBYTE_HAS_LDT
@@ -103,16 +103,16 @@ void sb1250_unmask_irq(int cpu, int irq)
 }
 
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, cpumask_t mask)
+static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on;
 	u64 cur_ints;
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
 
-	i = first_cpu(mask);
+	i = cpumask_first(mask);
 
-	if (cpus_weight(mask) > 1) {
+	if (cpumask_weight(mask) > 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
 		return;
 	}
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 23ef950..4cea935 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -131,12 +131,12 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest)
 	return 0;
 }
 
-static void cpu_set_affinity_irq(unsigned int irq, cpumask_t dest)
+static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
 {
-	if (cpu_check_affinity(irq, &dest))
+	if (cpu_check_affinity(irq, dest))
 		return;
 
-	irq_desc[irq].affinity = dest;
+	irq_desc[irq].affinity = *dest;
 }
 #endif
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ac222d0..23b8b5e 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -237,7 +237,7 @@ void fixup_irqs(cpumask_t map)
 			mask = map;
 		}
 		if (irq_desc[irq].chip->set_affinity)
-			irq_desc[irq].chip->set_affinity(irq, mask);
+			irq_desc[irq].chip->set_affinity(irq, &mask);
 		else if (irq_desc[irq].action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 64d2431..424b335 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -332,7 +332,7 @@ static void xics_eoi_lpar(unsigned int virq)
 	lpar_xirr_info_set((0xff << 24) | irq);
 }
 
-static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
+static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 {
 	unsigned int irq;
 	int status;
@@ -358,7 +358,7 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 	irq_server = get_irq_server(virq, 1);
 	if (irq_server == -1) {
 		char cpulist[128];
-		cpumask_scnprintf(cpulist, sizeof(cpulist), &cpumask);
+		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
 		printk(KERN_WARNING
 			"%s: No online cpus in the mask %s for irq %d\n",
 			__func__, cpulist, virq);
@@ -845,7 +845,7 @@ void xics_migrate_irqs_away(void)
 
 		/* Reset affinity to all cpus */
 		irq_desc[virq].affinity = CPU_MASK_ALL;
-		desc->chip->set_affinity(virq, CPU_MASK_ALL);
+		desc->chip->set_affinity(virq, cpu_all_mask);
 unlock:
 		spin_unlock_irqrestore(&desc->lock, flags);
 	}
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 1890fb0..5d7f9f0 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -817,7 +817,7 @@ static void mpic_end_ipi(unsigned int irq)
 
 #endif /* CONFIG_SMP */
 
-void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
+void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = mpic_irq_to_hw(irq);
@@ -829,7 +829,7 @@ void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
 	} else {
 		cpumask_t tmp;
 
-		cpus_and(tmp, cpumask, cpu_online_map);
+		cpumask_and(&tmp, cpumask, cpu_online_mask);
 
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
 			       mpic_physmask(cpus_addr(tmp)[0]));
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 6209c62..3cef2af 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic)
 
 extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
 extern void mpic_set_vector(unsigned int virq, unsigned int vector);
-extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask);
+extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 
 #endif /* _POWERPC_SYSDEV_MPIC_H */
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index 52fc836..4aaf18e 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -312,7 +312,8 @@ static void sun4u_irq_enable(unsigned int virt_irq)
 	}
 }
 
-static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4u_set_affinity(unsigned int virt_irq,
+			       const struct cpumask *mask)
 {
 	sun4u_irq_enable(virt_irq);
 }
@@ -362,7 +363,8 @@ static void sun4v_irq_enable(unsigned int virt_irq)
 		       ino, err);
 }
 
-static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4v_set_affinity(unsigned int virt_irq,
+			       const struct cpumask *mask)
 {
 	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 	unsigned long cpuid = irq_choose_cpu(virt_irq);
@@ -429,7 +431,8 @@ static void sun4v_virq_enable(unsigned int virt_irq)
 		       dev_handle, dev_ino, err);
 }
 
-static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4v_virt_set_affinity(unsigned int virt_irq,
+				    const struct cpumask *mask)
 {
 	unsigned long cpuid, dev_handle, dev_ino;
 	int err;
@@ -788,7 +791,7 @@ void fixup_irqs(void)
 		    !(irq_desc[irq].status & IRQ_PER_CPU)) {
 			if (irq_desc[irq].chip->set_affinity)
 				irq_desc[irq].chip->set_affinity(irq,
-					irq_desc[irq].affinity);
+					&irq_desc[irq].affinity);
 		}
 		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
 	}
diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index 0f616ae..df2efb7 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -780,7 +780,7 @@ out:
 	if (nid != -1) {
 		cpumask_t numa_mask = node_to_cpumask(nid);
 
-		irq_set_affinity(irq, numa_mask);
+		irq_set_affinity(irq, &numa_mask);
 	}
 
 	return irq;
diff --git a/arch/sparc64/kernel/pci_msi.c b/arch/sparc64/kernel/pci_msi.c
index 2e680f3..0d0cd81 100644
--- a/arch/sparc64/kernel/pci_msi.c
+++ b/arch/sparc64/kernel/pci_msi.c
@@ -288,7 +288,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
 	if (nid != -1) {
 		cpumask_t numa_mask = node_to_cpumask(nid);
 
-		irq_set_affinity(irq, numa_mask);
+		irq_set_affinity(irq, &numa_mask);
 	}
 	err = request_irq(irq, sparc64_msiq_interrupt, 0,
 			  "MSIQ",
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 067d8de..940f258 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -301,7 +301,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
 			struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
 			hpet_setup_msi_irq(hdev->irq);
 			disable_irq(hdev->irq);
-			irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+			irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
 			enable_irq(hdev->irq);
 		}
 		break;
@@ -449,7 +449,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
 		return -1;
 
 	disable_irq(dev->irq);
-	irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+	irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
 	enable_irq(dev->irq);
 
 	printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9043251..1184210 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -361,7 +361,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static int assign_irq_vector(int irq, cpumask_t mask);
 
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ioapic_affinity_irq(unsigned int irq,
+				    const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned long flags;
@@ -369,15 +370,14 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 	/*
 	 * Only the high 8 bits are valid.
@@ -387,7 +387,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 #endif /* CONFIG_SMP */
@@ -2189,7 +2189,7 @@ static void ir_irq_migration(struct work_struct *work)
 				continue;
 			}
 
-			desc->chip->set_affinity(irq, desc->pending_mask);
+			desc->chip->set_affinity(irq, &desc->pending_mask);
 			spin_unlock_irqrestore(&desc->lock, flags);
 		}
 	}
@@ -2198,18 +2198,19 @@ static void ir_irq_migration(struct work_struct *work)
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq(unsigned int irq,
+				       const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (desc->status & IRQ_LEVEL) {
 		desc->status |= IRQ_MOVE_PENDING;
-		desc->pending_mask = mask;
+		cpumask_copy(&desc->pending_mask, mask);
 		migrate_irq_remapped_level(irq);
 		return;
 	}
 
-	migrate_ioapic_irq(irq, mask);
+	migrate_ioapic_irq(irq, *mask);
 }
 #endif
 
@@ -3027,7 +3028,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
@@ -3035,15 +3036,14 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	read_msi_msg(irq, &msg);
@@ -3055,7 +3055,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 
 	write_msi_msg(irq, &msg);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 
 #ifdef CONFIG_INTR_REMAP
@@ -3063,7 +3063,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void ir_set_msi_irq_affinity(unsigned int irq,
+				    const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned int dest;
@@ -3071,18 +3072,17 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	struct irte irte;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
 	if (get_irte(irq, &irte))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	irte.vector = cfg->vector;
@@ -3106,7 +3106,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	}
 
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif
 #endif /* CONFIG_SMP */
@@ -3308,7 +3308,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
@@ -3316,15 +3316,14 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	dmar_msi_read(irq, &msg);
@@ -3336,7 +3335,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 
 	dmar_msi_write(irq, &msg);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif /* CONFIG_SMP */
 
@@ -3369,7 +3368,7 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
@@ -3377,15 +3376,14 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	hpet_msi_read(irq, &msg);
@@ -3397,7 +3395,7 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 
 	hpet_msi_write(irq, &msg);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif /* CONFIG_SMP */
 
@@ -3451,27 +3449,26 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif
 
@@ -3794,10 +3791,10 @@ void __init setup_ioapic_dest(void)
 
 #ifdef CONFIG_INTR_REMAP
 			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq(irq, mask);
+				set_ir_ioapic_affinity_irq(irq, &mask);
 			else
 #endif
-				set_ioapic_affinity_irq(irq, mask);
+				set_ioapic_affinity_irq(irq, &mask);
 		}
 
 	}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a513826..87870a4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -251,7 +251,7 @@ void fixup_irqs(cpumask_t map)
 			mask = map;
 		}
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, mask);
+			desc->chip->set_affinity(irq, &mask);
 		else if (desc->action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84e..7d37f84 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -116,7 +116,7 @@ void fixup_irqs(cpumask_t map)
 			desc->chip->mask(irq);
 
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, mask);
+			desc->chip->set_affinity(irq, &mask);
 		else if (!(warned++))
 			set_affinity = 0;
 
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 7beffca..9dedbbd 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -704,16 +704,17 @@ static unsigned int iosapic_startup_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void iosapic_set_affinity_irq(unsigned int irq, cpumask_t dest)
+static void iosapic_set_affinity_irq(unsigned int irq,
+				     const struct cpumask *dest)
 {
 	struct vector_info *vi = iosapic_get_vector(irq);
 	u32 d0, d1, dummy_d0;
 	unsigned long flags;
 
-	if (cpu_check_affinity(irq, &dest))
+	if (cpu_check_affinity(irq, dest))
 		return;
 
-	vi->txn_addr = txn_affinity_addr(irq, first_cpu(dest));
+	vi->txn_addr = txn_affinity_addr(irq, cpumask_first(dest));
 
 	spin_lock_irqsave(&iosapic_lock, flags);
 	/* d1 contains the destination CPU, so only want to set that
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 1e3b934..eba5ec5 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -579,7 +579,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
 	spin_unlock(&irq_mapping_update_lock);
 
 	/* new event channels are always bound to cpu 0 */
-	irq_set_affinity(irq, cpumask_of_cpu(0));
+	irq_set_affinity(irq, cpumask_of(0));
 
 	/* Unmask the event channel. */
 	enable_irq(irq);
@@ -608,9 +608,9 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 }
 
 
-static void set_affinity_irq(unsigned irq, cpumask_t dest)
+static void set_affinity_irq(unsigned irq, const struct cpumask *dest)
 {
-	unsigned tcpu = first_cpu(dest);
+	unsigned tcpu = cpumask_first(dest);
 	rebind_irq_to_cpu(irq, tcpu);
 }
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f58a0cf..48e6393 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -109,13 +109,13 @@ extern void enable_irq(unsigned int irq);
 
 extern cpumask_t irq_default_affinity;
 
-extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask);
+extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
 extern int irq_select_affinity(unsigned int irq);
 
 #else /* CONFIG_SMP */
 
-static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
+static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
 {
 	return -EINVAL;
 }
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3dddfa7..ab70fd6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -113,7 +113,8 @@ struct irq_chip {
 	void		(*eoi)(unsigned int irq);
 
 	void		(*end)(unsigned int irq);
-	void		(*set_affinity)(unsigned int irq, cpumask_t dest);
+	void		(*set_affinity)(unsigned int irq,
+					const struct cpumask *dest);
 	int		(*retrigger)(unsigned int irq);
 	int		(*set_type)(unsigned int irq, unsigned int flow_type);
 	int		(*set_wake)(unsigned int irq, unsigned int on);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 10b5092..58d8e31 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -45,7 +45,7 @@ void dynamic_irq_init(unsigned int irq)
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
 #ifdef CONFIG_SMP
-	cpus_setall(desc->affinity);
+	cpumask_setall(&desc->affinity);
 #endif
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 801addd..10ad2f8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -79,7 +79,7 @@ int irq_can_set_affinity(unsigned int irq)
  *	@cpumask:	cpumask
  *
  */
-int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
+int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
@@ -91,14 +91,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
-		desc->affinity = cpumask;
+		cpumask_copy(&desc->affinity, cpumask);
 		desc->chip->set_affinity(irq, cpumask);
 	} else {
 		desc->status |= IRQ_MOVE_PENDING;
-		desc->pending_mask = cpumask;
+		cpumask_copy(&desc->pending_mask, cpumask);
 	}
 #else
-	desc->affinity = cpumask;
+	cpumask_copy(&desc->affinity, cpumask);
 	desc->chip->set_affinity(irq, cpumask);
 #endif
 	desc->status |= IRQ_AFFINITY_SET;
@@ -112,26 +112,24 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
  */
 int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
 {
-	cpumask_t mask;
-
 	if (!irq_can_set_affinity(irq))
 		return 0;
 
-	cpus_and(mask, cpu_online_map, irq_default_affinity);
-
 	/*
 	 * Preserve an userspace affinity setup, but make sure that
 	 * one of the targets is online.
 	 */
 	if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
-		if (cpus_intersects(desc->affinity, cpu_online_map))
-			mask = desc->affinity;
+		if (cpumask_any_and(&desc->affinity, cpu_online_mask)
+		    < nr_cpu_ids)
+			goto set_affinity;
 		else
 			desc->status &= ~IRQ_AFFINITY_SET;
 	}
 
-	desc->affinity = mask;
-	desc->chip->set_affinity(irq, mask);
+	cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity);
+set_affinity:
+	desc->chip->set_affinity(irq, &desc->affinity);
 
 	return 0;
 }
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 9db681d..bd72329 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -4,7 +4,6 @@
 void move_masked_irq(int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	cpumask_t tmp;
 
 	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
 		return;
@@ -19,7 +18,7 @@ void move_masked_irq(int irq)
 
 	desc->status &= ~IRQ_MOVE_PENDING;
 
-	if (unlikely(cpus_empty(desc->pending_mask)))
+	if (unlikely(cpumask_empty(&desc->pending_mask)))
 		return;
 
 	if (!desc->chip->set_affinity)
@@ -27,8 +26,6 @@ void move_masked_irq(int irq)
 
 	assert_spin_locked(&desc->lock);
 
-	cpus_and(tmp, desc->pending_mask, cpu_online_map);
-
 	/*
 	 * If there was a valid mask to work with, please
 	 * do the disable, re-program, enable sequence.
@@ -41,10 +38,13 @@ void move_masked_irq(int irq)
 	 * For correct operation this depends on the caller
 	 * masking the irqs.
 	 */
-	if (likely(!cpus_empty(tmp))) {
-		desc->chip->set_affinity(irq,tmp);
+	if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask)
+		   < nr_cpu_ids)) {
+		cpumask_and(&desc->affinity,
+			    &desc->pending_mask, cpu_online_mask);
+		desc->chip->set_affinity(irq, &desc->affinity);
 	}
-	cpus_clear(desc->pending_mask);
+	cpumask_clear(&desc->pending_mask);
 }
 
 void move_native_irq(int irq)
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index f293349..8e91c97 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -40,33 +40,42 @@ static ssize_t irq_affinity_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *pos)
 {
 	unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
-	cpumask_t new_value;
+	cpumask_var_t new_value;
 	int err;
 
 	if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
 	    irq_balancing_disabled(irq))
 		return -EIO;
 
-	err = cpumask_parse_user(buffer, count, &new_value);
+	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = cpumask_parse_user(buffer, count, new_value);
 	if (err)
-		return err;
+		goto free_cpumask;
 
-	if (!is_affinity_mask_valid(new_value))
-		return -EINVAL;
+	if (!is_affinity_mask_valid(*new_value)) {
+		err = -EINVAL;
+		goto free_cpumask;
+	}
 
 	/*
 	 * Do not allow disabling IRQs completely - it's a too easy
 	 * way to make the system unusable accidentally :-) At least
 	 * one online CPU still has to be targeted.
 	 */
-	if (!cpus_intersects(new_value, cpu_online_map))
+	if (!cpumask_intersects(new_value, cpu_online_mask)) {
 		/* Special case for empty set - allow the architecture
 		   code to set default SMP affinity. */
-		return irq_select_affinity_usr(irq) ? -EINVAL : count;
-
-	irq_set_affinity(irq, new_value);
+		err = irq_select_affinity_usr(irq) ? -EINVAL : count;
+	} else {
+		irq_set_affinity(irq, new_value);
+		err = count;
+	}
 
-	return count;
+free_cpumask:
+	free_cpumask_var(new_value);
+	return err;
 }
 
 static int irq_affinity_proc_open(struct inode *inode, struct file *file)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index df12434..ab65d21 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -136,7 +136,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
  */
 static void tick_setup_device(struct tick_device *td,
 			      struct clock_event_device *newdev, int cpu,
-			      const cpumask_t *cpumask)
+			      const struct cpumask *cpumask)
 {
 	ktime_t next_event;
 	void (*handler)(struct clock_event_device *) = NULL;
@@ -171,8 +171,8 @@ static void tick_setup_device(struct tick_device *td,
 	 * When the device is not per cpu, pin the interrupt to the
 	 * current cpu:
 	 */
-	if (!cpus_equal(newdev->cpumask, *cpumask))
-		irq_set_affinity(newdev->irq, *cpumask);
+	if (!cpumask_equal(&newdev->cpumask, cpumask))
+		irq_set_affinity(newdev->irq, cpumask);
 
 	/*
 	 * When global broadcasting is active, check if the current
-- 
cgit v0.10.2


From 320ab2b0b1e08e3805a3e1084a2f0eb1938d5d67 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:26 +1030
Subject: cpumask: convert struct clock_event_device to cpumask pointers.

Impact: change calling convention of existing clock_event APIs

struct clock_event_timer's cpumask field gets changed to take pointer,
as does the ->broadcast function.

Another single-patch change.  For safety, we BUG_ON() in
clockevents_register_device() if it's not set.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c
index a72e798..72f51d3 100644
--- a/arch/arm/mach-at91/at91rm9200_time.c
+++ b/arch/arm/mach-at91/at91rm9200_time.c
@@ -169,7 +169,6 @@ static struct clock_event_device clkevt = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 150,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= clkevt32k_next_event,
 	.set_mode	= clkevt32k_mode,
 };
@@ -197,7 +196,7 @@ void __init at91rm9200_timer_init(void)
 	clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift);
 	clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt);
 	clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1;
-	clkevt.cpumask = cpumask_of_cpu(0);
+	clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&clkevt);
 
 	/* register clocksource */
diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c
index 122fd77..b63e1d5 100644
--- a/arch/arm/mach-at91/at91sam926x_time.c
+++ b/arch/arm/mach-at91/at91sam926x_time.c
@@ -91,7 +91,6 @@ static struct clock_event_device pit_clkevt = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC,
 	.shift		= 32,
 	.rating		= 100,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_mode	= pit_clkevt_mode,
 };
 
@@ -173,6 +172,7 @@ static void __init at91sam926x_pit_init(void)
 
 	/* Set up and register clockevents */
 	pit_clkevt.mult = div_sc(pit_rate, NSEC_PER_SEC, pit_clkevt.shift);
+	pit_clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&pit_clkevt);
 }
 
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index 3b9a296..f8bcd29 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -322,7 +322,7 @@ static void __init davinci_timer_init(void)
 	clockevent_davinci.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_davinci);
 
-	clockevent_davinci.cpumask = cpumask_of_cpu(0);
+	clockevent_davinci.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_davinci);
 }
 
diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c
index a11765f..aff0ebc 100644
--- a/arch/arm/mach-imx/time.c
+++ b/arch/arm/mach-imx/time.c
@@ -184,7 +184,7 @@ static int __init imx_clockevent_init(unsigned long rate)
 	clockevent_imx.min_delta_ns =
 		clockevent_delta2ns(0xf, &clockevent_imx);
 
-	clockevent_imx.cpumask = cpumask_of_cpu(0);
+	clockevent_imx.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&clockevent_imx);
 
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 7766f46..f4656d2 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -487,7 +487,7 @@ static int __init ixp4xx_clockevent_init(void)
 		clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx);
 	clockevent_ixp4xx.min_delta_ns =
 		clockevent_delta2ns(0xf, &clockevent_ixp4xx);
-	clockevent_ixp4xx.cpumask = cpumask_of_cpu(0);
+	clockevent_ixp4xx.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&clockevent_ixp4xx);
 	return 0;
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 345a14c..444d9c0 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -182,7 +182,7 @@ static void __init msm_timer_init(void)
 			clockevent_delta2ns(0xf0000000 >> clock->shift, ce);
 		/* 4 gets rounded down to 3 */
 		ce->min_delta_ns = clockevent_delta2ns(4, ce);
-		ce->cpumask = cpumask_of_cpu(0);
+		ce->cpumask = cpumask_of(0);
 
 		cs->mult = clocksource_hz2mult(clock->freq, cs->shift);
 		res = clocksource_register(cs);
diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c
index a63424d..41df697 100644
--- a/arch/arm/mach-ns9xxx/time-ns9360.c
+++ b/arch/arm/mach-ns9xxx/time-ns9360.c
@@ -173,7 +173,7 @@ static void __init ns9360_timer_init(void)
 	ns9360_clockevent_device.min_delta_ns =
 		clockevent_delta2ns(1, &ns9360_clockevent_device);
 
-	ns9360_clockevent_device.cpumask = cpumask_of_cpu(0);
+	ns9360_clockevent_device.cpumask = cpumask_of(0);
 	clockevents_register_device(&ns9360_clockevent_device);
 
 	setup_irq(IRQ_NS9360_TIMER0 + TIMER_CLOCKEVENT,
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 2cf7e32..495a32c 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -173,7 +173,7 @@ static __init void omap_init_mpu_timer(unsigned long rate)
 	clockevent_mpu_timer1.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_mpu_timer1);
 
-	clockevent_mpu_timer1.cpumask = cpumask_of_cpu(0);
+	clockevent_mpu_timer1.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_mpu_timer1);
 }
 
diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c
index 705367e..fd3f739 100644
--- a/arch/arm/mach-omap1/timer32k.c
+++ b/arch/arm/mach-omap1/timer32k.c
@@ -187,7 +187,7 @@ static __init void omap_init_32k_timer(void)
 	clockevent_32k_timer.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_32k_timer);
 
-	clockevent_32k_timer.cpumask = cpumask_of_cpu(0);
+	clockevent_32k_timer.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_32k_timer);
 }
 
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index 589393b..ae60363 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -120,7 +120,7 @@ static void __init omap2_gp_clockevent_init(void)
 	clockevent_gpt.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_gpt);
 
-	clockevent_gpt.cpumask = cpumask_of_cpu(0);
+	clockevent_gpt.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_gpt);
 }
 
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index f8a9a62..bf3c9a4 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -122,7 +122,6 @@ static struct clock_event_device ckevt_pxa_osmr0 = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 200,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= pxa_osmr0_set_next_event,
 	.set_mode	= pxa_osmr0_set_mode,
 };
@@ -170,6 +169,7 @@ static void __init pxa_timer_init(void)
 		clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0);
 	ckevt_pxa_osmr0.min_delta_ns =
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
+	ckevt_pxa_osmr0.cpumask = cpumask_of(0);
 
 	cksrc_pxa_oscr0.mult =
 		clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 2f04d54..b07cb9b 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -511,7 +511,7 @@ static struct clock_event_device timer0_clockevent =	 {
 	.set_mode	= timer_set_mode,
 	.set_next_event	= timer_set_next_event,
 	.rating		= 300,
-	.cpumask	= CPU_MASK_ALL,
+	.cpumask	= cpu_all_mask,
 };
 
 static void __init realview_clockevents_init(unsigned int timer_irq)
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index 44d178c..504961e 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -161,7 +161,7 @@ void __cpuinit local_timer_setup(unsigned int cpu)
 	clk->set_mode		= local_timer_set_mode;
 	clk->set_next_event	= local_timer_set_next_event;
 	clk->irq		= IRQ_LOCALTIMER;
-	clk->cpumask		= cpumask_of_cpu(cpu);
+	clk->cpumask		= cpumask_of(cpu);
 	clk->shift		= 20;
 	clk->mult		= div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
 	clk->max_delta_ns	= clockevent_delta2ns(0xffffffff, clk);
@@ -199,7 +199,7 @@ void __cpuinit local_timer_setup(unsigned int cpu)
 	clk->rating		= 200;
 	clk->set_mode		= dummy_timer_set_mode;
 	clk->broadcast		= smp_timer_broadcast;
-	clk->cpumask		= cpumask_of_cpu(cpu);
+	clk->cpumask		= cpumask_of(cpu);
 
 	clockevents_register_device(clk);
 }
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 24c0a4b..1cac4ac 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -73,7 +73,6 @@ static struct clock_event_device ckevt_sa1100_osmr0 = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 200,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= sa1100_osmr0_set_next_event,
 	.set_mode	= sa1100_osmr0_set_mode,
 };
@@ -110,6 +109,7 @@ static void __init sa1100_timer_init(void)
 		clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0);
 	ckevt_sa1100_osmr0.min_delta_ns =
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
+	ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
 
 	cksrc_sa1100_oscr.mult =
 		clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 565e0ba..a3f1933 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -965,7 +965,7 @@ static void __init versatile_timer_init(void)
 	timer0_clockevent.min_delta_ns =
 		clockevent_delta2ns(0xf, &timer0_clockevent);
 
-	timer0_clockevent.cpumask = cpumask_of_cpu(0);
+	timer0_clockevent.cpumask = cpumask_of(0);
 	clockevents_register_device(&timer0_clockevent);
 }
 
diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c
index fd28f51..758a129 100644
--- a/arch/arm/plat-mxc/time.c
+++ b/arch/arm/plat-mxc/time.c
@@ -190,7 +190,7 @@ static int __init mxc_clockevent_init(void)
 	clockevent_mxc.min_delta_ns =
 			clockevent_delta2ns(0xff, &clockevent_mxc);
 
-	clockevent_mxc.cpumask = cpumask_of_cpu(0);
+	clockevent_mxc.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&clockevent_mxc);
 
diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index 544d6b3..6fa2923 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -149,7 +149,6 @@ static struct clock_event_device orion_clkevt = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
 	.shift		= 32,
 	.rating		= 300,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= orion_clkevt_next_event,
 	.set_mode	= orion_clkevt_mode,
 };
@@ -199,5 +198,6 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
 	orion_clkevt.mult = div_sc(tclk, NSEC_PER_SEC, orion_clkevt.shift);
 	orion_clkevt.max_delta_ns = clockevent_delta2ns(0xfffffffe, &orion_clkevt);
 	orion_clkevt.min_delta_ns = clockevent_delta2ns(1, &orion_clkevt);
+	orion_clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&orion_clkevt);
 }
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
index 283481d..0ff46bf 100644
--- a/arch/avr32/kernel/time.c
+++ b/arch/avr32/kernel/time.c
@@ -106,7 +106,6 @@ static struct clock_event_device comparator = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 16,
 	.rating		= 50,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= comparator_next_event,
 	.set_mode	= comparator_mode,
 };
@@ -134,6 +133,7 @@ void __init time_init(void)
 	comparator.mult = div_sc(counter_hz, NSEC_PER_SEC, comparator.shift);
 	comparator.max_delta_ns = clockevent_delta2ns((u32)~0, &comparator);
 	comparator.min_delta_ns = clockevent_delta2ns(50, &comparator) + 1;
+	comparator.cpumask = cpumask_of(0);
 
 	sysreg_write(COMPARE, 0);
 	timer_irqaction.dev_id = &comparator;
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index e887efc..0ed2bad 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -162,7 +162,6 @@ static struct clock_event_device clockevent_bfin = {
 	.name		= "bfin_core_timer",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event = bfin_timer_set_next_event,
 	.set_mode	= bfin_timer_set_mode,
 };
@@ -193,6 +192,7 @@ static int __init bfin_clockevent_init(void)
 	clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift);
 	clockevent_bfin.max_delta_ns = clockevent_delta2ns(-1, &clockevent_bfin);
 	clockevent_bfin.min_delta_ns = clockevent_delta2ns(100, &clockevent_bfin);
+	clockevent_bfin.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_bfin);
 
 	return 0;
diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c
index c5b9167..2a12e7f 100644
--- a/arch/m68knommu/platform/coldfire/pit.c
+++ b/arch/m68knommu/platform/coldfire/pit.c
@@ -156,7 +156,7 @@ void hw_timer_init(void)
 {
 	u32 imr;
 
-	cf_pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	cf_pit_clockevent.cpumask = cpumask_of(smp_processor_id());
 	cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32);
 	cf_pit_clockevent.max_delta_ns =
 		clockevent_delta2ns(0xFFFF, &cf_pit_clockevent);
diff --git a/arch/mips/jazz/irq.c b/arch/mips/jazz/irq.c
index d7f8a78..03965cb 100644
--- a/arch/mips/jazz/irq.c
+++ b/arch/mips/jazz/irq.c
@@ -146,7 +146,7 @@ void __init plat_time_init(void)
 
 	BUG_ON(HZ != 100);
 
-	cd->cpumask             = cpumask_of_cpu(cpu);
+	cd->cpumask             = cpumask_of(cpu);
 	clockevents_register_device(cd);
 	action->dev_id = cd;
 	setup_irq(JAZZ_TIMER_IRQ, action);
diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c
index d7e21bc..b820661 100644
--- a/arch/mips/kernel/cevt-bcm1480.c
+++ b/arch/mips/kernel/cevt-bcm1480.c
@@ -126,7 +126,7 @@ void __cpuinit sb1480_clockevent_init(void)
 	cd->min_delta_ns	= clockevent_delta2ns(2, cd);
 	cd->rating		= 200;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= sibyte_next_event;
 	cd->set_mode		= sibyte_set_mode;
 	clockevents_register_device(cd);
diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c
index df4acb6..1ada45e 100644
--- a/arch/mips/kernel/cevt-ds1287.c
+++ b/arch/mips/kernel/cevt-ds1287.c
@@ -88,7 +88,6 @@ static void ds1287_event_handler(struct clock_event_device *dev)
 static struct clock_event_device ds1287_clockevent = {
 	.name		= "ds1287",
 	.features	= CLOCK_EVT_FEAT_PERIODIC,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= ds1287_set_next_event,
 	.set_mode	= ds1287_set_mode,
 	.event_handler	= ds1287_event_handler,
@@ -122,6 +121,7 @@ int __init ds1287_clockevent_init(int irq)
 	clockevent_set_clock(cd, 32768);
 	cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
 	cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
+	cd->cpumask = cpumask_of(0);
 
 	clockevents_register_device(&ds1287_clockevent);
 
diff --git a/arch/mips/kernel/cevt-gt641xx.c b/arch/mips/kernel/cevt-gt641xx.c
index 6e2f585..e9b787f 100644
--- a/arch/mips/kernel/cevt-gt641xx.c
+++ b/arch/mips/kernel/cevt-gt641xx.c
@@ -96,7 +96,6 @@ static void gt641xx_timer0_event_handler(struct clock_event_device *dev)
 static struct clock_event_device gt641xx_timer0_clockevent = {
 	.name		= "gt641xx-timer0",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-	.cpumask	= CPU_MASK_CPU0,
 	.irq		= GT641XX_TIMER0_IRQ,
 	.set_next_event	= gt641xx_timer0_set_next_event,
 	.set_mode	= gt641xx_timer0_set_mode,
@@ -132,6 +131,7 @@ static int __init gt641xx_timer0_clockevent_init(void)
 	clockevent_set_clock(cd, gt641xx_base_clock);
 	cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
 	cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
+	cd->cpumask = cpumask_of(0);
 
 	clockevents_register_device(&gt641xx_timer0_clockevent);
 
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 4a4c59f..e1ec83b 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -195,7 +195,7 @@ int __cpuinit mips_clockevent_init(void)
 
 	cd->rating		= 300;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= mips_next_event;
 	cd->set_mode		= mips_set_clock_mode;
 	cd->event_handler	= mips_event_handler;
diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c
index 0f188cd..a2eebaa 100644
--- a/arch/mips/kernel/cevt-sb1250.c
+++ b/arch/mips/kernel/cevt-sb1250.c
@@ -125,7 +125,7 @@ void __cpuinit sb1250_clockevent_init(void)
 	cd->min_delta_ns	= clockevent_delta2ns(2, cd);
 	cd->rating		= 200;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= sibyte_next_event;
 	cd->set_mode		= sibyte_set_mode;
 	clockevents_register_device(cd);
diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
index 5162fe4..6d45e24 100644
--- a/arch/mips/kernel/cevt-smtc.c
+++ b/arch/mips/kernel/cevt-smtc.c
@@ -292,7 +292,7 @@ int __cpuinit mips_clockevent_init(void)
 
 	cd->rating		= 300;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= mips_next_event;
 	cd->set_mode		= mips_set_clock_mode;
 	cd->event_handler	= mips_event_handler;
diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c
index b5fc4eb..eccf7d6 100644
--- a/arch/mips/kernel/cevt-txx9.c
+++ b/arch/mips/kernel/cevt-txx9.c
@@ -112,7 +112,6 @@ static struct clock_event_device txx9tmr_clock_event_device = {
 	.name		= "TXx9",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.rating		= 200,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_mode	= txx9tmr_set_mode,
 	.set_next_event	= txx9tmr_set_next_event,
 };
@@ -150,6 +149,7 @@ void __init txx9_clockevent_init(unsigned long baseaddr, int irq,
 		clockevent_delta2ns(0xffffffff >> (32 - TXX9_TIMER_BITS), cd);
 	cd->min_delta_ns = clockevent_delta2ns(0xf, cd);
 	cd->irq = irq;
+	cd->cpumask = cpumask_of(0),
 	clockevents_register_device(cd);
 	setup_irq(irq, &txx9tmr_irq);
 	printk(KERN_INFO "TXx9: clockevent device at 0x%lx, irq %d\n",
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index b6ac551..f4d1878 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -115,7 +115,7 @@ void __init setup_pit_timer(void)
 	 * Start pit with the boot cpu mask and make it global after the
 	 * IO_APIC has been initialized.
 	 */
-	cd->cpumask = cpumask_of_cpu(cpu);
+	cd->cpumask = cpumask_of(cpu);
 	clockevent_set_clock(cd, CLOCK_TICK_RATE);
 	cd->max_delta_ns = clockevent_delta2ns(0x7FFF, cd);
 	cd->min_delta_ns = clockevent_delta2ns(0xF, cd);
diff --git a/arch/mips/nxp/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c
index 62f495b..cf293b2 100644
--- a/arch/mips/nxp/pnx8550/common/time.c
+++ b/arch/mips/nxp/pnx8550/common/time.c
@@ -102,6 +102,7 @@ __init void plat_time_init(void)
 	unsigned int p;
 	unsigned int pow2p;
 
+	pnx8xxx_clockevent.cpumask = cpu_none_mask;
 	clockevents_register_device(&pnx8xxx_clockevent);
 	clocksource_register(&pnx_clocksource);
 
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 1327c27..f024057 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -134,7 +134,7 @@ void __cpuinit hub_rt_clock_event_init(void)
 	cd->min_delta_ns        = clockevent_delta2ns(0x300, cd);
 	cd->rating		= 200;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= rt_next_event;
 	cd->set_mode		= rt_set_mode;
 	clockevents_register_device(cd);
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index 796e3ce..69f5f88 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -80,7 +80,7 @@ static void __init sni_a20r_timer_setup(void)
 	struct irqaction *action = &a20r_irqaction;
 	unsigned int cpu = smp_processor_id();
 
-	cd->cpumask             = cpumask_of_cpu(cpu);
+	cd->cpumask             = cpumask_of(cpu);
 	clockevents_register_device(cd);
 	action->dev_id = cd;
 	setup_irq(SNI_A20R_IRQ_TIMER, &a20r_irqaction);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e2ee66b..6f39d35 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -869,7 +869,7 @@ static void register_decrementer_clockevent(int cpu)
 	struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
 
 	*dec = decrementer_clockevent;
-	dec->cpumask = cpumask_of_cpu(cpu);
+	dec->cpumask = cpumask_of(cpu);
 
 	printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
 	       dec->name, dec->mult, dec->shift, cpu);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index eccefbb..f5bd141 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -154,7 +154,7 @@ void init_cpu_timer(void)
 	cd->min_delta_ns	= 1;
 	cd->max_delta_ns	= LONG_MAX;
 	cd->rating		= 400;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= s390_next_event;
 	cd->set_mode		= s390_set_mode;
 
diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index 85b660c..c24e9c6 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -31,7 +31,7 @@ enum {
 };
 
 void smp_message_recv(unsigned int msg);
-void smp_timer_broadcast(cpumask_t mask);
+void smp_timer_broadcast(const struct cpumask *mask);
 
 void local_timer_interrupt(void);
 void local_timer_setup(unsigned int cpu);
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 593937d..8f40274 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -184,11 +184,11 @@ void arch_send_call_function_single_ipi(int cpu)
 	plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE);
 }
 
-void smp_timer_broadcast(cpumask_t mask)
+void smp_timer_broadcast(const struct cpumask *mask)
 {
 	int cpu;
 
-	for_each_cpu_mask(cpu, mask)
+	for_each_cpu(cpu, mask)
 		plat_send_ipi(cpu, SMP_MSG_TIMER);
 }
 
diff --git a/arch/sh/kernel/timers/timer-broadcast.c b/arch/sh/kernel/timers/timer-broadcast.c
index c231763..96e8eae 100644
--- a/arch/sh/kernel/timers/timer-broadcast.c
+++ b/arch/sh/kernel/timers/timer-broadcast.c
@@ -51,7 +51,7 @@ void __cpuinit local_timer_setup(unsigned int cpu)
 	clk->mult		= 1;
 	clk->set_mode		= dummy_timer_set_mode;
 	clk->broadcast		= smp_timer_broadcast;
-	clk->cpumask		= cpumask_of_cpu(cpu);
+	clk->cpumask		= cpumask_of(cpu);
 
 	clockevents_register_device(clk);
 }
diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c
index 3c61ddd..0db3f95 100644
--- a/arch/sh/kernel/timers/timer-tmu.c
+++ b/arch/sh/kernel/timers/timer-tmu.c
@@ -263,7 +263,7 @@ static int tmu_timer_init(void)
 	tmu0_clockevent.min_delta_ns =
 			clockevent_delta2ns(1, &tmu0_clockevent);
 
-	tmu0_clockevent.cpumask = cpumask_of_cpu(0);
+	tmu0_clockevent.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&tmu0_clockevent);
 
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index 141da37..9df8f09 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -763,7 +763,7 @@ void __devinit setup_sparc64_timer(void)
 	sevt = &__get_cpu_var(sparc64_events);
 
 	memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
-	sevt->cpumask = cpumask_of_cpu(smp_processor_id());
+	sevt->cpumask = cpumask_of(smp_processor_id());
 
 	clockevents_register_device(sevt);
 }
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 47f04f4..b13a87a 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -50,7 +50,7 @@ static int itimer_next_event(unsigned long delta,
 static struct clock_event_device itimer_clockevent = {
 	.name		= "itimer",
 	.rating		= 250,
-	.cpumask	= CPU_MASK_ALL,
+	.cpumask	= cpu_all_mask,
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= itimer_set_mode,
 	.set_next_event = itimer_next_event,
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 16f9487..b2cef49 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -141,7 +141,7 @@ static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
+static void lapic_timer_broadcast(const struct cpumask *mask);
 static void apic_pm_activate(void);
 
 /*
@@ -453,10 +453,10 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 /*
  * Local APIC timer broadcast function
  */
-static void lapic_timer_broadcast(cpumask_t mask)
+static void lapic_timer_broadcast(const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
-	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+	send_IPI_mask(*mask, LOCAL_TIMER_VECTOR);
 #endif
 }
 
@@ -469,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void)
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
 	memcpy(levt, &lapic_clockevent, sizeof(*levt));
-	levt->cpumask = cpumask_of_cpu(smp_processor_id());
+	levt->cpumask = cpumask_of(smp_processor_id());
 
 	clockevents_register_device(levt);
 }
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 940f258..e76d7e2 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -246,7 +246,7 @@ static void hpet_legacy_clockevent_register(void)
 	 * Start hpet with the boot cpu mask and make it
 	 * global after the IO_APIC has been initialized.
 	 */
-	hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
 	clockevents_register_device(&hpet_clockevent);
 	global_clock_event = &hpet_clockevent;
 	printk(KERN_DEBUG "hpet clockevent registered\n");
@@ -500,7 +500,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 	/* 5 usec minimum reprogramming delta. */
 	evt->min_delta_ns = 5000;
 
-	evt->cpumask = cpumask_of_cpu(hdev->cpu);
+	evt->cpumask = cpumask_of(hdev->cpu);
 	clockevents_register_device(evt);
 }
 
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c1b5e3e..10f92fb 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void)
 	 * Start pit with the boot cpu mask and make it global after the
 	 * IO_APIC has been initialized.
 	 */
-	pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	pit_clockevent.cpumask = cpumask_of(smp_processor_id());
 	pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
 				     pit_clockevent.shift);
 	pit_clockevent.max_delta_ns =
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3b59951..c12314c 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = {
 	.set_mode = mfgpt_set_mode,
 	.set_next_event = mfgpt_next_event,
 	.rating = 250,
-	.cpumask = CPU_MASK_ALL,
+	.cpumask = cpu_all_mask,
 	.shift = 32
 };
 
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 254ee07..c4c1f9e 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
 	/* Upper bound is clockevent's use of ulong for cycle deltas. */
 	evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
 	evt->min_delta_ns = clockevent_delta2ns(1, evt);
-	evt->cpumask = cpumask_of_cpu(cpu);
+	evt->cpumask = cpumask_of(cpu);
 
 	printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
 	       evt->name, evt->mult, evt->shift);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a5d8e1a..104c822 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -737,7 +737,7 @@ static void lguest_time_init(void)
 
 	/* We can't set cpumask in the initializer: damn C limitations!  Set it
 	 * here and register our timer device. */
-	lguest_clockevent.cpumask = cpumask_of_cpu(0);
+	lguest_clockevent.cpumask = cpumask_of(0);
 	clockevents_register_device(&lguest_clockevent);
 
 	/* Finally, we unblock the timer interrupt. */
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda..65d75a6 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -437,7 +437,7 @@ void xen_setup_timer(int cpu)
 	evt = &per_cpu(xen_clock_events, cpu);
 	memcpy(evt, xen_clockevent, sizeof(*evt));
 
-	evt->cpumask = cpumask_of_cpu(cpu);
+	evt->cpumask = cpumask_of(cpu);
 	evt->irq = irq;
 
 	setup_runstate_info(cpu);
diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
index f450588..254f106 100644
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -154,7 +154,6 @@ static struct tc_clkevt_device clkevt = {
 		.shift		= 32,
 		/* Should be lower than at91rm9200's system timer */
 		.rating		= 125,
-		.cpumask	= CPU_MASK_CPU0,
 		.set_next_event	= tc_next_event,
 		.set_mode	= tc_mode,
 	},
@@ -195,6 +194,7 @@ static void __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
 	clkevt.clkevt.max_delta_ns
 		= clockevent_delta2ns(0xffff, &clkevt.clkevt);
 	clkevt.clkevt.min_delta_ns = clockevent_delta2ns(1, &clkevt.clkevt) + 1;
+	clkevt.clkevt.cpumask = cpumask_of(0);
 
 	setup_irq(irq, &tc_irqaction);
 
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index ed3a5d4..cea1536 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -82,13 +82,13 @@ struct clock_event_device {
 	int			shift;
 	int			rating;
 	int			irq;
-	cpumask_t		cpumask;
+	const struct cpumask	*cpumask;
 	int			(*set_next_event)(unsigned long evt,
 						  struct clock_event_device *);
 	void			(*set_mode)(enum clock_event_mode mode,
 					    struct clock_event_device *);
 	void			(*event_handler)(struct clock_event_device *);
-	void			(*broadcast)(cpumask_t mask);
+	void			(*broadcast)(const struct cpumask *mask);
 	struct list_head	list;
 	enum clock_event_mode	mode;
 	ktime_t			next_event;
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index f8d9680..ea2f48a 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -166,6 +166,8 @@ static void clockevents_notify_released(void)
 void clockevents_register_device(struct clock_event_device *dev)
 {
 	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+	BUG_ON(!dev->cpumask);
+
 	/*
 	 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
 	 * on it, so fix it up and emit a warning:
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f98a1b7..9590af2 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -150,7 +150,7 @@ static void tick_do_broadcast(cpumask_t mask)
 		 */
 		cpu = first_cpu(mask);
 		td = &per_cpu(tick_cpu_device, cpu);
-		td->evtdev->broadcast(mask);
+		td->evtdev->broadcast(&mask);
 	}
 }
 
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index ab65d21..f8372be 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -171,7 +171,7 @@ static void tick_setup_device(struct tick_device *td,
 	 * When the device is not per cpu, pin the interrupt to the
 	 * current cpu:
 	 */
-	if (!cpumask_equal(&newdev->cpumask, cpumask))
+	if (!cpumask_equal(newdev->cpumask, cpumask))
 		irq_set_affinity(newdev->irq, cpumask);
 
 	/*
@@ -202,14 +202,14 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 	spin_lock_irqsave(&tick_device_lock, flags);
 
 	cpu = smp_processor_id();
-	if (!cpu_isset(cpu, newdev->cpumask))
+	if (!cpumask_test_cpu(cpu, newdev->cpumask))
 		goto out_bc;
 
 	td = &per_cpu(tick_cpu_device, cpu);
 	curdev = td->evtdev;
 
 	/* cpu local device ? */
-	if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) {
+	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {
 
 		/*
 		 * If the cpu affinity of the device interrupt can not
@@ -222,7 +222,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 		 * If we have a cpu local device already, do not replace it
 		 * by a non cpu local device
 		 */
-		if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu)))
+		if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
 			goto out_bc;
 	}
 
-- 
cgit v0.10.2


From aab46da0520af9c99b7802cebe4f14a81ff39415 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:27 +1030
Subject: cpumask: Add CONFIG_CPUMASK_OFFSTACK

Impact: Add config option to enable code in cpumask.h

Currently it can be set if DEBUG_PER_CPU_MAPS, or set specifically by
an arch.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/lib/Kconfig b/lib/Kconfig
index 85cf7ea..7823f83 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -157,4 +157,11 @@ config CHECK_SIGNATURE
 config HAVE_LMB
 	boolean
 
+config CPUMASK_OFFSTACK
+	bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
+	help
+	  Use dynamic allocation for cpumask_var_t, instead of putting
+	  them on the stack.  This is a bit more expensive, but avoids
+	  stack overflow.
+
 endmenu
-- 
cgit v0.10.2


From f0b848ce6fe9062d504d997e9e97fe0f87d57217 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:27 +1030
Subject: cpumask: Introduce cpumask_of_{node,pcibus} to replace
 {node,pcibus}_to_cpumask

Impact: New APIs

The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these
return a pointer to a struct cpumask.  Part of removing cpumasks from
the stack.

This defines them in the generic non-NUMA case.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Henderson <rth@twiddle.net>

diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
index 54bbf6e..0e9e2bc 100644
--- a/include/asm-generic/topology.h
+++ b/include/asm-generic/topology.h
@@ -40,6 +40,9 @@
 #ifndef node_to_cpumask
 #define node_to_cpumask(node)	((void)node, cpu_online_map)
 #endif
+#ifndef cpumask_of_node
+#define cpumask_of_node(node)	((void)node, cpu_online_mask)
+#endif
 #ifndef node_to_first_cpu
 #define node_to_first_cpu(node)	((void)(node),0)
 #endif
@@ -54,9 +57,18 @@
 				)
 #endif
 
+#ifndef cpumask_of_pcibus
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
+				 cpu_all_mask :				\
+				 cpumask_of_node(pcibus_to_node(bus)))
+#endif
+
 #endif	/* CONFIG_NUMA */
 
-/* returns pointer to cpumask for specified node */
+/*
+ * returns pointer to cpumask for specified node
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
 #ifndef node_to_cpumask_ptr
 
 #define	node_to_cpumask_ptr(v, node) 					\
-- 
cgit v0.10.2


From 7be7585393d311866653564fbcd10a3232773c0b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:28 +1030
Subject: cpumask: Use all NR_CPUS bits unless CONFIG_CPUMASK_OFFSTACK

Impact: futureproof as we convert more code to new APIs

The old cpumask operators treat all NR_CPUS bits as relevent, the new
ones use nr_cpumask_bits.  For large NR_CPUS and small nr_cpu_ids, this
makes a difference.

However, mixing the two can cause problems with undefined bits.  An
arch which sets CONFIG_CPUMASK_OFFSTACK should have converted across
to the new operators, so it's safe in that case.

(Thanks to Stephen Rothwell for bisecting the initial unused-bits bug,
and Mike Travis for this solution).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Mike Travis <travis@sgi.com>

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 94a2ab8..d4bf526 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -510,9 +510,6 @@ extern cpumask_t cpu_active_map;
 	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD	\
 }
 
-/* This produces more efficient code. */
-#define nr_cpumask_bits	NR_CPUS
-
 #else /* NR_CPUS > BITS_PER_LONG */
 
 #define CPU_BITS_ALL						\
@@ -520,9 +517,15 @@ extern cpumask_t cpu_active_map;
 	[0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,		\
 	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD		\
 }
+#endif /* NR_CPUS > BITS_PER_LONG */
 
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/* Assuming NR_CPUS is huge, a runtime limit is more efficient.  Also,
+ * not all bits may be allocated. */
 #define nr_cpumask_bits	nr_cpu_ids
-#endif /* NR_CPUS > BITS_PER_LONG */
+#else
+#define nr_cpumask_bits	NR_CPUS
+#endif
 
 /* verify cpu argument to cpumask_* operators */
 static inline unsigned int cpumask_check(unsigned int cpu)
-- 
cgit v0.10.2


From 23553b2c08c9b6e96be98c44feb9c5e640d3e789 Mon Sep 17 00:00:00 2001
From: Xiaochuan-Xu <xiaochuan-xu@cqu.edu.cn>
Date: Tue, 9 Dec 2008 19:44:12 +0800
Subject: UBI: prepare for protection tree improvements

This patch modifies @struct ubi_wl_entry and adds union which
contains only one element so far. This is just a preparation
for further changes which will kill the protection tree and
make UBI use a list instead.

Signed-off-by: Xiaochuan-Xu <xiaochuan-xu@cqu.edu.cn>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 1c3fa18..46a4763 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -95,7 +95,7 @@ enum {
 
 /**
  * struct ubi_wl_entry - wear-leveling entry.
- * @rb: link in the corresponding RB-tree
+ * @u.rb: link in the corresponding (free/used) RB-tree
  * @ec: erase counter
  * @pnum: physical eraseblock number
  *
@@ -104,7 +104,9 @@ enum {
  * RB-trees. See WL sub-system for details.
  */
 struct ubi_wl_entry {
-	struct rb_node rb;
+	union {
+		struct rb_node rb;
+	} u;
 	int ec;
 	int pnum;
 };
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index abf65ea..0279bf9 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -220,7 +220,7 @@ static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
 		struct ubi_wl_entry *e1;
 
 		parent = *p;
-		e1 = rb_entry(parent, struct ubi_wl_entry, rb);
+		e1 = rb_entry(parent, struct ubi_wl_entry, u.rb);
 
 		if (e->ec < e1->ec)
 			p = &(*p)->rb_left;
@@ -235,8 +235,8 @@ static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
 		}
 	}
 
-	rb_link_node(&e->rb, parent, p);
-	rb_insert_color(&e->rb, root);
+	rb_link_node(&e->u.rb, parent, p);
+	rb_insert_color(&e->u.rb, root);
 }
 
 /**
@@ -331,7 +331,7 @@ static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
 	while (p) {
 		struct ubi_wl_entry *e1;
 
-		e1 = rb_entry(p, struct ubi_wl_entry, rb);
+		e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
 
 		if (e->pnum == e1->pnum) {
 			ubi_assert(e == e1);
@@ -413,14 +413,14 @@ static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
 	struct rb_node *p;
 	struct ubi_wl_entry *e;
 
-	e = rb_entry(rb_first(root), struct ubi_wl_entry, rb);
+	e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
 	max += e->ec;
 
 	p = root->rb_node;
 	while (p) {
 		struct ubi_wl_entry *e1;
 
-		e1 = rb_entry(p, struct ubi_wl_entry, rb);
+		e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
 		if (e1->ec >= max)
 			p = p->rb_left;
 		else {
@@ -491,12 +491,13 @@ retry:
 		 * eraseblock with erase counter greater or equivalent than the
 		 * lowest erase counter plus %WL_FREE_MAX_DIFF.
 		 */
-		first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
-		last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb);
+		first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry,
+					u.rb);
+		last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, u.rb);
 
 		if (last->ec - first->ec < WL_FREE_MAX_DIFF)
 			e = rb_entry(ubi->free.rb_node,
-					struct ubi_wl_entry, rb);
+					struct ubi_wl_entry, u.rb);
 		else {
 			medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
 			e = find_wl_entry(&ubi->free, medium_ec);
@@ -508,7 +509,7 @@ retry:
 		 * For short term data we pick a physical eraseblock with the
 		 * lowest erase counter as we expect it will be erased soon.
 		 */
-		e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
+		e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, u.rb);
 		protect = ST_PROTECTION;
 		break;
 	default:
@@ -522,7 +523,7 @@ retry:
 	 * be protected from being moved for some time.
 	 */
 	paranoid_check_in_wl_tree(e, &ubi->free);
-	rb_erase(&e->rb, &ubi->free);
+	rb_erase(&e->u.rb, &ubi->free);
 	prot_tree_add(ubi, e, pe, protect);
 
 	dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect);
@@ -779,7 +780,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 		 * highly worn-out free physical eraseblock. If the erase
 		 * counters differ much enough, start wear-leveling.
 		 */
-		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
+		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
 		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
 
 		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
@@ -788,21 +789,21 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 			goto out_cancel;
 		}
 		paranoid_check_in_wl_tree(e1, &ubi->used);
-		rb_erase(&e1->rb, &ubi->used);
+		rb_erase(&e1->u.rb, &ubi->used);
 		dbg_wl("move PEB %d EC %d to PEB %d EC %d",
 		       e1->pnum, e1->ec, e2->pnum, e2->ec);
 	} else {
 		/* Perform scrubbing */
 		scrubbing = 1;
-		e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb);
+		e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb);
 		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
 		paranoid_check_in_wl_tree(e1, &ubi->scrub);
-		rb_erase(&e1->rb, &ubi->scrub);
+		rb_erase(&e1->u.rb, &ubi->scrub);
 		dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
 	}
 
 	paranoid_check_in_wl_tree(e2, &ubi->free);
-	rb_erase(&e2->rb, &ubi->free);
+	rb_erase(&e2->u.rb, &ubi->free);
 	ubi->move_from = e1;
 	ubi->move_to = e2;
 	spin_unlock(&ubi->wl_lock);
@@ -1012,7 +1013,7 @@ static int ensure_wear_leveling(struct ubi_device *ubi)
 		 * erase counter of free physical eraseblocks is greater then
 		 * %UBI_WL_THRESHOLD.
 		 */
-		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
+		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
 		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
 
 		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
@@ -1214,10 +1215,10 @@ retry:
 	} else {
 		if (in_wl_tree(e, &ubi->used)) {
 			paranoid_check_in_wl_tree(e, &ubi->used);
-			rb_erase(&e->rb, &ubi->used);
+			rb_erase(&e->u.rb, &ubi->used);
 		} else if (in_wl_tree(e, &ubi->scrub)) {
 			paranoid_check_in_wl_tree(e, &ubi->scrub);
-			rb_erase(&e->rb, &ubi->scrub);
+			rb_erase(&e->u.rb, &ubi->scrub);
 		} else {
 			err = prot_tree_del(ubi, e->pnum);
 			if (err) {
@@ -1279,7 +1280,7 @@ retry:
 
 	if (in_wl_tree(e, &ubi->used)) {
 		paranoid_check_in_wl_tree(e, &ubi->used);
-		rb_erase(&e->rb, &ubi->used);
+		rb_erase(&e->u.rb, &ubi->used);
 	} else {
 		int err;
 
@@ -1361,11 +1362,11 @@ static void tree_destroy(struct rb_root *root)
 		else if (rb->rb_right)
 			rb = rb->rb_right;
 		else {
-			e = rb_entry(rb, struct ubi_wl_entry, rb);
+			e = rb_entry(rb, struct ubi_wl_entry, u.rb);
 
 			rb = rb_parent(rb);
 			if (rb) {
-				if (rb->rb_left == &e->rb)
+				if (rb->rb_left == &e->u.rb)
 					rb->rb_left = NULL;
 				else
 					rb->rb_right = NULL;
-- 
cgit v0.10.2


From 7b6c32daec3bff380ced6822002bc352bdf2c982 Mon Sep 17 00:00:00 2001
From: Xiaochuan-Xu <xiaochuan-xu@cqu.edu.cn>
Date: Mon, 15 Dec 2008 21:07:41 +0800
Subject: UBI: simplify PEB protection code

UBI has 2 RB-trees to implement PEB protection, which is too
much for simply prevent PEB from being moved for some time.
This patch implements this using lists. The benefits:

1. No need to allocate protection entry on each PEB get.
2. No need to maintain balanced trees and walk them.

Signed-off-by: Xiaochuan-Xu <xiaochuan-xu@cqu.edu.cn>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 46a4763..4a8ec48 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -74,6 +74,13 @@
 #define UBI_IO_RETRIES 3
 
 /*
+ * Length of the protection queue. The length is effectively equivalent to the
+ * number of (global) erase cycles PEBs are protected from the wear-leveling
+ * worker.
+ */
+#define UBI_PROT_QUEUE_LEN 10
+
+/*
  * Error codes returned by the I/O sub-system.
  *
  * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
@@ -96,6 +103,7 @@ enum {
 /**
  * struct ubi_wl_entry - wear-leveling entry.
  * @u.rb: link in the corresponding (free/used) RB-tree
+ * @u.list: link in the protection queue
  * @ec: erase counter
  * @pnum: physical eraseblock number
  *
@@ -106,6 +114,7 @@ enum {
 struct ubi_wl_entry {
 	union {
 		struct rb_node rb;
+		struct list_head list;
 	} u;
 	int ec;
 	int pnum;
@@ -290,7 +299,7 @@ struct ubi_wl_entry;
  * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling
  *
  * @autoresize_vol_id: ID of the volume which has to be auto-resized at the end
- *                     of UBI ititializetion
+ *                     of UBI initialization
  * @vtbl_slots: how many slots are available in the volume table
  * @vtbl_size: size of the volume table in bytes
  * @vtbl: in-RAM volume table copy
@@ -308,18 +317,17 @@ struct ubi_wl_entry;
  * @used: RB-tree of used physical eraseblocks
  * @free: RB-tree of free physical eraseblocks
  * @scrub: RB-tree of physical eraseblocks which need scrubbing
- * @prot: protection trees
- * @prot.pnum: protection tree indexed by physical eraseblock numbers
- * @prot.aec: protection tree indexed by absolute erase counter value
- * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from,
- *           @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
- *           fields
+ * @pq: protection queue (contain physical eraseblocks which are temporarily
+ *      protected from the wear-leveling worker)
+ * @pq_head: protection queue head
+ * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from,
+ * 	     @move_to, @move_to_put @erase_pending, @wl_scheduled and @works
+ * 	     fields
  * @move_mutex: serializes eraseblock moves
- * @work_sem: sycnhronizes the WL worker with use tasks
+ * @work_sem: synchronizes the WL worker with use tasks
  * @wl_scheduled: non-zero if the wear-leveling was scheduled
  * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
  *             physical eraseblock
- * @abs_ec: absolute erase counter
  * @move_from: physical eraseblock from where the data is being moved
  * @move_to: physical eraseblock where the data is being moved to
  * @move_to_put: if the "to" PEB was put
@@ -353,11 +361,11 @@ struct ubi_wl_entry;
  *
  * @peb_buf1: a buffer of PEB size used for different purposes
  * @peb_buf2: another buffer of PEB size used for different purposes
- * @buf_mutex: proptects @peb_buf1 and @peb_buf2
+ * @buf_mutex: protects @peb_buf1 and @peb_buf2
  * @ckvol_mutex: serializes static volume checking when opening
- * @mult_mutex: serializes operations on multiple volumes, like re-nameing
+ * @mult_mutex: serializes operations on multiple volumes, like re-naming
  * @dbg_peb_buf: buffer of PEB size used for debugging
- * @dbg_buf_mutex: proptects @dbg_peb_buf
+ * @dbg_buf_mutex: protects @dbg_peb_buf
  */
 struct ubi_device {
 	struct cdev cdev;
@@ -394,16 +402,13 @@ struct ubi_device {
 	struct rb_root used;
 	struct rb_root free;
 	struct rb_root scrub;
-	struct {
-		struct rb_root pnum;
-		struct rb_root aec;
-	} prot;
+	struct list_head pq[UBI_PROT_QUEUE_LEN];
+	int pq_head;
 	spinlock_t wl_lock;
 	struct mutex move_mutex;
 	struct rw_semaphore work_sem;
 	int wl_scheduled;
 	struct ubi_wl_entry **lookuptbl;
-	unsigned long long abs_ec;
 	struct ubi_wl_entry *move_from;
 	struct ubi_wl_entry *move_to;
 	int move_to_put;
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 0279bf9..14901cb 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -22,7 +22,7 @@
  * UBI wear-leveling sub-system.
  *
  * This sub-system is responsible for wear-leveling. It works in terms of
- * physical* eraseblocks and erase counters and knows nothing about logical
+ * physical eraseblocks and erase counters and knows nothing about logical
  * eraseblocks, volumes, etc. From this sub-system's perspective all physical
  * eraseblocks are of two types - used and free. Used physical eraseblocks are
  * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
@@ -55,8 +55,39 @@
  *
  * As it was said, for the UBI sub-system all physical eraseblocks are either
  * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while
- * used eraseblocks are kept in a set of different RB-trees: @wl->used,
- * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
+ * used eraseblocks are kept in @wl->used or @wl->scrub RB-trees, or
+ * (temporarily) in the @wl->pq queue.
+ *
+ * When the WL sub-system returns a physical eraseblock, the physical
+ * eraseblock is protected from being moved for some "time". For this reason,
+ * the physical eraseblock is not directly moved from the @wl->free tree to the
+ * @wl->used tree. There is a protection queue in between where this
+ * physical eraseblock is temporarily stored (@wl->pq).
+ *
+ * All this protection stuff is needed because:
+ *  o we don't want to move physical eraseblocks just after we have given them
+ *    to the user; instead, we first want to let users fill them up with data;
+ *
+ *  o there is a chance that the user will put the physical eraseblock very
+ *    soon, so it makes sense not to move it for some time, but wait; this is
+ *    especially important in case of "short term" physical eraseblocks.
+ *
+ * Physical eraseblocks stay protected only for limited time. But the "time" is
+ * measured in erase cycles in this case. This is implemented with help of the
+ * protection queue. Eraseblocks are put to the tail of this queue when they
+ * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the
+ * head of the queue on each erase operation (for any eraseblock). So the
+ * length of the queue defines how may (global) erase cycles PEBs are protected.
+ *
+ * To put it differently, each physical eraseblock has 2 main states: free and
+ * used. The former state corresponds to the @wl->free tree. The latter state
+ * is split up on several sub-states:
+ * o the WL movement is allowed (@wl->used tree);
+ * o the WL movement is temporarily prohibited (@wl->pq queue);
+ * o scrubbing is needed (@wl->scrub tree).
+ *
+ * Depending on the sub-state, wear-leveling entries of the used physical
+ * eraseblocks may be kept in one of those structures.
  *
  * Note, in this implementation, we keep a small in-RAM object for each physical
  * eraseblock. This is surely not a scalable solution. But it appears to be good
@@ -70,9 +101,6 @@
  * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
  * pick target PEB with an average EC if our PEB is not very "old". This is a
  * room for future re-works of the WL sub-system.
- *
- * Note: the stuff with protection trees looks too complex and is difficult to
- * understand. Should be fixed.
  */
 
 #include <linux/slab.h>
@@ -85,14 +113,6 @@
 #define WL_RESERVED_PEBS 1
 
 /*
- * How many erase cycles are short term, unknown, and long term physical
- * eraseblocks protected.
- */
-#define ST_PROTECTION 16
-#define U_PROTECTION  10
-#define LT_PROTECTION 4
-
-/*
  * Maximum difference between two erase counters. If this threshold is
  * exceeded, the WL sub-system starts moving data from used physical
  * eraseblocks with low erase counter to free physical eraseblocks with high
@@ -120,64 +140,9 @@
 #define WL_MAX_FAILURES 32
 
 /**
- * struct ubi_wl_prot_entry - PEB protection entry.
- * @rb_pnum: link in the @wl->prot.pnum RB-tree
- * @rb_aec: link in the @wl->prot.aec RB-tree
- * @abs_ec: the absolute erase counter value when the protection ends
- * @e: the wear-leveling entry of the physical eraseblock under protection
- *
- * When the WL sub-system returns a physical eraseblock, the physical
- * eraseblock is protected from being moved for some "time". For this reason,
- * the physical eraseblock is not directly moved from the @wl->free tree to the
- * @wl->used tree. There is one more tree in between where this physical
- * eraseblock is temporarily stored (@wl->prot).
- *
- * All this protection stuff is needed because:
- *  o we don't want to move physical eraseblocks just after we have given them
- *    to the user; instead, we first want to let users fill them up with data;
- *
- *  o there is a chance that the user will put the physical eraseblock very
- *    soon, so it makes sense not to move it for some time, but wait; this is
- *    especially important in case of "short term" physical eraseblocks.
- *
- * Physical eraseblocks stay protected only for limited time. But the "time" is
- * measured in erase cycles in this case. This is implemented with help of the
- * absolute erase counter (@wl->abs_ec). When it reaches certain value, the
- * physical eraseblocks are moved from the protection trees (@wl->prot.*) to
- * the @wl->used tree.
- *
- * Protected physical eraseblocks are searched by physical eraseblock number
- * (when they are put) and by the absolute erase counter (to check if it is
- * time to move them to the @wl->used tree). So there are actually 2 RB-trees
- * storing the protected physical eraseblocks: @wl->prot.pnum and
- * @wl->prot.aec. They are referred to as the "protection" trees. The
- * first one is indexed by the physical eraseblock number. The second one is
- * indexed by the absolute erase counter. Both trees store
- * &struct ubi_wl_prot_entry objects.
- *
- * Each physical eraseblock has 2 main states: free and used. The former state
- * corresponds to the @wl->free tree. The latter state is split up on several
- * sub-states:
- * o the WL movement is allowed (@wl->used tree);
- * o the WL movement is temporarily prohibited (@wl->prot.pnum and
- * @wl->prot.aec trees);
- * o scrubbing is needed (@wl->scrub tree).
- *
- * Depending on the sub-state, wear-leveling entries of the used physical
- * eraseblocks may be kept in one of those trees.
- */
-struct ubi_wl_prot_entry {
-	struct rb_node rb_pnum;
-	struct rb_node rb_aec;
-	unsigned long long abs_ec;
-	struct ubi_wl_entry *e;
-};
-
-/**
  * struct ubi_work - UBI work description data structure.
  * @list: a link in the list of pending works
  * @func: worker function
- * @priv: private data of the worker function
  * @e: physical eraseblock to erase
  * @torture: if the physical eraseblock has to be tortured
  *
@@ -198,9 +163,11 @@ struct ubi_work {
 static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec);
 static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
 				     struct rb_root *root);
+static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e);
 #else
 #define paranoid_check_ec(ubi, pnum, ec) 0
 #define paranoid_check_in_wl_tree(e, root)
+#define paranoid_check_in_pq(ubi, e) 0
 #endif
 
 /**
@@ -355,49 +322,24 @@ static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
 }
 
 /**
- * prot_tree_add - add physical eraseblock to protection trees.
+ * prot_queue_add - add physical eraseblock to the protection queue.
  * @ubi: UBI device description object
  * @e: the physical eraseblock to add
- * @pe: protection entry object to use
- * @ec: for how many erase operations this PEB should be protected
  *
- * @wl->lock has to be locked.
+ * This function adds @e to the tail of the protection queue @ubi->pq, where
+ * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be
+ * temporarily protected from the wear-leveling worker. Note, @wl->lock has to
+ * be locked.
  */
-static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e,
-			  struct ubi_wl_prot_entry *pe, int ec)
+static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e)
 {
-	struct rb_node **p, *parent = NULL;
-	struct ubi_wl_prot_entry *pe1;
+	int pq_tail = ubi->pq_head - 1;
 
-	pe->e = e;
-	pe->abs_ec = ubi->abs_ec + ec;
-
-	p = &ubi->prot.pnum.rb_node;
-	while (*p) {
-		parent = *p;
-		pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum);
-
-		if (e->pnum < pe1->e->pnum)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-	rb_link_node(&pe->rb_pnum, parent, p);
-	rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum);
-
-	p = &ubi->prot.aec.rb_node;
-	parent = NULL;
-	while (*p) {
-		parent = *p;
-		pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec);
-
-		if (pe->abs_ec < pe1->abs_ec)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-	rb_link_node(&pe->rb_aec, parent, p);
-	rb_insert_color(&pe->rb_aec, &ubi->prot.aec);
+	if (pq_tail < 0)
+		pq_tail = UBI_PROT_QUEUE_LEN - 1;
+	ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN);
+	list_add_tail(&e->u.list, &ubi->pq[pq_tail]);
+	dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec);
 }
 
 /**
@@ -442,17 +384,12 @@ static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
  */
 int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
 {
-	int err, protect, medium_ec;
+	int err, medium_ec;
 	struct ubi_wl_entry *e, *first, *last;
-	struct ubi_wl_prot_entry *pe;
 
 	ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM ||
 		   dtype == UBI_UNKNOWN);
 
-	pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
-	if (!pe)
-		return -ENOMEM;
-
 retry:
 	spin_lock(&ubi->wl_lock);
 	if (!ubi->free.rb_node) {
@@ -460,16 +397,13 @@ retry:
 			ubi_assert(list_empty(&ubi->works));
 			ubi_err("no free eraseblocks");
 			spin_unlock(&ubi->wl_lock);
-			kfree(pe);
 			return -ENOSPC;
 		}
 		spin_unlock(&ubi->wl_lock);
 
 		err = produce_free_peb(ubi);
-		if (err < 0) {
-			kfree(pe);
+		if (err < 0)
 			return err;
-		}
 		goto retry;
 	}
 
@@ -482,7 +416,6 @@ retry:
 		 * %WL_FREE_MAX_DIFF.
 		 */
 		e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
-		protect = LT_PROTECTION;
 		break;
 	case UBI_UNKNOWN:
 		/*
@@ -502,7 +435,6 @@ retry:
 			medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
 			e = find_wl_entry(&ubi->free, medium_ec);
 		}
-		protect = U_PROTECTION;
 		break;
 	case UBI_SHORTTERM:
 		/*
@@ -510,63 +442,45 @@ retry:
 		 * lowest erase counter as we expect it will be erased soon.
 		 */
 		e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, u.rb);
-		protect = ST_PROTECTION;
 		break;
 	default:
-		protect = 0;
-		e = NULL;
 		BUG();
 	}
 
+	paranoid_check_in_wl_tree(e, &ubi->free);
+
 	/*
-	 * Move the physical eraseblock to the protection trees where it will
+	 * Move the physical eraseblock to the protection queue where it will
 	 * be protected from being moved for some time.
 	 */
-	paranoid_check_in_wl_tree(e, &ubi->free);
 	rb_erase(&e->u.rb, &ubi->free);
-	prot_tree_add(ubi, e, pe, protect);
-
-	dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect);
+	dbg_wl("PEB %d EC %d", e->pnum, e->ec);
+	prot_queue_add(ubi, e);
 	spin_unlock(&ubi->wl_lock);
-
 	return e->pnum;
 }
 
 /**
- * prot_tree_del - remove a physical eraseblock from the protection trees
+ * prot_queue_del - remove a physical eraseblock from the protection queue.
  * @ubi: UBI device description object
  * @pnum: the physical eraseblock to remove
  *
- * This function returns PEB @pnum from the protection trees and returns zero
- * in case of success and %-ENODEV if the PEB was not found in the protection
- * trees.
+ * This function deletes PEB @pnum from the protection queue and returns zero
+ * in case of success and %-ENODEV if the PEB was not found.
  */
-static int prot_tree_del(struct ubi_device *ubi, int pnum)
+static int prot_queue_del(struct ubi_device *ubi, int pnum)
 {
-	struct rb_node *p;
-	struct ubi_wl_prot_entry *pe = NULL;
-
-	p = ubi->prot.pnum.rb_node;
-	while (p) {
-
-		pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum);
-
-		if (pnum == pe->e->pnum)
-			goto found;
+	struct ubi_wl_entry *e;
 
-		if (pnum < pe->e->pnum)
-			p = p->rb_left;
-		else
-			p = p->rb_right;
-	}
+	e = ubi->lookuptbl[pnum];
+	if (!e)
+		return -ENODEV;
 
-	return -ENODEV;
+	if (paranoid_check_in_pq(ubi, e))
+		return -ENODEV;
 
-found:
-	ubi_assert(pe->e->pnum == pnum);
-	rb_erase(&pe->rb_aec, &ubi->prot.aec);
-	rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
-	kfree(pe);
+	list_del(&e->u.list);
+	dbg_wl("deleted PEB %d from the protection queue", e->pnum);
 	return 0;
 }
 
@@ -632,47 +546,47 @@ out_free:
 }
 
 /**
- * check_protection_over - check if it is time to stop protecting some PEBs.
+ * serve_prot_queue - check if it is time to stop protecting PEBs.
  * @ubi: UBI device description object
  *
- * This function is called after each erase operation, when the absolute erase
- * counter is incremented, to check if some physical eraseblock  have not to be
- * protected any longer. These physical eraseblocks are moved from the
- * protection trees to the used tree.
+ * This function is called after each erase operation and removes PEBs from the
+ * tail of the protection queue. These PEBs have been protected for long enough
+ * and should be moved to the used tree.
  */
-static void check_protection_over(struct ubi_device *ubi)
+static void serve_prot_queue(struct ubi_device *ubi)
 {
-	struct ubi_wl_prot_entry *pe;
+	struct ubi_wl_entry *e, *tmp;
+	int count;
 
 	/*
 	 * There may be several protected physical eraseblock to remove,
 	 * process them all.
 	 */
-	while (1) {
-		spin_lock(&ubi->wl_lock);
-		if (!ubi->prot.aec.rb_node) {
-			spin_unlock(&ubi->wl_lock);
-			break;
-		}
-
-		pe = rb_entry(rb_first(&ubi->prot.aec),
-			      struct ubi_wl_prot_entry, rb_aec);
+repeat:
+	count = 0;
+	spin_lock(&ubi->wl_lock);
+	list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) {
+		dbg_wl("PEB %d EC %d protection over, move to used tree",
+			e->pnum, e->ec);
 
-		if (pe->abs_ec > ubi->abs_ec) {
+		list_del(&e->u.list);
+		wl_tree_add(e, &ubi->used);
+		if (count++ > 32) {
+			/*
+			 * Let's be nice and avoid holding the spinlock for
+			 * too long.
+			 */
 			spin_unlock(&ubi->wl_lock);
-			break;
+			cond_resched();
+			goto repeat;
 		}
-
-		dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu",
-		       pe->e->pnum, ubi->abs_ec, pe->abs_ec);
-		rb_erase(&pe->rb_aec, &ubi->prot.aec);
-		rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
-		wl_tree_add(pe->e, &ubi->used);
-		spin_unlock(&ubi->wl_lock);
-
-		kfree(pe);
-		cond_resched();
 	}
+
+	ubi->pq_head += 1;
+	if (ubi->pq_head == UBI_PROT_QUEUE_LEN)
+		ubi->pq_head = 0;
+	ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN);
+	spin_unlock(&ubi->wl_lock);
 }
 
 /**
@@ -680,8 +594,8 @@ static void check_protection_over(struct ubi_device *ubi)
  * @ubi: UBI device description object
  * @wrk: the work to schedule
  *
- * This function enqueues a work defined by @wrk to the tail of the pending
- * works list.
+ * This function adds a work defined by @wrk to the tail of the pending works
+ * list.
  */
 static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
 {
@@ -740,7 +654,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 				int cancel)
 {
 	int err, scrubbing = 0, torture = 0;
-	struct ubi_wl_prot_entry *uninitialized_var(pe);
 	struct ubi_wl_entry *e1, *e2;
 	struct ubi_vid_hdr *vid_hdr;
 
@@ -857,23 +770,17 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 		 * The LEB has not been moved because the volume is being
 		 * deleted or the PEB has been put meanwhile. We should prevent
 		 * this PEB from being selected for wear-leveling movement
-		 * again, so put it to the protection tree.
+		 * again, so put it to the protection queue.
 		 */
 
 		dbg_wl("canceled moving PEB %d", e1->pnum);
 		ubi_assert(err == 1);
 
-		pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
-		if (!pe) {
-			err = -ENOMEM;
-			goto out_error;
-		}
-
 		ubi_free_vid_hdr(ubi, vid_hdr);
 		vid_hdr = NULL;
 
 		spin_lock(&ubi->wl_lock);
-		prot_tree_add(ubi, e1, pe, U_PROTECTION);
+		prot_queue_add(ubi, e1);
 		ubi_assert(!ubi->move_to_put);
 		ubi->move_from = ubi->move_to = NULL;
 		ubi->wl_scheduled = 0;
@@ -1075,7 +982,6 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
 		kfree(wl_wrk);
 
 		spin_lock(&ubi->wl_lock);
-		ubi->abs_ec += 1;
 		wl_tree_add(e, &ubi->free);
 		spin_unlock(&ubi->wl_lock);
 
@@ -1083,7 +989,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
 		 * One more erase operation has happened, take care about
 		 * protected physical eraseblocks.
 		 */
-		check_protection_over(ubi);
+		serve_prot_queue(ubi);
 
 		/* And take care about wear-leveling */
 		err = ensure_wear_leveling(ubi);
@@ -1220,7 +1126,7 @@ retry:
 			paranoid_check_in_wl_tree(e, &ubi->scrub);
 			rb_erase(&e->u.rb, &ubi->scrub);
 		} else {
-			err = prot_tree_del(ubi, e->pnum);
+			err = prot_queue_del(ubi, e->pnum);
 			if (err) {
 				ubi_err("PEB %d not found", pnum);
 				ubi_ro_mode(ubi);
@@ -1284,7 +1190,7 @@ retry:
 	} else {
 		int err;
 
-		err = prot_tree_del(ubi, e->pnum);
+		err = prot_queue_del(ubi, e->pnum);
 		if (err) {
 			ubi_err("PEB %d not found", pnum);
 			ubi_ro_mode(ubi);
@@ -1315,7 +1221,7 @@ int ubi_wl_flush(struct ubi_device *ubi)
 	int err;
 
 	/*
-	 * Erase while the pending works queue is not empty, but not more then
+	 * Erase while the pending works queue is not empty, but not more than
 	 * the number of currently pending works.
 	 */
 	dbg_wl("flush (%d pending works)", ubi->works_count);
@@ -1461,15 +1367,13 @@ static void cancel_pending(struct ubi_device *ubi)
  */
 int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
 {
-	int err;
+	int err, i;
 	struct rb_node *rb1, *rb2;
 	struct ubi_scan_volume *sv;
 	struct ubi_scan_leb *seb, *tmp;
 	struct ubi_wl_entry *e;
 
-
 	ubi->used = ubi->free = ubi->scrub = RB_ROOT;
-	ubi->prot.pnum = ubi->prot.aec = RB_ROOT;
 	spin_lock_init(&ubi->wl_lock);
 	mutex_init(&ubi->move_mutex);
 	init_rwsem(&ubi->work_sem);
@@ -1483,6 +1387,10 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
 	if (!ubi->lookuptbl)
 		return err;
 
+	for (i = 0; i < UBI_PROT_QUEUE_LEN; i++)
+		INIT_LIST_HEAD(&ubi->pq[i]);
+	ubi->pq_head = 0;
+
 	list_for_each_entry_safe(seb, tmp, &si->erase, u.list) {
 		cond_resched();
 
@@ -1577,33 +1485,18 @@ out_free:
 }
 
 /**
- * protection_trees_destroy - destroy the protection RB-trees.
+ * protection_queue_destroy - destroy the protection queue.
  * @ubi: UBI device description object
  */
-static void protection_trees_destroy(struct ubi_device *ubi)
+static void protection_queue_destroy(struct ubi_device *ubi)
 {
-	struct rb_node *rb;
-	struct ubi_wl_prot_entry *pe;
+	int i;
+	struct ubi_wl_entry *e, *tmp;
 
-	rb = ubi->prot.aec.rb_node;
-	while (rb) {
-		if (rb->rb_left)
-			rb = rb->rb_left;
-		else if (rb->rb_right)
-			rb = rb->rb_right;
-		else {
-			pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec);
-
-			rb = rb_parent(rb);
-			if (rb) {
-				if (rb->rb_left == &pe->rb_aec)
-					rb->rb_left = NULL;
-				else
-					rb->rb_right = NULL;
-			}
-
-			kmem_cache_free(ubi_wl_entry_slab, pe->e);
-			kfree(pe);
+	for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) {
+		list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) {
+			list_del(&e->u.list);
+			kmem_cache_free(ubi_wl_entry_slab, e);
 		}
 	}
 }
@@ -1616,7 +1509,7 @@ void ubi_wl_close(struct ubi_device *ubi)
 {
 	dbg_wl("close the WL sub-system");
 	cancel_pending(ubi);
-	protection_trees_destroy(ubi);
+	protection_queue_destroy(ubi);
 	tree_destroy(&ubi->used);
 	tree_destroy(&ubi->free);
 	tree_destroy(&ubi->scrub);
@@ -1686,4 +1579,27 @@ static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
 	return 1;
 }
 
+/**
+ * paranoid_check_in_pq - check if wear-leveling entry is in the protection
+ *                        queue.
+ * @ubi: UBI device description object
+ * @e: the wear-leveling entry to check
+ *
+ * This function returns zero if @e is in @ubi->pq and %1 if it is not.
+ */
+static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e)
+{
+	struct ubi_wl_entry *p;
+	int i;
+
+	for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
+		list_for_each_entry(p, &ubi->pq[i], u.list)
+			if (p == e)
+				return 0;
+
+	ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue",
+		e->pnum, e->ec);
+	ubi_dbg_dump_stack();
+	return 1;
+}
 #endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
-- 
cgit v0.10.2


From 3d0911bfe03b5f077cef32ca644b5756d48affc3 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 31 Oct 2008 16:10:24 +0000
Subject: i2c-s3c2410: Fixup style problems from checkpatch.pl

Fixup the 36 warnings and errors generated from running
checkpatch.pl on the driver. The warnings are too numerous
to be listed here.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 1fac4e2..f5efece 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -109,7 +109,8 @@ static inline int s3c24xx_i2c_is2440(struct s3c24xx_i2c *i2c)
  * the default if there is none
 */
 
-static inline struct s3c2410_platform_i2c *s3c24xx_i2c_get_platformdata(struct device *dev)
+static inline struct s3c2410_platform_i2c *
+s3c24xx_i2c_get_platformdata(struct device *dev)
 {
 	if (dev->platform_data != NULL)
 		return (struct s3c2410_platform_i2c *)dev->platform_data;
@@ -129,7 +130,7 @@ static inline void s3c24xx_i2c_master_complete(struct s3c24xx_i2c *i2c, int ret)
 
 	i2c->msg_ptr = 0;
 	i2c->msg = NULL;
-	i2c->msg_idx ++;
+	i2c->msg_idx++;
 	i2c->msg_num = 0;
 	if (ret)
 		i2c->msg_idx = ret;
@@ -140,19 +141,17 @@ static inline void s3c24xx_i2c_master_complete(struct s3c24xx_i2c *i2c, int ret)
 static inline void s3c24xx_i2c_disable_ack(struct s3c24xx_i2c *i2c)
 {
 	unsigned long tmp;
-	
+
 	tmp = readl(i2c->regs + S3C2410_IICCON);
 	writel(tmp & ~S3C2410_IICCON_ACKEN, i2c->regs + S3C2410_IICCON);
-
 }
 
 static inline void s3c24xx_i2c_enable_ack(struct s3c24xx_i2c *i2c)
 {
 	unsigned long tmp;
-	
+
 	tmp = readl(i2c->regs + S3C2410_IICCON);
 	writel(tmp | S3C2410_IICCON_ACKEN, i2c->regs + S3C2410_IICCON);
-
 }
 
 /* irq enable/disable functions */
@@ -160,7 +159,7 @@ static inline void s3c24xx_i2c_enable_ack(struct s3c24xx_i2c *i2c)
 static inline void s3c24xx_i2c_disable_irq(struct s3c24xx_i2c *i2c)
 {
 	unsigned long tmp;
-	
+
 	tmp = readl(i2c->regs + S3C2410_IICCON);
 	writel(tmp & ~S3C2410_IICCON_IRQEN, i2c->regs + S3C2410_IICCON);
 }
@@ -168,7 +167,7 @@ static inline void s3c24xx_i2c_disable_irq(struct s3c24xx_i2c *i2c)
 static inline void s3c24xx_i2c_enable_irq(struct s3c24xx_i2c *i2c)
 {
 	unsigned long tmp;
-	
+
 	tmp = readl(i2c->regs + S3C2410_IICCON);
 	writel(tmp | S3C2410_IICCON_IRQEN, i2c->regs + S3C2410_IICCON);
 }
@@ -176,10 +175,10 @@ static inline void s3c24xx_i2c_enable_irq(struct s3c24xx_i2c *i2c)
 
 /* s3c24xx_i2c_message_start
  *
- * put the start of a message onto the bus 
+ * put the start of a message onto the bus
 */
 
-static void s3c24xx_i2c_message_start(struct s3c24xx_i2c *i2c, 
+static void s3c24xx_i2c_message_start(struct s3c24xx_i2c *i2c,
 				      struct i2c_msg *msg)
 {
 	unsigned int addr = (msg->addr & 0x7f) << 1;
@@ -198,15 +197,15 @@ static void s3c24xx_i2c_message_start(struct s3c24xx_i2c *i2c,
 	if (msg->flags & I2C_M_REV_DIR_ADDR)
 		addr ^= 1;
 
-	// todo - check for wether ack wanted or not
+	/* todo - check for wether ack wanted or not */
 	s3c24xx_i2c_enable_ack(i2c);
 
 	iiccon = readl(i2c->regs + S3C2410_IICCON);
 	writel(stat, i2c->regs + S3C2410_IICSTAT);
-	
+
 	dev_dbg(i2c->dev, "START: %08lx to IICSTAT, %02x to DS\n", stat, addr);
 	writeb(addr, i2c->regs + S3C2410_IICDS);
-	
+
 	/* delay here to ensure the data byte has gotten onto the bus
 	 * before the transaction is started */
 
@@ -214,8 +213,8 @@ static void s3c24xx_i2c_message_start(struct s3c24xx_i2c *i2c,
 
 	dev_dbg(i2c->dev, "iiccon, %08lx\n", iiccon);
 	writel(iiccon, i2c->regs + S3C2410_IICCON);
-	
-	stat |=  S3C2410_IICSTAT_START;
+
+	stat |= S3C2410_IICSTAT_START;
 	writel(stat, i2c->regs + S3C2410_IICSTAT);
 }
 
@@ -226,11 +225,11 @@ static inline void s3c24xx_i2c_stop(struct s3c24xx_i2c *i2c, int ret)
 	dev_dbg(i2c->dev, "STOP\n");
 
 	/* stop the transfer */
-	iicstat &= ~ S3C2410_IICSTAT_START;
+	iicstat &= ~S3C2410_IICSTAT_START;
 	writel(iicstat, i2c->regs + S3C2410_IICSTAT);
-	
+
 	i2c->state = STATE_STOP;
-	
+
 	s3c24xx_i2c_master_complete(i2c, ret);
 	s3c24xx_i2c_disable_irq(i2c);
 }
@@ -240,7 +239,7 @@ static inline void s3c24xx_i2c_stop(struct s3c24xx_i2c *i2c, int ret)
 
 /* is_lastmsg()
  *
- * returns TRUE if the current message is the last in the set 
+ * returns TRUE if the current message is the last in the set
 */
 
 static inline int is_lastmsg(struct s3c24xx_i2c *i2c)
@@ -288,14 +287,14 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat)
 
 	case STATE_STOP:
 		dev_err(i2c->dev, "%s: called in STATE_STOP\n", __func__);
-		s3c24xx_i2c_disable_irq(i2c);		
+		s3c24xx_i2c_disable_irq(i2c);
 		goto out_ack;
 
 	case STATE_START:
 		/* last thing we did was send a start condition on the
 		 * bus, or started a new i2c message
 		 */
-		
+
 		if (iicstat & S3C2410_IICSTAT_LASTBIT &&
 		    !(i2c->msg->flags & I2C_M_IGNORE_NAK)) {
 			/* ack was not received... */
@@ -321,7 +320,7 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat)
 		if (i2c->state == STATE_READ)
 			goto prepare_read;
 
-		/* fall through to the write state, as we will need to 
+		/* fall through to the write state, as we will need to
 		 * send a byte as well */
 
 	case STATE_WRITE:
@@ -338,7 +337,7 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat)
 			}
 		}
 
-	retry_write:
+ retry_write:
 
 		if (!is_msgend(i2c)) {
 			byte = i2c->msg->buf[i2c->msg_ptr++];
@@ -358,9 +357,9 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat)
 			dev_dbg(i2c->dev, "WRITE: Next Message\n");
 
 			i2c->msg_ptr = 0;
-			i2c->msg_idx ++;
+			i2c->msg_idx++;
 			i2c->msg++;
-			
+
 			/* check to see if we need to do another message */
 			if (i2c->msg->flags & I2C_M_NOSTART) {
 
@@ -374,7 +373,6 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat)
 
 				goto retry_write;
 			} else {
-			
 				/* send the new start */
 				s3c24xx_i2c_message_start(i2c, i2c->msg);
 				i2c->state = STATE_START;
@@ -388,7 +386,7 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat)
 		break;
 
 	case STATE_READ:
-		/* we have a byte of data in the data register, do 
+		/* we have a byte of data in the data register, do
 		 * something with it, and then work out wether we are
 		 * going to do any more read/write
 		 */
@@ -396,13 +394,13 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat)
 		byte = readb(i2c->regs + S3C2410_IICDS);
 		i2c->msg->buf[i2c->msg_ptr++] = byte;
 
-	prepare_read:
+ prepare_read:
 		if (is_msglast(i2c)) {
 			/* last byte of buffer */
 
 			if (is_lastmsg(i2c))
 				s3c24xx_i2c_disable_ack(i2c);
-			
+
 		} else if (is_msgend(i2c)) {
 			/* ok, we've read the entire buffer, see if there
 			 * is anything else we need to do */
@@ -428,7 +426,7 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat)
 	/* acknowlegde the IRQ and get back on with the work */
 
  out_ack:
-	tmp = readl(i2c->regs + S3C2410_IICCON);	
+	tmp = readl(i2c->regs + S3C2410_IICCON);
 	tmp &= ~S3C2410_IICCON_IRQPEND;
 	writel(tmp, i2c->regs + S3C2410_IICCON);
  out:
@@ -449,19 +447,19 @@ static irqreturn_t s3c24xx_i2c_irq(int irqno, void *dev_id)
 	status = readl(i2c->regs + S3C2410_IICSTAT);
 
 	if (status & S3C2410_IICSTAT_ARBITR) {
-		// deal with arbitration loss
+		/* deal with arbitration loss */
 		dev_err(i2c->dev, "deal with arbitration loss\n");
 	}
 
 	if (i2c->state == STATE_IDLE) {
 		dev_dbg(i2c->dev, "IRQ: error i2c->state == IDLE\n");
 
-		tmp = readl(i2c->regs + S3C2410_IICCON);	
+		tmp = readl(i2c->regs + S3C2410_IICCON);
 		tmp &= ~S3C2410_IICCON_IRQPEND;
 		writel(tmp, i2c->regs +  S3C2410_IICCON);
 		goto out;
 	}
-	
+
 	/* pretty much this leaves us with the fact that we've
 	 * transmitted or received whatever byte we last sent */
 
@@ -484,7 +482,7 @@ static int s3c24xx_i2c_set_master(struct s3c24xx_i2c *i2c)
 
 	while (timeout-- > 0) {
 		iicstat = readl(i2c->regs + S3C2410_IICSTAT);
-		
+
 		if (!(iicstat & S3C2410_IICSTAT_BUSBUSY))
 			return 0;
 
@@ -502,7 +500,8 @@ static int s3c24xx_i2c_set_master(struct s3c24xx_i2c *i2c)
  * this starts an i2c transfer
 */
 
-static int s3c24xx_i2c_doxfer(struct s3c24xx_i2c *i2c, struct i2c_msg *msgs, int num)
+static int s3c24xx_i2c_doxfer(struct s3c24xx_i2c *i2c,
+			      struct i2c_msg *msgs, int num)
 {
 	unsigned long timeout;
 	int ret;
@@ -528,12 +527,12 @@ static int s3c24xx_i2c_doxfer(struct s3c24xx_i2c *i2c, struct i2c_msg *msgs, int
 	s3c24xx_i2c_enable_irq(i2c);
 	s3c24xx_i2c_message_start(i2c, msgs);
 	spin_unlock_irq(&i2c->lock);
-	
+
 	timeout = wait_event_timeout(i2c->wait, i2c->msg_num == 0, HZ * 5);
 
 	ret = i2c->msg_idx;
 
-	/* having these next two as dev_err() makes life very 
+	/* having these next two as dev_err() makes life very
 	 * noisy when doing an i2cdetect */
 
 	if (timeout == 0)
@@ -642,7 +641,7 @@ static inline int freq_acceptable(unsigned int freq, unsigned int wanted)
 {
 	int diff = freq - wanted;
 
-	return (diff >= -2 && diff <= 2);
+	return diff >= -2 && diff <= 2;
 }
 
 /* s3c24xx_i2c_clockrate
@@ -665,7 +664,7 @@ static int s3c24xx_i2c_clockrate(struct s3c24xx_i2c *i2c, unsigned int *got)
 
 	pdata = s3c24xx_i2c_get_platformdata(i2c->adap.dev.parent);
 	clkin /= 1000;		/* clkin now in KHz */
-     
+
 	dev_dbg(i2c->dev, "pdata %p, freq %lu %lu..%lu\n",
 		 pdata, pdata->bus_freq, pdata->min_freq, pdata->max_freq);
 
@@ -773,7 +772,7 @@ static inline void s3c24xx_i2c_deregister_cpufreq(struct s3c24xx_i2c *i2c)
 
 /* s3c24xx_i2c_init
  *
- * initialise the controller, set the IO lines and frequency 
+ * initialise the controller, set the IO lines and frequency
 */
 
 static int s3c24xx_i2c_init(struct s3c24xx_i2c *i2c)
@@ -792,7 +791,7 @@ static int s3c24xx_i2c_init(struct s3c24xx_i2c *i2c)
 	s3c2410_gpio_cfgpin(S3C2410_GPE14, S3C2410_GPE14_IICSCL);
 
 	/* write slave address */
-	
+
 	writeb(pdata->slave_addr, i2c->regs + S3C2410_IICADD);
 
 	dev_info(i2c->dev, "slave address 0x%02x\n", pdata->slave_addr);
@@ -877,7 +876,8 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
 		goto err_ioarea;
 	}
 
-	dev_dbg(&pdev->dev, "registers %p (%p, %p)\n", i2c->regs, i2c->ioarea, res);
+	dev_dbg(&pdev->dev, "registers %p (%p, %p)\n",
+		i2c->regs, i2c->ioarea, res);
 
 	/* setup info block for the i2c core */
 
@@ -891,7 +891,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
 		goto err_iomap;
 
 	/* find the IRQ for this unit (note, this relies on the init call to
-	 * ensure no current IRQs pending 
+	 * ensure no current IRQs pending
 	 */
 
 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
@@ -910,7 +910,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
 	}
 
 	i2c->irq = res;
-		
+
 	dev_dbg(&pdev->dev, "irq resource %p (%lu)\n", res,
 		(unsigned long)res->start);
 
-- 
cgit v0.10.2


From 8be310a6dea491b28f81672752d9d2c7fc25cdd3 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 31 Oct 2008 16:10:25 +0000
Subject: i2c-s3c2410: Use platform data for gpio configuration

Add a callback to set the gpio configuration for the
i2c device instead of a set include. This also allows
the remvoal of the machine gpio and hardware files.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index f5efece..0aa0142 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -35,11 +35,9 @@
 #include <linux/clk.h>
 #include <linux/cpufreq.h>
 
-#include <mach/hardware.h>
 #include <asm/irq.h>
 #include <asm/io.h>
 
-#include <mach/regs-gpio.h>
 #include <asm/plat-s3c/regs-iic.h>
 #include <asm/plat-s3c/iic.h>
 
@@ -489,9 +487,6 @@ static int s3c24xx_i2c_set_master(struct s3c24xx_i2c *i2c)
 		msleep(1);
 	}
 
-	dev_dbg(i2c->dev, "timeout: GPEDAT is %08x\n",
-		__raw_readl(S3C2410_GPEDAT));
-
 	return -ETIMEDOUT;
 }
 
@@ -783,12 +778,12 @@ static int s3c24xx_i2c_init(struct s3c24xx_i2c *i2c)
 
 	/* get the plafrom data */
 
-	pdata = s3c24xx_i2c_get_platformdata(i2c->adap.dev.parent);
+	pdata = s3c24xx_i2c_get_platformdata(i2c->dev);
 
 	/* inititalise the gpio */
 
-	s3c2410_gpio_cfgpin(S3C2410_GPE15, S3C2410_GPE15_IICSDA);
-	s3c2410_gpio_cfgpin(S3C2410_GPE14, S3C2410_GPE14_IICSCL);
+	if (pdata->cfg_gpio)
+		pdata->cfg_gpio(to_platform_device(i2c->dev));
 
 	/* write slave address */
 
-- 
cgit v0.10.2


From 6a039cabba3ddd556643156ce0a7cd07da456b20 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 31 Oct 2008 16:10:27 +0000
Subject: i2c-s3c2410: Remove default platform data.

The platform data should now always be present when the device
is initialised, so we can remove the default platform data in
the driver.

All the device initialisation points in the board specific code
should already have been changed to initialise this as necessary.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 0aa0142..d6343e2 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -77,16 +77,7 @@ struct s3c24xx_i2c {
 #endif
 };
 
-/* default platform data to use if not supplied in the platform_device
-*/
-
-static struct s3c2410_platform_i2c s3c24xx_i2c_default_platform = {
-	.flags		= 0,
-	.slave_addr	= 0x10,
-	.bus_freq	= 100*1000,
-	.max_freq	= 400*1000,
-	.sda_delay	= S3C2410_IICLC_SDA_DELAY5 | S3C2410_IICLC_FILTER_ON,
-};
+/* default platform data removed, dev should always carry data. */
 
 /* s3c24xx_i2c_is2440()
  *
@@ -100,22 +91,6 @@ static inline int s3c24xx_i2c_is2440(struct s3c24xx_i2c *i2c)
 	return !strcmp(pdev->name, "s3c2440-i2c");
 }
 
-
-/* s3c24xx_i2c_get_platformdata
- *
- * get the platform data associated with the given device, or return
- * the default if there is none
-*/
-
-static inline struct s3c2410_platform_i2c *
-s3c24xx_i2c_get_platformdata(struct device *dev)
-{
-	if (dev->platform_data != NULL)
-		return (struct s3c2410_platform_i2c *)dev->platform_data;
-
-	return &s3c24xx_i2c_default_platform;
-}
-
 /* s3c24xx_i2c_master_complete
  *
  * complete the message and wake up the caller, using the given return code,
@@ -648,7 +623,7 @@ static inline int freq_acceptable(unsigned int freq, unsigned int wanted)
 
 static int s3c24xx_i2c_clockrate(struct s3c24xx_i2c *i2c, unsigned int *got)
 {
-	struct s3c2410_platform_i2c *pdata;
+	struct s3c2410_platform_i2c *pdata = i2c->dev->platform_data;
 	unsigned long clkin = clk_get_rate(i2c->clk);
 	unsigned int divs, div1;
 	u32 iiccon;
@@ -656,8 +631,6 @@ static int s3c24xx_i2c_clockrate(struct s3c24xx_i2c *i2c, unsigned int *got)
 	int start, end;
 
 	i2c->clkrate = clkin;
-
-	pdata = s3c24xx_i2c_get_platformdata(i2c->adap.dev.parent);
 	clkin /= 1000;		/* clkin now in KHz */
 
 	dev_dbg(i2c->dev, "pdata %p, freq %lu %lu..%lu\n",
@@ -778,7 +751,7 @@ static int s3c24xx_i2c_init(struct s3c24xx_i2c *i2c)
 
 	/* get the plafrom data */
 
-	pdata = s3c24xx_i2c_get_platformdata(i2c->dev);
+	pdata = i2c->dev->platform_data;
 
 	/* inititalise the gpio */
 
@@ -829,7 +802,11 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
 	struct resource *res;
 	int ret;
 
-	pdata = s3c24xx_i2c_get_platformdata(&pdev->dev);
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(&pdev->dev, "no platform data\n");
+		return -EINVAL;
+	}
 
 	/* find the clock and enable it */
 
-- 
cgit v0.10.2


From 692acbd3a866a9f84e18a5980b3a97ca52e501b2 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 31 Oct 2008 16:10:28 +0000
Subject: i2c-s3c2410: Allow more than one i2c-s3c2410 adapter

Newer SoCs such as the S3C6410 have 2 instances of this i2c
controller block in and thus require the ability to create
two seperate busses from this.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index d6343e2..f14007f 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -559,19 +559,6 @@ static const struct i2c_algorithm s3c24xx_i2c_algorithm = {
 	.functionality		= s3c24xx_i2c_func,
 };
 
-static struct s3c24xx_i2c s3c24xx_i2c = {
-	.lock		= __SPIN_LOCK_UNLOCKED(s3c24xx_i2c.lock),
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER(s3c24xx_i2c.wait),
-	.tx_setup	= 50,
-	.adap		= {
-		.name			= "s3c2410-i2c",
-		.owner			= THIS_MODULE,
-		.algo			= &s3c24xx_i2c_algorithm,
-		.retries		= 2,
-		.class			= I2C_CLASS_HWMON | I2C_CLASS_SPD,
-	},
-};
-
 /* s3c24xx_i2c_calcdivisor
  *
  * return the divisor settings for a given frequency
@@ -797,7 +784,7 @@ static int s3c24xx_i2c_init(struct s3c24xx_i2c *i2c)
 
 static int s3c24xx_i2c_probe(struct platform_device *pdev)
 {
-	struct s3c24xx_i2c *i2c = &s3c24xx_i2c;
+	struct s3c24xx_i2c *i2c;
 	struct s3c2410_platform_i2c *pdata;
 	struct resource *res;
 	int ret;
@@ -808,6 +795,22 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
+	i2c = kzalloc(sizeof(struct s3c24xx_i2c), GFP_KERNEL);
+	if (!i2c) {
+		dev_err(&pdev->dev, "no memory for state\n");
+		return -ENOMEM;
+	}
+
+	strlcpy(i2c->adap.name, "s3c2410-i2c", sizeof(i2c->adap.name));
+	i2c->adap.owner   = THIS_MODULE;
+	i2c->adap.algo    = &s3c24xx_i2c_algorithm;
+	i2c->adap.retries = 2;
+	i2c->adap.class   = I2C_CLASS_HWMON | I2C_CLASS_SPD;
+	i2c->tx_setup     = 50;
+
+	spin_lock_init(&i2c->lock);
+	init_waitqueue_head(&i2c->wait);
+
 	/* find the clock and enable it */
 
 	i2c->dev = &pdev->dev;
@@ -929,6 +932,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
 	clk_put(i2c->clk);
 
  err_noclk:
+	kfree(i2c);
 	return ret;
 }
 
@@ -953,6 +957,7 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev)
 
 	release_resource(i2c->ioarea);
 	kfree(i2c->ioarea);
+	kfree(i2c);
 
 	return 0;
 }
-- 
cgit v0.10.2


From e0d1ec97853fa09cf676dc6b51dafd35db12759e Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 31 Oct 2008 16:10:30 +0000
Subject: i2c-s3c2410: Change IRQ to be plain integer.

Change the code to use a plain integer as the holder
for the IRQ for the device and use platform_get_irq()
to find it.

This makes the code slightly neater, and easier to get
the IRQ number.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index f14007f..2a0de64 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -61,6 +61,7 @@ struct s3c24xx_i2c {
 	unsigned int		msg_ptr;
 
 	unsigned int		tx_setup;
+	unsigned int		irq;
 
 	enum s3c24xx_i2c_state	state;
 	unsigned long		clkrate;
@@ -68,7 +69,6 @@ struct s3c24xx_i2c {
 	void __iomem		*regs;
 	struct clk		*clk;
 	struct device		*dev;
-	struct resource		*irq;
 	struct resource		*ioarea;
 	struct i2c_adapter	adap;
 
@@ -869,26 +869,20 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
 	 * ensure no current IRQs pending
 	 */
 
-	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (res == NULL) {
+	i2c->irq = ret = platform_get_irq(pdev, 0);
+	if (ret <= 0) {
 		dev_err(&pdev->dev, "cannot find IRQ\n");
-		ret = -ENOENT;
 		goto err_iomap;
 	}
 
-	ret = request_irq(res->start, s3c24xx_i2c_irq, IRQF_DISABLED,
-			  pdev->name, i2c);
+	ret = request_irq(i2c->irq, s3c24xx_i2c_irq, IRQF_DISABLED,
+			  dev_name(&pdev->dev), i2c);
 
 	if (ret != 0) {
-		dev_err(&pdev->dev, "cannot claim IRQ\n");
+		dev_err(&pdev->dev, "cannot claim IRQ %d\n", i2c->irq);
 		goto err_iomap;
 	}
 
-	i2c->irq = res;
-
-	dev_dbg(&pdev->dev, "irq resource %p (%lu)\n", res,
-		(unsigned long)res->start);
-
 	ret = s3c24xx_i2c_register_cpufreq(i2c);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to register cpufreq notifier\n");
@@ -918,7 +912,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
 	s3c24xx_i2c_deregister_cpufreq(i2c);
 
  err_irq:
-	free_irq(i2c->irq->start, i2c);
+	free_irq(i2c->irq, i2c);
 
  err_iomap:
 	iounmap(i2c->regs);
@@ -948,7 +942,7 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev)
 	s3c24xx_i2c_deregister_cpufreq(i2c);
 
 	i2c_del_adapter(&i2c->adap);
-	free_irq(i2c->irq->start, i2c);
+	free_irq(i2c->irq, i2c);
 
 	clk_disable(i2c->clk);
 	clk_put(i2c->clk);
-- 
cgit v0.10.2


From e355204ef70181d28544ebb65a64969340ef4822 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Tue, 16 Dec 2008 22:08:08 +0000
Subject: i2c-omap: fix type of irq handler function

The probe function used a pointer to the interrupt
handler to register as a 'void *', change it to the
proper type of irq_handler_t.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 96f3bed..be8ee2c 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -772,7 +772,7 @@ omap_i2c_probe(struct platform_device *pdev)
 	struct omap_i2c_dev	*dev;
 	struct i2c_adapter	*adap;
 	struct resource		*mem, *irq, *ioarea;
-	void *isr;
+	irq_handler_t isr;
 	int r;
 	u32 speed = 0;
 
-- 
cgit v0.10.2


From c8cae544bba6aee0f5cb0756dbab1a71d2c68737 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 09:13:11 -0800
Subject: x86: fix build error with post-merge of tip/cpus4096 and
 rr-for-ingo/master.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ingo Molnar wrote:

> allyes64 build failure:
>
> arch/x86/kernel/io_apic.c: In function â€˜set_ir_ioapic_affinity_irq_descâ€™:
> arch/x86/kernel/io_apic.c:2295: error: incompatible type for argument 2 of
> â€˜migrate_ioapic_irq_descâ€™
> arch/x86/kernel/io_apic.c: In function â€˜ir_set_msi_irq_affinityâ€™:
> arch/x86/kernel/io_apic.c:3205: error: incompatible type for argument 2 of
> â€˜set_extra_move_descâ€™
> make[1]: *** wait: No child processes.  Stop.

Here's a small patch to correct the build error with the post-merge tree.
Built and boot-tested.  I'll will reset the follow on patches in my brand
new git tree to accommodate this change.

Fix two references in io_apic.c that were incorrect.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index d7f0993..3d7d0d5 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2292,7 +2292,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 		return;
 	}
 
-	migrate_ioapic_irq_desc(desc, mask);
+	migrate_ioapic_irq_desc(desc, *mask);
 }
 static void set_ir_ioapic_affinity_irq(unsigned int irq,
 				       const struct cpumask *mask)
@@ -3203,7 +3203,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq,
 	if (assign_irq_vector(irq, cfg, *mask))
 		return;
 
-	set_extra_move_desc(desc, mask);
+	set_extra_move_desc(desc, *mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
-- 
cgit v0.10.2


From 36f5101a60de8f79c0d1ca06e50660bf5129e02c Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:51 -0800
Subject: x86: enable MAXSMP

Impact: activates new off-stack cpumask code on MAXSMP (non-default) x86 configs

Set MAXSMP to enable CONFIG_CPUMASK_OFFSTACK which moves cpumask's off
the stack (and in structs) when using cpumask_var_t.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@elte.hy>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d99eeb7..1fd4435 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -591,16 +591,17 @@ config IOMMU_HELPER
 
 config MAXSMP
 	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
-	depends on X86_64 && SMP && BROKEN
+	depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
+	select CPUMASK_OFFSTACK
 	default n
 	help
 	  Configure maximum number of CPUS and NUMA Nodes for this architecture.
 	  If unsure, say N.
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-512)" if !MAXSMP
-	range 2 512
 	depends on SMP
+	int "Maximum number of CPUs" if SMP && !MAXSMP
+	range 2 512 if SMP && !MAXSMP
 	default "4096" if MAXSMP
 	default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
 	default "8"
-- 
cgit v0.10.2


From e7986739a76cde5079da08809d8bbc6878387ae0 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:52 -0800
Subject: x86 smp: modify send_IPI_mask interface to accept cpumask_t pointers

Impact: cleanup, change parameter passing

  * Change genapic interfaces to accept cpumask_t pointers where possible.

  * Modify external callers to use cpumask_t pointers in function calls.

  * Create new send_IPI_mask_allbutself which is the same as the
    send_IPI_mask functions but removes smp_processor_id() from list.
    This removes another common need for a temporary cpumask_t variable.

  * Functions that used a temp cpumask_t variable for:

	cpumask_t allbutme = cpu_online_map;

	cpu_clear(smp_processor_id(), allbutme);
	if (!cpus_empty(allbutme))
		...

    become:

	if (!cpus_equal(cpu_online_map, cpumask_of_cpu(cpu)))
		...

  * Other minor code optimizations (like using cpus_clear instead of
    CPU_MASK_NONE, etc.)

Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index ce547f2..dc6225c 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -9,12 +9,12 @@ static inline int apic_id_registered(void)
 	return (1);
 }
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
 #ifdef CONFIG_SMP
-        return cpu_online_map;
+	return &cpu_online_map;
 #else
-        return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 #endif
 }
 
@@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid)
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-	if (mps_cpu < NR_CPUS)
+	if (mps_cpu < nr_cpu_ids)
 		return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
 
 	return BAD_APICID;
@@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[];
 /* Mapping from cpu number to logical apicid */
 static inline int cpu_to_logical_apicid(int cpu)
 {
-	if (cpu >= NR_CPUS)
+	if (cpu >= nr_cpu_ids)
 		return BAD_APICID;
 	return cpu_physical_id(cpu);
 }
@@ -119,12 +119,12 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
 }
 
 /* As we are using single CPU as destination, pick only one CPU here */
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 	int apicid;	
 
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	return apicid;
 }
diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h
index 9404c53..63553e9 100644
--- a/arch/x86/include/asm/bigsmp/ipi.h
+++ b/arch/x86/include/asm/bigsmp/ipi.h
@@ -1,9 +1,10 @@
 #ifndef __ASM_MACH_IPI_H
 #define __ASM_MACH_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector)
 	cpu_clear(smp_processor_id(), mask);
 
 	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+		send_IPI_mask(&mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(&cpu_online_map, vector);
 }
 
 #endif /* __ASM_MACH_IPI_H */
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index e24ef87..4cac083 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,14 +9,14 @@ static inline int apic_id_registered(void)
 	        return (1);
 }
 
-static inline cpumask_t target_cpus_cluster(void)
+static inline const cpumask_t *target_cpus_cluster(void)
 {
-	return CPU_MASK_ALL;
+	return &CPU_MASK_ALL;
 }
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
-	return cpumask_of_cpu(smp_processor_id());
+	return &cpumask_of_cpu(smp_processor_id());
 }
 
 #define APIC_DFR_VALUE_CLUSTER		(APIC_DFR_CLUSTER)
@@ -80,9 +80,10 @@ extern int apic_version [MAX_APICS];
 static inline void setup_apic_routing(void)
 {
 	int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
-	printk("Enabling APIC mode:  %s.  Using %d I/O APICs, target cpus %lx\n",
+	printk("Enabling APIC mode:  %s. Using %d I/O APICs, target cpus %lx\n",
 		(apic_version[apic] == 0x14) ?
-		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]);
+			"Physical Cluster" : "Logical Cluster",
+			nr_ioapics, cpus_addr(*target_cpus())[0]);
 }
 
 static inline int multi_timer_check(int apic, int irq)
@@ -100,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu)
 {
 	if (!mps_cpu)
 		return boot_cpu_physical_apicid;
-	else if (mps_cpu < NR_CPUS)
+	else if (mps_cpu < nr_cpu_ids)
 		return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
 	else
 		return BAD_APICID;
@@ -120,9 +121,9 @@ extern u8 cpu_2_logical_apicid[];
 static inline int cpu_to_logical_apicid(int cpu)
 {
 #ifdef CONFIG_SMP
-       if (cpu >= NR_CPUS)
-	       return BAD_APICID;
-       return (int)cpu_2_logical_apicid[cpu];
+	if (cpu >= nr_cpu_ids)
+		return BAD_APICID;
+	return (int)cpu_2_logical_apicid[cpu];
 #else
 	return logical_smp_processor_id();
 #endif
@@ -146,14 +147,15 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
 	return (1);
 }
 
-static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
+static inline unsigned int
+cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpumask_weight(cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return 0xFF;
@@ -161,10 +163,10 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = cpumask_first(cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpumask_test_cpu(cpu, cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
@@ -179,14 +181,14 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return cpu_to_logical_apicid(0);
@@ -194,10 +196,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpu_isset(cpu, *cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h
index 632a955..1a85072 100644
--- a/arch/x86/include/asm/es7000/ipi.h
+++ b/arch/x86/include/asm/es7000/ipi.h
@@ -1,9 +1,10 @@
 #ifndef __ASM_ES7000_IPI_H
 #define __ASM_ES7000_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
@@ -13,12 +14,12 @@ static inline void send_IPI_allbutself(int vector)
 	cpumask_t mask = cpu_online_map;
 	cpu_clear(smp_processor_id(), mask);
 	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+		send_IPI_mask(&mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(&cpu_online_map, vector);
 }
 
 #endif /* __ASM_ES7000_IPI_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 0ac17d3..b21ed21 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -24,7 +24,7 @@ struct genapic {
 	int (*probe)(void);
 
 	int (*apic_id_registered)(void);
-	cpumask_t (*target_cpus)(void);
+	const cpumask_t *(*target_cpus)(void);
 	int int_delivery_mode;
 	int int_dest_mode;
 	int ESR_DISABLE;
@@ -57,12 +57,13 @@ struct genapic {
 
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
-	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
-	cpumask_t (*vector_allocation_domain)(int cpu);
+	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
 
 #ifdef CONFIG_SMP
 	/* ipi */
-	void (*send_IPI_mask)(cpumask_t mask, int vector);
+	void (*send_IPI_mask)(const cpumask_t *mask, int vector);
+	void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 #endif
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 2cae011..a020e7d 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_GENAPIC_64_H
 #define _ASM_X86_GENAPIC_64_H
 
+#include <linux/cpumask.h>
+
 /*
  * Copyright 2004 James Cleverdon, IBM.
  * Subject to the GNU Public License, v.2
@@ -18,16 +20,17 @@ struct genapic {
 	u32 int_delivery_mode;
 	u32 int_dest_mode;
 	int (*apic_id_registered)(void);
-	cpumask_t (*target_cpus)(void);
-	cpumask_t (*vector_allocation_domain)(int cpu);
+	const cpumask_t *(*target_cpus)(void);
+	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
 	void (*init_apic_ldr)(void);
 	/* ipi */
-	void (*send_IPI_mask)(cpumask_t mask, int vector);
+	void (*send_IPI_mask)(const cpumask_t *mask, int vector);
+	void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 	void (*send_IPI_self)(int vector);
 	/* */
-	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
 	unsigned int (*phys_pkg_id)(int index_msb);
 	unsigned int (*get_apic_id)(unsigned long x);
 	unsigned long (*set_apic_id)(unsigned int id);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index f89dffb..24b6e61 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -117,7 +117,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
 	native_apic_mem_write(APIC_ICR, cfg);
 }
 
-static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
+static inline void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
@@ -128,11 +128,28 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
 	 * - mbligh
 	 */
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, mask) {
+	for_each_cpu_mask_nr(query_cpu, *mask) {
 		__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
 				      vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
+static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+{
+	unsigned long flags;
+	unsigned int query_cpu;
+	unsigned int this_cpu = smp_processor_id();
+
+	/* See Hack comment above */
+
+	local_irq_save(flags);
+	for_each_cpu_mask_nr(query_cpu, *mask)
+		if (query_cpu != this_cpu)
+			__send_IPI_dest_field(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	local_irq_restore(flags);
+}
+
 #endif /* _ASM_X86_IPI_H */
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index 6cb3a46..c18896b 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
 
 #define APIC_DFR_VALUE	(APIC_DFR_FLAT)
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 { 
 #ifdef CONFIG_SMP
-	return cpu_online_map;
+	return &cpu_online_map;
 #else
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 #endif
 } 
 
@@ -61,9 +61,9 @@ static inline int apic_id_registered(void)
 	return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
-	return cpus_addr(cpumask)[0];
+	return cpus_addr(*cpumask)[0];
 }
 
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
@@ -88,7 +88,7 @@ static inline int apicid_to_node(int logical_apicid)
 #endif
 }
 
-static inline cpumask_t vector_allocation_domain(int cpu)
+static inline void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
         /* Careful. Some cpus do not strictly honor the set of cpus
          * specified in the interrupt destination when using lowest
@@ -98,8 +98,7 @@ static inline cpumask_t vector_allocation_domain(int cpu)
          * deliver interrupts to the wrong hyperthread when only one
          * hyperthread was specified in the interrupt desitination.
          */
-        cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-        return domain;
+	*retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
 }
 #endif
 
@@ -131,7 +130,7 @@ static inline int cpu_to_logical_apicid(int cpu)
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-	if (mps_cpu < NR_CPUS && cpu_present(mps_cpu))
+	if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
 		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
 	else
 		return BAD_APICID;
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h
index fabca01..9353ab8 100644
--- a/arch/x86/include/asm/mach-default/mach_ipi.h
+++ b/arch/x86/include/asm/mach-default/mach_ipi.h
@@ -4,7 +4,8 @@
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
 
-void send_IPI_mask_bitmask(cpumask_t mask, int vector);
+void send_IPI_mask_bitmask(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 void __send_IPI_shortcut(unsigned int shortcut, int vector);
 
 extern int no_broadcast;
@@ -12,28 +13,27 @@ extern int no_broadcast;
 #ifdef CONFIG_X86_64
 #include <asm/genapic.h>
 #define send_IPI_mask (genapic->send_IPI_mask)
+#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
 #else
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_bitmask(mask, vector);
 }
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 #endif
 
 static inline void __local_send_IPI_allbutself(int vector)
 {
-	if (no_broadcast || vector == NMI_VECTOR) {
-		cpumask_t mask = cpu_online_map;
-
-		cpu_clear(smp_processor_id(), mask);
-		send_IPI_mask(mask, vector);
-	} else
+	if (no_broadcast || vector == NMI_VECTOR)
+		send_IPI_mask_allbutself(&cpu_online_map, vector);
+	else
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
 }
 
 static inline void __local_send_IPI_all(int vector)
 {
 	if (no_broadcast || vector == NMI_VECTOR)
-		send_IPI_mask(cpu_online_map, vector);
+		send_IPI_mask(&cpu_online_map, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
 }
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index 0bf2a06..1df7ebe 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -7,9 +7,9 @@
 
 #define APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
-	return CPU_MASK_ALL;
+	return &CPU_MASK_ALL;
 }
 
 #define NO_BALANCE_IRQ (1)
@@ -122,7 +122,7 @@ static inline void enable_apic_mode(void)
  * We use physical apicids here, not logical, so just return the default
  * physical broadcast to stop people from breaking us
  */
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	return (int) 0xF;
 }
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h
index 935588d..c734d7a 100644
--- a/arch/x86/include/asm/numaq/ipi.h
+++ b/arch/x86/include/asm/numaq/ipi.h
@@ -1,9 +1,10 @@
 #ifndef __ASM_NUMAQ_IPI_H
 #define __ASM_NUMAQ_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector)
 	cpu_clear(smp_processor_id(), mask);
 
 	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+		send_IPI_mask(&mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(&cpu_online_map, vector);
 }
 
 #endif /* __ASM_NUMAQ_IPI_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index d12811c..c4a9aa52 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -60,7 +60,7 @@ struct smp_ops {
 	void (*cpu_die)(unsigned int cpu);
 	void (*play_dead)(void);
 
-	void (*send_call_func_ipi)(cpumask_t mask);
+	void (*send_call_func_ipi)(const cpumask_t *mask);
 	void (*send_call_func_single_ipi)(int cpu);
 };
 
@@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu)
 
 static inline void arch_send_call_function_ipi(cpumask_t mask)
 {
-	smp_ops.send_call_func_ipi(mask);
+	smp_ops.send_call_func_ipi(&mask);
 }
 
 void cpu_disable_common(void);
@@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu);
 void native_play_dead(void);
 void play_dead_common(void);
 
-void native_send_call_func_ipi(cpumask_t mask);
+void native_send_call_func_ipi(const cpumask_t *mask);
 void native_send_call_func_single_ipi(int cpu);
 
 extern void prefill_possible_map(void);
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 9b3070f..437dc83 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -14,13 +14,13 @@
 
 #define APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
 	/* CPU_MASK_ALL (0xff) has undefined behaviour with
 	 * dest_LowestPrio mode logical clustered apic interrupt routing
 	 * Just start on cpu 0.  IRQ balancing will spread load
 	 */
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 }
 
 #define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -137,14 +137,14 @@ static inline void enable_apic_mode(void)
 {
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return (int) 0xFF;
@@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpu_isset(cpu, *cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h
index 53bd1e7..a8a2c24 100644
--- a/arch/x86/include/asm/summit/ipi.h
+++ b/arch/x86/include/asm/summit/ipi.h
@@ -1,9 +1,10 @@
 #ifndef __ASM_SUMMIT_IPI_H
 #define __ASM_SUMMIT_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector)
 	cpu_clear(smp_processor_id(), mask);
 
 	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+		send_IPI_mask(&mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(&cpu_online_map, vector);
 }
 
 #endif /* __ASM_SUMMIT_IPI_H */
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index b2cef49..a375791 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -141,7 +141,7 @@ static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(const struct cpumask *mask);
+static void lapic_timer_broadcast(const cpumask_t *mask);
 static void apic_pm_activate(void);
 
 /*
@@ -453,10 +453,10 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 /*
  * Local APIC timer broadcast function
  */
-static void lapic_timer_broadcast(const struct cpumask *mask)
+static void lapic_timer_broadcast(const cpumask_t *mask)
 {
 #ifdef CONFIG_SMP
-	send_IPI_mask(*mask, LOCAL_TIMER_VECTOR);
+	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
 #endif
 }
 
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2685538..81e01f7 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -77,10 +77,7 @@ static int crash_nmi_callback(struct notifier_block *self,
 
 static void smp_send_nmi_allbutself(void)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(safe_smp_processor_id(), mask);
-	if (!cpus_empty(mask))
-		send_IPI_mask(mask, NMI_VECTOR);
+	send_IPI_allbutself(NMI_VECTOR);
 }
 
 static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c026279..50eebd0 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
 
-static cpumask_t flat_target_cpus(void)
+static const cpumask_t *flat_target_cpus(void)
 {
-	return cpu_online_map;
+	return &cpu_online_map;
 }
 
-static cpumask_t flat_vector_allocation_domain(int cpu)
+static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -45,8 +45,7 @@ static cpumask_t flat_vector_allocation_domain(int cpu)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t) { {[0] = APIC_ALL_CPUS, } };
 }
 
 /*
@@ -69,9 +68,8 @@ static void flat_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
+static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
 {
-	unsigned long mask = cpus_addr(cpumask)[0];
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -79,20 +77,40 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
 	local_irq_restore(flags);
 }
 
+static void flat_send_IPI_mask(const cpumask_t *cpumask, int vector)
+{
+	unsigned long mask = cpus_addr(*cpumask)[0];
+
+	_flat_send_IPI_mask(mask, vector);
+}
+
+static void flat_send_IPI_mask_allbutself(const cpumask_t *cpumask, int vector)
+{
+	unsigned long mask = cpus_addr(*cpumask)[0];
+	int cpu = smp_processor_id();
+
+	if (cpu < BITS_PER_LONG)
+		clear_bit(cpu, &mask);
+	_flat_send_IPI_mask(mask, vector);
+}
+
 static void flat_send_IPI_allbutself(int vector)
 {
+	int cpu = smp_processor_id();
 #ifdef	CONFIG_HOTPLUG_CPU
 	int hotplug = 1;
 #else
 	int hotplug = 0;
 #endif
 	if (hotplug || vector == NMI_VECTOR) {
-		cpumask_t allbutme = cpu_online_map;
+		if (!cpus_equal(cpu_online_map, cpumask_of_cpu(cpu))) {
+			unsigned long mask = cpus_addr(cpu_online_map)[0];
 
-		cpu_clear(smp_processor_id(), allbutme);
+			if (cpu < BITS_PER_LONG)
+				clear_bit(cpu, &mask);
 
-		if (!cpus_empty(allbutme))
-			flat_send_IPI_mask(allbutme, vector);
+			_flat_send_IPI_mask(mask, vector);
+		}
 	} else if (num_online_cpus() > 1) {
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
 	}
@@ -101,7 +119,7 @@ static void flat_send_IPI_allbutself(int vector)
 static void flat_send_IPI_all(int vector)
 {
 	if (vector == NMI_VECTOR)
-		flat_send_IPI_mask(cpu_online_map, vector);
+		flat_send_IPI_mask(&cpu_online_map, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
@@ -135,9 +153,9 @@ static int flat_apic_id_registered(void)
 	return physid_isset(read_xapic_id(), phys_cpu_present_map);
 }
 
-static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
-	return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+	return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
 }
 
 static unsigned int phys_pkg_id(int index_msb)
@@ -157,6 +175,7 @@ struct genapic apic_flat =  {
 	.send_IPI_all = flat_send_IPI_all,
 	.send_IPI_allbutself = flat_send_IPI_allbutself,
 	.send_IPI_mask = flat_send_IPI_mask,
+	.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
@@ -188,35 +207,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static cpumask_t physflat_target_cpus(void)
+static const cpumask_t *physflat_target_cpus(void)
 {
-	return cpu_online_map;
+	return &cpu_online_map;
 }
 
-static cpumask_t physflat_vector_allocation_domain(int cpu)
+static void physflat_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	return cpumask_of_cpu(cpu);
+	cpus_clear(*retmask);
+	cpu_set(cpu, *retmask);
 }
 
-static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
+static void physflat_send_IPI_mask(const cpumask_t *cpumask, int vector)
 {
 	send_IPI_mask_sequence(cpumask, vector);
 }
 
-static void physflat_send_IPI_allbutself(int vector)
+static void physflat_send_IPI_mask_allbutself(const cpumask_t *cpumask,
+					      int vector)
 {
-	cpumask_t allbutme = cpu_online_map;
+	send_IPI_mask_allbutself(cpumask, vector);
+}
 
-	cpu_clear(smp_processor_id(), allbutme);
-	physflat_send_IPI_mask(allbutme, vector);
+static void physflat_send_IPI_allbutself(int vector)
+{
+	send_IPI_mask_allbutself(&cpu_online_map, vector);
 }
 
 static void physflat_send_IPI_all(int vector)
 {
-	physflat_send_IPI_mask(cpu_online_map, vector);
+	physflat_send_IPI_mask(&cpu_online_map, vector);
 }
 
-static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -224,7 +247,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
@@ -243,6 +266,7 @@ struct genapic apic_physflat =  {
 	.send_IPI_all = physflat_send_IPI_all,
 	.send_IPI_allbutself = physflat_send_IPI_allbutself,
 	.send_IPI_mask = physflat_send_IPI_mask,
+	.send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f6a2c8e..f5fa9a9 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t x2apic_target_cpus(void)
+static const cpumask_t *x2apic_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 }
 
 /*
  * for now each logical cpu is in its own vector allocation domain.
  */
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	cpus_clear(*retmask);
+	cpu_set(cpu, *retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -56,32 +55,52 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
  * at once. We have 16 cpu's in a cluster. This will minimize IPI register
  * writes.
  */
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask(query_cpu, mask) {
-		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
-				       vector, APIC_DEST_LOGICAL);
-	}
+	for_each_cpu_mask_nr(query_cpu, *mask)
+		__x2apic_send_IPI_dest(
+			per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+			vector, APIC_DEST_LOGICAL);
 	local_irq_restore(flags);
 }
 
-static void x2apic_send_IPI_allbutself(int vector)
+static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
 {
-	cpumask_t mask = cpu_online_map;
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
 
-	cpu_clear(smp_processor_id(), mask);
+	local_irq_save(flags);
+	for_each_cpu_mask_nr(query_cpu, *mask)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+				vector, APIC_DEST_LOGICAL);
+	local_irq_restore(flags);
+}
 
-	if (!cpus_empty(mask))
-		x2apic_send_IPI_mask(mask, vector);
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
+
+	local_irq_save(flags);
+	for_each_online_cpu(query_cpu)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+				vector, APIC_DEST_LOGICAL);
+	local_irq_restore(flags);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(cpu_online_map, vector);
+	x2apic_send_IPI_mask(&cpu_online_map, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -89,7 +108,7 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -97,8 +116,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
-	if ((unsigned)cpu < NR_CPUS)
+	cpu = first_cpu(*cpumask);
+	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_logical_apicid, cpu);
 	else
 		return BAD_APICID;
@@ -150,6 +169,7 @@ struct genapic apic_x2apic_cluster = {
 	.send_IPI_all = x2apic_send_IPI_all,
 	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
 	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211..41c27b2 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t x2apic_target_cpus(void)
+static const cpumask_t *x2apic_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 }
 
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	cpus_clear(*retmask);
+	cpu_set(cpu, *retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -54,32 +53,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
 	x2apic_icr_write(cfg, apicid);
 }
 
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask(query_cpu, mask) {
+	for_each_cpu_mask_nr(query_cpu, *mask) {
 		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
 				       vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
-static void x2apic_send_IPI_allbutself(int vector)
+static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
 {
-	cpumask_t mask = cpu_online_map;
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
+
+	local_irq_save(flags);
+	for_each_cpu_mask_nr(query_cpu, *mask) {
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	}
+	local_irq_restore(flags);
+}
 
-	cpu_clear(smp_processor_id(), mask);
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
 
-	if (!cpus_empty(mask))
-		x2apic_send_IPI_mask(mask, vector);
+	local_irq_save(flags);
+	for_each_online_cpu(query_cpu)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	local_irq_restore(flags);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(cpu_online_map, vector);
+	x2apic_send_IPI_mask(&cpu_online_map, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -87,7 +107,7 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -95,8 +115,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
-	if ((unsigned)cpu < NR_CPUS)
+	cpu = first_cpu(*cpumask);
+	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
 		return BAD_APICID;
@@ -145,6 +165,7 @@ struct genapic apic_x2apic_phys = {
 	.send_IPI_all = x2apic_send_IPI_all,
 	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
 	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2c7dbdb..0106594 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -75,16 +75,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t uv_target_cpus(void)
+static const cpumask_t *uv_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 }
 
-static cpumask_t uv_vector_allocation_domain(int cpu)
+static void uv_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	cpus_clear(*retmask);
+	cpu_set(cpu, *retmask);
 }
 
 int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -123,28 +122,37 @@ static void uv_send_IPI_one(int cpu, int vector)
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 }
 
-static void uv_send_IPI_mask(cpumask_t mask, int vector)
+static void uv_send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	unsigned int cpu;
 
-	for_each_possible_cpu(cpu)
-		if (cpu_isset(cpu, mask))
+	for_each_cpu_mask_nr(cpu, *mask)
+		uv_send_IPI_one(cpu, vector);
+}
+
+static void uv_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+{
+	unsigned int cpu;
+	unsigned int this_cpu = smp_processor_id();
+
+	for_each_cpu_mask_nr(cpu, *mask)
+		if (cpu != this_cpu)
 			uv_send_IPI_one(cpu, vector);
 }
 
 static void uv_send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-
-	cpu_clear(smp_processor_id(), mask);
+	unsigned int cpu;
+	unsigned int this_cpu = smp_processor_id();
 
-	if (!cpus_empty(mask))
-		uv_send_IPI_mask(mask, vector);
+	for_each_online_cpu(cpu)
+		if (cpu != this_cpu)
+			uv_send_IPI_one(cpu, vector);
 }
 
 static void uv_send_IPI_all(int vector)
 {
-	uv_send_IPI_mask(cpu_online_map, vector);
+	uv_send_IPI_mask(&cpu_online_map, vector);
 }
 
 static int uv_apic_id_registered(void)
@@ -156,7 +164,7 @@ static void uv_init_apic_ldr(void)
 {
 }
 
-static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -164,7 +172,7 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
@@ -218,6 +226,7 @@ struct genapic apic_x2apic_uv_x = {
 	.send_IPI_all = uv_send_IPI_all,
 	.send_IPI_allbutself = uv_send_IPI_allbutself,
 	.send_IPI_mask = uv_send_IPI_mask,
+	.send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
 	.send_IPI_self = uv_send_IPI_self,
 	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 3d7d0d5..7f23ce7 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -231,7 +231,8 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
-static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+static inline void
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 }
 
@@ -396,7 +397,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
 	}
 }
 
-static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
 
 static void set_ioapic_affinity_irq_desc(struct irq_desc *desc,
 					 const struct cpumask *mask)
@@ -412,13 +414,13 @@ static void set_ioapic_affinity_irq_desc(struct irq_desc *desc,
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 	/*
 	 * Only the high 8 bits are valid.
 	 */
@@ -1099,7 +1101,8 @@ void unlock_vector_lock(void)
 	spin_unlock(&vector_lock);
 }
 
-static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1115,35 +1118,32 @@ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
 	unsigned int old_vector;
 	int cpu;
+	cpumask_t tmp_mask;
 
 	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
 		return -EBUSY;
 
-	/* Only try and allocate irqs on cpus that are present */
-	cpus_and(mask, mask, cpu_online_map);
-
 	old_vector = cfg->vector;
 	if (old_vector) {
-		cpumask_t tmp;
-		cpus_and(tmp, cfg->domain, mask);
-		if (!cpus_empty(tmp))
+		cpus_and(tmp_mask, *mask, cpu_online_map);
+		cpus_and(tmp_mask, cfg->domain, tmp_mask);
+		if (!cpus_empty(tmp_mask))
 			return 0;
 	}
 
-	for_each_cpu_mask_nr(cpu, mask) {
-		cpumask_t domain, new_mask;
+	/* Only try and allocate irqs on cpus that are present */
+	for_each_cpu_and(cpu, mask, &cpu_online_map) {
 		int new_cpu;
 		int vector, offset;
 
-		domain = vector_allocation_domain(cpu);
-		cpus_and(new_mask, domain, cpu_online_map);
+		vector_allocation_domain(cpu, &tmp_mask);
 
 		vector = current_vector;
 		offset = current_offset;
 next:
 		vector += 8;
 		if (vector >= first_system_vector) {
-			/* If we run out of vectors on large boxen, must share them. */
+			/* If out of vectors on large boxen, must share them. */
 			offset = (offset + 1) % 8;
 			vector = FIRST_DEVICE_VECTOR + offset;
 		}
@@ -1156,7 +1156,7 @@ next:
 		if (vector == SYSCALL_VECTOR)
 			goto next;
 #endif
-		for_each_cpu_mask_nr(new_cpu, new_mask)
+		for_each_cpu_and(new_cpu, &tmp_mask, &cpu_online_map)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
 		/* Found one! */
@@ -1166,16 +1166,17 @@ next:
 			cfg->move_in_progress = 1;
 			cfg->old_domain = cfg->domain;
 		}
-		for_each_cpu_mask_nr(new_cpu, new_mask)
+		for_each_cpu_and(new_cpu, &tmp_mask, &cpu_online_map)
 			per_cpu(vector_irq, new_cpu)[vector] = irq;
 		cfg->vector = vector;
-		cfg->domain = domain;
+		cfg->domain = tmp_mask;
 		return 0;
 	}
 	return -ENOSPC;
 }
 
-static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
 	int err;
 	unsigned long flags;
@@ -1384,8 +1385,8 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
 
 	cfg = desc->chip_data;
 
-	mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, cfg, mask))
+	mask = *TARGET_CPUS;
+	if (assign_irq_vector(irq, cfg, &mask))
 		return;
 
 	cpus_and(mask, cfg->domain, mask);
@@ -1398,7 +1399,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
 
 
 	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-			       cpu_mask_to_apicid(mask), trigger, polarity,
+			       cpu_mask_to_apicid(&mask), trigger, polarity,
 			       cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic].mp_apicid, pin);
@@ -2121,7 +2122,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+	send_IPI_mask(&cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
 	spin_unlock_irqrestore(&vector_lock, flags);
 
 	return 1;
@@ -2170,18 +2171,19 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
  * as simple as edge triggered migration and we can do the irq migration
  * with a simple atomic update to IO-APIC RTE.
  */
-static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
+static void
+migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
-	cpumask_t tmp, cleanup_mask;
+	cpumask_t tmpmask;
 	struct irte irte;
 	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
 	unsigned int irq;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	cpus_and(tmpmask, *mask, cpu_online_map);
+	if (cpus_empty(tmpmask))
 		return;
 
 	irq = desc->irq;
@@ -2194,8 +2196,8 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
 
 	set_extra_move_desc(desc, mask);
 
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	cpus_and(tmpmask, cfg->domain, *mask);
+	dest = cpu_mask_to_apicid(&tmpmask);
 
 	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
@@ -2213,13 +2215,13 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
 	modify_irte(irq, &irte);
 
 	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cpus_and(tmpmask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(tmpmask);
+		send_IPI_mask(&tmpmask, IRQ_MOVE_CLEANUP_VECTOR);
 		cfg->move_in_progress = 0;
 	}
 
-	desc->affinity = mask;
+	desc->affinity = *mask;
 }
 
 static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2241,7 +2243,7 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
 	}
 
 	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq_desc(desc, desc->pending_mask);
+	migrate_ioapic_irq_desc(desc, &desc->pending_mask);
 
 	ret = 0;
 	desc->status &= ~IRQ_MOVE_PENDING;
@@ -2292,7 +2294,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 		return;
 	}
 
-	migrate_ioapic_irq_desc(desc, *mask);
+	migrate_ioapic_irq_desc(desc, mask);
 }
 static void set_ir_ioapic_affinity_irq(unsigned int irq,
 				       const struct cpumask *mask)
@@ -2359,7 +2361,7 @@ static void irq_complete_move(struct irq_desc **descp)
 
 		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 		cfg->move_in_progress = 0;
 	}
 }
@@ -3089,13 +3091,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	cpumask_t tmp;
 
 	cfg = irq_cfg(irq);
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, cfg, tmp);
+	tmp = *TARGET_CPUS;
+	err = assign_irq_vector(irq, cfg, &tmp);
 	if (err)
 		return err;
 
 	cpus_and(tmp, cfg->domain, tmp);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
@@ -3161,13 +3163,13 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 	read_msi_msg_desc(desc, &msg);
 
@@ -3184,8 +3186,8 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void ir_set_msi_irq_affinity(unsigned int irq,
-				    const struct cpumask *mask)
+static void
+ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3200,13 +3202,13 @@ static void ir_set_msi_irq_affinity(unsigned int irq,
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -3224,7 +3226,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq,
 	if (cfg->move_in_progress) {
 		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 		cfg->move_in_progress = 0;
 	}
 
@@ -3419,7 +3421,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static void dmar_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3431,13 +3433,13 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 	dmar_msi_read(irq, &msg);
 
@@ -3481,7 +3483,7 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static void hpet_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3493,13 +3495,13 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 	hpet_msi_read(irq, &msg);
 
@@ -3564,7 +3566,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static void set_ht_irq_affinity(unsigned int irq, const cpumask_t *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3575,13 +3577,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
 	cpumask_copy(&desc->affinity, mask);
@@ -3607,14 +3609,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 	cpumask_t tmp;
 
 	cfg = irq_cfg(irq);
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, cfg, tmp);
+	err = assign_irq_vector(irq, cfg, TARGET_CPUS);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
 		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(tmp);
+		dest = cpu_mask_to_apicid(&tmp);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
@@ -3650,7 +3651,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 		       unsigned long mmr_offset)
 {
-	const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+	const cpumask_t *eligible_cpu = &cpumask_of_cpu(cpu);
 	struct irq_cfg *cfg;
 	int mmr_pnode;
 	unsigned long mmr_value;
@@ -3660,7 +3661,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 
 	cfg = irq_cfg(irq);
 
-	err = assign_irq_vector(irq, cfg, *eligible_cpu);
+	err = assign_irq_vector(irq, cfg, eligible_cpu);
 	if (err != 0)
 		return err;
 
@@ -3679,7 +3680,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	entry->polarity = 0;
 	entry->trigger = 0;
 	entry->mask = 0;
-	entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+	entry->dest = cpu_mask_to_apicid(eligible_cpu);
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3890,7 +3891,7 @@ void __init setup_ioapic_dest(void)
 	int pin, ioapic, irq, irq_entry;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	cpumask_t mask;
+	const cpumask_t *mask;
 
 	if (skip_ioapic_setup == 1)
 		return;
@@ -3921,16 +3922,16 @@ void __init setup_ioapic_dest(void)
 			 */
 			if (desc->status &
 			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-				mask = desc->affinity;
+				mask = &desc->affinity;
 			else
 				mask = TARGET_CPUS;
 
 #ifdef CONFIG_INTR_REMAP
 			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq_desc(desc, &mask);
+				set_ir_ioapic_affinity_irq_desc(desc, mask);
 			else
 #endif
-				set_ioapic_affinity_irq_desc(desc, &mask);
+				set_ioapic_affinity_irq_desc(desc, mask);
 		}
 
 	}
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index f1c688e..86aa50f 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,9 +116,9 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
 /*
  * This is only used on smaller machines.
  */
-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+void send_IPI_mask_bitmask(const cpumask_t *cpumask, int vector)
 {
-	unsigned long mask = cpus_addr(cpumask)[0];
+	unsigned long mask = cpus_addr(*cpumask)[0];
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -127,7 +127,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
 	local_irq_restore(flags);
 }
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector)
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
 {
 	unsigned long flags;
 	unsigned int query_cpu;
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
 	 */
 
 	local_irq_save(flags);
-	for_each_possible_cpu(query_cpu) {
-		if (cpu_isset(query_cpu, mask)) {
+	for_each_cpu_mask_nr(query_cpu, *mask)
+		__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
+	local_irq_restore(flags);
+}
+
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+{
+	unsigned long flags;
+	unsigned int query_cpu;
+	unsigned int this_cpu = smp_processor_id();
+
+	/* See Hack comment above */
+
+	local_irq_save(flags);
+	for_each_cpu_mask_nr(query_cpu, *mask)
+		if (query_cpu != this_cpu)
 			__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
 					      vector);
-		}
-	}
 	local_irq_restore(flags);
 }
 
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 3f92b13..341df94 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu)
 		WARN_ON(1);
 		return;
 	}
-	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+	send_IPI_mask(&cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
 }
 
 void native_send_call_func_single_ipi(int cpu)
 {
-	send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+	send_IPI_mask(&cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
-void native_send_call_func_ipi(cpumask_t mask)
+void native_send_call_func_ipi(const cpumask_t *mask)
 {
 	cpumask_t allbutself;
 
 	allbutself = cpu_online_map;
 	cpu_clear(smp_processor_id(), allbutself);
 
-	if (cpus_equal(mask, allbutself) &&
+	if (cpus_equal(*mask, allbutself) &&
 	    cpus_equal(cpu_online_map, cpu_callout_map))
 		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 	else
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index f4049f3..174ea90 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -164,7 +164,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
 
 	while (!cpus_empty(flush_cpumask))
 		/* nothing. lockup detection does not belong here */
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 8f919ca..de6f1bd 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
 
 	while (!cpus_empty(f->flush_cpumask))
 		cpu_relax();
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3624a36..bc4c784 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -42,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
 	 { }
 };
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-        return cpumask_of_cpu(cpu);
+	cpus_clear(*retmask);
+	cpu_set(cpu, *retmask);
 }
 
 static int probe_bigsmp(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 7b4e6d0..4ba5cca 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 }
 #endif
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -97,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 71a309b..511d794 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 2c6d234..2821ffc 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -24,7 +24,7 @@ static int probe_summit(void)
 	return 0;
 }
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -34,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index acd9b67..2cce362 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void)
 {
 	int i, rc;
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
 		if (rc >= 0) {
 			num_processors++;
@@ -196,7 +196,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 
 	/* Restrict the possible_map according to max_cpus. */
 	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
-		for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
+		for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
 			continue;
 		cpu_clear(cpu, cpu_possible_map);
 	}
@@ -408,24 +408,22 @@ static void xen_smp_send_reschedule(int cpu)
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
 
-static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
+static void xen_send_IPI_mask(const cpumask_t *mask, enum ipi_vector vector)
 {
 	unsigned cpu;
 
-	cpus_and(mask, mask, cpu_online_map);
-
-	for_each_cpu_mask_nr(cpu, mask)
+	for_each_cpu_and(cpu, mask, &cpu_online_map)
 		xen_send_IPI_one(cpu, vector);
 }
 
-static void xen_smp_send_call_function_ipi(cpumask_t mask)
+static void xen_smp_send_call_function_ipi(const cpumask_t *mask)
 {
 	int cpu;
 
 	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
 
 	/* Make sure other vcpus get a chance to run if they need to. */
-	for_each_cpu_mask_nr(cpu, mask) {
+	for_each_cpu_mask_nr(cpu, *mask) {
 		if (xen_vcpu_stolen(cpu)) {
 			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
 			break;
@@ -435,7 +433,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask)
 
 static void xen_smp_send_call_function_single_ipi(int cpu)
 {
-	xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+	xen_send_IPI_mask(&cpumask_of_cpu(cpu),
+			  XEN_CALL_FUNCTION_SINGLE_VECTOR);
 }
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
-- 
cgit v0.10.2


From a1681965011916c2f1f0f1f87e70784f5d5d5be5 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:53 -0800
Subject: x86: move and enhance debug printk for nr_cpu_ids etc.

Impact: cleanup, better debugging

This has proven useful in debugging, *before* we try to use
for_each_possible_cpu().  It also now shows nr_cpumask_bits.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 1c20842..0b63b08 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -152,6 +152,11 @@ void __init setup_per_cpu_areas(void)
 	old_size = PERCPU_ENOUGH_ROOM;
 	align = max_t(unsigned long, PAGE_SIZE, align);
 	size = roundup(old_size, align);
+
+	printk(KERN_INFO
+		"NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
+		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
+
 	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
@@ -168,24 +173,24 @@ void __init setup_per_cpu_areas(void)
 			       "cpu %d has no node %d or node-local memory\n",
 				cpu, node);
 			if (ptr)
-				printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
+				printk(KERN_DEBUG
+					"per cpu data for cpu%d at %016lx\n",
 					 cpu, __pa(ptr));
 		}
 		else {
 			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
 							__pa(MAX_DMA_ADDRESS));
 			if (ptr)
-				printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
-					 cpu, node, __pa(ptr));
+				printk(KERN_DEBUG
+					"per cpu data for cpu%d on node%d "
+					"at %016lx\n",
+					cpu, node, __pa(ptr));
 		}
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 	}
 
-	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
-		NR_CPUS, nr_cpu_ids, nr_node_ids);
-
 	/* Setup percpu data maps */
 	setup_per_cpu_maps();
 
-- 
cgit v0.10.2


From 95d313cf1c1ecedc8bec5727b09bdacbf67dfc45 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:54 -0800
Subject: x86: Add cpu_mask_to_apicid_and

Impact: new API

Add a helper function that takes two cpumask's, and's them and then
returns the apicid of the result.  This removes a need in io_apic.c
that uses a temporary cpumask to hold (mask & cfg->domain).

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index dc6225c..99f9aba 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -129,6 +129,22 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
+static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask))
+			return cpu_to_logical_apicid(cpu);
+
+	return BAD_APICID;
+}
+
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
 {
 	return cpuid_apic >> index_msb;
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 4cac083..c2bed77 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -214,6 +214,53 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
+static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	int num_bits_set;
+	int num_bits_set2;
+	int cpus_found = 0;
+	int cpu;
+	int apicid = 0;
+
+	num_bits_set = cpus_weight(*cpumask);
+	num_bits_set2 = cpus_weight(*andmask);
+	num_bits_set = min_t(int, num_bits_set, num_bits_set2);
+	/* Return id to all */
+	if (num_bits_set >= nr_cpu_ids)
+#if defined CONFIG_ES7000_CLUSTERED_APIC
+		return 0xFF;
+#else
+		return cpu_to_logical_apicid(0);
+#endif
+	/*
+	 * The cpus in the mask must all be on the apic cluster.  If are not
+	 * on the same apicid cluster return default value of TARGET_CPUS.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask)
+			apicid = cpu_to_logical_apicid(cpu);
+	while (cpus_found < num_bits_set) {
+		if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) {
+			int new_apicid = cpu_to_logical_apicid(cpu);
+			if (apicid_cluster(apicid) !=
+					apicid_cluster(new_apicid)) {
+				printk(KERN_WARNING
+					"%s: Not a valid mask!\n", __func__);
+#if defined CONFIG_ES7000_CLUSTERED_APIC
+				return 0xFF;
+#else
+				return cpu_to_logical_apicid(0);
+#endif
+			}
+			apicid = new_apicid;
+			cpus_found++;
+		}
+		cpu++;
+	}
+	return apicid;
+}
+
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
 {
 	return cpuid_apic >> index_msb;
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index b21ed21..325298a 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -58,6 +58,8 @@ struct genapic {
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
 	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+	unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask,
+					       const cpumask_t *andmask);
 	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
 
 #ifdef CONFIG_SMP
@@ -115,6 +117,7 @@ struct genapic {
 	APICFUNC(get_apic_id)				\
 	.apic_id_mask = APIC_ID_MASK,			\
 	APICFUNC(cpu_mask_to_apicid)			\
+	APICFUNC(cpu_mask_to_apicid_and)		\
 	APICFUNC(vector_allocation_domain)		\
 	APICFUNC(acpi_madt_oem_check)			\
 	IPIFUNC(send_IPI_mask)				\
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index a020e7d..301c7f4 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -31,6 +31,8 @@ struct genapic {
 	void (*send_IPI_self)(int vector);
 	/* */
 	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+	unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask,
+					       const cpumask_t *andmask);
 	unsigned int (*phys_pkg_id)(int index_msb);
 	unsigned int (*get_apic_id)(unsigned long x);
 	unsigned long (*set_apic_id)(unsigned int id);
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index c18896b..229b605 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -28,6 +28,7 @@ static inline const cpumask_t *target_cpus(void)
 #define apic_id_registered (genapic->apic_id_registered)
 #define init_apic_ldr (genapic->init_apic_ldr)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
 #define phys_pkg_id	(genapic->phys_pkg_id)
 #define vector_allocation_domain    (genapic->vector_allocation_domain)
 #define read_apic_id()  (GET_APIC_ID(apic_read(APIC_ID)))
@@ -66,6 +67,15 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return cpus_addr(*cpumask)[0];
 }
 
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask,
+					      const cpumask_t *andmask)
+{
+	unsigned long mask1 = cpus_addr(*cpumask)[0];
+	unsigned long mask2 = cpus_addr(*andmask)[0];
+
+	return (unsigned int)(mask1 & mask2);
+}
+
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
 {
 	return cpuid_apic >> index_msb;
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index e430f47..48553e9 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -24,6 +24,7 @@
 #define check_phys_apicid_present (genapic->check_phys_apicid_present)
 #define check_apicid_used (genapic->check_apicid_used)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
 #define vector_allocation_domain (genapic->vector_allocation_domain)
 #define enable_apic_mode (genapic->enable_apic_mode)
 #define phys_pkg_id (genapic->phys_pkg_id)
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index 1df7ebe..abf668c 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -127,6 +127,12 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return (int) 0xF;
 }
 
+static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	return (int) 0xF;
+}
+
 /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
 {
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 437dc83..cbcc2c7 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -170,6 +170,45 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
+static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	int num_bits_set;
+	int num_bits_set2;
+	int cpus_found = 0;
+	int cpu;
+	int apicid = 0;
+
+	num_bits_set = cpus_weight(*cpumask);
+	num_bits_set2 = cpus_weight(*andmask);
+	num_bits_set = min_t(int, num_bits_set, num_bits_set2);
+	/* Return id to all */
+	if (num_bits_set >= nr_cpu_ids)
+		return 0xFF;
+	/*
+	 * The cpus in the mask must all be on the apic cluster.  If are not
+	 * on the same apicid cluster return default value of TARGET_CPUS.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask)
+			apicid = cpu_to_logical_apicid(cpu);
+	while (cpus_found < num_bits_set) {
+		if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) {
+			int new_apicid = cpu_to_logical_apicid(cpu);
+			if (apicid_cluster(apicid) !=
+					apicid_cluster(new_apicid)) {
+				printk(KERN_WARNING
+					"%s: Not a valid mask!\n", __func__);
+				return 0xFF;
+			}
+			apicid = apicid | new_apicid;
+			cpus_found++;
+		}
+		cpu++;
+	}
+	return apicid;
+}
+
 /* cpuid returns the value latched in the HW at reset, not the APIC ID
  * register's value.  For any box whose BIOS changes APIC IDs, like
  * clustered APIC systems, we must use hard_smp_processor_id.
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 50eebd0..1efecd2 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -158,6 +158,15 @@ static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
 }
 
+static unsigned int flat_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						const cpumask_t *andmask)
+{
+	unsigned long mask1 = cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
+	unsigned long mask2 = cpus_addr(*andmask)[0] & APIC_ALL_CPUS;
+
+	return (int)(mask1 & mask2);
+}
+
 static unsigned int phys_pkg_id(int index_msb)
 {
 	return hard_smp_processor_id() >> index_msb;
@@ -178,6 +187,7 @@ struct genapic apic_flat =  {
 	.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
@@ -254,6 +264,21 @@ static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
+static unsigned int physflat_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						    const cpumask_t *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask))
+			return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 struct genapic apic_physflat =  {
 	.name = "physical flat",
 	.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
@@ -269,6 +294,7 @@ struct genapic apic_physflat =  {
 	.send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f5fa9a9..fd8047f 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -123,6 +123,21 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
+static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask))
+			return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -172,6 +187,7 @@ struct genapic apic_x2apic_cluster = {
 	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index 41c27b2..d5578bb 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -122,6 +122,21 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
+static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask))
+			return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -168,6 +183,7 @@ struct genapic apic_x2apic_phys = {
 	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 0106594..53bd257 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -179,6 +179,21 @@ static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
+static unsigned int uv_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+					      const cpumask_t *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask))
+			return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -229,6 +244,7 @@ struct genapic apic_x2apic_uv_x = {
 	.send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
 	.send_IPI_self = uv_send_IPI_self,
 	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
-- 
cgit v0.10.2


From 6eeb7c5a99434596c5953a95baa17d2f085664e3 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:55 -0800
Subject: x86: update add-cpu_mask_to_apicid_and to use struct cpumask*

Impact: use updated APIs

Various API updates for x86:add-cpu_mask_to_apicid_and

(Note: separate because previous patch has been "backported" to 2.6.27.)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index 99f9aba..976399d 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -129,8 +129,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	int cpu;
 
@@ -138,9 +138,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask))
-			return cpu_to_logical_apicid(cpu);
+	cpu = cpumask_any_and(cpumask, andmask);
+	if (cpu < nr_cpu_ids)
+		return cpu_to_logical_apicid(cpu);
 
 	return BAD_APICID;
 }
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index c2bed77..ba8423c 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -214,8 +214,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	int num_bits_set;
 	int num_bits_set2;
@@ -223,9 +223,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	int cpu;
 	int apicid = 0;
 
-	num_bits_set = cpus_weight(*cpumask);
-	num_bits_set2 = cpus_weight(*andmask);
-	num_bits_set = min_t(int, num_bits_set, num_bits_set2);
+	num_bits_set = cpumask_weight(cpumask);
+	num_bits_set2 = cpumask_weight(andmask);
+	num_bits_set = min(num_bits_set, num_bits_set2);
 	/* Return id to all */
 	if (num_bits_set >= nr_cpu_ids)
 #if defined CONFIG_ES7000_CLUSTERED_APIC
@@ -237,11 +237,12 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask)
-			apicid = cpu_to_logical_apicid(cpu);
+	cpu = cpumask_first_and(cpumask, andmask);
+	apicid = cpu_to_logical_apicid(cpu);
+
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) {
+		if (cpumask_test_cpu(cpu, cpumask) &&
+		    cpumask_test_cpu(cpu, andmask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)) {
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 325298a..eed6e30 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -58,8 +58,8 @@ struct genapic {
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
 	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
-	unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask,
-					       const cpumask_t *andmask);
+	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+					       const struct cpumask *andmask);
 	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 301c7f4..244b717 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -31,8 +31,8 @@ struct genapic {
 	void (*send_IPI_self)(int vector);
 	/* */
 	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
-	unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask,
-					       const cpumask_t *andmask);
+	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+					       const struct cpumask *andmask);
 	unsigned int (*phys_pkg_id)(int index_msb);
 	unsigned int (*get_apic_id)(unsigned long x);
 	unsigned long (*set_apic_id)(unsigned int id);
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index 229b605..df8e024 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -67,11 +67,11 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return cpus_addr(*cpumask)[0];
 }
 
-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask,
-					      const cpumask_t *andmask)
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
-	unsigned long mask1 = cpus_addr(*cpumask)[0];
-	unsigned long mask2 = cpus_addr(*andmask)[0];
+	unsigned long mask1 = cpumask_bits(cpumask)[0];
+	unsigned long mask2 = cpumask_bits(andmask)[0];
 
 	return (unsigned int)(mask1 & mask2);
 }
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index abf668c..c80f00d 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -127,8 +127,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return (int) 0xF;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	return (int) 0xF;
 }
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index cbcc2c7..651a938 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -170,8 +170,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	int num_bits_set;
 	int num_bits_set2;
@@ -179,9 +179,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	int cpu;
 	int apicid = 0;
 
-	num_bits_set = cpus_weight(*cpumask);
-	num_bits_set2 = cpus_weight(*andmask);
-	num_bits_set = min_t(int, num_bits_set, num_bits_set2);
+	num_bits_set = cpumask_weight(cpumask);
+	num_bits_set2 = cpumask_weight(andmask);
+	num_bits_set = min(num_bits_set, num_bits_set2);
 	/* Return id to all */
 	if (num_bits_set >= nr_cpu_ids)
 		return 0xFF;
@@ -189,11 +189,11 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask)
-			apicid = cpu_to_logical_apicid(cpu);
+	cpu = cpumask_first_and(cpumask, andmask);
+	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) {
+		if (cpumask_test_cpu(cpu, cpumask)
+		    && cpumask_test_cpu(cpu, andmask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)) {
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 1efecd2..c772bb1 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -158,13 +158,13 @@ static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
 }
 
-static unsigned int flat_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						const cpumask_t *andmask)
+static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						const struct cpumask *andmask)
 {
-	unsigned long mask1 = cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
-	unsigned long mask2 = cpus_addr(*andmask)[0] & APIC_ALL_CPUS;
+	unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+	unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
 
-	return (int)(mask1 & mask2);
+	return mask1 & mask2;
 }
 
 static unsigned int phys_pkg_id(int index_msb)
@@ -264,8 +264,9 @@ static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
-static unsigned int physflat_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						    const cpumask_t *andmask)
+static unsigned int
+physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+				const struct cpumask *andmask)
 {
 	int cpu;
 
@@ -273,9 +274,9 @@ static unsigned int physflat_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask))
-			return per_cpu(x86_cpu_to_apicid, cpu);
+	cpu = cpumask_any_and(cpumask, andmask);
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
 }
 
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index fd8047f..e7d16f5 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -123,8 +123,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	int cpu;
 
@@ -132,9 +132,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask))
-			return per_cpu(x86_cpu_to_apicid, cpu);
+	cpu = cpumask_any_and(cpumask, andmask);
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
 }
 
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d5578bb..9d0386c 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -122,8 +122,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	int cpu;
 
@@ -131,9 +131,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask))
-			return per_cpu(x86_cpu_to_apicid, cpu);
+	cpu = cpumask_any_and(cpumask, andmask);
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
 }
 
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 53bd257..22596ec 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -179,8 +179,8 @@ static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
-static unsigned int uv_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-					      const cpumask_t *andmask)
+static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+					      const struct cpumask *andmask)
 {
 	int cpu;
 
@@ -188,9 +188,9 @@ static unsigned int uv_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask))
-			return per_cpu(x86_cpu_to_apicid, cpu);
+	cpu = cpumask_any_and(cpumask, andmask);
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
 }
 
-- 
cgit v0.10.2


From 22f65d31b25a320a5246592160bcb102d2791c45 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:56 -0800
Subject: x86: Update io_apic.c to use new cpumask API

Impact: cleanup, consolidate patches, use new API

Consolidate the following into a single patch to adapt to new
sparseirq code in arch/x86/kernel/io_apic.c, add allocation of
cpumask_var_t's in domain and old_domain, and reduce further
merge conflicts.  Only one file (arch/x86/kernel/io_apic.c) is
changed in all of these patches.

	0006-x86-io_apic-change-irq_cfg-domain-old_domain-to.patch
	0007-x86-io_apic-set_desc_affinity.patch
	0008-x86-io_apic-send_cleanup_vector.patch
	0009-x86-io_apic-eliminate-remaining-cpumask_ts-from-st.patch
	0021-x86-final-cleanups-in-io_apic-to-use-new-cpumask-AP.patch

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7f23ce7..60bb8b1 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -136,8 +136,8 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
 
 struct irq_cfg {
 	struct irq_pin_list *irq_2_pin;
-	cpumask_t domain;
-	cpumask_t old_domain;
+	cpumask_var_t domain;
+	cpumask_var_t old_domain;
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
@@ -149,22 +149,22 @@ static struct irq_cfg irq_cfgx[] = {
 #else
 static struct irq_cfg irq_cfgx[NR_IRQS] = {
 #endif
-	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+	[0]  = { .vector = IRQ0_VECTOR,  },
+	[1]  = { .vector = IRQ1_VECTOR,  },
+	[2]  = { .vector = IRQ2_VECTOR,  },
+	[3]  = { .vector = IRQ3_VECTOR,  },
+	[4]  = { .vector = IRQ4_VECTOR,  },
+	[5]  = { .vector = IRQ5_VECTOR,  },
+	[6]  = { .vector = IRQ6_VECTOR,  },
+	[7]  = { .vector = IRQ7_VECTOR,  },
+	[8]  = { .vector = IRQ8_VECTOR,  },
+	[9]  = { .vector = IRQ9_VECTOR,  },
+	[10] = { .vector = IRQ10_VECTOR, },
+	[11] = { .vector = IRQ11_VECTOR, },
+	[12] = { .vector = IRQ12_VECTOR, },
+	[13] = { .vector = IRQ13_VECTOR, },
+	[14] = { .vector = IRQ14_VECTOR, },
+	[15] = { .vector = IRQ15_VECTOR, },
 };
 
 void __init arch_early_irq_init(void)
@@ -180,6 +180,10 @@ void __init arch_early_irq_init(void)
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
+		alloc_bootmem_cpumask_var(&cfg[i].domain);
+		alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+		if (i < NR_IRQS_LEGACY)
+			cpumask_setall(cfg[i].domain);
 	}
 }
 
@@ -204,6 +208,20 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 	node = cpu_to_node(cpu);
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+	if (cfg) {
+		/* FIXME: needs alloc_cpumask_var_node() */
+		if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
+			kfree(cfg);
+			cfg = NULL;
+		} else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
+			free_cpumask_var(cfg->domain);
+			kfree(cfg);
+			cfg = NULL;
+		} else {
+			cpumask_clear(cfg->domain);
+			cpumask_clear(cfg->old_domain);
+		}
+	}
 	printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
 
 	return cfg;
@@ -362,6 +380,26 @@ static void ioapic_mask_entry(int apic, int pin)
 }
 
 #ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		unsigned int i;
+		cfg->move_cleanup_count = 0;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			cfg->move_cleanup_count++;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+	} else {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	}
+	cfg->move_in_progress = 0;
+}
+
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
 	int apic, pin;
@@ -400,40 +438,52 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
 static int
 assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
 
-static void set_ioapic_affinity_irq_desc(struct irq_desc *desc,
-					 const struct cpumask *mask)
+/*
+ * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
+ * of that, or returns BAD_APICID and leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int dest;
-	cpumask_t tmp;
 	unsigned int irq;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
-		return;
+		return BAD_APICID;
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
 	if (assign_irq_vector(irq, cfg, mask))
-		return;
+		return BAD_APICID;
 
+	cpumask_and(&desc->affinity, cfg->domain, mask);
 	set_extra_move_desc(desc, mask);
+	return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
+}
 
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
-	/*
-	 * Only the high 8 bits are valid.
-	 */
-	dest = SET_APIC_LOGICAL_ID(dest);
+static void
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int dest;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__target_IO_APIC_irq(irq, dest, cfg);
-	cpumask_copy(&desc->affinity, mask);
+	dest = set_desc_affinity(desc, mask);
+	if (dest != BAD_APICID) {
+		/* Only the high 8 bits are valid. */
+		dest = SET_APIC_LOGICAL_ID(dest);
+		__target_IO_APIC_irq(irq, dest, cfg);
+	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void set_ioapic_affinity_irq(unsigned int irq,
-				    const struct cpumask *mask)
+static void
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc;
 
@@ -1117,26 +1167,32 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 	 */
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
 	unsigned int old_vector;
-	int cpu;
-	cpumask_t tmp_mask;
+	int cpu, err;
+	cpumask_var_t tmp_mask;
 
 	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
 		return -EBUSY;
 
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
 	old_vector = cfg->vector;
 	if (old_vector) {
-		cpus_and(tmp_mask, *mask, cpu_online_map);
-		cpus_and(tmp_mask, cfg->domain, tmp_mask);
-		if (!cpus_empty(tmp_mask))
+		cpumask_and(tmp_mask, mask, cpu_online_mask);
+		cpumask_and(tmp_mask, cfg->domain, tmp_mask);
+		if (!cpumask_empty(tmp_mask)) {
+			free_cpumask_var(tmp_mask);
 			return 0;
+		}
 	}
 
 	/* Only try and allocate irqs on cpus that are present */
-	for_each_cpu_and(cpu, mask, &cpu_online_map) {
+	err = -ENOSPC;
+	for_each_cpu_and(cpu, mask, cpu_online_mask) {
 		int new_cpu;
 		int vector, offset;
 
-		vector_allocation_domain(cpu, &tmp_mask);
+		vector_allocation_domain(cpu, tmp_mask);
 
 		vector = current_vector;
 		offset = current_offset;
@@ -1156,7 +1212,7 @@ next:
 		if (vector == SYSCALL_VECTOR)
 			goto next;
 #endif
-		for_each_cpu_and(new_cpu, &tmp_mask, &cpu_online_map)
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
 		/* Found one! */
@@ -1164,15 +1220,17 @@ next:
 		current_offset = offset;
 		if (old_vector) {
 			cfg->move_in_progress = 1;
-			cfg->old_domain = cfg->domain;
+			cpumask_copy(cfg->old_domain, cfg->domain);
 		}
-		for_each_cpu_and(new_cpu, &tmp_mask, &cpu_online_map)
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq;
 		cfg->vector = vector;
-		cfg->domain = tmp_mask;
-		return 0;
+		cpumask_copy(cfg->domain, tmp_mask);
+		err = 0;
+		break;
 	}
-	return -ENOSPC;
+	free_cpumask_var(tmp_mask);
+	return err;
 }
 
 static int
@@ -1189,23 +1247,20 @@ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 
 static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 {
-	cpumask_t mask;
 	int cpu, vector;
 
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
-	cpus_and(mask, cfg->domain, cpu_online_map);
-	for_each_cpu_mask_nr(cpu, mask)
+	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 
 	cfg->vector = 0;
-	cpus_clear(cfg->domain);
+	cpumask_clear(cfg->domain);
 
 	if (likely(!cfg->move_in_progress))
 		return;
-	cpus_and(mask, cfg->old_domain, cpu_online_map);
-	for_each_cpu_mask_nr(cpu, mask) {
+	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
 		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
 								vector++) {
 			if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1230,7 +1285,7 @@ void __setup_vector_irq(int cpu)
 		if (!desc)
 			continue;
 		cfg = desc->chip_data;
-		if (!cpu_isset(cpu, cfg->domain))
+		if (!cpumask_test_cpu(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
 		per_cpu(vector_irq, cpu)[vector] = irq;
@@ -1242,7 +1297,7 @@ void __setup_vector_irq(int cpu)
 			continue;
 
 		cfg = irq_cfg(irq);
-		if (!cpu_isset(cpu, cfg->domain))
+		if (!cpumask_test_cpu(cpu, cfg->domain))
 			per_cpu(vector_irq, cpu)[vector] = -1;
 	}
 }
@@ -1378,18 +1433,17 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
 {
 	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
-	cpumask_t mask;
+	unsigned int dest;
 
 	if (!IO_APIC_IRQ(irq))
 		return;
 
 	cfg = desc->chip_data;
 
-	mask = *TARGET_CPUS;
-	if (assign_irq_vector(irq, cfg, &mask))
+	if (assign_irq_vector(irq, cfg, TARGET_CPUS))
 		return;
 
-	cpus_and(mask, cfg->domain, mask);
+	dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
 
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1399,8 +1453,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
 
 
 	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-			       cpu_mask_to_apicid(&mask), trigger, polarity,
-			       cfg->vector)) {
+			       dest, trigger, polarity, cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic].mp_apicid, pin);
 		__clear_irq_vector(irq, cfg);
@@ -2122,7 +2175,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	send_IPI_mask(&cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+	send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
 	spin_unlock_irqrestore(&vector_lock, flags);
 
 	return 1;
@@ -2175,15 +2228,13 @@ static void
 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
-	cpumask_t tmpmask;
 	struct irte irte;
 	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
 	unsigned int irq;
 
-	cpus_and(tmpmask, *mask, cpu_online_map);
-	if (cpus_empty(tmpmask))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
 	irq = desc->irq;
@@ -2196,8 +2247,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 
 	set_extra_move_desc(desc, mask);
 
-	cpus_and(tmpmask, cfg->domain, *mask);
-	dest = cpu_mask_to_apicid(&tmpmask);
+	dest = cpu_mask_to_apicid_and(cfg->domain, mask);
 
 	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
@@ -2214,14 +2264,10 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 	 */
 	modify_irte(irq, &irte);
 
-	if (cfg->move_in_progress) {
-		cpus_and(tmpmask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(tmpmask);
-		send_IPI_mask(&tmpmask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
 
-	desc->affinity = *mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 
 static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2247,7 +2293,7 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
 
 	ret = 0;
 	desc->status &= ~IRQ_MOVE_PENDING;
-	cpus_clear(desc->pending_mask);
+	cpumask_clear(&desc->pending_mask);
 
 unmask:
 	unmask_IO_APIC_irq_desc(desc);
@@ -2333,7 +2379,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		if (!cfg->move_cleanup_count)
 			goto unlock;
 
-		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 			goto unlock;
 
 		__get_cpu_var(vector_irq)[vector] = -1;
@@ -2356,14 +2402,8 @@ static void irq_complete_move(struct irq_desc **descp)
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
-	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
-		cpumask_t cleanup_mask;
-
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+		send_cleanup_vector(cfg);
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
@@ -3088,16 +3128,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	struct irq_cfg *cfg;
 	int err;
 	unsigned dest;
-	cpumask_t tmp;
 
 	cfg = irq_cfg(irq);
-	tmp = *TARGET_CPUS;
-	err = assign_irq_vector(irq, cfg, &tmp);
+	err = assign_irq_vector(irq, cfg, TARGET_CPUS);
 	if (err)
 		return err;
 
-	cpus_and(tmp, cfg->domain, tmp);
-	dest = cpu_mask_to_apicid(&tmp);
+	dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
@@ -3157,19 +3194,12 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
 
 	read_msi_msg_desc(desc, &msg);
 
@@ -3179,7 +3209,6 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg_desc(desc, &msg);
-	cpumask_copy(&desc->affinity, mask);
 }
 #ifdef CONFIG_INTR_REMAP
 /*
@@ -3192,24 +3221,15 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	unsigned int dest;
-	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return;
-
 	if (get_irte(irq, &irte))
 		return;
 
-	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
-	set_extra_move_desc(desc, mask);
-
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
-
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
 
@@ -3223,14 +3243,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	 * at the new destination. So, time to cleanup the previous
 	 * vector allocation.
 	 */
-	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-
-	cpumask_copy(&desc->affinity, mask);
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
 }
 
 #endif
@@ -3421,25 +3435,18 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
 
 	dmar_msi_read(irq, &msg);
 
@@ -3449,7 +3456,6 @@ static void dmar_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
-	cpumask_copy(&desc->affinity, mask);
 }
 
 #endif /* CONFIG_SMP */
@@ -3483,25 +3489,18 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
 
 	hpet_msi_read(irq, &msg);
 
@@ -3511,7 +3510,6 @@ static void hpet_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	hpet_msi_write(irq, &msg);
-	cpumask_copy(&desc->affinity, mask);
 }
 
 #endif /* CONFIG_SMP */
@@ -3566,27 +3564,19 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const cpumask_t *mask)
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
-	cpumask_copy(&desc->affinity, mask);
 }
 
 #endif
@@ -3606,7 +3596,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
 	struct irq_cfg *cfg;
 	int err;
-	cpumask_t tmp;
 
 	cfg = irq_cfg(irq);
 	err = assign_irq_vector(irq, cfg, TARGET_CPUS);
@@ -3614,8 +3603,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 		struct ht_irq_msg msg;
 		unsigned dest;
 
-		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(&tmp);
+		dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
@@ -3651,7 +3639,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 		       unsigned long mmr_offset)
 {
-	const cpumask_t *eligible_cpu = &cpumask_of_cpu(cpu);
+	const struct cpumask *eligible_cpu = cpumask_of(cpu);
 	struct irq_cfg *cfg;
 	int mmr_pnode;
 	unsigned long mmr_value;
@@ -3891,7 +3879,7 @@ void __init setup_ioapic_dest(void)
 	int pin, ioapic, irq, irq_entry;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	const cpumask_t *mask;
+	const struct cpumask *mask;
 
 	if (skip_ioapic_setup == 1)
 		return;
-- 
cgit v0.10.2


From b78936e14ee47b6b2d628501a0eab5270db80132 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:57 -0800
Subject: xen: convert to cpumask_var_t and new cpumask primitives.

Simple change, and eventual space saving when NR_CPUS >> nr_cpu_ids.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 2cce362..b3a9586 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -33,7 +33,7 @@
 #include "xen-ops.h"
 #include "mmu.h"
 
-cpumask_t xen_cpu_initialized_map;
+cpumask_var_t xen_cpu_initialized_map;
 
 static DEFINE_PER_CPU(int, resched_irq);
 static DEFINE_PER_CPU(int, callfunc_irq);
@@ -192,7 +192,10 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 	if (xen_smp_intr_init(0))
 		BUG();
 
-	xen_cpu_initialized_map = cpumask_of_cpu(0);
+	if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
+		panic("could not allocate xen_cpu_initialized_map\n");
+
+	cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
 
 	/* Restrict the possible_map according to max_cpus. */
 	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
@@ -221,7 +224,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	struct vcpu_guest_context *ctxt;
 	struct desc_struct *gdt;
 
-	if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
+	if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
 		return 0;
 
 	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 2a234db..212ffe0 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -35,7 +35,8 @@ void xen_post_suspend(int suspend_cancelled)
 			pfn_to_mfn(xen_start_info->console.domU.mfn);
 	} else {
 #ifdef CONFIG_SMP
-		xen_cpu_initialized_map = cpu_online_map;
+		BUG_ON(xen_cpu_initialized_map == NULL);
+		cpumask_copy(xen_cpu_initialized_map, cpu_online_mask);
 #endif
 		xen_vcpu_restore();
 	}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9e1afae..c1f8faf 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void);
 __cpuinit void xen_init_lock_cpu(int cpu);
 void xen_uninit_lock_cpu(int cpu);
 
-extern cpumask_t xen_cpu_initialized_map;
+extern cpumask_var_t xen_cpu_initialized_map;
 #else
 static inline void xen_smp_init(void) {}
 #endif
-- 
cgit v0.10.2


From d7b381bb7b1ad69ff008ea063d26e988b686c8de Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:58 -0800
Subject: x86: fixup_irqs() doesnt need an argument.

Impact: cleanup, remove on-stack cpumask.

The "map" arg is always cpu_online_mask.  Importantly, set_affinity
always ands the argument with cpu_online_mask anyway, so we don't need
to do it in fixup_irqs(), avoiding a temporary.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index bae0eda..8766d30 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -37,7 +37,7 @@ extern int irqbalance_disable(char *str);
 
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
-extern void fixup_irqs(cpumask_t map);
+extern void fixup_irqs(void);
 #endif
 
 extern unsigned int do_IRQ(struct pt_regs *regs);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 9cf9cbb..9dc5588 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -233,27 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs)
 #ifdef CONFIG_HOTPLUG_CPU
 #include <mach_apic.h>
 
-void fixup_irqs(cpumask_t map)
+/* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
+void fixup_irqs(void)
 {
 	unsigned int irq;
 	static int warned;
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
-		cpumask_t mask;
+		const struct cpumask *affinity;
 
 		if (!desc)
 			continue;
 		if (irq == 2)
 			continue;
 
-		cpus_and(mask, desc->affinity, map);
-		if (any_online_cpu(mask) == NR_CPUS) {
+		affinity = &desc->affinity;
+		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 			printk("Breaking affinity for irq %i\n", irq);
-			mask = map;
+			affinity = cpu_all_mask;
 		}
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, &mask);
+			desc->chip->set_affinity(irq, affinity);
 		else if (desc->action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 27f2307..fca2991 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -83,16 +83,17 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-void fixup_irqs(cpumask_t map)
+/* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
+void fixup_irqs(void)
 {
 	unsigned int irq;
 	static int warned;
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
-		cpumask_t mask;
 		int break_affinity = 0;
 		int set_affinity = 1;
+		const struct cpumask *affinity;
 
 		if (!desc)
 			continue;
@@ -102,23 +103,23 @@ void fixup_irqs(cpumask_t map)
 		/* interrupt's are disabled at this point */
 		spin_lock(&desc->lock);
 
+		affinity = &desc->affinity;
 		if (!irq_has_action(irq) ||
-		    cpus_equal(desc->affinity, map)) {
+		    cpumask_equal(affinity, cpu_online_mask)) {
 			spin_unlock(&desc->lock);
 			continue;
 		}
 
-		cpus_and(mask, desc->affinity, map);
-		if (cpus_empty(mask)) {
+		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 			break_affinity = 1;
-			mask = map;
+			affinity = cpu_all_mask;
 		}
 
 		if (desc->chip->mask)
 			desc->chip->mask(irq);
 
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, &mask);
+			desc->chip->set_affinity(irq, affinity);
 		else if (!(warned++))
 			set_affinity = 0;
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9d58134..8b6f675 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1346,7 +1346,7 @@ void cpu_disable_common(void)
 	lock_vector_lock();
 	remove_cpu_from_maps(cpu);
 	unlock_vector_lock();
-	fixup_irqs(cpu_online_map);
+	fixup_irqs();
 }
 
 int native_cpu_disable(void)
-- 
cgit v0.10.2


From bcda016eddd7a8b374bb371473c821a91ff1d8cc Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:59 -0800
Subject: x86: cosmetic changes apic-related files.

This patch simply changes cpumask_t to struct cpumask and similar
trivial modernizations.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h
index 63553e9..27fcd01 100644
--- a/arch/x86/include/asm/bigsmp/ipi.h
+++ b/arch/x86/include/asm/bigsmp/ipi.h
@@ -1,26 +1,22 @@
 #ifndef __ASM_MACH_IPI_H
 #define __ASM_MACH_IPI_H
 
-void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 
-static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
 
 static inline void send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-
-	if (!cpus_empty(mask))
-		send_IPI_mask(&mask, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(&cpu_online_map, vector);
+	send_IPI_mask(cpu_online_mask, vector);
 }
 
 #endif /* __ASM_MACH_IPI_H */
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h
index 1a85072..7e8ed24 100644
--- a/arch/x86/include/asm/es7000/ipi.h
+++ b/arch/x86/include/asm/es7000/ipi.h
@@ -1,25 +1,22 @@
 #ifndef __ASM_ES7000_IPI_H
 #define __ASM_ES7000_IPI_H
 
-void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 
-static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
 
 static inline void send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-	if (!cpus_empty(mask))
-		send_IPI_mask(&mask, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(&cpu_online_map, vector);
+	send_IPI_mask(cpu_online_mask, vector);
 }
 
 #endif /* __ASM_ES7000_IPI_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index eed6e30..746f37a 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -24,7 +24,7 @@ struct genapic {
 	int (*probe)(void);
 
 	int (*apic_id_registered)(void);
-	const cpumask_t *(*target_cpus)(void);
+	const struct cpumask *(*target_cpus)(void);
 	int int_delivery_mode;
 	int int_dest_mode;
 	int ESR_DISABLE;
@@ -57,15 +57,16 @@ struct genapic {
 
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
-	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+	unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
 	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
 					       const struct cpumask *andmask);
-	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
+	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
 
 #ifdef CONFIG_SMP
 	/* ipi */
-	void (*send_IPI_mask)(const cpumask_t *mask, int vector);
-	void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
+	void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+	void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
+					 int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 #endif
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 244b717..adf32fb 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -20,17 +20,18 @@ struct genapic {
 	u32 int_delivery_mode;
 	u32 int_dest_mode;
 	int (*apic_id_registered)(void);
-	const cpumask_t *(*target_cpus)(void);
-	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
+	const struct cpumask *(*target_cpus)(void);
+	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
 	void (*init_apic_ldr)(void);
 	/* ipi */
-	void (*send_IPI_mask)(const cpumask_t *mask, int vector);
-	void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
+	void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+	void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
+					 int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 	void (*send_IPI_self)(int vector);
 	/* */
-	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+	unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
 	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
 					       const struct cpumask *andmask);
 	unsigned int (*phys_pkg_id)(int index_msb);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 24b6e61..c745a30 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
 	native_apic_mem_write(APIC_ICR, cfg);
 }
 
-static inline void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask_sequence(const struct cpumask *mask,
+					  int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
@@ -128,14 +129,15 @@ static inline void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
 	 * - mbligh
 	 */
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask) {
+	for_each_cpu(query_cpu, mask) {
 		__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
 				      vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
-static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
 {
 	unsigned long flags;
 	unsigned int query_cpu;
@@ -144,7 +146,7 @@ static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
 	/* See Hack comment above */
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask)
+	for_each_cpu(query_cpu, mask)
 		if (query_cpu != this_cpu)
 			__send_IPI_dest_field(
 				per_cpu(x86_cpu_to_apicid, query_cpu),
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index df8e024..8863d97 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
 
 #define APIC_DFR_VALUE	(APIC_DFR_FLAT)
 
-static inline const cpumask_t *target_cpus(void)
+static inline const struct cpumask *target_cpus(void)
 { 
 #ifdef CONFIG_SMP
-	return &cpu_online_map;
+	return cpu_online_mask;
 #else
-	return &cpumask_of_cpu(0);
+	return cpumask_of(0);
 #endif
 } 
 
@@ -62,9 +62,9 @@ static inline int apic_id_registered(void)
 	return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
 
-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
-	return cpus_addr(*cpumask)[0];
+	return cpumask_bits(cpumask)[0];
 }
 
 static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -98,7 +98,7 @@ static inline int apicid_to_node(int logical_apicid)
 #endif
 }
 
-static inline void vector_allocation_domain(int cpu, cpumask_t *retmask)
+static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
         /* Careful. Some cpus do not strictly honor the set of cpus
          * specified in the interrupt destination when using lowest
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h
index 9353ab8..191312d 100644
--- a/arch/x86/include/asm/mach-default/mach_ipi.h
+++ b/arch/x86/include/asm/mach-default/mach_ipi.h
@@ -4,8 +4,8 @@
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
 
-void send_IPI_mask_bitmask(const cpumask_t *mask, int vector);
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+void send_IPI_mask_bitmask(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 void __send_IPI_shortcut(unsigned int shortcut, int vector);
 
 extern int no_broadcast;
@@ -15,17 +15,17 @@ extern int no_broadcast;
 #define send_IPI_mask (genapic->send_IPI_mask)
 #define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
 #else
-static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_bitmask(mask, vector);
 }
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 #endif
 
 static inline void __local_send_IPI_allbutself(int vector)
 {
 	if (no_broadcast || vector == NMI_VECTOR)
-		send_IPI_mask_allbutself(&cpu_online_map, vector);
+		send_IPI_mask_allbutself(cpu_online_mask, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
 }
@@ -33,7 +33,7 @@ static inline void __local_send_IPI_allbutself(int vector)
 static inline void __local_send_IPI_all(int vector)
 {
 	if (no_broadcast || vector == NMI_VECTOR)
-		send_IPI_mask(&cpu_online_map, vector);
+		send_IPI_mask(cpu_online_mask, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
 }
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h
index c734d7a..a8374c6 100644
--- a/arch/x86/include/asm/numaq/ipi.h
+++ b/arch/x86/include/asm/numaq/ipi.h
@@ -1,26 +1,22 @@
 #ifndef __ASM_NUMAQ_IPI_H
 #define __ASM_NUMAQ_IPI_H
 
-void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 
-static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
 
 static inline void send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-
-	if (!cpus_empty(mask))
-		send_IPI_mask(&mask, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(&cpu_online_map, vector);
+	send_IPI_mask(cpu_online_mask, vector);
 }
 
 #endif /* __ASM_NUMAQ_IPI_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index c4a9aa52..830b9fc 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -60,7 +60,7 @@ struct smp_ops {
 	void (*cpu_die)(unsigned int cpu);
 	void (*play_dead)(void);
 
-	void (*send_call_func_ipi)(const cpumask_t *mask);
+	void (*send_call_func_ipi)(const struct cpumask *mask);
 	void (*send_call_func_single_ipi)(int cpu);
 };
 
@@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu);
 void native_play_dead(void);
 void play_dead_common(void);
 
-void native_send_call_func_ipi(const cpumask_t *mask);
+void native_send_call_func_ipi(const struct cpumask *mask);
 void native_send_call_func_single_ipi(int cpu);
 
 extern void prefill_possible_map(void);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c772bb1..7fa5f49 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
 
-static const cpumask_t *flat_target_cpus(void)
+static const struct cpumask *flat_target_cpus(void)
 {
-	return &cpu_online_map;
+	return cpu_online_mask;
 }
 
-static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -45,7 +45,8 @@ static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	*retmask = (cpumask_t) { {[0] = APIC_ALL_CPUS, } };
+	cpumask_clear(retmask);
+	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
 }
 
 /*
@@ -77,16 +78,17 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
 	local_irq_restore(flags);
 }
 
-static void flat_send_IPI_mask(const cpumask_t *cpumask, int vector)
+static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 {
-	unsigned long mask = cpus_addr(*cpumask)[0];
+	unsigned long mask = cpumask_bits(cpumask)[0];
 
 	_flat_send_IPI_mask(mask, vector);
 }
 
-static void flat_send_IPI_mask_allbutself(const cpumask_t *cpumask, int vector)
+static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+					  int vector)
 {
-	unsigned long mask = cpus_addr(*cpumask)[0];
+	unsigned long mask = cpumask_bits(cpumask)[0];
 	int cpu = smp_processor_id();
 
 	if (cpu < BITS_PER_LONG)
@@ -103,8 +105,8 @@ static void flat_send_IPI_allbutself(int vector)
 	int hotplug = 0;
 #endif
 	if (hotplug || vector == NMI_VECTOR) {
-		if (!cpus_equal(cpu_online_map, cpumask_of_cpu(cpu))) {
-			unsigned long mask = cpus_addr(cpu_online_map)[0];
+		if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
+			unsigned long mask = cpumask_bits(cpu_online_mask)[0];
 
 			if (cpu < BITS_PER_LONG)
 				clear_bit(cpu, &mask);
@@ -119,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector)
 static void flat_send_IPI_all(int vector)
 {
 	if (vector == NMI_VECTOR)
-		flat_send_IPI_mask(&cpu_online_map, vector);
+		flat_send_IPI_mask(cpu_online_mask, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
@@ -153,9 +155,9 @@ static int flat_apic_id_registered(void)
 	return physid_isset(read_xapic_id(), phys_cpu_present_map);
 }
 
-static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
-	return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
+	return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
 }
 
 static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -217,23 +219,23 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static const cpumask_t *physflat_target_cpus(void)
+static const struct cpumask *physflat_target_cpus(void)
 {
-	return &cpu_online_map;
+	return cpu_online_mask;
 }
 
-static void physflat_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpus_clear(*retmask);
-	cpu_set(cpu, *retmask);
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
-static void physflat_send_IPI_mask(const cpumask_t *cpumask, int vector)
+static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 {
 	send_IPI_mask_sequence(cpumask, vector);
 }
 
-static void physflat_send_IPI_mask_allbutself(const cpumask_t *cpumask,
+static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
 					      int vector)
 {
 	send_IPI_mask_allbutself(cpumask, vector);
@@ -241,15 +243,15 @@ static void physflat_send_IPI_mask_allbutself(const cpumask_t *cpumask,
 
 static void physflat_send_IPI_allbutself(int vector)
 {
-	send_IPI_mask_allbutself(&cpu_online_map, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static void physflat_send_IPI_all(int vector)
 {
-	physflat_send_IPI_mask(&cpu_online_map, vector);
+	physflat_send_IPI_mask(cpu_online_mask, vector);
 }
 
-static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -257,7 +259,7 @@ static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(*cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index e7d16f5..4716a0c 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,18 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static const cpumask_t *x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
 {
-	return &cpumask_of_cpu(0);
+	return cpumask_of(0);
 }
 
 /*
  * for now each logical cpu is in its own vector allocation domain.
  */
-static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpus_clear(*retmask);
-	cpu_set(cpu, *retmask);
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -55,27 +55,28 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
  * at once. We have 16 cpu's in a cluster. This will minimize IPI register
  * writes.
  */
-static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask)
+	for_each_cpu(query_cpu, mask)
 		__x2apic_send_IPI_dest(
 			per_cpu(x86_cpu_to_logical_apicid, query_cpu),
 			vector, APIC_DEST_LOGICAL);
 	local_irq_restore(flags);
 }
 
-static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 	unsigned long this_cpu = smp_processor_id();
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask)
+	for_each_cpu(query_cpu, mask)
 		if (query_cpu != this_cpu)
 			__x2apic_send_IPI_dest(
 				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
@@ -100,7 +101,7 @@ static void x2apic_send_IPI_allbutself(int vector)
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(&cpu_online_map, vector);
+	x2apic_send_IPI_mask(cpu_online_mask, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -108,7 +109,7 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -116,7 +117,7 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(*cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_logical_apicid, cpu);
 	else
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index 9d0386c..b255507 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,15 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static const cpumask_t *x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
 {
-	return &cpumask_of_cpu(0);
+	return cpumask_of(0);
 }
 
-static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpus_clear(*retmask);
-	cpu_set(cpu, *retmask);
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -53,27 +53,28 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
 	x2apic_icr_write(cfg, apicid);
 }
 
-static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask) {
+	for_each_cpu(query_cpu, mask) {
 		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
 				       vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
-static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 	unsigned long this_cpu = smp_processor_id();
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask) {
+	for_each_cpu(query_cpu, mask) {
 		if (query_cpu != this_cpu)
 			__x2apic_send_IPI_dest(
 				per_cpu(x86_cpu_to_apicid, query_cpu),
@@ -99,7 +100,7 @@ static void x2apic_send_IPI_allbutself(int vector)
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(&cpu_online_map, vector);
+	x2apic_send_IPI_mask(cpu_online_mask, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -107,7 +108,7 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -115,7 +116,7 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(*cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 22596ec..3984682 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -75,15 +75,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static const cpumask_t *uv_target_cpus(void)
+static const struct cpumask *uv_target_cpus(void)
 {
-	return &cpumask_of_cpu(0);
+	return cpumask_of(0);
 }
 
-static void uv_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpus_clear(*retmask);
-	cpu_set(cpu, *retmask);
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
 int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -122,20 +122,20 @@ static void uv_send_IPI_one(int cpu, int vector)
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 }
 
-static void uv_send_IPI_mask(const cpumask_t *mask, int vector)
+static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	unsigned int cpu;
 
-	for_each_cpu_mask_nr(cpu, *mask)
+	for_each_cpu(cpu, mask)
 		uv_send_IPI_one(cpu, vector);
 }
 
-static void uv_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
 {
 	unsigned int cpu;
 	unsigned int this_cpu = smp_processor_id();
 
-	for_each_cpu_mask_nr(cpu, *mask)
+	for_each_cpu(cpu, mask)
 		if (cpu != this_cpu)
 			uv_send_IPI_one(cpu, vector);
 }
@@ -152,7 +152,7 @@ static void uv_send_IPI_allbutself(int vector)
 
 static void uv_send_IPI_all(int vector)
 {
-	uv_send_IPI_mask(&cpu_online_map, vector);
+	uv_send_IPI_mask(cpu_online_mask, vector);
 }
 
 static int uv_apic_id_registered(void)
@@ -164,7 +164,7 @@ static void uv_init_apic_ldr(void)
 {
 }
 
-static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -172,7 +172,7 @@ static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(*cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 86aa50f..285bbf8 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
 /*
  * This is only used on smaller machines.
  */
-void send_IPI_mask_bitmask(const cpumask_t *cpumask, int vector)
+void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
 {
-	unsigned long mask = cpus_addr(*cpumask)[0];
+	unsigned long mask = cpumask_bits(cpumask)[0];
 	unsigned long flags;
 
 	local_irq_save(flags);
-	WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+	WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
 	__send_IPI_dest_field(mask, vector);
 	local_irq_restore(flags);
 }
 
-void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned int query_cpu;
@@ -139,12 +139,12 @@ void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
 	 */
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask)
+	for_each_cpu(query_cpu, mask)
 		__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
 	local_irq_restore(flags);
 }
 
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned int query_cpu;
@@ -153,7 +153,7 @@ void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
 	/* See Hack comment above */
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask)
+	for_each_cpu(query_cpu, mask)
 		if (query_cpu != this_cpu)
 			__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
 					      vector);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 341df94..49ed667 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,15 +118,15 @@ static void native_smp_send_reschedule(int cpu)
 		WARN_ON(1);
 		return;
 	}
-	send_IPI_mask(&cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+	send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
 }
 
 void native_send_call_func_single_ipi(int cpu)
 {
-	send_IPI_mask(&cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+	send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
-void native_send_call_func_ipi(const cpumask_t *mask)
+void native_send_call_func_ipi(const struct cpumask *mask)
 {
 	cpumask_t allbutself;
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b3a9586..c44e206 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -411,22 +411,23 @@ static void xen_smp_send_reschedule(int cpu)
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
 
-static void xen_send_IPI_mask(const cpumask_t *mask, enum ipi_vector vector)
+static void xen_send_IPI_mask(const struct cpumask *mask,
+			      enum ipi_vector vector)
 {
 	unsigned cpu;
 
-	for_each_cpu_and(cpu, mask, &cpu_online_map)
+	for_each_cpu_and(cpu, mask, cpu_online_mask)
 		xen_send_IPI_one(cpu, vector);
 }
 
-static void xen_smp_send_call_function_ipi(const cpumask_t *mask)
+static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
 {
 	int cpu;
 
 	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
 
 	/* Make sure other vcpus get a chance to run if they need to. */
-	for_each_cpu_mask_nr(cpu, *mask) {
+	for_each_cpu(cpu, mask) {
 		if (xen_vcpu_stolen(cpu)) {
 			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
 			break;
@@ -436,7 +437,7 @@ static void xen_smp_send_call_function_ipi(const cpumask_t *mask)
 
 static void xen_smp_send_call_function_single_ipi(int cpu)
 {
-	xen_send_IPI_mask(&cpumask_of_cpu(cpu),
+	xen_send_IPI_mask(cpumask_of(cpu),
 			  XEN_CALL_FUNCTION_SINGLE_VECTOR);
 }
 
-- 
cgit v0.10.2


From 78637a97b7fe1df51f40a460448df0b93d511176 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:00 -0800
Subject: x86: Set CONFIG_NR_CPUS even on UP

Impact: cleanup

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1fd4435..4a3f585 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -599,12 +599,12 @@ config MAXSMP
 	  If unsure, say N.
 
 config NR_CPUS
-	depends on SMP
 	int "Maximum number of CPUs" if SMP && !MAXSMP
 	range 2 512 if SMP && !MAXSMP
+	default "1" if !SMP
 	default "4096" if MAXSMP
-	default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
-	default "8"
+	default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
+	default "8" if SMP
 	help
 	  This allows you to specify the maximum number of CPUs which this
 	  kernel will support.  The maximum supported value is 512 and the
-- 
cgit v0.10.2


From 168ef543a43678146e06b3911e987ac021d575b8 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:01 -0800
Subject: x86: prepare for cpumask iterators to only go to nr_cpu_ids

Impact: cleanup, futureproof

In fact, all cpumask ops will only be valid (in general) for bit
numbers < nr_cpu_ids.  So use that instead of NR_CPUS in various
places.

This is always safe: no cpu number can be >= nr_cpu_ids, and
nr_cpu_ids is initialized to NR_CPUS at boot.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index a375791..3b630ec 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -2106,7 +2106,7 @@ __cpuinit int apic_is_clustered_box(void)
 	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		/* are we being called early in kernel startup? */
 		if (bios_cpu_apicid) {
 			id = bios_cpu_apicid[i];
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 9c99018..a5bc054 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -672,7 +672,7 @@ void __init smp_boot_cpus(void)
 
 	/* loop over all the extended VIC CPUs and boot them.  The
 	 * Quad CPUs must be bootstrapped by their extended VIC cpu */
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
 			continue;
 		do_boot_cpu(i);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index cebcbf1..71a14f8 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -278,7 +278,7 @@ void __init numa_init_array(void)
 	int rr, i;
 
 	rr = first_node(node_online_map);
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		if (early_cpu_to_node(i) != NUMA_NO_NODE)
 			continue;
 		numa_set_node(i, rr);
@@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
 	memnodemap[0] = 0;
 	node_set_online(0);
 	node_set(0, node_possible_map);
-	for (i = 0; i < NR_CPUS; i++)
+	for (i = 0; i < nr_cpu_ids; i++)
 		numa_set_node(i, 0);
 	e820_register_active_regions(0, start_pfn, last_pfn);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51c0a2f..09737c8 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		if (!node_online(i))
 			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		int node = early_cpu_to_node(i);
 
 		if (node == NUMA_NO_NODE)
-- 
cgit v0.10.2


From 1de88cd4a33fcc2fcf70cbce01688723f728675d Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:02 -0800
Subject: x86: Use cpumask accessors code for possible/present maps.

Impact: use new API

Use the accessors rather than frobbing bits directly.  Most of this is
in arch code I haven't even compiled, but is straightforward.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 3b630ec..edda4c0 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1903,8 +1903,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	}
 #endif
 
-	cpu_set(cpu, cpu_possible_map);
-	cpu_set(cpu, cpu_present_map);
+	set_cpu_possible(cpu, true);
+	set_cpu_present(cpu, true);
 }
 
 #ifdef CONFIG_X86_64
-- 
cgit v0.10.2


From b2bb85549134c005e997e5a7ed303bda6a1ae738 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:03 -0800
Subject: x86: Remove cpumask games in x86/kernel/cpu/intel_cacheinfo.c

Impact: remove cpumask_t from stack.

We should not try to save and restore cpus_allowed on current.

We can't use work_on_cpu() here, since it's in the hotplug cpu path
(if anyone else tries to get the hotplug lock from a workqueue we
could deadlock against them).

Fortunately, we can just use smp_call_function_single() since the
function can run from an interrupt.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Oleg Nesterov <oleg@tv-sign.ru>

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 43ea612..fb7f946 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
 	per_cpu(cpuid4_info, cpu) = NULL;
 }
 
-static int __cpuinit detect_cache_attributes(unsigned int cpu)
+static void get_cpu_leaves(void *_retval)
 {
-	struct _cpuid4_info	*this_leaf;
-	unsigned long		j;
-	int			retval;
-	cpumask_t		oldmask;
-
-	if (num_cache_leaves == 0)
-		return -ENOENT;
-
-	per_cpu(cpuid4_info, cpu) = kzalloc(
-	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-	if (per_cpu(cpuid4_info, cpu) == NULL)
-		return -ENOMEM;
-
-	oldmask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	if (retval)
-		goto out;
+	int j, *retval = _retval, cpu = smp_processor_id();
 
 	/* Do cpuid and store the results */
 	for (j = 0; j < num_cache_leaves; j++) {
+		struct _cpuid4_info *this_leaf;
 		this_leaf = CPUID4_INFO_IDX(cpu, j);
-		retval = cpuid4_cache_lookup(j, this_leaf);
-		if (unlikely(retval < 0)) {
+		*retval = cpuid4_cache_lookup(j, this_leaf);
+		if (unlikely(*retval < 0)) {
 			int i;
 
 			for (i = 0; i < j; i++)
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 		}
 		cache_shared_cpu_map_setup(cpu, j);
 	}
-	set_cpus_allowed_ptr(current, &oldmask);
+}
+
+static int __cpuinit detect_cache_attributes(unsigned int cpu)
+{
+	int			retval;
+
+	if (num_cache_leaves == 0)
+		return -ENOENT;
+
+	per_cpu(cpuid4_info, cpu) = kzalloc(
+	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
+	if (per_cpu(cpuid4_info, cpu) == NULL)
+		return -ENOMEM;
 
-out:
+	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
 	if (retval) {
 		kfree(per_cpu(cpuid4_info, cpu));
 		per_cpu(cpuid4_info, cpu) = NULL;
-- 
cgit v0.10.2


From 4cd4601d592d07b26e4b7d2bb8fcd55bbfd6cf6e Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:04 -0800
Subject: x86: use work_on_cpu in x86/kernel/cpu/mcheck/mce_amd_64.c

Impact: Remove cpumask_t's from stack.

Simple transition to work_on_cpu(), rather than cpumask games.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Robert Richter <robert.richter@amd.com>
Cc: jacob.shin@amd.com

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 5eb390a..a1de80f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
  * CPU Initialization
  */
 
+struct thresh_restart {
+	struct threshold_block *b;
+	int reset;
+	u16 old_limit;
+};
+
 /* must be called with correct cpu affinity */
-static void threshold_restart_bank(struct threshold_block *b,
-				   int reset, u16 old_limit)
+static long threshold_restart_bank(void *_tr)
 {
+	struct thresh_restart *tr = _tr;
 	u32 mci_misc_hi, mci_misc_lo;
 
-	rdmsr(b->address, mci_misc_lo, mci_misc_hi);
+	rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
 
-	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
-		reset = 1;	/* limit cannot be lower than err count */
+	if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+		tr->reset = 1;	/* limit cannot be lower than err count */
 
-	if (reset) {		/* reset err count and overflow bit */
+	if (tr->reset) {		/* reset err count and overflow bit */
 		mci_misc_hi =
 		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
-		    (THRESHOLD_MAX - b->threshold_limit);
-	} else if (old_limit) {	/* change limit w/o reset */
+		    (THRESHOLD_MAX - tr->b->threshold_limit);
+	} else if (tr->old_limit) {	/* change limit w/o reset */
 		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
-		    (old_limit - b->threshold_limit);
+		    (tr->old_limit - tr->b->threshold_limit);
 		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
 		    (new_count & THRESHOLD_MAX);
 	}
 
-	b->interrupt_enable ?
+	tr->b->interrupt_enable ?
 	    (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
 	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
 
 	mci_misc_hi |= MASK_COUNT_EN_HI;
-	wrmsr(b->address, mci_misc_lo, mci_misc_hi);
+	wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+	return 0;
 }
 
 /* cpu init entry point, called from mce.c with preempt off */
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 	unsigned int cpu = smp_processor_id();
 	u8 lvt_off;
 	u32 low = 0, high = 0, address = 0;
+	struct thresh_restart tr;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 			wrmsr(address, low, high);
 
 			threshold_defaults.address = address;
-			threshold_restart_bank(&threshold_defaults, 0, 0);
+			tr.b = &threshold_defaults;
+			tr.reset = 0;
+			tr.old_limit = 0;
+			threshold_restart_bank(&tr);
 		}
 	}
 }
@@ -251,20 +262,6 @@ struct threshold_attr {
 	ssize_t(*store) (struct threshold_block *, const char *, size_t count);
 };
 
-static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
-					   cpumask_t *newmask)
-{
-	*oldmask = current->cpus_allowed;
-	cpus_clear(*newmask);
-	cpu_set(cpu, *newmask);
-	set_cpus_allowed_ptr(current, newmask);
-}
-
-static void affinity_restore(const cpumask_t *oldmask)
-{
-	set_cpus_allowed_ptr(current, oldmask);
-}
-
 #define SHOW_FIELDS(name)                                           \
 static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
 {                                                                   \
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
 				      const char *buf, size_t count)
 {
 	char *end;
-	cpumask_t oldmask, newmask;
+	struct thresh_restart tr;
 	unsigned long new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
 	b->interrupt_enable = !!new;
 
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 0, 0);
-	affinity_restore(&oldmask);
+	tr.b = b;
+	tr.reset = 0;
+	tr.old_limit = 0;
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 
 	return end - buf;
 }
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
 				     const char *buf, size_t count)
 {
 	char *end;
-	cpumask_t oldmask, newmask;
-	u16 old;
+	struct thresh_restart tr;
 	unsigned long new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
 		new = THRESHOLD_MAX;
 	if (new < 1)
 		new = 1;
-	old = b->threshold_limit;
+	tr.old_limit = b->threshold_limit;
 	b->threshold_limit = new;
+	tr.b = b;
+	tr.reset = 0;
 
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 0, old);
-	affinity_restore(&oldmask);
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 
 	return end - buf;
 }
 
-static ssize_t show_error_count(struct threshold_block *b, char *buf)
+static long local_error_count(void *_b)
 {
-	u32 high, low;
-	cpumask_t oldmask, newmask;
-	affinity_set(b->cpu, &oldmask, &newmask);
+	struct threshold_block *b = _b;
+	u32 low, high;
+
 	rdmsr(b->address, low, high);
-	affinity_restore(&oldmask);
-	return sprintf(buf, "%x\n",
-		       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
+	return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
+}
+
+static ssize_t show_error_count(struct threshold_block *b, char *buf)
+{
+	return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
 }
 
 static ssize_t store_error_count(struct threshold_block *b,
 				 const char *buf, size_t count)
 {
-	cpumask_t oldmask, newmask;
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 1, 0);
-	affinity_restore(&oldmask);
+	struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
+
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 	return 1;
 }
 
@@ -463,12 +462,19 @@ out_free:
 	return err;
 }
 
+static long local_allocate_threshold_blocks(void *_bank)
+{
+	unsigned int *bank = _bank;
+
+	return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
+					 MSR_IA32_MC0_MISC + *bank * 4);
+}
+
 /* symlinks sibling shared banks to first core.  first core owns dir/files. */
 static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 {
 	int i, err = 0;
 	struct threshold_bank *b = NULL;
-	cpumask_t oldmask, newmask;
 	char name[32];
 
 	sprintf(name, "threshold_bank%i", bank);
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 	per_cpu(threshold_banks, cpu)[bank] = b;
 
-	affinity_set(cpu, &oldmask, &newmask);
-	err = allocate_threshold_blocks(cpu, bank, 0,
-					MSR_IA32_MC0_MISC + bank * 4);
-	affinity_restore(&oldmask);
-
+	err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
 	if (err)
 		goto out_free;
 
-- 
cgit v0.10.2


From e4d98207ea3f3d15eb664282df16d18c4ac86f80 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:05 -0800
Subject: x86: xen: use smp_call_function_many()

Impact: use new API, remove cpumask from stack.

Change smp_call_function_mask() callers to smp_call_function_many().

This removes a cpumask from the stack, and falls back should allocating
the cpumask var fail (only possible with CONFIG_CPUMASKS_OFFSTACK).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: jeremy@xensource.com

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 636ef4c..e59e53b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1079,7 +1079,7 @@ static void drop_other_mm_ref(void *info)
 
 static void xen_drop_mm_ref(struct mm_struct *mm)
 {
-	cpumask_t mask;
+	cpumask_var_t mask;
 	unsigned cpu;
 
 	if (current->active_mm == mm) {
@@ -1091,7 +1091,16 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 	}
 
 	/* Get the "official" set of cpus referring to our pagetable. */
-	mask = mm->cpu_vm_mask;
+	if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
+		for_each_online_cpu(cpu) {
+			if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
+			    && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+				continue;
+			smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
+		}
+		return;
+	}
+	cpumask_copy(mask, &mm->cpu_vm_mask);
 
 	/* It's possible that a vcpu may have a stale reference to our
 	   cr3, because its in lazy mode, and it hasn't yet flushed
@@ -1100,11 +1109,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 	   if needed. */
 	for_each_online_cpu(cpu) {
 		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
-			cpu_set(cpu, mask);
+			cpumask_set_cpu(cpu, mask);
 	}
 
-	if (!cpus_empty(mask))
-		smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+	if (!cpumask_empty(mask))
+		smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
+	free_cpumask_var(mask);
 }
 #else
 static void xen_drop_mm_ref(struct mm_struct *mm)
-- 
cgit v0.10.2


From 83b19597f793fd5f91533bda0dc2eb3d21936798 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:06 -0800
Subject: x86: Introduce topology_core_cpumask()/topology_thread_cpumask()

Impact: new API

The old topology_core_siblings() and topology_thread_siblings() return
a cpumask_t; these new ones return a (const) struct cpumask *.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ff386ff..79e31e9 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -226,6 +226,8 @@ extern cpumask_t cpu_coregroup_map(int cpu);
 #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 #define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)		(&per_cpu(cpu_core_map, cpu))
+#define topology_thread_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
 
 /* indicates that pointers to the topology cpumask_t maps are valid */
 #define arch_provides_topology_pointers		yes
-- 
cgit v0.10.2


From d733e00d7c10cc68333fbb88108bb15bb044f61b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 13:35:51 +0100
Subject: x86: update io_apic.c to the new cpumask code

Impact: build fix

The sparseirq tree crossed with the cpumask changes, fix the fallout.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 6bd51ce..58938cc 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -347,13 +347,14 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
 	}
 }
 
-static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+static void
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg = desc->chip_data;
 
 	if (!cfg->move_in_progress) {
 		/* it means that domain is not changed */
-		if (!cpus_intersects(desc->affinity, mask))
+		if (!cpumask_intersects(&desc->affinity, mask))
 			cfg->move_desc_pending = 1;
 	}
 }
-- 
cgit v0.10.2


From a775a38b1353161a6d7af86b667d6523c12c1a37 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 17 Dec 2008 15:21:39 -0800
Subject: x86: fix cpu_mask_to_apicid_and to include cpu_online_mask

Impact: fix potential APIC crash

In determining the destination apicid, there are usually three cpumasks
that are considered: the incoming cpumask arg, cfg->domain and the
cpu_online_mask.  Since we are just introducing the cpu_mask_to_apicid_and
function, make sure it includes the cpu_online_mask in it's evaluation.
[Added with this patch.]

There are two io_apic.c functions that did not previously use the
cpu_online_mask:  setup_IO_APIC_irq and msi_compose_msg.  Both of these
simply used cpu_mask_to_apicid(cfg->domain & TARGET_CPUS), and all but
one arch (NUMAQ[*]) returns only online cpus in the TARGET_CPUS mask,
so the behavior is identical for all cases.

[*: NUMAQ bug?]

Note that alloc_cpumask_var is only used for the 32-bit cases where
it's highly likely that the cpumask set size will be small and therefore
CPUMASK_OFFSTACK=n.  But if that's not the case, failing the allocate
will cause the same return value as the default.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index 976399d..d8dd9f5 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -138,7 +138,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = cpumask_any_and(cpumask, andmask);
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
 	if (cpu < nr_cpu_ids)
 		return cpu_to_logical_apicid(cpu);
 
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index ba8423c..51ac123 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -214,51 +214,47 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
 						  const struct cpumask *andmask)
 {
 	int num_bits_set;
-	int num_bits_set2;
 	int cpus_found = 0;
 	int cpu;
-	int apicid = 0;
+	int apicid = cpu_to_logical_apicid(0);
+	cpumask_var_t cpumask;
+
+	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+		return apicid;
+
+	cpumask_and(cpumask, inmask, andmask);
+	cpumask_and(cpumask, cpumask, cpu_online_mask);
 
 	num_bits_set = cpumask_weight(cpumask);
-	num_bits_set2 = cpumask_weight(andmask);
-	num_bits_set = min(num_bits_set, num_bits_set2);
 	/* Return id to all */
-	if (num_bits_set >= nr_cpu_ids)
-#if defined CONFIG_ES7000_CLUSTERED_APIC
-		return 0xFF;
-#else
-		return cpu_to_logical_apicid(0);
-#endif
+	if (num_bits_set == NR_CPUS)
+		goto exit;
 	/*
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = cpumask_first_and(cpumask, andmask);
+	cpu = cpumask_first(cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
-
 	while (cpus_found < num_bits_set) {
-		if (cpumask_test_cpu(cpu, cpumask) &&
-		    cpumask_test_cpu(cpu, andmask)) {
+		if (cpumask_test_cpu(cpu, cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
-					apicid_cluster(new_apicid)) {
-				printk(KERN_WARNING
-					"%s: Not a valid mask!\n", __func__);
-#if defined CONFIG_ES7000_CLUSTERED_APIC
-				return 0xFF;
-#else
+					apicid_cluster(new_apicid)){
+				printk ("%s: Not a valid mask!\n", __func__);
 				return cpu_to_logical_apicid(0);
-#endif
 			}
 			apicid = new_apicid;
 			cpus_found++;
 		}
 		cpu++;
 	}
+exit:
+	free_cpumask_var(cpumask);
 	return apicid;
 }
 
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index 8863d97..cc09cbb 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -72,8 +72,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 {
 	unsigned long mask1 = cpumask_bits(cpumask)[0];
 	unsigned long mask2 = cpumask_bits(andmask)[0];
+	unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
 
-	return (unsigned int)(mask1 & mask2);
+	return (unsigned int)(mask1 & mask2 & mask3);
 }
 
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 651a938..99327d1 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -170,35 +170,37 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
 						  const struct cpumask *andmask)
 {
 	int num_bits_set;
-	int num_bits_set2;
 	int cpus_found = 0;
 	int cpu;
-	int apicid = 0;
+	int apicid = 0xFF;
+	cpumask_var_t cpumask;
+
+	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+		return (int) 0xFF;
+
+	cpumask_and(cpumask, inmask, andmask);
+	cpumask_and(cpumask, cpumask, cpu_online_mask);
 
 	num_bits_set = cpumask_weight(cpumask);
-	num_bits_set2 = cpumask_weight(andmask);
-	num_bits_set = min(num_bits_set, num_bits_set2);
 	/* Return id to all */
-	if (num_bits_set >= nr_cpu_ids)
-		return 0xFF;
+	if (num_bits_set == nr_cpu_ids)
+		goto exit;
 	/*
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = cpumask_first_and(cpumask, andmask);
+	cpu = cpumask_first(cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpumask_test_cpu(cpu, cpumask)
-		    && cpumask_test_cpu(cpu, andmask)) {
+		if (cpumask_test_cpu(cpu, cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
-					apicid_cluster(new_apicid)) {
-				printk(KERN_WARNING
-					"%s: Not a valid mask!\n", __func__);
+					apicid_cluster(new_apicid)){
+				printk ("%s: Not a valid mask!\n", __func__);
 				return 0xFF;
 			}
 			apicid = apicid | new_apicid;
@@ -206,6 +208,8 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 		}
 		cpu++;
 	}
+exit:
+	free_cpumask_var(cpumask);
 	return apicid;
 }
 
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 7fa5f49..3418548 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -276,7 +276,9 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = cpumask_any_and(cpumask, andmask);
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
 	if (cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index 4716a0c..d451c9b 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -133,7 +133,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = cpumask_any_and(cpumask, andmask);
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
 	if (cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index b255507..62895cf 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -132,7 +132,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = cpumask_any_and(cpumask, andmask);
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
 	if (cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 3984682..0e88be1 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -188,7 +188,9 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = cpumask_any_and(cpumask, andmask);
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
 	if (cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
-- 
cgit v0.10.2


From 3b11ce7f542e415c90267b4482d4611410b468e6 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 17 Dec 2008 15:21:39 -0800
Subject: x86: use possible_cpus=NUM to extend the possible cpus allowed

Impact: add new boot parameter

Use possible_cpus=NUM kernel parameter to extend the number of possible
cpus.

The ability to HOTPLUG ON cpus that are "possible" but not "present" is
dealt with in a later patch.

Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 94bbc27..9d620c1 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -50,16 +50,17 @@ additional_cpus=n (*)	Use this to limit hotpluggable cpus. This option sets
   			cpu_possible_map = cpu_present_map + additional_cpus
 
 (*) Option valid only for following architectures
-- x86_64, ia64
+- ia64
 
-ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT
-to determine the number of potentially hot-pluggable cpus. The implementation
-should only rely on this to count the # of cpus, but *MUST* not rely on the
-apicid values in those tables for disabled apics. In the event BIOS doesn't
-mark such hot-pluggable cpus as disabled entries, one could use this
-parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map.
+ia64 uses the number of disabled local apics in ACPI tables MADT to
+determine the number of potentially hot-pluggable cpus. The implementation
+should only rely on this to count the # of cpus, but *MUST* not rely
+on the apicid values in those tables for disabled apics. In the event
+BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
+use this parameter "additional_cpus=x" to represent those cpus in the
+cpu_possible_map.
 
-possible_cpus=n		[s390 only] use this to set hotpluggable cpus.
+possible_cpus=n		[s390,x86_64] use this to set hotpluggable cpus.
 			This option sets possible_cpus bits in
 			cpu_possible_map. Thus keeping the numbers of bits set
 			constant even if the machine gets rebooted.
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 93cf2d1..f7a32a3 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1819,28 +1819,32 @@ void disconnect_bsp_APIC(int virt_wire_setup)
 void __cpuinit generic_processor_info(int apicid, int version)
 {
 	int cpu;
-	cpumask_t tmp_map;
 
 	/*
 	 * Validate version
 	 */
 	if (version == 0x0) {
 		pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
-			"fixing up to 0x10. (tell your hw vendor)\n",
-			version);
+			   "fixing up to 0x10. (tell your hw vendor)\n",
+				version);
 		version = 0x10;
 	}
 	apic_version[apicid] = version;
 
-	if (num_processors >= NR_CPUS) {
-		pr_warning("WARNING: NR_CPUS limit of %i reached."
-			"  Processor ignored.\n", NR_CPUS);
+	if (num_processors >= nr_cpu_ids) {
+		int max = nr_cpu_ids;
+		int thiscpu = max + disabled_cpus;
+
+		pr_warning(
+			"ACPI: NR_CPUS/possible_cpus limit of %i reached."
+			"  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+
+		disabled_cpus++;
 		return;
 	}
 
 	num_processors++;
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
+	cpu = cpumask_next_zero(-1, cpu_present_mask);
 
 	physid_set(apicid, phys_cpu_present_map);
 	if (apicid == boot_cpu_physical_apicid) {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index be94667..1a9941b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1252,6 +1252,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	check_nmi_watchdog();
 }
 
+static int __initdata setup_possible_cpus = -1;
+static int __init _setup_possible_cpus(char *str)
+{
+	get_option(&str, &setup_possible_cpus);
+	return 0;
+}
+early_param("possible_cpus", _setup_possible_cpus);
+
+
 /*
  * cpu_possible_map should be static, it cannot change as cpu's
  * are onlined, or offlined. The reason is per-cpu data-structures
@@ -1264,7 +1273,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
  *
  * Three ways to find out the number of additional hotplug CPUs:
  * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with additional_cpus=NUM
+ * - The user can overwrite it with possible_cpus=NUM
  * - Otherwise don't reserve additional CPUs.
  * We do this because additional CPUs waste a lot of memory.
  * -AK
@@ -1277,9 +1286,17 @@ __init void prefill_possible_map(void)
 	if (!num_processors)
 		num_processors = 1;
 
-	possible = num_processors + disabled_cpus;
-	if (possible > NR_CPUS)
-		possible = NR_CPUS;
+	if (setup_possible_cpus == -1)
+		possible = num_processors + disabled_cpus;
+	else
+		possible = setup_possible_cpus;
+
+	if (possible > CONFIG_NR_CPUS) {
+		printk(KERN_WARNING
+			"%d Processors exceeds NR_CPUS limit of %d\n",
+			possible, CONFIG_NR_CPUS);
+		possible = CONFIG_NR_CPUS;
+	}
 
 	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
 		possible, max_t(int, possible - num_processors, 0));
-- 
cgit v0.10.2


From 7b4967c532045a1983d6d4af5c69cc7c5109f62b Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Fri, 19 Dec 2008 16:56:37 +1030
Subject: cpumask: Add alloc_cpumask_var_node()

Impact: New API

This will be needed in x86 code to allocate the domain and old_domain
cpumasks on the same node as where the containing irq_cfg struct is
allocated.

(Also fixes double-dump_stack on rare CONFIG_DEBUG_PER_CPU_MAPS case)

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (re-impl alloc_cpumask_var)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d4bf526..b5ad19a 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1025,6 +1025,7 @@ static inline size_t cpumask_size(void)
 #ifdef CONFIG_CPUMASK_OFFSTACK
 typedef struct cpumask *cpumask_var_t;
 
+bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
 bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
 void alloc_bootmem_cpumask_var(cpumask_var_t *mask);
 void free_cpumask_var(cpumask_var_t mask);
@@ -1038,6 +1039,12 @@ static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
 	return true;
 }
 
+static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
+					  int node)
+{
+	return true;
+}
+
 static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask)
 {
 }
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 8d03f22..3f258f5 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -76,15 +76,14 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
 
 /* These are not inline because of header tangles. */
 #ifdef CONFIG_CPUMASK_OFFSTACK
-bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 {
 	if (likely(slab_is_available()))
-		*mask = kmalloc(cpumask_size(), flags);
+		*mask = kmalloc_node(cpumask_size(), flags, node);
 	else {
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 		printk(KERN_ERR
 			"=> alloc_cpumask_var: kmalloc not available!\n");
-		dump_stack();
 #endif
 		*mask = NULL;
 	}
@@ -96,6 +95,12 @@ bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
 #endif
 	return *mask != NULL;
 }
+EXPORT_SYMBOL(alloc_cpumask_var_node);
+
+bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+	return alloc_cpumask_var_node(mask, flags, numa_node_id());
+}
 EXPORT_SYMBOL(alloc_cpumask_var);
 
 void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
-- 
cgit v0.10.2


From ec26b805879c7e77865b39ee91b737985e80006d Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Fri, 19 Dec 2008 16:56:52 +1030
Subject: cpumask: documentation for cpumask_var_t

Impact: New kerneldoc comments

Additional documentation added to all the alloc_cpumask and free_cpumask
functions.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (minor additions)

diff --git a/lib/cpumask.c b/lib/cpumask.c
index 3f258f5..a24edf1 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -76,6 +76,20 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
 
 /* These are not inline because of header tangles. */
 #ifdef CONFIG_CPUMASK_OFFSTACK
+/**
+ * alloc_cpumask_var_node - allocate a struct cpumask on a given node
+ * @mask: pointer to cpumask_var_t where the cpumask is returned
+ * @flags: GFP_ flags
+ *
+ * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is
+ * a nop returning a constant 1 (in <linux/cpumask.h>)
+ * Returns TRUE if memory allocation succeeded, FALSE otherwise.
+ *
+ * In addition, mask will be NULL if this fails.  Note that gcc is
+ * usually smart enough to know that mask can never be NULL if
+ * CONFIG_CPUMASK_OFFSTACK=n, so does code elimination in that case
+ * too.
+ */
 bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 {
 	if (likely(slab_is_available()))
@@ -97,23 +111,52 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 }
 EXPORT_SYMBOL(alloc_cpumask_var_node);
 
+/**
+ * alloc_cpumask_var - allocate a struct cpumask
+ * @mask: pointer to cpumask_var_t where the cpumask is returned
+ * @flags: GFP_ flags
+ *
+ * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is
+ * a nop returning a constant 1 (in <linux/cpumask.h>).
+ *
+ * See alloc_cpumask_var_node.
+ */
 bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
 {
 	return alloc_cpumask_var_node(mask, flags, numa_node_id());
 }
 EXPORT_SYMBOL(alloc_cpumask_var);
 
+/**
+ * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena.
+ * @mask: pointer to cpumask_var_t where the cpumask is returned
+ *
+ * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is
+ * a nop returning a constant 1 (in <linux/cpumask.h>)
+ * Either returns an allocated (zero-filled) cpumask, or causes the
+ * system to panic.
+ */
 void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
 {
 	*mask = alloc_bootmem(cpumask_size());
 }
 
+/**
+ * free_cpumask_var - frees memory allocated for a struct cpumask.
+ * @mask: cpumask to free
+ *
+ * This is safe on a NULL mask.
+ */
 void free_cpumask_var(cpumask_var_t mask)
 {
 	kfree(mask);
 }
 EXPORT_SYMBOL(free_cpumask_var);
 
+/**
+ * free_bootmem_cpumask_var - frees result of alloc_bootmem_cpumask_var
+ * @mask: cpumask to free
+ */
 void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
 	free_bootmem((unsigned long)mask, cpumask_size());
-- 
cgit v0.10.2


From e057d7aea9d8f2a46cd440d8bfb72245d4e72d79 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 15 Dec 2008 20:26:48 -0800
Subject: cpumask: add sysfs displays for configured and disabled cpu maps

Impact: add new sysfs files.

Add sysfs files "kernel_max" and "offline" to display the max CPU index
allowed (NR_CPUS-1), and the map of cpus that are offline.

Cpus can be offlined via HOTPLUG, disabled by the BIOS ACPI tables, or
if they exceed the number of cpus allowed by the NR_CPUS config option,
or the "maxcpus=NUM" kernel start parameter.

The "possible_cpus=NUM" parameter can also extend the number of possible
cpus allowed, in which case the cpus not present at startup will be
in the offline state.  (These cpus can be HOTPLUGGED ON after system
startup [pending a follow-on patch to provide the capability via the
/sys/devices/sys/cpu/cpuN/online mechanism to bring them online.])

By design, the "offlined cpus > possible cpus" display will always
use the following formats:

  * all possible cpus online:   "x$"    or "x-y$"
  * some possible cpus offline: ".*,x$" or ".*,x-y$"

where:
  x == number of possible cpus (nr_cpu_ids); and
  y == number of cpus >= NR_CPUS or maxcpus (if y > x).

One use of this feature is for distros to select (or configure) the
appropriate kernel to install for the resident system.

Notes:
  * cpus offlined <= possible cpus will be printed for all architectures.
  * cpus offlined >  possible cpus will only be printed for arches that
  	set 'total_cpus' [X86 only in this patch].

Based on tip/cpus4096 + .../rusty/linux-2.6-for-ingo.git/master +
	 x86-only-patches sent 12/15.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 4259072..2aef96f 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -128,10 +128,54 @@ print_cpus_func(online);
 print_cpus_func(possible);
 print_cpus_func(present);
 
+/*
+ * Print values for NR_CPUS and offlined cpus
+ */
+static ssize_t print_cpus_kernel_max(struct sysdev_class *class, char *buf)
+{
+	int n = snprintf(buf, PAGE_SIZE-2, "%d\n", CONFIG_NR_CPUS - 1);
+	return n;
+}
+static SYSDEV_CLASS_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL);
+
+/* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */
+unsigned int total_cpus;
+
+static ssize_t print_cpus_offline(struct sysdev_class *class, char *buf)
+{
+	int n = 0, len = PAGE_SIZE-2;
+	cpumask_var_t offline;
+
+	/* display offline cpus < nr_cpu_ids */
+	if (!alloc_cpumask_var(&offline, GFP_KERNEL))
+		return -ENOMEM;
+	cpumask_complement(offline, cpu_online_mask);
+	n = cpulist_scnprintf(buf, len, offline);
+	free_cpumask_var(offline);
+
+	/* display offline cpus >= nr_cpu_ids */
+	if (total_cpus && nr_cpu_ids < total_cpus) {
+		if (n && n < len)
+			buf[n++] = ',';
+
+		if (nr_cpu_ids == total_cpus-1)
+			n += snprintf(&buf[n], len - n, "%d", nr_cpu_ids);
+		else
+			n += snprintf(&buf[n], len - n, "%d-%d",
+						      nr_cpu_ids, total_cpus-1);
+	}
+
+	n += snprintf(&buf[n], len - n, "\n");
+	return n;
+}
+static SYSDEV_CLASS_ATTR(offline, 0444, print_cpus_offline, NULL);
+
 static struct sysdev_class_attribute *cpu_state_attr[] = {
 	&attr_online_map,
 	&attr_possible_map,
 	&attr_present_map,
+	&attr_kernel_max,
+	&attr_offline,
 };
 
 static int cpu_states_init(void)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 3f9a600..0d5770c 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -21,6 +21,9 @@ struct call_single_data {
 	u16 priv;
 };
 
+/* total number of cpus in this system (may exceed NR_CPUS) */
+extern unsigned int total_cpus;
+
 #ifdef CONFIG_SMP
 
 #include <linux/preempt.h>
-- 
cgit v0.10.2


From d62720ade82c5e5b8f9585e5ed02c89573ebf111 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 17 Dec 2008 14:14:30 -0800
Subject: sysfs: add documentation to cputopology.txt for system cpumasks

Add information to cputopology.txt explaining the output of various
system cpumask's.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index bd699da..45932ec 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -31,3 +31,51 @@ not defined by include/asm-XXX/topology.h:
 2) core_id: 0
 3) thread_siblings: just the given CPU
 4) core_siblings: just the given CPU
+
+Additionally, cpu topology information is provided under
+/sys/devices/system/cpu and includes these files.  The internal
+source for the output is in brackets ("[]").
+
+    kernel_max: the maximum cpu index allowed by the kernel configuration.
+		[NR_CPUS-1]
+
+    offline:	cpus that are not online because they have been
+		HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit
+		of cpus allowed by the kernel configuration (kernel_max
+		above). [~cpu_online_mask + cpus >= NR_CPUS]
+
+    online:	cpus that are online and being scheduled [cpu_online_mask]
+
+    possible:	cpus that have been allocated resources and can be
+		brought online if they are present. [cpu_possible_mask]
+
+    present:	cpus that have been identified as being present in the
+		system. [cpu_present_mask]
+
+The format for the above output is compatible with cpulist_parse()
+[see <linux/cpumask.h>].  Some examples follow.
+
+In this example, there are 64 cpus in the system but cpus 32-63 exceed
+the kernel max which is limited to 0..31 by the NR_CPUS config option
+being 32.  Note also that cpus 2 and 4-31 are not online but could be
+brought online as they are both present and possible.
+
+     kernel_max: 31
+        offline: 2,4-31,32-63
+         online: 0-1,3
+       possible: 0-31
+        present: 0-31
+
+In this example, the NR_CPUS config option is 128, but the kernel was
+started with possible_cpus=144.  There are 4 cpus in the system and cpu2
+was manually taken offline (and is the only cpu that can be brought
+online.)
+
+     kernel_max: 127
+        offline: 2,4-127,128-143
+         online: 0-1,3
+       possible: 0-127
+        present: 0-3
+
+See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter
+as well as more information on the various cpumask's.
-- 
cgit v0.10.2


From 716707b29906e1d8d190defe3d646610b097a861 Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Thu, 18 Dec 2008 23:26:02 +0530
Subject: sched: convert BALANCE_FOR_xx_POWER to inline functions

Impact: cleanup

BALANCE_FOR_MC_POWER and similar macros defined in sched.h are
not constants and have various condition checks and significant
amount of code that is not suitable to be contain in a macro.
Also there could be side effects on the expressions passed to
some of them like test_sd_parent().

This patch converts all complex macros related to power savings
balance to inline functions.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4240f6b..1210fb0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -763,15 +763,23 @@ enum cpu_idle_type {
 #define SD_SERIALIZE		1024	/* Only a single load balancing instance */
 #define SD_WAKE_IDLE_FAR	2048	/* Gain latency sacrificing cache hit */
 
-#define BALANCE_FOR_MC_POWER	\
-	(sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0)
+extern int sched_mc_power_savings, sched_smt_power_savings;
+
+static inline int sd_balance_for_mc_power(void)
+{
+	if (sched_smt_power_savings)
+		return SD_POWERSAVINGS_BALANCE;
 
-#define BALANCE_FOR_PKG_POWER	\
-	((sched_mc_power_savings || sched_smt_power_savings) ?	\
-	 SD_POWERSAVINGS_BALANCE : 0)
+	return 0;
+}
 
-#define test_sd_parent(sd, flag)	((sd->parent &&		\
-					 (sd->parent->flags & flag)) ? 1 : 0)
+static inline int sd_balance_for_package_power(void)
+{
+	if (sched_mc_power_savings | sched_smt_power_savings)
+		return SD_POWERSAVINGS_BALANCE;
+
+	return 0;
+}
 
 
 struct sched_group {
@@ -1399,6 +1407,15 @@ struct task_struct {
 #endif
 };
 
+/* Test a flag in parent sched domain */
+static inline int test_sd_parent(struct sched_domain *sd, int flag)
+{
+	if (sd->parent && (sd->parent->flags & flag))
+		return 1;
+
+	return 0;
+}
+
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
@@ -2256,8 +2273,6 @@ __trace_special(void *__tr, void *__data,
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
-extern int sched_mc_power_savings, sched_smt_power_savings;
-
 extern void normalize_rt_tasks(void);
 
 #ifdef CONFIG_GROUP_SCHED
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 0c5b5ac..0ce7c0d 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -125,7 +125,7 @@ int arch_update_cpu_topology(void);
 				| SD_WAKE_AFFINE	\
 				| SD_WAKE_BALANCE	\
 				| SD_SHARE_PKG_RESOURCES\
-				| BALANCE_FOR_MC_POWER,	\
+				| sd_balance_for_mc_power(),\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 }
@@ -150,7 +150,7 @@ int arch_update_cpu_topology(void);
 				| SD_BALANCE_FORK	\
 				| SD_WAKE_AFFINE	\
 				| SD_WAKE_BALANCE	\
-				| BALANCE_FOR_PKG_POWER,\
+				| sd_balance_for_package_power(),\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 }
-- 
cgit v0.10.2


From afb8a9b70b86866a60e08b2956ae4e1406390336 Mon Sep 17 00:00:00 2001
From: Gautham R Shenoy <ego@in.ibm.com>
Date: Thu, 18 Dec 2008 23:26:09 +0530
Subject: sched: framework for sched_mc/smt_power_savings=N

Impact: extend range of /sys/devices/system/cpu/sched_mc_power_savings

Currently the sched_mc/smt_power_savings variable is a boolean,
which either enables or disables topology based power savings.
This patch extends the behaviour of the variable from boolean to
multivalued, such that based on the value, we decide how
aggressively do we want to perform powersavings balance at
appropriate sched domain based on topology.

Variable levels of power saving tunable would benefit end user to
match the required level of power savings vs performance
trade-off depending on the system configuration and workloads.

This version makes the sched_mc_power_savings global variable to
take more values (0,1,2).  Later versions can have a single
tunable called sched_power_savings instead of
sched_{mc,smt}_power_savings.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1210fb0..a967266 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -763,6 +763,17 @@ enum cpu_idle_type {
 #define SD_SERIALIZE		1024	/* Only a single load balancing instance */
 #define SD_WAKE_IDLE_FAR	2048	/* Gain latency sacrificing cache hit */
 
+enum powersavings_balance_level {
+	POWERSAVINGS_BALANCE_NONE = 0,  /* No power saving load balance */
+	POWERSAVINGS_BALANCE_BASIC,	/* Fill one thread/core/package
+					 * first for long running threads
+					 */
+	POWERSAVINGS_BALANCE_WAKEUP,	/* Also bias task wakeups to semi-idle
+					 * cpu package for power savings
+					 */
+	MAX_POWERSAVINGS_BALANCE_LEVELS
+};
+
 extern int sched_mc_power_savings, sched_smt_power_savings;
 
 static inline int sd_balance_for_mc_power(void)
diff --git a/kernel/sched.c b/kernel/sched.c
index b309027..56b285c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7906,14 +7906,25 @@ int arch_reinit_sched_domains(void)
 static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 {
 	int ret;
+	unsigned int level = 0;
 
-	if (buf[0] != '0' && buf[0] != '1')
+	if (sscanf(buf, "%u", &level) != 1)
+		return -EINVAL;
+
+	/*
+	 * level is always be positive so don't check for
+	 * level < POWERSAVINGS_BALANCE_NONE which is 0
+	 * What happens on 0 or 1 byte write,
+	 * need to check for count as well?
+	 */
+
+	if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
 		return -EINVAL;
 
 	if (smt)
-		sched_smt_power_savings = (buf[0] == '1');
+		sched_smt_power_savings = level;
 	else
-		sched_mc_power_savings = (buf[0] == '1');
+		sched_mc_power_savings = level;
 
 	ret = arch_reinit_sched_domains();
 
-- 
cgit v0.10.2


From d5679bd11916eba5c8ee9033003e1a5ce56ece9a Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Thu, 18 Dec 2008 23:26:16 +0530
Subject: sched: favour lower logical cpu number for sched_mc balance

Impact: change load-balancing direction to match that of irqbalanced

Just in case two groups have identical load, prefer to move load to lower
logical cpu number rather than the present logic of moving to higher logical
number.

find_busiest_group() tries to look for a group_leader that has spare capacity
to take more tasks and freeup an appropriate least loaded group.  Just in case
there is a tie and the load is equal, then the group with higher logical number
is favoured.  This conflicts with user space irqbalance daemon that will move
interrupts to lower logical number if the system utilisation is very low.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 56b285c..94b9d11 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3241,7 +3241,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		 */
 		if ((sum_nr_running < min_nr_running) ||
 		    (sum_nr_running == min_nr_running &&
-		     cpumask_first(sched_group_cpus(group)) <
+		     cpumask_first(sched_group_cpus(group)) >
 		     cpumask_first(sched_group_cpus(group_min)))) {
 			group_min = group;
 			min_nr_running = sum_nr_running;
@@ -3257,7 +3257,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		if (sum_nr_running <= group_capacity - 1) {
 			if (sum_nr_running > leader_nr_running ||
 			    (sum_nr_running == leader_nr_running &&
-			     cpumask_first(sched_group_cpus(group)) >
+			     cpumask_first(sched_group_cpus(group)) <
 			     cpumask_first(sched_group_cpus(group_leader)))) {
 				group_leader = group;
 				leader_nr_running = sum_nr_running;
-- 
cgit v0.10.2


From 7a09b1a27b1e5a4957e4af9951420fea02c44fba Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Thu, 18 Dec 2008 23:26:22 +0530
Subject: sched: nominate preferred wakeup cpu

Impact: extend load-balancing code (no change in behavior yet)

When the system utilisation is low and more cpus are idle,
then the process waking up from sleep should prefer to
wakeup an idle cpu from semi-idle cpu package (multi core
package) rather than a completely idle cpu package which
would waste power.

Use the sched_mc balance logic in find_busiest_group() to
nominate a preferred wakeup cpu.

This info can be stored in appropriate sched_domain, but
updating this info in all copies of sched_domain is not
practical.  Hence this information is stored in root_domain
struct which is one copy per partitioned sched domain.
The root_domain can be accessed from each cpu's runqueue
and there is one copy per partitioned sched domain.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 94b9d11..c1b8b30 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -509,6 +509,14 @@ struct root_domain {
 #ifdef CONFIG_SMP
 	struct cpupri cpupri;
 #endif
+#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+	/*
+	 * Preferred wake up cpu nominated by sched_mc balance that will be
+	 * used when most cpus are idle in the system indicating overall very
+	 * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2)
+	 */
+	unsigned int sched_mc_preferred_wakeup_cpu;
+#endif
 };
 
 /*
@@ -3384,6 +3392,10 @@ out_balanced:
 
 	if (this == group_leader && group_leader != group_min) {
 		*imbalance = min_load_per_task;
+		if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
+			cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
+					first_cpu(group_leader->cpumask);
+		}
 		return group_min;
 	}
 #endif
-- 
cgit v0.10.2


From 7eb52dfa70dbf5232b5b83ec4357e6bebaa8fde8 Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Thu, 18 Dec 2008 23:26:29 +0530
Subject: sched: bias task wakeups to preferred semi-idle packages

Impact: tweak task wakeup to save power more agressively

Preferred wakeup cpu (from a semi idle package) has been
nominated in find_busiest_group() in the previous patch.  Use
this information in sched_mc_preferred_wakeup_cpu in function
wake_idle() to bias task wakeups if the following conditions
are satisfied:

        - The present cpu that is trying to wakeup the process is
          idle and waking the target process on this cpu will
          potentially wakeup a completely idle package
        - The previous cpu on which the target process ran is
          also idle and hence selecting the previous cpu may
          wakeup a semi idle cpu package
        - The task being woken up is allowed to run in the
          nominated cpu (cpu affinity and restrictions)

Basically if both the current cpu and the previous cpu on
which the task ran is idle, select the nominated cpu from semi
idle cpu package for running the new task that is waking up.

Cache hotness is considered since the actual biasing happens
in wake_idle() only if the application is cache cold.

This technique will effectively move short running bursty jobs in
a mostly idle system.

Wakeup biasing for power savings gets automatically disabled if
system utilisation increases due to the fact that the probability
of finding both this_cpu and prev_cpu idle decreases.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 08ffffd..36b5e34 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1026,6 +1026,24 @@ static int wake_idle(int cpu, struct task_struct *p)
 {
 	struct sched_domain *sd;
 	int i;
+	unsigned int chosen_wakeup_cpu;
+	int this_cpu;
+
+	/*
+	 * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
+	 * are idle and this is not a kernel thread and this task's affinity
+	 * allows it to be moved to preferred cpu, then just move!
+	 */
+
+	this_cpu = smp_processor_id();
+	chosen_wakeup_cpu =
+		cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
+
+	if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
+		idle_cpu(cpu) && idle_cpu(this_cpu) &&
+		p->mm && !(p->flags & PF_KTHREAD) &&
+		cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
+		return chosen_wakeup_cpu;
 
 	/*
 	 * If it is idle, then it is the best cpu to run this task.
-- 
cgit v0.10.2


From ad273b32e482cdef306eac32b28d97f513a022f4 Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Thu, 18 Dec 2008 23:26:36 +0530
Subject: sched: activate active load balancing in new idle cpus

Impact: tweak task balancing to save power more agressively

Active load balancing is a process by which migration thread
is woken up on the target CPU in order to pull current
running task on another package into this newly idle
package.

This method is already in use with normal load_balance(),
this patch introduces this method to new idle cpus when
sched_mc is set to POWERSAVINGS_BALANCE_WAKEUP.

This logic provides effective consolidation of short running
daemon jobs in a almost idle system

The side effect of this patch may be ping-ponging of tasks
if the system is moderately utilised. May need to adjust the
iterations before triggering.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index c1b8b30..8fc0d5a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3670,10 +3670,64 @@ redo:
 	}
 
 	if (!ld_moved) {
+		int active_balance;
+
 		schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
 		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 		    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 			return -1;
+
+		if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
+			return -1;
+
+		if (sd->nr_balance_failed++ < 2)
+			return -1;
+
+		/*
+		 * The only task running in a non-idle cpu can be moved to this
+		 * cpu in an attempt to completely freeup the other CPU
+		 * package. The same method used to move task in load_balance()
+		 * have been extended for load_balance_newidle() to speedup
+		 * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2)
+		 *
+		 * The package power saving logic comes from
+		 * find_busiest_group().  If there are no imbalance, then
+		 * f_b_g() will return NULL.  However when sched_mc={1,2} then
+		 * f_b_g() will select a group from which a running task may be
+		 * pulled to this cpu in order to make the other package idle.
+		 * If there is no opportunity to make a package idle and if
+		 * there are no imbalance, then f_b_g() will return NULL and no
+		 * action will be taken in load_balance_newidle().
+		 *
+		 * Under normal task pull operation due to imbalance, there
+		 * will be more than one task in the source run queue and
+		 * move_tasks() will succeed.  ld_moved will be true and this
+		 * active balance code will not be triggered.
+		 */
+
+		/* Lock busiest in correct order while this_rq is held */
+		double_lock_balance(this_rq, busiest);
+
+		/*
+		 * don't kick the migration_thread, if the curr
+		 * task on busiest cpu can't be moved to this_cpu
+		 */
+		if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+			double_unlock_balance(this_rq, busiest);
+			all_pinned = 1;
+			return ld_moved;
+		}
+
+		if (!busiest->active_balance) {
+			busiest->active_balance = 1;
+			busiest->push_cpu = this_cpu;
+			active_balance = 1;
+		}
+
+		double_unlock_balance(this_rq, busiest);
+		if (active_balance)
+			wake_up_process(busiest->migration_thread);
+
 	} else
 		sd->nr_balance_failed = 0;
 
-- 
cgit v0.10.2


From 100fdaee70ebf5f31b9451fbc01300c627091328 Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Thu, 18 Dec 2008 23:26:47 +0530
Subject: sched: add SD_BALANCE_NEWIDLE at MC and CPU level for sched_mc>0

Impact: change task balancing to save power more agressively

Add SD_BALANCE_NEWIDLE flag at MC level and CPU level
if sched_mc is set.  This helps power savings and
will not affect performance when sched_mc=0

Ingo and Mike Galbraith have optimised the SD flags by
removing SD_BALANCE_NEWIDLE at MC and CPU level.  This
helps performance but hurts power savings since this
slows down task consolidation by reducing the number
of times load_balance is run.

    sched: fine-tune SD_MC_INIT
        commit 14800984706bf6936bbec5187f736e928be5c218
        Author: Mike Galbraith <efault@gmx.de>
        Date:   Fri Nov 7 15:26:50 2008 +0100

    sched: re-tune balancing -- revert
        commit 9fcd18c9e63e325dbd2b4c726623f760788d5aa8
        Author: Ingo Molnar <mingo@elte.hu>
        Date:   Wed Nov 5 16:52:08 2008 +0100

This patch selectively enables SD_BALANCE_NEWIDLE flag
only when sched_mc is set to 1 or 2.  This helps power savings
by task consolidation and also does not hurt performance at
sched_mc=0 where all power saving optimisations are turned off.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a967266..5a933d9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -792,6 +792,19 @@ static inline int sd_balance_for_package_power(void)
 	return 0;
 }
 
+/*
+ * Optimise SD flags for power savings:
+ * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings.
+ * Keep default SD flags if sched_{smt,mc}_power_saving=0
+ */
+
+static inline int sd_power_saving_flags(void)
+{
+	if (sched_mc_power_savings | sched_smt_power_savings)
+		return SD_BALANCE_NEWIDLE;
+
+	return 0;
+}
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 0ce7c0d..e632d29 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -125,7 +125,8 @@ int arch_update_cpu_topology(void);
 				| SD_WAKE_AFFINE	\
 				| SD_WAKE_BALANCE	\
 				| SD_SHARE_PKG_RESOURCES\
-				| sd_balance_for_mc_power(),\
+				| sd_balance_for_mc_power()\
+				| sd_power_saving_flags(),\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 }
@@ -150,7 +151,8 @@ int arch_update_cpu_topology(void);
 				| SD_BALANCE_FORK	\
 				| SD_WAKE_AFFINE	\
 				| SD_WAKE_BALANCE	\
-				| sd_balance_for_package_power(),\
+				| sd_balance_for_package_power()\
+				| sd_power_saving_flags(),\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 }
-- 
cgit v0.10.2


From 06aaf76a7e2e4cc57eabcb8f43ec99c961fe55fe Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 18 Dec 2008 21:30:23 +0100
Subject: sched: move test_sd_parent() to an SMP section of sched.h

Impact: build fix

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a933d9..e5f928a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -920,6 +920,15 @@ extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 				    struct sched_domain_attr *dattr_new);
 extern int arch_reinit_sched_domains(void);
 
+/* Test a flag in parent sched domain */
+static inline int test_sd_parent(struct sched_domain *sd, int flag)
+{
+	if (sd->parent && (sd->parent->flags & flag))
+		return 1;
+
+	return 0;
+}
+
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
@@ -1431,15 +1440,6 @@ struct task_struct {
 #endif
 };
 
-/* Test a flag in parent sched domain */
-static inline int test_sd_parent(struct sched_domain *sd, int flag)
-{
-	if (sd->parent && (sd->parent->flags & flag))
-		return 1;
-
-	return 0;
-}
-
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
-- 
cgit v0.10.2


From 9924da434a13668fceb208d56dbdf86d166862cc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 19 Dec 2008 00:53:40 +0100
Subject: sched: fix warning in kernel/sched.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: fix cpumask conversion bug

this warning:

  kernel/sched.c: In function ‘find_busiest_group’:
  kernel/sched.c:3429: warning: passing argument 1 of ‘__first_cpu’ from incompatible pointer type

shows that we forgot to convert a new patch to the new cpumask APIs.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 8fc0d5a..ae5ca3f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3394,7 +3394,7 @@ out_balanced:
 		*imbalance = min_load_per_task;
 		if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
 			cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
-					first_cpu(group_leader->cpumask);
+				cpumask_first(sched_group_cpus(group_leader));
 		}
 		return group_min;
 	}
-- 
cgit v0.10.2


From a7883dece6ef82097e6bdf19c1d0a20351e06056 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 19 Dec 2008 00:59:09 +0100
Subject: x86: fix warning in arch/x86/kernel/io_apic.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

this warning:

  arch/x86/kernel/io_apic.c: In function ‘ir_set_msi_irq_affinity’:
  arch/x86/kernel/io_apic.c:3373: warning: ‘cfg’ may be used uninitialized in this function

triggers because the variable was truly uninitialized. We'd crash on
entering this code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 58938cc..908c1d0 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3360,7 +3360,7 @@ static void
 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg;
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned int dest;
 	struct irte irte;
 
-- 
cgit v0.10.2


From bce83697c5fe84a7a5d38c96fbbe43b4bc028c3e Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 22 Dec 2008 18:22:50 +0000
Subject: uwb: use dev_dbg() for debug messages

Instead of the home-grown d_fnstart(), d_fnend() and d_printf() macros,
use dev_dbg() or remove the message entirely.

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
index 2a4d36f..8582236 100644
--- a/drivers/usb/host/hwa-hc.c
+++ b/drivers/usb/host/hwa-hc.c
@@ -62,16 +62,12 @@
 #include "../wusbcore/wa-hc.h"
 #include "../wusbcore/wusbhc.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
 struct hwahc {
 	struct wusbhc wusbhc;	/* has to be 1st */
 	struct wahc wa;
-	u8 buffer[16];		/* for misc usb transactions */
 };
 
-/**
+/*
  * FIXME should be wusbhc
  *
  * NOTE: we need to cache the Cluster ID because later...there is no
@@ -125,7 +121,6 @@ static int hwahc_op_reset(struct usb_hcd *usb_hcd)
 	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
 	struct device *dev = &hwahc->wa.usb_iface->dev;
 
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
 	mutex_lock(&wusbhc->mutex);
 	wa_nep_disarm(&hwahc->wa);
 	result = __wa_set_feature(&hwahc->wa, WA_RESET);
@@ -133,7 +128,6 @@ static int hwahc_op_reset(struct usb_hcd *usb_hcd)
 		dev_err(dev, "error commanding HC to reset: %d\n", result);
 		goto error_unlock;
 	}
-	d_printf(3, dev, "reset: waiting for device to change state\n");
 	result = __wa_wait_status(&hwahc->wa, WA_STATUS_RESETTING, 0);
 	if (result < 0) {
 		dev_err(dev, "error waiting for HC to reset: %d\n", result);
@@ -141,7 +135,6 @@ static int hwahc_op_reset(struct usb_hcd *usb_hcd)
 	}
 error_unlock:
 	mutex_unlock(&wusbhc->mutex);
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
 	return result;
 }
 
@@ -154,15 +147,9 @@ static int hwahc_op_start(struct usb_hcd *usb_hcd)
 	int result;
 	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
 	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct device *dev = &hwahc->wa.usb_iface->dev;
 
-	/* Set up a Host Info WUSB Information Element */
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
 	result = -ENOSPC;
 	mutex_lock(&wusbhc->mutex);
-	/* Start the numbering from the top so that the bottom
-	 * range of the unauth addr space is used for devices,
-	 * the top for HCs; use 0xfe - RC# */
 	addr = wusb_cluster_id_get();
 	if (addr == 0)
 		goto error_cluster_id_get;
@@ -176,7 +163,6 @@ static int hwahc_op_start(struct usb_hcd *usb_hcd)
 	result = 0;
 out:
 	mutex_unlock(&wusbhc->mutex);
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
 	return result;
 
 error_set_cluster_id:
@@ -213,18 +199,11 @@ static int hwahc_op_resume(struct usb_hcd *usb_hcd)
  */
 static void hwahc_op_stop(struct usb_hcd *usb_hcd)
 {
-	int result;
 	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct wahc *wa = &hwahc->wa;
-	struct device *dev = &wa->usb_iface->dev;
 
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
 	mutex_lock(&wusbhc->mutex);
 	wusb_cluster_id_put(wusbhc->cluster_id);
 	mutex_unlock(&wusbhc->mutex);
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
-	return;
 }
 
 static int hwahc_op_get_frame_number(struct usb_hcd *usb_hcd)
@@ -573,11 +552,11 @@ static int wa_fill_descr(struct wahc *wa)
 	itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength);
 	while (itr_size >= sizeof(*hdr)) {
 		hdr = (struct usb_descriptor_header *) itr;
-		d_printf(3, dev, "Extra device descriptor: "
-			 "type %02x/%u bytes @ %zu (%zu left)\n",
-			 hdr->bDescriptorType, hdr->bLength,
-			 (itr - usb_dev->rawdescriptors[actconfig_idx]),
-			 itr_size);
+		dev_dbg(dev, "Extra device descriptor: "
+			"type %02x/%u bytes @ %zu (%zu left)\n",
+			hdr->bDescriptorType, hdr->bLength,
+			(itr - usb_dev->rawdescriptors[actconfig_idx]),
+			itr_size);
 		if (hdr->bDescriptorType == USB_DT_WIRE_ADAPTER)
 			goto found;
 		itr += hdr->bLength;
@@ -786,7 +765,6 @@ static void hwahc_destroy(struct hwahc *hwahc)
 {
 	struct wusbhc *wusbhc = &hwahc->wusbhc;
 
-	d_fnstart(1, NULL, "(hwahc %p)\n", hwahc);
 	mutex_lock(&wusbhc->mutex);
 	__wa_destroy(&hwahc->wa);
 	wusbhc_destroy(&hwahc->wusbhc);
@@ -796,7 +774,6 @@ static void hwahc_destroy(struct hwahc *hwahc)
 	usb_put_intf(hwahc->wa.usb_iface);
 	usb_put_dev(hwahc->wa.usb_dev);
 	mutex_unlock(&wusbhc->mutex);
-	d_fnend(1, NULL, "(hwahc %p) = void\n", hwahc);
 }
 
 static void hwahc_init(struct hwahc *hwahc)
@@ -813,7 +790,6 @@ static int hwahc_probe(struct usb_interface *usb_iface,
 	struct hwahc *hwahc;
 	struct device *dev = &usb_iface->dev;
 
-	d_fnstart(4, dev, "(%p, %p)\n", usb_iface, id);
 	result = -ENOMEM;
 	usb_hcd = usb_create_hcd(&hwahc_hc_driver, &usb_iface->dev, "wusb-hwa");
 	if (usb_hcd == NULL) {
@@ -840,7 +816,6 @@ static int hwahc_probe(struct usb_interface *usb_iface,
 		dev_err(dev, "Cannot setup phase B of WUSBHC: %d\n", result);
 		goto error_wusbhc_b_create;
 	}
-	d_fnend(4, dev, "(%p, %p) = 0\n", usb_iface, id);
 	return 0;
 
 error_wusbhc_b_create:
@@ -850,7 +825,6 @@ error_add_hcd:
 error_hwahc_create:
 	usb_put_hcd(usb_hcd);
 error_alloc:
-	d_fnend(4, dev, "(%p, %p) = %d\n", usb_iface, id, result);
 	return result;
 }
 
@@ -864,16 +838,12 @@ static void hwahc_disconnect(struct usb_interface *usb_iface)
 	wusbhc = usb_hcd_to_wusbhc(usb_hcd);
 	hwahc = container_of(wusbhc, struct hwahc, wusbhc);
 
-	d_fnstart(1, NULL, "(hwahc %p [usb_iface %p])\n", hwahc, usb_iface);
 	wusbhc_b_destroy(&hwahc->wusbhc);
 	usb_remove_hcd(usb_hcd);
 	hwahc_destroy(hwahc);
 	usb_put_hcd(usb_hcd);
-	d_fnend(1, NULL, "(hwahc %p [usb_iface %p]) = void\n", hwahc,
-		usb_iface);
 }
 
-/** USB device ID's that we handle */
 static struct usb_device_id hwahc_id_table[] = {
 	/* FIXME: use class labels for this */
 	{ USB_INTERFACE_INFO(0xe0, 0x02, 0x01), },
@@ -890,18 +860,7 @@ static struct usb_driver hwahc_driver = {
 
 static int __init hwahc_driver_init(void)
 {
-	int result;
-	result = usb_register(&hwahc_driver);
-	if (result < 0) {
-		printk(KERN_ERR "WA-CDS: Cannot register USB driver: %d\n",
-		       result);
-		goto error_usb_register;
-	}
-	return 0;
-
-error_usb_register:
-	return result;
-
+	return usb_register(&hwahc_driver);
 }
 module_init(hwahc_driver_init);
 
diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c
index 0ca8603..9d9128a 100644
--- a/drivers/usb/wusbcore/crypto.c
+++ b/drivers/usb/wusbcore/crypto.c
@@ -51,7 +51,6 @@
 #include <linux/uwb.h>
 #include <linux/usb/wusb.h>
 #include <linux/scatterlist.h>
-#define D_LOCAL 0
 #include <linux/uwb/debug.h>
 
 static int debug_crypto_verify = 0;
@@ -207,9 +206,6 @@ static int wusb_ccm_mac(struct crypto_blkcipher *tfm_cbc,
 	const u8 bzero[16] = { 0 };
 	size_t zero_padding;
 
-	d_fnstart(3, NULL, "(tfm_cbc %p, tfm_aes %p, mic %p, "
-		  "n %p, a %p, b %p, blen %zu)\n",
-		  tfm_cbc, tfm_aes, mic, n, a, b, blen);
 	/*
 	 * These checks should be compile time optimized out
 	 * ensure @a fills b1's mac_header and following fields
@@ -251,16 +247,6 @@ static int wusb_ccm_mac(struct crypto_blkcipher *tfm_cbc,
 	b1.la = cpu_to_be16(blen + 14);
 	memcpy(&b1.mac_header, a, sizeof(*a));
 
-	d_printf(4, NULL, "I: B0 (%zu bytes)\n", sizeof(b0));
-	d_dump(4, NULL, &b0, sizeof(b0));
-	d_printf(4, NULL, "I: B1 (%zu bytes)\n", sizeof(b1));
-	d_dump(4, NULL, &b1, sizeof(b1));
-	d_printf(4, NULL, "I: B (%zu bytes)\n", blen);
-	d_dump(4, NULL, b, blen);
-	d_printf(4, NULL, "I: B 0-padding (%zu bytes)\n", zero_padding);
-	d_printf(4, NULL, "D: IV before crypto (%zu)\n", ivsize);
-	d_dump(4, NULL, iv, ivsize);
-
 	sg_init_table(sg, ARRAY_SIZE(sg));
 	sg_set_buf(&sg[0], &b0, sizeof(b0));
 	sg_set_buf(&sg[1], &b1, sizeof(b1));
@@ -277,8 +263,6 @@ static int wusb_ccm_mac(struct crypto_blkcipher *tfm_cbc,
 		       result);
 		goto error_cbc_crypt;
 	}
-	d_printf(4, NULL, "D: MIC tag\n");
-	d_dump(4, NULL, iv, ivsize);
 
 	/* Now we crypt the MIC Tag (*iv) with Ax -- values per WUSB1.0[6.5]
 	 * The procedure is to AES crypt the A0 block and XOR the MIC
@@ -293,17 +277,10 @@ static int wusb_ccm_mac(struct crypto_blkcipher *tfm_cbc,
 	ax.counter = 0;
 	crypto_cipher_encrypt_one(tfm_aes, (void *)&ax, (void *)&ax);
 	bytewise_xor(mic, &ax, iv, 8);
-	d_printf(4, NULL, "D: CTR[MIC]\n");
-	d_dump(4, NULL, &ax, 8);
-	d_printf(4, NULL, "D: CCM-MIC tag\n");
-	d_dump(4, NULL, mic, 8);
 	result = 8;
 error_cbc_crypt:
 	kfree(dst_buf);
 error_dst_buf:
-	d_fnend(3, NULL, "(tfm_cbc %p, tfm_aes %p, mic %p, "
-		"n %p, a %p, b %p, blen %zu)\n",
-		tfm_cbc, tfm_aes, mic, n, a, b, blen);
 	return result;
 }
 
@@ -325,10 +302,6 @@ ssize_t wusb_prf(void *out, size_t out_size,
 	u64 sfn = 0;
 	__le64 sfn_le;
 
-	d_fnstart(3, NULL, "(out %p, out_size %zu, key %p, _n %p, "
-		  "a %p, b %p, blen %zu, len %zu)\n", out, out_size,
-		  key, _n, a, b, blen, len);
-
 	tfm_cbc = crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm_cbc)) {
 		result = PTR_ERR(tfm_cbc);
@@ -370,9 +343,6 @@ error_alloc_aes:
 error_setkey_cbc:
 	crypto_free_blkcipher(tfm_cbc);
 error_alloc_cbc:
-	d_fnend(3, NULL, "(out %p, out_size %zu, key %p, _n %p, "
-		"a %p, b %p, blen %zu, len %zu) = %d\n", out, out_size,
-		key, _n, a, b, blen, len, (int)bytes);
 	return result;
 }
 
diff --git a/drivers/usb/wusbcore/dev-sysfs.c b/drivers/usb/wusbcore/dev-sysfs.c
index 7897a19..1018345 100644
--- a/drivers/usb/wusbcore/dev-sysfs.c
+++ b/drivers/usb/wusbcore/dev-sysfs.c
@@ -28,10 +28,6 @@
 #include <linux/workqueue.h>
 #include "wusbhc.h"
 
-#undef D_LOCAL
-#define D_LOCAL 4
-#include <linux/uwb/debug.h>
-
 static ssize_t wusb_disconnect_store(struct device *dev,
 				     struct device_attribute *attr,
 				     const char *buf, size_t size)
diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c
index 26cbc89..e2e7e4b 100644
--- a/drivers/usb/wusbcore/devconnect.c
+++ b/drivers/usb/wusbcore/devconnect.c
@@ -91,10 +91,6 @@
 #include <linux/workqueue.h>
 #include "wusbhc.h"
 
-#undef D_LOCAL
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
-
 static void wusbhc_devconnect_acked_work(struct work_struct *work);
 
 static void wusb_dev_free(struct wusb_dev *wusb_dev)
@@ -234,6 +230,7 @@ static struct wusb_dev *wusbhc_cack_add(struct wusbhc *wusbhc,
 	list_add_tail(&wusb_dev->cack_node, &wusbhc->cack_list);
 	wusbhc->cack_count++;
 	wusbhc_fill_cack_ie(wusbhc);
+
 	return wusb_dev;
 }
 
@@ -244,12 +241,9 @@ static struct wusb_dev *wusbhc_cack_add(struct wusbhc *wusbhc,
  */
 static void wusbhc_cack_rm(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
 {
-	struct device *dev = wusbhc->dev;
-	d_fnstart(3, dev, "(wusbhc %p wusb_dev %p)\n", wusbhc, wusb_dev);
 	list_del_init(&wusb_dev->cack_node);
 	wusbhc->cack_count--;
 	wusbhc_fill_cack_ie(wusbhc);
-	d_fnend(3, dev, "(wusbhc %p wusb_dev %p) = void\n", wusbhc, wusb_dev);
 }
 
 /*
@@ -257,14 +251,11 @@ static void wusbhc_cack_rm(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
 static
 void wusbhc_devconnect_acked(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
 {
-	struct device *dev = wusbhc->dev;
-	d_fnstart(3, dev, "(wusbhc %p wusb_dev %p)\n", wusbhc, wusb_dev);
 	wusbhc_cack_rm(wusbhc, wusb_dev);
 	if (wusbhc->cack_count)
 		wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->cack_ie.hdr);
 	else
 		wusbhc_mmcie_rm(wusbhc, &wusbhc->cack_ie.hdr);
-	d_fnend(3, dev, "(wusbhc %p wusb_dev %p) = void\n", wusbhc, wusb_dev);
 }
 
 static void wusbhc_devconnect_acked_work(struct work_struct *work)
@@ -314,7 +305,6 @@ void wusbhc_devconnect_ack(struct wusbhc *wusbhc, struct wusb_dn_connect *dnc,
 	struct wusb_port *port;
 	unsigned idx, devnum;
 
-	d_fnstart(3, dev, "(%p, %p, %s)\n", wusbhc, dnc, pr_cdid);
 	mutex_lock(&wusbhc->mutex);
 
 	/* Check we are not handling it already */
@@ -367,7 +357,6 @@ void wusbhc_devconnect_ack(struct wusbhc *wusbhc, struct wusb_dn_connect *dnc,
 	 */
 error_unlock:
 	mutex_unlock(&wusbhc->mutex);
-	d_fnend(3, dev, "(%p, %p, %s) = void\n", wusbhc, dnc, pr_cdid);
 	return;
 
 }
@@ -390,10 +379,8 @@ error_unlock:
 static void __wusbhc_dev_disconnect(struct wusbhc *wusbhc,
 				    struct wusb_port *port)
 {
-	struct device *dev = wusbhc->dev;
 	struct wusb_dev *wusb_dev = port->wusb_dev;
 
-	d_fnstart(3, dev, "(wusbhc %p, port %p)\n", wusbhc, port);
 	port->status &= ~(USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE
 			  | USB_PORT_STAT_SUSPEND | USB_PORT_STAT_RESET
 			  | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED);
@@ -410,7 +397,6 @@ static void __wusbhc_dev_disconnect(struct wusbhc *wusbhc,
 	 * section 6.2.11.2). */
 	wusbhc_gtk_rekey(wusbhc);
 
-	d_fnend(3, dev, "(wusbhc %p, port %p) = void\n", wusbhc, port);
 	/* The Wireless USB part has forgotten about the device already; now
 	 * khubd's timer will pick up the disconnection and remove the USB
 	 * device from the system
@@ -535,10 +521,6 @@ static struct wusb_dev *wusbhc_find_dev_by_addr(struct wusbhc *wusbhc, u8 addr)
  */
 static void wusbhc_handle_dn_alive(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
 {
-	struct device *dev = wusbhc->dev;
-
-	d_printf(2, dev, "DN ALIVE: device 0x%02x pong\n", wusb_dev->addr);
-
 	mutex_lock(&wusbhc->mutex);
 	wusb_dev->entry_ts = jiffies;
 	__wusbhc_keep_alive(wusbhc);
@@ -571,11 +553,10 @@ static void wusbhc_handle_dn_connect(struct wusbhc *wusbhc,
 		"no-beacon"
 	};
 
-	d_fnstart(3, dev, "(%p, %p, %zu)\n", wusbhc, dn_hdr, size);
 	if (size < sizeof(*dnc)) {
 		dev_err(dev, "DN CONNECT: short notification (%zu < %zu)\n",
 			size, sizeof(*dnc));
-		goto out;
+		return;
 	}
 
 	dnc = container_of(dn_hdr, struct wusb_dn_connect, hdr);
@@ -587,10 +568,6 @@ static void wusbhc_handle_dn_connect(struct wusbhc *wusbhc,
 		 wusb_dn_connect_new_connection(dnc) ? "connect" : "reconnect");
 	/* ACK the connect */
 	wusbhc_devconnect_ack(wusbhc, dnc, pr_cdid);
-out:
-	d_fnend(3, dev, "(%p, %p, %zu) = void\n",
-		wusbhc, dn_hdr, size);
-	return;
 }
 
 /*
@@ -631,19 +608,17 @@ void wusbhc_handle_dn(struct wusbhc *wusbhc, u8 srcaddr,
 	struct device *dev = wusbhc->dev;
 	struct wusb_dev *wusb_dev;
 
-	d_fnstart(3, dev, "(%p, %p)\n", wusbhc, dn_hdr);
-
 	if (size < sizeof(struct wusb_dn_hdr)) {
 		dev_err(dev, "DN data shorter than DN header (%d < %d)\n",
 			(int)size, (int)sizeof(struct wusb_dn_hdr));
-		goto out;
+		return;
 	}
 
 	wusb_dev = wusbhc_find_dev_by_addr(wusbhc, srcaddr);
 	if (wusb_dev == NULL && dn_hdr->bType != WUSB_DN_CONNECT) {
 		dev_dbg(dev, "ignoring DN %d from unconnected device %02x\n",
 			dn_hdr->bType, srcaddr);
-		goto out;
+		return;
 	}
 
 	switch (dn_hdr->bType) {
@@ -668,9 +643,6 @@ void wusbhc_handle_dn(struct wusbhc *wusbhc, u8 srcaddr,
 		dev_warn(dev, "unknown DN %u (%d octets) from %u\n",
 			 dn_hdr->bType, (int)size, srcaddr);
 	}
-out:
-	d_fnend(3, dev, "(%p, %p) = void\n", wusbhc, dn_hdr);
-	return;
 }
 EXPORT_SYMBOL_GPL(wusbhc_handle_dn);
 
@@ -700,59 +672,30 @@ void __wusbhc_dev_disable(struct wusbhc *wusbhc, u8 port_idx)
 	struct wusb_dev *wusb_dev;
 	struct wuie_disconnect *ie;
 
-	d_fnstart(3, dev, "(%p, %u)\n", wusbhc, port_idx);
-	result = 0;
 	wusb_dev = wusb_port_by_idx(wusbhc, port_idx)->wusb_dev;
 	if (wusb_dev == NULL) {
 		/* reset no device? ignore */
 		dev_dbg(dev, "DISCONNECT: no device at port %u, ignoring\n",
 			port_idx);
-		goto error;
+		return;
 	}
 	__wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc, port_idx));
 
-	result = -ENOMEM;
 	ie = kzalloc(sizeof(*ie), GFP_KERNEL);
 	if (ie == NULL)
-		goto error;
+		return;
 	ie->hdr.bLength = sizeof(*ie);
 	ie->hdr.bIEIdentifier = WUIE_ID_DEVICE_DISCONNECT;
 	ie->bDeviceAddress = wusb_dev->addr;
 	result = wusbhc_mmcie_set(wusbhc, 0, 0, &ie->hdr);
-	if (result < 0) {
+	if (result < 0)
 		dev_err(dev, "DISCONNECT: can't set MMC: %d\n", result);
-		goto error_kfree;
+	else {
+		/* At least 6 MMCs, assuming at least 1 MMC per zone. */
+		msleep(7*4);
+		wusbhc_mmcie_rm(wusbhc, &ie->hdr);
 	}
-
-	/* 120ms, hopefully 6 MMCs */
-	msleep(100);
-	wusbhc_mmcie_rm(wusbhc, &ie->hdr);
-error_kfree:
 	kfree(ie);
-error:
-	d_fnend(3, dev, "(%p, %u) = %d\n", wusbhc, port_idx, result);
-	return;
-}
-
-static void wusb_cap_descr_printf(const unsigned level, struct device *dev,
-				  const struct usb_wireless_cap_descriptor *wcd)
-{
-	d_printf(level, dev,
-		 "WUSB Capability Descriptor\n"
-		 "  bDevCapabilityType          0x%02x\n"
-		 "  bmAttributes                0x%02x\n"
-		 "  wPhyRates                   0x%04x\n"
-		 "  bmTFITXPowerInfo            0x%02x\n"
-		 "  bmFFITXPowerInfo            0x%02x\n"
-		 "  bmBandGroup                 0x%04x\n"
-		 "  bReserved                   0x%02x\n",
-		 wcd->bDevCapabilityType,
-		 wcd->bmAttributes,
-		 le16_to_cpu(wcd->wPHYRates),
-		 wcd->bmTFITXPowerInfo,
-		 wcd->bmFFITXPowerInfo,
-		 wcd->bmBandGroup,
-		 wcd->bReserved);
 }
 
 /*
@@ -795,8 +738,6 @@ static int wusb_dev_bos_grok(struct usb_device *usb_dev,
 		}
 		cap_size = cap_hdr->bLength;
 		cap_type = cap_hdr->bDevCapabilityType;
-		d_printf(4, dev, "BOS Capability: 0x%02x (%zu bytes)\n",
-			 cap_type, cap_size);
 		if (cap_size == 0)
 			break;
 		if (cap_size > top - itr) {
@@ -808,7 +749,6 @@ static int wusb_dev_bos_grok(struct usb_device *usb_dev,
 			result = -EBADF;
 			goto error_bad_cap;
 		}
-		d_dump(3, dev, itr, cap_size);
 		switch (cap_type) {
 		case USB_CAP_TYPE_WIRELESS_USB:
 			if (cap_size != sizeof(*wusb_dev->wusb_cap_descr))
@@ -816,10 +756,8 @@ static int wusb_dev_bos_grok(struct usb_device *usb_dev,
 					"descriptor is %zu bytes vs %zu "
 					"needed\n", cap_size,
 					sizeof(*wusb_dev->wusb_cap_descr));
-			else {
+			else
 				wusb_dev->wusb_cap_descr = itr;
-				wusb_cap_descr_printf(3, dev, itr);
-			}
 			break;
 		default:
 			dev_err(dev, "BUG? Unknown BOS capability 0x%02x "
@@ -884,9 +822,7 @@ static int wusb_dev_bos_add(struct usb_device *usb_dev,
 			"%zu bytes): %zd\n", desc_size, result);
 		goto error_get_descriptor;
 	}
-	d_printf(2, dev, "Got BOS descriptor %zd bytes, %u capabilities\n",
-		 result, bos->bNumDeviceCaps);
-	d_dump(2, dev, bos, result);
+
 	result = wusb_dev_bos_grok(usb_dev, wusb_dev, bos, result);
 	if (result < 0)
 		goto error_bad_bos;
@@ -952,8 +888,6 @@ static void wusb_dev_add_ncb(struct usb_device *usb_dev)
 	if (usb_dev->wusb == 0 || usb_dev->devnum == 1)
 		return;		/* skip non wusb and wusb RHs */
 
-	d_fnstart(3, dev, "(usb_dev %p)\n", usb_dev);
-
 	wusbhc = wusbhc_get_by_usb_dev(usb_dev);
 	if (wusbhc == NULL)
 		goto error_nodev;
@@ -983,7 +917,6 @@ out:
 	wusb_dev_put(wusb_dev);
 	wusbhc_put(wusbhc);
 error_nodev:
-	d_fnend(3, dev, "(usb_dev %p) = void\n", usb_dev);
 	return;
 
 	wusb_dev_sysfs_rm(wusb_dev);
@@ -1070,11 +1003,10 @@ EXPORT_SYMBOL_GPL(__wusb_dev_get_by_usb_dev);
 
 void wusb_dev_destroy(struct kref *_wusb_dev)
 {
-	struct wusb_dev *wusb_dev
-		= container_of(_wusb_dev, struct wusb_dev, refcnt);
+	struct wusb_dev *wusb_dev = container_of(_wusb_dev, struct wusb_dev, refcnt);
+
 	list_del_init(&wusb_dev->cack_node);
 	wusb_dev_free(wusb_dev);
-	d_fnend(1, NULL, "%s (wusb_dev %p) = void\n", __func__, wusb_dev);
 }
 EXPORT_SYMBOL_GPL(wusb_dev_destroy);
 
@@ -1086,8 +1018,6 @@ EXPORT_SYMBOL_GPL(wusb_dev_destroy);
  */
 int wusbhc_devconnect_create(struct wusbhc *wusbhc)
 {
-	d_fnstart(3, wusbhc->dev, "(wusbhc %p)\n", wusbhc);
-
 	wusbhc->keep_alive_ie.hdr.bIEIdentifier = WUIE_ID_KEEP_ALIVE;
 	wusbhc->keep_alive_ie.hdr.bLength = sizeof(wusbhc->keep_alive_ie.hdr);
 	INIT_DELAYED_WORK(&wusbhc->keep_alive_timer, wusbhc_keep_alive_run);
@@ -1096,7 +1026,6 @@ int wusbhc_devconnect_create(struct wusbhc *wusbhc)
 	wusbhc->cack_ie.hdr.bLength = sizeof(wusbhc->cack_ie.hdr);
 	INIT_LIST_HEAD(&wusbhc->cack_list);
 
-	d_fnend(3, wusbhc->dev, "(wusbhc %p) = void\n", wusbhc);
 	return 0;
 }
 
@@ -1105,8 +1034,7 @@ int wusbhc_devconnect_create(struct wusbhc *wusbhc)
  */
 void wusbhc_devconnect_destroy(struct wusbhc *wusbhc)
 {
-	d_fnstart(3, wusbhc->dev, "(wusbhc %p)\n", wusbhc);
-	d_fnend(3, wusbhc->dev, "(wusbhc %p) = void\n", wusbhc);
+	/* no op */
 }
 
 /*
diff --git a/drivers/usb/wusbcore/rh.c b/drivers/usb/wusbcore/rh.c
index 1c73319..95c6fa3 100644
--- a/drivers/usb/wusbcore/rh.c
+++ b/drivers/usb/wusbcore/rh.c
@@ -71,9 +71,6 @@
  */
 #include "wusbhc.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
 /*
  * Reset a fake port
  *
@@ -142,7 +139,6 @@ int wusbhc_rh_status_data(struct usb_hcd *usb_hcd, char *_buf)
 	size_t cnt, size;
 	unsigned long *buf = (unsigned long *) _buf;
 
-	d_fnstart(1, wusbhc->dev, "(wusbhc %p)\n", wusbhc);
 	/* WE DON'T LOCK, see comment */
 	size = wusbhc->ports_max + 1 /* hub bit */;
 	size = (size + 8 - 1) / 8;	/* round to bytes */
@@ -151,8 +147,6 @@ int wusbhc_rh_status_data(struct usb_hcd *usb_hcd, char *_buf)
 			set_bit(cnt + 1, buf);
 		else
 			clear_bit(cnt + 1, buf);
-	d_fnend(1, wusbhc->dev, "(wusbhc %p) %u, buffer:\n", wusbhc, (int)size);
-	d_dump(1, wusbhc->dev, _buf, size);
 	return size;
 }
 EXPORT_SYMBOL_GPL(wusbhc_rh_status_data);
@@ -201,9 +195,7 @@ static int wusbhc_rh_get_hub_descr(struct wusbhc *wusbhc, u16 wValue,
 static int wusbhc_rh_clear_hub_feat(struct wusbhc *wusbhc, u16 feature)
 {
 	int result;
-	struct device *dev = wusbhc->dev;
 
-	d_fnstart(4, dev, "(%p, feature 0x%04u)\n", wusbhc, feature);
 	switch (feature) {
 	case C_HUB_LOCAL_POWER:
 		/* FIXME: maybe plug bit 0 to the power input status,
@@ -215,7 +207,6 @@ static int wusbhc_rh_clear_hub_feat(struct wusbhc *wusbhc, u16 feature)
 	default:
 		result = -EPIPE;
 	}
-	d_fnend(4, dev, "(%p, feature 0x%04u), %d\n", wusbhc, feature, result);
 	return result;
 }
 
@@ -242,14 +233,10 @@ static int wusbhc_rh_get_hub_status(struct wusbhc *wusbhc, u32 *buf,
 static int wusbhc_rh_set_port_feat(struct wusbhc *wusbhc, u16 feature,
 				   u8 selector, u8 port_idx)
 {
-	int result = -EINVAL;
 	struct device *dev = wusbhc->dev;
 
-	d_fnstart(4, dev, "(feat 0x%04u, selector 0x%u, port_idx %d)\n",
-		  feature, selector, port_idx);
-
 	if (port_idx > wusbhc->ports_max)
-		goto error;
+		return -EINVAL;
 
 	switch (feature) {
 		/* According to USB2.0[11.24.2.13]p2, these features
@@ -259,35 +246,27 @@ static int wusbhc_rh_set_port_feat(struct wusbhc *wusbhc, u16 feature,
 	case USB_PORT_FEAT_C_SUSPEND:
 	case USB_PORT_FEAT_C_CONNECTION:
 	case USB_PORT_FEAT_C_RESET:
-		result = 0;
-		break;
-
+		return 0;
 	case USB_PORT_FEAT_POWER:
 		/* No such thing, but we fake it works */
 		mutex_lock(&wusbhc->mutex);
 		wusb_port_by_idx(wusbhc, port_idx)->status |= USB_PORT_STAT_POWER;
 		mutex_unlock(&wusbhc->mutex);
-		result = 0;
-		break;
+		return 0;
 	case USB_PORT_FEAT_RESET:
-		result = wusbhc_rh_port_reset(wusbhc, port_idx);
-		break;
+		return wusbhc_rh_port_reset(wusbhc, port_idx);
 	case USB_PORT_FEAT_ENABLE:
 	case USB_PORT_FEAT_SUSPEND:
 		dev_err(dev, "(port_idx %d) set feat %d/%d UNIMPLEMENTED\n",
 			port_idx, feature, selector);
-		result = -ENOSYS;
-		break;
+		return -ENOSYS;
 	default:
 		dev_err(dev, "(port_idx %d) set feat %d/%d UNKNOWN\n",
 			port_idx, feature, selector);
-		result = -EPIPE;
-		break;
+		return -EPIPE;
 	}
-error:
-	d_fnend(4, dev, "(feat 0x%04u, selector 0x%u, port_idx %d) = %d\n",
-		feature, selector, port_idx, result);
-	return result;
+
+	return 0;
 }
 
 /*
@@ -298,17 +277,13 @@ error:
 static int wusbhc_rh_clear_port_feat(struct wusbhc *wusbhc, u16 feature,
 				     u8 selector, u8 port_idx)
 {
-	int result = -EINVAL;
+	int result = 0;
 	struct device *dev = wusbhc->dev;
 
-	d_fnstart(4, dev, "(wusbhc %p feat 0x%04x selector %d port_idx %d)\n",
-		  wusbhc, feature, selector, port_idx);
-
 	if (port_idx > wusbhc->ports_max)
-		goto error;
+		return -EINVAL;
 
 	mutex_lock(&wusbhc->mutex);
-	result = 0;
 	switch (feature) {
 	case USB_PORT_FEAT_POWER:	/* fake port always on */
 		/* According to USB2.0[11.24.2.7.1.4], no need to implement? */
@@ -328,10 +303,8 @@ static int wusbhc_rh_clear_port_feat(struct wusbhc *wusbhc, u16 feature,
 		break;
 	case USB_PORT_FEAT_SUSPEND:
 	case USB_PORT_FEAT_C_SUSPEND:
-	case 0xffff:		/* ??? FIXME */
 		dev_err(dev, "(port_idx %d) Clear feat %d/%d UNIMPLEMENTED\n",
 			port_idx, feature, selector);
-		/* dump_stack(); */
 		result = -ENOSYS;
 		break;
 	default:
@@ -341,9 +314,7 @@ static int wusbhc_rh_clear_port_feat(struct wusbhc *wusbhc, u16 feature,
 		break;
 	}
 	mutex_unlock(&wusbhc->mutex);
-error:
-	d_fnend(4, dev, "(wusbhc %p feat 0x%04x selector %d port_idx %d) = "
-		"%d\n", wusbhc, feature, selector, port_idx, result);
+
 	return result;
 }
 
@@ -355,22 +326,17 @@ error:
 static int wusbhc_rh_get_port_status(struct wusbhc *wusbhc, u16 port_idx,
 				     u32 *_buf, u16 wLength)
 {
-	int result = -EINVAL;
 	u16 *buf = (u16 *) _buf;
 
-	d_fnstart(1, wusbhc->dev, "(wusbhc %p port_idx %u wLength %u)\n",
-		  wusbhc, port_idx, wLength);
 	if (port_idx > wusbhc->ports_max)
-		goto error;
+		return -EINVAL;
+
 	mutex_lock(&wusbhc->mutex);
 	buf[0] = cpu_to_le16(wusb_port_by_idx(wusbhc, port_idx)->status);
 	buf[1] = cpu_to_le16(wusb_port_by_idx(wusbhc, port_idx)->change);
-	result = 0;
 	mutex_unlock(&wusbhc->mutex);
-error:
-	d_fnend(1, wusbhc->dev, "(wusbhc %p) = %d, buffer:\n", wusbhc, result);
-	d_dump(1, wusbhc->dev, _buf, wLength);
-	return result;
+
+	return 0;
 }
 
 /*
diff --git a/drivers/usb/wusbcore/security.c b/drivers/usb/wusbcore/security.c
index ac00640..f4aa28e 100644
--- a/drivers/usb/wusbcore/security.c
+++ b/drivers/usb/wusbcore/security.c
@@ -27,19 +27,6 @@
 #include <linux/random.h>
 #include "wusbhc.h"
 
-/*
- * DEBUG & SECURITY WARNING!!!!
- *
- * If you enable this past 1, the debug code will weaken the
- * cryptographic safety of the system (on purpose, for debugging).
- *
- * Weaken means:
- *   we print secret keys and intermediate values all the way,
- */
-#undef D_LOCAL
-#define D_LOCAL 2
-#include <linux/uwb/debug.h>
-
 static void wusbhc_set_gtk_callback(struct urb *urb);
 static void wusbhc_gtk_rekey_done_work(struct work_struct *work);
 
@@ -219,7 +206,6 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc,
 	const void *itr, *top;
 	char buf[64];
 
-	d_fnstart(3, dev, "(usb_dev %p, wusb_dev %p)\n", usb_dev, wusb_dev);
 	result = usb_get_descriptor(usb_dev, USB_DT_SECURITY,
 				    0, &secd, sizeof(secd));
 	if (result < sizeof(secd)) {
@@ -228,8 +214,6 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc,
 		goto error_secd;
 	}
 	secd_size = le16_to_cpu(secd.wTotalLength);
-	d_printf(5, dev, "got %d bytes of sec descriptor, total is %d\n",
-		 result, secd_size);
 	secd_buf = kmalloc(secd_size, GFP_KERNEL);
 	if (secd_buf == NULL) {
 		dev_err(dev, "Can't allocate space for security descriptors\n");
@@ -242,7 +226,6 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc,
 			"not enough data: %d\n", result);
 		goto error_secd_all;
 	}
-	d_printf(5, dev, "got %d bytes of sec descriptors\n", result);
 	bytes = 0;
 	itr = secd_buf + sizeof(secd);
 	top = secd_buf + result;
@@ -279,14 +262,12 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc,
 		goto error_no_ccm1;
 	}
 	wusb_dev->ccm1_etd = *ccm1_etd;
-	dev_info(dev, "supported encryption: %s; using %s (0x%02x/%02x)\n",
-		 buf, wusb_et_name(ccm1_etd->bEncryptionType),
-		 ccm1_etd->bEncryptionValue, ccm1_etd->bAuthKeyIndex);
+	dev_dbg(dev, "supported encryption: %s; using %s (0x%02x/%02x)\n",
+		buf, wusb_et_name(ccm1_etd->bEncryptionType),
+		ccm1_etd->bEncryptionValue, ccm1_etd->bAuthKeyIndex);
 	result = 0;
 	kfree(secd_buf);
 out:
-	d_fnend(3, dev, "(usb_dev %p, wusb_dev %p) = %d\n",
-		usb_dev, wusb_dev, result);
 	return result;
 
 
@@ -303,32 +284,6 @@ void wusb_dev_sec_rm(struct wusb_dev *wusb_dev)
 	/* Nothing so far */
 }
 
-static void hs_printk(unsigned level, struct device *dev,
-		      struct usb_handshake *hs)
-{
-	d_printf(level, dev,
-		 "  bMessageNumber: %u\n"
-		 "  bStatus:        %u\n"
-		 "  tTKID:          %02x %02x %02x\n"
-		 "  CDID:           %02x %02x %02x %02x %02x %02x %02x %02x\n"
-		 "                  %02x %02x %02x %02x %02x %02x %02x %02x\n"
-		 "  nonce:          %02x %02x %02x %02x %02x %02x %02x %02x\n"
-		 "                  %02x %02x %02x %02x %02x %02x %02x %02x\n"
-		 "  MIC:            %02x %02x %02x %02x %02x %02x %02x %02x\n",
-		 hs->bMessageNumber, hs->bStatus,
-		 hs->tTKID[2], hs->tTKID[1], hs->tTKID[0],
-		 hs->CDID[0], hs->CDID[1], hs->CDID[2], hs->CDID[3],
-		 hs->CDID[4], hs->CDID[5], hs->CDID[6], hs->CDID[7],
-		 hs->CDID[8], hs->CDID[9], hs->CDID[10], hs->CDID[11],
-		 hs->CDID[12], hs->CDID[13], hs->CDID[14], hs->CDID[15],
-		 hs->nonce[0], hs->nonce[1], hs->nonce[2], hs->nonce[3],
-		 hs->nonce[4], hs->nonce[5], hs->nonce[6], hs->nonce[7],
-		 hs->nonce[8], hs->nonce[9], hs->nonce[10], hs->nonce[11],
-		 hs->nonce[12], hs->nonce[13], hs->nonce[14], hs->nonce[15],
-		 hs->MIC[0], hs->MIC[1], hs->MIC[2], hs->MIC[3],
-		 hs->MIC[4], hs->MIC[5], hs->MIC[6], hs->MIC[7]);
-}
-
 /**
  * Update the address of an unauthenticated WUSB device
  *
@@ -421,9 +376,6 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev,
 	get_random_bytes(&hs[0].nonce, sizeof(hs[0].nonce));
 	memset(hs[0].MIC, 0, sizeof(hs[0].MIC));	/* Per WUSB1.0[T7-22] */
 
-	d_printf(1, dev, "I: sending hs1:\n");
-	hs_printk(2, dev, &hs[0]);
-
 	result = usb_control_msg(
 		usb_dev, usb_sndctrlpipe(usb_dev, 0),
 		USB_REQ_SET_HANDSHAKE,
@@ -444,8 +396,6 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev,
 		dev_err(dev, "Handshake2: request failed: %d\n", result);
 		goto error_hs2;
 	}
-	d_printf(1, dev, "got HS2:\n");
-	hs_printk(2, dev, &hs[1]);
 
 	result = -EINVAL;
 	if (hs[1].bMessageNumber != 2) {
@@ -486,10 +436,6 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev,
 			result);
 		goto error_hs2;
 	}
-	d_printf(2, dev, "KCK:\n");
-	d_dump(2, dev, keydvt_out.kck, sizeof(keydvt_out.kck));
-	d_printf(2, dev, "PTK:\n");
-	d_dump(2, dev, keydvt_out.ptk, sizeof(keydvt_out.ptk));
 
 	/* Compute MIC and verify it */
 	result = wusb_oob_mic(mic, keydvt_out.kck, &ccm_n, &hs[1]);
@@ -499,8 +445,6 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev,
 		goto error_hs2;
 	}
 
-	d_printf(2, dev, "MIC:\n");
-	d_dump(2, dev, mic, sizeof(mic));
 	if (memcmp(hs[1].MIC, mic, sizeof(hs[1].MIC))) {
 		dev_err(dev, "Handshake2 failed: MIC mismatch\n");
 		goto error_hs2;
@@ -520,9 +464,6 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev,
 		goto error_hs2;
 	}
 
-	d_printf(1, dev, "I: sending hs3:\n");
-	hs_printk(2, dev, &hs[2]);
-
 	result = usb_control_msg(
 		usb_dev, usb_sndctrlpipe(usb_dev, 0),
 		USB_REQ_SET_HANDSHAKE,
@@ -533,14 +474,11 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev,
 		goto error_hs3;
 	}
 
-	d_printf(1, dev, "I: turning on encryption on host for device\n");
-	d_dump(2, dev, keydvt_out.ptk, sizeof(keydvt_out.ptk));
 	result = wusbhc->set_ptk(wusbhc, wusb_dev->port_idx, tkid,
 				 keydvt_out.ptk, sizeof(keydvt_out.ptk));
 	if (result < 0)
 		goto error_wusbhc_set_ptk;
 
-	d_printf(1, dev, "I: setting a GTK\n");
 	result = wusb_dev_set_gtk(wusbhc, wusb_dev);
 	if (result < 0) {
 		dev_err(dev, "Set GTK for device: request failed: %d\n",
@@ -550,13 +488,12 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev,
 
 	/* Update the device's address from unauth to auth */
 	if (usb_dev->authenticated == 0) {
-		d_printf(1, dev, "I: updating addres to auth from non-auth\n");
 		result = wusb_dev_update_address(wusbhc, wusb_dev);
 		if (result < 0)
 			goto error_dev_update_address;
 	}
 	result = 0;
-	d_printf(1, dev, "I: 4way handshke done, device authenticated\n");
+	dev_info(dev, "device authenticated\n");
 
 error_dev_update_address:
 error_wusbhc_set_gtk:
@@ -569,10 +506,8 @@ error_hs1:
 	memset(&keydvt_in, 0, sizeof(keydvt_in));
 	memset(&ccm_n, 0, sizeof(ccm_n));
 	memset(mic, 0, sizeof(mic));
-	if (result < 0) {
-		/* error path */
+	if (result < 0)
 		wusb_dev_set_encryption(usb_dev, 0);
-	}
 error_dev_set_encryption:
 	kfree(hs);
 error_kzalloc:
diff --git a/drivers/usb/wusbcore/wa-rpipe.c b/drivers/usb/wusbcore/wa-rpipe.c
index f18e4aa..7369655 100644
--- a/drivers/usb/wusbcore/wa-rpipe.c
+++ b/drivers/usb/wusbcore/wa-rpipe.c
@@ -60,13 +60,10 @@
 #include <linux/init.h>
 #include <asm/atomic.h>
 #include <linux/bitmap.h>
+
 #include "wusbhc.h"
 #include "wa-hc.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
-
 static int __rpipe_get_descr(struct wahc *wa,
 			     struct usb_rpipe_descriptor *descr, u16 index)
 {
@@ -76,7 +73,6 @@ static int __rpipe_get_descr(struct wahc *wa,
 	/* Get the RPIPE descriptor -- we cannot use the usb_get_descriptor()
 	 * function because the arguments are different.
 	 */
-	d_printf(1, dev, "rpipe %u: get descr\n", index);
 	result = usb_control_msg(
 		wa->usb_dev, usb_rcvctrlpipe(wa->usb_dev, 0),
 		USB_REQ_GET_DESCRIPTOR,
@@ -115,7 +111,6 @@ static int __rpipe_set_descr(struct wahc *wa,
 	/* we cannot use the usb_get_descriptor() function because the
 	 * arguments are different.
 	 */
-	d_printf(1, dev, "rpipe %u: set descr\n", index);
 	result = usb_control_msg(
 		wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
 		USB_REQ_SET_DESCRIPTOR,
@@ -174,13 +169,12 @@ void rpipe_destroy(struct kref *_rpipe)
 {
 	struct wa_rpipe *rpipe = container_of(_rpipe, struct wa_rpipe, refcnt);
 	u8 index = le16_to_cpu(rpipe->descr.wRPipeIndex);
-	d_fnstart(1, NULL, "(rpipe %p %u)\n", rpipe, index);
+
 	if (rpipe->ep)
 		rpipe->ep->hcpriv = NULL;
 	rpipe_put_idx(rpipe->wa, index);
 	wa_put(rpipe->wa);
 	kfree(rpipe);
-	d_fnend(1, NULL, "(rpipe %p %u)\n", rpipe, index);
 }
 EXPORT_SYMBOL_GPL(rpipe_destroy);
 
@@ -202,7 +196,6 @@ static int rpipe_get_idle(struct wa_rpipe **prpipe, struct wahc *wa, u8 crs,
 	struct wa_rpipe *rpipe;
 	struct device *dev = &wa->usb_iface->dev;
 
-	d_fnstart(3, dev, "(wa %p crs 0x%02x)\n", wa, crs);
 	rpipe = kzalloc(sizeof(*rpipe), gfp);
 	if (rpipe == NULL)
 		return -ENOMEM;
@@ -223,14 +216,12 @@ static int rpipe_get_idle(struct wa_rpipe **prpipe, struct wahc *wa, u8 crs,
 	}
 	*prpipe = NULL;
 	kfree(rpipe);
-	d_fnend(3, dev, "(wa %p crs 0x%02x) = -ENXIO\n", wa, crs);
 	return -ENXIO;
 
 found:
 	set_bit(rpipe_idx, wa->rpipe_bm);
 	rpipe->wa = wa_get(wa);
 	*prpipe = rpipe;
-	d_fnstart(3, dev, "(wa %p crs 0x%02x) = 0\n", wa, crs);
 	return 0;
 }
 
@@ -239,7 +230,6 @@ static int __rpipe_reset(struct wahc *wa, unsigned index)
 	int result;
 	struct device *dev = &wa->usb_iface->dev;
 
-	d_printf(1, dev, "rpipe %u: reset\n", index);
 	result = usb_control_msg(
 		wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
 		USB_REQ_RPIPE_RESET,
@@ -276,7 +266,6 @@ static struct usb_wireless_ep_comp_descriptor *rpipe_epc_find(
 	struct usb_descriptor_header *hdr;
 	struct usb_wireless_ep_comp_descriptor *epcd;
 
-	d_fnstart(3, dev, "(ep %p)\n", ep);
 	if (ep->desc.bEndpointAddress == 0) {
 		epcd = &epc0;
 		goto out;
@@ -310,7 +299,6 @@ static struct usb_wireless_ep_comp_descriptor *rpipe_epc_find(
 		itr_size -= hdr->bDescriptorType;
 	}
 out:
-	d_fnend(3, dev, "(ep %p) = %p\n", ep, epcd);
 	return epcd;
 }
 
@@ -329,8 +317,6 @@ static int rpipe_aim(struct wa_rpipe *rpipe, struct wahc *wa,
 	struct usb_wireless_ep_comp_descriptor *epcd;
 	u8 unauth;
 
-	d_fnstart(3, dev, "(rpipe %p wa %p ep %p, urb %p)\n",
-		    rpipe, wa, ep, urb);
 	epcd = rpipe_epc_find(dev, ep);
 	if (epcd == NULL) {
 		dev_err(dev, "ep 0x%02x: can't find companion descriptor\n",
@@ -350,10 +336,12 @@ static int rpipe_aim(struct wa_rpipe *rpipe, struct wahc *wa,
 	/* FIXME: use maximum speed as supported or recommended by device */
 	rpipe->descr.bSpeed = usb_pipeendpoint(urb->pipe) == 0 ?
 		UWB_PHY_RATE_53 : UWB_PHY_RATE_200;
-	d_printf(2, dev, "addr %u (0x%02x) rpipe #%u ep# %u speed %d\n",
-		 urb->dev->devnum, urb->dev->devnum | unauth,
-		 le16_to_cpu(rpipe->descr.wRPipeIndex),
-		 usb_pipeendpoint(urb->pipe), rpipe->descr.bSpeed);
+
+	dev_dbg(dev, "addr %u (0x%02x) rpipe #%u ep# %u speed %d\n",
+		urb->dev->devnum, urb->dev->devnum | unauth,
+		le16_to_cpu(rpipe->descr.wRPipeIndex),
+		usb_pipeendpoint(urb->pipe), rpipe->descr.bSpeed);
+
 	/* see security.c:wusb_update_address() */
 	if (unlikely(urb->dev->devnum == 0x80))
 		rpipe->descr.bDeviceAddress = 0;
@@ -384,8 +372,6 @@ static int rpipe_aim(struct wa_rpipe *rpipe, struct wahc *wa,
 	}
 	result = 0;
 error:
-	d_fnend(3, dev, "(rpipe %p wa %p ep %p urb %p) = %d\n",
-		  rpipe, wa, ep, urb, result);
 	return result;
 }
 
@@ -405,8 +391,6 @@ static int rpipe_check_aim(const struct wa_rpipe *rpipe, const struct wahc *wa,
 	u8 unauth = (usb_dev->wusb && !usb_dev->authenticated) ? 0x80 : 0;
 	u8 portnum = wusb_port_no_to_idx(urb->dev->portnum);
 
-	d_fnstart(3, dev, "(rpipe %p wa %p ep %p, urb %p)\n",
-		    rpipe, wa, ep, urb);
 #define AIM_CHECK(rdf, val, text)					\
 	do {								\
 		if (rpipe->descr.rdf != (val)) {			\
@@ -451,8 +435,6 @@ int rpipe_get_by_ep(struct wahc *wa, struct usb_host_endpoint *ep,
 	struct wa_rpipe *rpipe;
 	u8 eptype;
 
-	d_fnstart(3, dev, "(wa %p ep %p urb %p gfp 0x%08x)\n", wa, ep, urb,
-		  gfp);
 	mutex_lock(&wa->rpipe_mutex);
 	rpipe = ep->hcpriv;
 	if (rpipe != NULL) {
@@ -462,9 +444,9 @@ int rpipe_get_by_ep(struct wahc *wa, struct usb_host_endpoint *ep,
 				goto error;
 		}
 		__rpipe_get(rpipe);
-		d_printf(2, dev, "ep 0x%02x: reusing rpipe %u\n",
-			 ep->desc.bEndpointAddress,
-			 le16_to_cpu(rpipe->descr.wRPipeIndex));
+		dev_dbg(dev, "ep 0x%02x: reusing rpipe %u\n",
+			ep->desc.bEndpointAddress,
+			le16_to_cpu(rpipe->descr.wRPipeIndex));
 	} else {
 		/* hmm, assign idle rpipe, aim it */
 		result = -ENOBUFS;
@@ -480,14 +462,12 @@ int rpipe_get_by_ep(struct wahc *wa, struct usb_host_endpoint *ep,
 		ep->hcpriv = rpipe;
 		rpipe->ep = ep;
 		__rpipe_get(rpipe);	/* for caching into ep->hcpriv */
-		d_printf(2, dev, "ep 0x%02x: using rpipe %u\n",
-			 ep->desc.bEndpointAddress,
-			 le16_to_cpu(rpipe->descr.wRPipeIndex));
+		dev_dbg(dev, "ep 0x%02x: using rpipe %u\n",
+			ep->desc.bEndpointAddress,
+			le16_to_cpu(rpipe->descr.wRPipeIndex));
 	}
-	d_dump(4, dev, &rpipe->descr, sizeof(rpipe->descr));
 error:
 	mutex_unlock(&wa->rpipe_mutex);
-	d_fnend(3, dev, "(wa %p ep %p urb %p gfp 0x%08x)\n", wa, ep, urb, gfp);
 	return result;
 }
 
@@ -507,7 +487,7 @@ int wa_rpipes_create(struct wahc *wa)
 void wa_rpipes_destroy(struct wahc *wa)
 {
 	struct device *dev = &wa->usb_iface->dev;
-	d_fnstart(3, dev, "(wa %p)\n", wa);
+
 	if (!bitmap_empty(wa->rpipe_bm, wa->rpipes)) {
 		char buf[256];
 		WARN_ON(1);
@@ -515,7 +495,6 @@ void wa_rpipes_destroy(struct wahc *wa)
 		dev_err(dev, "BUG: pipes not released on exit: %s\n", buf);
 	}
 	kfree(wa->rpipe_bm);
-	d_fnend(3, dev, "(wa %p)\n", wa);
 }
 
 /*
@@ -530,33 +509,20 @@ void wa_rpipes_destroy(struct wahc *wa)
  */
 void rpipe_ep_disable(struct wahc *wa, struct usb_host_endpoint *ep)
 {
-	struct device *dev = &wa->usb_iface->dev;
 	struct wa_rpipe *rpipe;
-	d_fnstart(2, dev, "(wa %p ep %p)\n", wa, ep);
+
 	mutex_lock(&wa->rpipe_mutex);
 	rpipe = ep->hcpriv;
 	if (rpipe != NULL) {
-		unsigned rc = atomic_read(&rpipe->refcnt.refcount);
-		int result;
 		u16 index = le16_to_cpu(rpipe->descr.wRPipeIndex);
 
-		if (rc != 1)
-			d_printf(1, dev, "(wa %p ep %p) rpipe %p refcnt %u\n",
-				 wa, ep, rpipe, rc);
-
-		d_printf(1, dev, "rpipe %u: abort\n", index);
-		result = usb_control_msg(
+		usb_control_msg(
 			wa->usb_dev, usb_rcvctrlpipe(wa->usb_dev, 0),
 			USB_REQ_RPIPE_ABORT,
 			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_RPIPE,
 			0, index, NULL, 0, 1000 /* FIXME: arbitrary */);
-		if (result < 0 && result != -ENODEV /* dev is gone */)
-			d_printf(1, dev, "(wa %p rpipe %u): abort failed: %d\n",
-				 wa, index, result);
 		rpipe_put(rpipe);
 	}
 	mutex_unlock(&wa->rpipe_mutex);
-	d_fnend(2, dev, "(wa %p ep %p)\n", wa, ep);
-	return;
 }
 EXPORT_SYMBOL_GPL(rpipe_ep_disable);
diff --git a/drivers/usb/wusbcore/wa-xfer.c b/drivers/usb/wusbcore/wa-xfer.c
index c038635..238a96a 100644
--- a/drivers/usb/wusbcore/wa-xfer.c
+++ b/drivers/usb/wusbcore/wa-xfer.c
@@ -82,13 +82,10 @@
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/hash.h>
+
 #include "wa-hc.h"
 #include "wusbhc.h"
 
-#undef D_LOCAL
-#define D_LOCAL 0 /* 0 disabled, > 0 different levels... */
-#include <linux/uwb/debug.h>
-
 enum {
 	WA_SEGS_MAX = 255,
 };
@@ -180,7 +177,6 @@ static void wa_xfer_destroy(struct kref *_xfer)
 		}
 	}
 	kfree(xfer);
-	d_printf(2, NULL, "xfer %p destroyed\n", xfer);
 }
 
 static void wa_xfer_get(struct wa_xfer *xfer)
@@ -190,10 +186,7 @@ static void wa_xfer_get(struct wa_xfer *xfer)
 
 static void wa_xfer_put(struct wa_xfer *xfer)
 {
-	d_fnstart(3, NULL, "(xfer %p) -- ref count bef put %d\n",
-		    xfer, atomic_read(&xfer->refcnt.refcount));
 	kref_put(&xfer->refcnt, wa_xfer_destroy);
-	d_fnend(3, NULL, "(xfer %p) = void\n", xfer);
 }
 
 /*
@@ -209,7 +202,7 @@ static void wa_xfer_put(struct wa_xfer *xfer)
 static void wa_xfer_giveback(struct wa_xfer *xfer)
 {
 	unsigned long flags;
-	d_fnstart(3, NULL, "(xfer %p)\n", xfer);
+
 	spin_lock_irqsave(&xfer->wa->xfer_list_lock, flags);
 	list_del_init(&xfer->list_node);
 	spin_unlock_irqrestore(&xfer->wa->xfer_list_lock, flags);
@@ -217,7 +210,6 @@ static void wa_xfer_giveback(struct wa_xfer *xfer)
 	wusbhc_giveback_urb(xfer->wa->wusb, xfer->urb, xfer->result);
 	wa_put(xfer->wa);
 	wa_xfer_put(xfer);
-	d_fnend(3, NULL, "(xfer %p) = void\n", xfer);
 }
 
 /*
@@ -227,13 +219,10 @@ static void wa_xfer_giveback(struct wa_xfer *xfer)
  */
 static void wa_xfer_completion(struct wa_xfer *xfer)
 {
-	d_fnstart(3, NULL, "(xfer %p)\n", xfer);
 	if (xfer->wusb_dev)
 		wusb_dev_put(xfer->wusb_dev);
 	rpipe_put(xfer->ep->hcpriv);
 	wa_xfer_giveback(xfer);
-	d_fnend(3, NULL, "(xfer %p) = void\n", xfer);
-	return;
 }
 
 /*
@@ -243,12 +232,12 @@ static void wa_xfer_completion(struct wa_xfer *xfer)
  */
 static unsigned __wa_xfer_is_done(struct wa_xfer *xfer)
 {
+	struct device *dev = &xfer->wa->usb_iface->dev;
 	unsigned result, cnt;
 	struct wa_seg *seg;
 	struct urb *urb = xfer->urb;
 	unsigned found_short = 0;
 
-	d_fnstart(3, NULL, "(xfer %p)\n", xfer);
 	result = xfer->segs_done == xfer->segs_submitted;
 	if (result == 0)
 		goto out;
@@ -258,10 +247,8 @@ static unsigned __wa_xfer_is_done(struct wa_xfer *xfer)
 		switch (seg->status) {
 		case WA_SEG_DONE:
 			if (found_short && seg->result > 0) {
-				if (printk_ratelimit())
-					printk(KERN_ERR "xfer %p#%u: bad short "
-					       "segments (%zu)\n", xfer, cnt,
-					       seg->result);
+				dev_dbg(dev, "xfer %p#%u: bad short segments (%zu)\n",
+					xfer, cnt, seg->result);
 				urb->status = -EINVAL;
 				goto out;
 			}
@@ -269,36 +256,30 @@ static unsigned __wa_xfer_is_done(struct wa_xfer *xfer)
 			if (seg->result < xfer->seg_size
 			    && cnt != xfer->segs-1)
 				found_short = 1;
-			d_printf(2, NULL, "xfer %p#%u: DONE short %d "
-				 "result %zu urb->actual_length %d\n",
-				 xfer, seg->index, found_short, seg->result,
-				 urb->actual_length);
+			dev_dbg(dev, "xfer %p#%u: DONE short %d "
+				"result %zu urb->actual_length %d\n",
+				xfer, seg->index, found_short, seg->result,
+				urb->actual_length);
 			break;
 		case WA_SEG_ERROR:
 			xfer->result = seg->result;
-			d_printf(2, NULL, "xfer %p#%u: ERROR result %zu\n",
-				 xfer, seg->index, seg->result);
+			dev_dbg(dev, "xfer %p#%u: ERROR result %zu\n",
+				xfer, seg->index, seg->result);
 			goto out;
 		case WA_SEG_ABORTED:
-			WARN_ON(urb->status != -ECONNRESET
-				&& urb->status != -ENOENT);
-			d_printf(2, NULL, "xfer %p#%u ABORTED: result %d\n",
-				 xfer, seg->index, urb->status);
+			dev_dbg(dev, "xfer %p#%u ABORTED: result %d\n",
+				xfer, seg->index, urb->status);
 			xfer->result = urb->status;
 			goto out;
 		default:
-			/* if (printk_ratelimit()) */
-				printk(KERN_ERR "xfer %p#%u: "
-				       "is_done bad state %d\n",
-				       xfer, cnt, seg->status);
+			dev_warn(dev, "xfer %p#%u: is_done bad state %d\n",
+				 xfer, cnt, seg->status);
 			xfer->result = -EINVAL;
-			WARN_ON(1);
 			goto out;
 		}
 	}
 	xfer->result = 0;
 out:
-	d_fnend(3, NULL, "(xfer %p) = void\n", xfer);
 	return result;
 }
 
@@ -424,8 +405,6 @@ static ssize_t __wa_xfer_setup_sizes(struct wa_xfer *xfer,
 	struct urb *urb = xfer->urb;
 	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
 
-	d_fnstart(3, dev, "(xfer %p [rpipe %p] urb %p)\n",
-		  xfer, rpipe, urb);
 	switch (rpipe->descr.bmAttribute & 0x3) {
 	case USB_ENDPOINT_XFER_CONTROL:
 		*pxfer_type = WA_XFER_TYPE_CTL;
@@ -472,12 +451,10 @@ static ssize_t __wa_xfer_setup_sizes(struct wa_xfer *xfer,
 	if (xfer->segs == 0 && *pxfer_type == WA_XFER_TYPE_CTL)
 		xfer->segs = 1;
 error:
-	d_fnend(3, dev, "(xfer %p [rpipe %p] urb %p) = %d\n",
-		xfer, rpipe, urb, (int)result);
 	return result;
 }
 
-/** Fill in the common request header and xfer-type specific data. */
+/* Fill in the common request header and xfer-type specific data. */
 static void __wa_xfer_setup_hdr0(struct wa_xfer *xfer,
 				 struct wa_xfer_hdr *xfer_hdr0,
 				 enum wa_xfer_type xfer_type,
@@ -534,14 +511,13 @@ static void wa_seg_dto_cb(struct urb *urb)
 	unsigned rpipe_ready = 0;
 	u8 done = 0;
 
-	d_fnstart(3, NULL, "(urb %p [%d])\n", urb, urb->status);
 	switch (urb->status) {
 	case 0:
 		spin_lock_irqsave(&xfer->lock, flags);
 		wa = xfer->wa;
 		dev = &wa->usb_iface->dev;
-		d_printf(2, dev, "xfer %p#%u: data out done (%d bytes)\n",
-			   xfer, seg->index, urb->actual_length);
+		dev_dbg(dev, "xfer %p#%u: data out done (%d bytes)\n",
+			xfer, seg->index, urb->actual_length);
 		if (seg->status < WA_SEG_PENDING)
 			seg->status = WA_SEG_PENDING;
 		seg->result = urb->actual_length;
@@ -555,9 +531,8 @@ static void wa_seg_dto_cb(struct urb *urb)
 		wa = xfer->wa;
 		dev = &wa->usb_iface->dev;
 		rpipe = xfer->ep->hcpriv;
-		if (printk_ratelimit())
-			dev_err(dev, "xfer %p#%u: data out error %d\n",
-				xfer, seg->index, urb->status);
+		dev_dbg(dev, "xfer %p#%u: data out error %d\n",
+			xfer, seg->index, urb->status);
 		if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
 			    EDC_ERROR_TIMEFRAME)){
 			dev_err(dev, "DTO: URB max acceptable errors "
@@ -578,7 +553,6 @@ static void wa_seg_dto_cb(struct urb *urb)
 		if (rpipe_ready)
 			wa_xfer_delayed_run(rpipe);
 	}
-	d_fnend(3, NULL, "(urb %p [%d]) = void\n", urb, urb->status);
 }
 
 /*
@@ -610,14 +584,12 @@ static void wa_seg_cb(struct urb *urb)
 	unsigned rpipe_ready;
 	u8 done = 0;
 
-	d_fnstart(3, NULL, "(urb %p [%d])\n", urb, urb->status);
 	switch (urb->status) {
 	case 0:
 		spin_lock_irqsave(&xfer->lock, flags);
 		wa = xfer->wa;
 		dev = &wa->usb_iface->dev;
-		d_printf(2, dev, "xfer %p#%u: request done\n",
-			   xfer, seg->index);
+		dev_dbg(dev, "xfer %p#%u: request done\n", xfer, seg->index);
 		if (xfer->is_inbound && seg->status < WA_SEG_PENDING)
 			seg->status = WA_SEG_PENDING;
 		spin_unlock_irqrestore(&xfer->lock, flags);
@@ -652,7 +624,6 @@ static void wa_seg_cb(struct urb *urb)
 		if (rpipe_ready)
 			wa_xfer_delayed_run(rpipe);
 	}
-	d_fnend(3, NULL, "(urb %p [%d]) = void\n", urb, urb->status);
 }
 
 /*
@@ -750,9 +721,6 @@ static int __wa_xfer_setup(struct wa_xfer *xfer, struct urb *urb)
 	size_t xfer_hdr_size, cnt, transfer_size;
 	struct wa_xfer_hdr *xfer_hdr0, *xfer_hdr;
 
-	d_fnstart(3, dev, "(xfer %p [rpipe %p] urb %p)\n",
-		  xfer, xfer->ep->hcpriv, urb);
-
 	result = __wa_xfer_setup_sizes(xfer, &xfer_type);
 	if (result < 0)
 		goto error_setup_sizes;
@@ -788,8 +756,6 @@ static int __wa_xfer_setup(struct wa_xfer *xfer, struct urb *urb)
 	result = 0;
 error_setup_segs:
 error_setup_sizes:
-	d_fnend(3, dev, "(xfer %p [rpipe %p] urb %p) = %d\n",
-		xfer, xfer->ep->hcpriv, urb, result);
 	return result;
 }
 
@@ -843,9 +809,6 @@ static void wa_xfer_delayed_run(struct wa_rpipe *rpipe)
 	struct wa_xfer *xfer;
 	unsigned long flags;
 
-	d_fnstart(1, dev, "(rpipe #%d) %d segments available\n",
-		  le16_to_cpu(rpipe->descr.wRPipeIndex),
-		  atomic_read(&rpipe->segs_available));
 	spin_lock_irqsave(&rpipe->seg_lock, flags);
 	while (atomic_read(&rpipe->segs_available) > 0
 	      && !list_empty(&rpipe->seg_list)) {
@@ -854,10 +817,8 @@ static void wa_xfer_delayed_run(struct wa_rpipe *rpipe)
 		list_del(&seg->list_node);
 		xfer = seg->xfer;
 		result = __wa_seg_submit(rpipe, xfer, seg);
-		d_printf(1, dev, "xfer %p#%u submitted from delayed "
-			 "[%d segments available] %d\n",
-			 xfer, seg->index,
-			 atomic_read(&rpipe->segs_available), result);
+		dev_dbg(dev, "xfer %p#%u submitted from delayed [%d segments available] %d\n",
+			xfer, seg->index, atomic_read(&rpipe->segs_available), result);
 		if (unlikely(result < 0)) {
 			spin_unlock_irqrestore(&rpipe->seg_lock, flags);
 			spin_lock_irqsave(&xfer->lock, flags);
@@ -868,10 +829,6 @@ static void wa_xfer_delayed_run(struct wa_rpipe *rpipe)
 		}
 	}
 	spin_unlock_irqrestore(&rpipe->seg_lock, flags);
-	d_fnend(1, dev, "(rpipe #%d) = void, %d segments available\n",
-		le16_to_cpu(rpipe->descr.wRPipeIndex),
-		atomic_read(&rpipe->segs_available));
-
 }
 
 /*
@@ -894,9 +851,6 @@ static int __wa_xfer_submit(struct wa_xfer *xfer)
 	u8 available;
 	u8 empty;
 
-	d_fnstart(3, dev, "(xfer %p [rpipe %p])\n",
-		  xfer, xfer->ep->hcpriv);
-
 	spin_lock_irqsave(&wa->xfer_list_lock, flags);
 	list_add_tail(&xfer->list_node, &wa->xfer_list);
 	spin_unlock_irqrestore(&wa->xfer_list_lock, flags);
@@ -908,30 +862,24 @@ static int __wa_xfer_submit(struct wa_xfer *xfer)
 		available = atomic_read(&rpipe->segs_available);
 		empty = list_empty(&rpipe->seg_list);
 		seg = xfer->seg[cnt];
-		d_printf(2, dev, "xfer %p#%u: available %u empty %u (%s)\n",
-			 xfer, cnt, available, empty,
-			 available == 0 || !empty ? "delayed" : "submitted");
+		dev_dbg(dev, "xfer %p#%u: available %u empty %u (%s)\n",
+			xfer, cnt, available, empty,
+			available == 0 || !empty ? "delayed" : "submitted");
 		if (available == 0 || !empty) {
-			d_printf(1, dev, "xfer %p#%u: delayed\n", xfer, cnt);
+			dev_dbg(dev, "xfer %p#%u: delayed\n", xfer, cnt);
 			seg->status = WA_SEG_DELAYED;
 			list_add_tail(&seg->list_node, &rpipe->seg_list);
 		} else {
 			result = __wa_seg_submit(rpipe, xfer, seg);
-			if (result < 0)
+			if (result < 0) {
+				__wa_xfer_abort(xfer);
 				goto error_seg_submit;
+			}
 		}
 		xfer->segs_submitted++;
 	}
-	spin_unlock_irqrestore(&rpipe->seg_lock, flags);
-	d_fnend(3, dev, "(xfer %p [rpipe %p]) = void\n", xfer,
-		xfer->ep->hcpriv);
-	return result;
-
 error_seg_submit:
-	__wa_xfer_abort(xfer);
 	spin_unlock_irqrestore(&rpipe->seg_lock, flags);
-	d_fnend(3, dev, "(xfer %p [rpipe %p]) = void\n", xfer,
-		xfer->ep->hcpriv);
 	return result;
 }
 
@@ -964,11 +912,9 @@ static void wa_urb_enqueue_b(struct wa_xfer *xfer)
 	struct urb *urb = xfer->urb;
 	struct wahc *wa = xfer->wa;
 	struct wusbhc *wusbhc = wa->wusb;
-	struct device *dev = &wa->usb_iface->dev;
 	struct wusb_dev *wusb_dev;
 	unsigned done;
 
-	d_fnstart(3, dev, "(wa %p urb %p)\n", wa, urb);
 	result = rpipe_get_by_ep(wa, xfer->ep, urb, xfer->gfp);
 	if (result < 0)
 		goto error_rpipe_get;
@@ -997,7 +943,6 @@ static void wa_urb_enqueue_b(struct wa_xfer *xfer)
 	if (result < 0)
 		goto error_xfer_submit;
 	spin_unlock_irqrestore(&xfer->lock, flags);
-	d_fnend(3, dev, "(wa %p urb %p) = void\n", wa, urb);
 	return;
 
 	/* this is basically wa_xfer_completion() broken up wa_xfer_giveback()
@@ -1015,7 +960,6 @@ error_dev_gone:
 error_rpipe_get:
 	xfer->result = result;
 	wa_xfer_giveback(xfer);
-	d_fnend(3, dev, "(wa %p urb %p) = (void) %d\n", wa, urb, result);
 	return;
 
 error_xfer_submit:
@@ -1024,8 +968,6 @@ error_xfer_submit:
 	spin_unlock_irqrestore(&xfer->lock, flags);
 	if (done)
 		wa_xfer_completion(xfer);
-	d_fnend(3, dev, "(wa %p urb %p) = (void) %d\n", wa, urb, result);
-	return;
 }
 
 /*
@@ -1041,11 +983,9 @@ error_xfer_submit:
 void wa_urb_enqueue_run(struct work_struct *ws)
 {
 	struct wahc *wa = container_of(ws, struct wahc, xfer_work);
-	struct device *dev = &wa->usb_iface->dev;
 	struct wa_xfer *xfer, *next;
 	struct urb *urb;
 
-	d_fnstart(3, dev, "(wa %p)\n", wa);
 	spin_lock_irq(&wa->xfer_list_lock);
 	list_for_each_entry_safe(xfer, next, &wa->xfer_delayed_list,
 				 list_node) {
@@ -1059,7 +999,6 @@ void wa_urb_enqueue_run(struct work_struct *ws)
 		spin_lock_irq(&wa->xfer_list_lock);
 	}
 	spin_unlock_irq(&wa->xfer_list_lock);
-	d_fnend(3, dev, "(wa %p) = void\n", wa);
 }
 EXPORT_SYMBOL_GPL(wa_urb_enqueue_run);
 
@@ -1084,9 +1023,6 @@ int wa_urb_enqueue(struct wahc *wa, struct usb_host_endpoint *ep,
 	unsigned long my_flags;
 	unsigned cant_sleep = irqs_disabled() | in_atomic();
 
-	d_fnstart(3, dev, "(wa %p ep %p urb %p [%d] gfp 0x%x)\n",
-		  wa, ep, urb, urb->transfer_buffer_length, gfp);
-
 	if (urb->transfer_buffer == NULL
 	    && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)
 	    && urb->transfer_buffer_length != 0) {
@@ -1108,11 +1044,13 @@ int wa_urb_enqueue(struct wahc *wa, struct usb_host_endpoint *ep,
 	xfer->gfp = gfp;
 	xfer->ep = ep;
 	urb->hcpriv = xfer;
-	d_printf(2, dev, "xfer %p urb %p pipe 0x%02x [%d bytes] %s %s %s\n",
-		 xfer, urb, urb->pipe, urb->transfer_buffer_length,
-		 urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP ? "dma" : "nodma",
-		 urb->pipe & USB_DIR_IN ? "inbound" : "outbound",
-		 cant_sleep ? "deferred" : "inline");
+
+	dev_dbg(dev, "xfer %p urb %p pipe 0x%02x [%d bytes] %s %s %s\n",
+		xfer, urb, urb->pipe, urb->transfer_buffer_length,
+		urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP ? "dma" : "nodma",
+		urb->pipe & USB_DIR_IN ? "inbound" : "outbound",
+		cant_sleep ? "deferred" : "inline");
+
 	if (cant_sleep) {
 		usb_get_urb(urb);
 		spin_lock_irqsave(&wa->xfer_list_lock, my_flags);
@@ -1122,15 +1060,11 @@ int wa_urb_enqueue(struct wahc *wa, struct usb_host_endpoint *ep,
 	} else {
 		wa_urb_enqueue_b(xfer);
 	}
-	d_fnend(3, dev, "(wa %p ep %p urb %p [%d] gfp 0x%x) = 0\n",
-		wa, ep, urb, urb->transfer_buffer_length, gfp);
 	return 0;
 
 error_dequeued:
 	kfree(xfer);
 error_kmalloc:
-	d_fnend(3, dev, "(wa %p ep %p urb %p [%d] gfp 0x%x) = %d\n",
-		wa, ep, urb, urb->transfer_buffer_length, gfp, result);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wa_urb_enqueue);
@@ -1155,7 +1089,6 @@ EXPORT_SYMBOL_GPL(wa_urb_enqueue);
  */
 int wa_urb_dequeue(struct wahc *wa, struct urb *urb)
 {
-	struct device *dev = &wa->usb_iface->dev;
 	unsigned long flags, flags2;
 	struct wa_xfer *xfer;
 	struct wa_seg *seg;
@@ -1163,9 +1096,6 @@ int wa_urb_dequeue(struct wahc *wa, struct urb *urb)
 	unsigned cnt;
 	unsigned rpipe_ready = 0;
 
-	d_fnstart(3, dev, "(wa %p, urb %p)\n", wa, urb);
-
-	d_printf(1, dev, "xfer %p urb %p: aborting\n", urb->hcpriv, urb);
 	xfer = urb->hcpriv;
 	if (xfer == NULL) {
 		/* NOthing setup yet enqueue will see urb->status !=
@@ -1234,13 +1164,11 @@ int wa_urb_dequeue(struct wahc *wa, struct urb *urb)
 	wa_xfer_completion(xfer);
 	if (rpipe_ready)
 		wa_xfer_delayed_run(rpipe);
-	d_fnend(3, dev, "(wa %p, urb %p) = 0\n", wa, urb);
 	return 0;
 
 out_unlock:
 	spin_unlock_irqrestore(&xfer->lock, flags);
 out:
-	d_fnend(3, dev, "(wa %p, urb %p) = 0\n", wa, urb);
 	return 0;
 
 dequeue_delayed:
@@ -1250,7 +1178,6 @@ dequeue_delayed:
 	spin_unlock_irqrestore(&xfer->lock, flags);
 	wa_xfer_giveback(xfer);
 	usb_put_urb(urb);		/* we got a ref in enqueue() */
-	d_fnend(3, dev, "(wa %p, urb %p) = 0\n", wa, urb);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(wa_urb_dequeue);
@@ -1326,7 +1253,6 @@ static void wa_xfer_result_chew(struct wahc *wa, struct wa_xfer *xfer)
 	u8 usb_status;
 	unsigned rpipe_ready = 0;
 
-	d_fnstart(3, dev, "(wa %p xfer %p)\n", wa, xfer);
 	spin_lock_irqsave(&xfer->lock, flags);
 	seg_idx = xfer_result->bTransferSegment & 0x7f;
 	if (unlikely(seg_idx >= xfer->segs))
@@ -1334,8 +1260,8 @@ static void wa_xfer_result_chew(struct wahc *wa, struct wa_xfer *xfer)
 	seg = xfer->seg[seg_idx];
 	rpipe = xfer->ep->hcpriv;
 	usb_status = xfer_result->bTransferStatus;
-	d_printf(2, dev, "xfer %p#%u: bTransferStatus 0x%02x (seg %u)\n",
-		 xfer, seg_idx, usb_status, seg->status);
+	dev_dbg(dev, "xfer %p#%u: bTransferStatus 0x%02x (seg %u)\n",
+		xfer, seg_idx, usb_status, seg->status);
 	if (seg->status == WA_SEG_ABORTED
 	    || seg->status == WA_SEG_ERROR)	/* already handled */
 		goto segment_aborted;
@@ -1391,10 +1317,8 @@ static void wa_xfer_result_chew(struct wahc *wa, struct wa_xfer *xfer)
 		wa_xfer_completion(xfer);
 	if (rpipe_ready)
 		wa_xfer_delayed_run(rpipe);
-	d_fnend(3, dev, "(wa %p xfer %p) = void\n", wa, xfer);
 	return;
 
-
 error_submit_buf_in:
 	if (edc_inc(&wa->dti_edc, EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
 		dev_err(dev, "DTI: URB max acceptable errors "
@@ -1416,11 +1340,8 @@ error_complete:
 		wa_xfer_completion(xfer);
 	if (rpipe_ready)
 		wa_xfer_delayed_run(rpipe);
-	d_fnend(3, dev, "(wa %p xfer %p) = void [segment/DTI-submit error]\n",
-		wa, xfer);
 	return;
 
-
 error_bad_seg:
 	spin_unlock_irqrestore(&xfer->lock, flags);
 	wa_urb_dequeue(wa, xfer->urb);
@@ -1431,17 +1352,11 @@ error_bad_seg:
 			"exceeded, resetting device\n");
 		wa_reset_all(wa);
 	}
-	d_fnend(3, dev, "(wa %p xfer %p) = void [bad seg]\n", wa, xfer);
 	return;
 
-
 segment_aborted:
 	/* nothing to do, as the aborter did the completion */
 	spin_unlock_irqrestore(&xfer->lock, flags);
-	d_fnend(3, dev, "(wa %p xfer %p) = void [segment aborted]\n",
-		wa, xfer);
-	return;
-
 }
 
 /*
@@ -1465,15 +1380,14 @@ static void wa_buf_in_cb(struct urb *urb)
 	unsigned long flags;
 	u8 done = 0;
 
-	d_fnstart(3, NULL, "(urb %p [%d])\n", urb, urb->status);
 	switch (urb->status) {
 	case 0:
 		spin_lock_irqsave(&xfer->lock, flags);
 		wa = xfer->wa;
 		dev = &wa->usb_iface->dev;
 		rpipe = xfer->ep->hcpriv;
-		d_printf(2, dev, "xfer %p#%u: data in done (%zu bytes)\n",
-			   xfer, seg->index, (size_t)urb->actual_length);
+		dev_dbg(dev, "xfer %p#%u: data in done (%zu bytes)\n",
+			xfer, seg->index, (size_t)urb->actual_length);
 		seg->status = WA_SEG_DONE;
 		seg->result = urb->actual_length;
 		xfer->segs_done++;
@@ -1514,7 +1428,6 @@ static void wa_buf_in_cb(struct urb *urb)
 		if (rpipe_ready)
 			wa_xfer_delayed_run(rpipe);
 	}
-	d_fnend(3, NULL, "(urb %p [%d]) = void\n", urb, urb->status);
 }
 
 /*
@@ -1553,14 +1466,12 @@ static void wa_xfer_result_cb(struct urb *urb)
 	struct wa_xfer *xfer;
 	u8 usb_status;
 
-	d_fnstart(3, dev, "(%p)\n", wa);
 	BUG_ON(wa->dti_urb != urb);
 	switch (wa->dti_urb->status) {
 	case 0:
 		/* We have a xfer result buffer; check it */
-		d_printf(2, dev, "DTI: xfer result %d bytes at %p\n",
-			   urb->actual_length, urb->transfer_buffer);
-		d_dump(3, dev, urb->transfer_buffer, urb->actual_length);
+		dev_dbg(dev, "DTI: xfer result %d bytes at %p\n",
+			urb->actual_length, urb->transfer_buffer);
 		if (wa->dti_urb->actual_length != sizeof(*xfer_result)) {
 			dev_err(dev, "DTI Error: xfer result--bad size "
 				"xfer result (%d bytes vs %zu needed)\n",
@@ -1622,7 +1533,6 @@ static void wa_xfer_result_cb(struct urb *urb)
 		wa_reset_all(wa);
 	}
 out:
-	d_fnend(3, dev, "(%p) = void\n", wa);
 	return;
 }
 
@@ -1653,7 +1563,6 @@ void wa_handle_notif_xfer(struct wahc *wa, struct wa_notif_hdr *notif_hdr)
 	struct wa_notif_xfer *notif_xfer;
 	const struct usb_endpoint_descriptor *dti_epd = wa->dti_epd;
 
-	d_fnstart(4, dev, "(%p, %p)\n", wa, notif_hdr);
 	notif_xfer = container_of(notif_hdr, struct wa_notif_xfer, hdr);
 	BUG_ON(notif_hdr->bNotifyType != WA_NOTIF_TRANSFER);
 
@@ -1693,7 +1602,6 @@ void wa_handle_notif_xfer(struct wahc *wa, struct wa_notif_hdr *notif_hdr)
 		goto error_dti_urb_submit;
 	}
 out:
-	d_fnend(4, dev, "(%p, %p) = void\n", wa, notif_hdr);
 	return;
 
 error_dti_urb_submit:
@@ -1704,6 +1612,4 @@ error_buf_in_urb_alloc:
 error_dti_urb_alloc:
 error:
 	wa_reset_all(wa);
-	d_fnend(4, dev, "(%p, %p) = void\n", wa, notif_hdr);
-	return;
 }
diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
index d9c60cb..0315093 100644
--- a/drivers/uwb/beacon.c
+++ b/drivers/uwb/beacon.c
@@ -22,19 +22,16 @@
  *
  * FIXME: docs
  */
-
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/kdev_t.h>
-#include "uwb-internal.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
+#include "uwb-internal.h"
 
-/** Start Beaconing command structure */
+/* Start Beaconing command structure */
 struct uwb_rc_cmd_start_beacon {
 	struct uwb_rccb rccb;
 	__le16 wBPSTOffset;
@@ -176,9 +173,6 @@ struct uwb_beca_e *__uwb_beca_find_bydev(struct uwb_rc *rc,
 {
 	struct uwb_beca_e *bce, *next;
 	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
-		d_printf(6, NULL, "looking for addr %02x:%02x in %02x:%02x\n",
-			 dev_addr->data[0], dev_addr->data[1],
-			 bce->dev_addr.data[0], bce->dev_addr.data[1]);
 		if (!memcmp(&bce->dev_addr, dev_addr, sizeof(bce->dev_addr)))
 			goto out;
 	}
diff --git a/drivers/uwb/est.c b/drivers/uwb/est.c
index 5fe566b..328fcc2 100644
--- a/drivers/uwb/est.c
+++ b/drivers/uwb/est.c
@@ -40,10 +40,8 @@
  *   uwb_est_get_size()
  */
 #include <linux/spinlock.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-#include "uwb-internal.h"
 
+#include "uwb-internal.h"
 
 struct uwb_est {
 	u16 type_event_high;
@@ -52,7 +50,6 @@ struct uwb_est {
 	const struct uwb_est_entry *entry;
 };
 
-
 static struct uwb_est *uwb_est;
 static u8 uwb_est_size;
 static u8 uwb_est_used;
@@ -440,21 +437,12 @@ ssize_t uwb_est_find_size(struct uwb_rc *rc, const struct uwb_rceb *rceb,
 	u8 *ptr = (u8 *) rceb;
 
 	read_lock_irqsave(&uwb_est_lock, flags);
-	d_printf(2, dev, "Size query for event 0x%02x/%04x/%02x,"
-		 " buffer size %ld\n",
-		 (unsigned) rceb->bEventType,
-		 (unsigned) le16_to_cpu(rceb->wEvent),
-		 (unsigned) rceb->bEventContext,
-		 (long) rceb_size);
 	size = -ENOSPC;
 	if (rceb_size < sizeof(*rceb))
 		goto out;
 	event = le16_to_cpu(rceb->wEvent);
 	type_event_high = rceb->bEventType << 8 | (event & 0xff00) >> 8;
 	for (itr = 0; itr < uwb_est_used; itr++) {
-		d_printf(3, dev, "Checking EST 0x%04x/%04x/%04x\n",
-			uwb_est[itr].type_event_high, uwb_est[itr].vendor,
-			uwb_est[itr].product);
 		if (uwb_est[itr].type_event_high != type_event_high)
 			continue;
 		size = uwb_est_get_size(rc, &uwb_est[itr],
diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c
index 158e98d..559f878 100644
--- a/drivers/uwb/hwa-rc.c
+++ b/drivers/uwb/hwa-rc.c
@@ -57,9 +57,8 @@
 #include <linux/usb/wusb.h>
 #include <linux/usb/wusb-wa.h>
 #include <linux/uwb.h>
+
 #include "uwb-internal.h"
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
 
 /* The device uses commands and events from the WHCI specification, although
  * reporting itself as WUSB compliant. */
@@ -630,17 +629,13 @@ void hwarc_neep_cb(struct urb *urb)
 
 	switch (result = urb->status) {
 	case 0:
-		d_printf(3, dev, "NEEP: receive stat %d, %zu bytes\n",
-			 urb->status, (size_t)urb->actual_length);
 		uwb_rc_neh_grok(hwarc->uwb_rc, urb->transfer_buffer,
 				urb->actual_length);
 		break;
 	case -ECONNRESET:	/* Not an error, but a controlled situation; */
 	case -ENOENT:		/* (we killed the URB)...so, no broadcast */
-		d_printf(2, dev, "NEEP: URB reset/noent %d\n", urb->status);
 		goto out;
 	case -ESHUTDOWN:	/* going away! */
-		d_printf(2, dev, "NEEP: URB down %d\n", urb->status);
 		goto out;
 	default:		/* On general errors, retry unless it gets ugly */
 		if (edc_inc(&hwarc->neep_edc, EDC_MAX_ERRORS,
@@ -649,7 +644,6 @@ void hwarc_neep_cb(struct urb *urb)
 		dev_err(dev, "NEEP: URB error %d\n", urb->status);
 	}
 	result = usb_submit_urb(urb, GFP_ATOMIC);
-	d_printf(3, dev, "NEEP: submit %d\n", result);
 	if (result < 0) {
 		dev_err(dev, "NEEP: Can't resubmit URB (%d) resetting device\n",
 			result);
@@ -758,11 +752,11 @@ static int hwarc_get_version(struct uwb_rc *rc)
 	itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength);
 	while (itr_size >= sizeof(*hdr)) {
 		hdr = (struct usb_descriptor_header *) itr;
-		d_printf(3, dev, "Extra device descriptor: "
-			 "type %02x/%u bytes @ %zu (%zu left)\n",
-			 hdr->bDescriptorType, hdr->bLength,
-			 (itr - usb_dev->rawdescriptors[actconfig_idx]),
-			 itr_size);
+		dev_dbg(dev, "Extra device descriptor: "
+			"type %02x/%u bytes @ %zu (%zu left)\n",
+			hdr->bDescriptorType, hdr->bLength,
+			(itr - usb_dev->rawdescriptors[actconfig_idx]),
+			itr_size);
 		if (hdr->bDescriptorType == USB_DT_CS_RADIO_CONTROL)
 			goto found;
 		itr += hdr->bLength;
@@ -794,8 +788,7 @@ found:
 		goto error;
 	}
 	rc->version = version;
-	d_printf(3, dev, "Device supports WUSB protocol version 0x%04x \n",
-		 rc->version);
+	dev_dbg(dev, "Device supports WUSB protocol version 0x%04x \n",	rc->version);
 	result = 0;
 error:
 	return result;
@@ -876,7 +869,6 @@ static void hwarc_disconnect(struct usb_interface *iface)
 	uwb_rc_rm(uwb_rc);
 	usb_put_intf(hwarc->usb_iface);
 	usb_put_dev(hwarc->usb_dev);
-	d_printf(1, &hwarc->usb_iface->dev, "freed hwarc %p\n", hwarc);
 	kfree(hwarc);
 	uwb_rc_put(uwb_rc);	/* when creating the device, refcount = 1 */
 }
@@ -924,13 +916,7 @@ static struct usb_driver hwarc_driver = {
 
 static int __init hwarc_driver_init(void)
 {
-	int result;
-	result = usb_register(&hwarc_driver);
-	if (result < 0)
-		printk(KERN_ERR "HWA-RC: Cannot register USB driver: %d\n",
-		       result);
-	return result;
-
+	return usb_register(&hwarc_driver);
 }
 module_init(hwarc_driver_init);
 
diff --git a/drivers/uwb/i1480/dfu/dfu.c b/drivers/uwb/i1480/dfu/dfu.c
index 9097b3b..da7b1d0 100644
--- a/drivers/uwb/i1480/dfu/dfu.c
+++ b/drivers/uwb/i1480/dfu/dfu.c
@@ -34,10 +34,7 @@
 #include <linux/uwb.h>
 #include <linux/random.h>
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
-/**
+/*
  * i1480_rceb_check - Check RCEB for expected field values
  * @i1480: pointer to device for which RCEB is being checked
  * @rceb: RCEB being checked
@@ -83,7 +80,7 @@ int i1480_rceb_check(const struct i1480 *i1480, const struct uwb_rceb *rceb,
 EXPORT_SYMBOL_GPL(i1480_rceb_check);
 
 
-/**
+/*
  * Execute a Radio Control Command
  *
  * Command data has to be in i1480->cmd_buf.
@@ -101,7 +98,6 @@ ssize_t i1480_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size,
 	u8 expected_type = reply->bEventType;
 	u8 context;
 
-	d_fnstart(3, i1480->dev, "(%p, %s, %zu)\n", i1480, cmd_name, cmd_size);
 	init_completion(&i1480->evt_complete);
 	i1480->evt_result = -EINPROGRESS;
 	do {
@@ -150,8 +146,6 @@ ssize_t i1480_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size,
 	result = i1480_rceb_check(i1480, i1480->evt_buf, cmd_name, context,
 				  expected_type, expected_event);
 error:
-	d_fnend(3, i1480->dev, "(%p, %s, %zu) = %zd\n",
-		i1480, cmd_name, cmd_size, result);
 	return result;
 }
 EXPORT_SYMBOL_GPL(i1480_cmd);
diff --git a/drivers/uwb/i1480/dfu/mac.c b/drivers/uwb/i1480/dfu/mac.c
index 2e4d8f0..694d0da 100644
--- a/drivers/uwb/i1480/dfu/mac.c
+++ b/drivers/uwb/i1480/dfu/mac.c
@@ -31,9 +31,6 @@
 #include <linux/uwb.h>
 #include "i1480-dfu.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
 /*
  * Descriptor for a continuous segment of MAC fw data
  */
@@ -184,10 +181,6 @@ ssize_t i1480_fw_cmp(struct i1480 *i1480, struct fw_hdr *hdr)
 		}
 		if (memcmp(i1480->cmd_buf, bin + src_itr, result)) {
 			u8 *buf = i1480->cmd_buf;
-			d_printf(2, i1480->dev,
-				 "original data @ %p + %u, %zu bytes\n",
-				 bin, src_itr, result);
-			d_dump(4, i1480->dev, bin + src_itr, result);
 			for (cnt = 0; cnt < result; cnt++)
 				if (bin[src_itr + cnt] != buf[cnt]) {
 					dev_err(i1480->dev, "byte failed at "
@@ -224,7 +217,6 @@ int mac_fw_hdrs_push(struct i1480 *i1480, struct fw_hdr *hdr,
 	struct fw_hdr *hdr_itr;
 	int verif_retry_count;
 
-	d_fnstart(3, dev, "(%p, %p)\n", i1480, hdr);
 	/* Now, header by header, push them to the hw */
 	for (hdr_itr = hdr; hdr_itr != NULL; hdr_itr = hdr_itr->next) {
 		verif_retry_count = 0;
@@ -264,7 +256,6 @@ retry:
 			break;
 		}
 	}
-	d_fnend(3, dev, "(%zd)\n", result);
 	return result;
 }
 
@@ -337,11 +328,9 @@ int __mac_fw_upload(struct i1480 *i1480, const char *fw_name,
 	const struct firmware *fw;
 	struct fw_hdr *fw_hdrs;
 
-	d_fnstart(3, i1480->dev, "(%p, %s, %s)\n", i1480, fw_name, fw_tag);
 	result = request_firmware(&fw, fw_name, i1480->dev);
 	if (result < 0)	/* Up to caller to complain on -ENOENT */
 		goto out;
-	d_printf(3, i1480->dev, "%s fw '%s': uploading\n", fw_tag, fw_name);
 	result = fw_hdrs_load(i1480, &fw_hdrs, fw->data, fw->size);
 	if (result < 0) {
 		dev_err(i1480->dev, "%s fw '%s': failed to parse firmware "
@@ -363,8 +352,6 @@ out_hdrs_release:
 out_release:
 	release_firmware(fw);
 out:
-	d_fnend(3, i1480->dev, "(%p, %s, %s) = %d\n", i1480, fw_name, fw_tag,
-		result);
 	return result;
 }
 
@@ -433,7 +420,6 @@ int i1480_fw_is_running_q(struct i1480 *i1480)
 	int result;
 	u32 *val = (u32 *) i1480->cmd_buf;
 
-	d_fnstart(3, i1480->dev, "(i1480 %p)\n", i1480);
 	for (cnt = 0; cnt < 10; cnt++) {
 		msleep(100);
 		result = i1480->read(i1480, 0x80080000, 4);
@@ -447,7 +433,6 @@ int i1480_fw_is_running_q(struct i1480 *i1480)
 	dev_err(i1480->dev, "Timed out waiting for fw to start\n");
 	result = -ETIMEDOUT;
 out:
-	d_fnend(3, i1480->dev, "(i1480 %p) = %d\n", i1480, result);
 	return result;
 
 }
@@ -467,7 +452,6 @@ int i1480_mac_fw_upload(struct i1480 *i1480)
 	int result = 0, deprecated_name = 0;
 	struct i1480_rceb *rcebe = (void *) i1480->evt_buf;
 
-	d_fnstart(3, i1480->dev, "(%p)\n", i1480);
 	result = __mac_fw_upload(i1480, i1480->mac_fw_name, "MAC");
 	if (result == -ENOENT) {
 		result = __mac_fw_upload(i1480, i1480->mac_fw_name_deprecate,
@@ -501,7 +485,6 @@ int i1480_mac_fw_upload(struct i1480 *i1480)
 		dev_err(i1480->dev, "MAC fw '%s': initialization event returns "
 			"wrong size (%zu bytes vs %zu needed)\n",
 			i1480->mac_fw_name, i1480->evt_result, sizeof(*rcebe));
-		dump_bytes(i1480->dev, rcebe, min(i1480->evt_result, (ssize_t)32));
 		goto error_size;
 	}
 	result = -EIO;
@@ -522,6 +505,5 @@ error_fw_not_running:
 error_init_timeout:
 error_size:
 error_setup:
-	d_fnend(3, i1480->dev, "(i1480 %p) = %d\n", i1480, result);
 	return result;
 }
diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c
index b7ea525..686795e 100644
--- a/drivers/uwb/i1480/dfu/usb.c
+++ b/drivers/uwb/i1480/dfu/usb.c
@@ -43,10 +43,6 @@
 #include <linux/usb/wusb-wa.h>
 #include "i1480-dfu.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
-
 struct i1480_usb {
 	struct i1480 i1480;
 	struct usb_device *usb_dev;
@@ -117,8 +113,6 @@ int i1480_usb_write(struct i1480 *i1480, u32 memory_address,
 	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
 	size_t buffer_size, itr = 0;
 
-	d_fnstart(3, i1480->dev, "(%p, 0x%08x, %p, %zu)\n",
-		  i1480, memory_address, buffer, size);
 	BUG_ON(size & 0x3); /* Needs to be a multiple of 4 */
 	while (size > 0) {
 		buffer_size = size < i1480->buf_size ? size : i1480->buf_size;
@@ -131,16 +125,10 @@ int i1480_usb_write(struct i1480 *i1480, u32 memory_address,
 			i1480->cmd_buf, buffer_size, 100 /* FIXME: arbitrary */);
 		if (result < 0)
 			break;
-		d_printf(3, i1480->dev,
-			 "wrote @ 0x%08x %u bytes (of %zu bytes requested)\n",
-			 memory_address, result, buffer_size);
-		d_dump(4, i1480->dev, i1480->cmd_buf, result);
 		itr += result;
 		memory_address += result;
 		size -= result;
 	}
-	d_fnend(3, i1480->dev, "(%p, 0x%08x, %p, %zu) = %d\n",
-		i1480, memory_address, buffer, size, result);
 	return result;
 }
 
@@ -165,8 +153,6 @@ int i1480_usb_read(struct i1480 *i1480, u32 addr, size_t size)
 	size_t itr, read_size = i1480->buf_size;
 	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
 
-	d_fnstart(3, i1480->dev, "(%p, 0x%08x, %zu)\n",
-		  i1480, addr, size);
 	BUG_ON(size > i1480->buf_size);
 	BUG_ON(size & 0x3); /* Needs to be a multiple of 4 */
 	BUG_ON(read_size > 512);
@@ -200,10 +186,6 @@ int i1480_usb_read(struct i1480 *i1480, u32 addr, size_t size)
 	}
 	result = bytes;
 out:
-	d_fnend(3, i1480->dev, "(%p, 0x%08x, %zu) = %zd\n",
-		i1480, addr, size, result);
-	if (result > 0)
-		d_dump(4, i1480->dev, i1480->cmd_buf, result);
 	return result;
 }
 
@@ -259,7 +241,6 @@ int i1480_usb_wait_init_done(struct i1480 *i1480)
 	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
 	struct usb_endpoint_descriptor *epd;
 
-	d_fnstart(3, dev, "(%p)\n", i1480);
 	init_completion(&i1480->evt_complete);
 	i1480->evt_result = -EINPROGRESS;
 	epd = &i1480_usb->usb_iface->cur_altsetting->endpoint[0].desc;
@@ -281,14 +262,12 @@ int i1480_usb_wait_init_done(struct i1480 *i1480)
 		goto error_wait;
 	}
 	usb_kill_urb(i1480_usb->neep_urb);
-	d_fnend(3, dev, "(%p) = 0\n", i1480);
 	return 0;
 
 error_wait:
 	usb_kill_urb(i1480_usb->neep_urb);
 error_submit:
 	i1480->evt_result = result;
-	d_fnend(3, dev, "(%p) = %d\n", i1480, result);
 	return result;
 }
 
@@ -319,7 +298,6 @@ int i1480_usb_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size)
 	struct uwb_rccb *cmd = i1480->cmd_buf;
 	u8 iface_no;
 
-	d_fnstart(3, dev, "(%p, %s, %zu)\n", i1480, cmd_name, cmd_size);
 	/* Post a read on the notification & event endpoint */
 	iface_no = i1480_usb->usb_iface->cur_altsetting->desc.bInterfaceNumber;
 	epd = &i1480_usb->usb_iface->cur_altsetting->endpoint[0].desc;
@@ -347,15 +325,11 @@ int i1480_usb_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size)
 			cmd_name, result);
 		goto error_submit_ep0;
 	}
-	d_fnend(3, dev, "(%p, %s, %zu) = %d\n",
-		i1480, cmd_name, cmd_size, result);
 	return result;
 
 error_submit_ep0:
 	usb_kill_urb(i1480_usb->neep_urb);
 error_submit_ep1:
-	d_fnend(3, dev, "(%p, %s, %zu) = %d\n",
-		i1480, cmd_name, cmd_size, result);
 	return result;
 }
 
diff --git a/drivers/uwb/i1480/i1480u-wlp/rx.c b/drivers/uwb/i1480/i1480u-wlp/rx.c
index 9fc0353..34f4cf9 100644
--- a/drivers/uwb/i1480/i1480u-wlp/rx.c
+++ b/drivers/uwb/i1480/i1480u-wlp/rx.c
@@ -68,11 +68,7 @@
 #include <linux/etherdevice.h>
 #include "i1480u-wlp.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
-
-/**
+/*
  * Setup the RX context
  *
  * Each URB is provided with a transfer_buffer that is the data field
@@ -129,7 +125,7 @@ error:
 }
 
 
-/** Release resources associated to the rx context */
+/* Release resources associated to the rx context */
 void i1480u_rx_release(struct i1480u *i1480u)
 {
 	int cnt;
@@ -155,7 +151,7 @@ void i1480u_rx_unlink_urbs(struct i1480u *i1480u)
 	}
 }
 
-/** Fix an out-of-sequence packet */
+/* Fix an out-of-sequence packet */
 #define i1480u_fix(i1480u, msg...)			\
 do {							\
 	if (printk_ratelimit())				\
@@ -166,7 +162,7 @@ do {							\
 } while (0)
 
 
-/** Drop an out-of-sequence packet */
+/* Drop an out-of-sequence packet */
 #define i1480u_drop(i1480u, msg...)			\
 do {							\
 	if (printk_ratelimit())				\
@@ -177,7 +173,7 @@ do {							\
 
 
-/** Finalizes setting up the SKB and delivers it
+/* Finalizes setting up the SKB and delivers it
  *
  * We first pass the incoming frame to WLP substack for verification. It
  * may also be a WLP association frame in which case WLP will take over the
@@ -192,18 +188,11 @@ void i1480u_skb_deliver(struct i1480u *i1480u)
 	struct net_device *net_dev = i1480u->net_dev;
 	struct device *dev = &i1480u->usb_iface->dev;
 
-	d_printf(6, dev, "RX delivered pre skb(%p), %u bytes\n",
-		 i1480u->rx_skb, i1480u->rx_skb->len);
-	d_dump(7, dev, i1480u->rx_skb->data, i1480u->rx_skb->len);
 	should_parse = wlp_receive_frame(dev, &i1480u->wlp, i1480u->rx_skb,
 					 &i1480u->rx_srcaddr);
 	if (!should_parse)
 		goto out;
 	i1480u->rx_skb->protocol = eth_type_trans(i1480u->rx_skb, net_dev);
-	d_printf(5, dev, "RX delivered skb(%p), %u bytes\n",
-		 i1480u->rx_skb, i1480u->rx_skb->len);
-	d_dump(7, dev, i1480u->rx_skb->data,
-	       i1480u->rx_skb->len > 72 ? 72 : i1480u->rx_skb->len);
 	i1480u->stats.rx_packets++;
 	i1480u->stats.rx_bytes += i1480u->rx_untd_pkt_size;
 	net_dev->last_rx = jiffies;
@@ -216,7 +205,7 @@ out:
 }
 
 
-/**
+/*
  * Process a buffer of data received from the USB RX endpoint
  *
  * First fragment arrives with next or last fragment. All other fragments
@@ -404,7 +393,7 @@ out:
 }
 
 
-/**
+/*
  * Called when an RX URB has finished receiving or has found some kind
  * of error condition.
  *
diff --git a/drivers/uwb/i1480/i1480u-wlp/tx.c b/drivers/uwb/i1480/i1480u-wlp/tx.c
index 3426bfb..39032cc 100644
--- a/drivers/uwb/i1480/i1480u-wlp/tx.c
+++ b/drivers/uwb/i1480/i1480u-wlp/tx.c
@@ -55,8 +55,6 @@
  */
 
 #include "i1480u-wlp.h"
-#define D_LOCAL 5
-#include <linux/uwb/debug.h>
 
 enum {
 	/* This is only for Next and Last TX packets */
@@ -64,7 +62,7 @@ enum {
 		- sizeof(struct untd_hdr_rst),
 };
 
-/** Free resources allocated to a i1480u tx context. */
+/* Free resources allocated to a i1480u tx context. */
 static
 void i1480u_tx_free(struct i1480u_tx *wtx)
 {
@@ -99,7 +97,7 @@ void i1480u_tx_unlink_urbs(struct i1480u *i1480u)
 }
 
 
-/**
+/*
  * Callback for a completed tx USB URB.
  *
  * TODO:
@@ -149,8 +147,6 @@ void i1480u_tx_cb(struct urb *urb)
 	    <= i1480u->tx_inflight.threshold
 	    && netif_queue_stopped(net_dev)
 	    && i1480u->tx_inflight.threshold != 0) {
-		if (d_test(2) && printk_ratelimit())
-			d_printf(2, dev, "Restart queue. \n");
 		netif_start_queue(net_dev);
 		atomic_inc(&i1480u->tx_inflight.restart_count);
 	}
@@ -158,7 +154,7 @@ void i1480u_tx_cb(struct urb *urb)
 }
 
 
-/**
+/*
  * Given a buffer that doesn't fit in a single fragment, create an
  * scatter/gather structure for delivery to the USB pipe.
  *
@@ -253,15 +249,11 @@ int i1480u_tx_create_n(struct i1480u_tx *wtx, struct sk_buff *skb,
 	/* Now do each remaining fragment */
 	result = -EINVAL;
 	while (pl_size_left > 0) {
-		d_printf(5, NULL, "ITR HDR: pl_size_left %zu buf_itr %zu\n",
-			 pl_size_left, buf_itr - wtx->buf);
 		if (buf_itr + sizeof(*untd_hdr_rst) - wtx->buf
 		    > wtx->buf_size) {
 			printk(KERN_ERR "BUG: no space for header\n");
 			goto error_bug;
 		}
-		d_printf(5, NULL, "ITR HDR 2: pl_size_left %zu buf_itr %zu\n",
-			 pl_size_left, buf_itr - wtx->buf);
 		untd_hdr_rst = buf_itr;
 		buf_itr += sizeof(*untd_hdr_rst);
 		if (pl_size_left > i1480u_MAX_PL_SIZE) {
@@ -271,9 +263,6 @@ int i1480u_tx_create_n(struct i1480u_tx *wtx, struct sk_buff *skb,
 			frg_pl_size = pl_size_left;
 			untd_hdr_set_type(&untd_hdr_rst->hdr, i1480u_PKT_FRAG_LST);
 		}
-		d_printf(5, NULL,
-			 "ITR PL: pl_size_left %zu buf_itr %zu frg_pl_size %zu\n",
-			 pl_size_left, buf_itr - wtx->buf, frg_pl_size);
 		untd_hdr_set_rx_tx(&untd_hdr_rst->hdr, 0);
 		untd_hdr_rst->hdr.len = cpu_to_le16(frg_pl_size);
 		untd_hdr_rst->padding = 0;
@@ -286,9 +275,6 @@ int i1480u_tx_create_n(struct i1480u_tx *wtx, struct sk_buff *skb,
 		buf_itr += frg_pl_size;
 		pl_itr += frg_pl_size;
 		pl_size_left -= frg_pl_size;
-		d_printf(5, NULL,
-			 "ITR PL 2: pl_size_left %zu buf_itr %zu frg_pl_size %zu\n",
-			 pl_size_left, buf_itr - wtx->buf, frg_pl_size);
 	}
 	dev_kfree_skb_irq(skb);
 	return 0;
@@ -308,7 +294,7 @@ error_buf_alloc:
 }
 
 
-/**
+/*
  * Given a buffer that fits in a single fragment, fill out a @wtx
  * struct for transmitting it down the USB pipe.
  *
@@ -346,7 +332,7 @@ int i1480u_tx_create_1(struct i1480u_tx *wtx, struct sk_buff *skb,
 }
 
 
-/**
+/*
  * Given a skb to transmit, massage it to become palatable for the TX pipe
  *
  * This will break the buffer in chunks smaller than
@@ -425,7 +411,7 @@ error_wtx_alloc:
 	return NULL;
 }
 
-/**
+/*
  * Actual fragmentation and transmission of frame
  *
  * @wlp:  WLP substack data structure
@@ -447,20 +433,12 @@ int i1480u_xmit_frame(struct wlp *wlp, struct sk_buff *skb,
 	struct i1480u_tx *wtx;
 	struct wlp_tx_hdr *wlp_tx_hdr;
 	static unsigned char dev_bcast[2] = { 0xff, 0xff };
-#if 0
-	int lockup = 50;
-#endif
 
-	d_fnstart(6, dev, "(skb %p (%u), net_dev %p)\n", skb, skb->len,
-		  net_dev);
 	BUG_ON(i1480u->wlp.rc == NULL);
 	if ((net_dev->flags & IFF_UP) == 0)
 		goto out;
 	result = -EBUSY;
 	if (atomic_read(&i1480u->tx_inflight.count) >= i1480u->tx_inflight.max) {
-		if (d_test(2) && printk_ratelimit())
-			d_printf(2, dev, "Max frames in flight "
-				 "stopping queue.\n");
 		netif_stop_queue(net_dev);
 		goto error_max_inflight;
 	}
@@ -489,21 +467,6 @@ int i1480u_xmit_frame(struct wlp *wlp, struct sk_buff *skb,
 		wlp_tx_hdr_set_delivery_id_type(wlp_tx_hdr, i1480u->options.pca_base_priority);
 	}
 
-#if 0
-	dev_info(dev, "TX delivering skb -> USB, %zu bytes\n", skb->len);
-	dump_bytes(dev, skb->data, skb->len > 72 ? 72 : skb->len);
-#endif
-#if 0
-	/* simulates a device lockup after every lockup# packets */
-	if (lockup && ((i1480u->stats.tx_packets + 1) % lockup) == 0) {
-		/* Simulate a dropped transmit interrupt */
-		net_dev->trans_start = jiffies;
-		netif_stop_queue(net_dev);
-		dev_err(dev, "Simulate lockup at %ld\n", jiffies);
-		return result;
-	}
-#endif
-
 	result = usb_submit_urb(wtx->urb, GFP_ATOMIC);		/* Go baby */
 	if (result < 0) {
 		dev_err(dev, "TX: cannot submit URB: %d\n", result);
@@ -513,8 +476,6 @@ int i1480u_xmit_frame(struct wlp *wlp, struct sk_buff *skb,
 	}
 	atomic_inc(&i1480u->tx_inflight.count);
 	net_dev->trans_start = jiffies;
-	d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len,
-		net_dev, result);
 	return result;
 
 error_tx_urb_submit:
@@ -522,13 +483,11 @@ error_tx_urb_submit:
 error_wtx_alloc:
 error_max_inflight:
 out:
-	d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len,
-		net_dev, result);
 	return result;
 }
 
 
-/**
+/*
  * Transmit an skb  Called when an skbuf has to be transmitted
  *
  * The skb is first passed to WLP substack to ensure this is a valid
@@ -551,9 +510,6 @@ int i1480u_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
 	struct device *dev = &i1480u->usb_iface->dev;
 	struct uwb_dev_addr dst;
 
-	d_fnstart(6, dev, "(skb %p (%u), net_dev %p)\n", skb, skb->len,
-		  net_dev);
-	BUG_ON(i1480u->wlp.rc == NULL);
 	if ((net_dev->flags & IFF_UP) == 0)
 		goto error;
 	result = wlp_prepare_tx_frame(dev, &i1480u->wlp, skb, &dst);
@@ -562,31 +518,25 @@ int i1480u_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
 			"Dropping packet.\n", result);
 		goto error;
 	} else if (result == 1) {
-		d_printf(6, dev, "WLP will transmit frame. \n");
 		/* trans_start time will be set when WLP actually transmits
 		 * the frame */
 		goto out;
 	}
-	d_printf(6, dev, "Transmitting frame. \n");
 	result = i1480u_xmit_frame(&i1480u->wlp, skb, &dst);
 	if (result < 0) {
 		dev_err(dev, "Frame TX failed (%d).\n", result);
 		goto error;
 	}
-	d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len,
-		net_dev, result);
 	return NETDEV_TX_OK;
 error:
 	dev_kfree_skb_any(skb);
 	i1480u->stats.tx_dropped++;
 out:
-	d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len,
-		net_dev, result);
 	return NETDEV_TX_OK;
 }
 
 
-/**
+/*
  * Called when a pkt transmission doesn't complete in a reasonable period
  * Device reset may sleep - do it outside of interrupt context (delayed)
  */
diff --git a/drivers/uwb/lc-dev.c b/drivers/uwb/lc-dev.c
index 15f856c..f78087b 100644
--- a/drivers/uwb/lc-dev.c
+++ b/drivers/uwb/lc-dev.c
@@ -22,7 +22,6 @@
  *
  * FIXME: docs
  */
-
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/err.h>
@@ -30,10 +29,6 @@
 #include <linux/random.h>
 #include "uwb-internal.h"
 
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
-
-
 /* We initialize addresses to 0xff (invalid, as it is bcast) */
 static inline void uwb_dev_addr_init(struct uwb_dev_addr *addr)
 {
@@ -104,12 +99,9 @@ static void uwb_dev_sys_release(struct device *dev)
 {
 	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
 
-	d_fnstart(4, NULL, "(dev %p uwb_dev %p)\n", dev, uwb_dev);
 	uwb_bce_put(uwb_dev->bce);
-	d_printf(0, &uwb_dev->dev, "uwb_dev %p freed\n", uwb_dev);
 	memset(uwb_dev, 0x69, sizeof(*uwb_dev));
 	kfree(uwb_dev);
-	d_fnend(4, NULL, "(dev %p uwb_dev %p) = void\n", dev, uwb_dev);
 }
 
 /*
@@ -275,12 +267,8 @@ static struct attribute_group *groups[] = {
  */
 static int __uwb_dev_sys_add(struct uwb_dev *uwb_dev, struct device *parent_dev)
 {
-	int result;
 	struct device *dev;
 
-	d_fnstart(4, NULL, "(uwb_dev %p parent_dev %p)\n", uwb_dev, parent_dev);
-	BUG_ON(parent_dev == NULL);
-
 	dev = &uwb_dev->dev;
 	/* Device sysfs files are only useful for neighbor devices not
 	   local radio controllers. */
@@ -289,18 +277,14 @@ static int __uwb_dev_sys_add(struct uwb_dev *uwb_dev, struct device *parent_dev)
 	dev->parent = parent_dev;
 	dev_set_drvdata(dev, uwb_dev);
 
-	result = device_add(dev);
-	d_fnend(4, NULL, "(uwb_dev %p parent_dev %p) = %d\n", uwb_dev, parent_dev, result);
-	return result;
+	return device_add(dev);
 }
 
 
 static void __uwb_dev_sys_rm(struct uwb_dev *uwb_dev)
 {
-	d_fnstart(4, NULL, "(uwb_dev %p)\n", uwb_dev);
 	dev_set_drvdata(&uwb_dev->dev, NULL);
 	device_del(&uwb_dev->dev);
-	d_fnend(4, NULL, "(uwb_dev %p) = void\n", uwb_dev);
 }
 
 
@@ -384,7 +368,6 @@ int __uwb_dev_offair(struct uwb_dev *uwb_dev, struct uwb_rc *rc)
 	struct device *dev = &uwb_dev->dev;
 	char macbuf[UWB_ADDR_STRSIZE], devbuf[UWB_ADDR_STRSIZE];
 
-	d_fnstart(3, NULL, "(dev %p [uwb_dev %p], uwb_rc %p)\n", dev, uwb_dev, rc);
 	uwb_mac_addr_print(macbuf, sizeof(macbuf), &uwb_dev->mac_addr);
 	uwb_dev_addr_print(devbuf, sizeof(devbuf), &uwb_dev->dev_addr);
 	dev_info(dev, "uwb device (mac %s dev %s) disconnected from %s %s\n",
@@ -393,7 +376,7 @@ int __uwb_dev_offair(struct uwb_dev *uwb_dev, struct uwb_rc *rc)
 		 rc ? dev_name(rc->uwb_dev.dev.parent) : "");
 	uwb_dev_rm(uwb_dev);
 	uwb_dev_put(uwb_dev);	/* for the creation in _onair() */
-	d_fnend(3, NULL, "(dev %p [uwb_dev %p], uwb_rc %p) = 0\n", dev, uwb_dev, rc);
+
 	return 0;
 }
 
diff --git a/drivers/uwb/neh.c b/drivers/uwb/neh.c
index 6df18ed..0af8916 100644
--- a/drivers/uwb/neh.c
+++ b/drivers/uwb/neh.c
@@ -86,8 +86,6 @@
 #include <linux/err.h>
 
 #include "uwb-internal.h"
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 /*
  * UWB Radio Controller Notification/Event Handle
@@ -479,8 +477,6 @@ void uwb_rc_neh_grok(struct uwb_rc *rc, void *buf, size_t buf_size)
 	size_t size, real_size, event_size;
 	int needtofree;
 
-	d_fnstart(3, dev, "(rc %p buf %p %zu buf_size)\n", rc, buf, buf_size);
-	d_printf(2, dev, "groking event block: %zu bytes\n", buf_size);
 	itr = buf;
 	size = buf_size;
 	while (size > 0) {
@@ -528,10 +524,7 @@ void uwb_rc_neh_grok(struct uwb_rc *rc, void *buf, size_t buf_size)
 
 		itr += real_size;
 		size -= real_size;
-		d_printf(2, dev, "consumed %zd bytes, %zu left\n",
-			 event_size, size);
 	}
-	d_fnend(3, dev, "(rc %p buf %p %zu buf_size) = void\n", rc, buf, buf_size);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_neh_grok);
 
diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c
index ce8283c..70f8050 100644
--- a/drivers/uwb/reset.c
+++ b/drivers/uwb/reset.c
@@ -32,8 +32,6 @@
 #include <linux/err.h>
 
 #include "uwb-internal.h"
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 /**
  * Command result codes (WUSB1.0[T8-69])
diff --git a/drivers/uwb/umc-dev.c b/drivers/uwb/umc-dev.c
index 53207e1..1fc7d82 100644
--- a/drivers/uwb/umc-dev.c
+++ b/drivers/uwb/umc-dev.c
@@ -7,8 +7,6 @@
  */
 #include <linux/kernel.h>
 #include <linux/uwb/umc.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 static void umc_device_release(struct device *dev)
 {
@@ -53,8 +51,6 @@ int umc_device_register(struct umc_dev *umc)
 {
 	int err;
 
-	d_fnstart(3, &umc->dev, "(umc_dev %p)\n", umc);
-
 	err = request_resource(umc->resource.parent, &umc->resource);
 	if (err < 0) {
 		dev_err(&umc->dev, "can't allocate resource range "
@@ -68,13 +64,11 @@ int umc_device_register(struct umc_dev *umc)
 	err = device_register(&umc->dev);
 	if (err < 0)
 		goto error_device_register;
-	d_fnend(3, &umc->dev, "(umc_dev %p) = 0\n", umc);
 	return 0;
 
 error_device_register:
 	release_resource(&umc->resource);
 error_request_resource:
-	d_fnend(3, &umc->dev, "(umc_dev %p) = %d\n", umc, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(umc_device_register);
@@ -94,10 +88,8 @@ void umc_device_unregister(struct umc_dev *umc)
 	if (!umc)
 		return;
 	dev = get_device(&umc->dev);
-	d_fnstart(3, dev, "(umc_dev %p)\n", umc);
 	device_unregister(&umc->dev);
 	release_resource(&umc->resource);
-	d_fnend(3, dev, "(umc_dev %p) = void\n", umc);
 	put_device(dev);
 }
 EXPORT_SYMBOL_GPL(umc_device_unregister);
diff --git a/drivers/uwb/uwbd.c b/drivers/uwb/uwbd.c
index ec42ce9..57bd6bf 100644
--- a/drivers/uwb/uwbd.c
+++ b/drivers/uwb/uwbd.c
@@ -68,17 +68,13 @@
  *
  * Handler functions are called normally uwbd_evt_handle_*().
  */
-
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/freezer.h>
-#include "uwb-internal.h"
-
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
 
+#include "uwb-internal.h"
 
-/**
+/*
  * UWBD Event handler function signature
  *
  * Return !0 if the event needs not to be freed (ie the handler
@@ -101,9 +97,8 @@ struct uwbd_event {
 	const char *name;
 };
 
-/** Table of handlers for and properties of the UWBD Radio Control Events */
-static
-struct uwbd_event uwbd_events[] = {
+/* Table of handlers for and properties of the UWBD Radio Control Events */
+static struct uwbd_event uwbd_urc_events[] = {
 	[UWB_RC_EVT_IE_RCV] = {
 		.handler = uwbd_evt_handle_rc_ie_rcv,
 		.name = "IE_RECEIVED"
@@ -146,23 +141,15 @@ struct uwbd_evt_type_handler {
 	size_t size;
 };
 
-#define UWBD_EVT_TYPE_HANDLER(n,a) {		\
-	.name = (n),				\
-	.uwbd_events = (a),			\
-	.size = sizeof(a)/sizeof((a)[0])	\
-}
-
-
-/** Table of handlers for each UWBD Event type. */
-static
-struct uwbd_evt_type_handler uwbd_evt_type_handlers[] = {
-	[UWB_RC_CET_GENERAL] = UWBD_EVT_TYPE_HANDLER("RC", uwbd_events)
+/* Table of handlers for each UWBD Event type. */
+static struct uwbd_evt_type_handler uwbd_urc_evt_type_handlers[] = {
+	[UWB_RC_CET_GENERAL] = {
+		.name        = "URC",
+		.uwbd_events = uwbd_urc_events,
+		.size        = ARRAY_SIZE(uwbd_urc_events),
+	},
 };
 
-static const
-size_t uwbd_evt_type_handlers_len =
-	sizeof(uwbd_evt_type_handlers) / sizeof(uwbd_evt_type_handlers[0]);
-
 static const struct uwbd_event uwbd_message_handlers[] = {
 	[UWB_EVT_MSG_RESET] = {
 		.handler = uwbd_msg_handle_reset,
@@ -170,7 +157,7 @@ static const struct uwbd_event uwbd_message_handlers[] = {
 	},
 };
 
-/**
+/*
  * Handle an URC event passed to the UWB Daemon
  *
  * @evt: the event to handle
@@ -190,6 +177,7 @@ static const struct uwbd_event uwbd_message_handlers[] = {
 static
 int uwbd_event_handle_urc(struct uwb_event *evt)
 {
+	int result = -EINVAL;
 	struct uwbd_evt_type_handler *type_table;
 	uwbd_evt_handler_f handler;
 	u8 type, context;
@@ -199,26 +187,24 @@ int uwbd_event_handle_urc(struct uwb_event *evt)
 	event = le16_to_cpu(evt->notif.rceb->wEvent);
 	context = evt->notif.rceb->bEventContext;
 
-	if (type > uwbd_evt_type_handlers_len) {
-		printk(KERN_ERR "UWBD: event type %u: unknown (too high)\n", type);
-		return -EINVAL;
-	}
-	type_table = &uwbd_evt_type_handlers[type];
-	if (type_table->uwbd_events == NULL) {
-		printk(KERN_ERR "UWBD: event type %u: unknown\n", type);
-		return -EINVAL;
-	}
-	if (event > type_table->size) {
-		printk(KERN_ERR "UWBD: event %s[%u]: unknown (too high)\n",
-		       type_table->name, event);
-		return -EINVAL;
-	}
+	if (type > ARRAY_SIZE(uwbd_urc_evt_type_handlers))
+		goto out;
+	type_table = &uwbd_urc_evt_type_handlers[type];
+	if (type_table->uwbd_events == NULL)
+		goto out;
+	if (event > type_table->size)
+		goto out;
 	handler = type_table->uwbd_events[event].handler;
-	if (handler == NULL) {
-		printk(KERN_ERR "UWBD: event %s[%u]: unknown\n", type_table->name, event);
-		return -EINVAL;
-	}
-	return (*handler)(evt);
+	if (handler == NULL)
+		goto out;
+
+	result = (*handler)(evt);
+out:
+	if (result < 0)
+		dev_err(&evt->rc->uwb_dev.dev,
+			"UWBD: event 0x%02x/%04x/%02x, handling failed: %d\n",
+			type, event, context, result);
+	return result;
 }
 
 static void uwbd_event_handle_message(struct uwb_event *evt)
diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c
index 5f00386..19a1dd1 100644
--- a/drivers/uwb/whc-rc.c
+++ b/drivers/uwb/whc-rc.c
@@ -48,10 +48,8 @@
 #include <linux/uwb.h>
 #include <linux/uwb/whci.h>
 #include <linux/uwb/umc.h>
-#include "uwb-internal.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
+#include "uwb-internal.h"
 
 /**
  * Descriptor for an instance of the UWB Radio Control Driver that
@@ -97,13 +95,8 @@ static int whcrc_cmd(struct uwb_rc *uwb_rc,
 	struct device *dev = &whcrc->umc_dev->dev;
 	u32 urccmd;
 
-	d_fnstart(3, dev, "(%p, %p, %zu)\n", uwb_rc, cmd, cmd_size);
-	might_sleep();
-
-	if (cmd_size >= 4096) {
-		result = -E2BIG;
-		goto error;
-	}
+	if (cmd_size >= 4096)
+		return -EINVAL;
 
 	/*
 	 * If the URC is halted, then the hardware has reset itself.
@@ -114,16 +107,14 @@ static int whcrc_cmd(struct uwb_rc *uwb_rc,
 	if (le_readl(whcrc->rc_base + URCSTS) & URCSTS_HALTED) {
 		dev_err(dev, "requesting reset of halted radio controller\n");
 		uwb_rc_reset_all(uwb_rc);
-		result = -EIO;
-		goto error;
+		return -EIO;
 	}
 
 	result = wait_event_timeout(whcrc->cmd_wq,
 		!(le_readl(whcrc->rc_base + URCCMD) & URCCMD_ACTIVE), HZ/2);
 	if (result == 0) {
 		dev_err(dev, "device is not ready to execute commands\n");
-		result = -ETIMEDOUT;
-		goto error;
+		return -ETIMEDOUT;
 	}
 
 	memmove(whcrc->cmd_buf, cmd, cmd_size);
@@ -136,10 +127,7 @@ static int whcrc_cmd(struct uwb_rc *uwb_rc,
 		  whcrc->rc_base + URCCMD);
 	spin_unlock(&whcrc->irq_lock);
 
-error:
-	d_fnend(3, dev, "(%p, %p, %zu) = %d\n",
-		uwb_rc, cmd, cmd_size, result);
-	return result;
+	return 0;
 }
 
 static int whcrc_reset(struct uwb_rc *rc)
@@ -166,34 +154,25 @@ static int whcrc_reset(struct uwb_rc *rc)
 static
 void whcrc_enable_events(struct whcrc *whcrc)
 {
-	struct device *dev = &whcrc->umc_dev->dev;
 	u32 urccmd;
 
-	d_fnstart(4, dev, "(whcrc %p)\n", whcrc);
-
 	le_writeq(whcrc->evt_dma_buf, whcrc->rc_base + URCEVTADDR);
 
 	spin_lock(&whcrc->irq_lock);
 	urccmd = le_readl(whcrc->rc_base + URCCMD) & ~URCCMD_ACTIVE;
 	le_writel(urccmd | URCCMD_EARV, whcrc->rc_base + URCCMD);
 	spin_unlock(&whcrc->irq_lock);
-
-	d_fnend(4, dev, "(whcrc %p) = void\n", whcrc);
 }
 
 static void whcrc_event_work(struct work_struct *work)
 {
 	struct whcrc *whcrc = container_of(work, struct whcrc, event_work);
-	struct device *dev = &whcrc->umc_dev->dev;
 	size_t size;
 	u64 urcevtaddr;
 
 	urcevtaddr = le_readq(whcrc->rc_base + URCEVTADDR);
 	size = urcevtaddr & URCEVTADDR_OFFSET_MASK;
 
-	d_printf(3, dev, "received %zu octet event\n", size);
-	d_dump(4, dev, whcrc->evt_buf, size > 32 ? 32 : size);
-
 	uwb_rc_neh_grok(whcrc->uwb_rc, whcrc->evt_buf, size);
 	whcrc_enable_events(whcrc);
 }
@@ -216,22 +195,15 @@ irqreturn_t whcrc_irq_cb(int irq, void *_whcrc)
 		return IRQ_NONE;
 	le_writel(urcsts & URCSTS_INT_MASK, whcrc->rc_base + URCSTS);
 
-	d_printf(4, dev, "acked 0x%08x, urcsts 0x%08x\n",
-		 le_readl(whcrc->rc_base + URCSTS), urcsts);
-
 	if (urcsts & URCSTS_HSE) {
 		dev_err(dev, "host system error -- hardware halted\n");
 		/* FIXME: do something sensible here */
 		goto out;
 	}
-	if (urcsts & URCSTS_ER) {
-		d_printf(3, dev, "ER: event ready\n");
+	if (urcsts & URCSTS_ER)
 		schedule_work(&whcrc->event_work);
-	}
-	if (urcsts & URCSTS_RCI) {
-		d_printf(3, dev, "RCI: ready to execute another command\n");
+	if (urcsts & URCSTS_RCI)
 		wake_up_all(&whcrc->cmd_wq);
-	}
 out:
 	return IRQ_HANDLED;
 }
@@ -250,8 +222,7 @@ int whcrc_setup_rc_umc(struct whcrc *whcrc)
 	whcrc->area = umc_dev->resource.start;
 	whcrc->rc_len = umc_dev->resource.end - umc_dev->resource.start + 1;
 	result = -EBUSY;
-	if (request_mem_region(whcrc->area, whcrc->rc_len, KBUILD_MODNAME)
-	    == NULL) {
+	if (request_mem_region(whcrc->area, whcrc->rc_len, KBUILD_MODNAME) == NULL) {
 		dev_err(dev, "can't request URC region (%zu bytes @ 0x%lx): %d\n",
 			whcrc->rc_len, whcrc->area, result);
 		goto error_request_region;
@@ -286,8 +257,6 @@ int whcrc_setup_rc_umc(struct whcrc *whcrc)
 		dev_err(dev, "Can't allocate evt transfer buffer\n");
 		goto error_evt_buffer;
 	}
-	d_printf(3, dev, "UWB RC Interface: %zu bytes at 0x%p, irq %u\n",
-		 whcrc->rc_len, whcrc->rc_base, umc_dev->irq);
 	return 0;
 
 error_evt_buffer:
@@ -396,7 +365,6 @@ int whcrc_probe(struct umc_dev *umc_dev)
 	struct whcrc *whcrc;
 	struct device *dev = &umc_dev->dev;
 
-	d_fnstart(3, dev, "(umc_dev %p)\n", umc_dev);
 	result = -ENOMEM;
 	uwb_rc = uwb_rc_alloc();
 	if (uwb_rc == NULL) {
@@ -428,7 +396,6 @@ int whcrc_probe(struct umc_dev *umc_dev)
 	if (result < 0)
 		goto error_rc_add;
 	umc_set_drvdata(umc_dev, whcrc);
-	d_fnend(3, dev, "(umc_dev %p) = 0\n", umc_dev);
 	return 0;
 
 error_rc_add:
@@ -438,7 +405,6 @@ error_setup_rc_umc:
 error_alloc:
 	uwb_rc_put(uwb_rc);
 error_rc_alloc:
-	d_fnend(3, dev, "(umc_dev %p) = %d\n", umc_dev, result);
 	return result;
 }
 
@@ -461,7 +427,6 @@ static void whcrc_remove(struct umc_dev *umc_dev)
 	whcrc_release_rc_umc(whcrc);
 	kfree(whcrc);
 	uwb_rc_put(uwb_rc);
-	d_printf(1, &umc_dev->dev, "freed whcrc %p\n", whcrc);
 }
 
 static int whcrc_pre_reset(struct umc_dev *umc)
diff --git a/drivers/uwb/wlp/eda.c b/drivers/uwb/wlp/eda.c
index cdfe8df..0b4659e 100644
--- a/drivers/uwb/wlp/eda.c
+++ b/drivers/uwb/wlp/eda.c
@@ -51,9 +51,7 @@
  * the tag and address of the transmitting neighbor.
  */
 
-#define D_LOCAL 5
 #include <linux/netdevice.h>
-#include <linux/uwb/debug.h>
 #include <linux/etherdevice.h>
 #include <linux/wlp.h>
 #include "wlp-internal.h"
@@ -304,7 +302,6 @@ int wlp_eda_for_virtual(struct wlp_eda *eda,
 {
 	int result = 0;
 	struct wlp *wlp = container_of(eda, struct wlp, eda);
-	struct device *dev = &wlp->rc->uwb_dev.dev;
 	struct wlp_eda_node *itr;
 	unsigned long flags;
 	int found = 0;
@@ -313,40 +310,14 @@ int wlp_eda_for_virtual(struct wlp_eda *eda,
 	list_for_each_entry(itr, &eda->cache, list_node) {
 		if (!memcmp(itr->virt_addr, virt_addr,
 			   sizeof(itr->virt_addr))) {
-			d_printf(6, dev, "EDA: looking for "
-			       "%02x:%02x:%02x:%02x:%02x:%02x hit %02x:%02x "
-			       "wss %p tag 0x%02x state %u\n",
-			       virt_addr[0], virt_addr[1],
-			       virt_addr[2], virt_addr[3],
-			       virt_addr[4], virt_addr[5],
-			       itr->dev_addr.data[1],
-			       itr->dev_addr.data[0], itr->wss,
-			       itr->tag, itr->state);
 			result = (*function)(wlp, itr, priv);
 			*dev_addr = itr->dev_addr;
 			found = 1;
 			break;
-		} else
-			d_printf(6, dev, "EDA: looking for "
-			       "%02x:%02x:%02x:%02x:%02x:%02x "
-			       "against "
-			       "%02x:%02x:%02x:%02x:%02x:%02x miss\n",
-			       virt_addr[0], virt_addr[1],
-			       virt_addr[2], virt_addr[3],
-			       virt_addr[4], virt_addr[5],
-			       itr->virt_addr[0], itr->virt_addr[1],
-			       itr->virt_addr[2], itr->virt_addr[3],
-			       itr->virt_addr[4], itr->virt_addr[5]);
+		}
 	}
-	if (!found) {
-		if (printk_ratelimit())
-			dev_err(dev, "EDA: Eth addr %02x:%02x:%02x"
-				":%02x:%02x:%02x not found.\n",
-				virt_addr[0], virt_addr[1],
-				virt_addr[2], virt_addr[3],
-				virt_addr[4], virt_addr[5]);
+	if (!found)
 		result = -ENODEV;
-	}
 	spin_unlock_irqrestore(&eda->lock, flags);
 	return result;
 }
diff --git a/drivers/uwb/wlp/messages.c b/drivers/uwb/wlp/messages.c
index a64cb82..aa42fce 100644
--- a/drivers/uwb/wlp/messages.c
+++ b/drivers/uwb/wlp/messages.c
@@ -24,8 +24,7 @@
  */
 
 #include <linux/wlp.h>
-#define D_LOCAL 6
-#include <linux/uwb/debug.h>
+
 #include "wlp-internal.h"
 
 static
@@ -105,24 +104,18 @@ static inline void wlp_set_attr_hdr(struct wlp_attr_hdr *hdr, unsigned type,
 #define wlp_set(type, type_code, name)					\
 static size_t wlp_set_##name(struct wlp_attr_##name *attr, type value)	\
 {									\
-	d_fnstart(6, NULL, "(attribute %p)\n", attr);			\
 	wlp_set_attr_hdr(&attr->hdr, type_code,				\
 			 sizeof(*attr) - sizeof(struct wlp_attr_hdr));	\
 	attr->name = value;						\
-	d_dump(6, NULL, attr, sizeof(*attr));				\
-	d_fnend(6, NULL, "(attribute %p)\n", attr);			\
 	return sizeof(*attr);						\
 }
 
 #define wlp_pset(type, type_code, name)					\
 static size_t wlp_set_##name(struct wlp_attr_##name *attr, type value)	\
 {									\
-	d_fnstart(6, NULL, "(attribute %p)\n", attr);			\
 	wlp_set_attr_hdr(&attr->hdr, type_code,				\
 			 sizeof(*attr) - sizeof(struct wlp_attr_hdr));	\
 	attr->name = *value;						\
-	d_dump(6, NULL, attr, sizeof(*attr));				\
-	d_fnend(6, NULL, "(attribute %p)\n", attr);			\
 	return sizeof(*attr);						\
 }
 
@@ -139,11 +132,8 @@ static size_t wlp_set_##name(struct wlp_attr_##name *attr, type value)	\
 static size_t wlp_set_##name(struct wlp_attr_##name *attr, type value,	\
 				size_t len)				\
 {									\
-	d_fnstart(6, NULL, "(attribute %p)\n", attr);			\
 	wlp_set_attr_hdr(&attr->hdr, type_code, len);			\
 	memcpy(attr->name, value, len);					\
-	d_dump(6, NULL, attr, sizeof(*attr) + len);			\
-	d_fnend(6, NULL, "(attribute %p)\n", attr);			\
 	return sizeof(*attr) + len;					\
 }
 
@@ -182,7 +172,7 @@ static size_t wlp_set_wss_info(struct wlp_attr_wss_info *attr,
 	size_t datalen;
 	void *ptr = attr->wss_info;
 	size_t used = sizeof(*attr);
-	d_fnstart(6, NULL, "(attribute %p)\n", attr);
+
 	datalen = sizeof(struct wlp_wss_info) + strlen(wss->name);
 	wlp_set_attr_hdr(&attr->hdr, WLP_ATTR_WSS_INFO, datalen);
 	used = wlp_set_wssid(ptr, &wss->wssid);
@@ -190,9 +180,6 @@ static size_t wlp_set_wss_info(struct wlp_attr_wss_info *attr,
 	used += wlp_set_accept_enrl(ptr + used, wss->accept_enroll);
 	used += wlp_set_wss_sec_status(ptr + used, wss->secure_status);
 	used += wlp_set_wss_bcast(ptr + used, &wss->bcast);
-	d_dump(6, NULL, attr, sizeof(*attr) + datalen);
-	d_fnend(6, NULL, "(attribute %p, used %d)\n",
-		attr, (int)(sizeof(*attr) + used));
 	return sizeof(*attr) + used;
 }
 
@@ -414,7 +401,6 @@ static ssize_t wlp_get_wss_info_attrs(struct wlp *wlp,
 	size_t used = 0;
 	ssize_t result = -EINVAL;
 
-	d_printf(6, dev, "WLP: WSS info: Retrieving WSS name\n");
 	result = wlp_get_wss_name(wlp, ptr, info->name, buflen);
 	if (result < 0) {
 		dev_err(dev, "WLP: unable to obtain WSS name from "
@@ -422,7 +408,7 @@ static ssize_t wlp_get_wss_info_attrs(struct wlp *wlp,
 		goto error_parse;
 	}
 	used += result;
-	d_printf(6, dev, "WLP: WSS info: Retrieving accept enroll\n");
+
 	result = wlp_get_accept_enrl(wlp, ptr + used, &info->accept_enroll,
 				     buflen - used);
 	if (result < 0) {
@@ -437,7 +423,7 @@ static ssize_t wlp_get_wss_info_attrs(struct wlp *wlp,
 		goto error_parse;
 	}
 	used += result;
-	d_printf(6, dev, "WLP: WSS info: Retrieving secure status\n");
+
 	result = wlp_get_wss_sec_status(wlp, ptr + used, &info->sec_status,
 					buflen - used);
 	if (result < 0) {
@@ -452,7 +438,7 @@ static ssize_t wlp_get_wss_info_attrs(struct wlp *wlp,
 		goto error_parse;
 	}
 	used += result;
-	d_printf(6, dev, "WLP: WSS info: Retrieving broadcast\n");
+
 	result = wlp_get_wss_bcast(wlp, ptr + used, &info->bcast,
 				   buflen - used);
 	if (result < 0) {
@@ -530,7 +516,7 @@ static ssize_t wlp_get_wss_info(struct wlp *wlp, struct wlp_attr_wss_info *attr,
 	len = result;
 	used = sizeof(*attr);
 	ptr = attr;
-	d_printf(6, dev, "WLP: WSS info: Retrieving WSSID\n");
+
 	result = wlp_get_wssid(wlp, ptr + used, wssid, buflen - used);
 	if (result < 0) {
 		dev_err(dev, "WLP: unable to obtain WSSID from WSS info.\n");
@@ -553,8 +539,6 @@ static ssize_t wlp_get_wss_info(struct wlp *wlp, struct wlp_attr_wss_info *attr,
 		goto out;
 	}
 	result = used;
-	d_printf(6, dev, "WLP: Successfully parsed WLP information "
-		 "attribute. used %zu bytes\n", used);
 out:
 	return result;
 }
@@ -598,8 +582,6 @@ static ssize_t wlp_get_all_wss_info(struct wlp *wlp,
 	struct wlp_wssid_e *wssid_e;
 	char buf[WLP_WSS_UUID_STRSIZE];
 
-	d_fnstart(6, dev, "wlp %p, attr %p, neighbor %p, wss %p, buflen %d \n",
-		  wlp, attr, neighbor, wss, (int)buflen);
 	if (buflen < 0)
 		goto out;
 
@@ -638,8 +620,7 @@ static ssize_t wlp_get_all_wss_info(struct wlp *wlp,
 			wss->accept_enroll = wss_info.accept_enroll;
 			wss->state = WLP_WSS_STATE_PART_ENROLLED;
 			wlp_wss_uuid_print(buf, sizeof(buf), &wssid);
-			d_printf(2, dev, "WLP: Found WSS %s. Enrolling.\n",
-				 buf);
+			dev_dbg(dev, "WLP: Found WSS %s. Enrolling.\n", buf);
 		} else {
 			wssid_e = wlp_create_wssid_e(wlp, neighbor);
 			if (wssid_e == NULL) {
@@ -660,9 +641,6 @@ error_parse:
 	if (result < 0 && !enroll) /* this was a discovery */
 		wlp_remove_neighbor_tmp_info(neighbor);
 out:
-	d_fnend(6, dev, "wlp %p, attr %p, neighbor %p, wss %p, buflen %d, "
-		"result %d \n", wlp, attr, neighbor, wss, (int)buflen,
-		(int)result);
 	return result;
 
 }
@@ -718,7 +696,6 @@ static int wlp_build_assoc_d1(struct wlp *wlp, struct wlp_wss *wss,
 	struct sk_buff *_skb;
 	void *d1_itr;
 
-	d_fnstart(6, dev, "wlp %p\n", wlp);
 	if (wlp->dev_info == NULL) {
 		result = __wlp_setup_device_info(wlp);
 		if (result < 0) {
@@ -728,24 +705,6 @@ static int wlp_build_assoc_d1(struct wlp *wlp, struct wlp_wss *wss,
 		}
 	}
 	info = wlp->dev_info;
-	d_printf(6, dev, "Local properties:\n"
-		 "Device name (%d bytes): %s\n"
-		 "Model name (%d bytes): %s\n"
-		 "Manufacturer (%d bytes): %s\n"
-		 "Model number (%d bytes): %s\n"
-		 "Serial number (%d bytes): %s\n"
-		 "Primary device type: \n"
-		 " Category: %d \n"
-		 " OUI: %02x:%02x:%02x \n"
-		 " OUI Subdivision: %u \n",
-		 (int)strlen(info->name), info->name,
-		 (int)strlen(info->model_name), info->model_name,
-		 (int)strlen(info->manufacturer), info->manufacturer,
-		 (int)strlen(info->model_nr),  info->model_nr,
-		 (int)strlen(info->serial), info->serial,
-		 info->prim_dev_type.category,
-		 info->prim_dev_type.OUI[0], info->prim_dev_type.OUI[1],
-		 info->prim_dev_type.OUI[2], info->prim_dev_type.OUIsubdiv);
 	_skb = dev_alloc_skb(sizeof(*_d1)
 		      + sizeof(struct wlp_attr_uuid_e)
 		      + sizeof(struct wlp_attr_wss_sel_mthd)
@@ -768,7 +727,6 @@ static int wlp_build_assoc_d1(struct wlp *wlp, struct wlp_wss *wss,
 		goto error;
 	}
 	_d1 = (void *) _skb->data;
-	d_printf(6, dev, "D1 starts at %p \n", _d1);
 	_d1->hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID);
 	_d1->hdr.type = WLP_FRAME_ASSOCIATION;
 	_d1->type = WLP_ASSOC_D1;
@@ -791,25 +749,8 @@ static int wlp_build_assoc_d1(struct wlp *wlp, struct wlp_wss *wss,
 	used += wlp_set_prim_dev_type(d1_itr + used, &info->prim_dev_type);
 	used += wlp_set_wlp_assc_err(d1_itr + used, WLP_ASSOC_ERROR_NONE);
 	skb_put(_skb, sizeof(*_d1) + used);
-	d_printf(6, dev, "D1 message:\n");
-	d_dump(6, dev, _d1, sizeof(*_d1)
-		     + sizeof(struct wlp_attr_uuid_e)
-		     + sizeof(struct wlp_attr_wss_sel_mthd)
-		     + sizeof(struct wlp_attr_dev_name)
-		     + strlen(info->name)
-		     + sizeof(struct wlp_attr_manufacturer)
-		     + strlen(info->manufacturer)
-		     + sizeof(struct wlp_attr_model_name)
-		     + strlen(info->model_name)
-		     + sizeof(struct wlp_attr_model_nr)
-		     + strlen(info->model_nr)
-		     + sizeof(struct wlp_attr_serial)
-		     + strlen(info->serial)
-		     + sizeof(struct wlp_attr_prim_dev_type)
-		     + sizeof(struct wlp_attr_wlp_assc_err));
 	*skb = _skb;
 error:
-	d_fnend(6, dev, "wlp %p, result = %d\n", wlp, result);
 	return result;
 }
 
@@ -837,7 +778,6 @@ int wlp_build_assoc_d2(struct wlp *wlp, struct wlp_wss *wss,
 	void *d2_itr;
 	size_t mem_needed;
 
-	d_fnstart(6, dev, "wlp %p\n", wlp);
 	if (wlp->dev_info == NULL) {
 		result = __wlp_setup_device_info(wlp);
 		if (result < 0) {
@@ -847,24 +787,6 @@ int wlp_build_assoc_d2(struct wlp *wlp, struct wlp_wss *wss,
 		}
 	}
 	info = wlp->dev_info;
-	d_printf(6, dev, "Local properties:\n"
-		 "Device name (%d bytes): %s\n"
-		 "Model name (%d bytes): %s\n"
-		 "Manufacturer (%d bytes): %s\n"
-		 "Model number (%d bytes): %s\n"
-		 "Serial number (%d bytes): %s\n"
-		 "Primary device type: \n"
-		 " Category: %d \n"
-		 " OUI: %02x:%02x:%02x \n"
-		 " OUI Subdivision: %u \n",
-		 (int)strlen(info->name), info->name,
-		 (int)strlen(info->model_name), info->model_name,
-		 (int)strlen(info->manufacturer), info->manufacturer,
-		 (int)strlen(info->model_nr),  info->model_nr,
-		 (int)strlen(info->serial), info->serial,
-		 info->prim_dev_type.category,
-		 info->prim_dev_type.OUI[0], info->prim_dev_type.OUI[1],
-		 info->prim_dev_type.OUI[2], info->prim_dev_type.OUIsubdiv);
 	mem_needed = sizeof(*_d2)
 		      + sizeof(struct wlp_attr_uuid_e)
 		      + sizeof(struct wlp_attr_uuid_r)
@@ -892,7 +814,6 @@ int wlp_build_assoc_d2(struct wlp *wlp, struct wlp_wss *wss,
 		goto error;
 	}
 	_d2 = (void *) _skb->data;
-	d_printf(6, dev, "D2 starts at %p \n", _d2);
 	_d2->hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID);
 	_d2->hdr.type = WLP_FRAME_ASSOCIATION;
 	_d2->type = WLP_ASSOC_D2;
@@ -917,11 +838,8 @@ int wlp_build_assoc_d2(struct wlp *wlp, struct wlp_wss *wss,
 	used += wlp_set_prim_dev_type(d2_itr + used, &info->prim_dev_type);
 	used += wlp_set_wlp_assc_err(d2_itr + used, WLP_ASSOC_ERROR_NONE);
 	skb_put(_skb, sizeof(*_d2) + used);
-	d_printf(6, dev, "D2 message:\n");
-	d_dump(6, dev, _d2, mem_needed);
 	*skb = _skb;
 error:
-	d_fnend(6, dev, "wlp %p, result = %d\n", wlp, result);
 	return result;
 }
 
@@ -947,7 +865,6 @@ int wlp_build_assoc_f0(struct wlp *wlp, struct sk_buff **skb,
 	struct sk_buff *_skb;
 	struct wlp_nonce tmp;
 
-	d_fnstart(6, dev, "wlp %p\n", wlp);
 	_skb = dev_alloc_skb(sizeof(*f0));
 	if (_skb == NULL) {
 		dev_err(dev, "WLP: Unable to allocate memory for F0 "
@@ -955,7 +872,6 @@ int wlp_build_assoc_f0(struct wlp *wlp, struct sk_buff **skb,
 		goto error_alloc;
 	}
 	f0 = (void *) _skb->data;
-	d_printf(6, dev, "F0 starts at %p \n", f0);
 	f0->f0_hdr.hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID);
 	f0->f0_hdr.hdr.type = WLP_FRAME_ASSOCIATION;
 	f0->f0_hdr.type = WLP_ASSOC_F0;
@@ -969,7 +885,6 @@ int wlp_build_assoc_f0(struct wlp *wlp, struct sk_buff **skb,
 	*skb = _skb;
 	result = 0;
 error_alloc:
-	d_fnend(6, dev, "wlp %p, result %d \n", wlp, result);
 	return result;
 }
 
@@ -1242,12 +1157,9 @@ void wlp_handle_d1_frame(struct work_struct *ws)
 	enum wlp_wss_sel_mthd sel_mthd = 0;
 	struct wlp_device_info dev_info;
 	enum wlp_assc_error assc_err;
-	char uuid[WLP_WSS_UUID_STRSIZE];
 	struct sk_buff *resp = NULL;
 
 	/* Parse D1 frame */
-	d_fnstart(6, dev, "WLP: handle D1 frame. wlp = %p, skb = %p\n",
-		  wlp, skb);
 	mutex_lock(&wss->mutex);
 	mutex_lock(&wlp->mutex); /* to access wlp->uuid */
 	memset(&dev_info, 0, sizeof(dev_info));
@@ -1258,30 +1170,6 @@ void wlp_handle_d1_frame(struct work_struct *ws)
 		kfree_skb(skb);
 		goto out;
 	}
-	wlp_wss_uuid_print(uuid, sizeof(uuid), &uuid_e);
-	d_printf(6, dev, "From D1 frame:\n"
-		 "UUID-E: %s\n"
-		 "Selection method: %d\n"
-		 "Device name (%d bytes): %s\n"
-		 "Model name (%d bytes): %s\n"
-		 "Manufacturer (%d bytes): %s\n"
-		 "Model number (%d bytes): %s\n"
-		 "Serial number (%d bytes): %s\n"
-		 "Primary device type: \n"
-		 " Category: %d \n"
-		 " OUI: %02x:%02x:%02x \n"
-		 " OUI Subdivision: %u \n",
-		 uuid, sel_mthd,
-		 (int)strlen(dev_info.name), dev_info.name,
-		 (int)strlen(dev_info.model_name), dev_info.model_name,
-		 (int)strlen(dev_info.manufacturer), dev_info.manufacturer,
-		 (int)strlen(dev_info.model_nr),  dev_info.model_nr,
-		 (int)strlen(dev_info.serial), dev_info.serial,
-		 dev_info.prim_dev_type.category,
-		 dev_info.prim_dev_type.OUI[0],
-		 dev_info.prim_dev_type.OUI[1],
-		 dev_info.prim_dev_type.OUI[2],
-		 dev_info.prim_dev_type.OUIsubdiv);
 
 	kfree_skb(skb);
 	if (!wlp_uuid_is_set(&wlp->uuid)) {
@@ -1316,7 +1204,6 @@ out:
 	kfree(frame_ctx);
 	mutex_unlock(&wlp->mutex);
 	mutex_unlock(&wss->mutex);
-	d_fnend(6, dev, "WLP: handle D1 frame. wlp = %p\n", wlp);
 }
 
 /**
@@ -1546,10 +1433,8 @@ int wlp_parse_c3c4_frame(struct wlp *wlp, struct sk_buff *skb,
 	void *ptr = skb->data;
 	size_t len = skb->len;
 	size_t used;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	struct wlp_frame_assoc *assoc = ptr;
 
-	d_fnstart(6, dev, "wlp %p, skb %p \n", wlp, skb);
 	used = sizeof(*assoc);
 	result = wlp_get_wssid(wlp, ptr + used, wssid, len - used);
 	if (result < 0) {
@@ -1572,14 +1457,7 @@ int wlp_parse_c3c4_frame(struct wlp *wlp, struct sk_buff *skb,
 			wlp_assoc_frame_str(assoc->type));
 		goto error_parse;
 	}
-	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_printf(6, dev, "WLP: parsed: WSSID %s, tag 0x%02x, virt "
-		 "%02x:%02x:%02x:%02x:%02x:%02x \n", buf, *tag,
-		 virt_addr->data[0], virt_addr->data[1], virt_addr->data[2],
-		 virt_addr->data[3], virt_addr->data[4], virt_addr->data[5]);
-
 error_parse:
-	d_fnend(6, dev, "wlp %p, skb %p, result = %d \n", wlp, skb, result);
 	return result;
 }
 
@@ -1600,7 +1478,6 @@ int wlp_build_assoc_c1c2(struct wlp *wlp, struct wlp_wss *wss,
 	} *c;
 	struct sk_buff *_skb;
 
-	d_fnstart(6, dev, "wlp %p, wss %p \n", wlp, wss);
 	_skb = dev_alloc_skb(sizeof(*c));
 	if (_skb == NULL) {
 		dev_err(dev, "WLP: Unable to allocate memory for C1/C2 "
@@ -1608,7 +1485,6 @@ int wlp_build_assoc_c1c2(struct wlp *wlp, struct wlp_wss *wss,
 		goto error_alloc;
 	}
 	c = (void *) _skb->data;
-	d_printf(6, dev, "C1/C2 starts at %p \n", c);
 	c->c_hdr.hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID);
 	c->c_hdr.hdr.type = WLP_FRAME_ASSOCIATION;
 	c->c_hdr.type = type;
@@ -1616,12 +1492,9 @@ int wlp_build_assoc_c1c2(struct wlp *wlp, struct wlp_wss *wss,
 	wlp_set_msg_type(&c->c_hdr.msg_type, type);
 	wlp_set_wssid(&c->wssid, &wss->wssid);
 	skb_put(_skb, sizeof(*c));
-	d_printf(6, dev, "C1/C2 message:\n");
-	d_dump(6, dev, c, sizeof(*c));
 	*skb = _skb;
 	result = 0;
 error_alloc:
-	d_fnend(6, dev, "wlp %p, wss %p, result %d \n", wlp, wss, result);
 	return result;
 }
 
@@ -1660,7 +1533,6 @@ int wlp_build_assoc_c3c4(struct wlp *wlp, struct wlp_wss *wss,
 	} *c;
 	struct sk_buff *_skb;
 
-	d_fnstart(6, dev, "wlp %p, wss %p \n", wlp, wss);
 	_skb = dev_alloc_skb(sizeof(*c));
 	if (_skb == NULL) {
 		dev_err(dev, "WLP: Unable to allocate memory for C3/C4 "
@@ -1668,7 +1540,6 @@ int wlp_build_assoc_c3c4(struct wlp *wlp, struct wlp_wss *wss,
 		goto error_alloc;
 	}
 	c = (void *) _skb->data;
-	d_printf(6, dev, "C3/C4 starts at %p \n", c);
 	c->c_hdr.hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID);
 	c->c_hdr.hdr.type = WLP_FRAME_ASSOCIATION;
 	c->c_hdr.type = type;
@@ -1678,12 +1549,9 @@ int wlp_build_assoc_c3c4(struct wlp *wlp, struct wlp_wss *wss,
 	wlp_set_wss_tag(&c->wss_tag, wss->tag);
 	wlp_set_wss_virt(&c->wss_virt, &wss->virtual_addr);
 	skb_put(_skb, sizeof(*c));
-	d_printf(6, dev, "C3/C4 message:\n");
-	d_dump(6, dev, c, sizeof(*c));
 	*skb = _skb;
 	result = 0;
 error_alloc:
-	d_fnend(6, dev, "wlp %p, wss %p, result %d \n", wlp, wss, result);
 	return result;
 }
 
@@ -1709,10 +1577,7 @@ static int wlp_send_assoc_##type(struct wlp *wlp, struct wlp_wss *wss,	\
 	struct device *dev = &wlp->rc->uwb_dev.dev;			\
 	int result;							\
 	struct sk_buff *skb = NULL;					\
-	d_fnstart(6, dev, "wlp %p, wss %p, neighbor: %02x:%02x\n",	\
-		  wlp, wss, dev_addr->data[1], dev_addr->data[0]);	\
-	d_printf(6, dev, "WLP: Constructing %s frame. \n",		\
-		 wlp_assoc_frame_str(id));				\
+									\
 	/* Build the frame */						\
 	result = wlp_build_assoc_##type(wlp, wss, &skb);		\
 	if (result < 0) {						\
@@ -1721,9 +1586,6 @@ static int wlp_send_assoc_##type(struct wlp *wlp, struct wlp_wss *wss,	\
 		goto error_build_assoc;					\
 	}								\
 	/* Send the frame */						\
-	d_printf(6, dev, "Transmitting %s frame to %02x:%02x \n",	\
-		 wlp_assoc_frame_str(id),				\
-		 dev_addr->data[1], dev_addr->data[0]);			\
 	BUG_ON(wlp->xmit_frame == NULL);				\
 	result = wlp->xmit_frame(wlp, skb, dev_addr);			\
 	if (result < 0) {						\
@@ -1740,8 +1602,6 @@ error_xmit:								\
 	/* We could try again ... */					\
 	dev_kfree_skb_any(skb);/*we need to free if tx fails*/		\
 error_build_assoc:							\
-	d_fnend(6, dev, "wlp %p, wss %p, neighbor: %02x:%02x\n",	\
-		wlp, wss, dev_addr->data[1], dev_addr->data[0]);	\
 	return result;							\
 }
 
@@ -1794,12 +1654,9 @@ void wlp_handle_c1_frame(struct work_struct *ws)
 	struct uwb_dev_addr *src = &frame_ctx->src;
 	int result;
 	struct wlp_uuid wssid;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	struct sk_buff *resp = NULL;
 
 	/* Parse C1 frame */
-	d_fnstart(6, dev, "WLP: handle C1 frame. wlp = %p, c1 = %p\n",
-		  wlp, c1);
 	mutex_lock(&wss->mutex);
 	result = wlp_get_wssid(wlp, (void *)c1 + sizeof(*c1), &wssid,
 			       len - sizeof(*c1));
@@ -1807,12 +1664,8 @@ void wlp_handle_c1_frame(struct work_struct *ws)
 		dev_err(dev, "WLP: unable to obtain WSSID from C1 frame.\n");
 		goto out;
 	}
-	wlp_wss_uuid_print(buf, sizeof(buf), &wssid);
-	d_printf(6, dev, "Received C1 frame with WSSID %s \n", buf);
 	if (!memcmp(&wssid, &wss->wssid, sizeof(wssid))
 	    && wss->state == WLP_WSS_STATE_ACTIVE) {
-		d_printf(6, dev, "WSSID from C1 frame is known locally "
-			 "and is active\n");
 		/* Construct C2 frame */
 		result = wlp_build_assoc_c2(wlp, wss, &resp);
 		if (result < 0) {
@@ -1820,8 +1673,6 @@ void wlp_handle_c1_frame(struct work_struct *ws)
 			goto out;
 		}
 	} else {
-		d_printf(6, dev, "WSSID from C1 frame is not known locally "
-			 "or is not active\n");
 		/* Construct F0 frame */
 		result = wlp_build_assoc_f0(wlp, &resp, WLP_ASSOC_ERROR_INV);
 		if (result < 0) {
@@ -1830,8 +1681,6 @@ void wlp_handle_c1_frame(struct work_struct *ws)
 		}
 	}
 	/* Send C2 frame */
-	d_printf(6, dev, "Transmitting response (C2/F0) frame to %02x:%02x \n",
-		 src->data[1], src->data[0]);
 	BUG_ON(wlp->xmit_frame == NULL);
 	result = wlp->xmit_frame(wlp, resp, src);
 	if (result < 0) {
@@ -1846,7 +1695,6 @@ out:
 	kfree_skb(frame_ctx->skb);
 	kfree(frame_ctx);
 	mutex_unlock(&wss->mutex);
-	d_fnend(6, dev, "WLP: handle C1 frame. wlp = %p\n", wlp);
 }
 
 /**
@@ -1868,27 +1716,20 @@ void wlp_handle_c3_frame(struct work_struct *ws)
 	struct sk_buff *skb = frame_ctx->skb;
 	struct uwb_dev_addr *src = &frame_ctx->src;
 	int result;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	struct sk_buff *resp = NULL;
 	struct wlp_uuid wssid;
 	u8 tag;
 	struct uwb_mac_addr virt_addr;
 
 	/* Parse C3 frame */
-	d_fnstart(6, dev, "WLP: handle C3 frame. wlp = %p, skb = %p\n",
-		  wlp, skb);
 	mutex_lock(&wss->mutex);
 	result = wlp_parse_c3c4_frame(wlp, skb, &wssid, &tag, &virt_addr);
 	if (result < 0) {
 		dev_err(dev, "WLP: unable to obtain values from C3 frame.\n");
 		goto out;
 	}
-	wlp_wss_uuid_print(buf, sizeof(buf), &wssid);
-	d_printf(6, dev, "Received C3 frame with WSSID %s \n", buf);
 	if (!memcmp(&wssid, &wss->wssid, sizeof(wssid))
 	    && wss->state >= WLP_WSS_STATE_ACTIVE) {
-		d_printf(6, dev, "WSSID from C3 frame is known locally "
-			 "and is active\n");
 		result = wlp_eda_update_node(&wlp->eda, src, wss,
 					     (void *) virt_addr.data, tag,
 					     WLP_WSS_CONNECTED);
@@ -1913,8 +1754,6 @@ void wlp_handle_c3_frame(struct work_struct *ws)
 			}
 		}
 	} else {
-		d_printf(6, dev, "WSSID from C3 frame is not known locally "
-			 "or is not active\n");
 		/* Construct F0 frame */
 		result = wlp_build_assoc_f0(wlp, &resp, WLP_ASSOC_ERROR_INV);
 		if (result < 0) {
@@ -1923,8 +1762,6 @@ void wlp_handle_c3_frame(struct work_struct *ws)
 		}
 	}
 	/* Send C4 frame */
-	d_printf(6, dev, "Transmitting response (C4/F0) frame to %02x:%02x \n",
-		 src->data[1], src->data[0]);
 	BUG_ON(wlp->xmit_frame == NULL);
 	result = wlp->xmit_frame(wlp, resp, src);
 	if (result < 0) {
@@ -1939,8 +1776,6 @@ out:
 	kfree_skb(frame_ctx->skb);
 	kfree(frame_ctx);
 	mutex_unlock(&wss->mutex);
-	d_fnend(6, dev, "WLP: handle C3 frame. wlp = %p, skb = %p\n",
-		wlp, skb);
 }
 
 
diff --git a/drivers/uwb/wlp/sysfs.c b/drivers/uwb/wlp/sysfs.c
index 1bb9b1f..0370399 100644
--- a/drivers/uwb/wlp/sysfs.c
+++ b/drivers/uwb/wlp/sysfs.c
@@ -23,8 +23,8 @@
  * FIXME: Docs
  *
  */
-
 #include <linux/wlp.h>
+
 #include "wlp-internal.h"
 
 static
diff --git a/drivers/uwb/wlp/txrx.c b/drivers/uwb/wlp/txrx.c
index c701bd1..cd20357 100644
--- a/drivers/uwb/wlp/txrx.c
+++ b/drivers/uwb/wlp/txrx.c
@@ -26,12 +26,10 @@
 
 #include <linux/etherdevice.h>
 #include <linux/wlp.h>
-#define D_LOCAL 5
-#include <linux/uwb/debug.h>
-#include "wlp-internal.h"
 
+#include "wlp-internal.h"
 
-/**
+/*
  * Direct incoming association msg to correct parsing routine
  *
  * We only expect D1, E1, C1, C3 messages as new. All other incoming
@@ -48,35 +46,31 @@ void wlp_direct_assoc_frame(struct wlp *wlp, struct sk_buff *skb,
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	struct wlp_frame_assoc *assoc = (void *) skb->data;
 	struct wlp_assoc_frame_ctx *frame_ctx;
-	d_fnstart(5, dev, "wlp %p, skb %p\n", wlp, skb);
+
 	frame_ctx = kmalloc(sizeof(*frame_ctx), GFP_ATOMIC);
 	if (frame_ctx == NULL) {
 		dev_err(dev, "WLP: Unable to allocate memory for association "
 			"frame handling.\n");
 		kfree_skb(skb);
-		goto out;
+		return;
 	}
 	frame_ctx->wlp = wlp;
 	frame_ctx->skb = skb;
 	frame_ctx->src = *src;
 	switch (assoc->type) {
 	case WLP_ASSOC_D1:
-		d_printf(5, dev, "Received a D1 frame.\n");
 		INIT_WORK(&frame_ctx->ws, wlp_handle_d1_frame);
 		schedule_work(&frame_ctx->ws);
 		break;
 	case WLP_ASSOC_E1:
-		d_printf(5, dev, "Received a E1 frame. FIXME?\n");
 		kfree_skb(skb); /* Temporary until we handle it */
 		kfree(frame_ctx); /* Temporary until we handle it */
 		break;
 	case WLP_ASSOC_C1:
-		d_printf(5, dev, "Received a C1 frame.\n");
 		INIT_WORK(&frame_ctx->ws, wlp_handle_c1_frame);
 		schedule_work(&frame_ctx->ws);
 		break;
 	case WLP_ASSOC_C3:
-		d_printf(5, dev, "Received a C3 frame.\n");
 		INIT_WORK(&frame_ctx->ws, wlp_handle_c3_frame);
 		schedule_work(&frame_ctx->ws);
 		break;
@@ -87,11 +81,9 @@ void wlp_direct_assoc_frame(struct wlp *wlp, struct sk_buff *skb,
 		kfree(frame_ctx);
 		break;
 	}
-out:
-	d_fnend(5, dev, "wlp %p\n", wlp);
 }
 
-/**
+/*
  * Process incoming association frame
  *
  * Although it could be possible to deal with some incoming association
@@ -112,7 +104,6 @@ void wlp_receive_assoc_frame(struct wlp *wlp, struct sk_buff *skb,
 	struct wlp_frame_assoc *assoc = (void *) skb->data;
 	struct wlp_session *session = wlp->session;
 	u8 version;
-	d_fnstart(5, dev, "wlp %p, skb %p\n", wlp, skb);
 
 	if (wlp_get_version(wlp, &assoc->version, &version,
 			    sizeof(assoc->version)) < 0)
@@ -150,14 +141,12 @@ void wlp_receive_assoc_frame(struct wlp *wlp, struct sk_buff *skb,
 	} else {
 		wlp_direct_assoc_frame(wlp, skb, src);
 	}
-	d_fnend(5, dev, "wlp %p\n", wlp);
 	return;
 error:
 	kfree_skb(skb);
-	d_fnend(5, dev, "wlp %p\n", wlp);
 }
 
-/**
+/*
  * Verify incoming frame is from connected neighbor, prep to pass to WLP client
  *
  * Verification proceeds according to WLP 0.99 [7.3.1]. The source address
@@ -176,7 +165,6 @@ int wlp_verify_prep_rx_frame(struct wlp *wlp, struct sk_buff *skb,
 	struct wlp_eda_node eda_entry;
 	struct wlp_frame_std_abbrv_hdr *hdr = (void *) skb->data;
 
-	d_fnstart(6, dev, "wlp %p, skb %p \n", wlp, skb);
 	/*verify*/
 	result = wlp_copy_eda_node(&wlp->eda, src, &eda_entry);
 	if (result < 0) {
@@ -207,11 +195,10 @@ int wlp_verify_prep_rx_frame(struct wlp *wlp, struct sk_buff *skb,
 	/*prep*/
 	skb_pull(skb, sizeof(*hdr));
 out:
-	d_fnend(6, dev, "wlp %p, skb %p, result = %d \n", wlp, skb, result);
 	return result;
 }
 
-/**
+/*
  * Receive a WLP frame from device
  *
  * @returns: 1 if calling function should free the skb
@@ -226,14 +213,12 @@ int wlp_receive_frame(struct device *dev, struct wlp *wlp, struct sk_buff *skb,
 	struct wlp_frame_hdr *hdr;
 	int result = 0;
 
-	d_fnstart(6, dev, "skb (%p), len (%u)\n", skb, len);
 	if (len < sizeof(*hdr)) {
 		dev_err(dev, "Not enough data to parse WLP header.\n");
 		result = -EINVAL;
 		goto out;
 	}
 	hdr = ptr;
-	d_dump(6, dev, hdr, sizeof(*hdr));
 	if (le16_to_cpu(hdr->mux_hdr) != WLP_PROTOCOL_ID) {
 		dev_err(dev, "Not a WLP frame type.\n");
 		result = -EINVAL;
@@ -270,7 +255,6 @@ int wlp_receive_frame(struct device *dev, struct wlp *wlp, struct sk_buff *skb,
 				"WLP header.\n");
 			goto out;
 		}
-		d_printf(5, dev, "Association frame received.\n");
 		wlp_receive_assoc_frame(wlp, skb, src);
 		break;
 	default:
@@ -283,13 +267,12 @@ out:
 		kfree_skb(skb);
 		result = 0;
 	}
-	d_fnend(6, dev, "skb (%p)\n", skb);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wlp_receive_frame);
 
 
-/**
+/*
  * Verify frame from network stack, prepare for further transmission
  *
  * @skb:   the socket buffer that needs to be prepared for transmission (it
@@ -343,9 +326,7 @@ int wlp_prepare_tx_frame(struct device *dev, struct wlp *wlp,
 	int result = -EINVAL;
 	struct ethhdr *eth_hdr = (void *) skb->data;
 
-	d_fnstart(6, dev, "wlp (%p), skb (%p) \n", wlp, skb);
 	if (is_broadcast_ether_addr(eth_hdr->h_dest)) {
-		d_printf(6, dev, "WLP: handling broadcast frame. \n");
 		result = wlp_eda_for_each(&wlp->eda, wlp_wss_send_copy, skb);
 		if (result < 0) {
 			if (printk_ratelimit())
@@ -357,7 +338,6 @@ int wlp_prepare_tx_frame(struct device *dev, struct wlp *wlp,
 		result = 1;
 		/* Frame will be transmitted by WLP. */
 	} else {
-		d_printf(6, dev, "WLP: handling unicast frame. \n");
 		result = wlp_eda_for_virtual(&wlp->eda, eth_hdr->h_dest, dst,
 					     wlp_wss_prep_hdr, skb);
 		if (unlikely(result < 0)) {
@@ -368,7 +348,6 @@ int wlp_prepare_tx_frame(struct device *dev, struct wlp *wlp,
 		}
 	}
 out:
-	d_fnend(6, dev, "wlp (%p), skb (%p). result = %d \n", wlp, skb, result);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wlp_prepare_tx_frame);
diff --git a/drivers/uwb/wlp/wlp-lc.c b/drivers/uwb/wlp/wlp-lc.c
index e531093..13db739 100644
--- a/drivers/uwb/wlp/wlp-lc.c
+++ b/drivers/uwb/wlp/wlp-lc.c
@@ -21,12 +21,9 @@
  *
  * FIXME: docs
  */
-
 #include <linux/wlp.h>
-#define D_LOCAL 6
-#include <linux/uwb/debug.h>
-#include "wlp-internal.h"
 
+#include "wlp-internal.h"
 
 static
 void wlp_neighbor_init(struct wlp_neighbor_e *neighbor)
@@ -61,11 +58,6 @@ int __wlp_alloc_device_info(struct wlp *wlp)
 static
 void __wlp_fill_device_info(struct wlp *wlp)
 {
-	struct device *dev = &wlp->rc->uwb_dev.dev;
-
-	BUG_ON(wlp->fill_device_info == NULL);
-	d_printf(6, dev, "Retrieving device information "
-			 "from device driver.\n");
 	wlp->fill_device_info(wlp, wlp->dev_info);
 }
 
@@ -127,7 +119,7 @@ void wlp_remove_neighbor_tmp_info(struct wlp_neighbor_e *neighbor)
 	}
 }
 
-/**
+/*
  * Populate WLP neighborhood cache with neighbor information
  *
  * A new neighbor is found. If it is discoverable then we add it to the
@@ -141,10 +133,7 @@ int wlp_add_neighbor(struct wlp *wlp, struct uwb_dev *dev)
 	int discoverable;
 	struct wlp_neighbor_e *neighbor;
 
-	d_fnstart(6, &dev->dev, "uwb %p \n", dev);
-	d_printf(6, &dev->dev, "Found neighbor device %02x:%02x \n",
-		 dev->dev_addr.data[1], dev->dev_addr.data[0]);
-	/**
+	/*
 	 * FIXME:
 	 * Use contents of WLP IE found in beacon cache to determine if
 	 * neighbor is discoverable.
@@ -167,7 +156,6 @@ int wlp_add_neighbor(struct wlp *wlp, struct uwb_dev *dev)
 		list_add(&neighbor->node, &wlp->neighbors);
 	}
 error_no_mem:
-	d_fnend(6, &dev->dev, "uwb %p, result = %d \n", dev, result);
 	return result;
 }
 
@@ -255,8 +243,6 @@ int wlp_d1d2_exchange(struct wlp *wlp, struct wlp_neighbor_e *neighbor,
 		dev_err(dev, "Unable to send D1 frame to neighbor "
 			"%02x:%02x (%d)\n", dev_addr->data[1],
 			dev_addr->data[0], result);
-		d_printf(6, dev, "Add placeholders into buffer next to "
-			 "neighbor information we have (dev address).\n");
 		goto out;
 	}
 	/* Create session, wait for response */
@@ -284,8 +270,6 @@ int wlp_d1d2_exchange(struct wlp *wlp, struct wlp_neighbor_e *neighbor,
 	/* Parse message in session->data: it will be either D2 or F0 */
 	skb = session.data;
 	resp = (void *) skb->data;
-	d_printf(6, dev, "Received response to D1 frame. \n");
-	d_dump(6, dev, skb->data, skb->len > 72 ? 72 : skb->len);
 
 	if (resp->type == WLP_ASSOC_F0) {
 		result = wlp_parse_f0(wlp, skb);
@@ -337,10 +321,9 @@ int wlp_enroll_neighbor(struct wlp *wlp, struct wlp_neighbor_e *neighbor,
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	char buf[WLP_WSS_UUID_STRSIZE];
 	struct uwb_dev_addr *dev_addr = &neighbor->uwb_dev->dev_addr;
+
 	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_fnstart(6, dev, "wlp %p, neighbor %p, wss %p, wssid %p (%s)\n",
-		  wlp, neighbor, wss, wssid, buf);
-	d_printf(6, dev, "Complete me.\n");
+
 	result =  wlp_d1d2_exchange(wlp, neighbor, wss, wssid);
 	if (result < 0) {
 		dev_err(dev, "WLP: D1/D2 message exchange for enrollment "
@@ -360,13 +343,10 @@ int wlp_enroll_neighbor(struct wlp *wlp, struct wlp_neighbor_e *neighbor,
 		goto error;
 	} else {
 		wss->state = WLP_WSS_STATE_ENROLLED;
-		d_printf(2, dev, "WLP: Success Enrollment into unsecure WSS "
-			 "%s using neighbor %02x:%02x. \n", buf,
-			 dev_addr->data[1], dev_addr->data[0]);
+		dev_dbg(dev, "WLP: Success Enrollment into unsecure WSS "
+			"%s using neighbor %02x:%02x. \n",
+			buf, dev_addr->data[1], dev_addr->data[0]);
 	}
-
-	d_fnend(6, dev, "wlp %p, neighbor %p, wss %p, wssid %p (%s)\n",
-		  wlp, neighbor, wss, wssid, buf);
 out:
 	return result;
 error:
@@ -449,7 +429,6 @@ ssize_t wlp_discover(struct wlp *wlp)
 	int result = 0;
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 
-	d_fnstart(6, dev, "wlp %p \n", wlp);
 	mutex_lock(&wlp->nbmutex);
 	/* Clear current neighborhood cache. */
 	__wlp_neighbors_release(wlp);
@@ -469,7 +448,6 @@ ssize_t wlp_discover(struct wlp *wlp)
 	}
 error_dev_for_each:
 	mutex_unlock(&wlp->nbmutex);
-	d_fnend(6, dev, "wlp %p \n", wlp);
 	return result;
 }
 
@@ -492,9 +470,6 @@ void wlp_uwb_notifs_cb(void *_wlp, struct uwb_dev *uwb_dev,
 	int result;
 	switch (event) {
 	case UWB_NOTIF_ONAIR:
-		d_printf(6, dev, "UWB device %02x:%02x is onair\n",
-				uwb_dev->dev_addr.data[1],
-				uwb_dev->dev_addr.data[0]);
 		result = wlp_eda_create_node(&wlp->eda,
 					     uwb_dev->mac_addr.data,
 					     &uwb_dev->dev_addr);
@@ -505,18 +480,11 @@ void wlp_uwb_notifs_cb(void *_wlp, struct uwb_dev *uwb_dev,
 				uwb_dev->dev_addr.data[0]);
 		break;
 	case UWB_NOTIF_OFFAIR:
-		d_printf(6, dev, "UWB device %02x:%02x is offair\n",
-				uwb_dev->dev_addr.data[1],
-				uwb_dev->dev_addr.data[0]);
 		wlp_eda_rm_node(&wlp->eda, &uwb_dev->dev_addr);
 		mutex_lock(&wlp->nbmutex);
-		list_for_each_entry_safe(neighbor, next, &wlp->neighbors,
-					 node) {
-			if (neighbor->uwb_dev == uwb_dev) {
-				d_printf(6, dev, "Removing device from "
-					 "neighborhood.\n");
+		list_for_each_entry_safe(neighbor, next, &wlp->neighbors, node) {
+			if (neighbor->uwb_dev == uwb_dev)
 				__wlp_neighbor_release(neighbor);
-			}
 		}
 		mutex_unlock(&wlp->nbmutex);
 		break;
@@ -538,14 +506,13 @@ static void wlp_channel_changed(struct uwb_pal *pal, int channel)
 
 int wlp_setup(struct wlp *wlp, struct uwb_rc *rc, struct net_device *ndev)
 {
-	struct device *dev = &rc->uwb_dev.dev;
 	int result;
 
-	d_fnstart(6, dev, "wlp %p\n", wlp);
 	BUG_ON(wlp->fill_device_info == NULL);
 	BUG_ON(wlp->xmit_frame == NULL);
 	BUG_ON(wlp->stop_queue == NULL);
 	BUG_ON(wlp->start_queue == NULL);
+
 	wlp->rc = rc;
 	wlp->ndev = ndev;
 	wlp_eda_init(&wlp->eda);/* Set up address cache */
@@ -560,15 +527,12 @@ int wlp_setup(struct wlp *wlp, struct uwb_rc *rc, struct net_device *ndev)
 	if (result < 0)
 		uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler);
 
-	d_fnend(6, dev, "wlp %p, result = %d\n", wlp, result);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wlp_setup);
 
 void wlp_remove(struct wlp *wlp)
 {
-	struct device *dev = &wlp->rc->uwb_dev.dev;
-	d_fnstart(6, dev, "wlp %p\n", wlp);
 	wlp_neighbors_release(wlp);
 	uwb_pal_unregister(&wlp->pal);
 	uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler);
@@ -578,9 +542,6 @@ void wlp_remove(struct wlp *wlp)
 		kfree(wlp->dev_info);
 	mutex_unlock(&wlp->mutex);
 	wlp->rc = NULL;
-	/* We have to use NULL here because this function can be called
-	 * when the device disappeared. */
-	d_fnend(6, NULL, "wlp %p\n", wlp);
 }
 EXPORT_SYMBOL_GPL(wlp_remove);
 
diff --git a/drivers/uwb/wlp/wss-lc.c b/drivers/uwb/wlp/wss-lc.c
index 96b18c9..5913c7a 100644
--- a/drivers/uwb/wlp/wss-lc.c
+++ b/drivers/uwb/wlp/wss-lc.c
@@ -43,14 +43,11 @@
  * 	wlp_wss_release()
  * 		wlp_wss_reset()
  */
-
 #include <linux/etherdevice.h> /* for is_valid_ether_addr */
 #include <linux/skbuff.h>
 #include <linux/wlp.h>
-#define D_LOCAL 5
-#include <linux/uwb/debug.h>
-#include "wlp-internal.h"
 
+#include "wlp-internal.h"
 
 size_t wlp_wss_key_print(char *buf, size_t bufsize, u8 *key)
 {
@@ -116,9 +113,6 @@ struct uwb_mac_addr wlp_wss_sel_bcast_addr(struct wlp_wss *wss)
  */
 void wlp_wss_reset(struct wlp_wss *wss)
 {
-	struct wlp *wlp = container_of(wss, struct wlp, wss);
-	struct device *dev = &wlp->rc->uwb_dev.dev;
-	d_fnstart(5, dev, "wss (%p) \n", wss);
 	memset(&wss->wssid, 0, sizeof(wss->wssid));
 	wss->hash = 0;
 	memset(&wss->name[0], 0, sizeof(wss->name));
@@ -127,7 +121,6 @@ void wlp_wss_reset(struct wlp_wss *wss)
 	memset(&wss->master_key[0], 0, sizeof(wss->master_key));
 	wss->tag = 0;
 	wss->state = WLP_WSS_STATE_NONE;
-	d_fnend(5, dev, "wss (%p) \n", wss);
 }
 
 /**
@@ -145,7 +138,6 @@ int wlp_wss_sysfs_add(struct wlp_wss *wss, char *wssid_str)
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	int result;
 
-	d_fnstart(5, dev, "wss (%p), wssid: %s\n", wss, wssid_str);
 	result = kobject_set_name(&wss->kobj, "wss-%s", wssid_str);
 	if (result < 0)
 		return result;
@@ -162,7 +154,6 @@ int wlp_wss_sysfs_add(struct wlp_wss *wss, char *wssid_str)
 			result);
 		goto error_sysfs_create_group;
 	}
-	d_fnend(5, dev, "Completed. result = %d \n", result);
 	return 0;
 error_sysfs_create_group:
 
@@ -214,22 +205,14 @@ int wlp_wss_enroll_target(struct wlp_wss *wss, struct wlp_uuid *wssid,
 	struct wlp *wlp = container_of(wss, struct wlp, wss);
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	struct wlp_neighbor_e *neighbor;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	int result = -ENXIO;
 	struct uwb_dev_addr *dev_addr;
 
-	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_fnstart(5, dev, "wss %p, wssid %s, registrar %02x:%02x \n",
-		  wss, buf, dest->data[1], dest->data[0]);
 	mutex_lock(&wlp->nbmutex);
 	list_for_each_entry(neighbor, &wlp->neighbors, node) {
 		dev_addr = &neighbor->uwb_dev->dev_addr;
 		if (!memcmp(dest, dev_addr, sizeof(*dest))) {
-			d_printf(5, dev, "Neighbor %02x:%02x is valid, "
-				 "enrolling. \n",
-				 dev_addr->data[1], dev_addr->data[0]);
-			result = wlp_enroll_neighbor(wlp, neighbor, wss,
-						     wssid);
+			result = wlp_enroll_neighbor(wlp, neighbor, wss, wssid);
 			break;
 		}
 	}
@@ -237,8 +220,6 @@ int wlp_wss_enroll_target(struct wlp_wss *wss, struct wlp_uuid *wssid,
 		dev_err(dev, "WLP: Cannot find neighbor %02x:%02x. \n",
 			dest->data[1], dest->data[0]);
 	mutex_unlock(&wlp->nbmutex);
-	d_fnend(5, dev, "wss %p, wssid %s, registrar %02x:%02x, result %d \n",
-		  wss, buf, dest->data[1], dest->data[0], result);
 	return result;
 }
 
@@ -260,16 +241,11 @@ int wlp_wss_enroll_discovered(struct wlp_wss *wss, struct wlp_uuid *wssid)
 	char buf[WLP_WSS_UUID_STRSIZE];
 	int result = -ENXIO;
 
-	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_fnstart(5, dev, "wss %p, wssid %s \n", wss, buf);
+
 	mutex_lock(&wlp->nbmutex);
 	list_for_each_entry(neighbor, &wlp->neighbors, node) {
 		list_for_each_entry(wssid_e, &neighbor->wssid, node) {
 			if (!memcmp(wssid, &wssid_e->wssid, sizeof(*wssid))) {
-				d_printf(5, dev, "Found WSSID %s in neighbor "
-					 "%02x:%02x cache. \n", buf,
-					 neighbor->uwb_dev->dev_addr.data[1],
-					 neighbor->uwb_dev->dev_addr.data[0]);
 				result = wlp_enroll_neighbor(wlp, neighbor,
 							     wss, wssid);
 				if (result == 0) /* enrollment success */
@@ -279,10 +255,11 @@ int wlp_wss_enroll_discovered(struct wlp_wss *wss, struct wlp_uuid *wssid)
 		}
 	}
 out:
-	if (result == -ENXIO)
+	if (result == -ENXIO) {
+		wlp_wss_uuid_print(buf, sizeof(buf), wssid);
 		dev_err(dev, "WLP: Cannot find WSSID %s in cache. \n", buf);
+	}
 	mutex_unlock(&wlp->nbmutex);
-	d_fnend(5, dev, "wss %p, wssid %s, result %d \n", wss, buf, result);
 	return result;
 }
 
@@ -307,27 +284,22 @@ int wlp_wss_enroll(struct wlp_wss *wss, struct wlp_uuid *wssid,
 	struct uwb_dev_addr bcast = {.data = {0xff, 0xff} };
 
 	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
+
 	if (wss->state != WLP_WSS_STATE_NONE) {
 		dev_err(dev, "WLP: Already enrolled in WSS %s.\n", buf);
 		result = -EEXIST;
 		goto error;
 	}
-	if (!memcmp(&bcast, devaddr, sizeof(bcast))) {
-		d_printf(5, dev, "Request to enroll in discovered WSS "
-			 "with WSSID %s \n", buf);
+	if (!memcmp(&bcast, devaddr, sizeof(bcast)))
 		result = wlp_wss_enroll_discovered(wss, wssid);
-	} else {
-		d_printf(5, dev, "Request to enroll in WSSID %s with "
-			 "registrar %02x:%02x\n", buf, devaddr->data[1],
-			 devaddr->data[0]);
+	else
 		result = wlp_wss_enroll_target(wss, wssid, devaddr);
-	}
 	if (result < 0) {
 		dev_err(dev, "WLP: Unable to enroll into WSS %s, result %d \n",
 			buf, result);
 		goto error;
 	}
-	d_printf(2, dev, "Successfully enrolled into WSS %s \n", buf);
+	dev_dbg(dev, "Successfully enrolled into WSS %s \n", buf);
 	result = wlp_wss_sysfs_add(wss, buf);
 	if (result < 0) {
 		dev_err(dev, "WLP: Unable to set up sysfs for WSS kobject.\n");
@@ -363,7 +335,6 @@ int wlp_wss_activate(struct wlp_wss *wss)
 		u8 hash; /* only include one hash */
 	} ie_data;
 
-	d_fnstart(5, dev, "Activating WSS %p. \n", wss);
 	BUG_ON(wss->state != WLP_WSS_STATE_ENROLLED);
 	wss->hash = wlp_wss_comp_wssid_hash(&wss->wssid);
 	wss->tag = wss->hash;
@@ -382,7 +353,6 @@ int wlp_wss_activate(struct wlp_wss *wss)
 	wss->state = WLP_WSS_STATE_ACTIVE;
 	result = 0;
 error_wlp_ie:
-	d_fnend(5, dev, "Activating WSS %p, result = %d \n", wss, result);
 	return result;
 }
 
@@ -405,7 +375,6 @@ int wlp_wss_enroll_activate(struct wlp_wss *wss, struct wlp_uuid *wssid,
 	int result = 0;
 	char buf[WLP_WSS_UUID_STRSIZE];
 
-	d_fnstart(5, dev, "Enrollment and activation requested. \n");
 	mutex_lock(&wss->mutex);
 	result = wlp_wss_enroll(wss, wssid, devaddr);
 	if (result < 0) {
@@ -424,7 +393,6 @@ int wlp_wss_enroll_activate(struct wlp_wss *wss, struct wlp_uuid *wssid,
 error_activate:
 error_enroll:
 	mutex_unlock(&wss->mutex);
-	d_fnend(5, dev, "Completed. result = %d \n", result);
 	return result;
 }
 
@@ -447,11 +415,9 @@ int wlp_wss_create_activate(struct wlp_wss *wss, struct wlp_uuid *wssid,
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	int result = 0;
 	char buf[WLP_WSS_UUID_STRSIZE];
-	d_fnstart(5, dev, "Request to create new WSS.\n");
+
 	result = wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_printf(5, dev, "Request to create WSS: WSSID=%s, name=%s, "
-		 "sec_status=%u, accepting enrollment=%u \n",
-		 buf, name, sec_status, accept);
+
 	if (!mutex_trylock(&wss->mutex)) {
 		dev_err(dev, "WLP: WLP association session in progress.\n");
 		return -EBUSY;
@@ -498,7 +464,6 @@ int wlp_wss_create_activate(struct wlp_wss *wss, struct wlp_uuid *wssid,
 	result = 0;
 out:
 	mutex_unlock(&wss->mutex);
-	d_fnend(5, dev, "Completed. result = %d \n", result);
 	return result;
 }
 
@@ -520,16 +485,12 @@ int wlp_wss_is_active(struct wlp *wlp, struct wlp_wss *wss,
 {
 	int result = 0;
 	struct device *dev = &wlp->rc->uwb_dev.dev;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	DECLARE_COMPLETION_ONSTACK(completion);
 	struct wlp_session session;
 	struct sk_buff  *skb;
 	struct wlp_frame_assoc *resp;
 	struct wlp_uuid wssid;
 
-	wlp_wss_uuid_print(buf, sizeof(buf), &wss->wssid);
-	d_fnstart(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n",
-		  wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]);
 	mutex_lock(&wlp->mutex);
 	/* Send C1 association frame */
 	result = wlp_send_assoc_frame(wlp, wss, dev_addr, WLP_ASSOC_C1);
@@ -565,8 +526,6 @@ int wlp_wss_is_active(struct wlp *wlp, struct wlp_wss *wss,
 	/* Parse message in session->data: it will be either C2 or F0 */
 	skb = session.data;
 	resp = (void *) skb->data;
-	d_printf(5, dev, "Received response to C1 frame. \n");
-	d_dump(5, dev, skb->data, skb->len > 72 ? 72 : skb->len);
 	if (resp->type == WLP_ASSOC_F0) {
 		result = wlp_parse_f0(wlp, skb);
 		if (result < 0)
@@ -584,11 +543,9 @@ int wlp_wss_is_active(struct wlp *wlp, struct wlp_wss *wss,
 		result = 0;
 		goto error_resp_parse;
 	}
-	if (!memcmp(&wssid, &wss->wssid, sizeof(wssid))) {
-		d_printf(5, dev, "WSSID in C2 frame matches local "
-			 "active WSS.\n");
+	if (!memcmp(&wssid, &wss->wssid, sizeof(wssid)))
 		result = 1;
-	} else {
+	else {
 		dev_err(dev, "WLP: Received a C2 frame without matching "
 			"WSSID.\n");
 		result = 0;
@@ -598,8 +555,6 @@ error_resp_parse:
 out:
 	wlp->session = NULL;
 	mutex_unlock(&wlp->mutex);
-	d_fnend(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n",
-		  wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]);
 	return result;
 }
 
@@ -620,16 +575,8 @@ int wlp_wss_activate_connection(struct wlp *wlp, struct wlp_wss *wss,
 {
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	int result = 0;
-	char buf[WLP_WSS_UUID_STRSIZE];
-	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_fnstart(5, dev, "wlp %p, wss %p, wssid %s, tag %u, virtual "
-		  "%02x:%02x:%02x:%02x:%02x:%02x \n", wlp, wss, buf, *tag,
-		  virt_addr->data[0], virt_addr->data[1], virt_addr->data[2],
-		  virt_addr->data[3], virt_addr->data[4], virt_addr->data[5]);
 
 	if (!memcmp(wssid, &wss->wssid, sizeof(*wssid))) {
-		d_printf(5, dev, "WSSID from neighbor frame matches local "
-			 "active WSS.\n");
 		/* Update EDA cache */
 		result = wlp_eda_update_node(&wlp->eda, dev_addr, wss,
 					     (void *) virt_addr->data, *tag,
@@ -638,18 +585,9 @@ int wlp_wss_activate_connection(struct wlp *wlp, struct wlp_wss *wss,
 			dev_err(dev, "WLP: Unable to update EDA cache "
 				"with new connected neighbor information.\n");
 	} else {
-		dev_err(dev, "WLP: Neighbor does not have matching "
-			"WSSID.\n");
+		dev_err(dev, "WLP: Neighbor does not have matching WSSID.\n");
 		result = -EINVAL;
 	}
-
-	d_fnend(5, dev, "wlp %p, wss %p, wssid %s, tag %u, virtual "
-		  "%02x:%02x:%02x:%02x:%02x:%02x, result = %d \n",
-		  wlp, wss, buf, *tag,
-		  virt_addr->data[0], virt_addr->data[1], virt_addr->data[2],
-		  virt_addr->data[3], virt_addr->data[4], virt_addr->data[5],
-		  result);
-
 	return result;
 }
 
@@ -665,7 +603,6 @@ int wlp_wss_connect_neighbor(struct wlp *wlp, struct wlp_wss *wss,
 {
 	int result;
 	struct device *dev = &wlp->rc->uwb_dev.dev;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	struct wlp_uuid wssid;
 	u8 tag;
 	struct uwb_mac_addr virt_addr;
@@ -674,9 +611,6 @@ int wlp_wss_connect_neighbor(struct wlp *wlp, struct wlp_wss *wss,
 	struct wlp_frame_assoc *resp;
 	struct sk_buff *skb;
 
-	wlp_wss_uuid_print(buf, sizeof(buf), &wss->wssid);
-	d_fnstart(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n",
-		  wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]);
 	mutex_lock(&wlp->mutex);
 	/* Send C3 association frame */
 	result = wlp_send_assoc_frame(wlp, wss, dev_addr, WLP_ASSOC_C3);
@@ -711,8 +645,6 @@ int wlp_wss_connect_neighbor(struct wlp *wlp, struct wlp_wss *wss,
 	/* Parse message in session->data: it will be either C4 or F0 */
 	skb = session.data;
 	resp = (void *) skb->data;
-	d_printf(5, dev, "Received response to C3 frame. \n");
-	d_dump(5, dev, skb->data, skb->len > 72 ? 72 : skb->len);
 	if (resp->type == WLP_ASSOC_F0) {
 		result = wlp_parse_f0(wlp, skb);
 		if (result < 0)
@@ -744,8 +676,6 @@ out:
 					  WLP_WSS_CONNECT_FAILED);
 	wlp->session = NULL;
 	mutex_unlock(&wlp->mutex);
-	d_fnend(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n",
-		  wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]);
 	return result;
 }
 
@@ -780,12 +710,8 @@ void wlp_wss_connect_send(struct work_struct *ws)
 	struct wlp_wss *wss = &wlp->wss;
 	int result;
 	struct device *dev = &wlp->rc->uwb_dev.dev;
-	char buf[WLP_WSS_UUID_STRSIZE];
 
 	mutex_lock(&wss->mutex);
-	wlp_wss_uuid_print(buf, sizeof(buf), &wss->wssid);
-	d_fnstart(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n",
-		  wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]);
 	if (wss->state < WLP_WSS_STATE_ACTIVE) {
 		if (printk_ratelimit())
 			dev_err(dev, "WLP: Attempting to connect with "
@@ -836,7 +762,6 @@ out:
 	BUG_ON(wlp->start_queue == NULL);
 	wlp->start_queue(wlp);
 	mutex_unlock(&wss->mutex);
-	d_fnend(5, dev, "wlp %p, wss %p (wssid %s)\n", wlp, wss, buf);
 }
 
 /**
@@ -855,7 +780,6 @@ int wlp_wss_prep_hdr(struct wlp *wlp, struct wlp_eda_node *eda_entry,
 	struct sk_buff *skb = _skb;
 	struct wlp_frame_std_abbrv_hdr *std_hdr;
 
-	d_fnstart(6, dev, "wlp %p \n", wlp);
 	if (eda_entry->state == WLP_WSS_CONNECTED) {
 		/* Add WLP header */
 		BUG_ON(skb_headroom(skb) < sizeof(*std_hdr));
@@ -873,7 +797,6 @@ int wlp_wss_prep_hdr(struct wlp *wlp, struct wlp_eda_node *eda_entry,
 				dev_addr->data[0]);
 		result = -EINVAL;
 	}
-	d_fnend(6, dev, "wlp %p \n", wlp);
 	return result;
 }
 
@@ -893,16 +816,9 @@ int wlp_wss_connect_prep(struct wlp *wlp, struct wlp_eda_node *eda_entry,
 {
 	int result = 0;
 	struct device *dev = &wlp->rc->uwb_dev.dev;
-	struct uwb_dev_addr *dev_addr = &eda_entry->dev_addr;
-	unsigned char *eth_addr = eda_entry->eth_addr;
 	struct sk_buff *skb = _skb;
 	struct wlp_assoc_conn_ctx *conn_ctx;
 
-	d_fnstart(5, dev, "wlp %p\n", wlp);
-	d_printf(5, dev, "To neighbor %02x:%02x with eth "
-		  "%02x:%02x:%02x:%02x:%02x:%02x\n", dev_addr->data[1],
-		  dev_addr->data[0], eth_addr[0], eth_addr[1], eth_addr[2],
-		  eth_addr[3], eth_addr[4], eth_addr[5]);
 	if (eda_entry->state == WLP_WSS_UNCONNECTED) {
 		/* We don't want any more packets while we set up connection */
 		BUG_ON(wlp->stop_queue == NULL);
@@ -929,12 +845,9 @@ int wlp_wss_connect_prep(struct wlp *wlp, struct wlp_eda_node *eda_entry,
 			 "previously. Not retrying. \n");
 		result = -ENONET;
 		goto out;
-	} else { /* eda_entry->state == WLP_WSS_CONNECTED */
-		d_printf(5, dev, "Neighbor is connected, preparing frame.\n");
+	} else /* eda_entry->state == WLP_WSS_CONNECTED */
 		result = wlp_wss_prep_hdr(wlp, eda_entry, skb);
-	}
 out:
-	d_fnend(5, dev, "wlp %p, result = %d \n", wlp, result);
 	return result;
 }
 
@@ -957,8 +870,6 @@ int wlp_wss_send_copy(struct wlp *wlp, struct wlp_eda_node *eda_entry,
 	struct sk_buff *copy;
 	struct uwb_dev_addr *dev_addr = &eda_entry->dev_addr;
 
-	d_fnstart(5, dev, "to neighbor %02x:%02x, skb (%p) \n",
-		  dev_addr->data[1], dev_addr->data[0], skb);
 	copy = skb_copy(skb, GFP_ATOMIC);
 	if (copy == NULL) {
 		if (printk_ratelimit())
@@ -988,8 +899,6 @@ int wlp_wss_send_copy(struct wlp *wlp, struct wlp_eda_node *eda_entry,
 		dev_kfree_skb_irq(copy);/*we need to free if tx fails */
 	}
 out:
-	d_fnend(5, dev, "to neighbor %02x:%02x \n", dev_addr->data[1],
-		  dev_addr->data[0]);
 	return result;
 }
 
@@ -1005,7 +914,7 @@ int wlp_wss_setup(struct net_device *net_dev, struct wlp_wss *wss)
 	struct wlp *wlp = container_of(wss, struct wlp, wss);
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	int result = 0;
-	d_fnstart(5, dev, "wss (%p) \n", wss);
+
 	mutex_lock(&wss->mutex);
 	wss->kobj.parent = &net_dev->dev.kobj;
 	if (!is_valid_ether_addr(net_dev->dev_addr)) {
@@ -1018,7 +927,6 @@ int wlp_wss_setup(struct net_device *net_dev, struct wlp_wss *wss)
 	       sizeof(wss->virtual_addr.data));
 out:
 	mutex_unlock(&wss->mutex);
-	d_fnend(5, dev, "wss (%p) \n", wss);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wlp_wss_setup);
@@ -1035,8 +943,7 @@ EXPORT_SYMBOL_GPL(wlp_wss_setup);
 void wlp_wss_remove(struct wlp_wss *wss)
 {
 	struct wlp *wlp = container_of(wss, struct wlp, wss);
-	struct device *dev = &wlp->rc->uwb_dev.dev;
-	d_fnstart(5, dev, "wss (%p) \n", wss);
+
 	mutex_lock(&wss->mutex);
 	if (wss->state == WLP_WSS_STATE_ACTIVE)
 		uwb_rc_ie_rm(wlp->rc, UWB_IE_WLP);
@@ -1050,6 +957,5 @@ void wlp_wss_remove(struct wlp_wss *wss)
 	wlp_eda_release(&wlp->eda);
 	wlp_eda_init(&wlp->eda);
 	mutex_unlock(&wss->mutex);
-	d_fnend(5, dev, "wss (%p) \n", wss);
 }
 EXPORT_SYMBOL_GPL(wlp_wss_remove);
-- 
cgit v0.10.2


From e43ace891229607c43d35597cbba77c2e40f48d4 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 22 Dec 2008 18:27:17 +0000
Subject: uwb: use print_hex_dump()

Use print_hex_dump() instead of the home-grown dump_bytes().

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c
index 9d9128a..9ec7fd5 100644
--- a/drivers/usb/wusbcore/crypto.c
+++ b/drivers/usb/wusbcore/crypto.c
@@ -51,13 +51,18 @@
 #include <linux/uwb.h>
 #include <linux/usb/wusb.h>
 #include <linux/scatterlist.h>
-#include <linux/uwb/debug.h>
 
 static int debug_crypto_verify = 0;
 
 module_param(debug_crypto_verify, int, 0);
 MODULE_PARM_DESC(debug_crypto_verify, "verify the key generation algorithms");
 
+static void wusb_key_dump(const void *buf, size_t len)
+{
+	print_hex_dump(KERN_ERR, "  ", DUMP_PREFIX_OFFSET, 16, 1,
+		       buf, len, 0);
+}
+
 /*
  * Block of data, as understood by AES-CCM
  *
@@ -396,14 +401,14 @@ static int wusb_oob_mic_verify(void)
 		       "mismatch between MIC result and WUSB1.0[A2]\n");
 		hs_size = sizeof(stv_hsmic_hs) - sizeof(stv_hsmic_hs.MIC);
 		printk(KERN_ERR "E: Handshake2 in: (%zu bytes)\n", hs_size);
-		dump_bytes(NULL, &stv_hsmic_hs, hs_size);
+		wusb_key_dump(&stv_hsmic_hs, hs_size);
 		printk(KERN_ERR "E: CCM Nonce in: (%zu bytes)\n",
 		       sizeof(stv_hsmic_n));
-		dump_bytes(NULL, &stv_hsmic_n, sizeof(stv_hsmic_n));
+		wusb_key_dump(&stv_hsmic_n, sizeof(stv_hsmic_n));
 		printk(KERN_ERR "E: MIC out:\n");
-		dump_bytes(NULL, mic, sizeof(mic));
+		wusb_key_dump(mic, sizeof(mic));
 		printk(KERN_ERR "E: MIC out (from WUSB1.0[A.2]):\n");
-		dump_bytes(NULL, stv_hsmic_hs.MIC, sizeof(stv_hsmic_hs.MIC));
+		wusb_key_dump(stv_hsmic_hs.MIC, sizeof(stv_hsmic_hs.MIC));
 		result = -EINVAL;
 	} else
 		result = 0;
@@ -471,19 +476,16 @@ static int wusb_key_derive_verify(void)
 		printk(KERN_ERR "E: WUSB key derivation test: "
 		       "mismatch between key derivation result "
 		       "and WUSB1.0[A1] Errata 2006/12\n");
-		printk(KERN_ERR "E: keydvt in: key (%zu bytes)\n",
-		       sizeof(stv_key_a1));
-		dump_bytes(NULL, stv_key_a1, sizeof(stv_key_a1));
-		printk(KERN_ERR "E: keydvt in: nonce (%zu bytes)\n",
-		       sizeof(stv_keydvt_n_a1));
-		dump_bytes(NULL, &stv_keydvt_n_a1, sizeof(stv_keydvt_n_a1));
-		printk(KERN_ERR "E: keydvt in: hnonce & dnonce (%zu bytes)\n",
-		       sizeof(stv_keydvt_in_a1));
-		dump_bytes(NULL, &stv_keydvt_in_a1, sizeof(stv_keydvt_in_a1));
+		printk(KERN_ERR "E: keydvt in: key\n");
+		wusb_key_dump(stv_key_a1, sizeof(stv_key_a1));
+		printk(KERN_ERR "E: keydvt in: nonce\n");
+		wusb_key_dump( &stv_keydvt_n_a1, sizeof(stv_keydvt_n_a1));
+		printk(KERN_ERR "E: keydvt in: hnonce & dnonce\n");
+		wusb_key_dump(&stv_keydvt_in_a1, sizeof(stv_keydvt_in_a1));
 		printk(KERN_ERR "E: keydvt out: KCK\n");
-		dump_bytes(NULL, &keydvt_out.kck, sizeof(keydvt_out.kck));
+		wusb_key_dump(&keydvt_out.kck, sizeof(keydvt_out.kck));
 		printk(KERN_ERR "E: keydvt out: PTK\n");
-		dump_bytes(NULL, &keydvt_out.ptk, sizeof(keydvt_out.ptk));
+		wusb_key_dump(&keydvt_out.ptk, sizeof(keydvt_out.ptk));
 		result = -EINVAL;
 	} else
 		result = 0;
diff --git a/drivers/usb/wusbcore/wa-nep.c b/drivers/usb/wusbcore/wa-nep.c
index 3f54299..17d2626 100644
--- a/drivers/usb/wusbcore/wa-nep.c
+++ b/drivers/usb/wusbcore/wa-nep.c
@@ -51,7 +51,7 @@
  */
 #include <linux/workqueue.h>
 #include <linux/ctype.h>
-#include <linux/uwb/debug.h>
+
 #include "wa-hc.h"
 #include "wusbhc.h"
 
@@ -139,13 +139,10 @@ static void wa_notif_dispatch(struct work_struct *ws)
 			/* FIXME: unimplemented WA NOTIFs */
 			/* fallthru */
 		default:
-			if (printk_ratelimit()) {
-				dev_err(dev, "HWA: unknown notification 0x%x, "
-					"%zu bytes; discarding\n",
-					notif_hdr->bNotifyType,
-					(size_t)notif_hdr->bLength);
-				dump_bytes(dev, notif_hdr, 16);
-			}
+			dev_err(dev, "HWA: unknown notification 0x%x, "
+				"%zu bytes; discarding\n",
+				notif_hdr->bNotifyType,
+				(size_t)notif_hdr->bLength);
 			break;
 		}
 	}
@@ -160,12 +157,9 @@ out:
 	 * discard the data, as this should not happen.
 	 */
 exhausted_buffer:
-	if (!printk_ratelimit())
-		goto out;
 	dev_warn(dev, "HWA: device sent short notification, "
 		 "%d bytes missing; discarding %d bytes.\n",
 		 missing, (int)size);
-	dump_bytes(dev, itr, size);
 	goto out;
 }
 
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 89b2e6a..4a42993 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -4,6 +4,7 @@
  *
  * Copyright (C) 2005-2006 Intel Corporation
  * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version
@@ -36,26 +37,6 @@
 
 #include "uwb-internal.h"
 
-void dump_bytes(struct device *dev, const void *_buf, size_t rsize)
-{
-	const char *buf = _buf;
-	char line[32];
-	size_t offset = 0;
-	int cnt, cnt2;
-	for (cnt = 0; cnt < rsize; cnt += 8) {
-		size_t rtop = rsize - cnt < 8 ? rsize - cnt : 8;
-		for (offset = cnt2 = 0; cnt2 < rtop; cnt2++) {
-			offset += scnprintf(line + offset, sizeof(line) - offset,
-					    "%02x ", buf[cnt + cnt2] & 0xff);
-		}
-		if (dev)
-			dev_info(dev, "%s\n", line);
-		else
-			printk(KERN_INFO "%s\n", line);
-	}
-}
-EXPORT_SYMBOL_GPL(dump_bytes);
-
 /*
  * Debug interface
  *
-- 
cgit v0.10.2


From a01777ecf227de735d7e525ecda48fe74b838a17 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 22 Dec 2008 18:30:29 +0000
Subject: uwb: remove unused include/linux/uwb/debug.h

Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/address.c b/drivers/uwb/address.c
index 1664ae5..ad21b1d 100644
--- a/drivers/uwb/address.c
+++ b/drivers/uwb/address.c
@@ -28,7 +28,7 @@
 #include <linux/device.h>
 #include <linux/random.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
+
 #include "uwb-internal.h"
 
 
diff --git a/drivers/uwb/driver.c b/drivers/uwb/driver.c
index f57c265..da77e41 100644
--- a/drivers/uwb/driver.c
+++ b/drivers/uwb/driver.c
@@ -53,7 +53,7 @@
 #include <linux/err.h>
 #include <linux/kdev_t.h>
 #include <linux/random.h>
-#include <linux/uwb/debug.h>
+
 #include "uwb-internal.h"
 
 
diff --git a/drivers/uwb/i1480/i1480u-wlp/lc.c b/drivers/uwb/i1480/i1480u-wlp/lc.c
index 488b2e3..049c05d 100644
--- a/drivers/uwb/i1480/i1480u-wlp/lc.c
+++ b/drivers/uwb/i1480/i1480u-wlp/lc.c
@@ -57,7 +57,7 @@
  */
 #include <linux/if_arp.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
+
 #include "i1480u-wlp.h"
 
 
diff --git a/drivers/uwb/i1480/i1480u-wlp/netdev.c b/drivers/uwb/i1480/i1480u-wlp/netdev.c
index 2eafb97..e3873ff 100644
--- a/drivers/uwb/i1480/i1480u-wlp/netdev.c
+++ b/drivers/uwb/i1480/i1480u-wlp/netdev.c
@@ -41,7 +41,7 @@
 
 #include <linux/if_arp.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
+
 #include "i1480u-wlp.h"
 
 struct i1480u_cmd_set_ip_mas {
diff --git a/drivers/uwb/i1480/i1480u-wlp/sysfs.c b/drivers/uwb/i1480/i1480u-wlp/sysfs.c
index a92a787..4ffaf54 100644
--- a/drivers/uwb/i1480/i1480u-wlp/sysfs.c
+++ b/drivers/uwb/i1480/i1480u-wlp/sysfs.c
@@ -25,8 +25,8 @@
 
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
 #include <linux/device.h>
+
 #include "i1480u-wlp.h"
 
 
diff --git a/include/linux/uwb/debug.h b/include/linux/uwb/debug.h
deleted file mode 100644
index 67a2405..0000000
--- a/include/linux/uwb/debug.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Ultra Wide Band
- * Debug Support
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- *
- * FIXME: doc
- * Invoke like:
- *
- * #define D_LOCAL 4
- * #include <linux/uwb/debug.h>
- *
- * At the end of your include files.
- */
-#include <linux/types.h>
-
-struct device;
-extern void dump_bytes(struct device *dev, const void *_buf, size_t rsize);
-
-/* Master debug switch; !0 enables, 0 disables */
-#define D_MASTER (!0)
-
-/* Local (per-file) debug switch; #define before #including */
-#ifndef D_LOCAL
-#define D_LOCAL 0
-#endif
-
-#undef __d_printf
-#undef d_fnstart
-#undef d_fnend
-#undef d_printf
-#undef d_dump
-
-#define __d_printf(l, _tag, _dev, f, a...)				\
-do {									\
-	struct device *__dev = (_dev);					\
-	if (D_MASTER && D_LOCAL >= (l)) {				\
-		char __head[64] = "";					\
-		if (_dev != NULL) {					\
-			if ((unsigned long)__dev < 4096)		\
-				printk(KERN_ERR "E: Corrupt dev %p\n",	\
-					__dev);				\
-			else						\
-				snprintf(__head, sizeof(__head),	\
-					 "%s %s: ",			\
-					 dev_driver_string(__dev),	\
-					 dev_name(__dev));		\
-		}							\
-		printk(KERN_ERR "%s%s" _tag ": " f, __head,		\
-			__func__, ## a);				\
-	}								\
-} while (0 && _dev)
-
-#define d_fnstart(l, _dev, f, a...)	\
-	__d_printf(l, " FNSTART", _dev, f, ## a)
-#define d_fnend(l, _dev, f, a...)	\
-	__d_printf(l, " FNEND", _dev, f, ## a)
-#define d_printf(l, _dev, f, a...)	\
-	__d_printf(l, "", _dev, f, ## a)
-#define d_dump(l, _dev, ptr, size)		\
-do {						\
-	struct device *__dev = _dev;		\
-	if (D_MASTER && D_LOCAL >= (l))		\
-		dump_bytes(__dev, ptr, size);	\
-} while (0 && _dev)
-#define d_test(l) (D_MASTER && D_LOCAL >= (l))
-- 
cgit v0.10.2


From 7bbe5b5aa6d1e38af6f1fc866efc0aa461d73f19 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 9 Dec 2008 11:02:51 -0500
Subject: UBIFS: use PAGE_CACHE_MASK correctly

It has high bits set, not low bits set as the UBIFS code
assumed.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 2624411..7f1de98 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -254,7 +254,7 @@ static int write_begin_slow(struct address_space *mapping,
 	}
 
 	if (!PageUptodate(page)) {
-		if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+		if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
 			SetPageChecked(page);
 		else {
 			err = do_readpage(page);
@@ -444,7 +444,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 
 	if (!PageUptodate(page)) {
 		/* The page is not loaded from the flash */
-		if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+		if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
 			/*
 			 * We change whole page so no need to load it. But we
 			 * have to set the @PG_checked flag to make the further
-- 
cgit v0.10.2


From 24fa9e9438b263600737c839b36543981d87d65b Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 17 Dec 2008 17:45:14 +0200
Subject: UBIFS: fix tnc dumping

debugfs tnc dumping was broken because of an obvious typo.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 934db18..367d975 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2443,7 +2443,7 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
 		spin_lock(&c->space_lock);
 		dbg_dump_budg(c);
 		spin_unlock(&c->space_lock);
-	} else if (file->f_path.dentry == d->dump_budg) {
+	} else if (file->f_path.dentry == d->dump_tnc) {
 		mutex_lock(&c->tnc_mutex);
 		dbg_dump_tnc(c);
 		mutex_unlock(&c->tnc_mutex);
-- 
cgit v0.10.2


From 21a60258976227daaf7a4c35e96c3d77d4988b15 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 12 Dec 2008 11:13:17 -0500
Subject: UBIFS: improve budgeting dump

Dump available space calculated by budgeting subsystem.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 1a4973e..d5a6503 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -713,8 +713,8 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
  * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
  * are able to write a file of size N. UBIFS attaches node headers to each data
  * node and it has to write indexind nodes as well. This introduces additional
- * overhead, and UBIFS it has to report sligtly less free space to meet the
- * above expectetion.
+ * overhead, and UBIFS has to report sligtly less free space to meet the above
+ * expectetions.
  *
  * This function assumes free space is made up of uncompressed data nodes and
  * full index nodes (one per data node, tripled because we always allow enough
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 367d975..6ecb01a 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -597,7 +597,9 @@ void dbg_dump_budg(struct ubifs_info *c)
 	struct rb_node *rb;
 	struct ubifs_bud *bud;
 	struct ubifs_gced_idx_leb *idx_gc;
+	long long available, outstanding, free;
 
+	ubifs_assert(spin_is_locked(&c->space_lock));
 	spin_lock(&dbg_lock);
 	printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
 	       "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
@@ -630,6 +632,17 @@ void dbg_dump_budg(struct ubifs_info *c)
 		printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n",
 		       idx_gc->lnum, idx_gc->unmap);
 	printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
+
+	/* Print budgeting predictions */
+	available = ubifs_calc_available(c, c->min_idx_lebs);
+	outstanding = c->budg_data_growth + c->budg_dd_growth;
+	if (available > outstanding)
+		free = ubifs_reported_space(c, available - outstanding);
+	else
+		free = 0;
+	printk(KERN_DEBUG "Budgeting predictions:\n");
+	printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
+	       available, outstanding, free);
 	spin_unlock(&dbg_lock);
 }
 
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 055c6b5..e61c081 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -419,7 +419,7 @@ struct ubifs_unclean_leb {
  *
  * LPROPS_UNCAT: not categorized
  * LPROPS_DIRTY: dirty > 0, not index
- * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index
+ * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index
  * LPROPS_FREE: free > 0, not empty, not index
  * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs
  * LPROPS_EMPTY: LEB is empty, not taken
-- 
cgit v0.10.2


From d3cf502b6ccee1c52890d42cd18cbc98b7526126 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 16 Dec 2008 17:52:35 +0200
Subject: UBIFS: various comment improvements and fixes

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 10ba663..dfd2bce 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -520,13 +520,13 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
  * @flags: new flags
  * @idx_gc_cnt: change to the count of idx_gc list
  *
- * This function changes LEB properties. This function does not change a LEB
- * property (@free, @dirty or @flag) if the value passed is %LPROPS_NC.
+ * This function changes LEB properties (@free, @dirty or @flag). However, the
+ * property which has the %LPROPS_NC value is not changed. Returns a pointer to
+ * the updated LEB properties on success and a negative error code on failure.
  *
- * This function returns a pointer to the updated LEB properties on success
- * and a negative error code on failure. N.B. the LEB properties may have had to
- * be copied (due to COW) and consequently the pointer returned may not be the
- * same as the pointer passed.
+ * Note, the LEB properties may have had to be copied (due to COW) and
+ * consequently the pointer returned may not be the same as the pointer
+ * passed.
  */
 const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
 					   const struct ubifs_lprops *lp,
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index e61c081..f8ef7c1 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -482,24 +482,26 @@ struct ubifs_lpt_lprops {
  * @empty_lebs: number of empty LEBs
  * @taken_empty_lebs: number of taken LEBs
  * @idx_lebs: number of indexing LEBs
- * @total_free: total free space in bytes
- * @total_dirty: total dirty space in bytes
- * @total_used: total used space in bytes (includes only data LEBs)
- * @total_dead: total dead space in bytes (includes only data LEBs)
- * @total_dark: total dark space in bytes (includes only data LEBs)
+ * @total_free: total free space in bytes (includes all LEBs)
+ * @total_dirty: total dirty space in bytes (includes all LEBs)
+ * @total_used: total used space in bytes (does not include index LEBs)
+ * @total_dead: total dead space in bytes (does not include index LEBs)
+ * @total_dark: total dark space in bytes (does not include index LEBs)
  *
- * N.B. total_dirty and total_used are different to other total_* fields,
- * because they account _all_ LEBs, not just data LEBs.
+ * The @taken_empty_lebs field counts the LEBs that are in the transient state
+ * of having been "taken" for use but not yet written to. @taken_empty_lebs is
+ * needed to account correctly for @gc_lnum, otherwise @empty_lebs could be
+ * used by itself (in which case 'unused_lebs' would be a better name). In the
+ * case of @gc_lnum, it is "taken" at mount time or whenever a LEB is retained
+ * by GC, but unlike other empty LEBs that are "taken", it may not be written
+ * straight away (i.e. before the next commit start or unmount), so either
+ * @gc_lnum must be specially accounted for, or the current approach followed
+ * i.e. count it under @taken_empty_lebs.
  *
- * 'taken_empty_lebs' counts the LEBs that are in the transient state of having
- * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed
- * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used
- * by itself (in which case 'unused_lebs' would be a better name). In the case
- * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC,
- * but unlike other empty LEBs that are 'taken', it may not be written straight
- * away (i.e. before the next commit start or unmount), so either gc_lnum must
- * be specially accounted for, or the current approach followed i.e. count it
- * under 'taken_empty_lebs'.
+ * @empty_lebs includes @taken_empty_lebs.
+ *
+ * @total_used, @total_dead and @total_dark fields do not account indexing
+ * LEBs.
  */
 struct ubifs_lp_stats {
 	int empty_lebs;
-- 
cgit v0.10.2


From af14a1ad792621942a03e4bd0e5f17b6e177e2e0 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 19 Dec 2008 19:26:29 +0200
Subject: UBIFS: fix available blocks count

Take into account that 2 eraseblocks are never available because
they are reserved for the index. This gives more realistic count
of FS blocks.

To avoid future confusions like this, introduce a constant.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index d5a6503..e423425 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -280,13 +280,8 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
 	 * extra LEB to compensate.
 	 */
 	ret += 1;
-	/*
-	 * At present the index needs at least 2 LEBs: one for the index head
-	 * and one for in-the-gaps method (which currently does not cater for
-	 * the index head and so excludes it from consideration).
-	 */
-	if (ret < 2)
-		ret = 2;
+	if (ret < MIN_INDEX_LEBS)
+		ret = MIN_INDEX_LEBS;
 	return ret;
 }
 
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 2dbaa4f..a6a7798 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -695,9 +695,10 @@ static int init_constants_late(struct ubifs_info *c)
 	 * necessary to report something for the 'statfs()' call.
 	 *
 	 * Subtract the LEB reserved for GC, the LEB which is reserved for
-	 * deletions, and assume only one journal head is available.
+	 * deletions, minimum LEBs for the index, and assume only one journal
+	 * head is available.
 	 */
-	tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1;
+	tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1;
 	tmp64 *= (uint64_t)c->leb_size - c->leb_overhead;
 	tmp64 = ubifs_reported_space(c, tmp64);
 	c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index f8ef7c1..543e850 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -63,6 +63,14 @@
 #define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL
 #define SQNUM_WATERMARK      0xFFFFFFFFFF000000ULL
 
+/*
+ * Minimum amount of LEBs reserved for the index. At present the index needs at
+ * least 2 LEBs: one for the index head and one for in-the-gaps method (which
+ * currently does not cater for the index head and so excludes it from
+ * consideration).
+ */
+#define MIN_INDEX_LEBS 2
+
 /* Minimum amount of data UBIFS writes to the flash */
 #define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8)
 
-- 
cgit v0.10.2


From 4d61db4f87b527734ac0cc830dda8fcc4e2add2f Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 18 Dec 2008 14:06:51 +0200
Subject: UBIFS: use nicer 64-bit math

Instead of using do_div(), use better primitives from
linux/math64.h.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index e423425..0bcb803 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -32,7 +32,7 @@
 
 #include "ubifs.h"
 #include <linux/writeback.h>
-#include <asm/div64.h>
+#include <linux/math64.h>
 
 /*
  * When pessimistic budget calculations say that there is no enough space,
@@ -258,8 +258,8 @@ static int make_free_space(struct ubifs_info *c, struct retries_info *ri)
  */
 int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
 {
-	int ret;
-	uint64_t idx_size;
+	int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz;
+	long long idx_size;
 
 	idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
 
@@ -271,18 +271,16 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
 	 * pair, nor similarly the two variables for the new index size, so we
 	 * have to do this costly 64-bit division on fast-path.
 	 */
-	if (do_div(idx_size, c->leb_size - c->max_idx_node_sz))
-		ret = idx_size + 1;
-	else
-		ret = idx_size;
+	idx_size += eff_leb_size - 1;
+	idx_lebs = div_u64(idx_size, eff_leb_size);
 	/*
 	 * The index head is not available for the in-the-gaps method, so add an
 	 * extra LEB to compensate.
 	 */
-	ret += 1;
-	if (ret < MIN_INDEX_LEBS)
-		ret = MIN_INDEX_LEBS;
-	return ret;
+	idx_lebs += 1;
+	if (idx_lebs < MIN_INDEX_LEBS)
+		idx_lebs = MIN_INDEX_LEBS;
+	return idx_lebs;
 }
 
 /**
@@ -718,7 +716,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
  * Note, the calculation is pessimistic, which means that most of the time
  * UBIFS reports less space than it actually has.
  */
-long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
+long long ubifs_reported_space(const struct ubifs_info *c, long long free)
 {
 	int divisor, factor, f;
 
@@ -740,8 +738,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
 	divisor = UBIFS_MAX_DATA_NODE_SZ;
 	divisor += (c->max_idx_node_sz * 3) / (f - 1);
 	free *= factor;
-	do_div(free, divisor);
-	return free;
+	return div_u64(free, divisor);
 }
 
 /**
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 6ecb01a..a2be115 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -33,6 +33,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/debugfs.h>
+#include <linux/math64.h>
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 6d91416..b2792e8 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -43,8 +43,9 @@
  * mounted.
  */
 
-#include <linux/crc16.h>
 #include "ubifs.h"
+#include <linux/crc16.h>
+#include <linux/math64.h>
 
 /**
  * do_calc_lpt_geom - calculate sizes for the LPT area.
@@ -135,15 +136,13 @@ static void do_calc_lpt_geom(struct ubifs_info *c)
 int ubifs_calc_lpt_geom(struct ubifs_info *c)
 {
 	int lebs_needed;
-	uint64_t sz;
+	long long sz;
 
 	do_calc_lpt_geom(c);
 
 	/* Verify that lpt_lebs is big enough */
 	sz = c->lpt_sz * 2; /* Must have at least 2 times the size */
-	sz += c->leb_size - 1;
-	do_div(sz, c->leb_size);
-	lebs_needed = sz;
+	lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size);
 	if (lebs_needed > c->lpt_lebs) {
 		ubifs_err("too few LPT LEBs");
 		return -EINVAL;
@@ -175,7 +174,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
 			      int *big_lpt)
 {
 	int i, lebs_needed;
-	uint64_t sz;
+	long long sz;
 
 	/* Start by assuming the minimum number of LPT LEBs */
 	c->lpt_lebs = UBIFS_MIN_LPT_LEBS;
@@ -202,9 +201,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
 	/* Now check there are enough LPT LEBs */
 	for (i = 0; i < 64 ; i++) {
 		sz = c->lpt_sz * 4; /* Allow 4 times the size */
-		sz += c->leb_size - 1;
-		do_div(sz, c->leb_size);
-		lebs_needed = sz;
+		lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size);
 		if (lebs_needed > c->lpt_lebs) {
 			/* Not enough LPT LEBs so try again with more */
 			c->lpt_lebs = lebs_needed;
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index c5da201..e070c64 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -28,6 +28,7 @@
 
 #include "ubifs.h"
 #include <linux/random.h>
+#include <linux/math64.h>
 
 /*
  * Default journal size in logical eraseblocks as a percent of total
@@ -80,7 +81,7 @@ static int create_default_filesystem(struct ubifs_info *c)
 	int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first;
 	int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
 	int min_leb_cnt = UBIFS_MIN_LEB_CNT;
-	uint64_t tmp64, main_bytes;
+	long long tmp64, main_bytes;
 	__le64 tmp_le64;
 
 	/* Some functions called from here depend on the @c->key_len filed */
@@ -160,7 +161,7 @@ static int create_default_filesystem(struct ubifs_info *c)
 	if (!sup)
 		return -ENOMEM;
 
-	tmp64 = (uint64_t)max_buds * c->leb_size;
+	tmp64 = (long long)max_buds * c->leb_size;
 	if (big_lpt)
 		sup_flags |= UBIFS_FLG_BIGLPT;
 
@@ -187,9 +188,8 @@ static int create_default_filesystem(struct ubifs_info *c)
 
 	generate_random_uuid(sup->uuid);
 
-	main_bytes = (uint64_t)main_lebs * c->leb_size;
-	tmp64 = main_bytes * DEFAULT_RP_PERCENT;
-	do_div(tmp64, 100);
+	main_bytes = (long long)main_lebs * c->leb_size;
+	tmp64 = div_u64(main_bytes * DEFAULT_RP_PERCENT, 100);
 	if (tmp64 > DEFAULT_MAX_RP_SIZE)
 		tmp64 = DEFAULT_MAX_RP_SIZE;
 	sup->rp_size = cpu_to_le64(tmp64);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index a6a7798..c3cefc8 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -34,6 +34,7 @@
 #include <linux/parser.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
+#include <linux/math64.h>
 #include "ubifs.h"
 
 /*
@@ -612,7 +613,7 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
 static int init_constants_late(struct ubifs_info *c)
 {
 	int tmp, err;
-	uint64_t tmp64;
+	long long tmp64;
 
 	c->main_bytes = (long long)c->main_lebs * c->leb_size;
 	c->max_znode_sz = sizeof(struct ubifs_znode) +
@@ -639,9 +640,8 @@ static int init_constants_late(struct ubifs_info *c)
 	 * Make sure that the log is large enough to fit reference nodes for
 	 * all buds plus one reserved LEB.
 	 */
-	tmp64 = c->max_bud_bytes;
-	tmp = do_div(tmp64, c->leb_size);
-	c->max_bud_cnt = tmp64 + !!tmp;
+	tmp64 = c->max_bud_bytes + c->leb_size - 1;
+	c->max_bud_cnt = div_u64(tmp64, c->leb_size);
 	tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1);
 	tmp /= c->leb_size;
 	tmp += 1;
@@ -677,7 +677,7 @@ static int init_constants_late(struct ubifs_info *c)
 	 * Consequently, if the journal is too small, UBIFS will treat it as
 	 * always full.
 	 */
-	tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1;
+	tmp64 = (long long)(c->jhead_cnt + 1) * c->leb_size + 1;
 	if (c->bg_bud_bytes < tmp64)
 		c->bg_bud_bytes = tmp64;
 	if (c->max_bud_bytes < tmp64 + c->leb_size)
@@ -699,7 +699,7 @@ static int init_constants_late(struct ubifs_info *c)
 	 * head is available.
 	 */
 	tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1;
-	tmp64 *= (uint64_t)c->leb_size - c->leb_overhead;
+	tmp64 *= (long long)c->leb_size - c->leb_overhead;
 	tmp64 = ubifs_reported_space(c, tmp64);
 	c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
 
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 543e850..a17dd79 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1498,7 +1498,7 @@ void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
 long long ubifs_get_free_space(struct ubifs_info *c);
 int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
 void ubifs_convert_page_budget(struct ubifs_info *c);
-long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free);
+long long ubifs_reported_space(const struct ubifs_info *c, long long free);
 long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
 
 /* find.c */
-- 
cgit v0.10.2


From 650ed50f4298e76007070b7ab9d640dfe7228ab3 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 22 Dec 2008 11:09:04 +0200
Subject: UBIFS: re-calculate min_idx_size after the commit

When we commit, but before we try to write anything to the flash
media, @c->min_idx_size is inaccurate, because we do not re-calculate
it after the commit. Do not forget to do this.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 3c0af45..fde8d12 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -802,8 +802,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
 	 * budgeting subsystem to assume the index is already committed,
 	 * even though it is not.
 	 */
+	ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
 	c->old_idx_sz = c->calc_idx_sz;
 	c->budg_uncommitted_idx = 0;
+	c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
 	spin_unlock(&c->space_lock);
 	mutex_unlock(&c->tnc_mutex);
 
-- 
cgit v0.10.2


From c8f915913afdfe1a796e312e21658b8edcf20868 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 19 Dec 2008 16:11:13 +0200
Subject: UBIFS: avoid unnecessary calculations

Do not calculate min_idx_lebs, because it is available in
c->min_idx_lebs

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 0bcb803..44cff80 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -763,7 +763,8 @@ long long ubifs_get_free_space(struct ubifs_info *c)
 	long long available, outstanding, free;
 
 	spin_lock(&c->space_lock);
-	min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+	min_idx_lebs = c->min_idx_lebs;
+	ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c));
 	outstanding = c->budg_data_growth + c->budg_dd_growth;
 
 	/*
-- 
cgit v0.10.2


From 3af373021fa32f8f787bfbdcc1a9277a287bde4e Mon Sep 17 00:00:00 2001
From: Stefano Panella <stefano.panella@csr.com>
Date: Tue, 23 Dec 2008 12:31:09 +0000
Subject: uwb: remove beacon cache entry after calling uwb_notify()

Removing the beacon cache entry from a uwb_dev can cause an oops if the
bce is released before the call to uwb_notify().

Signed-off-by: Stefano Panella <stefano.panella@csr.com>
Signed-off-by: David Vrabel <david.vrabel@csr.com>

diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
index 0315093..36bc315 100644
--- a/drivers/uwb/beacon.c
+++ b/drivers/uwb/beacon.c
@@ -289,8 +289,6 @@ void uwb_beca_purge(struct uwb_rc *rc)
 		expires = bce->ts_jiffies + msecs_to_jiffies(beacon_timeout_ms);
 		if (time_after(jiffies, expires)) {
 			uwbd_dev_offair(bce);
-			list_del(&bce->node);
-			uwb_bce_put(bce);
 		}
 	}
 	mutex_unlock(&rc->uwb_beca.mutex);
diff --git a/drivers/uwb/lc-dev.c b/drivers/uwb/lc-dev.c
index f78087b..e9fe1bb 100644
--- a/drivers/uwb/lc-dev.c
+++ b/drivers/uwb/lc-dev.c
@@ -375,6 +375,8 @@ int __uwb_dev_offair(struct uwb_dev *uwb_dev, struct uwb_rc *rc)
 		 rc ? rc->uwb_dev.dev.parent->bus->name : "n/a",
 		 rc ? dev_name(rc->uwb_dev.dev.parent) : "");
 	uwb_dev_rm(uwb_dev);
+	list_del(&uwb_dev->bce->node);
+	uwb_bce_put(uwb_dev->bce);
 	uwb_dev_put(uwb_dev);	/* for the creation in _onair() */
 
 	return 0;
-- 
cgit v0.10.2


From d6d7b702a3a1ca50f7ca2bebaa79c80425156bac Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 12 Nov 2008 16:49:48 -0600
Subject: dlm: fix up memory allocation flags

Use ls_allocation for memory allocations, which a cluster fs sets to
GFP_NOFS.  Use GFP_NOFS for allocations when no lockspace struct is
available.  Taking dlm locks needs to avoid calling back into the
cluster fs because write-out can require taking dlm locks.

Cc: Christine Caulfield <ccaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 3962262..1e72031 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -295,6 +295,7 @@ static int add_sock(struct socket *sock, struct connection *con)
 	con->sock->sk->sk_write_space = lowcomms_write_space;
 	con->sock->sk->sk_state_change = lowcomms_state_change;
 	con->sock->sk->sk_user_data = con;
+	con->sock->sk->sk_allocation = GFP_NOFS;
 	return 0;
 }
 
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 54c14c6..c1775b8 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -39,7 +39,7 @@ char *dlm_allocate_lvb(struct dlm_ls *ls)
 {
 	char *p;
 
-	p = kzalloc(ls->ls_lvblen, GFP_KERNEL);
+	p = kzalloc(ls->ls_lvblen, ls->ls_allocation);
 	return p;
 }
 
@@ -57,7 +57,7 @@ struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen)
 
 	DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
 
-	r = kzalloc(sizeof(*r) + namelen, GFP_KERNEL);
+	r = kzalloc(sizeof(*r) + namelen, ls->ls_allocation);
 	return r;
 }
 
@@ -72,7 +72,7 @@ struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
 {
 	struct dlm_lkb *lkb;
 
-	lkb = kmem_cache_zalloc(lkb_cache, GFP_KERNEL);
+	lkb = kmem_cache_zalloc(lkb_cache, ls->ls_allocation);
 	return lkb;
 }
 
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 07ac709..f3396c6 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -112,7 +112,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
 		   ordinary messages). */
 
 		if (msglen > sizeof(__tmp) && p == &__tmp.p) {
-			p = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
+			p = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
 			if (p == NULL)
 				return ret;
 		}
-- 
cgit v0.10.2


From cd8e4679bdcf9b54564f2cda2389bd0f0457e12d Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 12 Nov 2008 16:28:43 -0600
Subject: dlm: trivial annotation of be16 value

fs/dlm/dir.c:419:14: warning: incorrect type in assignment (different base types)
fs/dlm/dir.c:419:14:    expected unsigned short [unsigned] [addressable] [assigned] [usertype] be_namelen
fs/dlm/dir.c:419:14:    got restricted __be16 [usertype] <noident>

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 85defeb..92969f8 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -374,7 +374,7 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
 	struct list_head *list;
 	struct dlm_rsb *r;
 	int offset = 0, dir_nodeid;
-	uint16_t be_namelen;
+	__be16 be_namelen;
 
 	down_read(&ls->ls_root_sem);
 
@@ -410,15 +410,15 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
 
 		if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
 			/* Write end-of-block record */
-			be_namelen = 0;
-			memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
-			offset += sizeof(uint16_t);
+			be_namelen = cpu_to_be16(0);
+			memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
+			offset += sizeof(__be16);
 			goto out;
 		}
 
 		be_namelen = cpu_to_be16(r->res_length);
-		memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
-		offset += sizeof(uint16_t);
+		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
+		offset += sizeof(__be16);
 		memcpy(outbuf + offset, r->res_name, r->res_length);
 		offset += r->res_length;
 	}
@@ -430,9 +430,9 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
 
 	if ((list == &ls->ls_root_list) &&
 	    (offset + sizeof(uint16_t) <= outlen)) {
-		be_namelen = 0xFFFF;
-		memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
-		offset += sizeof(uint16_t);
+		be_namelen = cpu_to_be16(0xFFFF);
+		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
+		offset += sizeof(__be16);
 	}
 
  out:
-- 
cgit v0.10.2


From 1521848cbb42935a52d11305c054b14461ad061c Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 12 Nov 2008 17:00:16 -0600
Subject: dlm: remove kmap/kunmap

The pages used in lowcomms are not highmem, so kmap is not necessary.

Cc: Christine Caulfield <ccaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 1e72031..103a5eb 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -824,7 +824,6 @@ static void sctp_init_assoc(struct connection *con)
 	len = e->len;
 	offset = e->offset;
 	spin_unlock(&con->writequeue_lock);
-	kmap(e->page);
 
 	/* Send the first block off the write queue */
 	iov[0].iov_base = page_address(e->page)+offset;
@@ -855,7 +854,6 @@ static void sctp_init_assoc(struct connection *con)
 
 		if (e->len == 0 && e->users == 0) {
 			list_del(&e->list);
-			kunmap(e->page);
 			free_entry(e);
 		}
 		spin_unlock(&con->writequeue_lock);
@@ -1204,8 +1202,6 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
 
 	if (e) {
 	got_one:
-		if (users == 0)
-			kmap(e->page);
 		*ppc = page_address(e->page) + offset;
 		return e;
 	}
@@ -1234,7 +1230,6 @@ void dlm_lowcomms_commit_buffer(void *mh)
 	if (users)
 		goto out;
 	e->len = e->end - e->offset;
-	kunmap(e->page);
 	spin_unlock(&con->writequeue_lock);
 
 	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
@@ -1273,7 +1268,6 @@ static void send_to_sock(struct connection *con)
 		offset = e->offset;
 		BUG_ON(len == 0 && e->users == 0);
 		spin_unlock(&con->writequeue_lock);
-		kmap(e->page);
 
 		ret = 0;
 		if (len) {
@@ -1295,7 +1289,6 @@ static void send_to_sock(struct connection *con)
 
 		if (e->len == 0 && e->users == 0) {
 			list_del(&e->list);
-			kunmap(e->page);
 			free_entry(e);
 			continue;
 		}
-- 
cgit v0.10.2


From d61e9aac96317a43c192f1faabfa95d4d675b7ce Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 10 Dec 2008 09:31:02 -0600
Subject: dlm: replace schedule with cond_resched

This is a one-liner to use cond_resched() rather than schedule()
in the ast delivery loop. It should not be necessary to schedule
every time, so this will save some cpu time while continuing to
allow scheduling when required.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 8bf31e3..30c11f3 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -101,7 +101,7 @@ static void process_asts(void)
 		   and may result in the lkb being freed */
 		dlm_put_lkb(lkb);
 
-		schedule();
+		cond_resched();
 	}
 }
 
-- 
cgit v0.10.2


From 03339696314fffb95dafb349b84243358e945ce6 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Mon, 8 Dec 2008 17:14:10 -0600
Subject: dlm: remove extra blocking callback check

Just before delivering a blocking callback (bast), the dlm_astd
thread checks again that the granted mode of the lkb actually
blocks the mode requested by the bast.  The idea behind this was
originally that the granted mode may have changed since the bast
was queued, making the callback now unnecessary.  Reasons for
removing this extra check are:
- dlm_astd doesn't lock the rsb before reading the lkb grmode, so
  it's not technically safe (this removes the long standing FIXME)
- after running some tests, it doesn't appear the check ever actually
  eliminates a bast
- delivering an unnecessary blocking callback isn't a bad thing and
  can happen anyway

Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 30c11f3..09b167d 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -89,13 +89,8 @@ static void process_asts(void)
 		if ((type & AST_COMP) && cast)
 			cast(lkb->lkb_astparam);
 
-		/* FIXME: Is it safe to look at lkb_grmode here
-		   without doing a lock_rsb() ?
-		   Look at other checks in v1 to avoid basts. */
-
 		if ((type & AST_BAST) && bast)
-			if (!dlm_modes_compat(lkb->lkb_grmode, bmode))
-				bast(lkb->lkb_astparam, bmode);
+			bast(lkb->lkb_astparam, bmode);
 
 		/* this removes the reference added by dlm_add_ast
 		   and may result in the lkb being freed */
-- 
cgit v0.10.2


From fd22a51bcc0b7b76fc729b02316214fd979f9fe1 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Tue, 9 Dec 2008 11:55:46 -0600
Subject: dlm: improve how bast mode handling

The lkb bastmode value is set in the context of processing the
lock, and read by the dlm_astd thread.  Because it's accessed
in these two separate contexts, the writing/reading ought to
be done under a lock.  This is simple to do by setting it and
reading it when the lkb is added to and removed from dlm_astd's
callback list which is properly locked.

Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 09b167d..fbe840d 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -33,10 +33,10 @@ void dlm_del_ast(struct dlm_lkb *lkb)
 	spin_unlock(&ast_queue_lock);
 }
 
-void dlm_add_ast(struct dlm_lkb *lkb, int type)
+void dlm_add_ast(struct dlm_lkb *lkb, int type, int bastmode)
 {
 	if (lkb->lkb_flags & DLM_IFL_USER) {
-		dlm_user_add_ast(lkb, type);
+		dlm_user_add_ast(lkb, type, bastmode);
 		return;
 	}
 
@@ -46,6 +46,8 @@ void dlm_add_ast(struct dlm_lkb *lkb, int type)
 		list_add_tail(&lkb->lkb_astqueue, &ast_queue);
 	}
 	lkb->lkb_ast_type |= type;
+	if (bastmode)
+		lkb->lkb_bastmode = bastmode;
 	spin_unlock(&ast_queue_lock);
 
 	set_bit(WAKE_ASTS, &astd_wakeflags);
@@ -59,7 +61,7 @@ static void process_asts(void)
 	struct dlm_lkb *lkb;
 	void (*cast) (void *astparam);
 	void (*bast) (void *astparam, int mode);
-	int type = 0, found, bmode;
+	int type = 0, found, bastmode;
 
 	for (;;) {
 		found = 0;
@@ -74,6 +76,7 @@ static void process_asts(void)
 			list_del(&lkb->lkb_astqueue);
 			type = lkb->lkb_ast_type;
 			lkb->lkb_ast_type = 0;
+			bastmode = lkb->lkb_bastmode;
 			found = 1;
 			break;
 		}
@@ -84,13 +87,12 @@ static void process_asts(void)
 
 		cast = lkb->lkb_astfn;
 		bast = lkb->lkb_bastfn;
-		bmode = lkb->lkb_bastmode;
 
 		if ((type & AST_COMP) && cast)
 			cast(lkb->lkb_astparam);
 
 		if ((type & AST_BAST) && bast)
-			bast(lkb->lkb_astparam, bmode);
+			bast(lkb->lkb_astparam, bastmode);
 
 		/* this removes the reference added by dlm_add_ast
 		   and may result in the lkb being freed */
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h
index 6ee276c..1b5fc5f 100644
--- a/fs/dlm/ast.h
+++ b/fs/dlm/ast.h
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -13,7 +13,7 @@
 #ifndef __ASTD_DOT_H__
 #define __ASTD_DOT_H__
 
-void dlm_add_ast(struct dlm_lkb *lkb, int type);
+void dlm_add_ast(struct dlm_lkb *lkb, int type, int bastmode);
 void dlm_del_ast(struct dlm_lkb *lkb);
 
 void dlm_astd_wake(void);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 724ddac..7b758da 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -307,7 +307,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
 	lkb->lkb_lksb->sb_status = rv;
 	lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
 
-	dlm_add_ast(lkb, AST_COMP);
+	dlm_add_ast(lkb, AST_COMP, 0);
 }
 
 static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -320,10 +320,8 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
 {
 	if (is_master_copy(lkb))
 		send_bast(r, lkb, rqmode);
-	else {
-		lkb->lkb_bastmode = rqmode;
-		dlm_add_ast(lkb, AST_BAST);
-	}
+	else
+		dlm_add_ast(lkb, AST_BAST, rqmode);
 }
 
 /*
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index b3832c6..065149e 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -175,7 +175,7 @@ static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
 /* we could possibly check if the cancel of an orphan has resulted in the lkb
    being removed and then remove that lkb from the orphans list and free it */
 
-void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
+void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode)
 {
 	struct dlm_ls *ls;
 	struct dlm_user_args *ua;
@@ -208,6 +208,8 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
 
 	ast_type = lkb->lkb_ast_type;
 	lkb->lkb_ast_type |= type;
+	if (bastmode)
+		lkb->lkb_bastmode = bastmode;
 
 	if (!ast_type) {
 		kref_get(&lkb->lkb_ref);
diff --git a/fs/dlm/user.h b/fs/dlm/user.h
index 35eb6a1..1c96864 100644
--- a/fs/dlm/user.h
+++ b/fs/dlm/user.h
@@ -9,7 +9,7 @@
 #ifndef __USER_DOT_H__
 #define __USER_DOT_H__
 
-void dlm_user_add_ast(struct dlm_lkb *lkb, int type);
+void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode);
 int dlm_user_init(void);
 void dlm_user_exit(void);
 int dlm_device_deregister(struct dlm_ls *ls);
-- 
cgit v0.10.2


From eeda418d8c2646f33f24e9ad33d86c239adc6de7 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Tue, 9 Dec 2008 14:12:21 -0600
Subject: dlm: change lock time stamping

Use ktime instead of jiffies for timestamping lkb's.  Also stamp the
time on every lkb whenever it's added to a resource queue, instead of
just stamping locks subject to timeouts.  This will allow us to use
timestamps more widely for debugging all locks.

Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 8fc24f4..19e4f9e 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -162,21 +162,21 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 
 static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
 {
-	unsigned int waiting = 0;
-	uint64_t xid = 0;
+	u64 xid = 0;
+	u64 us;
 
 	if (lkb->lkb_flags & DLM_IFL_USER) {
 		if (lkb->lkb_ua)
 			xid = lkb->lkb_ua->xid;
 	}
 
-	if (lkb->lkb_timestamp)
-		waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
+	/* microseconds since lkb was added to current queue */
+	us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_timestamp));
 
-	/* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms
+	/* id nodeid remid pid xid exflags flags sts grmode rqmode time_us
 	   r_nodeid r_len r_name */
 
-	seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n",
+	seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %llu %u %d \"%s\"\n",
 		   lkb->lkb_id,
 		   lkb->lkb_nodeid,
 		   lkb->lkb_remid,
@@ -187,7 +187,7 @@ static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *
 		   lkb->lkb_status,
 		   lkb->lkb_grmode,
 		   lkb->lkb_rqmode,
-		   waiting,
+		   (unsigned long long)us,
 		   r->res_nodeid,
 		   r->res_length,
 		   r->res_name);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 868e4c9..e69135c 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -245,7 +245,7 @@ struct dlm_lkb {
 	struct list_head	lkb_astqueue;	/* need ast to be sent */
 	struct list_head	lkb_ownqueue;	/* list of locks for a process */
 	struct list_head	lkb_time_list;
-	unsigned long		lkb_timestamp;
+	ktime_t			lkb_timestamp;
 	unsigned long		lkb_timeout_cs;
 
 	char			*lkb_lvbptr;
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 7b758da..dfc57ae 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -742,6 +742,8 @@ static void add_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int status)
 
 	DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb););
 
+	lkb->lkb_timestamp = ktime_get();
+
 	lkb->lkb_status = status;
 
 	switch (status) {
@@ -1011,10 +1013,8 @@ static void add_timeout(struct dlm_lkb *lkb)
 {
 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
 
-	if (is_master_copy(lkb)) {
-		lkb->lkb_timestamp = jiffies;
+	if (is_master_copy(lkb))
 		return;
-	}
 
 	if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
 	    !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
@@ -1029,7 +1029,6 @@ static void add_timeout(struct dlm_lkb *lkb)
 	DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
 	mutex_lock(&ls->ls_timeout_mutex);
 	hold_lkb(lkb);
-	lkb->lkb_timestamp = jiffies;
 	list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
 	mutex_unlock(&ls->ls_timeout_mutex);
 }
@@ -1057,6 +1056,7 @@ void dlm_scan_timeout(struct dlm_ls *ls)
 	struct dlm_rsb *r;
 	struct dlm_lkb *lkb;
 	int do_cancel, do_warn;
+	s64 wait_us;
 
 	for (;;) {
 		if (dlm_locking_stopped(ls))
@@ -1067,14 +1067,15 @@ void dlm_scan_timeout(struct dlm_ls *ls)
 		mutex_lock(&ls->ls_timeout_mutex);
 		list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
 
+			wait_us = ktime_to_us(ktime_sub(ktime_get(),
+					      		lkb->lkb_timestamp));
+
 			if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
-			    time_after_eq(jiffies, lkb->lkb_timestamp +
-					  lkb->lkb_timeout_cs * HZ/100))
+			    wait_us >= (lkb->lkb_timeout_cs * 10000))
 				do_cancel = 1;
 
 			if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
-			    time_after_eq(jiffies, lkb->lkb_timestamp +
-				   	   dlm_config.ci_timewarn_cs * HZ/100))
+			    wait_us >= dlm_config.ci_timewarn_cs * 10000)
 				do_warn = 1;
 
 			if (!do_cancel && !do_warn)
@@ -1120,12 +1121,12 @@ void dlm_scan_timeout(struct dlm_ls *ls)
 void dlm_adjust_timeouts(struct dlm_ls *ls)
 {
 	struct dlm_lkb *lkb;
-	long adj = jiffies - ls->ls_recover_begin;
+	u64 adj_us = jiffies_to_usecs(jiffies - ls->ls_recover_begin);
 
 	ls->ls_recover_begin = 0;
 	mutex_lock(&ls->ls_timeout_mutex);
 	list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
-		lkb->lkb_timestamp += adj;
+		lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
 	mutex_unlock(&ls->ls_timeout_mutex);
 }
 
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 18bda83..46e582c 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -115,7 +115,6 @@ static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
 	data->status = lkb->lkb_status;
 	data->grmode = lkb->lkb_grmode;
 	data->rqmode = lkb->lkb_rqmode;
-	data->timestamp = lkb->lkb_timestamp;
 	if (lkb->lkb_ua)
 		data->xid = lkb->lkb_ua->xid;
 	if (r) {
-- 
cgit v0.10.2


From e3a84ad495d1fddb542e0922160f0194a1361950 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Tue, 9 Dec 2008 14:47:29 -0600
Subject: dlm: add time stamp of blocking callback

Record the time the latest blocking callback was queued for
a lock.  This will be used for debugging in combination with
lock queue timestamp changes in the previous patch.

Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index e69135c..0c48829 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -245,6 +245,7 @@ struct dlm_lkb {
 	struct list_head	lkb_astqueue;	/* need ast to be sent */
 	struct list_head	lkb_ownqueue;	/* list of locks for a process */
 	struct list_head	lkb_time_list;
+	ktime_t			lkb_time_bast;	/* for debugging */
 	ktime_t			lkb_timestamp;
 	unsigned long		lkb_timeout_cs;
 
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index dfc57ae..6cfe65b 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -318,6 +318,8 @@ static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
 
 static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
 {
+	lkb->lkb_time_bast = ktime_get();
+
 	if (is_master_copy(lkb))
 		send_bast(r, lkb, rqmode);
 	else
-- 
cgit v0.10.2


From d022509d1c54be4918e7fc8f1195ee8c392e9a57 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Tue, 16 Dec 2008 14:53:23 -0600
Subject: dlm: add new debugfs entry

The new debugfs entry dumps all rsb and lkb structures, and includes
a lot more information than has been available before.  This includes
the new timestamps added by a previous patch for debugging callback
issues.

Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 19e4f9e..2f107d1 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -27,7 +27,7 @@ static struct dentry *dlm_root;
 
 struct rsb_iter {
 	int entry;
-	int locks;
+	int format;
 	int header;
 	struct dlm_ls *ls;
 	struct list_head *next;
@@ -60,8 +60,8 @@ static char *print_lockmode(int mode)
 	}
 }
 
-static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
-				struct dlm_rsb *res)
+static void print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb,
+			       struct dlm_rsb *res)
 {
 	seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
 
@@ -83,7 +83,7 @@ static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
 	seq_printf(s, "\n");
 }
 
-static int print_resource(struct dlm_rsb *res, struct seq_file *s)
+static int print_format1(struct dlm_rsb *res, struct seq_file *s)
 {
 	struct dlm_lkb *lkb;
 	int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
@@ -134,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 	/* Print the locks attached to this resource */
 	seq_printf(s, "Granted Queue\n");
 	list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
-		print_resource_lock(s, lkb, res);
+		print_format1_lock(s, lkb, res);
 
 	seq_printf(s, "Conversion Queue\n");
 	list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
-		print_resource_lock(s, lkb, res);
+		print_format1_lock(s, lkb, res);
 
 	seq_printf(s, "Waiting Queue\n");
 	list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
-		print_resource_lock(s, lkb, res);
+		print_format1_lock(s, lkb, res);
 
 	if (list_empty(&res->res_lookup))
 		goto out;
@@ -160,7 +160,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 	return 0;
 }
 
-static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
+static void print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb,
+			       struct dlm_rsb *r)
 {
 	u64 xid = 0;
 	u64 us;
@@ -193,20 +194,108 @@ static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *
 		   r->res_name);
 }
 
-static int print_locks(struct dlm_rsb *r, struct seq_file *s)
+static int print_format2(struct dlm_rsb *r, struct seq_file *s)
 {
 	struct dlm_lkb *lkb;
 
 	lock_rsb(r);
 
 	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
-		print_lock(s, lkb, r);
+		print_format2_lock(s, lkb, r);
 
 	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
-		print_lock(s, lkb, r);
+		print_format2_lock(s, lkb, r);
 
 	list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
-		print_lock(s, lkb, r);
+		print_format2_lock(s, lkb, r);
+
+	unlock_rsb(r);
+	return 0;
+}
+
+static void print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb,
+			       int rsb_lookup)
+{
+	u64 xid = 0;
+
+	if (lkb->lkb_flags & DLM_IFL_USER) {
+		if (lkb->lkb_ua)
+			xid = lkb->lkb_ua->xid;
+	}
+
+	seq_printf(s, "lkb %x %d %x %u %llu %x %x %d %d %d %d %d %d %u %llu %llu\n",
+		   lkb->lkb_id,
+		   lkb->lkb_nodeid,
+		   lkb->lkb_remid,
+		   lkb->lkb_ownpid,
+		   (unsigned long long)xid,
+		   lkb->lkb_exflags,
+		   lkb->lkb_flags,
+		   lkb->lkb_status,
+		   lkb->lkb_grmode,
+		   lkb->lkb_rqmode,
+		   lkb->lkb_highbast,
+		   rsb_lookup,
+		   lkb->lkb_wait_type,
+		   lkb->lkb_lvbseq,
+		   (unsigned long long)ktime_to_ns(lkb->lkb_timestamp),
+		   (unsigned long long)ktime_to_ns(lkb->lkb_time_bast));
+}
+
+static int print_format3(struct dlm_rsb *r, struct seq_file *s)
+{
+	struct dlm_lkb *lkb;
+	int i, lvblen = r->res_ls->ls_lvblen;
+	int print_name = 1;
+
+	lock_rsb(r);
+
+	seq_printf(s, "rsb %p %d %x %lx %d %d %u %d ",
+		   r,
+		   r->res_nodeid,
+		   r->res_first_lkid,
+		   r->res_flags,
+		   !list_empty(&r->res_root_list),
+		   !list_empty(&r->res_recover_list),
+		   r->res_recover_locks_count,
+		   r->res_length);
+
+	for (i = 0; i < r->res_length; i++) {
+		if (!isascii(r->res_name[i]) || !isprint(r->res_name[i]))
+			print_name = 0;
+	}
+
+	seq_printf(s, "%s", print_name ? "str " : "hex");
+
+	for (i = 0; i < r->res_length; i++) {
+		if (print_name)
+			seq_printf(s, "%c", r->res_name[i]);
+		else
+			seq_printf(s, " %02x", (unsigned char)r->res_name[i]);
+	}
+	seq_printf(s, "\n");
+
+	if (!r->res_lvbptr)
+		goto do_locks;
+
+	seq_printf(s, "lvb %u %d", r->res_lvbseq, lvblen);
+
+	for (i = 0; i < lvblen; i++)
+		seq_printf(s, " %02x", (unsigned char)r->res_lvbptr[i]);
+	seq_printf(s, "\n");
+
+ do_locks:
+	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
+		print_format3_lock(s, lkb, 0);
+
+	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
+		print_format3_lock(s, lkb, 0);
+
+	list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
+		print_format3_lock(s, lkb, 0);
+
+	list_for_each_entry(lkb, &r->res_lookup, lkb_rsb_lookup)
+		print_format3_lock(s, lkb, 1);
 
 	unlock_rsb(r);
 	return 0;
@@ -231,7 +320,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
 				break;
 			}
 			read_unlock(&ls->ls_rsbtbl[i].lock);
-                }
+		}
 		ri->entry = i;
 
 		if (ri->entry >= ls->ls_rsbtbl_size)
@@ -248,7 +337,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
 			read_unlock(&ls->ls_rsbtbl[i].lock);
 			dlm_put_rsb(old);
 			goto top;
-                }
+		}
 		ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
 		dlm_hold_rsb(ri->rsb);
 		read_unlock(&ls->ls_rsbtbl[i].lock);
@@ -274,6 +363,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
 	ri->ls = ls;
 	ri->entry = 0;
 	ri->next = NULL;
+	ri->format = 1;
 
 	if (rsb_iter_next(ri)) {
 		rsb_iter_free(ri);
@@ -325,16 +415,26 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
 {
 	struct rsb_iter *ri = iter_ptr;
 
-	if (ri->locks) {
+	switch (ri->format) {
+	case 1:
+		print_format1(ri->rsb, file);
+		break;
+	case 2:
 		if (ri->header) {
-			seq_printf(file, "id nodeid remid pid xid exflags flags "
-					 "sts grmode rqmode time_ms r_nodeid "
-					 "r_len r_name\n");
+			seq_printf(file, "id nodeid remid pid xid exflags "
+					 "flags sts grmode rqmode time_ms "
+					 "r_nodeid r_len r_name\n");
 			ri->header = 0;
 		}
-		print_locks(ri->rsb, file);
-	} else {
-		print_resource(ri->rsb, file);
+		print_format2(ri->rsb, file);
+		break;
+	case 3:
+		if (ri->header) {
+			seq_printf(file, "version rsb 1.1 lvb 1.1 lkb 1.1\n");
+			ri->header = 0;
+		}
+		print_format3(ri->rsb, file);
+		break;
 	}
 
 	return 0;
@@ -385,7 +485,7 @@ static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos)
 	ri->ls = ls;
 	ri->entry = 0;
 	ri->next = NULL;
-	ri->locks = 1;
+	ri->format = 2;
 
 	if (*pos == 0)
 		ri->header = 1;
@@ -448,6 +548,84 @@ static const struct file_operations locks_fops = {
 };
 
 /*
+ * Dump all rsb/lvb/lkb state in compact listing, more complete than _locks
+ * This can replace both formats 1 and 2 eventually.
+ */
+
+static struct rsb_iter *all_iter_init(struct dlm_ls *ls, loff_t *pos)
+{
+	struct rsb_iter *ri;
+
+	ri = kzalloc(sizeof *ri, GFP_KERNEL);
+	if (!ri)
+		return NULL;
+
+	ri->ls = ls;
+	ri->entry = 0;
+	ri->next = NULL;
+	ri->format = 3;
+
+	if (*pos == 0)
+		ri->header = 1;
+
+	if (rsb_iter_next(ri)) {
+		rsb_iter_free(ri);
+		return NULL;
+	}
+
+	return ri;
+}
+
+static void *all_seq_start(struct seq_file *file, loff_t *pos)
+{
+	struct rsb_iter *ri;
+	loff_t n = *pos;
+
+	ri = all_iter_init(file->private, pos);
+	if (!ri)
+		return NULL;
+
+	while (n--) {
+		if (rsb_iter_next(ri)) {
+			rsb_iter_free(ri);
+			return NULL;
+		}
+	}
+
+	return ri;
+}
+
+static struct seq_operations all_seq_ops = {
+	.start = all_seq_start,
+	.next  = rsb_seq_next,
+	.stop  = rsb_seq_stop,
+	.show  = rsb_seq_show,
+};
+
+static int all_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret;
+
+	ret = seq_open(file, &all_seq_ops);
+	if (ret)
+		return ret;
+
+	seq = file->private_data;
+	seq->private = inode->i_private;
+
+	return 0;
+}
+
+static const struct file_operations all_fops = {
+	.owner   = THIS_MODULE,
+	.open    = all_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
+/*
  * dump lkb's on the ls_waiters list
  */
 
@@ -489,30 +667,33 @@ static const struct file_operations waiters_fops = {
 	.read    = waiters_read
 };
 
+void dlm_delete_debug_file(struct dlm_ls *ls)
+{
+	if (ls->ls_debug_rsb_dentry)
+		debugfs_remove(ls->ls_debug_rsb_dentry);
+	if (ls->ls_debug_waiters_dentry)
+		debugfs_remove(ls->ls_debug_waiters_dentry);
+	if (ls->ls_debug_locks_dentry)
+		debugfs_remove(ls->ls_debug_locks_dentry);
+	if (ls->ls_debug_all_dentry)
+		debugfs_remove(ls->ls_debug_all_dentry);
+}
+
 int dlm_create_debug_file(struct dlm_ls *ls)
 {
 	char name[DLM_LOCKSPACE_LEN+8];
 
+	/* format 1 */
+
 	ls->ls_debug_rsb_dentry = debugfs_create_file(ls->ls_name,
 						      S_IFREG | S_IRUGO,
 						      dlm_root,
 						      ls,
 						      &rsb_fops);
 	if (!ls->ls_debug_rsb_dentry)
-		return -ENOMEM;
+		goto fail;
 
-	memset(name, 0, sizeof(name));
-	snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_waiters", ls->ls_name);
-
-	ls->ls_debug_waiters_dentry = debugfs_create_file(name,
-							  S_IFREG | S_IRUGO,
-							  dlm_root,
-							  ls,
-							  &waiters_fops);
-	if (!ls->ls_debug_waiters_dentry) {
-		debugfs_remove(ls->ls_debug_rsb_dentry);
-		return -ENOMEM;
-	}
+	/* format 2 */
 
 	memset(name, 0, sizeof(name));
 	snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
@@ -522,23 +703,38 @@ int dlm_create_debug_file(struct dlm_ls *ls)
 							dlm_root,
 							ls,
 							&locks_fops);
-	if (!ls->ls_debug_locks_dentry) {
-		debugfs_remove(ls->ls_debug_waiters_dentry);
-		debugfs_remove(ls->ls_debug_rsb_dentry);
-		return -ENOMEM;
-	}
+	if (!ls->ls_debug_locks_dentry)
+		goto fail;
+
+	/* format 3 */
+
+	memset(name, 0, sizeof(name));
+	snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_all", ls->ls_name);
+
+	ls->ls_debug_all_dentry = debugfs_create_file(name,
+						      S_IFREG | S_IRUGO,
+						      dlm_root,
+						      ls,
+						      &all_fops);
+	if (!ls->ls_debug_all_dentry)
+		goto fail;
+
+	memset(name, 0, sizeof(name));
+	snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_waiters", ls->ls_name);
+
+	ls->ls_debug_waiters_dentry = debugfs_create_file(name,
+							  S_IFREG | S_IRUGO,
+							  dlm_root,
+							  ls,
+							  &waiters_fops);
+	if (!ls->ls_debug_waiters_dentry)
+		goto fail;
 
 	return 0;
-}
 
-void dlm_delete_debug_file(struct dlm_ls *ls)
-{
-	if (ls->ls_debug_rsb_dentry)
-		debugfs_remove(ls->ls_debug_rsb_dentry);
-	if (ls->ls_debug_waiters_dentry)
-		debugfs_remove(ls->ls_debug_waiters_dentry);
-	if (ls->ls_debug_locks_dentry)
-		debugfs_remove(ls->ls_debug_locks_dentry);
+ fail:
+	dlm_delete_debug_file(ls);
+	return -ENOMEM;
 }
 
 int __init dlm_register_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 0c48829..ef2f1e3 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -482,6 +482,7 @@ struct dlm_ls {
 	struct dentry		*ls_debug_rsb_dentry; /* debugfs */
 	struct dentry		*ls_debug_waiters_dentry; /* debugfs */
 	struct dentry		*ls_debug_locks_dentry; /* debugfs */
+	struct dentry		*ls_debug_all_dentry; /* debugfs */
 
 	wait_queue_head_t	ls_uevent_wait;	/* user part of join/leave */
 	int			ls_uevent_result;
-- 
cgit v0.10.2


From 722d74219ea21223c74e5e894b0afcc5e4ca75a7 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 23 Dec 2008 10:22:56 -0600
Subject: dlm: fs/dlm/ast.c: fix warning

fs/dlm/ast.c: In function 'dlm_astd':
fs/dlm/ast.c:64: warning: 'bastmode' may be used uninitialized in this function

Cleans code up.

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index fbe840d..dc2ad60 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -61,30 +61,23 @@ static void process_asts(void)
 	struct dlm_lkb *lkb;
 	void (*cast) (void *astparam);
 	void (*bast) (void *astparam, int mode);
-	int type = 0, found, bastmode;
-
-	for (;;) {
-		found = 0;
-		spin_lock(&ast_queue_lock);
-		list_for_each_entry(lkb, &ast_queue, lkb_astqueue) {
-			r = lkb->lkb_resource;
-			ls = r->res_ls;
-
-			if (dlm_locking_stopped(ls))
-				continue;
-
-			list_del(&lkb->lkb_astqueue);
-			type = lkb->lkb_ast_type;
-			lkb->lkb_ast_type = 0;
-			bastmode = lkb->lkb_bastmode;
-			found = 1;
-			break;
-		}
-		spin_unlock(&ast_queue_lock);
+	int type = 0, bastmode;
+
+repeat:
+	spin_lock(&ast_queue_lock);
+	list_for_each_entry(lkb, &ast_queue, lkb_astqueue) {
+		r = lkb->lkb_resource;
+		ls = r->res_ls;
 
-		if (!found)
-			break;
+		if (dlm_locking_stopped(ls))
+			continue;
 
+		list_del(&lkb->lkb_astqueue);
+		type = lkb->lkb_ast_type;
+		lkb->lkb_ast_type = 0;
+		bastmode = lkb->lkb_bastmode;
+
+		spin_unlock(&ast_queue_lock);
 		cast = lkb->lkb_astfn;
 		bast = lkb->lkb_bastfn;
 
@@ -99,7 +92,9 @@ static void process_asts(void)
 		dlm_put_lkb(lkb);
 
 		cond_resched();
+		goto repeat;
 	}
+	spin_unlock(&ast_queue_lock);
 }
 
 static inline int no_asts(void)
-- 
cgit v0.10.2


From b77b881f21b29aa7efa668fde69ee3dc0372ae3f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 19 Dec 2008 15:23:44 -0800
Subject: x86: fix lguest used_vectors breakage, -v2

Impact: fix lguest, clean up

32-bit lguest used used_vectors to record vectors, but that model of
allocating vectors changed and got broken, after we changed vector
allocation to a per_cpu array.

Try enable that for 64bit, and the array is used for all vectors that
are not managed by vector_irq per_cpu array.

Also kill system_vectors[], that is now a duplication of the
used_vectors bitmap.

[ merged in cpus4096 due to io_apic.c cpumask changes. ]
[ -v2, fix build failure ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e6b82b17..dc27705 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -320,16 +320,14 @@ static inline void set_intr_gate(unsigned int n, void *addr)
 	_set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
 }
 
-#define SYS_VECTOR_FREE		0
-#define SYS_VECTOR_ALLOCED	1
-
 extern int first_system_vector;
-extern char system_vectors[];
+/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
+extern unsigned long used_vectors[];
 
 static inline void alloc_system_vector(int vector)
 {
-	if (system_vectors[vector] == SYS_VECTOR_FREE) {
-		system_vectors[vector] = SYS_VECTOR_ALLOCED;
+	if (!test_bit(vector, used_vectors)) {
+		set_bit(vector, used_vectors);
 		if (first_system_vector > vector)
 			first_system_vector = vector;
 	} else
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 8766d30..4bb732e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -46,5 +46,6 @@ extern void native_init_IRQ(void);
 
 /* Interrupt vector management */
 extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
+extern int vector_used_by_percpu_irq(unsigned int vector);
 
 #endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index f7a32a3..b901927 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -118,8 +118,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
 int first_system_vector = 0xfe;
 
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
 /*
  * Debug level, exported for io_apic.c
  */
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 908c1d0..1cbf7c8 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1326,13 +1326,10 @@ next:
 		}
 		if (unlikely(current_vector == vector))
 			continue;
-#ifdef CONFIG_X86_64
-		if (vector == IA32_SYSCALL_VECTOR)
-			goto next;
-#else
-		if (vector == SYSCALL_VECTOR)
+
+		if (test_bit(vector, used_vectors))
 			goto next;
-#endif
+
 		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 6a92f47..61aa2a1 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -110,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
 	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
 };
 
+int vector_used_by_percpu_irq(unsigned int vector)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (per_cpu(vector_irq, cpu)[vector] != -1)
+			return 1;
+	}
+
+	return 0;
+}
+
 /* Overridden in paravirt.c */
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
@@ -146,10 +158,12 @@ void __init native_init_IRQ(void)
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
 	/* IPI for single call function */
-	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
+	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+				 call_function_single_interrupt);
 
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 40c1e62..1020919 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -135,6 +135,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
 	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
 };
 
+int vector_used_by_percpu_irq(unsigned int vector)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (per_cpu(vector_irq, cpu)[vector] != -1)
+			return 1;
+	}
+
+	return 0;
+}
+
 void __init init_ISA_irqs(void)
 {
 	int i;
@@ -187,6 +199,7 @@ static void __init smp_intr_init(void)
 
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 04d242a..4a6dff3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,9 +72,6 @@
 
 #include "cpu/mcheck/mce.h"
 
-DECLARE_BITMAP(used_vectors, NR_VECTORS);
-EXPORT_SYMBOL_GPL(used_vectors);
-
 asmlinkage int system_call(void);
 
 /* Do we ignore FPU interrupts ? */
@@ -89,6 +86,9 @@ gate_desc idt_table[256]
 	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
 #endif
 
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
+EXPORT_SYMBOL_GPL(used_vectors);
+
 static int ignore_nmis;
 
 static inline void conditional_sti(struct pt_regs *regs)
@@ -949,9 +949,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 
 void __init trap_init(void)
 {
-#ifdef CONFIG_X86_32
 	int i;
-#endif
 
 #ifdef CONFIG_EISA
 	void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1008,11 +1006,15 @@ void __init trap_init(void)
 	}
 
 	set_system_trap_gate(SYSCALL_VECTOR, &system_call);
+#endif
 
 	/* Reserve all the builtin and the syscall vector: */
 	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
 		set_bit(i, used_vectors);
 
+#ifdef CONFIG_X86_64
+	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
+#else
 	set_bit(SYSCALL_VECTOR, used_vectors);
 #endif
 	/*
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index a103906..415fab0 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -222,11 +222,16 @@ bool check_syscall_vector(struct lguest *lg)
 int init_interrupts(void)
 {
 	/* If they want some strange system call vector, reserve it now */
-	if (syscall_vector != SYSCALL_VECTOR
-	    && test_and_set_bit(syscall_vector, used_vectors)) {
-		printk("lg: couldn't reserve syscall %u\n", syscall_vector);
-		return -EBUSY;
+	if (syscall_vector != SYSCALL_VECTOR) {
+		if (test_bit(syscall_vector, used_vectors) ||
+		    vector_used_by_percpu_irq(syscall_vector)) {
+			printk(KERN_ERR "lg: couldn't reserve syscall %u\n",
+				 syscall_vector);
+			return -EBUSY;
+		}
+		set_bit(syscall_vector, used_vectors);
 	}
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 36dffab679c7eeb91c2507400cf4da6e9e01164e Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Sat, 20 Dec 2008 10:06:38 +0530
Subject: sched: nominate preferred wakeup cpu, fix

Andrew Morton reported:

> kernel/sched.c: In function 'schedule':
> kernel/sched.c:3679: warning: 'active_balance' may be used uninitialized in this function
>
> This warning is correct - the code is buggy.

In sched.c load_balance_newidle, there's real potential use of
uninitialised variable - fix it.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index ae5ca3f..756d981 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3670,7 +3670,7 @@ redo:
 	}
 
 	if (!ld_moved) {
-		int active_balance;
+		int active_balance = 0;
 
 		schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
 		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-- 
cgit v0.10.2


From 7d87d5365556b1c6e8c00abcc632c3ad1fdc58b8 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 22 Dec 2008 17:33:28 -0800
Subject: x86: use logical apicid in x2apic_cluster's
 x2apic_cpu_mask_to_apicid_and()

These commits:

	commit 95d313cf1c1ecedc8bec5727b09bdacbf67dfc45
	Author: Mike Travis <travis@sgi.com>
	Date:   Tue Dec 16 17:33:54 2008 -0800

	    x86: Add cpu_mask_to_apicid_and

and
	commit 6eeb7c5a99434596c5953a95baa17d2f085664e3
	Author: Mike Travis <travis@sgi.com>
	Date:   Tue Dec 16 17:33:55 2008 -0800

	    x86: update add-cpu_mask_to_apicid_and to use struct cpumask*

broke interrupt delivery on x2apic platforms.  As x2apic cluster mode uses
logical delivery mode, we need to use logical apicid instead of physical apicid
in x2apic_cpu_mask_to_apicid_and()

Impact: fixes the broken interrupt delivery issue on generic x2apic platforms.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Mike Travis <travis@sgi.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index d451c9b..6ce497c 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -114,7 +114,7 @@ static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
 	int cpu;
 
 	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
 	 * May as well be the first.
 	 */
 	cpu = cpumask_first(cpumask);
@@ -130,14 +130,14 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	int cpu;
 
 	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
 	 * May as well be the first.
 	 */
 	for_each_cpu_and(cpu, cpumask, andmask)
 		if (cpumask_test_cpu(cpu, cpu_online_mask))
 			break;
 	if (cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_apicid, cpu);
+		return per_cpu(x86_cpu_to_logical_apicid, cpu);
 	return BAD_APICID;
 }
 
-- 
cgit v0.10.2


From c3d80000e3a812fe5a200d6bde755fbd7fa65481 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 23 Dec 2008 15:15:17 +0100
Subject: x86: export vector_used_by_percpu_irq

Impact: build fix

lguest can be built as a module and makes use of this new symbol:

ERROR: "vector_used_by_percpu_irq" [drivers/lguest/lg.ko] undefined!

export it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3f1d9d1..bce53e1 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -9,6 +9,7 @@
 #include <asm/apic.h>
 #include <asm/io_apic.h>
 #include <asm/smp.h>
+#include <asm/irq.h>
 
 atomic_t irq_err_count;
 
@@ -190,3 +191,5 @@ u64 arch_irq_stat(void)
 #endif
 	return sum;
 }
+
+EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
-- 
cgit v0.10.2


From a73ad3331fdbf4191cf99b83ea9ac7082b6757ba Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 25 Dec 2008 10:39:01 -0800
Subject: x86: unify the implementation of FPU traps

On 32 bits, we may suffer IRQ 13, or supposedly we might have a buggy
implementation which gives spurious trap 16.  We did not check for
this on 64 bits, but there is no reason we can't make the code the
same in both cases.  Furthermore, this is presumably rare, so do the
spurious check last, instead of first.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f37cee7..f5a640b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -689,12 +689,7 @@ void math_error(void __user *ip)
 	cwd = get_fpu_cwd(task);
 	swd = get_fpu_swd(task);
 
-	err = swd & ~cwd & 0x3f;
-
-#ifdef CONFIG_X86_32
-	if (!err)
-		return;
-#endif
+	err = swd & ~cwd;
 
 	if (err & 0x001) {	/* Invalid op */
 		/*
@@ -712,7 +707,9 @@ void math_error(void __user *ip)
 	} else if (err & 0x020) { /* Precision */
 		info.si_code = FPE_FLTRES;
 	} else {
-		info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
+		/* If we're using IRQ 13, or supposedly even some trap 16
+		   implementations, it's possible we get a spurious trap... */
+		return;		/* Spurious trap, no error */
 	}
 	force_sig_info(SIGFPE, &info, task);
 }
-- 
cgit v0.10.2


From 71ab6b245fda6e7597a667a67cce0d26c3c7a14b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 25 Dec 2008 17:18:43 +1030
Subject: x86: remove impossible test in mtrr/main.c

Impact: cleanup

enable_mtrr_cleanup is static, and is never set to anything but 0 or 1.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c78c048..acd9ac5 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -823,16 +823,14 @@ static int enable_mtrr_cleanup __initdata =
 
 static int __init disable_mtrr_cleanup_setup(char *str)
 {
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
+	enable_mtrr_cleanup = 0;
 	return 0;
 }
 early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
 
 static int __init enable_mtrr_cleanup_setup(char *str)
 {
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
+	enable_mtrr_cleanup = 1;
 	return 0;
 }
 early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
-- 
cgit v0.10.2


From bd8b96dfc216eebc72950a6c40da8d3eca8667df Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 26 Dec 2008 09:20:22 +0100
Subject: x86: clean up comment style in arch/x86/kernel/traps.c

Impact: cleanup

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f5a640b..dbfb802 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -292,8 +292,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 8;
 
-	/* This is always a kernel trap and never fixable (and thus must
-	   never return). */
+	/*
+	 * This is always a kernel trap and never fixable (and thus must
+	 * never return).
+	 */
 	for (;;)
 		die(str, regs, error_code);
 }
@@ -524,9 +526,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 }
 
 #ifdef CONFIG_X86_64
-/* Help handler running on IST stack to switch back to user stack
-   for scheduling or signal handling. The actual stack switch is done in
-   entry.S */
+/*
+ * Help handler running on IST stack to switch back to user stack
+ * for scheduling or signal handling. The actual stack switch is done in
+ * entry.S
+ */
 asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
 	struct pt_regs *regs = eregs;
@@ -536,8 +540,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 	/* Exception from user space */
 	else if (user_mode(eregs))
 		regs = task_pt_regs(current);
-	/* Exception from kernel and interrupts are enabled. Move to
-	   kernel process stack. */
+	/*
+	 * Exception from kernel and interrupts are enabled. Move to
+	 * kernel process stack.
+	 */
 	else if (eregs->flags & X86_EFLAGS_IF)
 		regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 	if (eregs != regs)
@@ -707,8 +713,10 @@ void math_error(void __user *ip)
 	} else if (err & 0x020) { /* Precision */
 		info.si_code = FPE_FLTRES;
 	} else {
-		/* If we're using IRQ 13, or supposedly even some trap 16
-		   implementations, it's possible we get a spurious trap... */
+		/*
+		 * If we're using IRQ 13, or supposedly even some trap 16
+		 * implementations, it's possible we get a spurious trap...
+		 */
 		return;		/* Spurious trap, no error */
 	}
 	force_sig_info(SIGFPE, &info, task);
-- 
cgit v0.10.2


From 51bc39f4ba35bae153b32145077fb1109bcae14c Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 26 Dec 2008 12:23:00 +0900
Subject: hrtimer: remove #include <linux/irq.h>

Impact: cleanup

<linux/irq.h> can be removed and should be, because:

  - hrtimer doesn't use any irq feature.
  - <linux/irq.h> shouldn't be include from generic code.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 47e6334..0ad3f3d 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -32,7 +32,6 @@
  */
 
 #include <linux/cpu.h>
-#include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/hrtimer.h>
-- 
cgit v0.10.2


From f9af0e70911e9d6cc9a68f784dca86415486084d Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 26 Dec 2008 12:24:24 +0900
Subject: irq: for_each_irq_desc() move to irqnr.h

Impact: cleanup

before CONFIG_SPARSE_IRQ age, for_each_irq_desc() sat in irqnr.h and
could be called from generic code.

CONFIG_SPARSE_IRQ breaks this assumption, but SPARSE_IRQ version
for_each_irq_desc() also can move into irqnr.h easily.

Also, this patch unifies CONFIG_SPARSE_IRQ and !CONFIG_SPARSE_IRQ
for_each_irq_desc().

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 98564dc..69da275 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -202,33 +202,17 @@ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc
 
 #ifndef CONFIG_SPARSE_IRQ
 extern struct irq_desc irq_desc[NR_IRQS];
-
-static inline struct irq_desc *irq_to_desc(unsigned int irq)
-{
-	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
-}
-static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
-{
-	return irq_to_desc(irq);
-}
-
-#else
-
-extern struct irq_desc *irq_to_desc(unsigned int irq);
-extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+#else /* CONFIG_SPARSE_IRQ */
 extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
 
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq))
-# define for_each_irq_desc_reverse(irq, desc)                          \
-	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq))
-
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
 #define kstat_incr_irqs_this_cpu(irqno, DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()]++)
 
-#endif
+#endif /* CONFIG_SPARSE_IRQ */
+
+extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
 
 static inline struct irq_desc *
 irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 95d2b74..c4a59c7 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -15,20 +15,19 @@
 
 # define for_each_irq_desc_reverse(irq, desc)                          \
 	for (irq = nr_irqs - 1; irq >= 0; irq--)
-#else
+#else /* CONFIG_GENERIC_HARDIRQS */
 
 extern int nr_irqs;
+extern struct irq_desc *irq_to_desc(unsigned int irq);
 
-#ifndef CONFIG_SPARSE_IRQ
+# define for_each_irq_desc(irq, desc)					\
+	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
+	     irq++, desc = irq_to_desc(irq))
+# define for_each_irq_desc_reverse(irq, desc)				\
+	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0;	\
+	     irq--, desc = irq_to_desc(irq))
 
-struct irq_desc;
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
-# define for_each_irq_desc_reverse(irq, desc)                          \
-	for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);        \
-	    irq >= 0; irq--, desc--)
-#endif
-#endif
+#endif /* CONFIG_GENERIC_HARDIRQS */
 
 #define for_each_irq_nr(irq)                   \
        for (irq = 0; irq < nr_irqs; irq++)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6492400..4db7d2d 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -203,7 +203,7 @@ out_unlock:
 	return desc;
 }
 
-#else
+#else /* !CONFIG_SPARSE_IRQ */
 
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
@@ -218,7 +218,16 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	}
 };
 
-#endif
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
+}
+
+struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+{
+	return irq_to_desc(irq);
+}
+#endif /* !CONFIG_SPARSE_IRQ */
 
 /*
  * What should we do if we get a hw irq event on an illegal vector?
-- 
cgit v0.10.2


From 26ddd8d5cac8a563953d5febe8c6e40909f7bce1 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 26 Dec 2008 14:24:10 +0900
Subject: proc: remove ifdef CONFIG_SPARSE_IRQ from stat.c

Impact: cleanup

irq_desc can be NULL when CONFIG_SPARSE_IRQ=y only.
therefore, NULL checking can move into kstat_irqs_cpu() of SPARSE_IRQ version.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: "Yinghai Lu" <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 3bb1cf1..f75efa2 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -9,6 +9,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/time.h>
+#include <linux/irqnr.h>
 #include <asm/cputime.h>
 
 #ifndef arch_irq_stat_cpu
@@ -45,10 +46,6 @@ static int show_stat(struct seq_file *p, void *v)
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
 		for_each_irq_nr(j) {
-#ifdef CONFIG_SPARSE_IRQ
-			if (!irq_to_desc(j))
-				continue;
-#endif
 			sum += kstat_irqs_cpu(j, i);
 		}
 		sum += arch_irq_stat_cpu(i);
@@ -95,12 +92,6 @@ static int show_stat(struct seq_file *p, void *v)
 	/* sum again ? it could be updated? */
 	for_each_irq_nr(j) {
 		per_irq_sum = 0;
-#ifdef CONFIG_SPARSE_IRQ
-		if (!irq_to_desc(j)) {
-			seq_printf(p, " %u", per_irq_sum);
-			continue;
-		}
-#endif
 		for_each_possible_cpu(i)
 			per_irq_sum += kstat_irqs_cpu(j, i);
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 4db7d2d..03479df 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -448,7 +448,7 @@ void early_init_irq_lock_class(void)
 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	return desc->kstat_irqs[cpu];
+	return desc ? desc->kstat_irqs[cpu] : 0;
 }
 #endif
 EXPORT_SYMBOL(kstat_irqs_cpu);
-- 
cgit v0.10.2


From 18eefedfe8ad33e8fc7614c13359e29a9fab4644 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 26 Dec 2008 12:29:48 +0900
Subject: irq: simplify for_each_irq_desc() usage

Impact: cleanup

all for_each_irq_desc() usage point have !desc check.
then its check can move into for_each_irq_desc() macro.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index a74887b..2fe543f 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1345,8 +1345,6 @@ void __setup_vector_irq(int cpu)
 
 	/* Mark the inuse vectors */
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
 		cfg = desc->chip_data;
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
@@ -1730,8 +1728,6 @@ __apicdebuginit(void) print_IO_APIC(void)
 	for_each_irq_desc(irq, desc) {
 		struct irq_pin_list *entry;
 
-		if (!desc)
-			continue;
 		cfg = desc->chip_data;
 		entry = cfg->irq_2_pin;
 		if (!entry)
@@ -2378,9 +2374,6 @@ static void ir_irq_migration(struct work_struct *work)
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
-
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
@@ -2671,9 +2664,6 @@ static inline void init_IO_APIC_traps(void)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
-
 		cfg = desc->chip_data;
 		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 46625cd..e26733a9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -142,9 +142,6 @@ static void init_evtchn_cpu_bindings(void)
 
 	/* By default all event channels notify CPU#0. */
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		desc->affinity = cpumask_of_cpu(0);
 	}
 #endif
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index c4a59c7..5504a5c 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -22,10 +22,14 @@ extern struct irq_desc *irq_to_desc(unsigned int irq);
 
 # define for_each_irq_desc(irq, desc)					\
 	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
-	     irq++, desc = irq_to_desc(irq))
+	     irq++, desc = irq_to_desc(irq))				\
+		if (desc)
+
+
 # define for_each_irq_desc_reverse(irq, desc)				\
 	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0;	\
-	     irq--, desc = irq_to_desc(irq))
+	     irq--, desc = irq_to_desc(irq))				\
+		if (desc)
 
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 650ce41..cc0f732 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -40,9 +40,6 @@ unsigned long probe_irq_on(void)
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
 	for_each_irq_desc_reverse(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			/*
@@ -71,9 +68,6 @@ unsigned long probe_irq_on(void)
 	 * happened in the previous stage, it may have masked itself)
 	 */
 	for_each_irq_desc_reverse(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -92,9 +86,6 @@ unsigned long probe_irq_on(void)
 	 * Now filter out any obviously spurious interrupts
 	 */
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -133,9 +124,6 @@ unsigned int probe_irq_mask(unsigned long val)
 	int i;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -178,9 +166,6 @@ int probe_irq_off(unsigned long val)
 	unsigned int status;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 03479df..7dbdfe5 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -437,9 +437,6 @@ void early_init_irq_lock_class(void)
 	int i;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 	}
 }
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 3738107..dd364c1 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -91,9 +91,6 @@ static int misrouted_irq(int irq)
 	int i, ok = 0;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		if (!i)
 			 continue;
 
@@ -115,8 +112,6 @@ static void poll_spurious_irqs(unsigned long dummy)
 	for_each_irq_desc(i, desc) {
 		unsigned int status;
 
-		if (!desc)
-			continue;
 		if (!i)
 			 continue;
 
-- 
cgit v0.10.2


From 00c23634879062d1c38d60128bf150c394a359e8 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 23 Dec 2008 17:29:00 -0800
Subject: sparseirq: remove duplicated arch_early_irq_init()

Impact: clean up

We already have a weak copy of this function in init/main.c

Signed-off-by: Yinghai <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 7dbdfe5..06b05a4 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -56,10 +56,6 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
-void __init __attribute__((weak)) arch_early_irq_init(void)
-{
-}
-
 #ifdef CONFIG_SPARSE_IRQ
 static struct irq_desc irq_desc_init = {
 	.irq	    = -1,
-- 
cgit v0.10.2


From 393d68fb9929817cde7ab31c82d66fcb28ad35fc Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:38 +1030
Subject: cpumask: x86: Introduce cpumask_of_{node,pcibus} to replace
 {node,pcibus}_to_cpumask

Impact: New APIs

The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these
return a pointer to a struct cpumask.  Part of removing cpumasks from
the stack.

Also makes __pcibus_to_node take a const pointer.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 875b38e..52d80d3 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -98,9 +98,9 @@ static inline void early_quirks(void) { }
 
 #ifdef CONFIG_NUMA
 /* Returns the node based on pci bus */
-static inline int __pcibus_to_node(struct pci_bus *bus)
+static inline int __pcibus_to_node(const struct pci_bus *bus)
 {
-	struct pci_sysdata *sd = bus->sysdata;
+	const struct pci_sysdata *sd = bus->sysdata;
 
 	return sd->node;
 }
@@ -109,6 +109,12 @@ static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus)
 {
 	return node_to_cpumask(__pcibus_to_node(bus));
 }
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+	return cpumask_of_node(__pcibus_to_node(bus));
+}
 #endif
 
 #endif /* _ASM_X86_PCI_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ff386ff..45da5dc 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -61,13 +61,19 @@ static inline int cpu_to_node(int cpu)
  *
  * Side note: this function creates the returned cpumask on the stack
  * so with a high NR_CPUS count, excessive stack space is used.  The
- * node_to_cpumask_ptr function should be used whenever possible.
+ * cpumask_of_node function should be used whenever possible.
  */
 static inline cpumask_t node_to_cpumask(int node)
 {
 	return node_to_cpumask_map[node];
 }
 
+/* Returns a bitmask of CPUs on Node 'node'. */
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+	return &node_to_cpumask_map[node];
+}
+
 #else /* CONFIG_X86_64 */
 
 /* Mappings between node number and cpus on that node. */
@@ -82,7 +88,7 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 extern int cpu_to_node(int cpu);
 extern int early_cpu_to_node(int cpu);
-extern const cpumask_t *_node_to_cpumask_ptr(int node);
+extern const cpumask_t *cpumask_of_node(int node);
 extern cpumask_t node_to_cpumask(int node);
 
 #else	/* !CONFIG_DEBUG_PER_CPU_MAPS */
@@ -103,7 +109,7 @@ static inline int early_cpu_to_node(int cpu)
 }
 
 /* Returns a pointer to the cpumask of CPUs on Node 'node'. */
-static inline const cpumask_t *_node_to_cpumask_ptr(int node)
+static inline const cpumask_t *cpumask_of_node(int node)
 {
 	return &node_to_cpumask_map[node];
 }
@@ -116,12 +122,15 @@ static inline cpumask_t node_to_cpumask(int node)
 
 #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
 
-/* Replace default node_to_cpumask_ptr with optimized version */
+/*
+ * Replace default node_to_cpumask_ptr with optimized version
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
 #define node_to_cpumask_ptr(v, node)		\
-		const cpumask_t *v = _node_to_cpumask_ptr(node)
+		const cpumask_t *v = cpumask_of_node(node)
 
 #define node_to_cpumask_ptr_next(v, node)	\
-			   v = _node_to_cpumask_ptr(node)
+			   v = cpumask_of_node(node)
 
 #endif /* CONFIG_X86_64 */
 
@@ -187,7 +196,7 @@ extern int __node_distance(int, int);
 #define	cpu_to_node(cpu)	0
 #define	early_cpu_to_node(cpu)	0
 
-static inline const cpumask_t *_node_to_cpumask_ptr(int node)
+static inline const cpumask_t *cpumask_of_node(int node)
 {
 	return &cpu_online_map;
 }
@@ -200,12 +209,15 @@ static inline int node_to_first_cpu(int node)
 	return first_cpu(cpu_online_map);
 }
 
-/* Replace default node_to_cpumask_ptr with optimized version */
+/*
+ * Replace default node_to_cpumask_ptr with optimized version
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
 #define node_to_cpumask_ptr(v, node)		\
-		const cpumask_t *v = _node_to_cpumask_ptr(node)
+		const cpumask_t *v = cpumask_of_node(node)
 
 #define node_to_cpumask_ptr_next(v, node)	\
-			   v = _node_to_cpumask_ptr(node)
+			   v = cpumask_of_node(node)
 #endif
 
 #include <asm-generic/topology.h>
@@ -214,8 +226,7 @@ static inline int node_to_first_cpu(int node)
 /* Returns the number of the first CPU on Node 'node'. */
 static inline int node_to_first_cpu(int node)
 {
-	node_to_cpumask_ptr(mask, node);
-	return first_cpu(*mask);
+	return cpumask_first(cpumask_of_node(node));
 }
 #endif
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 1c20842..8e8b119 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -334,25 +334,25 @@ static const cpumask_t cpu_mask_none;
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
-const cpumask_t *_node_to_cpumask_ptr(int node)
+const cpumask_t *cpumask_of_node(int node)
 {
 	if (node_to_cpumask_map == NULL) {
 		printk(KERN_WARNING
-			"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
+			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
 			node);
 		dump_stack();
 		return (const cpumask_t *)&cpu_online_map;
 	}
 	if (node >= nr_node_ids) {
 		printk(KERN_WARNING
-			"_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
+			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
 			node, nr_node_ids);
 		dump_stack();
 		return &cpu_mask_none;
 	}
 	return &node_to_cpumask_map[node];
 }
-EXPORT_SYMBOL(_node_to_cpumask_ptr);
+EXPORT_SYMBOL(cpumask_of_node);
 
 /*
  * Returns a bitmask of CPUs on Node 'node'.
-- 
cgit v0.10.2


From 96d76a74870d5f11ce2abdd09a8dcdc401d714d1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:38 +1030
Subject: cpumask: sparc: Introduce cpumask_of_{node,pcibus} to replace
 {node,pcibus}_to_cpumask

Impact: New APIs

The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these
return a pointer to a struct cpumask.  Part of removing cpumasks from
the stack.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index 001c040..afd3cc1 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -16,8 +16,12 @@ static inline cpumask_t node_to_cpumask(int node)
 {
 	return numa_cpumask_lookup_table[node];
 }
+#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
 
-/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
+/*
+ * Returns a pointer to the cpumask of CPUs on Node 'node'.
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
 #define node_to_cpumask_ptr(v, node)		\
 		cpumask_t *v = &(numa_cpumask_lookup_table[node])
 
@@ -26,9 +30,7 @@ static inline cpumask_t node_to_cpumask(int node)
 
 static inline int node_to_first_cpu(int node)
 {
-	cpumask_t tmp;
-	tmp = node_to_cpumask(node);
-	return first_cpu(tmp);
+	return cpumask_first(cpumask_of_node(node));
 }
 
 struct pci_bus;
diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index df2efb7..4f6098d 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -778,7 +778,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
 out:
 	nid = of_node_to_nid(dp);
 	if (nid != -1) {
-		cpumask_t numa_mask = node_to_cpumask(nid);
+		cpumask_t numa_mask = *cpumask_of_node(nid);
 
 		irq_set_affinity(irq, &numa_mask);
 	}
diff --git a/arch/sparc64/kernel/pci_msi.c b/arch/sparc64/kernel/pci_msi.c
index 0d0cd81..4ef282e 100644
--- a/arch/sparc64/kernel/pci_msi.c
+++ b/arch/sparc64/kernel/pci_msi.c
@@ -286,7 +286,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
 
 	nid = pbm->numa_node;
 	if (nid != -1) {
-		cpumask_t numa_mask = node_to_cpumask(nid);
+		cpumask_t numa_mask = *cpumask_of_node(nid);
 
 		irq_set_affinity(irq, &numa_mask);
 	}
-- 
cgit v0.10.2


From 7479a2939df4957ba794cce814379b6d10914bdc Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:39 +1030
Subject: cpumask: sh: Introduce cpumask_of_{node,pcibus} to replace
 {node,pcibus}_to_cpumask

Impact: New APIs

The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these
return a pointer to a struct cpumask.  Part of removing cpumasks from
the stack.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index 95f0085..9aa160d 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -33,6 +33,7 @@
 #define parent_node(node)	((void)(node),0)
 
 #define node_to_cpumask(node)	((void)node, cpu_online_map)
+#define cpumask_of_node(node)	((void)node, cpu_online_mask)
 #define node_to_first_cpu(node)	((void)(node),0)
 
 #define pcibus_to_node(bus)	((void)(bus), -1)
-- 
cgit v0.10.2


From 86c6f274f52c3e991d429869780945c0790e7b65 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:39 +1030
Subject: cpumask: powerpc: Introduce cpumask_of_{node,pcibus} to replace
 {node,pcibus}_to_cpumask

Impact: New APIs

The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these
return a pointer to a struct cpumask.  Part of removing cpumasks from
the stack.

(Also replaces powerpc internal uses of node_to_cpumask).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c32da6f..bcf25c2 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -22,11 +22,11 @@ static inline cpumask_t node_to_cpumask(int node)
 	return numa_cpumask_lookup_table[node];
 }
 
+#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
+
 static inline int node_to_first_cpu(int node)
 {
-	cpumask_t tmp;
-	tmp = node_to_cpumask(node);
-	return first_cpu(tmp);
+	return cpumask_first(cpumask_of_node(node));
 }
 
 int of_node_to_nid(struct device_node *device);
@@ -46,6 +46,10 @@ static inline int pcibus_to_node(struct pci_bus *bus)
 					node_to_cpumask(pcibus_to_node(bus)) \
 				)
 
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
+				 cpu_all_mask :				\
+				 cpumask_of_node(pcibus_to_node(bus)))
+
 /* sched_domains SD_NODE_INIT for PPC64 machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
 	.span			= CPU_MASK_NONE,	\
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
index 906a0a2..1410443 100644
--- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -80,10 +80,10 @@ static void cpu_affinity_set(struct spu *spu, int cpu)
 	u64 route;
 
 	if (nr_cpus_node(spu->node)) {
-		cpumask_t spumask = node_to_cpumask(spu->node);
-		cpumask_t cpumask = node_to_cpumask(cpu_to_node(cpu));
+		const struct cpumask *spumask = cpumask_of_node(spu->node),
+			*cpumask = cpumask_of_node(cpu_to_node(cpu));
 
-		if (!cpus_intersects(spumask, cpumask))
+		if (!cpumask_intersects(spumask, cpumask))
 			return;
 	}
 
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 2ad914c..6a0ad19 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -166,9 +166,9 @@ void spu_update_sched_info(struct spu_context *ctx)
 static int __node_allowed(struct spu_context *ctx, int node)
 {
 	if (nr_cpus_node(node)) {
-		cpumask_t mask = node_to_cpumask(node);
+		const struct cpumask *mask = cpumask_of_node(node);
 
-		if (cpus_intersects(mask, ctx->cpus_allowed))
+		if (cpumask_intersects(mask, &ctx->cpus_allowed))
 			return 1;
 	}
 
-- 
cgit v0.10.2


From fbb776c3ca4501d5a2821bf1e9bceefcaec7ae47 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:40 +1030
Subject: cpumask: IA64: Introduce cpumask_of_{node,pcibus} to replace
 {node,pcibus}_to_cpumask

Impact: New APIs

The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these
return a pointer to a struct cpumask.  Part of removing cpumasks from
the stack.

We can also use the new for_each_cpu_and() to avoid a temporary cpumask,
and a gratuitous test in sn_topology_show.

(Includes fix from KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 35bcb64..66f0f1e 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -34,6 +34,7 @@
  * Returns a bitmask of CPUs on Node 'node'.
  */
 #define node_to_cpumask(node) (node_to_cpu_mask[node])
+#define cpumask_of_node(node) (&node_to_cpu_mask[node])
 
 /*
  * Returns the number of the node containing Node 'nid'.
@@ -45,7 +46,7 @@
 /*
  * Returns the number of the first CPU on Node 'node'.
  */
-#define node_to_first_cpu(node) (first_cpu(node_to_cpumask(node)))
+#define node_to_first_cpu(node) (cpumask_first(cpumask_of_node(node)))
 
 /*
  * Determines the node for a given pci bus
@@ -121,6 +122,10 @@ extern void arch_fix_phys_package_id(int num, u32 slot);
 					node_to_cpumask(pcibus_to_node(bus)) \
 				)
 
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
+				 cpu_all_mask :				\
+				 cpumask_from_node(pcibus_to_node(bus)))
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_IA64_TOPOLOGY_H */
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index bd7acc7..54ae373 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -1001,7 +1001,7 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
 	node = pxm_to_node(pxm);
 
 	if (node >= MAX_NUMNODES || !node_online(node) ||
-	    cpus_empty(node_to_cpumask(node)))
+	    cpumask_empty(cpumask_of_node(node)))
 		return AE_OK;
 
 	/* We know a gsi to node mapping! */
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index c8adecd..5cfd3d9 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -695,32 +695,31 @@ get_target_cpu (unsigned int gsi, int irq)
 #ifdef CONFIG_NUMA
 	{
 		int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
-		cpumask_t cpu_mask;
+		const struct cpumask *cpu_mask;
 
 		iosapic_index = find_iosapic(gsi);
 		if (iosapic_index < 0 ||
 		    iosapic_lists[iosapic_index].node == MAX_NUMNODES)
 			goto skip_numa_setup;
 
-		cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
-		cpus_and(cpu_mask, cpu_mask, domain);
-		for_each_cpu_mask(numa_cpu, cpu_mask) {
-			if (!cpu_online(numa_cpu))
-				cpu_clear(numa_cpu, cpu_mask);
+		cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node);
+		num_cpus = 0;
+		for_each_cpu_and(numa_cpu, cpu_mask, &domain) {
+			if (cpu_online(numa_cpu))
+				num_cpus++;
 		}
 
-		num_cpus = cpus_weight(cpu_mask);
-
 		if (!num_cpus)
 			goto skip_numa_setup;
 
 		/* Use irq assignment to distribute across cpus in node */
 		cpu_index = irq % num_cpus;
 
-		for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
-			numa_cpu = next_cpu(numa_cpu, cpu_mask);
+		for_each_cpu_and(numa_cpu, cpu_mask, &domain)
+			if (cpu_online(numa_cpu) && i++ >= cpu_index)
+				break;
 
-		if (numa_cpu != NR_CPUS)
+		if (numa_cpu < nr_cpu_ids)
 			return cpu_physical_id(numa_cpu);
 	}
 skip_numa_setup:
@@ -731,7 +730,7 @@ skip_numa_setup:
 	 * case of NUMA.)
 	 */
 	do {
-		if (++cpu >= NR_CPUS)
+		if (++cpu >= nr_cpu_ids)
 			cpu = 0;
 	} while (!cpu_online(cpu) || !cpu_isset(cpu, domain));
 
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 636588e..be33947 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -385,7 +385,6 @@ static int sn_topology_show(struct seq_file *s, void *d)
 	int j;
 	const char *slabname;
 	int ordinal;
-	cpumask_t cpumask;
 	char slice;
 	struct cpuinfo_ia64 *c;
 	struct sn_hwperf_port_info *ptdata;
@@ -473,23 +472,21 @@ static int sn_topology_show(struct seq_file *s, void *d)
 		 * CPUs on this node, if any
 		 */
 		if (!SN_HWPERF_IS_IONODE(obj)) {
-			cpumask = node_to_cpumask(ordinal);
-			for_each_online_cpu(i) {
-				if (cpu_isset(i, cpumask)) {
-					slice = 'a' + cpuid_to_slice(i);
-					c = cpu_data(i);
-					seq_printf(s, "cpu %d %s%c local"
-						" freq %luMHz, arch ia64",
-						i, obj->location, slice,
-						c->proc_freq / 1000000);
-					for_each_online_cpu(j) {
-						seq_printf(s, j ? ":%d" : ", dist %d",
-							node_distance(
+			for_each_cpu_and(i, cpu_online_mask,
+					 cpumask_of_node(ordinal)) {
+				slice = 'a' + cpuid_to_slice(i);
+				c = cpu_data(i);
+				seq_printf(s, "cpu %d %s%c local"
+					   " freq %luMHz, arch ia64",
+					   i, obj->location, slice,
+					   c->proc_freq / 1000000);
+				for_each_online_cpu(j) {
+					seq_printf(s, j ? ":%d" : ", dist %d",
+						   node_distance(
 						    	cpu_to_node(i),
 						    	cpu_to_node(j)));
-					}
-					seq_putc(s, '\n');
 				}
+				seq_putc(s, '\n');
 			}
 		}
 	}
-- 
cgit v0.10.2


From b4a2f916a8326065816a0743dd1b0ca2ffd18f5f Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:40 +1030
Subject: cpumask: Mips: Introduce cpumask_of_{node,pcibus} to replace
 {node,pcibus}_to_cpumask

Impact: New APIs

The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these
return a pointer to a struct cpumask.  Part of removing cpumasks from
the stack.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 7785bec..c1c3f5b 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -25,11 +25,13 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
 #define cpu_to_node(cpu)	(sn_cpu_info[(cpu)].p_nodeid)
 #define parent_node(node)	(node)
 #define node_to_cpumask(node)	(hub_data(node)->h_cpus)
-#define node_to_first_cpu(node)	(first_cpu(node_to_cpumask(node)))
+#define cpumask_of_node(node)	(&hub_data(node)->h_cpus)
+#define node_to_first_cpu(node)	(cpumask_first(cpumask_of_node(node)))
 struct pci_bus;
 extern int pcibus_to_node(struct pci_bus *);
 
 #define pcibus_to_cpumask(bus)	(cpu_online_map)
+#define cpumask_of_pcibus(bus)	(cpu_online_mask)
 
 extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
 
-- 
cgit v0.10.2


From 2258a5bb1064351b552aceaff29393967d694fa3 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:41 +1030
Subject: cpumask: alpha: Introduce cpumask_of_{node,pcibus} to replace
 {node,pcibus}_to_cpumask

Impact: New APIs

The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these
return a pointer to a struct cpumask.  Part of removing cpumasks from
the stack.

I'm not sure the existing code even compiles, but new version is
straightforward.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Richard Henderson <rth@twiddle.net>

diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h
index 149532e..b4f284c 100644
--- a/arch/alpha/include/asm/topology.h
+++ b/arch/alpha/include/asm/topology.h
@@ -39,7 +39,24 @@ static inline cpumask_t node_to_cpumask(int node)
 	return node_cpu_mask;
 }
 
+extern struct cpumask node_to_cpumask_map[];
+/* FIXME: This is dumb, recalculating every time.  But simple. */
+static const struct cpumask *cpumask_of_node(int node)
+{
+	int cpu;
+
+	cpumask_clear(&node_to_cpumask_map[node]);
+
+	for_each_online_cpu(cpu) {
+		if (cpu_to_node(cpu) == node)
+			cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
+	}
+
+	return &node_to_cpumask_map[node];
+}
+
 #define pcibus_to_cpumask(bus)	(cpu_online_map)
+#define cpumask_of_pcibus(bus)	(cpu_online_mask)
 
 #endif /* !CONFIG_NUMA */
 # include <asm-generic/topology.h>
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index a449e99..02bee69 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -79,6 +79,11 @@ int alpha_l3_cacheshape;
 unsigned long alpha_verbose_mcheck = CONFIG_VERBOSE_MCHECK_ON;
 #endif
 
+#ifdef CONFIG_NUMA
+struct cpumask node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_to_cpumask_map);
+#endif
+
 /* Which processor we booted from.  */
 int boot_cpuid;
 
-- 
cgit v0.10.2


From 030bb203e01db12e3f2866799f4f03a114d06349 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:41 +1030
Subject: cpumask: cpu_coregroup_mask(): x86

Impact: New API

Like cpu_coregroup_map, but returns a (const) pointer.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 45da5dc..168203c 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -231,6 +231,7 @@ static inline int node_to_first_cpu(int node)
 #endif
 
 extern cpumask_t cpu_coregroup_map(int cpu);
+extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
 #ifdef ENABLE_TOPO_DEFINES
 #define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 468c2f9..d5274b6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -497,7 +497,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 }
 
 /* maps the cpu to the sched domain representing multi-core */
-cpumask_t cpu_coregroup_map(int cpu)
+const struct cpumask *cpu_coregroup_mask(int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	/*
@@ -505,9 +505,14 @@ cpumask_t cpu_coregroup_map(int cpu)
 	 * And for power savings, we return cpu_core_map
 	 */
 	if (sched_mc_power_savings || sched_smt_power_savings)
-		return per_cpu(cpu_core_map, cpu);
+		return &per_cpu(cpu_core_map, cpu);
 	else
-		return c->llc_shared_map;
+		return &c->llc_shared_map;
+}
+
+cpumask_t cpu_coregroup_map(int cpu)
+{
+	return *cpu_coregroup_mask(cpu);
 }
 
 static void impress_friends(void)
-- 
cgit v0.10.2


From a0ae09b46a516f05ea76e3419ad43c46f52c1165 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:42 +1030
Subject: cpumask: cpu_coregroup_mask(): sparc

Like cpu_coregroup_map, but returns a (const) pointer.

Compile-tested on sparc64 (defconfig).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index afd3cc1..3270cfb 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -84,5 +84,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
 #endif /* CONFIG_SMP */
 
 #define cpu_coregroup_map(cpu)			(cpu_core_map[cpu])
+#define cpu_coregroup_mask(cpu)			(&cpu_core_map[cpu])
 
 #endif /* _ASM_SPARC64_TOPOLOGY_H */
-- 
cgit v0.10.2


From 9be3eec2c83848a1ca57ebad13c63c95d0df01e2 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:42 +1030
Subject: cpumask: cpu_coregroup_mask(): s390

Like cpu_coregroup_map, but returns a (const) pointer.

Compile-tested on s390 (defconfig).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index d96c916..fff4a86 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -6,6 +6,7 @@
 #define mc_capable()	(1)
 
 cpumask_t cpu_coregroup_map(unsigned int cpu);
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
 
 extern cpumask_t cpu_core_map[NR_CPUS];
 
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index a947899..0601cd3 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -93,6 +93,11 @@ cpumask_t cpu_coregroup_map(unsigned int cpu)
 	return mask;
 }
 
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
+{
+	return &cpu_core_map[cpu];
+}
+
 static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
 {
 	unsigned int cpu;
-- 
cgit v0.10.2


From be4d638c1597580ed2294d899d9f1a2cd10e462c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:43 +1030
Subject: cpumask: Replace cpu_coregroup_map with cpu_coregroup_mask

cpu_coregroup_map returned a cpumask_t: it's going away.

(Note, the sched part of this patch won't apply meaningfully to the
sched tree, but I'm posting it to show the goal).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ingo Molnar <mingo@redhat.com>

diff --git a/block/blk.h b/block/blk.h
index d2e49af..6e1ed40 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -99,8 +99,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
 static inline int blk_cpu_to_group(int cpu)
 {
 #ifdef CONFIG_SCHED_MC
-	cpumask_t mask = cpu_coregroup_map(cpu);
-	return first_cpu(mask);
+	const struct cpumask *mask = cpu_coregroup_mask(cpu);
+	return cpumask_first(mask);
 #elif defined(CONFIG_SCHED_SMT)
 	return first_cpu(per_cpu(cpu_sibling_map, cpu));
 #else
diff --git a/kernel/sched.c b/kernel/sched.c
index d2d16d1..4292923 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7119,7 +7119,7 @@ cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
 {
 	int group;
 #ifdef CONFIG_SCHED_MC
-	*mask = cpu_coregroup_map(cpu);
+	*mask = *cpu_coregroup_mask(cpu);
 	cpus_and(*mask, *mask, *cpu_map);
 	group = first_cpu(*mask);
 #elif defined(CONFIG_SCHED_SMT)
@@ -7485,7 +7485,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		sd = &per_cpu(core_domains, i);
 		SD_INIT(sd, MC);
 		set_domain_attribute(sd, attr);
-		sd->span = cpu_coregroup_map(i);
+		sd->span = *cpu_coregroup_mask(i);
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
 		p->child = sd;
@@ -7528,7 +7528,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		SCHED_CPUMASK_VAR(this_core_map, allmasks);
 		SCHED_CPUMASK_VAR(send_covered, allmasks);
 
-		*this_core_map = cpu_coregroup_map(i);
+		*this_core_map = *cpu_coregroup_mask(i);
 		cpus_and(*this_core_map, *this_core_map, *cpu_map);
 		if (i != first_cpu(*this_core_map))
 			continue;
-- 
cgit v0.10.2


From 8b07cd44511f3aa78dd912cca6493275a6787dc5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 26 Dec 2008 19:10:04 +0100
Subject: sparseirq: do not printk when migrating IRQ descriptors

Impact: reduce printk noise

There were a couple of leftover KERN_DEBUG debugging printks, remove
them. Also clarify an error message.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 089c374..a565ce3 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -74,10 +74,8 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 
 	node = cpu_to_node(cpu);
 	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
-	printk(KERN_DEBUG "  move irq_desc for %d to cpu %d node %d\n",
-		 irq, cpu, node);
 	if (!desc) {
-		printk(KERN_ERR "can not get new irq_desc for moving\n");
+		printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq);
 		/* still use old one */
 		desc = old_desc;
 		goto out_unlock;
@@ -106,8 +104,6 @@ struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
 		return desc;
 
 	old_cpu = desc->cpu;
-	printk(KERN_DEBUG
-		 "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu);
 	if (old_cpu != cpu) {
 		node = cpu_to_node(cpu);
 		old_node = cpu_to_node(old_cpu);
-- 
cgit v0.10.2


From 793f7b12a0c95e7bfec1badf9628043fb78fd440 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 26 Dec 2008 19:02:20 +0100
Subject: sparseirq: fix desc->lock init

Impact: cleanup

init_one_irq_desc() does not initialize the desc->lock properly -
you cannot init a lock by memcpying some other lock on it.

This happens to work right now (because irq_desc_init is never in use),
but it's a dangerous construct nevertheless, so fix it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 06b05a4..893da67 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -93,6 +93,8 @@ void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
 static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
 {
 	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
+
+	spin_lock_init(&desc->lock);
 	desc->irq = irq;
 #ifdef CONFIG_SMP
 	desc->cpu = cpu;
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index a565ce3..ecf765c 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -42,6 +42,7 @@ static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
 		 struct irq_desc *desc, int cpu)
 {
 	memcpy(desc, old_desc, sizeof(struct irq_desc));
+	spin_lock_init(&desc->lock);
 	desc->cpu = cpu;
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 	init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
-- 
cgit v0.10.2


From 0b936bfdeb85784b7df132b2c64fb34ad9b11ffa Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Tue, 23 Dec 2008 21:51:28 +0530
Subject: x86: reboot.c declare port_cf9_safe before they get used

Impact: cleanup, avoid sparse warning

Include "../pci/pci.h" for port_cf9_safe

Fixes this sparse warning:

  arch/x86/kernel/reboot.c:43:6: warning: symbol 'port_cf9_safe' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718d..b165eb0 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -22,6 +22,7 @@
 #endif
 
 #include <mach_ipi.h>
+#include "../pci/pci.h"
 
 
 /*
-- 
cgit v0.10.2


From b6b301aa9fba57b114c3a00f5f43abf672bd4ecd Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Tue, 23 Dec 2008 21:52:33 +0530
Subject: x86: apic.c x2apic_preenabled and disable_x2apic should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:

  arch/x86/kernel/apic.c:103:5: warning: symbol 'disable_x2apic' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 25caa07..e644bf6 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -93,7 +93,7 @@ static inline u32 native_apic_msr_read(u32 reg)
 }
 
 #ifndef CONFIG_X86_32
-extern int x2apic, x2apic_preenabled;
+extern int x2apic;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void enable_IR_x2apic(void);
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 7397911..3a961bd 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -97,8 +97,8 @@ __setup("apicpmtimer", setup_apicpmtimer);
 #ifdef HAVE_X2APIC
 int x2apic;
 /* x2apic enabled before OS handover */
-int x2apic_preenabled;
-int disable_x2apic;
+static int x2apic_preenabled;
+static int disable_x2apic;
 static __init int setup_nox2apic(char *str)
 {
 	disable_x2apic = 1;
-- 
cgit v0.10.2


From 13a0c3c269b223f60abfac8a9811d77111a8b4ba Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 26 Dec 2008 02:05:47 -0800
Subject: sparseirq: work around compiler optimizing away __weak functions

Impact: fix panic on null pointer with sparseirq

Some GCC versions seem to inline the weak global function,
when that function is empty.

Work it around, by making the functions return a (dummy) integer.

Signed-off-by: Yinghai <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 2fe543f..9760393 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -170,7 +170,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS] = {
 	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-void __init arch_early_irq_init(void)
+int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
@@ -184,6 +184,8 @@ void __init arch_early_irq_init(void)
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
 	}
+
+	return 0;
 }
 
 #ifdef CONFIG_SPARSE_IRQ
@@ -212,7 +214,7 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 	return cfg;
 }
 
-void arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int cpu)
 {
 	struct irq_cfg *cfg;
 
@@ -224,6 +226,8 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
 			BUG_ON(1);
 		}
 	}
+
+	return 0;
 }
 
 #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 69da275..0e40af4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -193,9 +193,9 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
-extern void early_irq_init(void);
-extern void arch_early_irq_init(void);
-extern void arch_init_chip_data(struct irq_desc *desc, int cpu);
+extern int early_irq_init(void);
+extern int arch_early_irq_init(void);
+extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
 					struct irq_desc *desc, int cpu);
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
diff --git a/init/main.c b/init/main.c
index c1f999a..c314aa1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -539,13 +539,14 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
-void __init __weak arch_early_irq_init(void)
+int __init __weak arch_early_irq_init(void)
 {
+	return 0;
 }
 
-void __init __weak early_irq_init(void)
+int __init __weak early_irq_init(void)
 {
-	arch_early_irq_init();
+	return arch_early_irq_init();
 }
 
 asmlinkage void __init start_kernel(void)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 893da67..0bef3ec 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -86,8 +86,9 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
 		desc->kstat_irqs = (unsigned int *)ptr;
 }
 
-void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
+int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
 {
+	return 0;
 }
 
 static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
@@ -132,7 +133,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
 /* FIXME: use bootmem alloc ...*/
 static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS];
 
-void __init early_irq_init(void)
+int __init early_irq_init(void)
 {
 	struct irq_desc *desc;
 	int legacy_count;
@@ -151,7 +152,7 @@ void __init early_irq_init(void)
 	for (i = legacy_count; i < NR_IRQS; i++)
 		irq_desc_ptrs[i] = NULL;
 
-	arch_early_irq_init();
+	return arch_early_irq_init();
 }
 
 struct irq_desc *irq_to_desc(unsigned int irq)
-- 
cgit v0.10.2


From fa6beb37b0d9bc00f90f11154eeed9502d8b0a37 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 22 Dec 2008 20:24:09 -0800
Subject: sparseirq: set lock_class for legacy irq when sparse_irq is selected

Impact: add lockdep annotation to legacy IRQ descs

Warnings resulting out of this were not seen in practice, but it's prudent
to initialize the legacy descriptors to the lock class as well, symmetric
to how we do it with other descriptors.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 0bef3ec..e1cf4e3 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -145,6 +145,7 @@ int __init early_irq_init(void)
 	for (i = 0; i < legacy_count; i++) {
 		desc[i].irq = i;
 		desc[i].kstat_irqs = kstat_irqs_legacy[i];
+		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
 
 		irq_desc_ptrs[i] = desc + i;
 	}
-- 
cgit v0.10.2


From 12026ea16a618b289fcf457661aed24f57323a20 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 26 Dec 2008 22:38:15 -0800
Subject: sparseirq: fix hang with !SPARSE_IRQ

Impact: fix hang

Suresh report his two sockets system only works with SPARSE_IRQ enable
it turns out we miss the setting desc->irq

so provide early_irq_init() even !SPARSE_IRQ to set desc->irq

Reported-by: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e1cf4e3..157c04c 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -218,6 +218,21 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	}
 };
 
+int __init early_irq_init(void)
+{
+	struct irq_desc *desc;
+	int count;
+	int i;
+
+	desc = irq_desc;
+	count = ARRAY_SIZE(irq_desc);
+
+	for (i = 0; i < count; i++)
+		desc[i].irq = i;
+
+	return arch_early_irq_init();
+}
+
 struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
-- 
cgit v0.10.2


From f2863c54f30cccb50661697a6e4bdcd0ad0b0a6c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 28 Dec 2008 12:20:51 +0200
Subject: UBI: fix checkpatch.pl warnings

Just minor indentation and "over 80 characters" fixes.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index b30a0b8..98cf31e 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -721,7 +721,8 @@ static int rename_volumes(struct ubi_device *ubi,
 		 * It seems we need to remove volume with name @re->new_name,
 		 * if it exists.
 		 */
-		desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, UBI_EXCLUSIVE);
+		desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name,
+					  UBI_EXCLUSIVE);
 		if (IS_ERR(desc)) {
 			err = PTR_ERR(desc);
 			if (err == -ENODEV)
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
index 78e914d..13777e5 100644
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -27,11 +27,11 @@
 #define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
 
 #define ubi_assert(expr)  do {                                               \
-        if (unlikely(!(expr))) {                                             \
-                printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
-                       __func__, __LINE__, current->pid);                    \
-                ubi_dbg_dump_stack();                                        \
-        }                                                                    \
+	if (unlikely(!(expr))) {                                             \
+		printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
+		       __func__, __LINE__, current->pid);                    \
+		ubi_dbg_dump_stack();                                        \
+	}                                                                    \
 } while (0)
 
 #define dbg_msg(fmt, ...)                                    \
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index f60f7002..a74118c 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -1034,7 +1034,7 @@ int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum,
 
 	err = paranoid_check_peb_ec_hdr(ubi, pnum);
 	if (err)
-		return err > 0 ? -EINVAL: err;
+		return err > 0 ? -EINVAL : err;
 
 	vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC);
 	vid_hdr->version = UBI_VERSION;
-- 
cgit v0.10.2


From b2e2fe99628c4f944c3075258e536197b5a4f3f8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 29 Dec 2008 00:16:45 +0100
Subject: sparseirq: work around __weak alias bug

Impact: fix boot crash if the kernel is built with certain GCC versions

GCC has a bug with __weak alias functions: if the functions are in
the same compilation unit as their call site, GCC can decide to
inline them - and thus rob the linker of the opportunity to override
the weak alias with the real thing.

This can lead to the boot crash reported by Kamalesh Babulal:

 ACPI: Core revision 20080926
 Setting APIC routing to flat
 BUG: unable to handle kernel NULL pointer dereference at
 0000000000000000
 IP: [<ffffffff8021f9a8>] add_pin_to_irq_cpu+0x14/0x74
 PGD 0
 Oops: 0000 [#1] SMP
 [...]

So move the arch_init_chip_data() function from handle.c to manage.c.

Reported-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 157c04c..c20db0b 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -86,11 +86,6 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
 		desc->kstat_irqs = (unsigned int *)ptr;
 }
 
-int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
-{
-	return 0;
-}
-
 static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
 {
 	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 46953a0..c2741b0 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -261,6 +261,15 @@ void enable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(enable_irq);
 
+/*
+ * [ Not in kernel/irq/handle.c, so that GCC does not
+ *   inline the __weak alias: ]
+ */
+int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
+{
+	return 0;
+}
+
 static int set_irq_wake_real(unsigned int irq, unsigned int on)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-- 
cgit v0.10.2


From 43a256322ac1fc105c181b3cade3b9bfc0b63ca1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 28 Dec 2008 16:01:13 -0800
Subject: sparseirq: move __weak symbols into separate compilation unit

GCC has a bug with __weak alias functions: if the functions are in
the same compilation unit as their call site, GCC can decide to
inline them - and thus rob the linker of the opportunity to override
the weak alias with the real thing.

So move all the IRQ handling related __weak symbols to kernel/irq/chip.c.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 777f89e..d9a3703 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -467,4 +467,10 @@ static inline void init_irq_proc(void)
 
 int show_interrupts(struct seq_file *p, void *v);
 
+struct irq_desc;
+
+extern int early_irq_init(void);
+extern int arch_early_irq_init(void);
+extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
+
 #endif
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 0e40af4..d64a6d4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -193,9 +193,6 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
-extern int early_irq_init(void);
-extern int arch_early_irq_init(void);
-extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
 					struct irq_desc *desc, int cpu);
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
diff --git a/init/main.c b/init/main.c
index c314aa1..2c183ab 100644
--- a/init/main.c
+++ b/init/main.c
@@ -539,16 +539,6 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
-int __init __weak arch_early_irq_init(void)
-{
-	return 0;
-}
-
-int __init __weak early_irq_init(void)
-{
-	return arch_early_irq_init();
-}
-
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c2741b0..46953a0 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -261,15 +261,6 @@ void enable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(enable_irq);
 
-/*
- * [ Not in kernel/irq/handle.c, so that GCC does not
- *   inline the __weak alias: ]
- */
-int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
-{
-	return 0;
-}
-
 static int set_irq_wake_real(unsigned int irq, unsigned int on)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index e7c69a7..daf4635 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -797,3 +797,23 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
 }
 EXPORT_SYMBOL(on_each_cpu);
 #endif
+
+/*
+ * [ These __weak aliases are kept in a separate compilation unit, so that
+ *   GCC does not inline them incorrectly. ]
+ */
+
+int __init __weak early_irq_init(void)
+{
+	return 0;
+}
+
+int __init __weak arch_early_irq_init(void)
+{
+	return 0;
+}
+
+int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
+{
+	return 0;
+}
-- 
cgit v0.10.2


From 6092848a2a23b660150a38bc06f59d75838d70c8 Mon Sep 17 00:00:00 2001
From: Sergio Luis <sergio@larces.uece.br>
Date: Sun, 28 Dec 2008 04:12:26 -0300
Subject: x86: mark get_cpu_leaves() with __cpuinit annotation

Impact: fix section mismatch warning

Commit b2bb85549134c005e997e5a7ed303bda6a1ae738 ("x86: Remove cpumask games
in x86/kernel/cpu/intel_cacheinfo.c") introduced get_cpu_leaves(), which
references __cpuinit cpuid4_cache_lookup().

Mark get_cpu_leaves() with a __cpuinit annotation.

Signed-off-by: Sergio Luis <sergio@larces.uece.br>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fb7f946..7bd00a5 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,7 +534,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
 	per_cpu(cpuid4_info, cpu) = NULL;
 }
 
-static void get_cpu_leaves(void *_retval)
+static void __cpuinit get_cpu_leaves(void *_retval)
 {
 	int j, *retval = _retval, cpu = smp_processor_id();
 
-- 
cgit v0.10.2


From c805b7300ed20ec4f10ea385988d6d3fa935b26c Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 27 Dec 2008 17:10:18 +0300
Subject: x86: mach-default setup.c cleanups

Impact: cleanup

- Break long lines into shorter form.
- Use pr_ macros instead of plain printk.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 37b9ae4..df167f2 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -133,29 +133,28 @@ void __init time_init_hook(void)
  **/
 void mca_nmi_hook(void)
 {
-	/* If I recall correctly, there's a whole bunch of other things that
+	/*
+	 * If I recall correctly, there's a whole bunch of other things that
 	 * we can do to check for NMI problems, but that's all I know about
 	 * at the moment.
 	 */
-
-	printk("NMI generated from unknown source!\n");
+	pr_warning("NMI generated from unknown source!\n");
 }
 #endif
 
 static __init int no_ipi_broadcast(char *str)
 {
 	get_option(&str, &no_broadcast);
-	printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
-											"IPI Broadcast");
+	pr_info("Using %s mode\n",
+		no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
 	return 1;
 }
-
 __setup("no_ipi_broadcast=", no_ipi_broadcast);
 
 static int __init print_ipi_mode(void)
 {
-	printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
-											"Shortcut");
+	pr_info("Using IPI %s mode\n",
+		no_broadcast ? "No-Shortcut" : "Shortcut");
 	return 0;
 }
 
-- 
cgit v0.10.2


From 2f06de0671096e19350c9efe21cfdbc0891aab20 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 27 Dec 2008 21:37:10 +0530
Subject: x86: introducing asm/sys_ia32.h

Impact: cleanup, avoid 44 sparse warnings, new file asm/sys_ia32.h

Fixes following sparse warnings:

  CHECK   arch/x86/ia32/sys_ia32.c
arch/x86/ia32/sys_ia32.c:53:17: warning: symbol 'sys32_truncate64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:60:17: warning: symbol 'sys32_ftruncate64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:98:17: warning: symbol 'sys32_stat64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:109:17: warning: symbol 'sys32_lstat64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:119:17: warning: symbol 'sys32_fstat64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:128:17: warning: symbol 'sys32_fstatat' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:164:17: warning: symbol 'sys32_mmap' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:195:17: warning: symbol 'sys32_mprotect' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:201:17: warning: symbol 'sys32_pipe' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:215:17: warning: symbol 'sys32_rt_sigaction' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:291:17: warning: symbol 'sys32_sigaction' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:330:17: warning: symbol 'sys32_rt_sigprocmask' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:370:17: warning: symbol 'sys32_alarm' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:383:17: warning: symbol 'sys32_old_select' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:393:17: warning: symbol 'sys32_waitpid' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:401:17: warning: symbol 'sys32_sysfs' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:406:17: warning: symbol 'sys32_sched_rr_get_interval' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:421:17: warning: symbol 'sys32_rt_sigpending' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:445:17: warning: symbol 'sys32_rt_sigqueueinfo' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:472:17: warning: symbol 'sys32_sysctl' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:517:17: warning: symbol 'sys32_pread' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:524:17: warning: symbol 'sys32_pwrite' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:532:17: warning: symbol 'sys32_personality' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:545:17: warning: symbol 'sys32_sendfile' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:565:17: warning: symbol 'sys32_mmap2' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:589:17: warning: symbol 'sys32_olduname' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:626:6: warning: symbol 'sys32_uname' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:641:6: warning: symbol 'sys32_ustat' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:663:17: warning: symbol 'sys32_execve' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:678:17: warning: symbol 'sys32_clone' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:693:6: warning: symbol 'sys32_lseek' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:698:6: warning: symbol 'sys32_kill' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:703:6: warning: symbol 'sys32_fadvise64_64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:712:6: warning: symbol 'sys32_vm86_warning' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:726:6: warning: symbol 'sys32_lookup_dcookie' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:732:20: warning: symbol 'sys32_readahead' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:738:17: warning: symbol 'sys32_sync_file_range' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:746:17: warning: symbol 'sys32_fadvise64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:753:17: warning: symbol 'sys32_fallocate' was not declared. Should it be static?
  CHECK   arch/x86/ia32/ia32_signal.c
arch/x86/ia32/ia32_signal.c:126:17: warning: symbol 'sys32_sigsuspend' was not declared. Should it be static?
arch/x86/ia32/ia32_signal.c:141:17: warning: symbol 'sys32_sigaltstack' was not declared. Should it be static?
arch/x86/ia32/ia32_signal.c:249:17: warning: symbol 'sys32_sigreturn' was not declared. Should it be static?
arch/x86/ia32/ia32_signal.c:279:17: warning: symbol 'sys32_rt_sigreturn' was not declared. Should it be static?
  CHECK   arch/x86/ia32/ipc32.c
arch/x86/ia32/ipc32.c:12:17: warning: symbol 'sys32_ipc' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index b195f85..9dabd00 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -24,15 +24,14 @@
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
-#include <asm/ia32.h>
 #include <asm/ptrace.h>
 #include <asm/ia32_unistd.h>
 #include <asm/user32.h>
 #include <asm/sigcontext32.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
-
 #include <asm/sigframe.h>
+#include <asm/sys_ia32.h>
 
 #define DEBUG_SIG 0
 
diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c
index d21991c..29cdcd0 100644
--- a/arch/x86/ia32/ipc32.c
+++ b/arch/x86/ia32/ipc32.c
@@ -8,6 +8,7 @@
 #include <linux/shm.h>
 #include <linux/ipc.h>
 #include <linux/compat.h>
+#include <asm/sys_ia32.h>
 
 asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
 			  compat_uptr_t ptr, u32 fifth)
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 2e09dcd..6c0d7f6 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -44,8 +44,8 @@
 #include <asm/types.h>
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
-#include <asm/ia32.h>
 #include <asm/vgtod.h>
+#include <asm/sys_ia32.h>
 
 #define AA(__x)		((unsigned long)(__x))
 
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
new file mode 100644
index 0000000..ffb08be
--- /dev/null
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -0,0 +1,101 @@
+/*
+ * sys_ia32.h - Linux ia32 syscall interfaces
+ *
+ * Copyright (c) 2008 Jaswinder Singh Rajput
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifndef _ASM_X86_SYS_IA32_H
+#define _ASM_X86_SYS_IA32_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/compat.h>
+#include <asm/ia32.h>
+
+/* ia32/sys_ia32.c */
+asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long);
+asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
+
+asmlinkage long sys32_stat64(char __user *, struct stat64 __user *);
+asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *);
+asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
+asmlinkage long sys32_fstatat(unsigned int, char __user *,
+			      struct stat64 __user *, int);
+struct mmap_arg_struct;
+asmlinkage long sys32_mmap(struct mmap_arg_struct __user *);
+asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
+
+asmlinkage long sys32_pipe(int __user *);
+struct sigaction32;
+struct old_sigaction32;
+asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
+				   struct sigaction32 __user *, unsigned int);
+asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
+				struct old_sigaction32 __user *);
+asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *,
+				     compat_sigset_t __user *, unsigned int);
+asmlinkage long sys32_alarm(unsigned int);
+
+struct sel_arg_struct;
+asmlinkage long sys32_old_select(struct sel_arg_struct __user *);
+asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
+asmlinkage long sys32_sysfs(int, u32, u32);
+
+asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
+					    struct compat_timespec __user *);
+asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
+asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
+
+#ifdef CONFIG_SYSCTL_SYSCALL
+struct sysctl_ia32;
+asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *);
+#endif
+
+asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
+asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
+
+asmlinkage long sys32_personality(unsigned long);
+asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
+
+asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long,
+			    unsigned long, unsigned long, unsigned long);
+
+struct oldold_utsname;
+struct old_utsname;
+asmlinkage long sys32_olduname(struct oldold_utsname __user *);
+long sys32_uname(struct old_utsname __user *);
+
+long sys32_ustat(unsigned, struct ustat32 __user *);
+
+asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
+			     compat_uptr_t __user *, struct pt_regs *);
+asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
+
+long sys32_lseek(unsigned int, int, unsigned int);
+long sys32_kill(int, int);
+long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
+long sys32_vm86_warning(void);
+long sys32_lookup_dcookie(u32, u32, char __user *, size_t);
+
+asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
+asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
+				      unsigned, unsigned, int);
+asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
+asmlinkage long sys32_fallocate(int, int, unsigned,
+				unsigned, unsigned, unsigned);
+
+/* ia32/ia32_signal.c */
+asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
+asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *,
+				  stack_ia32_t __user *, struct pt_regs *);
+asmlinkage long sys32_sigreturn(struct pt_regs *);
+asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
+
+/* ia32/ipc32.c */
+asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
+#endif /* _ASM_X86_SYS_IA32_H */
-- 
cgit v0.10.2


From a1ae299dfb6ef219b296b61d1f222732391973b5 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 20:32:52 +0530
Subject: x86: apic.c declare pic_mode before they get used

Impact: cleanup, avoid sparse warning

In asm/mpspec.h moved out pic_mode from CONFIG_X86_32 as it is common
for both 32 and 64 bit.

Fixes this sparse warning for x86_64:

  arch/x86/kernel/apic.c:128:5: warning: symbol 'pic_mode' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 91885c2..62d14ce 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -6,13 +6,13 @@
 #include <asm/mpspec_def.h>
 
 extern int apic_version[MAX_APICS];
+extern int pic_mode;
 
 #ifdef CONFIG_X86_32
 #include <mach_mpspec.h>
 
 extern unsigned int def_to_bigsmp;
 extern u8 apicid_2_node[];
-extern int pic_mode;
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
-- 
cgit v0.10.2


From 7f3e632f9d8d234819bcdef7a68fc8b84f7d3d3d Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 20:34:35 +0530
Subject: x86: io_apic.c io_apic_sync should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:

  arch/x86/kernel/io_apic.c:709:6: warning: symbol 'io_apic_sync' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 679e7bb..b8c8a8e 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -481,7 +481,7 @@ static void __unmask_IO_APIC_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_X86_64
-void io_apic_sync(struct irq_pin_list *entry)
+static void io_apic_sync(struct irq_pin_list *entry)
 {
 	/*
 	 * Synchronize the IO-APIC and the CPU by doing
-- 
cgit v0.10.2


From cbafbc826bf645f7fbbfbb2ff20138e5ccb4700e Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 20:36:40 +0530
Subject: x86: efi.c declare add_efi_memmap before they get used

Impact: cleanup, avoid sparse warning

Fixes this sparse warning:

  arch/x86/kernel/efi.c:67:5: warning: symbol 'add_efi_memmap' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index a2e545c..ca5ffb2 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size);
 
 #endif /* CONFIG_X86_32 */
 
+extern int add_efi_memmap;
 extern void efi_reserve_early(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
-- 
cgit v0.10.2


From c854c91979e0717c619bc55e124d41d60d5eb3d6 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 20:38:09 +0530
Subject: x86_64: pci-gart_64.c iommu_fullflush should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:

  arch/x86/kernel/pci-gart_64.c:55:5: warning: symbol 'iommu_fullflush' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a35eaa3..00c2bcd 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -52,7 +52,7 @@ static u32 *iommu_gatt_base;		/* Remapping table */
  * to trigger bugs with some popular PCI cards, in particular 3ware (but
  * has been also also seen with Qlogic at least).
  */
-int iommu_fullflush = 1;
+static int iommu_fullflush = 1;
 
 /* Allocation bitmap for the remapping area: */
 static DEFINE_SPINLOCK(iommu_bitmap_lock);
-- 
cgit v0.10.2


From 824877111cd7f2b4fd2fe6947c5c5cbbb3ac5bd8 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 27 Dec 2008 18:32:28 +0530
Subject: x86, pci: move arch/x86/pci/pci.h to arch/x86/include/asm/pci_x86.h

Impact: cleanup

Now that arch/x86/pci/pci.h is used in a number of other places as well,
move the lowlevel x86 pci definitions into the architecture include files.
(not to be confused with the existing arch/x86/include/asm/pci.h file,
which provides public details about x86 PCI)

Tested on: X86_32_UP, X86_32_SMP and X86_64_SMP

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
new file mode 100644
index 0000000..e60fd3e
--- /dev/null
+++ b/arch/x86/include/asm/pci_x86.h
@@ -0,0 +1,165 @@
+/*
+ *	Low-Level PCI Access for i386 machines.
+ *
+ *	(c) 1999 Martin Mares <mj@ucw.cz>
+ */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define PCI_PROBE_BIOS		0x0001
+#define PCI_PROBE_CONF1		0x0002
+#define PCI_PROBE_CONF2		0x0004
+#define PCI_PROBE_MMCONF	0x0008
+#define PCI_PROBE_MASK		0x000f
+#define PCI_PROBE_NOEARLY	0x0010
+
+#define PCI_NO_CHECKS		0x0400
+#define PCI_USE_PIRQ_MASK	0x0800
+#define PCI_ASSIGN_ROMS		0x1000
+#define PCI_BIOS_IRQ_SCAN	0x2000
+#define PCI_ASSIGN_ALL_BUSSES	0x4000
+#define PCI_CAN_SKIP_ISA_ALIGN	0x8000
+#define PCI_USE__CRS		0x10000
+#define PCI_CHECK_ENABLE_AMD_MMCONF	0x20000
+#define PCI_HAS_IO_ECS		0x40000
+#define PCI_NOASSIGN_ROMS	0x80000
+
+extern unsigned int pci_probe;
+extern unsigned long pirq_table_addr;
+
+enum pci_bf_sort_state {
+	pci_bf_sort_default,
+	pci_force_nobf,
+	pci_force_bf,
+	pci_dmi_bf,
+};
+
+/* pci-i386.c */
+
+extern unsigned int pcibios_max_latency;
+
+void pcibios_resource_survey(void);
+
+/* pci-pc.c */
+
+extern int pcibios_last_bus;
+extern struct pci_bus *pci_root_bus;
+extern struct pci_ops pci_root_ops;
+
+/* pci-irq.c */
+
+struct irq_info {
+	u8 bus, devfn;			/* Bus, device and function */
+	struct {
+		u8 link;		/* IRQ line ID, chipset dependent,
+					   0 = not routed */
+		u16 bitmap;		/* Available IRQs */
+	} __attribute__((packed)) irq[4];
+	u8 slot;			/* Slot number, 0=onboard */
+	u8 rfu;
+} __attribute__((packed));
+
+struct irq_routing_table {
+	u32 signature;			/* PIRQ_SIGNATURE should be here */
+	u16 version;			/* PIRQ_VERSION */
+	u16 size;			/* Table size in bytes */
+	u8 rtr_bus, rtr_devfn;		/* Where the interrupt router lies */
+	u16 exclusive_irqs;		/* IRQs devoted exclusively to
+					   PCI usage */
+	u16 rtr_vendor, rtr_device;	/* Vendor and device ID of
+					   interrupt router */
+	u32 miniport_data;		/* Crap */
+	u8 rfu[11];
+	u8 checksum;			/* Modulo 256 checksum must give 0 */
+	struct irq_info slots[0];
+} __attribute__((packed));
+
+extern unsigned int pcibios_irq_mask;
+
+extern int pcibios_scanned;
+extern spinlock_t pci_config_lock;
+
+extern int (*pcibios_enable_irq)(struct pci_dev *dev);
+extern void (*pcibios_disable_irq)(struct pci_dev *dev);
+
+struct pci_raw_ops {
+	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
+						int reg, int len, u32 *val);
+	int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn,
+						int reg, int len, u32 val);
+};
+
+extern struct pci_raw_ops *raw_pci_ops;
+extern struct pci_raw_ops *raw_pci_ext_ops;
+
+extern struct pci_raw_ops pci_direct_conf1;
+extern bool port_cf9_safe;
+
+/* arch_initcall level */
+extern int pci_direct_probe(void);
+extern void pci_direct_init(int type);
+extern void pci_pcbios_init(void);
+extern int pci_olpc_init(void);
+extern void __init dmi_check_pciprobe(void);
+extern void __init dmi_check_skip_isa_align(void);
+
+/* some common used subsys_initcalls */
+extern int __init pci_acpi_init(void);
+extern int __init pcibios_irq_init(void);
+extern int __init pci_visws_init(void);
+extern int __init pci_numaq_init(void);
+extern int __init pcibios_init(void);
+
+/* pci-mmconfig.c */
+
+extern int __init pci_mmcfg_arch_init(void);
+extern void __init pci_mmcfg_arch_free(void);
+
+/*
+ * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
+ * on their northbrige except through the * %eax register. As such, you MUST
+ * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
+ * accessor functions.
+ * In fact just use pci_config_*, nothing else please.
+ */
+static inline unsigned char mmio_config_readb(void __iomem *pos)
+{
+	u8 val;
+	asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
+	return val;
+}
+
+static inline unsigned short mmio_config_readw(void __iomem *pos)
+{
+	u16 val;
+	asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
+	return val;
+}
+
+static inline unsigned int mmio_config_readl(void __iomem *pos)
+{
+	u32 val;
+	asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
+	return val;
+}
+
+static inline void mmio_config_writeb(void __iomem *pos, u8 val)
+{
+	asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
+}
+
+static inline void mmio_config_writew(void __iomem *pos, u16 val)
+{
+	asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
+}
+
+static inline void mmio_config_writel(void __iomem *pos, u32 val)
+{
+	asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
+}
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index efc2f36..666e43d 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -13,8 +13,7 @@
 #include <asm/msr.h>
 #include <asm/acpi.h>
 #include <asm/mmconfig.h>
-
-#include "../pci/pci.h"
+#include <asm/pci_x86.h>
 
 struct pci_hostbridge_probe {
 	u32 bus;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index b165eb0..a90913c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
 #include <asm/proto.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
+#include <asm/pci_x86.h>
 
 #ifdef CONFIG_X86_32
 # include <linux/dmi.h>
@@ -22,8 +23,6 @@
 #endif
 
 #include <mach_ipi.h>
-#include "../pci/pci.h"
-
 
 /*
  * Power off function, if any
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 1d88d2b..9e5752f 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -4,7 +4,7 @@
 #include <linux/irq.h>
 #include <linux/dmi.h>
 #include <asm/numa.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 struct pci_root_info {
 	char *name;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 22e0576..9bb0982 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -2,7 +2,7 @@
 #include <linux/pci.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/pci-direct.h>
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index bb1a01f..62ddb73 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -14,8 +14,7 @@
 #include <asm/segment.h>
 #include <asm/io.h>
 #include <asm/smp.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 				PCI_PROBE_MMCONF;
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9a5af6c..bd13c3e 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -5,7 +5,7 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/dmi.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * Functions for accessing PCI base (first 256 bytes) and extended
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 86631cc..f6adf2c 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -2,7 +2,7 @@
 #include <linux/pci.h>
 #include <asm/pci-direct.h>
 #include <asm/io.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Direct PCI access. This is used for PCI accesses in early boot before
    the PCI subsystem works. */
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 2051dc9..7d388d5 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -6,8 +6,7 @@
 #include <linux/dmi.h>
 #include <linux/pci.h>
 #include <linux/init.h>
-#include "pci.h"
-
+#include <asm/pci_x86.h>
 
 static void __devinit pci_fixup_i450nx(struct pci_dev *d)
 {
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 844df0c..e51bf2c 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -34,8 +34,8 @@
 
 #include <asm/pat.h>
 #include <asm/e820.h>
+#include <asm/pci_x86.h>
 
-#include "pci.h"
 
 static int
 skip_isa_ioresource_align(struct pci_dev *dev) {
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index d6c950f..bec3b04 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -1,6 +1,6 @@
 #include <linux/pci.h>
 #include <linux/init.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* arch_initcall has too random ordering, so call the initializers
    in the right sequence from here. */
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index bf69dbe..373b9af 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -16,8 +16,7 @@
 #include <asm/io_apic.h>
 #include <linux/irq.h>
 #include <linux/acpi.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #define PIRQ_SIGNATURE	(('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
 #define PIRQ_VERSION 0x0100
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index b722dd4..f1065b1 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -3,7 +3,7 @@
  */
 #include <linux/init.h>
 #include <linux/pci.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * Discover remaining PCI buses in case there are peer host bridges.
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 654a223..89bf924 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -15,8 +15,7 @@
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* aperture is up to 256MB but BIOS may reserve less */
 #define MMCONFIG_APER_MIN	(2 * 1024*1024)
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index f3c761d..8b2d561 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/acpi.h>
 #include <asm/e820.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Assume systems with more busses have correct MCFG */
 #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index a199416..30007ff 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -10,8 +10,7 @@
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Static virtual mapping of the MMCONFIG aperture */
 struct mmcfg_virt {
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 1177845..2089354 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -7,7 +7,7 @@
 #include <linux/nodemask.h>
 #include <mach_apic.h>
 #include <asm/mpspec.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #define XQUAD_PORTIO_BASE 0xfe400000
 #define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index e11e9e8..b889d82 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -29,7 +29,7 @@
 #include <linux/init.h>
 #include <asm/olpc.h>
 #include <asm/geode.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * In the tables below, the first two line (8 longwords) are the
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 37472fc..b82cae9 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -6,9 +6,8 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
-#include "pci.h"
-#include "pci-functions.h"
-
+#include <asm/pci_x86.h>
+#include <asm/mach-default/pci-functions.h>
 
 /* BIOS32 signature: "_32_" */
 #define BIOS32_SIGNATURE	(('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
deleted file mode 100644
index 1959018..0000000
--- a/arch/x86/pci/pci.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- *	Low-Level PCI Access for i386 machines.
- *
- *	(c) 1999 Martin Mares <mj@ucw.cz>
- */
-
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(x...) printk(x)
-#else
-#define DBG(x...)
-#endif
-
-#define PCI_PROBE_BIOS		0x0001
-#define PCI_PROBE_CONF1		0x0002
-#define PCI_PROBE_CONF2		0x0004
-#define PCI_PROBE_MMCONF	0x0008
-#define PCI_PROBE_MASK		0x000f
-#define PCI_PROBE_NOEARLY	0x0010
-
-#define PCI_NO_CHECKS		0x0400
-#define PCI_USE_PIRQ_MASK	0x0800
-#define PCI_ASSIGN_ROMS		0x1000
-#define PCI_BIOS_IRQ_SCAN	0x2000
-#define PCI_ASSIGN_ALL_BUSSES	0x4000
-#define PCI_CAN_SKIP_ISA_ALIGN	0x8000
-#define PCI_USE__CRS		0x10000
-#define PCI_CHECK_ENABLE_AMD_MMCONF	0x20000
-#define PCI_HAS_IO_ECS		0x40000
-#define PCI_NOASSIGN_ROMS	0x80000
-
-extern unsigned int pci_probe;
-extern unsigned long pirq_table_addr;
-
-enum pci_bf_sort_state {
-	pci_bf_sort_default,
-	pci_force_nobf,
-	pci_force_bf,
-	pci_dmi_bf,
-};
-
-/* pci-i386.c */
-
-extern unsigned int pcibios_max_latency;
-
-void pcibios_resource_survey(void);
-
-/* pci-pc.c */
-
-extern int pcibios_last_bus;
-extern struct pci_bus *pci_root_bus;
-extern struct pci_ops pci_root_ops;
-
-/* pci-irq.c */
-
-struct irq_info {
-	u8 bus, devfn;			/* Bus, device and function */
-	struct {
-		u8 link;		/* IRQ line ID, chipset dependent, 0=not routed */
-		u16 bitmap;		/* Available IRQs */
-	} __attribute__((packed)) irq[4];
-	u8 slot;			/* Slot number, 0=onboard */
-	u8 rfu;
-} __attribute__((packed));
-
-struct irq_routing_table {
-	u32 signature;			/* PIRQ_SIGNATURE should be here */
-	u16 version;			/* PIRQ_VERSION */
-	u16 size;			/* Table size in bytes */
-	u8 rtr_bus, rtr_devfn;		/* Where the interrupt router lies */
-	u16 exclusive_irqs;		/* IRQs devoted exclusively to PCI usage */
-	u16 rtr_vendor, rtr_device;	/* Vendor and device ID of interrupt router */
-	u32 miniport_data;		/* Crap */
-	u8 rfu[11];
-	u8 checksum;			/* Modulo 256 checksum must give zero */
-	struct irq_info slots[0];
-} __attribute__((packed));
-
-extern unsigned int pcibios_irq_mask;
-
-extern int pcibios_scanned;
-extern spinlock_t pci_config_lock;
-
-extern int (*pcibios_enable_irq)(struct pci_dev *dev);
-extern void (*pcibios_disable_irq)(struct pci_dev *dev);
-
-struct pci_raw_ops {
-	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
-						int reg, int len, u32 *val);
-	int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn,
-						int reg, int len, u32 val);
-};
-
-extern struct pci_raw_ops *raw_pci_ops;
-extern struct pci_raw_ops *raw_pci_ext_ops;
-
-extern struct pci_raw_ops pci_direct_conf1;
-extern bool port_cf9_safe;
-
-/* arch_initcall level */
-extern int pci_direct_probe(void);
-extern void pci_direct_init(int type);
-extern void pci_pcbios_init(void);
-extern int pci_olpc_init(void);
-extern void __init dmi_check_pciprobe(void);
-extern void __init dmi_check_skip_isa_align(void);
-
-/* some common used subsys_initcalls */
-extern int __init pci_acpi_init(void);
-extern int __init pcibios_irq_init(void);
-extern int __init pci_visws_init(void);
-extern int __init pci_numaq_init(void);
-extern int __init pcibios_init(void);
-
-/* pci-mmconfig.c */
-
-extern int __init pci_mmcfg_arch_init(void);
-extern void __init pci_mmcfg_arch_free(void);
-
-/*
- * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
- * on their northbrige except through the * %eax register. As such, you MUST
- * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
- * accessor functions.
- * In fact just use pci_config_*, nothing else please.
- */
-static inline unsigned char mmio_config_readb(void __iomem *pos)
-{
-	u8 val;
-	asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
-	return val;
-}
-
-static inline unsigned short mmio_config_readw(void __iomem *pos)
-{
-	u16 val;
-	asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
-	return val;
-}
-
-static inline unsigned int mmio_config_readl(void __iomem *pos)
-{
-	u32 val;
-	asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
-	return val;
-}
-
-static inline void mmio_config_writeb(void __iomem *pos, u8 val)
-{
-	asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory");
-}
-
-static inline void mmio_config_writew(void __iomem *pos, u16 val)
-{
-	asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory");
-}
-
-static inline void mmio_config_writel(void __iomem *pos, u32 val)
-{
-	asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory");
-}
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 42f4cb1..16d0c0e 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -9,11 +9,10 @@
 #include <linux/init.h>
 
 #include <asm/setup.h>
+#include <asm/pci_x86.h>
 #include <asm/visws/cobalt.h>
 #include <asm/visws/lithium.h>
 
-#include "pci.h"
-
 static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
 static void pci_visws_disable_irq(struct pci_dev *dev) { }
 
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index 8514c3a..c2e1bcb 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -45,7 +45,7 @@
 
 #include "cpqphp.h"
 #include "cpqphp_nvram.h"
-#include "../../../arch/x86/pci/pci.h"	/* horrible hack showing how processor dependent we are... */
+#include <asm/pci_x86.h>
 
 
 /* Global variables */
diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c
index 0902193..df146be 100644
--- a/drivers/pci/hotplug/cpqphp_pci.c
+++ b/drivers/pci/hotplug/cpqphp_pci.c
@@ -37,7 +37,7 @@
 #include "../pci.h"
 #include "cpqphp.h"
 #include "cpqphp_nvram.h"
-#include "../../../arch/x86/pci/pci.h"	/* horrible hack showing how processor dependent we are... */
+#include <asm/pci_x86.h>
 
 
 u8 cpqhp_nic_irq;
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 633e743..dd18f85 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -35,7 +35,7 @@
 #include <linux/delay.h>
 #include <linux/wait.h>
 #include "../pci.h"
-#include "../../../arch/x86/pci/pci.h"	/* for struct irq_routing_table */
+#include <asm/pci_x86.h>		/* for struct irq_routing_table */
 #include "ibmphp.h"
 
 #define attn_on(sl)  ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON)
-- 
cgit v0.10.2


From 278d1ed65e25d80af7c3a112d707b3f70516ddb4 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:12 +1030
Subject: cpumask: make CONFIG_NR_CPUS always valid.

Impact: cleanup

Currently we have NR_CPUS, which is 1 on UP, and CONFIG_NR_CPUS on
SMP.  If we make CONFIG_NR_CPUS always valid (and always 1 on !SMP),
we can skip the middleman.

This also allows us to find and check all the unaudited NR_CPUS usage
as we prepare for v. large NR_CPUS.

To avoid breaking every arch, we cheat and do this for the moment
in the header if the arch doesn't.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/include/linux/threads.h b/include/linux/threads.h
index 38d1a5d..052b12b 100644
--- a/include/linux/threads.h
+++ b/include/linux/threads.h
@@ -8,17 +8,17 @@
  */
 
 /*
- * Maximum supported processors that can run under SMP.  This value is
- * set via configure setting.  The maximum is equal to the size of the
- * bitmasks used on that platform, i.e. 32 or 64.  Setting this smaller
- * saves quite a bit of memory.
+ * Maximum supported processors.  Setting this smaller saves quite a
+ * bit of memory.  Use nr_cpu_ids instead of this except for static bitmaps.
  */
-#ifdef CONFIG_SMP
-#define NR_CPUS		CONFIG_NR_CPUS
-#else
-#define NR_CPUS		1
+#ifndef CONFIG_NR_CPUS
+/* FIXME: This should be fixed in the arch's Kconfig */
+#define CONFIG_NR_CPUS	1
 #endif
 
+/* Places which use this should consider cpumask_var_t. */
+#define NR_CPUS		CONFIG_NR_CPUS
+
 #define MIN_THREADS_LEFT_FOR_ROOT 4
 
 /*
-- 
cgit v0.10.2


From 4b0bc0bca83f3fb7cf920e2ec80684c15d2269c0 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:13 +1030
Subject: bitmap: test for constant as well as small size for inline versions

Impact: reduce text size

bitmap_zero et al have a fastpath for nbits <= BITS_PER_LONG, but this
should really only apply where the nbits is known at compile time.

This only saves about 1200 bytes on an allyesconfig kernel, but with
cpumasks going variable that number will increase.

   text		data	bss	dec		hex	filename
35327852        5035607 6782976 47146435        2cf65c3 vmlinux-before
35326640        5035607 6782976 47145223        2cf6107 vmlinux-after

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index a08c33a..2878811 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -137,9 +137,12 @@ extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
 		(1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL		\
 )
 
+#define small_const_nbits(nbits) \
+	(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)
+
 static inline void bitmap_zero(unsigned long *dst, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = 0UL;
 	else {
 		int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
@@ -150,7 +153,7 @@ static inline void bitmap_zero(unsigned long *dst, int nbits)
 static inline void bitmap_fill(unsigned long *dst, int nbits)
 {
 	size_t nlongs = BITS_TO_LONGS(nbits);
-	if (nlongs > 1) {
+	if (!small_const_nbits(nbits)) {
 		int len = (nlongs - 1) * sizeof(unsigned long);
 		memset(dst, 0xff,  len);
 	}
@@ -160,7 +163,7 @@ static inline void bitmap_fill(unsigned long *dst, int nbits)
 static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
 			int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src;
 	else {
 		int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
@@ -171,7 +174,7 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
 static inline void bitmap_and(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src1 & *src2;
 	else
 		__bitmap_and(dst, src1, src2, nbits);
@@ -180,7 +183,7 @@ static inline void bitmap_and(unsigned long *dst, const unsigned long *src1,
 static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src1 | *src2;
 	else
 		__bitmap_or(dst, src1, src2, nbits);
@@ -189,7 +192,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
 static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src1 ^ *src2;
 	else
 		__bitmap_xor(dst, src1, src2, nbits);
@@ -198,7 +201,7 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
 static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src1 & ~(*src2);
 	else
 		__bitmap_andnot(dst, src1, src2, nbits);
@@ -207,7 +210,7 @@ static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1,
 static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
 			int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
 	else
 		__bitmap_complement(dst, src, nbits);
@@ -216,7 +219,7 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr
 static inline int bitmap_equal(const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
 	else
 		return __bitmap_equal(src1, src2, nbits);
@@ -225,7 +228,7 @@ static inline int bitmap_equal(const unsigned long *src1,
 static inline int bitmap_intersects(const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
 	else
 		return __bitmap_intersects(src1, src2, nbits);
@@ -234,7 +237,7 @@ static inline int bitmap_intersects(const unsigned long *src1,
 static inline int bitmap_subset(const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
 	else
 		return __bitmap_subset(src1, src2, nbits);
@@ -242,7 +245,7 @@ static inline int bitmap_subset(const unsigned long *src1,
 
 static inline int bitmap_empty(const unsigned long *src, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
 	else
 		return __bitmap_empty(src, nbits);
@@ -250,7 +253,7 @@ static inline int bitmap_empty(const unsigned long *src, int nbits)
 
 static inline int bitmap_full(const unsigned long *src, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
 	else
 		return __bitmap_full(src, nbits);
@@ -258,7 +261,7 @@ static inline int bitmap_full(const unsigned long *src, int nbits)
 
 static inline int bitmap_weight(const unsigned long *src, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
 	return __bitmap_weight(src, nbits);
 }
@@ -266,7 +269,7 @@ static inline int bitmap_weight(const unsigned long *src, int nbits)
 static inline void bitmap_shift_right(unsigned long *dst,
 			const unsigned long *src, int n, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src >> n;
 	else
 		__bitmap_shift_right(dst, src, n, nbits);
@@ -275,7 +278,7 @@ static inline void bitmap_shift_right(unsigned long *dst,
 static inline void bitmap_shift_left(unsigned long *dst,
 			const unsigned long *src, int n, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits);
 	else
 		__bitmap_shift_left(dst, src, n, nbits);
-- 
cgit v0.10.2


From cb78a0ce69fad2026825f957e24e2d9cda1ec9f1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:14 +1030
Subject: bitmap: fix seq_bitmap and seq_cpumask to take const pointer

Impact: cleanup

seq_bitmap just calls bitmap_scnprintf on the bits: that arg can be const.
Similarly, seq_cpumask just calls seq_bitmap.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 16c2115..c99358a 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -462,7 +462,8 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
 	return -1;
 }
 
-int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
+int seq_bitmap(struct seq_file *m, const unsigned long *bits,
+				   unsigned int nr_bits)
 {
 	if (m->count < m->size) {
 		int len = bitmap_scnprintf(m->buf + m->count,
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index b3dfa72..952e018 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -50,8 +50,9 @@ int seq_path(struct seq_file *, struct path *, char *);
 int seq_dentry(struct seq_file *, struct dentry *, char *);
 int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 		  char *esc);
-int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits);
-static inline int seq_cpumask(struct seq_file *m, cpumask_t *mask)
+int seq_bitmap(struct seq_file *m, const unsigned long *bits,
+				   unsigned int nr_bits);
+static inline int seq_cpumask(struct seq_file *m, const struct cpumask *mask)
 {
 	return seq_bitmap(m, mask->bits, NR_CPUS);
 }
-- 
cgit v0.10.2


From b3199c025d1646e25e7d1d640dd605db251dccf8 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:14 +1030
Subject: cpumask: switch over to cpu_online/possible/active/present_mask: core

Impact: cleanup

This implements the obsolescent cpu_online_map in terms of
cpu_online_mask, rather than the other way around.  Same for the other
maps.

The documentation comments are also updated to refer to _mask rather
than _map.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index b5ad19a..db2341b 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -416,65 +416,54 @@ int __next_cpu_nr(int n, const cpumask_t *srcp);
 
 /*
  * The following particular system cpumasks and operations manage
- * possible, present, active and online cpus.  Each of them is a fixed size
- * bitmap of size NR_CPUS.
+ * possible, present, active and online cpus.
  *
- *  #ifdef CONFIG_HOTPLUG_CPU
- *     cpu_possible_map - has bit 'cpu' set iff cpu is populatable
- *     cpu_present_map  - has bit 'cpu' set iff cpu is populated
- *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
- *     cpu_active_map   - has bit 'cpu' set iff cpu available to migration
- *  #else
- *     cpu_possible_map - has bit 'cpu' set iff cpu is populated
- *     cpu_present_map  - copy of cpu_possible_map
- *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
- *  #endif
+ *     cpu_possible_mask- has bit 'cpu' set iff cpu is populatable
+ *     cpu_present_mask - has bit 'cpu' set iff cpu is populated
+ *     cpu_online_mask  - has bit 'cpu' set iff cpu available to scheduler
+ *     cpu_active_mask  - has bit 'cpu' set iff cpu available to migration
  *
- *  In either case, NR_CPUS is fixed at compile time, as the static
- *  size of these bitmaps.  The cpu_possible_map is fixed at boot
- *  time, as the set of CPU id's that it is possible might ever
- *  be plugged in at anytime during the life of that system boot.
- *  The cpu_present_map is dynamic(*), representing which CPUs
- *  are currently plugged in.  And cpu_online_map is the dynamic
- *  subset of cpu_present_map, indicating those CPUs available
- *  for scheduling.
+ *  If !CONFIG_HOTPLUG_CPU, present == possible, and active == online.
  *
- *  If HOTPLUG is enabled, then cpu_possible_map is forced to have
+ *  The cpu_possible_mask is fixed at boot time, as the set of CPU id's
+ *  that it is possible might ever be plugged in at anytime during the
+ *  life of that system boot.  The cpu_present_mask is dynamic(*),
+ *  representing which CPUs are currently plugged in.  And
+ *  cpu_online_mask is the dynamic subset of cpu_present_mask,
+ *  indicating those CPUs available for scheduling.
+ *
+ *  If HOTPLUG is enabled, then cpu_possible_mask is forced to have
  *  all NR_CPUS bits set, otherwise it is just the set of CPUs that
  *  ACPI reports present at boot.
  *
- *  If HOTPLUG is enabled, then cpu_present_map varies dynamically,
+ *  If HOTPLUG is enabled, then cpu_present_mask varies dynamically,
  *  depending on what ACPI reports as currently plugged in, otherwise
- *  cpu_present_map is just a copy of cpu_possible_map.
+ *  cpu_present_mask is just a copy of cpu_possible_mask.
  *
- *  (*) Well, cpu_present_map is dynamic in the hotplug case.  If not
- *      hotplug, it's a copy of cpu_possible_map, hence fixed at boot.
+ *  (*) Well, cpu_present_mask is dynamic in the hotplug case.  If not
+ *      hotplug, it's a copy of cpu_possible_mask, hence fixed at boot.
  *
  * Subtleties:
  * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode
  *    assumption that their single CPU is online.  The UP
- *    cpu_{online,possible,present}_maps are placebos.  Changing them
+ *    cpu_{online,possible,present}_masks are placebos.  Changing them
  *    will have no useful affect on the following num_*_cpus()
  *    and cpu_*() macros in the UP case.  This ugliness is a UP
  *    optimization - don't waste any instructions or memory references
  *    asking if you're online or how many CPUs there are if there is
  *    only one CPU.
- * 2) Most SMP arch's #define some of these maps to be some
- *    other map specific to that arch.  Therefore, the following
- *    must be #define macros, not inlines.  To see why, examine
- *    the assembly code produced by the following.  Note that
- *    set1() writes phys_x_map, but set2() writes x_map:
- *        int x_map, phys_x_map;
- *        #define set1(a) x_map = a
- *        inline void set2(int a) { x_map = a; }
- *        #define x_map phys_x_map
- *        main(){ set1(3); set2(5); }
  */
 
-extern cpumask_t cpu_possible_map;
-extern cpumask_t cpu_online_map;
-extern cpumask_t cpu_present_map;
-extern cpumask_t cpu_active_map;
+extern const struct cpumask *const cpu_possible_mask;
+extern const struct cpumask *const cpu_online_mask;
+extern const struct cpumask *const cpu_present_mask;
+extern const struct cpumask *const cpu_active_mask;
+
+/* These strip const, as traditionally they weren't const. */
+#define cpu_possible_map	(*(cpumask_t *)cpu_possible_mask)
+#define cpu_online_map		(*(cpumask_t *)cpu_online_mask)
+#define cpu_present_map		(*(cpumask_t *)cpu_present_mask)
+#define cpu_active_map		(*(cpumask_t *)cpu_active_mask)
 
 #if NR_CPUS > 1
 #define num_online_cpus()	cpus_weight_nr(cpu_online_map)
@@ -1058,12 +1047,6 @@ static inline void free_bootmem_cpumask_var(cpumask_var_t mask)
 }
 #endif /* CONFIG_CPUMASK_OFFSTACK */
 
-/* The pointer versions of the maps, these will become the primary versions. */
-#define cpu_possible_mask ((const struct cpumask *)&cpu_possible_map)
-#define cpu_online_mask ((const struct cpumask *)&cpu_online_map)
-#define cpu_present_mask ((const struct cpumask *)&cpu_present_map)
-#define cpu_active_mask ((const struct cpumask *)&cpu_active_map)
-
 /* It's common to want to use cpu_all_mask in struct member initializers,
  * so it has to refer to an address rather than a pointer. */
 extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index bae131a..3ddc509 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -15,30 +15,8 @@
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
 
-/*
- * Represents all cpu's present in the system
- * In systems capable of hotplug, this map could dynamically grow
- * as new cpu's are detected in the system via any platform specific
- * method, such as ACPI for e.g.
- */
-cpumask_t cpu_present_map __read_mostly;
-EXPORT_SYMBOL(cpu_present_map);
-
-/*
- * Represents all cpu's that are currently online.
- */
-cpumask_t cpu_online_map __read_mostly;
-EXPORT_SYMBOL(cpu_online_map);
-
-#ifdef CONFIG_INIT_ALL_POSSIBLE
-cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
-#else
-cpumask_t cpu_possible_map __read_mostly;
-#endif
-EXPORT_SYMBOL(cpu_possible_map);
-
 #ifdef CONFIG_SMP
-/* Serializes the updates to cpu_online_map, cpu_present_map */
+/* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 
 static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
@@ -65,8 +43,6 @@ void __init cpu_hotplug_init(void)
 	cpu_hotplug.refcount = 0;
 }
 
-cpumask_t cpu_active_map;
-
 #ifdef CONFIG_HOTPLUG_CPU
 
 void get_online_cpus(void)
@@ -97,7 +73,7 @@ EXPORT_SYMBOL_GPL(put_online_cpus);
 
 /*
  * The following two API's must be used when attempting
- * to serialize the updates to cpu_online_map, cpu_present_map.
+ * to serialize the updates to cpu_online_mask, cpu_present_mask.
  */
 void cpu_maps_update_begin(void)
 {
@@ -503,3 +479,24 @@ EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
 
 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
 EXPORT_SYMBOL(cpu_all_bits);
+
+#ifdef CONFIG_INIT_ALL_POSSIBLE
+static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
+	= CPU_BITS_ALL;
+#else
+static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
+#endif
+const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
+EXPORT_SYMBOL(cpu_possible_mask);
+
+static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
+const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
+EXPORT_SYMBOL(cpu_online_mask);
+
+static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
+const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
+EXPORT_SYMBOL(cpu_present_mask);
+
+static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
+const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
+EXPORT_SYMBOL(cpu_active_mask);
-- 
cgit v0.10.2


From ae7a47e72e1a0b5e2b46d1596bc2c22942a73023 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:15 +1030
Subject: cpumask: make cpumask.h eat its own dogfood.

Changes:
1) cpumask_t to struct cpumask,
2) cpus_weight_nr to cpumask_weight,
3) cpu_isset to cpumask_test_cpu,
4) ->bits to cpumask_bits()
5) cpu_*_map to cpu_*_mask.
6) for_each_cpu_mask_nr to for_each_cpu

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index db2341b..e62a671 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -268,6 +268,25 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
 }
 
+/**
+ * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
+ * @bitmap: the bitmap
+ *
+ * There are a few places where cpumask_var_t isn't appropriate and
+ * static cpumasks must be used (eg. very early boot), yet we don't
+ * expose the definition of 'struct cpumask'.
+ *
+ * This does the conversion, and can be used as a constant initializer.
+ */
+#define to_cpumask(bitmap)						\
+	((struct cpumask *)(1 ? (bitmap)				\
+			    : (void *)sizeof(__check_is_bitmap(bitmap))))
+
+static inline int __check_is_bitmap(const unsigned long *bitmap)
+{
+	return 1;
+}
+
 /*
  * Special-case data structure for "single bit set only" constant CPU masks.
  *
@@ -278,11 +297,11 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 extern const unsigned long
 	cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
 
-static inline const cpumask_t *get_cpu_mask(unsigned int cpu)
+static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
 {
 	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
 	p -= cpu / BITS_PER_LONG;
-	return (const cpumask_t *)p;
+	return to_cpumask(p);
 }
 
 /*
@@ -466,13 +485,13 @@ extern const struct cpumask *const cpu_active_mask;
 #define cpu_active_map		(*(cpumask_t *)cpu_active_mask)
 
 #if NR_CPUS > 1
-#define num_online_cpus()	cpus_weight_nr(cpu_online_map)
-#define num_possible_cpus()	cpus_weight_nr(cpu_possible_map)
-#define num_present_cpus()	cpus_weight_nr(cpu_present_map)
-#define cpu_online(cpu)		cpu_isset((cpu), cpu_online_map)
-#define cpu_possible(cpu)	cpu_isset((cpu), cpu_possible_map)
-#define cpu_present(cpu)	cpu_isset((cpu), cpu_present_map)
-#define cpu_active(cpu)		cpu_isset((cpu), cpu_active_map)
+#define num_online_cpus()	cpumask_weight(cpu_online_mask)
+#define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
+#define num_present_cpus()	cpumask_weight(cpu_present_mask)
+#define cpu_online(cpu)		cpumask_test_cpu((cpu), cpu_online_mask)
+#define cpu_possible(cpu)	cpumask_test_cpu((cpu), cpu_possible_mask)
+#define cpu_present(cpu)	cpumask_test_cpu((cpu), cpu_present_mask)
+#define cpu_active(cpu)		cpumask_test_cpu((cpu), cpu_active_mask)
 #else
 #define num_online_cpus()	1
 #define num_possible_cpus()	1
@@ -485,10 +504,6 @@ extern const struct cpumask *const cpu_active_mask;
 
 #define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
 
-#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map)
-#define for_each_online_cpu(cpu)   for_each_cpu_mask_nr((cpu), cpu_online_map)
-#define for_each_present_cpu(cpu)  for_each_cpu_mask_nr((cpu), cpu_present_map)
-
 /* These are the new versions of the cpumask operators: passed by pointer.
  * The older versions will be implemented in terms of these, then deleted. */
 #define cpumask_bits(maskp) ((maskp)->bits)
@@ -676,7 +691,7 @@ static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp)
  * No static inline type checking - see Subtlety (1) above.
  */
 #define cpumask_test_cpu(cpu, cpumask) \
-	test_bit(cpumask_check(cpu), (cpumask)->bits)
+	test_bit(cpumask_check(cpu), cpumask_bits((cpumask)))
 
 /**
  * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask
@@ -919,7 +934,7 @@ static inline void cpumask_copy(struct cpumask *dstp,
 static inline int cpumask_scnprintf(char *buf, int len,
 				    const struct cpumask *srcp)
 {
-	return bitmap_scnprintf(buf, len, srcp->bits, nr_cpumask_bits);
+	return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpumask_bits);
 }
 
 /**
@@ -933,7 +948,7 @@ static inline int cpumask_scnprintf(char *buf, int len,
 static inline int cpumask_parse_user(const char __user *buf, int len,
 				     struct cpumask *dstp)
 {
-	return bitmap_parse_user(buf, len, dstp->bits, nr_cpumask_bits);
+	return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
@@ -948,7 +963,8 @@ static inline int cpumask_parse_user(const char __user *buf, int len,
 static inline int cpulist_scnprintf(char *buf, int len,
 				    const struct cpumask *srcp)
 {
-	return bitmap_scnlistprintf(buf, len, srcp->bits, nr_cpumask_bits);
+	return bitmap_scnlistprintf(buf, len, cpumask_bits(srcp),
+				    nr_cpumask_bits);
 }
 
 /**
@@ -961,26 +977,7 @@ static inline int cpulist_scnprintf(char *buf, int len,
  */
 static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
 {
-	return bitmap_parselist(buf, dstp->bits, nr_cpumask_bits);
-}
-
-/**
- * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
- * @bitmap: the bitmap
- *
- * There are a few places where cpumask_var_t isn't appropriate and
- * static cpumasks must be used (eg. very early boot), yet we don't
- * expose the definition of 'struct cpumask'.
- *
- * This does the conversion, and can be used as a constant initializer.
- */
-#define to_cpumask(bitmap)						\
-	((struct cpumask *)(1 ? (bitmap)				\
-			    : (void *)sizeof(__check_is_bitmap(bitmap))))
-
-static inline int __check_is_bitmap(const unsigned long *bitmap)
-{
-	return 1;
+	return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
@@ -1055,6 +1052,10 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
 /* First bits of cpu_bit_bitmap are in fact unset. */
 #define cpu_none_mask to_cpumask(cpu_bit_bitmap[0])
 
+#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
+#define for_each_online_cpu(cpu)   for_each_cpu((cpu), cpu_online_mask)
+#define for_each_present_cpu(cpu)  for_each_cpu((cpu), cpu_present_mask)
+
 /* Wrappers for arch boot code to manipulate normally-constant masks */
 static inline void set_cpu_possible(unsigned int cpu, bool possible)
 {
-- 
cgit v0.10.2


From 3fa41520696fec2815e2d88fbcccdda77ba4d693 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:16 +1030
Subject: cpumask: make set_cpu_*/init_cpu_* out-of-line

They're only for use in boot/cpu hotplug code anyway, and this avoids
the use of deprecated cpu_*_map.

Stephen Rothwell points out that gcc 4.2.4 (on powerpc at least)
didn't like the cast away of const anyway:

  include/linux/cpumask.h: In function 'set_cpu_possible':
  include/linux/cpumask.h:1052: warning: passing argument 2 of 'cpumask_set_cpu' discards qualifiers from pointer target type

So this kills two birds with one stone.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index e62a671..7c178a6 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1057,50 +1057,11 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
 #define for_each_present_cpu(cpu)  for_each_cpu((cpu), cpu_present_mask)
 
 /* Wrappers for arch boot code to manipulate normally-constant masks */
-static inline void set_cpu_possible(unsigned int cpu, bool possible)
-{
-	if (possible)
-		cpumask_set_cpu(cpu, &cpu_possible_map);
-	else
-		cpumask_clear_cpu(cpu, &cpu_possible_map);
-}
-
-static inline void set_cpu_present(unsigned int cpu, bool present)
-{
-	if (present)
-		cpumask_set_cpu(cpu, &cpu_present_map);
-	else
-		cpumask_clear_cpu(cpu, &cpu_present_map);
-}
-
-static inline void set_cpu_online(unsigned int cpu, bool online)
-{
-	if (online)
-		cpumask_set_cpu(cpu, &cpu_online_map);
-	else
-		cpumask_clear_cpu(cpu, &cpu_online_map);
-}
-
-static inline void set_cpu_active(unsigned int cpu, bool active)
-{
-	if (active)
-		cpumask_set_cpu(cpu, &cpu_active_map);
-	else
-		cpumask_clear_cpu(cpu, &cpu_active_map);
-}
-
-static inline void init_cpu_present(const struct cpumask *src)
-{
-	cpumask_copy(&cpu_present_map, src);
-}
-
-static inline void init_cpu_possible(const struct cpumask *src)
-{
-	cpumask_copy(&cpu_possible_map, src);
-}
-
-static inline void init_cpu_online(const struct cpumask *src)
-{
-	cpumask_copy(&cpu_online_map, src);
-}
+void set_cpu_possible(unsigned int cpu, bool possible);
+void set_cpu_present(unsigned int cpu, bool present);
+void set_cpu_online(unsigned int cpu, bool online);
+void set_cpu_active(unsigned int cpu, bool active);
+void init_cpu_present(const struct cpumask *src);
+void init_cpu_possible(const struct cpumask *src);
+void init_cpu_online(const struct cpumask *src);
 #endif /* __LINUX_CPUMASK_H */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3ddc509..2c9f78f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -500,3 +500,50 @@ EXPORT_SYMBOL(cpu_present_mask);
 static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
 const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
 EXPORT_SYMBOL(cpu_active_mask);
+
+void set_cpu_possible(unsigned int cpu, bool possible)
+{
+	if (possible)
+		cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
+	else
+		cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
+}
+
+void set_cpu_present(unsigned int cpu, bool present)
+{
+	if (present)
+		cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
+	else
+		cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
+}
+
+void set_cpu_online(unsigned int cpu, bool online)
+{
+	if (online)
+		cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
+	else
+		cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
+}
+
+void set_cpu_active(unsigned int cpu, bool active)
+{
+	if (active)
+		cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
+	else
+		cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
+}
+
+void init_cpu_present(const struct cpumask *src)
+{
+	cpumask_copy(to_cpumask(cpu_present_bits), src);
+}
+
+void init_cpu_possible(const struct cpumask *src)
+{
+	cpumask_copy(to_cpumask(cpu_possible_bits), src);
+}
+
+void init_cpu_online(const struct cpumask *src)
+{
+	cpumask_copy(to_cpumask(cpu_online_bits), src);
+}
-- 
cgit v0.10.2


From 54b11e6d57a10aa9d0009efd93873e17bffd5d30 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:16 +1030
Subject: cpumask: smp_call_function_many()

Impact: Implementation change to remove cpumask_t from stack.

Actually change smp_call_function_mask() to smp_call_function_many().
We avoid cpumasks on the stack in this version.

(S390 has its own version, but that's going away apparently).

We have to do some dancing to figure out if 0 or 1 other cpus are in
the mask supplied and the online mask without allocating a tmp
cpumask.  It's still fairly cheap.

We allocate the cpumask at the end of the call_function_data
structure: if allocation fails we fallback to smp_call_function_single
rather than using the baroque quiescing code (which needs a cpumask on
stack).

(Thanks to Hiroshi Shimamoto for spotting several bugs in previous versions!)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: npiggin@suse.de
Cc: axboe@kernel.dk

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 2f85f3b..b824669 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -67,15 +67,16 @@ extern void smp_cpus_done(unsigned int max_cpus);
  * Call a function on all other processors
  */
 int smp_call_function(void(*func)(void *info), void *info, int wait);
-/* Deprecated: use smp_call_function_many() which uses a cpumask ptr. */
-int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
-				int wait);
+void smp_call_function_many(const struct cpumask *mask,
+			    void (*func)(void *info), void *info, bool wait);
 
-static inline void smp_call_function_many(const struct cpumask *mask,
-					  void (*func)(void *info), void *info,
-					  int wait)
+/* Deprecated: Use smp_call_function_many which takes a pointer to the mask. */
+static inline int
+smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
+		       int wait)
 {
-	smp_call_function_mask(*mask, func, info, wait);
+	smp_call_function_many(&mask, func, info, wait);
+	return 0;
 }
 
 int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
diff --git a/kernel/smp.c b/kernel/smp.c
index 75c8dde..9f0eafe 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -24,8 +24,8 @@ struct call_function_data {
 	struct call_single_data csd;
 	spinlock_t lock;
 	unsigned int refs;
-	cpumask_t cpumask;
 	struct rcu_head rcu_head;
+	unsigned long cpumask_bits[];
 };
 
 struct call_single_queue {
@@ -110,13 +110,13 @@ void generic_smp_call_function_interrupt(void)
 	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
 		int refs;
 
-		if (!cpu_isset(cpu, data->cpumask))
+		if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
 			continue;
 
 		data->csd.func(data->csd.info);
 
 		spin_lock(&data->lock);
-		cpu_clear(cpu, data->cpumask);
+		cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
 		WARN_ON(data->refs == 0);
 		data->refs--;
 		refs = data->refs;
@@ -266,51 +266,13 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
 	generic_exec_single(cpu, data);
 }
 
-/* Dummy function */
-static void quiesce_dummy(void *unused)
-{
-}
-
-/*
- * Ensure stack based data used in call function mask is safe to free.
- *
- * This is needed by smp_call_function_mask when using on-stack data, because
- * a single call function queue is shared by all CPUs, and any CPU may pick up
- * the data item on the queue at any time before it is deleted. So we need to
- * ensure that all CPUs have transitioned through a quiescent state after
- * this call.
- *
- * This is a very slow function, implemented by sending synchronous IPIs to
- * all possible CPUs. For this reason, we have to alloc data rather than use
- * stack based data even in the case of synchronous calls. The stack based
- * data is then just used for deadlock/oom fallback which will be very rare.
- *
- * If a faster scheme can be made, we could go back to preferring stack based
- * data -- the data allocation/free is non-zero cost.
- */
-static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
-{
-	struct call_single_data data;
-	int cpu;
-
-	data.func = quiesce_dummy;
-	data.info = NULL;
-
-	for_each_cpu_mask(cpu, mask) {
-		data.flags = CSD_FLAG_WAIT;
-		generic_exec_single(cpu, &data);
-	}
-}
-
 /**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on.
+ * smp_call_function_many(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on (only runs on online subset).
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
  * @wait: If true, wait (atomically) until function has completed on other CPUs.
  *
- * Returns 0 on success, else a negative status code.
- *
  * If @wait is true, then returns once @func has returned. Note that @wait
  * will be implicitly turned on in case of allocation failures, since
  * we fall back to on-stack allocation.
@@ -319,53 +281,57 @@ static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
  * hardware interrupt handler or from a bottom half handler. Preemption
  * must be disabled when calling this function.
  */
-int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
-			   int wait)
+void smp_call_function_many(const struct cpumask *mask,
+			    void (*func)(void *), void *info,
+			    bool wait)
 {
-	struct call_function_data d;
-	struct call_function_data *data = NULL;
-	cpumask_t allbutself;
+	struct call_function_data *data;
 	unsigned long flags;
-	int cpu, num_cpus;
-	int slowpath = 0;
+	int cpu, next_cpu;
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
 
-	cpu = smp_processor_id();
-	allbutself = cpu_online_map;
-	cpu_clear(cpu, allbutself);
-	cpus_and(mask, mask, allbutself);
-	num_cpus = cpus_weight(mask);
-
-	/*
-	 * If zero CPUs, return. If just a single CPU, turn this request
-	 * into a targetted single call instead since it's faster.
-	 */
-	if (!num_cpus)
-		return 0;
-	else if (num_cpus == 1) {
-		cpu = first_cpu(mask);
-		return smp_call_function_single(cpu, func, info, wait);
+	/* So, what's a CPU they want?  Ignoring this one. */
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	if (cpu == smp_processor_id())
+		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
+	/* No online cpus?  We're done. */
+	if (cpu >= nr_cpu_ids)
+		return;
+
+	/* Do we have another CPU which isn't us? */
+	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
+	if (next_cpu == smp_processor_id())
+		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
+
+	/* Fastpath: do that cpu by itself. */
+	if (next_cpu >= nr_cpu_ids) {
+		smp_call_function_single(cpu, func, info, wait);
+		return;
 	}
 
-	data = kmalloc(sizeof(*data), GFP_ATOMIC);
-	if (data) {
-		data->csd.flags = CSD_FLAG_ALLOC;
-		if (wait)
-			data->csd.flags |= CSD_FLAG_WAIT;
-	} else {
-		data = &d;
-		data->csd.flags = CSD_FLAG_WAIT;
-		wait = 1;
-		slowpath = 1;
+	data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
+	if (unlikely(!data)) {
+		/* Slow path. */
+		for_each_online_cpu(cpu) {
+			if (cpu == smp_processor_id())
+				continue;
+			if (cpumask_test_cpu(cpu, mask))
+				smp_call_function_single(cpu, func, info, wait);
+		}
+		return;
 	}
 
 	spin_lock_init(&data->lock);
+	data->csd.flags = CSD_FLAG_ALLOC;
+	if (wait)
+		data->csd.flags |= CSD_FLAG_WAIT;
 	data->csd.func = func;
 	data->csd.info = info;
-	data->refs = num_cpus;
-	data->cpumask = mask;
+	cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
+	data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));
 
 	spin_lock_irqsave(&call_function_lock, flags);
 	list_add_tail_rcu(&data->csd.list, &call_function_queue);
@@ -377,18 +343,13 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
 	smp_mb();
 
 	/* Send a message to all CPUs in the map */
-	arch_send_call_function_ipi(mask);
+	arch_send_call_function_ipi(*to_cpumask(data->cpumask_bits));
 
 	/* optionally wait for the CPUs to complete */
-	if (wait) {
+	if (wait)
 		csd_flag_wait(&data->csd);
-		if (unlikely(slowpath))
-			smp_call_function_mask_quiesce_stack(mask);
-	}
-
-	return 0;
 }
-EXPORT_SYMBOL(smp_call_function_mask);
+EXPORT_SYMBOL(smp_call_function_many);
 
 /**
  * smp_call_function(): Run a function on all other CPUs.
@@ -396,7 +357,7 @@ EXPORT_SYMBOL(smp_call_function_mask);
  * @info: An arbitrary pointer to pass to the function.
  * @wait: If true, wait (atomically) until function has completed on other CPUs.
  *
- * Returns 0 on success, else a negative status code.
+ * Returns 0.
  *
  * If @wait is true, then returns once @func has returned; otherwise
  * it returns just before the target cpu calls @func. In case of allocation
@@ -407,12 +368,10 @@ EXPORT_SYMBOL(smp_call_function_mask);
  */
 int smp_call_function(void (*func)(void *), void *info, int wait)
 {
-	int ret;
-
 	preempt_disable();
-	ret = smp_call_function_mask(cpu_online_map, func, info, wait);
+	smp_call_function_many(cpu_online_mask, func, info, wait);
 	preempt_enable();
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(smp_call_function);
 
-- 
cgit v0.10.2


From ce47d974f71af26d00832e83a43ac79bec272d99 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:17 +1030
Subject: cpumask: arch_send_call_function_ipi_mask: core

Impact: new API to reduce stack usage

We're weaning the core code off handing cpumask's around on-stack.
This introduces arch_send_call_function_ipi_mask().

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/kernel/smp.c b/kernel/smp.c
index 9f0eafe..172b182 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -266,6 +266,12 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
 	generic_exec_single(cpu, data);
 }
 
+/* FIXME: Shim for archs using old arch_send_call_function_ipi API. */
+#ifndef arch_send_call_function_ipi_mask
+#define arch_send_call_function_ipi_mask(maskp) \
+	arch_send_call_function_ipi(*(maskp))
+#endif
+
 /**
  * smp_call_function_many(): Run a function on a set of other CPUs.
  * @mask: The set of cpus to run on (only runs on online subset).
@@ -343,7 +349,7 @@ void smp_call_function_many(const struct cpumask *mask,
 	smp_mb();
 
 	/* Send a message to all CPUs in the map */
-	arch_send_call_function_ipi(*to_cpumask(data->cpumask_bits));
+	arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits));
 
 	/* optionally wait for the CPUs to complete */
 	if (wait)
-- 
cgit v0.10.2


From 259c4ddd00237e5072921afa15a900839643fd98 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:17 +1030
Subject: cpumask: use for_each_online_cpu() in
 drivers/infiniband/hw/ehca/ehca_irq.c

Impact: cleanup

In future, accessing cpu numbers beyond nr_cpu_ids (the runtime limit)
will be undefined.  We can avoid future problems by using
for_each_online_cpu() here.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Tested-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Cc: Christoph Raisch <raisch@de.ibm.com>

diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 757035e..6305209 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -934,10 +934,9 @@ void ehca_destroy_comp_pool(void)
 
 	unregister_hotcpu_notifier(&comp_pool_callback_nb);
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_online(i))
-			destroy_comp_task(pool, i);
-	}
+	for_each_online_cpu(i)
+		destroy_comp_task(pool, i);
+
 	free_percpu(pool->cpu_comp_tasks);
 	kfree(pool);
 }
-- 
cgit v0.10.2


From b29179c3d32021d79c11ece7199a1da41d31b1b7 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:18 +1030
Subject: cpumask: use new cpumask API in drivers/infiniband/hw/ehca

Impact: cleanup

We're moving from handing around cpumask_t's to handing around struct
cpumask *'s.  cpus_*, cpumask_t and cpu_*_map are deprecated: convert
to cpumask_*, cpu_*_mask.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Tested-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Cc: Christoph Raisch <raisch@de.ibm.com>

diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 6305209..3128a50 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -659,12 +659,12 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
 
 	WARN_ON_ONCE(!in_interrupt());
 	if (ehca_debug_level >= 3)
-		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
+		ehca_dmp(cpu_online_mask, cpumask_size(), "");
 
 	spin_lock_irqsave(&pool->last_cpu_lock, flags);
-	cpu = next_cpu_nr(pool->last_cpu, cpu_online_map);
+	cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
 	if (cpu >= nr_cpu_ids)
-		cpu = first_cpu(cpu_online_map);
+		cpu = cpumask_first(cpu_online_mask);
 	pool->last_cpu = cpu;
 	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
 
@@ -855,7 +855,7 @@ static int __cpuinit comp_pool_callback(struct notifier_block *nfb,
 	case CPU_UP_CANCELED_FROZEN:
 		ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
 		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-		kthread_bind(cct->task, any_online_cpu(cpu_online_map));
+		kthread_bind(cct->task, cpumask_any(cpu_online_mask));
 		destroy_comp_task(pool, cpu);
 		break;
 	case CPU_ONLINE:
@@ -902,7 +902,7 @@ int ehca_create_comp_pool(void)
 		return -ENOMEM;
 
 	spin_lock_init(&pool->last_cpu_lock);
-	pool->last_cpu = any_online_cpu(cpu_online_map);
+	pool->last_cpu = cpumask_any(cpu_online_mask);
 
 	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
 	if (pool->cpu_comp_tasks == NULL) {
-- 
cgit v0.10.2


From cbe31f02f5b5536f17dd978118e25052af528071 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:18 +1030
Subject: cpumask: use new cpumask API in drivers/infiniband/hw/ipath

Impact: cleanup

We're moving from handing around cpumask_t's to handing around struct
cpumask *'s.  cpus_*, cpumask_t and cpu_*_map are deprecated: convert
to cpumask_*, cpu_*_mask.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ralph Campbell <infinipath@qlogic.com>

diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 239d4e8..2317398 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1679,7 +1679,7 @@ static int find_best_unit(struct file *fp,
 	 * InfiniPath chip to that processor (we assume reasonable connectivity,
 	 * for now).  This code assumes that if affinity has been set
 	 * before this point, that at most one cpu is set; for now this
-	 * is reasonable.  I check for both cpus_empty() and cpus_full(),
+	 * is reasonable.  I check for both cpumask_empty() and cpumask_full(),
 	 * in case some kernel variant sets none of the bits when no
 	 * affinity is set.  2.6.11 and 12 kernels have all present
 	 * cpus set.  Some day we'll have to fix it up further to handle
@@ -1688,11 +1688,11 @@ static int find_best_unit(struct file *fp,
 	 * information.  There may be some issues with dual core numbering
 	 * as well.  This needs more work prior to release.
 	 */
-	if (!cpus_empty(current->cpus_allowed) &&
-	    !cpus_full(current->cpus_allowed)) {
+	if (!cpumask_empty(&current->cpus_allowed) &&
+	    !cpumask_full(&current->cpus_allowed)) {
 		int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
 		for (i = 0; i < ncpus; i++)
-			if (cpu_isset(i, current->cpus_allowed)) {
+			if (cpumask_test_cpu(i, &current->cpus_allowed)) {
 				ipath_cdbg(PROC, "%s[%u] affinity set for "
 					   "cpu %d/%d\n", current->comm,
 					   current->pid, i, ncpus);
-- 
cgit v0.10.2


From e12f0102ac81d660c9f801d0a0e10ccf4537a9de Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:19 +1030
Subject: cpumask: Use nr_cpu_ids in seq_cpumask

Impact: cleanup, futureproof

nr_cpu_ids is the (badly named) runtime limit on possible CPU numbers;
ie. the variable version of NR_CPUS.

With the new cpumask operators, only bits less than this are defined.
So we should use it everywhere, rather than NR_CPUS.  Eventually this
will make it possible to allocate cpumasks of the minimal length at runtime.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 952e018..40ea505 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -54,7 +54,7 @@ int seq_bitmap(struct seq_file *m, const unsigned long *bits,
 				   unsigned int nr_bits);
 static inline int seq_cpumask(struct seq_file *m, const struct cpumask *mask)
 {
-	return seq_bitmap(m, mask->bits, NR_CPUS);
+	return seq_bitmap(m, mask->bits, nr_cpu_ids);
 }
 
 static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask)
-- 
cgit v0.10.2


From 412a1be265b894a45cebbfc2b57eb7a593bf34b2 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 21:44:12 +0530
Subject: x86: amd_iommu_init.c: iommu_enable and iommu_enable_event_logging
 should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/amd_iommu_init.c:246:13: warning: symbol 'iommu_enable' was not declared. Should it be static?
arch/x86/kernel/amd_iommu_init.c:259:13: warning: symbol 'iommu_enable_event_logging' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c625800..fb85e8d 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -243,7 +243,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 }
 
 /* Function to enable the hardware */
-void __init iommu_enable(struct amd_iommu *iommu)
+static void __init iommu_enable(struct amd_iommu *iommu)
 {
 	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
 	       "at %02x:%02x.%x cap 0x%hx\n",
@@ -256,7 +256,7 @@ void __init iommu_enable(struct amd_iommu *iommu)
 }
 
 /* Function to enable IOMMU event logging and event interrupts */
-void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
 {
 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
-- 
cgit v0.10.2


From 557f687c87ddb8adb094b2dad4e1c83c7717982d Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 21:45:22 +0530
Subject: x86: amd_iommu.c: prealloc_protection_domains should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/amd_iommu.c:1299:6: warning: symbol 'prealloc_protection_domains' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2e2da71..658e29e 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1296,7 +1296,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
  * we don't need to preallocate the protection domains anymore.
  * For now we have to.
  */
-void prealloc_protection_domains(void)
+static void prealloc_protection_domains(void)
 {
 	struct pci_dev *dev = NULL;
 	struct dma_ops_domain *dma_dom;
-- 
cgit v0.10.2


From 4d08d97f5262dab4482af5bc91b30af4ca02269e Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 22:11:40 +0530
Subject: x86: genx2apic_phys.c: x2apic_send_IPI_self and init_x2apic_ldr
 should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warnings

Fixes sparse warnings:
arch/x86/kernel/genx2apic_phys.c:164:6: warning: symbol 'x2apic_send_IPI_self' was not declared. Should it be static?
arch/x86/kernel/genx2apic_phys.c:169:6: warning: symbol 'init_x2apic_ldr' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211..a177c78 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -123,12 +123,12 @@ static unsigned int phys_pkg_id(int index_msb)
 	return current_cpu_data.initial_apicid >> index_msb;
 }
 
-void x2apic_send_IPI_self(int vector)
+static void x2apic_send_IPI_self(int vector)
 {
 	apic_write(APIC_SELF_IPI, vector);
 }
 
-void init_x2apic_ldr(void)
+static void init_x2apic_ldr(void)
 {
 	return;
 }
-- 
cgit v0.10.2


From c62e9d56ea90ef94f9708ce3f11860c20fa5e135 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 22:12:50 +0530
Subject: x86: bios_uv.c: uv_systab should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/bios_uv.c:28:18: warning: symbol 'uv_systab' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 2a0a2a3..f638827 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,7 +25,7 @@
 #include <asm/uv/bios.h>
 #include <asm/uv/uv_hub.h>
 
-struct uv_systab uv_systab;
+static struct uv_systab uv_systab;
 
 s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 {
-- 
cgit v0.10.2


From ec8c842a524888fdcccece337d91798e3e8af880 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Tue, 30 Dec 2008 22:46:36 +0530
Subject: x86: apic.c: xapic_icr_read and x2apic_icr_read should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/apic.c:270:5: warning: symbol 'x2apic_icr_read' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index e644bf6..ab1d51a 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -54,7 +54,6 @@ extern int disable_apic;
 extern int is_vsmp_box(void);
 extern void xapic_wait_icr_idle(void);
 extern u32 safe_xapic_wait_icr_idle(void);
-extern u64 xapic_icr_read(void);
 extern void xapic_icr_write(u32, u32);
 extern int setup_profiling_timer(unsigned int);
 
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index c67722f..66198cb 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -228,7 +228,7 @@ void xapic_icr_write(u32 low, u32 id)
 	apic_write(APIC_ICR, low);
 }
 
-u64 xapic_icr_read(void)
+static u64 xapic_icr_read(void)
 {
 	u32 icr1, icr2;
 
@@ -268,7 +268,7 @@ void x2apic_icr_write(u32 low, u32 id)
 	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
 }
 
-u64 x2apic_icr_read(void)
+static u64 x2apic_icr_read(void)
 {
 	unsigned long val;
 
-- 
cgit v0.10.2


From fa95826fe0ddbc2a55373134d8d1a21b49d13434 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Tue, 30 Dec 2008 20:13:49 +0530
Subject: x86: uv_bau.h: fix dubious bitfield

Impact: cleanup, avoid sparse warnings

declare bitfield as unsigned to avoid dubious bitfield issue

 CHECK   arch/x86/kernel/tlb_64.c
arch/x86/include/asm/uv/uv_bau.h:136:22: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:138:25: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:140:15: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:143:14: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:146:14: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:149:18: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:151:18: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:155:14: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:159:18: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:173:19: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:181:16: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:185:18: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:188:16: error: dubious one-bit signed bitfield

 CHECK   arch/x86/kernel/tlb_uv.c
arch/x86/include/asm/uv/uv_bau.h:136:22: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:138:25: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:140:15: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:143:14: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:146:14: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:149:18: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:151:18: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:155:14: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:159:18: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:173:19: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:181:16: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:185:18: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:188:16: error: dubious one-bit signed bitfield

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index e236325..50423c7 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -133,61 +133,61 @@ struct bau_msg_payload {
  * see table 4.2.3.0.1 in broacast_assist spec.
  */
 struct bau_msg_header {
-	int dest_subnodeid:6;	/* must be zero */
+	unsigned int dest_subnodeid:6;	/* must be zero */
 	/* bits 5:0 */
-	int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */
-	/* bits 20:6 */
-	int command:8;		/* message type */
+	unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */
+	/* bits 20:6 */			  /* first bit in node_map */
+	unsigned int command:8;	/* message type */
 	/* bits 28:21 */
 				/* 0x38: SN3net EndPoint Message */
-	int rsvd_1:3;		/* must be zero */
+	unsigned int rsvd_1:3;	/* must be zero */
 	/* bits 31:29 */
 				/* int will align on 32 bits */
-	int rsvd_2:9;		/* must be zero */
+	unsigned int rsvd_2:9;	/* must be zero */
 	/* bits 40:32 */
 				/* Suppl_A is 56-41 */
-	int payload_2a:8;	/* becomes byte 16 of msg */
+	unsigned int payload_2a:8;/* becomes byte 16 of msg */
 	/* bits 48:41 */	/* not currently using */
-	int payload_2b:8;	/* becomes byte 17 of msg */
+	unsigned int payload_2b:8;/* becomes byte 17 of msg */
 	/* bits 56:49 */	/* not currently using */
 				/* Address field (96:57) is never used as an
 				   address (these are address bits 42:3) */
-	int rsvd_3:1;		/* must be zero */
+	unsigned int rsvd_3:1;	/* must be zero */
 	/* bit 57 */
 				/* address bits 27:4 are payload */
 				/* these 24 bits become bytes 12-14 of msg */
-	int replied_to:1;	/* sent as 0 by the source to byte 12 */
+	unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */
 	/* bit 58 */
 
-	int payload_1a:5;	/* not currently used */
+	unsigned int payload_1a:5;/* not currently used */
 	/* bits 63:59 */
-	int payload_1b:8;	/* not currently used */
+	unsigned int payload_1b:8;/* not currently used */
 	/* bits 71:64 */
-	int payload_1c:8;	/* not currently used */
+	unsigned int payload_1c:8;/* not currently used */
 	/* bits 79:72 */
-	int payload_1d:2;	/* not currently used */
+	unsigned int payload_1d:2;/* not currently used */
 	/* bits 81:80 */
 
-	int rsvd_4:7;		/* must be zero */
+	unsigned int rsvd_4:7;	/* must be zero */
 	/* bits 88:82 */
-	int sw_ack_flag:1;	/* software acknowledge flag */
+	unsigned int sw_ack_flag:1;/* software acknowledge flag */
 	/* bit 89 */
 				/* INTD trasactions at destination are to
 				   wait for software acknowledge */
-	int rsvd_5:6;		/* must be zero */
+	unsigned int rsvd_5:6;	/* must be zero */
 	/* bits 95:90 */
-	int rsvd_6:5;		/* must be zero */
+	unsigned int rsvd_6:5;	/* must be zero */
 	/* bits 100:96 */
-	int int_both:1;		/* if 1, interrupt both sockets on the blade */
+	unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */
 	/* bit 101*/
-	int fairness:3;		/* usually zero */
+	unsigned int fairness:3;/* usually zero */
 	/* bits 104:102 */
-	int multilevel:1;	/* multi-level multicast format */
+	unsigned int multilevel:1;	/* multi-level multicast format */
 	/* bit 105 */
 				/* 0 for TLB: endpoint multi-unicast messages */
-	int chaining:1;		/* next descriptor is part of this activation*/
+	unsigned int chaining:1;/* next descriptor is part of this activation*/
 	/* bit 106 */
-	int rsvd_7:21;		/* must be zero */
+	unsigned int rsvd_7:21;	/* must be zero */
 	/* bits 127:107 */
 };
 
-- 
cgit v0.10.2


From 7820b75643a763abf595c99fab963000ffc8b5f0 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Tue, 30 Dec 2008 22:05:55 +0530
Subject: x86: xsave.c: restore_user_xstate should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/xsave.c:162:5: warning: symbol 'restore_user_xstate' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 15c3e69..2b54fe0 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf)
  * Restore the extended state if present. Otherwise, restore the FP/SSE
  * state.
  */
-int restore_user_xstate(void __user *buf)
+static int restore_user_xstate(void __user *buf)
 {
 	struct _fpx_sw_bytes fx_sw_user;
 	u64 mask;
-- 
cgit v0.10.2


From 79807d075ab8d1ca3574f5f52421e0047c1f1256 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sat, 27 Dec 2008 19:18:00 +0200
Subject: UBIFS: fix constants initialization

The c->min_idx_lebs constant depends on c->old_idx_sz, which
is read from the master node. This means that we have to
initialize c->min_idx_lebs only after we have read the master
node.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index c3cefc8..1309783 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -602,7 +602,7 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
 }
 
 /*
- * init_constants_late - initialize UBIFS constants.
+ * init_constants_sb - initialize UBIFS constants.
  * @c: UBIFS file-system description object
  *
  * This is a helper function which initializes various UBIFS constants after
@@ -610,7 +610,7 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
  * makes sure they are all right. Returns zero in case of success and a
  * negative error code in case of failure.
  */
-static int init_constants_late(struct ubifs_info *c)
+static int init_constants_sb(struct ubifs_info *c)
 {
 	int tmp, err;
 	long long tmp64;
@@ -687,6 +687,21 @@ static int init_constants_late(struct ubifs_info *c)
 	if (err)
 		return err;
 
+	return 0;
+}
+
+/*
+ * init_constants_master - initialize UBIFS constants.
+ * @c: UBIFS file-system description object
+ *
+ * This is a helper function which initializes various UBIFS constants after
+ * the master node has been read. It also checks various UBIFS parameters and
+ * makes sure they are all right.
+ */
+static void init_constants_master(struct ubifs_info *c)
+{
+	long long tmp64;
+
 	c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
 
 	/*
@@ -702,8 +717,6 @@ static int init_constants_late(struct ubifs_info *c)
 	tmp64 *= (long long)c->leb_size - c->leb_overhead;
 	tmp64 = ubifs_reported_space(c, tmp64);
 	c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
-
-	return 0;
 }
 
 /**
@@ -1138,7 +1151,7 @@ static int mount_ubifs(struct ubifs_info *c)
 		goto out_free;
 	}
 
-	err = init_constants_late(c);
+	err = init_constants_sb(c);
 	if (err)
 		goto out_free;
 
@@ -1172,6 +1185,8 @@ static int mount_ubifs(struct ubifs_info *c)
 	if (err)
 		goto out_master;
 
+	init_constants_master(c);
+
 	if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
 		ubifs_msg("recovery needed");
 		c->need_recovery = 1;
-- 
cgit v0.10.2


From 304d427cd99eb645b44b08d77e70ce308e6bcd8c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 28 Dec 2008 08:04:17 +0200
Subject: UBIFS: fix file-system synchronization

Argh. The ->sync_fs call is called _before_ all inodes are flushed.
This means we first sync write buffers and commit, then all
inodes are synced, and we end up with unflushed write buffers!

Fix this by forcing synching all indoes from 'ubifs_sync_fs()'.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 1309783..4713017 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -35,6 +35,7 @@
 #include <linux/seq_file.h>
 #include <linux/mount.h>
 #include <linux/math64.h>
+#include <linux/writeback.h>
 #include "ubifs.h"
 
 /*
@@ -431,6 +432,23 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
 	struct ubifs_info *c = sb->s_fs_info;
 	int i, ret = 0, err;
 	long long bud_bytes;
+	struct writeback_control wbc = {
+		.sync_mode   = wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
+		.range_start = 0,
+		.range_end   = LLONG_MAX,
+		.nr_to_write = LONG_MAX,
+	};
+
+	/*
+	 * VFS calls '->sync_fs()' before synchronizing all dirty inodes and
+	 * pages, so synchronize them first, then commit the journal. Strictly
+	 * speaking, it is not necessary to commit the journal here,
+	 * synchronizing write-buffers would be enough. But committing makes
+	 * UBIFS free space predictions much more accurate, so we want to let
+	 * the user be able to get more accurate results of 'statfs()' after
+	 * they synchronize the file system.
+	 */
+	generic_sync_sb_inodes(sb, &wbc);
 
 	if (c->jheads) {
 		for (i = 0; i < c->jhead_cnt; i++) {
-- 
cgit v0.10.2


From f10383006c26b33539820759b9dc8656497b02a4 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 28 Dec 2008 08:16:32 +0200
Subject: UBIFS: always commit in sync_fs

Always run commit in sync_fs, because even if the journal seems
to be almost empty, there may be a deletion which removes a large
file, which affects the index greatly. And because we want
better free space predictions after 'sync_fs()', we have to
commit.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 4713017..ee8e774 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -429,9 +429,8 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
 
 static int ubifs_sync_fs(struct super_block *sb, int wait)
 {
+	int i, err;
 	struct ubifs_info *c = sb->s_fs_info;
-	int i, ret = 0, err;
-	long long bud_bytes;
 	struct writeback_control wbc = {
 		.sync_mode   = wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
 		.range_start = 0,
@@ -439,6 +438,19 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
 		.nr_to_write = LONG_MAX,
 	};
 
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
+
+	/*
+	 * Synchronize write buffers, because 'ubifs_run_commit()' does not
+	 * do this if it waits for an already running commit.
+	 */
+	for (i = 0; i < c->jhead_cnt; i++) {
+		err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+		if (err)
+			return err;
+	}
+
 	/*
 	 * VFS calls '->sync_fs()' before synchronizing all dirty inodes and
 	 * pages, so synchronize them first, then commit the journal. Strictly
@@ -450,30 +462,16 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
 	 */
 	generic_sync_sb_inodes(sb, &wbc);
 
-	if (c->jheads) {
-		for (i = 0; i < c->jhead_cnt; i++) {
-			err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
-			if (err && !ret)
-				ret = err;
-		}
-
-		/* Commit the journal unless it has too little data */
-		spin_lock(&c->buds_lock);
-		bud_bytes = c->bud_bytes;
-		spin_unlock(&c->buds_lock);
-		if (bud_bytes > c->leb_size) {
-			err = ubifs_run_commit(c);
-			if (err)
-				return err;
-		}
-	}
+	err = ubifs_run_commit(c);
+	if (err)
+		return err;
 
 	/*
 	 * We ought to call sync for c->ubi but it does not have one. If it had
 	 * it would in turn call mtd->sync, however mtd operations are
 	 * synchronous anyway, so we don't lose any sleep here.
 	 */
-	return ret;
+	return err;
 }
 
 /**
-- 
cgit v0.10.2


From cb5c6a2b2be59b480a3746c5113cb3411c053bff Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 28 Dec 2008 08:18:43 +0200
Subject: UBIFS: use ubi_sync

UBI now has (fake for now, though) synchronization call - use
it.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ee8e774..a14703e 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -466,12 +466,7 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
 	if (err)
 		return err;
 
-	/*
-	 * We ought to call sync for c->ubi but it does not have one. If it had
-	 * it would in turn call mtd->sync, however mtd operations are
-	 * synchronous anyway, so we don't lose any sleep here.
-	 */
-	return err;
+	return ubi_sync(c->vi.ubi_num);
 }
 
 /**
-- 
cgit v0.10.2


From 26d05777b0a23062a39e83c369c0a3583918f164 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 28 Dec 2008 09:11:02 +0200
Subject: UBIFS: always commit on unmount

UBIFS commits on unmount to make the next mount faster. Currently,
it commits only if there is more than LEB size bytes in the
journal. This is not very good, because journal size may be
large (512KiB). And there may be few deletions in the journal
which do not take much journal space, but which do introduce
a lot of TNC changes and make mount slow.

Thus, jurt remove this condition and always commit.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index a14703e..1c1bbe4 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1570,20 +1570,24 @@ out:
  * @c: UBIFS file-system description object
  *
  * This function is called during un-mounting and re-mounting, and it commits
- * the journal unless the "fast unmount" mode is enabled. It also avoids
- * committing the journal if it contains too few data.
+ * the journal unless the "fast unmount" mode is enabled.
  */
 static void commit_on_unmount(struct ubifs_info *c)
 {
-	if (!c->fast_unmount) {
-		long long bud_bytes;
+	struct super_block *sb = c->vfs_sb;
+	long long bud_bytes;
 
-		spin_lock(&c->buds_lock);
-		bud_bytes = c->bud_bytes;
-		spin_unlock(&c->buds_lock);
-		if (bud_bytes > c->leb_size)
-			ubifs_run_commit(c);
-	}
+	/*
+	 * This function is called before the background thread is stopped, so
+	 * we may race with ongoing commit, which means we have to take
+	 * @c->bud_lock to access @c->bud_bytes.
+	 */
+	spin_lock(&c->buds_lock);
+	bud_bytes = c->bud_bytes;
+	spin_unlock(&c->buds_lock);
+
+	if (!c->fast_unmount && !(sb->s_flags & MS_RDONLY) && bud_bytes)
+		ubifs_run_commit(c);
 }
 
 /**
@@ -2009,7 +2013,7 @@ static void ubifs_kill_sb(struct super_block *sb)
 	 * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()'
 	 * in order to be outside BKL.
 	 */
-	if (sb->s_root && !(sb->s_flags & MS_RDONLY))
+	if (sb->s_root)
 		commit_on_unmount(c);
 	/* The un-mount routine is actually done in put_super() */
 	generic_shutdown_super(sb);
-- 
cgit v0.10.2


From 6edbfafda682b30ad984964cc432da6fa1c8fab5 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 30 Dec 2008 20:06:49 +0200
Subject: UBIFS: restore budg_uncommitted_idx

UBIFS stores uncommitted index size in c->budg_uncommitted_idx,
and this affect budgeting calculations. When mounting and
replaying, this variable is not updated, so we may end up
with "over-budgeting". This patch fixes the issue.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 21f7d04..ce42a7b 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -144,7 +144,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
 		/*
 		 * If the replay order was perfect the dirty space would now be
 		 * zero. The order is not perfect because the the journal heads
-		 * race with eachother. This is not a problem but is does mean
+		 * race with each other. This is not a problem but is does mean
 		 * that the dirty space may temporarily exceed c->leb_size
 		 * during the replay.
 		 */
@@ -656,7 +656,7 @@ out_dump:
  * @dirty: amount of dirty space from padding and deletion nodes
  *
  * This function inserts a reference node to the replay tree and returns zero
- * in case of success ort a negative error code in case of failure.
+ * in case of success or a negative error code in case of failure.
  */
 static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
 			   unsigned long long sqnum, int free, int dirty)
@@ -883,7 +883,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 		 * This means that we reached end of log and now
 		 * look to the older log data, which was already
 		 * committed but the eraseblock was not erased (UBIFS
-		 * only unmaps it). So this basically means we have to
+		 * only un-maps it). So this basically means we have to
 		 * exit with "end of log" code.
 		 */
 		err = 1;
@@ -1062,6 +1062,15 @@ int ubifs_replay_journal(struct ubifs_info *c)
 	if (err)
 		goto out;
 
+	/*
+	 * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable
+	 * to roughly estimate index growth. Things like @c->min_idx_lebs
+	 * depend on it. This means we have to initialize it to make sure
+	 * budgeting works properly.
+	 */
+	c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
+	c->budg_uncommitted_idx *= c->max_idx_node_sz;
+
 	ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
 	dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
 		"highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
-- 
cgit v0.10.2


From 2edc2025c2583a18eafe5cdbc7deb36e320aaec5 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 22 Dec 2008 11:21:03 +0200
Subject: UBIFS: do not lie about used blocks

Do not force UBIFS return 0 used space when it is empty. It leads
to a situation when creating any file immediately produces tens of
used blocks, which looks very weird. It is better to be honest and
say that some blocks are used even if the FS is empty. And ext2
does the same.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 44cff80..3715d01 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -766,16 +766,6 @@ long long ubifs_get_free_space(struct ubifs_info *c)
 	min_idx_lebs = c->min_idx_lebs;
 	ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c));
 	outstanding = c->budg_data_growth + c->budg_dd_growth;
-
-	/*
-	 * Force the amount available to the total size reported if the used
-	 * space is zero.
-	 */
-	if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) {
-		spin_unlock(&c->space_lock);
-		return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT;
-	}
-
 	available = ubifs_calc_available(c, min_idx_lebs);
 
 	/*
-- 
cgit v0.10.2


From 2acf80675800d5e6775990d1280cca5c2ffb30e6 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 9 Dec 2008 11:04:40 -0500
Subject: UBIFS: simplify make_free_space

The 'make_free_space()' function was too complex and this patch
simplifies it. It also fixes a bug - the freespace test failed
straight away on UBI volumes with 512 bytes LEB size.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 3715d01..4d270f0 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -37,13 +37,10 @@
 /*
  * When pessimistic budget calculations say that there is no enough space,
  * UBIFS starts writing back dirty inodes and pages, doing garbage collection,
- * or committing. The below constants define maximum number of times UBIFS
+ * or committing. The below constant defines maximum number of times UBIFS
  * repeats the operations.
  */
-#define MAX_SHRINK_RETRIES 8
-#define MAX_GC_RETRIES     4
-#define MAX_CMT_RETRIES    2
-#define MAX_NOSPC_RETRIES  1
+#define MAX_MKSPC_RETRIES 3
 
 /*
  * The below constant defines amount of dirty pages which should be written
@@ -52,30 +49,6 @@
 #define NR_TO_WRITE 16
 
 /**
- * struct retries_info - information about re-tries while making free space.
- * @prev_liability: previous liability
- * @shrink_cnt: how many times the liability was shrinked
- * @shrink_retries: count of liability shrink re-tries (increased when
- *                  liability does not shrink)
- * @try_gc: GC should be tried first
- * @gc_retries: how many times GC was run
- * @cmt_retries: how many times commit has been done
- * @nospc_retries: how many times GC returned %-ENOSPC
- *
- * Since we consider budgeting to be the fast-path, and this structure has to
- * be allocated on stack and zeroed out, we make it smaller using bit-fields.
- */
-struct retries_info {
-	long long prev_liability;
-	unsigned int shrink_cnt;
-	unsigned int shrink_retries:5;
-	unsigned int try_gc:1;
-	unsigned int gc_retries:4;
-	unsigned int cmt_retries:3;
-	unsigned int nospc_retries:1;
-};
-
-/**
  * shrink_liability - write-back some dirty pages/inodes.
  * @c: UBIFS file-system description object
  * @nr_to_write: how many dirty pages to write-back
@@ -147,9 +120,25 @@ static int run_gc(struct ubifs_info *c)
 }
 
 /**
+ * get_liability - calculate current liability.
+ * @c: UBIFS file-system description object
+ *
+ * This function calculates and returns current UBIFS liability, i.e. the
+ * amount of bytes UBIFS has "promised" to write to the media.
+ */
+static long long get_liability(struct ubifs_info *c)
+{
+	long long liab;
+
+	spin_lock(&c->space_lock);
+	liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth;
+	spin_unlock(&c->space_lock);
+	return liab;
+}
+
+/**
  * make_free_space - make more free space on the file-system.
  * @c: UBIFS file-system description object
- * @ri: information about previous invocations of this function
  *
  * This function is called when an operation cannot be budgeted because there
  * is supposedly no free space. But in most cases there is some free space:
@@ -165,87 +154,42 @@ static int run_gc(struct ubifs_info *c)
  * Returns %-ENOSPC if it couldn't do more free space, and other negative error
  * codes on failures.
  */
-static int make_free_space(struct ubifs_info *c, struct retries_info *ri)
+static int make_free_space(struct ubifs_info *c)
 {
-	int err;
-
-	/*
-	 * If we have some dirty pages and inodes (liability), try to write
-	 * them back unless this was tried too many times without effect
-	 * already.
-	 */
-	if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) {
-		long long liability;
-
-		spin_lock(&c->space_lock);
-		liability = c->budg_idx_growth + c->budg_data_growth +
-			    c->budg_dd_growth;
-		spin_unlock(&c->space_lock);
+	int err, retries = 0;
+	long long liab1, liab2;
 
-		if (ri->prev_liability >= liability) {
-			/* Liability does not shrink, next time try GC then */
-			ri->shrink_retries += 1;
-			if (ri->gc_retries < MAX_GC_RETRIES)
-				ri->try_gc = 1;
-			dbg_budg("liability did not shrink: retries %d of %d",
-				 ri->shrink_retries, MAX_SHRINK_RETRIES);
-		}
-
-		dbg_budg("force write-back (count %d)", ri->shrink_cnt);
-		shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt);
+	do {
+		liab1 = get_liability(c);
+		/*
+		 * We probably have some dirty pages or inodes (liability), try
+		 * to write them back.
+		 */
+		dbg_budg("liability %lld, run write-back", liab1);
+		shrink_liability(c, NR_TO_WRITE);
 
-		ri->prev_liability = liability;
-		ri->shrink_cnt += 1;
-		return -EAGAIN;
-	}
+		liab2 = get_liability(c);
+		if (liab2 < liab1)
+			return -EAGAIN;
 
-	/*
-	 * Try to run garbage collector unless it was already tried too many
-	 * times.
-	 */
-	if (ri->gc_retries < MAX_GC_RETRIES) {
-		ri->gc_retries += 1;
-		dbg_budg("run GC, retries %d of %d",
-			 ri->gc_retries, MAX_GC_RETRIES);
+		dbg_budg("new liability %lld (not shrinked)", liab2);
 
-		ri->try_gc = 0;
+		/* Liability did not shrink again, try GC */
+		dbg_budg("Run GC");
 		err = run_gc(c);
 		if (!err)
 			return -EAGAIN;
 
-		if (err == -EAGAIN) {
-			dbg_budg("GC asked to commit");
-			err = ubifs_run_commit(c);
-			if (err)
-				return err;
-			return -EAGAIN;
-		}
-
-		if (err != -ENOSPC)
+		if (err != -EAGAIN && err != -ENOSPC)
+			/* Some real error happened */
 			return err;
 
-		/*
-		 * GC could not make any progress. If this is the first time,
-		 * then it makes sense to try to commit, because it might make
-		 * some dirty space.
-		 */
-		dbg_budg("GC returned -ENOSPC, retries %d",
-			 ri->nospc_retries);
-		if (ri->nospc_retries >= MAX_NOSPC_RETRIES)
-			return err;
-		ri->nospc_retries += 1;
-	}
-
-	/* Neither GC nor write-back helped, try to commit */
-	if (ri->cmt_retries < MAX_CMT_RETRIES) {
-		ri->cmt_retries += 1;
-		dbg_budg("run commit, retries %d of %d",
-			 ri->cmt_retries, MAX_CMT_RETRIES);
+		dbg_budg("Run commit (retries %d)", retries);
 		err = ubifs_run_commit(c);
 		if (err)
 			return err;
-		return -EAGAIN;
-	}
+	} while (retries++ < MAX_MKSPC_RETRIES);
+
 	return -ENOSPC;
 }
 
@@ -523,8 +467,7 @@ static int calc_dd_growth(const struct ubifs_info *c,
 int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
 {
 	int uninitialized_var(cmt_retries), uninitialized_var(wb_retries);
-	int err, idx_growth, data_growth, dd_growth;
-	struct retries_info ri;
+	int err, idx_growth, data_growth, dd_growth, retried = 0;
 
 	ubifs_assert(req->new_page <= 1);
 	ubifs_assert(req->dirtied_page <= 1);
@@ -542,7 +485,6 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
 	if (!data_growth && !dd_growth)
 		return 0;
 	idx_growth = calc_idx_growth(c, req);
-	memset(&ri, 0, sizeof(struct retries_info));
 
 again:
 	spin_lock(&c->space_lock);
@@ -580,12 +522,17 @@ again:
 		return err;
 	}
 
-	err = make_free_space(c, &ri);
+	err = make_free_space(c);
+	cond_resched();
 	if (err == -EAGAIN) {
 		dbg_budg("try again");
-		cond_resched();
 		goto again;
 	} else if (err == -ENOSPC) {
+		if (!retried) {
+			retried = 1;
+			dbg_budg("-ENOSPC, but anyway try once again");
+			goto again;
+		}
 		dbg_budg("FS is full, -ENOSPC");
 		c->nospace = 1;
 		if (can_use_rp(c) || c->rp_size == 0)
-- 
cgit v0.10.2


From 6a4a9b438fe43397f4652853838f284cddd629b5 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 28 Dec 2008 11:00:55 +0200
Subject: UBIFS: fix sparse warnings

fs/ubifs/compress.c:111:8: warning: incorrect type in argument 5 (different signedness)
fs/ubifs/compress.c:111:8:    expected unsigned int *dlen
fs/ubifs/compress.c:111:8:    got int *out_len
fs/ubifs/compress.c:175:10: warning: incorrect type in argument 5 (different signedness)
fs/ubifs/compress.c:175:10:    expected unsigned int *dlen
fs/ubifs/compress.c:175:10:    got int *out_len

Fix this by adding a cast to (unsigned int *). We guarantee that
our lengths are small and no overflow is possible.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 4c90ee2..11e4132 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -108,7 +108,7 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
 	if (compr->comp_mutex)
 		mutex_lock(compr->comp_mutex);
 	err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf,
-				   out_len);
+				   (unsigned int *)out_len);
 	if (compr->comp_mutex)
 		mutex_unlock(compr->comp_mutex);
 	if (unlikely(err)) {
@@ -172,7 +172,7 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
 	if (compr->decomp_mutex)
 		mutex_lock(compr->decomp_mutex);
 	err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf,
-				     out_len);
+				     (unsigned int *)out_len);
 	if (compr->decomp_mutex)
 		mutex_unlock(compr->decomp_mutex);
 	if (err)
-- 
cgit v0.10.2


From f92b982680e4b4149c559789a54e1e9db190752a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 28 Dec 2008 11:34:26 +0200
Subject: UBIFS: fix checkpatch.pl warnings

These are mostly long lines and wrong indentation warning
fixes. But also there are two volatile variables and
checkpatch.pl complains about them:

WARNING: Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt
+       volatile int gc_seq;

WARNING: Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt
+       volatile int gced_lnum;

Well, we anyway use smp_wmb() for c->gc_seq and c->gced_lnum, so
these 'volatile' modifiers can be just dropped.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index a2be115..350fede 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -703,7 +703,8 @@ void dbg_dump_lpt_info(struct ubifs_info *c)
 	printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
 	printk(KERN_DEBUG "\tLPT head is at %d:%d\n",
 	       c->nhead_lnum, c->nhead_offs);
-	printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs);
+	printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n",
+	       c->ltab_lnum, c->ltab_offs);
 	if (c->big_lpt)
 		printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n",
 		       c->lsave_lnum, c->lsave_offs);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 7f1de98..fe82d24 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -72,8 +72,8 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
 		return err;
 	}
 
-	ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ubifs_inode(inode)->creat_sqnum);
-
+	ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
+		     ubifs_inode(inode)->creat_sqnum);
 	len = le32_to_cpu(dn->size);
 	if (len <= 0 || len > UBIFS_BLOCK_SIZE)
 		goto dump;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index f91b745..3b0fa70 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -1220,7 +1220,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 	data_key_init(c, &key, inum, blk);
 
 	bit = old_size & (UBIFS_BLOCK_SIZE - 1);
-	blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1);
+	blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0 : 1);
 	data_key_init(c, &to_key, inum, blk);
 
 	err = ubifs_tnc_remove_range(c, &key, &to_key);
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index da60b5a..b8a0607 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -548,7 +548,7 @@ static int write_cnodes(struct ubifs_info *c)
 no_space:
 	ubifs_err("LPT out of space mismatch");
 	dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab "
-	        "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
+		"%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
 	dbg_dump_lpt_info(c);
 	dbg_dump_lpt_lebs(c);
 	dump_stack();
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 6eef534..f7e36f5 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2245,12 +2245,11 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
 			if (found) {
 				/* Ensure the znode is dirtied */
 				if (znode->cnext || !ubifs_zn_dirty(znode)) {
-					    znode = dirty_cow_bottom_up(c,
-									znode);
-					    if (IS_ERR(znode)) {
-						    err = PTR_ERR(znode);
-						    goto out_unlock;
-					    }
+					znode = dirty_cow_bottom_up(c, znode);
+					if (IS_ERR(znode)) {
+						err = PTR_ERR(znode);
+						goto out_unlock;
+					}
 				}
 				zbr = &znode->zbranch[n];
 				lnc_free(zbr);
@@ -2317,11 +2316,11 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
 
 		/* Ensure the znode is dirtied */
 		if (znode->cnext || !ubifs_zn_dirty(znode)) {
-			    znode = dirty_cow_bottom_up(c, znode);
-			    if (IS_ERR(znode)) {
-				    err = PTR_ERR(znode);
-				    goto out_unlock;
-			    }
+			znode = dirty_cow_bottom_up(c, znode);
+			if (IS_ERR(znode)) {
+				err = PTR_ERR(znode);
+				goto out_unlock;
+			}
 		}
 
 		if (found == 1) {
@@ -2627,11 +2626,11 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
 
 		/* Ensure the znode is dirtied */
 		if (znode->cnext || !ubifs_zn_dirty(znode)) {
-			    znode = dirty_cow_bottom_up(c, znode);
-			    if (IS_ERR(znode)) {
-				    err = PTR_ERR(znode);
-				    goto out_unlock;
-			    }
+			znode = dirty_cow_bottom_up(c, znode);
+			if (IS_ERR(znode)) {
+				err = PTR_ERR(znode);
+				goto out_unlock;
+			}
 		}
 
 		/* Remove all keys in range except the first */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index a17dd79..3275c89 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -481,8 +481,8 @@ struct ubifs_lprops {
 struct ubifs_lpt_lprops {
 	int free;
 	int dirty;
-	unsigned tgc : 1;
-	unsigned cmt : 1;
+	unsigned tgc:1;
+	unsigned cmt:1;
 };
 
 /**
@@ -1322,8 +1322,8 @@ struct ubifs_info {
 	void *sbuf;
 	struct list_head idx_gc;
 	int idx_gc_cnt;
-	volatile int gc_seq;
-	volatile int gced_lnum;
+	int gc_seq;
+	int gced_lnum;
 
 	struct list_head infos_list;
 	struct mutex umount_mutex;
-- 
cgit v0.10.2


From a9f2fc0e251e71a51deb8059b181c375a4a5e979 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 23 Dec 2008 14:39:14 +0200
Subject: UBIFS: fix writing uncompressed files

UBIFS does not disable compression if ui->flags is non-zero, e.g.
if the file has "sync" flag. This is because of the typo which
is fixed by this patch. The patch also adds a couple of useful
debugging prints.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 5e82cff..6db7a6b 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -154,6 +154,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case FS_IOC_GETFLAGS:
 		flags = ubifs2ioctl(ubifs_inode(inode)->flags);
 
+		dbg_gen("get flags: %#x, i_flags %#x", flags, inode->i_flags);
 		return put_user(flags, (int __user *) arg);
 
 	case FS_IOC_SETFLAGS: {
@@ -176,6 +177,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		err = mnt_want_write(file->f_path.mnt);
 		if (err)
 			return err;
+		dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags);
 		err = setflags(inode, flags);
 		mnt_drop_write(file->f_path.mnt);
 		return err;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 3b0fa70..10ae25b 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -704,7 +704,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 	data->size = cpu_to_le32(len);
 	zero_data_node_unused(data);
 
-	if (!(ui->flags && UBIFS_COMPR_FL))
+	if (!(ui->flags & UBIFS_COMPR_FL))
 		/* Compression is disabled for this inode */
 		compr_type = UBIFS_COMPR_NONE;
 	else
-- 
cgit v0.10.2


From 57a450e95932f7798677885b8a01443aca72fdc7 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 30 Dec 2008 16:23:34 +0200
Subject: UBIFS: allow mounting when short of space

It is fine if there is not free space - we should still allow mounting
this FS. This patch relaxes the free space requirements and adds info
dumps.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 1c1bbe4..2c91d6f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1074,6 +1074,30 @@ again:
 }
 
 /**
+ * check_free_space - check if there is enough free space to mount.
+ * @c: UBIFS file-system description object
+ *
+ * This function makes sure UBIFS has enough free space to be mounted in
+ * read/write mode. UBIFS must always have some free space to allow deletions.
+ */
+static int check_free_space(struct ubifs_info *c)
+{
+	ubifs_assert(c->dark_wm > 0);
+	if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
+		ubifs_err("insufficient free space to mount in read/write mode");
+		dbg_dump_budg(c);
+		dbg_dump_lprops(c);
+		/*
+		 * We return %-EINVAL instead of %-ENOSPC because it seems to
+		 * be the closest error code mentioned in the mount function
+		 * documentation.
+		 */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
  * mount_ubifs - mount UBIFS file-system.
  * @c: UBIFS file-system description object
  *
@@ -1154,7 +1178,7 @@ static int mount_ubifs(struct ubifs_info *c)
 
 	/*
 	 * Make sure the compressor which is set as default in the superblock
-	 * or overriden by mount options is actually compiled in.
+	 * or overridden by mount options is actually compiled in.
 	 */
 	if (!ubifs_compr_present(c->default_compr)) {
 		ubifs_err("'compressor \"%s\" is not compiled in",
@@ -1236,12 +1260,9 @@ static int mount_ubifs(struct ubifs_info *c)
 	if (!mounted_read_only) {
 		int lnum;
 
-		/* Check for enough free space */
-		if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
-			ubifs_err("insufficient available space");
-			err = -EINVAL;
+		err = check_free_space(c);
+		if (err)
 			goto out_orphans;
-		}
 
 		/* Check for enough log space */
 		lnum = c->lhead_lnum + 1;
@@ -1442,12 +1463,9 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 	c->remounting_rw = 1;
 	c->always_chk_crc = 1;
 
-	/* Check for enough free space */
-	if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
-		ubifs_err("insufficient available space");
-		err = -EINVAL;
+	err = check_free_space(c);
+	if (err)
 		goto out;
-	}
 
 	if (c->old_leb_cnt != c->leb_cnt) {
 		struct ubifs_sb_node *sup;
-- 
cgit v0.10.2


From 80736d41f895bc472b2433a1c27fa6d4afe6ca35 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 30 Dec 2008 17:44:02 +0200
Subject: UBIFS: fix numerous spelling mistakes

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt
index 2d0db54..84da2a4 100644
--- a/Documentation/filesystems/ubifs.txt
+++ b/Documentation/filesystems/ubifs.txt
@@ -95,9 +95,9 @@ no_chk_data_crc		skip checking of CRCs on data nodes in order to
 			of this option is that corruption of the contents
 			of a file can go unnoticed.
 chk_data_crc (*)	do not skip checking CRCs on data nodes
-compr=none              override defoult comressor and set it to "none"
-compr=lzo               override defoult comressor and set it to "lzo"
-compr=zlib              override defoult comressor and set it to "zlib"
+compr=none              override default compressor and set it to "none"
+compr=lzo               override default compressor and set it to "lzo"
+compr=zlib              override default compressor and set it to "zlib"
 
 
 Quick usage instructions
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 4d270f0..31870d8 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -652,9 +652,9 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
  * user-space. User-space application tend to expect that if the file-system
  * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
  * are able to write a file of size N. UBIFS attaches node headers to each data
- * node and it has to write indexind nodes as well. This introduces additional
- * overhead, and UBIFS has to report sligtly less free space to meet the above
- * expectetions.
+ * node and it has to write indexing nodes as well. This introduces additional
+ * overhead, and UBIFS has to report slightly less free space to meet the above
+ * expectations.
  *
  * This function assumes free space is made up of uncompressed data nodes and
  * full index nodes (one per data node, tripled because we always allow enough
@@ -677,7 +677,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free)
 	 * of data nodes, f - fanout. Because effective UBIFS fanout is twice
 	 * as less than maximum fanout, we assume that each data node
 	 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
-	 * Note, the multiplier 3 is because UBIFS reseves thrice as more space
+	 * Note, the multiplier 3 is because UBIFS reserves thrice as more space
 	 * for the index.
 	 */
 	f = c->fanout > 3 ? c->fanout >> 1 : 2;
@@ -695,10 +695,10 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free)
  * This function calculates amount of free space to report to user-space.
  *
  * Because UBIFS may introduce substantial overhead (the index, node headers,
- * alighment, wastage at the end of eraseblocks, etc), it cannot report real
+ * alignment, wastage at the end of eraseblocks, etc), it cannot report real
  * amount of free flash space it has (well, because not all dirty space is
- * reclamable, UBIFS does not actually know the real amount). If UBIFS did so,
- * it would bread user expectetion about what free space is. Users seem to
+ * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so,
+ * it would bread user expectations about what free space is. Users seem to
  * accustomed to assume that if the file-system reports N bytes of free space,
  * they would be able to fit a file of N bytes to the FS. This almost works for
  * traditional file-systems, because they have way less overhead than UBIFS.
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index b8a0607..96ca957 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -753,7 +753,7 @@ static void lpt_tgc_start(struct ubifs_info *c)
  * LPT trivial garbage collection is where a LPT LEB contains only dirty and
  * free space and so may be reused as soon as the next commit is completed.
  * This function is called after the commit is completed (master node has been
- * written) and unmaps LPT LEBs that were marked for trivial GC.
+ * written) and un-maps LPT LEBs that were marked for trivial GC.
  */
 static int lpt_tgc_end(struct ubifs_info *c)
 {
@@ -1467,7 +1467,7 @@ void ubifs_lpt_free(struct ubifs_info *c, int wr_only)
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
 /**
- * dbg_is_all_ff - determine if a buffer contains only 0xff bytes.
+ * dbg_is_all_ff - determine if a buffer contains only 0xFF bytes.
  * @buf: buffer
  * @len: buffer length
  */
@@ -1492,7 +1492,7 @@ static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs)
 	struct ubifs_nnode *nnode;
 	int hght;
 
-	/* Entire tree is in memory so first_nnode / next_nnode are ok */
+	/* Entire tree is in memory so first_nnode / next_nnode are OK */
 	nnode = first_nnode(c, &hght);
 	for (; nnode; nnode = next_nnode(c, nnode, &hght)) {
 		struct ubifs_nbranch *branch;
@@ -1837,7 +1837,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
  * This function dumps an LEB from LPT area. Nodes in this area are very
  * different to nodes in the main area (e.g., they do not have common headers,
  * they do not have 8-byte alignments, etc), so we have a separate function to
- * dump LPT area LEBs. Note, LPT has to be locked by the coller.
+ * dump LPT area LEBs. Note, LPT has to be locked by the caller.
  */
 static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
 {
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 3275c89..fc2a4cc 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1168,7 +1168,6 @@ struct ubifs_debug_info;
  * @mount_opts: UBIFS-specific mount options
  *
  * @dbg: debugging-related information
- * @dfs: debugfs support-related information
  */
 struct ubifs_info {
 	struct super_block *vfs_sb;
-- 
cgit v0.10.2


From 5d38b3ac78e0e0e420fba134716fc3d20e6b978a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 30 Dec 2008 17:58:42 +0200
Subject: UBIFS: print debugging messages properly

We cannot use ubifs_err() macro with DBGKEY() and DBGKEY1(),
because this is racy and holding dbg_lock is needed. Use
dbg_err() instead, which does have the lock held.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 350fede..792c5a1 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -1010,20 +1010,20 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
 	err = 1;
 	key_read(c, &dent1->key, &key);
 	if (keys_cmp(c, &zbr1->key, &key)) {
-		ubifs_err("1st entry at %d:%d has key %s", zbr1->lnum,
-			  zbr1->offs, DBGKEY(&key));
-		ubifs_err("but it should have key %s according to tnc",
-			  DBGKEY(&zbr1->key));
+		dbg_err("1st entry at %d:%d has key %s", zbr1->lnum,
+			zbr1->offs, DBGKEY(&key));
+		dbg_err("but it should have key %s according to tnc",
+			DBGKEY(&zbr1->key));
 		dbg_dump_node(c, dent1);
 		goto out_free;
 	}
 
 	key_read(c, &dent2->key, &key);
 	if (keys_cmp(c, &zbr2->key, &key)) {
-		ubifs_err("2nd entry at %d:%d has key %s", zbr1->lnum,
-			  zbr1->offs, DBGKEY(&key));
-		ubifs_err("but it should have key %s according to tnc",
-			  DBGKEY(&zbr2->key));
+		dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum,
+			zbr1->offs, DBGKEY(&key));
+		dbg_err("but it should have key %s according to tnc",
+			DBGKEY(&zbr2->key));
 		dbg_dump_node(c, dent2);
 		goto out_free;
 	}
@@ -1037,9 +1037,9 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
 		goto out_free;
 	}
 	if (cmp == 0 && nlen1 == nlen2)
-		ubifs_err("2 xent/dent nodes with the same name");
+		dbg_err("2 xent/dent nodes with the same name");
 	else
-		ubifs_err("bad order of colliding key %s",
+		dbg_err("bad order of colliding key %s",
 			DBGKEY(&key));
 
 	ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
-- 
cgit v0.10.2


From 8e5033adc78ff4fbeab7052134e7af1f6ff04187 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 30 Dec 2008 18:37:45 +0200
Subject: UBIFS: add more useful debugging prints

Print node sizes and maximum node sizes.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 2c91d6f..0d7564b 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1361,8 +1361,20 @@ static int mount_ubifs(struct ubifs_info *c)
 	dbg_msg("tree fanout:         %d", c->fanout);
 	dbg_msg("reserved GC LEB:     %d", c->gc_lnum);
 	dbg_msg("first main LEB:      %d", c->main_first);
+	dbg_msg("max. znode size      %d", c->max_znode_sz);
+	dbg_msg("max. index node size %d", c->max_idx_node_sz);
+	dbg_msg("node sizes:          data %zu, inode %zu, dentry %zu",
+		UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ);
+	dbg_msg("node sizes:          trun %zu, sb %zu, master %zu",
+		UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
+	dbg_msg("node sizes:          ref %zu, cmt. start %zu, orph %zu",
+		UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
+	dbg_msg("max. node sizes:     data %zu, inode %zu dentry %zu",
+	        UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
+		UBIFS_MAX_DENT_NODE_SZ);
 	dbg_msg("dead watermark:      %d", c->dead_wm);
 	dbg_msg("dark watermark:      %d", c->dark_wm);
+	dbg_msg("LEB overhead:        %d", c->leb_overhead);
 	x = (long long)c->main_lebs * c->dark_wm;
 	dbg_msg("max. dark space:     %lld (%lld KiB, %lld MiB)",
 		x, x >> 10, x >> 20);
-- 
cgit v0.10.2


From f9a3fba2ce8622977c5373d2449eb71705613721 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Wed, 31 Dec 2008 10:08:37 +0200
Subject: ASoC: TWL4030: Make the enum filter generic for twl4030

Modify the enum filter to more generic that it will filter
out the enums with text "Invalid".
The enum filter also required for the capture path.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 5184888..2c279cd 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -298,25 +298,23 @@ static const struct soc_enum twl4030_handsfreer_enum =
 static const struct snd_kcontrol_new twl4030_dapm_handsfreer_control =
 SOC_DAPM_ENUM("Route", twl4030_handsfreer_enum);
 
-static int outmixer_event(struct snd_soc_dapm_widget *w,
+/*
+ * This function filters out the non valid mux settings, named as "Invalid"
+ * in the enum texts.
+ * Just refuse to set an invalid mux mode.
+ */
+static int twl4030_enum_event(struct snd_soc_dapm_widget *w,
 	struct snd_kcontrol *kcontrol, int event)
 {
 	struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
 	int ret = 0;
 	int val;
 
-	switch (e->reg) {
-	case TWL4030_REG_PREDL_CTL:
-	case TWL4030_REG_PREDR_CTL:
-	case TWL4030_REG_EAR_CTL:
-		val = w->value >> e->shift_l;
-		if (val == 3) {
-			printk(KERN_WARNING
-			"Invalid MUX setting for register 0x%02x (%d)\n",
-			      e->reg, val);
-			ret = -1;
-		}
-		break;
+	val = w->value >> e->shift_l;
+	if (!strcmp("Invalid", e->texts[val])) {
+		printk(KERN_WARNING "Invalid MUX setting on 0x%02x (%d)\n",
+			e->reg, val);
+		ret = -1;
 	}
 
 	return ret;
@@ -810,14 +808,14 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 	/* Output MUX controls */
 	/* Earpiece */
 	SND_SOC_DAPM_MUX_E("Earpiece Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_earpiece_control, outmixer_event,
+		&twl4030_dapm_earpiece_control, twl4030_enum_event,
 		SND_SOC_DAPM_PRE_REG),
 	/* PreDrivL/R */
 	SND_SOC_DAPM_MUX_E("PredriveL Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_predrivel_control, outmixer_event,
+		&twl4030_dapm_predrivel_control, twl4030_enum_event,
 		SND_SOC_DAPM_PRE_REG),
 	SND_SOC_DAPM_MUX_E("PredriveR Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_predriver_control, outmixer_event,
+		&twl4030_dapm_predriver_control, twl4030_enum_event,
 		SND_SOC_DAPM_PRE_REG),
 	/* HeadsetL/R */
 	SND_SOC_DAPM_MUX("HeadsetL Mux", SND_SOC_NOPM, 0, 0,
-- 
cgit v0.10.2


From 276c62225a7c98737510483dcaec6af7e7965389 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Wed, 31 Dec 2008 10:08:38 +0200
Subject: ASoC: TWL4030: DAPM based capture implementation

This patch adds DAPM implementaion for the capture path
on twlx030.

TWL has two physical ADC and two digital microphone (stereo) connections.
The CPU interface has four microphone channels.
For simplicity the microphone channel paths are named as:
TX1 (Left/Right) - when using i2s mode, only the TX1 data is valid
TX2 (Left/Right)

Input routing (simplified version):
There is two levels of mux settings for TWL in input path:
Analog input mux:
 ADCL <- {Off, Main mic, Headset mic, AUXL, Carkit mic}
 ADCR <- {Off, Sub mic, AUXR}

Analog/Digital mux:
TX1 Analog mode:
 TX1L <- ADCL
 TX1R <- ADCR
TX1 Digital mode:
 TX1L <- Digimic0 (Left)
 TX1R <- Digimic0 (Right)

TX2 Analog mode:
 TX2L <- ADCL
 TX2R <- ADCR
TX2 Digital mode:
 TX2L <- Digimic1 (Left)
 TX2R <- Digimic1 (Right)

The patch provides the following user controls for the capture path:
Mux settings:
"TX1 Capture Route": {Analog, Digimic0}
"TX2 Capture Route": {Analog, Digimic1}
"Analog Left Capture Route":  {Off, Main Mic, Headset Mic, AUXL, Carkit Mic}
"Analog Right Capture Route": {Off, Sub Mic, AUXR}

Volume/Gain controls:
"TX1 Digital Capture Volume": Stereo gain control for TX1 path
"TX2 Digital Capture Volume": Stereo gain control for TX2 path
"Analog Capture Volume":      Stereo gain control for the analog path only

Important things for the board files:
Microphone bias:
"Mic Bias 1":       Bias for Main mic or for digimic0 (analog or digital path)
"Mic Bias 2":       Bias for Sub mic or for digimic1 (analog or digital path)
"Headset Mic Bias": Bias for Headset mic

When the routing configured correctly only the needed components will be
powered/enabled.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 2c279cd..31e44e3 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -298,6 +298,55 @@ static const struct soc_enum twl4030_handsfreer_enum =
 static const struct snd_kcontrol_new twl4030_dapm_handsfreer_control =
 SOC_DAPM_ENUM("Route", twl4030_handsfreer_enum);
 
+/* Left analog microphone selection */
+static const char *twl4030_analoglmic_texts[] =
+		{"Off", "Main mic", "Headset mic", "Invalid", "AUXL",
+		 "Invalid", "Invalid", "Invalid", "Carkit mic"};
+
+static const struct soc_enum twl4030_analoglmic_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_ANAMICL, 0,
+			ARRAY_SIZE(twl4030_analoglmic_texts),
+			twl4030_analoglmic_texts);
+
+static const struct snd_kcontrol_new twl4030_dapm_analoglmic_control =
+SOC_DAPM_ENUM("Route", twl4030_analoglmic_enum);
+
+/* Right analog microphone selection */
+static const char *twl4030_analogrmic_texts[] =
+		{"Off", "Sub mic", "Invalid", "Invalid", "AUXR"};
+
+static const struct soc_enum twl4030_analogrmic_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_ANAMICR, 0,
+			ARRAY_SIZE(twl4030_analogrmic_texts),
+			twl4030_analogrmic_texts);
+
+static const struct snd_kcontrol_new twl4030_dapm_analogrmic_control =
+SOC_DAPM_ENUM("Route", twl4030_analogrmic_enum);
+
+/* TX1 L/R Analog/Digital microphone selection */
+static const char *twl4030_micpathtx1_texts[] =
+		{"Analog", "Digimic0"};
+
+static const struct soc_enum twl4030_micpathtx1_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_ADCMICSEL, 0,
+			ARRAY_SIZE(twl4030_micpathtx1_texts),
+			twl4030_micpathtx1_texts);
+
+static const struct snd_kcontrol_new twl4030_dapm_micpathtx1_control =
+SOC_DAPM_ENUM("Route", twl4030_micpathtx1_enum);
+
+/* TX2 L/R Analog/Digital microphone selection */
+static const char *twl4030_micpathtx2_texts[] =
+		{"Analog", "Digimic1"};
+
+static const struct soc_enum twl4030_micpathtx2_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_ADCMICSEL, 2,
+			ARRAY_SIZE(twl4030_micpathtx2_texts),
+			twl4030_micpathtx2_texts);
+
+static const struct snd_kcontrol_new twl4030_dapm_micpathtx2_control =
+SOC_DAPM_ENUM("Route", twl4030_micpathtx2_enum);
+
 /*
  * This function filters out the non valid mux settings, named as "Invalid"
  * in the enum texts.
@@ -320,6 +369,36 @@ static int twl4030_enum_event(struct snd_soc_dapm_widget *w,
 	return ret;
 }
 
+static int micpath_event(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct soc_enum *e = (struct soc_enum *)w->kcontrols->private_value;
+	unsigned char adcmicsel, micbias_ctl;
+
+	adcmicsel = twl4030_read_reg_cache(w->codec, TWL4030_REG_ADCMICSEL);
+	micbias_ctl = twl4030_read_reg_cache(w->codec, TWL4030_REG_MICBIAS_CTL);
+	/* Prepare the bits for the given TX path:
+	 * shift_l == 0: TX1 microphone path
+	 * shift_l == 2: TX2 microphone path */
+	if (e->shift_l) {
+		/* TX2 microphone path */
+		if (adcmicsel & TWL4030_TX2IN_SEL)
+			micbias_ctl |= TWL4030_MICBIAS2_CTL; /* digimic */
+		else
+			micbias_ctl &= ~TWL4030_MICBIAS2_CTL;
+	} else {
+		/* TX1 microphone path */
+		if (adcmicsel & TWL4030_TX1IN_SEL)
+			micbias_ctl |= TWL4030_MICBIAS1_CTL; /* digimic */
+		else
+			micbias_ctl &= ~TWL4030_MICBIAS1_CTL;
+	}
+
+	twl4030_write(w->codec, TWL4030_REG_MICBIAS_CTL, micbias_ctl);
+
+	return 0;
+}
+
 static int handsfree_event(struct snd_soc_dapm_widget *w,
 		struct snd_kcontrol *kcontrol, int event)
 {
@@ -501,162 +580,6 @@ static int snd_soc_put_volsw_r2_twl4030(struct snd_kcontrol *kcontrol,
 	return err;
 }
 
-static int twl4030_get_left_input(struct snd_kcontrol *kcontrol,
-	struct snd_ctl_elem_value *ucontrol)
-{
-	struct snd_soc_codec *codec = kcontrol->private_data;
-	u8 reg = twl4030_read_reg_cache(codec, TWL4030_REG_ANAMICL);
-	int result = 0;
-
-	/* one bit must be set a time */
-	reg &= TWL4030_CKMIC_EN | TWL4030_AUXL_EN | TWL4030_HSMIC_EN
-			| TWL4030_MAINMIC_EN;
-	if (reg != 0) {
-		result++;
-		while ((reg & 1) == 0) {
-			result++;
-			reg >>= 1;
-		}
-	}
-
-	ucontrol->value.integer.value[0] = result;
-	return 0;
-}
-
-static int twl4030_put_left_input(struct snd_kcontrol *kcontrol,
-	struct snd_ctl_elem_value *ucontrol)
-{
-	struct snd_soc_codec *codec = kcontrol->private_data;
-	int value = ucontrol->value.integer.value[0];
-	u8 anamicl, micbias, avadc_ctl;
-
-	anamicl = twl4030_read_reg_cache(codec, TWL4030_REG_ANAMICL);
-	anamicl &= ~(TWL4030_CKMIC_EN | TWL4030_AUXL_EN | TWL4030_HSMIC_EN
-			| TWL4030_MAINMIC_EN);
-	micbias = twl4030_read_reg_cache(codec, TWL4030_REG_MICBIAS_CTL);
-	micbias &= ~(TWL4030_HSMICBIAS_EN | TWL4030_MICBIAS1_EN);
-	avadc_ctl = twl4030_read_reg_cache(codec, TWL4030_REG_AVADC_CTL);
-
-	switch (value) {
-	case 1:
-		anamicl |= TWL4030_MAINMIC_EN;
-		micbias |= TWL4030_MICBIAS1_EN;
-		break;
-	case 2:
-		anamicl |= TWL4030_HSMIC_EN;
-		micbias |= TWL4030_HSMICBIAS_EN;
-		break;
-	case 3:
-		anamicl |= TWL4030_AUXL_EN;
-		break;
-	case 4:
-		anamicl |= TWL4030_CKMIC_EN;
-		break;
-	default:
-		break;
-	}
-
-	/* If some input is selected, enable amp and ADC */
-	if (value != 0) {
-		anamicl |= TWL4030_MICAMPL_EN;
-		avadc_ctl |= TWL4030_ADCL_EN;
-	} else {
-		anamicl &= ~TWL4030_MICAMPL_EN;
-		avadc_ctl &= ~TWL4030_ADCL_EN;
-	}
-
-	twl4030_write(codec, TWL4030_REG_ANAMICL, anamicl);
-	twl4030_write(codec, TWL4030_REG_MICBIAS_CTL, micbias);
-	twl4030_write(codec, TWL4030_REG_AVADC_CTL, avadc_ctl);
-
-	return 1;
-}
-
-static int twl4030_get_right_input(struct snd_kcontrol *kcontrol,
-	struct snd_ctl_elem_value *ucontrol)
-{
-	struct snd_soc_codec *codec = kcontrol->private_data;
-	u8 reg = twl4030_read_reg_cache(codec, TWL4030_REG_ANAMICR);
-	int value = 0;
-
-	reg &= TWL4030_SUBMIC_EN|TWL4030_AUXR_EN;
-	switch (reg) {
-	case TWL4030_SUBMIC_EN:
-		value = 1;
-		break;
-	case TWL4030_AUXR_EN:
-		value = 2;
-		break;
-	default:
-		break;
-	}
-
-	ucontrol->value.integer.value[0] = value;
-	return 0;
-}
-
-static int twl4030_put_right_input(struct snd_kcontrol *kcontrol,
-	struct snd_ctl_elem_value *ucontrol)
-{
-	struct snd_soc_codec *codec = kcontrol->private_data;
-	int value = ucontrol->value.integer.value[0];
-	u8 anamicr, micbias, avadc_ctl;
-
-	anamicr = twl4030_read_reg_cache(codec, TWL4030_REG_ANAMICR);
-	anamicr &= ~(TWL4030_SUBMIC_EN|TWL4030_AUXR_EN);
-	micbias = twl4030_read_reg_cache(codec, TWL4030_REG_MICBIAS_CTL);
-	micbias &= ~TWL4030_MICBIAS2_EN;
-	avadc_ctl = twl4030_read_reg_cache(codec, TWL4030_REG_AVADC_CTL);
-
-	switch (value) {
-	case 1:
-		anamicr |= TWL4030_SUBMIC_EN;
-		micbias |= TWL4030_MICBIAS2_EN;
-		break;
-	case 2:
-		anamicr |= TWL4030_AUXR_EN;
-		break;
-	default:
-		break;
-	}
-
-	if (value != 0) {
-		anamicr |= TWL4030_MICAMPR_EN;
-		avadc_ctl |= TWL4030_ADCR_EN;
-	} else {
-		anamicr &= ~TWL4030_MICAMPR_EN;
-		avadc_ctl &= ~TWL4030_ADCR_EN;
-	}
-
-	twl4030_write(codec, TWL4030_REG_ANAMICR, anamicr);
-	twl4030_write(codec, TWL4030_REG_MICBIAS_CTL, micbias);
-	twl4030_write(codec, TWL4030_REG_AVADC_CTL, avadc_ctl);
-
-	return 1;
-}
-
-static const char *twl4030_left_in_sel[] = {
-	"None",
-	"Main Mic",
-	"Headset Mic",
-	"Line In",
-	"Carkit Mic",
-};
-
-static const char *twl4030_right_in_sel[] = {
-	"None",
-	"Sub Mic",
-	"Line In",
-};
-
-static const struct soc_enum twl4030_left_input_mux =
-	SOC_ENUM_SINGLE_EXT(ARRAY_SIZE(twl4030_left_in_sel),
-		twl4030_left_in_sel);
-
-static const struct soc_enum twl4030_right_input_mux =
-	SOC_ENUM_SINGLE_EXT(ARRAY_SIZE(twl4030_right_in_sel),
-		twl4030_right_in_sel);
-
 /*
  * FGAIN volume control:
  * from -62 to 0 dB in 1 dB steps (mute instead of -63 dB)
@@ -739,18 +662,15 @@ static const struct snd_kcontrol_new twl4030_snd_controls[] = {
 		TWL4030_REG_EAR_CTL, 4, 3, 0, output_tvl),
 
 	/* Common capture gain controls */
-	SOC_DOUBLE_R_TLV("Capture Volume",
+	SOC_DOUBLE_R_TLV("TX1 Digital Capture Volume",
 		TWL4030_REG_ATXL1PGA, TWL4030_REG_ATXR1PGA,
 		0, 0x1f, 0, digital_capture_tlv),
+	SOC_DOUBLE_R_TLV("TX2 Digital Capture Volume",
+		TWL4030_REG_AVTXL2PGA, TWL4030_REG_AVTXR2PGA,
+		0, 0x1f, 0, digital_capture_tlv),
 
-	SOC_DOUBLE_TLV("Input Boost Volume", TWL4030_REG_ANAMIC_GAIN,
+	SOC_DOUBLE_TLV("Analog Capture Volume", TWL4030_REG_ANAMIC_GAIN,
 		0, 3, 5, 0, input_gain_tlv),
-
-	/* Input source controls */
-	SOC_ENUM_EXT("Left Input Source", twl4030_left_input_mux,
-		twl4030_get_left_input, twl4030_put_left_input),
-	SOC_ENUM_EXT("Right Input Source", twl4030_right_input_mux,
-		twl4030_get_right_input, twl4030_put_right_input),
 };
 
 /* add non dapm controls */
@@ -770,9 +690,19 @@ static int twl4030_add_controls(struct snd_soc_codec *codec)
 }
 
 static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
-	SND_SOC_DAPM_INPUT("INL"),
-	SND_SOC_DAPM_INPUT("INR"),
-
+	/* Left channel inputs */
+	SND_SOC_DAPM_INPUT("MAINMIC"),
+	SND_SOC_DAPM_INPUT("HSMIC"),
+	SND_SOC_DAPM_INPUT("AUXL"),
+	SND_SOC_DAPM_INPUT("CARKITMIC"),
+	/* Right channel inputs */
+	SND_SOC_DAPM_INPUT("SUBMIC"),
+	SND_SOC_DAPM_INPUT("AUXR"),
+	/* Digital microphones (Stereo) */
+	SND_SOC_DAPM_INPUT("DIGIMIC0"),
+	SND_SOC_DAPM_INPUT("DIGIMIC1"),
+
+	/* Outputs */
 	SND_SOC_DAPM_OUTPUT("OUTL"),
 	SND_SOC_DAPM_OUTPUT("OUTR"),
 	SND_SOC_DAPM_OUTPUT("EARPIECE"),
@@ -835,8 +765,50 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 		&twl4030_dapm_handsfreer_control, handsfree_event,
 		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
 
-	SND_SOC_DAPM_ADC("ADCL", "Left Capture", SND_SOC_NOPM, 0, 0),
-	SND_SOC_DAPM_ADC("ADCR", "Right Capture", SND_SOC_NOPM, 0, 0),
+	/* Introducing four virtual ADC, since TWL4030 have four channel for
+	   capture */
+	SND_SOC_DAPM_ADC("ADC Virtual Left1", "Left Front Capture",
+		SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_ADC("ADC Virtual Right1", "Right Front Capture",
+		SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_ADC("ADC Virtual Left2", "Left Rear Capture",
+		SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_ADC("ADC Virtual Right2", "Right Rear Capture",
+		SND_SOC_NOPM, 0, 0),
+
+	/* Analog/Digital mic path selection.
+	   TX1 Left/Right: either analog Left/Right or Digimic0
+	   TX2 Left/Right: either analog Left/Right or Digimic1 */
+	SND_SOC_DAPM_MUX_E("TX1 Capture Route", SND_SOC_NOPM, 0, 0,
+		&twl4030_dapm_micpathtx1_control, micpath_event,
+		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD|
+		SND_SOC_DAPM_POST_REG),
+	SND_SOC_DAPM_MUX_E("TX2 Capture Route", SND_SOC_NOPM, 0, 0,
+		&twl4030_dapm_micpathtx2_control, micpath_event,
+		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD|
+		SND_SOC_DAPM_POST_REG),
+
+	/* Analog input muxes with power switch for the physical ADCL/R */
+	SND_SOC_DAPM_MUX_E("Analog Left Capture Route",
+		TWL4030_REG_AVADC_CTL, 3, 0, &twl4030_dapm_analoglmic_control,
+		twl4030_enum_event, SND_SOC_DAPM_PRE_REG),
+	SND_SOC_DAPM_MUX_E("Analog Right Capture Route",
+		TWL4030_REG_AVADC_CTL, 1, 0, &twl4030_dapm_analogrmic_control,
+		twl4030_enum_event, SND_SOC_DAPM_PRE_REG),
+
+	SND_SOC_DAPM_PGA("Analog Left Amplifier",
+		TWL4030_REG_ANAMICL, 4, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("Analog Right Amplifier",
+		TWL4030_REG_ANAMICR, 4, 0, NULL, 0),
+
+	SND_SOC_DAPM_PGA("Digimic0 Enable",
+		TWL4030_REG_ADCMICSEL, 1, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("Digimic1 Enable",
+		TWL4030_REG_ADCMICSEL, 3, 0, NULL, 0),
+
+	SND_SOC_DAPM_MICBIAS("Mic Bias 1", TWL4030_REG_MICBIAS_CTL, 0, 0),
+	SND_SOC_DAPM_MICBIAS("Mic Bias 2", TWL4030_REG_MICBIAS_CTL, 1, 0),
+	SND_SOC_DAPM_MICBIAS("Headset Mic Bias", TWL4030_REG_MICBIAS_CTL, 2, 0),
 };
 
 static const struct snd_soc_dapm_route intercon[] = {
@@ -892,9 +864,39 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"HFL", NULL, "HandsfreeL Mux"},
 	{"HFR", NULL, "HandsfreeR Mux"},
 
-	/* inputs */
-	{"ADCL", NULL, "INL"},
-	{"ADCR", NULL, "INR"},
+	/* Capture path */
+	{"Analog Left Capture Route", "Main mic", "MAINMIC"},
+	{"Analog Left Capture Route", "Headset mic", "HSMIC"},
+	{"Analog Left Capture Route", "AUXL", "AUXL"},
+	{"Analog Left Capture Route", "Carkit mic", "CARKITMIC"},
+
+	{"Analog Right Capture Route", "Sub mic", "SUBMIC"},
+	{"Analog Right Capture Route", "AUXR", "AUXR"},
+
+	{"Analog Left Amplifier", NULL, "Analog Left Capture Route"},
+	{"Analog Right Amplifier", NULL, "Analog Right Capture Route"},
+
+	{"Digimic0 Enable", NULL, "DIGIMIC0"},
+	{"Digimic1 Enable", NULL, "DIGIMIC1"},
+
+	/* TX1 Left capture path */
+	{"TX1 Capture Route", "Analog", "Analog Left Amplifier"},
+	{"TX1 Capture Route", "Digimic0", "Digimic0 Enable"},
+	/* TX1 Right capture path */
+	{"TX1 Capture Route", "Analog", "Analog Right Amplifier"},
+	{"TX1 Capture Route", "Digimic0", "Digimic0 Enable"},
+	/* TX2 Left capture path */
+	{"TX2 Capture Route", "Analog", "Analog Left Amplifier"},
+	{"TX2 Capture Route", "Digimic1", "Digimic1 Enable"},
+	/* TX2 Right capture path */
+	{"TX2 Capture Route", "Analog", "Analog Right Amplifier"},
+	{"TX2 Capture Route", "Digimic1", "Digimic1 Enable"},
+
+	{"ADC Virtual Left1", NULL, "TX1 Capture Route"},
+	{"ADC Virtual Right1", NULL, "TX1 Capture Route"},
+	{"ADC Virtual Left2", NULL, "TX2 Capture Route"},
+	{"ADC Virtual Right2", NULL, "TX2 Capture Route"},
+
 };
 
 static int twl4030_add_widgets(struct snd_soc_codec *codec)
@@ -921,6 +923,7 @@ static void twl4030_power_up(struct snd_soc_codec *codec)
 	twl4030_write(codec, TWL4030_REG_ANAMICL,
 		anamicl | TWL4030_CNCL_OFFSET_START);
 
+
 	/* wait for offset cancellation to complete */
 	do {
 		/* this takes a little while, so don't slam i2c */
diff --git a/sound/soc/codecs/twl4030.h b/sound/soc/codecs/twl4030.h
index 54615c7..442e5a8 100644
--- a/sound/soc/codecs/twl4030.h
+++ b/sound/soc/codecs/twl4030.h
@@ -147,6 +147,13 @@
 #define TWL4030_AVADC_CLK_PRIORITY	0x04
 #define TWL4030_ADCR_EN			0x02
 
+/* TWL4030_REG_ADCMICSEL (0x08) Fields */
+
+#define TWL4030_DIGMIC1_EN		0x08
+#define TWL4030_TX2IN_SEL		0x04
+#define TWL4030_DIGMIC0_EN		0x02
+#define TWL4030_TX1IN_SEL		0x01
+
 /* AUDIO_IF (0x0E) Fields */
 
 #define TWL4030_AIF_SLAVE_EN		0x80
-- 
cgit v0.10.2


From 457533a7d3402d1d91fbc125c8bd1bd16dcd3cd4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 31 Dec 2008 15:11:37 +0100
Subject: [PATCH] fix scaled & unscaled cputime accounting

The utimescaled / stimescaled fields in the task structure and the
global cpustat should be set on all architectures. On s390 the calls
to account_user_time_scaled and account_system_time_scaled never have
been added. In addition system time that is accounted as guest time
to the user time of a process is accounted to the scaled system time
instead of the scaled user time.
To fix the bugs and to prevent future forgetfulness this patch merges
account_system_time_scaled into account_system_time and
account_user_time_scaled into account_user_time.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Michael Neuling <mikey@neuling.org>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 65c10a4..4ee3678 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -93,13 +93,11 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
 	now = ia64_get_itc();
 
 	delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
-	account_system_time(prev, 0, delta_stime);
-	account_system_time_scaled(prev, delta_stime);
+	account_system_time(prev, 0, delta_stime, delta_stime);
 
 	if (pi->ac_utime) {
 		delta_utime = cycle_to_cputime(pi->ac_utime);
-		account_user_time(prev, delta_utime);
-		account_user_time_scaled(prev, delta_utime);
+		account_user_time(prev, delta_utime, delta_utime);
 	}
 
 	pi->ac_stamp = ni->ac_stamp = now;
@@ -122,8 +120,7 @@ void account_system_vtime(struct task_struct *tsk)
 	now = ia64_get_itc();
 
 	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
-	account_system_time(tsk, 0, delta_stime);
-	account_system_time_scaled(tsk, delta_stime);
+	account_system_time(tsk, 0, delta_stime, delta_stime);
 	ti->ac_stime = 0;
 
 	ti->ac_stamp = now;
@@ -143,8 +140,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 
 	if (ti->ac_utime) {
 		delta_utime = cycle_to_cputime(ti->ac_utime);
-		account_user_time(p, delta_utime);
-		account_user_time_scaled(p, delta_utime);
+		account_user_time(p, delta_utime, delta_utime);
 		ti->ac_utime = 0;
 	}
 }
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e1f3a51..92650cc 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,8 +256,7 @@ void account_system_vtime(struct task_struct *tsk)
 		delta += sys_time;
 		get_paca()->system_time = 0;
 	}
-	account_system_time(tsk, 0, delta);
-	account_system_time_scaled(tsk, deltascaled);
+	account_system_time(tsk, 0, delta, deltascaled);
 	per_cpu(cputime_last_delta, smp_processor_id()) = delta;
 	per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
 	local_irq_restore(flags);
@@ -275,10 +274,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 
 	utime = get_paca()->user_time;
 	get_paca()->user_time = 0;
-	account_user_time(tsk, utime);
-
 	utimescaled = cputime_to_scaled(utime);
-	account_user_time_scaled(tsk, utimescaled);
+	account_user_time(tsk, utime, utimescaled);
 }
 
 /*
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 75a6e62..07283ae 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -50,12 +50,12 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 	rcu_user_flag = cputime != 0;
 	S390_lowcore.user_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_user_time(tsk, cputime);
+	account_user_time(tsk, cputime, cputime);
 
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, HARDIRQ_OFFSET, cputime);
+	account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime);
 
 	cputime = S390_lowcore.steal_clock;
 	if ((__s64) cputime > 0) {
@@ -82,12 +82,12 @@ void account_vtime(struct task_struct *tsk)
 	cputime = S390_lowcore.user_timer >> 12;
 	S390_lowcore.user_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_user_time(tsk, cputime);
+	account_user_time(tsk, cputime, cputime);
 
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, 0, cputime);
+	account_system_time(tsk, 0, cputime, cputime);
 }
 
 /*
@@ -107,7 +107,7 @@ void account_system_vtime(struct task_struct *tsk)
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, 0, cputime);
+	account_system_time(tsk, 0, cputime, cputime);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 4ee4b3d..c78a459 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -79,10 +79,8 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 }
 
 extern unsigned long long task_delta_exec(struct task_struct *);
-extern void account_user_time(struct task_struct *, cputime_t);
-extern void account_user_time_scaled(struct task_struct *, cputime_t);
-extern void account_system_time(struct task_struct *, int, cputime_t);
-extern void account_system_time_scaled(struct task_struct *, cputime_t);
+extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
+extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
 extern void account_steal_time(struct task_struct *, cputime_t);
 
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/kernel/sched.c b/kernel/sched.c
index fff1c4a..5b03679 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4080,13 +4080,17 @@ unsigned long long task_delta_exec(struct task_struct *p)
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @cputime: the cpu time spent in user space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
  */
-void account_user_time(struct task_struct *p, cputime_t cputime)
+void account_user_time(struct task_struct *p, cputime_t cputime,
+		       cputime_t cputime_scaled)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	cputime64_t tmp;
 
+	/* Add user time to process. */
 	p->utime = cputime_add(p->utime, cputime);
+	p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
 	account_group_user_time(p, cputime);
 
 	/* Add user time to cpustat. */
@@ -4103,51 +4107,49 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
  * Account guest cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @cputime: the cpu time spent in virtual machine since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
  */
-static void account_guest_time(struct task_struct *p, cputime_t cputime)
+static void account_guest_time(struct task_struct *p, cputime_t cputime,
+			       cputime_t cputime_scaled)
 {
 	cputime64_t tmp;
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 
 	tmp = cputime_to_cputime64(cputime);
 
+	/* Add guest time to process. */
 	p->utime = cputime_add(p->utime, cputime);
+	p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
 	account_group_user_time(p, cputime);
 	p->gtime = cputime_add(p->gtime, cputime);
 
+	/* Add guest time to cpustat. */
 	cpustat->user = cputime64_add(cpustat->user, tmp);
 	cpustat->guest = cputime64_add(cpustat->guest, tmp);
 }
 
 /*
- * Account scaled user cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in user space since the last update
- */
-void account_user_time_scaled(struct task_struct *p, cputime_t cputime)
-{
-	p->utimescaled = cputime_add(p->utimescaled, cputime);
-}
-
-/*
  * Account system cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
  * @cputime: the cpu time spent in kernel space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
  */
 void account_system_time(struct task_struct *p, int hardirq_offset,
-			 cputime_t cputime)
+			 cputime_t cputime, cputime_t cputime_scaled)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	struct rq *rq = this_rq();
 	cputime64_t tmp;
 
 	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
-		account_guest_time(p, cputime);
+		account_guest_time(p, cputime, cputime_scaled);
 		return;
 	}
 
+	/* Add system time to process. */
 	p->stime = cputime_add(p->stime, cputime);
+	p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
 	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
@@ -4167,17 +4169,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 }
 
 /*
- * Account scaled system cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @hardirq_offset: the offset to subtract from hardirq_count()
- * @cputime: the cpu time spent in kernel space since the last update
- */
-void account_system_time_scaled(struct task_struct *p, cputime_t cputime)
-{
-	p->stimescaled = cputime_add(p->stimescaled, cputime);
-}
-
-/*
  * Account for involuntary wait time.
  * @p: the process from which the cpu time has been stolen
  * @steal: the cpu time spent in involuntary wait
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 8f3fc25..1f2fce2 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -420,6 +420,7 @@ void tick_nohz_restart_sched_tick(void)
 	int cpu = smp_processor_id();
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 	unsigned long ticks;
+	cputime_t cputime;
 	ktime_t now;
 
 	local_irq_disable();
@@ -452,8 +453,8 @@ void tick_nohz_restart_sched_tick(void)
 	 */
 	if (ticks && ticks < LONG_MAX) {
 		add_preempt_count(HARDIRQ_OFFSET);
-		account_system_time(current, HARDIRQ_OFFSET,
-				    jiffies_to_cputime(ticks));
+		cputime = jiffies_to_cputime(ticks);
+		account_system_time(current, HARDIRQ_OFFSET, cputime, cputime);
 		sub_preempt_count(HARDIRQ_OFFSET);
 	}
 
diff --git a/kernel/timer.c b/kernel/timer.c
index 566257d..b5efb52 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1023,13 +1023,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
 {
 	cputime_t one_jiffy = jiffies_to_cputime(1);
 
-	if (user_tick) {
-		account_user_time(p, one_jiffy);
-		account_user_time_scaled(p, cputime_to_scaled(one_jiffy));
-	} else {
-		account_system_time(p, HARDIRQ_OFFSET, one_jiffy);
-		account_system_time_scaled(p, cputime_to_scaled(one_jiffy));
-	}
+	if (user_tick)
+		account_user_time(p, one_jiffy, cputime_to_scaled(one_jiffy));
+	else
+		account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
+				    cputime_to_scaled(one_jiffy));
 }
 #endif
 
-- 
cgit v0.10.2


From 79741dd35713ff4f6fd0eafd59fa94e8a4ba922d Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 31 Dec 2008 15:11:38 +0100
Subject: [PATCH] idle cputime accounting

The cpu time spent by the idle process actually doing something is
currently accounted as idle time. This is plain wrong, the architectures
that support VIRT_CPU_ACCOUNTING=y can do better: distinguish between the
time spent doing nothing and the time spent by idle doing work. The first
is accounted with account_idle_time and the second with account_system_time.
The architectures that use the account_xxx_time interface directly and not
the account_xxx_ticks interface now need to do the check for the idle
process in their arch code. In particular to improve the system vs true
idle time accounting the arch code needs to measure the true idle time
instead of just testing for the idle process.
To improve the tick based accounting as well we would need an architecture
primitive that can tell us if the pt_regs of the interrupted context
points to the magic instruction that halts the cpu.

In addition idle time is no more added to the stime of the idle process.
This field now contains the system time of the idle process as it should
be. On systems without VIRT_CPU_ACCOUNTING this will always be zero as
every tick that occurs while idle is running will be accounted as idle
time.

This patch contains the necessary common code changes to be able to
distinguish idle system time and true idle time. The architectures with
support for VIRT_CPU_ACCOUNTING need some changes to exploit this.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 4ee3678..f0ebb34 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -93,7 +93,10 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
 	now = ia64_get_itc();
 
 	delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
-	account_system_time(prev, 0, delta_stime, delta_stime);
+	if (idle_task(smp_processor_id()) != prev)
+		account_system_time(prev, 0, delta_stime, delta_stime);
+	else
+		account_idle_time(delta_stime);
 
 	if (pi->ac_utime) {
 		delta_utime = cycle_to_cputime(pi->ac_utime);
@@ -120,7 +123,10 @@ void account_system_vtime(struct task_struct *tsk)
 	now = ia64_get_itc();
 
 	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
-	account_system_time(tsk, 0, delta_stime, delta_stime);
+	if (irq_count() || idle_task(smp_processor_id()) != tsk)
+		account_system_time(tsk, 0, delta_stime, delta_stime);
+	else
+		account_idle_time(delta_stime);
 	ti->ac_stime = 0;
 
 	ti->ac_stamp = now;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 51b201d..fb7049c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -33,6 +33,7 @@
 #include <linux/mqueue.h>
 #include <linux/hardirq.h>
 #include <linux/utsname.h>
+#include <linux/kernel_stat.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 92650cc..3be355c 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,7 +256,10 @@ void account_system_vtime(struct task_struct *tsk)
 		delta += sys_time;
 		get_paca()->system_time = 0;
 	}
-	account_system_time(tsk, 0, delta, deltascaled);
+	if (in_irq() || idle_task(smp_processor_id()) != tsk)
+		account_system_time(tsk, 0, delta, deltascaled);
+	else
+		account_idle_time(delta);
 	per_cpu(cputime_last_delta, smp_processor_id()) = delta;
 	per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
 	local_irq_restore(flags);
@@ -335,8 +338,12 @@ void calculate_steal_time(void)
 	tb = mftb();
 	purr = mfspr(SPRN_PURR);
 	stolen = (tb - pme->tb) - (purr - pme->purr);
-	if (stolen > 0)
-		account_steal_time(current, stolen);
+	if (stolen > 0) {
+		if (idle_task(smp_processor_id()) != current)
+			account_steal_time(stolen);
+		else
+			account_idle_time(stolen);
+	}
 	pme->tb = tb;
 	pme->purr = purr;
 }
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 07283ae..4a4a34c 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -55,13 +55,19 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime);
+	if (idle_task(smp_processor_id()) != current)
+		account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime);
+	else
+		account_idle_time(cputime);
 
 	cputime = S390_lowcore.steal_clock;
 	if ((__s64) cputime > 0) {
 		cputime >>= 12;
 		S390_lowcore.steal_clock -= cputime << 12;
-		account_steal_time(tsk, cputime);
+		if (idle_task(smp_processor_id()) != current)
+			account_steal_time(cputime);
+		else
+			account_idle_time(cputime);
 	}
 }
 
@@ -87,7 +93,10 @@ void account_vtime(struct task_struct *tsk)
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, 0, cputime, cputime);
+	if (idle_task(smp_processor_id()) != current)
+		account_system_time(tsk, 0, cputime, cputime);
+	else
+		account_idle_time(cputime);
 }
 
 /*
@@ -107,7 +116,10 @@ void account_system_vtime(struct task_struct *tsk)
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, 0, cputime, cputime);
+	if (in_irq() || idle_task(smp_processor_id()) != current)
+		account_system_time(tsk, 0, cputime, cputime);
+	else
+		account_idle_time(cputime);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda..732e52d 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -132,8 +132,7 @@ static void do_stolen_accounting(void)
 	*snap = state;
 
 	/* Add the appropriate number of ticks of stolen time,
-	   including any left-overs from last time.  Passing NULL to
-	   account_steal_time accounts the time as stolen. */
+	   including any left-overs from last time. */
 	stolen = runnable + offline + __get_cpu_var(residual_stolen);
 
 	if (stolen < 0)
@@ -141,11 +140,10 @@ static void do_stolen_accounting(void)
 
 	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
 	__get_cpu_var(residual_stolen) = stolen;
-	account_steal_time(NULL, ticks);
+	account_steal_ticks(ticks);
 
 	/* Add the appropriate number of ticks of blocked time,
-	   including any left-overs from last time.  Passing idle to
-	   account_steal_time accounts the time as idle/wait. */
+	   including any left-overs from last time. */
 	blocked += __get_cpu_var(residual_blocked);
 
 	if (blocked < 0)
@@ -153,7 +151,7 @@ static void do_stolen_accounting(void)
 
 	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
 	__get_cpu_var(residual_blocked) = blocked;
-	account_steal_time(idle_task(smp_processor_id()), ticks);
+	account_idle_ticks(ticks);
 }
 
 /*
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index c78a459..570d204 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -81,6 +81,11 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 extern unsigned long long task_delta_exec(struct task_struct *);
 extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
-extern void account_steal_time(struct task_struct *, cputime_t);
+extern void account_steal_time(cputime_t);
+extern void account_idle_time(cputime_t);
+
+extern void account_process_tick(struct task_struct *, int user);
+extern void account_steal_ticks(unsigned long ticks);
+extern void account_idle_ticks(unsigned long ticks);
 
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8395e71..b475d4d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -284,7 +284,6 @@ long io_schedule_timeout(long timeout);
 
 extern void cpu_init (void);
 extern void trap_init(void);
-extern void account_process_tick(struct task_struct *task, int user);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 5b03679..635eaff 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4139,7 +4139,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 			 cputime_t cputime, cputime_t cputime_scaled)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	struct rq *rq = this_rq();
 	cputime64_t tmp;
 
 	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
@@ -4158,37 +4157,84 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 		cpustat->irq = cputime64_add(cpustat->irq, tmp);
 	else if (softirq_count())
 		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-	else if (p != rq->idle)
-		cpustat->system = cputime64_add(cpustat->system, tmp);
-	else if (atomic_read(&rq->nr_iowait) > 0)
-		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 	else
-		cpustat->idle = cputime64_add(cpustat->idle, tmp);
+		cpustat->system = cputime64_add(cpustat->system, tmp);
+
 	/* Account for system time used */
 	acct_update_integrals(p);
 }
 
 /*
  * Account for involuntary wait time.
- * @p: the process from which the cpu time has been stolen
  * @steal: the cpu time spent in involuntary wait
  */
-void account_steal_time(struct task_struct *p, cputime_t steal)
+void account_steal_time(cputime_t cputime)
+{
+	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+	cputime64_t cputime64 = cputime_to_cputime64(cputime);
+
+	cpustat->steal = cputime64_add(cpustat->steal, cputime64);
+}
+
+/*
+ * Account for idle time.
+ * @cputime: the cpu time spent in idle wait
+ */
+void account_idle_time(cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	cputime64_t tmp = cputime_to_cputime64(steal);
+	cputime64_t cputime64 = cputime_to_cputime64(cputime);
 	struct rq *rq = this_rq();
 
-	if (p == rq->idle) {
-		p->stime = cputime_add(p->stime, steal);
-		if (atomic_read(&rq->nr_iowait) > 0)
-			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
-		else
-			cpustat->idle = cputime64_add(cpustat->idle, tmp);
-	} else
-		cpustat->steal = cputime64_add(cpustat->steal, tmp);
+	if (atomic_read(&rq->nr_iowait) > 0)
+		cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
+	else
+		cpustat->idle = cputime64_add(cpustat->idle, cputime64);
+}
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+
+/*
+ * Account a single tick of cpu time.
+ * @p: the process that the cpu time gets accounted to
+ * @user_tick: indicates if the tick is a user or a system tick
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+	cputime_t one_jiffy = jiffies_to_cputime(1);
+	cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
+	struct rq *rq = this_rq();
+
+	if (user_tick)
+		account_user_time(p, one_jiffy, one_jiffy_scaled);
+	else if (p != rq->idle)
+		account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
+				    one_jiffy_scaled);
+	else
+		account_idle_time(one_jiffy);
+}
+
+/*
+ * Account multiple ticks of steal time.
+ * @p: the process from which the cpu time has been stolen
+ * @ticks: number of stolen ticks
+ */
+void account_steal_ticks(unsigned long ticks)
+{
+	account_steal_time(jiffies_to_cputime(ticks));
+}
+
+/*
+ * Account multiple ticks of idle time.
+ * @ticks: number of stolen ticks
+ */
+void account_idle_ticks(unsigned long ticks)
+{
+	account_idle_time(jiffies_to_cputime(ticks));
 }
 
+#endif
+
 /*
  * Use precise platform statistics if available:
  */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1f2fce2..611fa4c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -419,8 +419,9 @@ void tick_nohz_restart_sched_tick(void)
 {
 	int cpu = smp_processor_id();
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	unsigned long ticks;
-	cputime_t cputime;
+#endif
 	ktime_t now;
 
 	local_irq_disable();
@@ -442,6 +443,7 @@ void tick_nohz_restart_sched_tick(void)
 	tick_do_update_jiffies64(now);
 	cpu_clear(cpu, nohz_cpu_mask);
 
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
 	 * time we slept as update_process_times does only a 1 tick
@@ -451,12 +453,9 @@ void tick_nohz_restart_sched_tick(void)
 	/*
 	 * We might be one off. Do not randomly account a huge number of ticks!
 	 */
-	if (ticks && ticks < LONG_MAX) {
-		add_preempt_count(HARDIRQ_OFFSET);
-		cputime = jiffies_to_cputime(ticks);
-		account_system_time(current, HARDIRQ_OFFSET, cputime, cputime);
-		sub_preempt_count(HARDIRQ_OFFSET);
-	}
+	if (ticks && ticks < LONG_MAX)
+		account_idle_ticks(ticks);
+#endif
 
 	touch_softlockup_watchdog();
 	/*
diff --git a/kernel/timer.c b/kernel/timer.c
index b5efb52..dee3f64 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1018,19 +1018,6 @@ unsigned long get_next_timer_interrupt(unsigned long now)
 }
 #endif
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-void account_process_tick(struct task_struct *p, int user_tick)
-{
-	cputime_t one_jiffy = jiffies_to_cputime(1);
-
-	if (user_tick)
-		account_user_time(p, one_jiffy, cputime_to_scaled(one_jiffy));
-	else
-		account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
-				    cputime_to_scaled(one_jiffy));
-}
-#endif
-
 /*
  * Called from the timer interrupt handler to charge one tick to the current
  * process.  user_tick is 1 if the tick is user time, 0 for system.
-- 
cgit v0.10.2


From aa5e97ce4bbc9d5daeec16b1d15bb3f6b7b4f4d4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 31 Dec 2008 15:11:39 +0100
Subject: [PATCH] improve precision of process accounting.

The unit of the cputime accouting values that are stored per process is
currently a microsecond. The CPU timer has a maximum granularity of
2**-12 microseconds. There is no benefit in storing the per process values
in the lesser precision and there is the disadvantage that the backend
has to do the rounding to microseconds. The better solution is to use
the maximum granularity of the CPU timer as cputime unit.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 133ce05..5217264 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -11,7 +11,7 @@
 
 #include <asm/div64.h>
 
-/* We want to use micro-second resolution. */
+/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
 typedef unsigned long long cputime_t;
 typedef unsigned long long cputime64_t;
@@ -53,9 +53,9 @@ __div(unsigned long long n, unsigned int base)
 #define cputime_ge(__a, __b)		((__a) >= (__b))
 #define cputime_lt(__a, __b)		((__a) <  (__b))
 #define cputime_le(__a, __b)		((__a) <= (__b))
-#define cputime_to_jiffies(__ct)	(__div((__ct), 1000000 / HZ))
+#define cputime_to_jiffies(__ct)	(__div((__ct), 4096000000ULL / HZ))
 #define cputime_to_scaled(__ct)		(__ct)
-#define jiffies_to_cputime(__hz)	((cputime_t)(__hz) * (1000000 / HZ))
+#define jiffies_to_cputime(__hz)	((cputime_t)(__hz) * (4096000000ULL / HZ))
 
 #define cputime64_zero			(0ULL)
 #define cputime64_add(__a, __b)		((__a) + (__b))
@@ -64,7 +64,7 @@ __div(unsigned long long n, unsigned int base)
 static inline u64
 cputime64_to_jiffies64(cputime64_t cputime)
 {
-	do_div(cputime, 1000000 / HZ);
+	do_div(cputime, 4096000000ULL / HZ);
 	return cputime;
 }
 
@@ -74,13 +74,13 @@ cputime64_to_jiffies64(cputime64_t cputime)
 static inline unsigned int
 cputime_to_msecs(const cputime_t cputime)
 {
-	return __div(cputime, 1000);
+	return __div(cputime, 4096000);
 }
 
 static inline cputime_t
 msecs_to_cputime(const unsigned int m)
 {
-	return (cputime_t) m * 1000;
+	return (cputime_t) m * 4096000;
 }
 
 /*
@@ -89,13 +89,13 @@ msecs_to_cputime(const unsigned int m)
 static inline unsigned int
 cputime_to_secs(const cputime_t cputime)
 {
-	return __div(cputime, 1000000);
+	return __div(cputime, 2048000000) >> 1;
 }
 
 static inline cputime_t
 secs_to_cputime(const unsigned int s)
 {
-	return (cputime_t) s * 1000000;
+	return (cputime_t) s * 4096000000ULL;
 }
 
 /*
@@ -104,7 +104,7 @@ secs_to_cputime(const unsigned int s)
 static inline cputime_t
 timespec_to_cputime(const struct timespec *value)
 {
-        return value->tv_nsec / 1000 + (u64) value->tv_sec * 1000000;
+	return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
 }
 
 static inline void
@@ -114,12 +114,12 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
 	register_pair rp;
 
 	rp.pair = cputime >> 1;
-	asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1));
-	value->tv_nsec = rp.subreg.even * 1000;
+	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
+	value->tv_nsec = rp.subreg.even * 1000 / 4096;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_nsec = (cputime % 1000000) * 1000;
-	value->tv_sec = cputime / 1000000;
+	value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
+	value->tv_sec = cputime / 4096000000ULL;
 #endif
 }
 
@@ -131,7 +131,7 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
 static inline cputime_t
 timeval_to_cputime(const struct timeval *value)
 {
-        return value->tv_usec + (u64) value->tv_sec * 1000000;
+	return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
 }
 
 static inline void
@@ -141,12 +141,12 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
 	register_pair rp;
 
 	rp.pair = cputime >> 1;
-	asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1));
-	value->tv_usec = rp.subreg.even;
+	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
+	value->tv_usec = rp.subreg.even / 4096;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_usec = cputime % 1000000;
-	value->tv_sec = cputime / 1000000;
+	value->tv_usec = cputime % 4096000000ULL;
+	value->tv_sec = cputime / 4096000000ULL;
 #endif
 }
 
@@ -156,13 +156,13 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
 static inline clock_t
 cputime_to_clock_t(cputime_t cputime)
 {
-	return __div(cputime, 1000000 / USER_HZ);
+	return __div(cputime, 4096000000ULL / USER_HZ);
 }
 
 static inline cputime_t
 clock_t_to_cputime(unsigned long x)
 {
-	return (cputime_t) x * (1000000 / USER_HZ);
+	return (cputime_t) x * (4096000000ULL / USER_HZ);
 }
 
 /*
@@ -171,7 +171,7 @@ clock_t_to_cputime(unsigned long x)
 static inline clock_t
 cputime64_to_clock_t(cputime64_t cputime)
 {
-       return __div(cputime, 1000000 / USER_HZ);
+       return __div(cputime, 4096000000ULL / USER_HZ);
 }
 
 #endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 0bc51d5..a547817 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -67,11 +67,11 @@
 #define __LC_SYNC_ENTER_TIMER		0x248
 #define __LC_ASYNC_ENTER_TIMER		0x250
 #define __LC_EXIT_TIMER			0x258
-#define __LC_LAST_UPDATE_TIMER		0x260
-#define __LC_USER_TIMER			0x268
-#define __LC_SYSTEM_TIMER		0x270
-#define __LC_LAST_UPDATE_CLOCK		0x278
-#define __LC_STEAL_CLOCK		0x280
+#define __LC_USER_TIMER			0x260
+#define __LC_SYSTEM_TIMER		0x268
+#define __LC_STEAL_TIMER		0x270
+#define __LC_LAST_UPDATE_TIMER		0x278
+#define __LC_LAST_UPDATE_CLOCK		0x280
 #define __LC_RETURN_MCCK_PSW            0x288
 #define __LC_KERNEL_STACK               0xC40
 #define __LC_THREAD_INFO		0xC44
@@ -89,11 +89,11 @@
 #define __LC_SYNC_ENTER_TIMER		0x250
 #define __LC_ASYNC_ENTER_TIMER		0x258
 #define __LC_EXIT_TIMER			0x260
-#define __LC_LAST_UPDATE_TIMER		0x268
-#define __LC_USER_TIMER			0x270
-#define __LC_SYSTEM_TIMER		0x278
-#define __LC_LAST_UPDATE_CLOCK		0x280
-#define __LC_STEAL_CLOCK		0x288
+#define __LC_USER_TIMER			0x268
+#define __LC_SYSTEM_TIMER		0x270
+#define __LC_STEAL_TIMER		0x278
+#define __LC_LAST_UPDATE_TIMER		0x280
+#define __LC_LAST_UPDATE_CLOCK		0x288
 #define __LC_RETURN_MCCK_PSW            0x290
 #define __LC_KERNEL_STACK               0xD40
 #define __LC_THREAD_INFO		0xD48
@@ -252,11 +252,11 @@ struct _lowcore
 	__u64        sync_enter_timer;         /* 0x248 */
 	__u64        async_enter_timer;        /* 0x250 */
 	__u64        exit_timer;               /* 0x258 */
-	__u64        last_update_timer;        /* 0x260 */
-	__u64        user_timer;               /* 0x268 */
-	__u64        system_timer;             /* 0x270 */
-	__u64        last_update_clock;        /* 0x278 */
-	__u64        steal_clock;              /* 0x280 */
+	__u64	     user_timer;	       /* 0x260 */
+	__u64	     system_timer;	       /* 0x268 */
+	__u64	     steal_timer;	       /* 0x270 */
+	__u64	     last_update_timer;        /* 0x278 */
+	__u64	     last_update_clock;        /* 0x280 */
         psw_t        return_mcck_psw;          /* 0x288 */
 	__u8         pad8[0xc00-0x290];        /* 0x290 */
 
@@ -343,11 +343,11 @@ struct _lowcore
 	__u64        sync_enter_timer;         /* 0x250 */
 	__u64        async_enter_timer;        /* 0x258 */
 	__u64        exit_timer;               /* 0x260 */
-	__u64        last_update_timer;        /* 0x268 */
-	__u64        user_timer;               /* 0x270 */
-	__u64        system_timer;             /* 0x278 */
-	__u64        last_update_clock;        /* 0x280 */
-	__u64        steal_clock;              /* 0x288 */
+	__u64	     user_timer;	       /* 0x268 */
+	__u64	     system_timer;	       /* 0x270 */
+	__u64	     steal_timer;	       /* 0x278 */
+	__u64	     last_update_timer;        /* 0x280 */
+	__u64	     last_update_clock;        /* 0x288 */
         psw_t        return_mcck_psw;          /* 0x290 */
         __u8         pad8[0xc00-0x2a0];        /* 0x2a0 */
         /* System info area */
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 024ef42..3a8b26e 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -99,7 +99,7 @@ static inline void restore_access_regs(unsigned int *acrs)
 	prev = __switch_to(prev,next);					     \
 } while (0)
 
-extern void account_vtime(struct task_struct *);
+extern void account_vtime(struct task_struct *, struct task_struct *);
 extern void account_tick_vtime(struct task_struct *);
 extern void account_system_vtime(struct task_struct *);
 
@@ -121,7 +121,7 @@ static inline void cmma_init(void) { }
 
 #define finish_arch_switch(prev) do {					     \
 	set_fs(current->thread.mm_segment);				     \
-	account_vtime(prev);						     \
+	account_vtime(prev, current);					     \
 } while (0)
 
 #define nop() asm volatile("nop")
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index c1eaf96..c544aa5 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -47,6 +47,8 @@ struct thread_info {
 	unsigned int		cpu;		/* current CPU */
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
 	struct restart_block	restart_block;
+	__u64			user_timer;
+	__u64			system_timer;
 };
 
 /*
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 4a4a34c..1254a4d 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -31,11 +31,10 @@ static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
  * Update process times based on virtual cpu times stored by entry.S
  * to the lowcore fields user_timer, system_timer & steal_clock.
  */
-void account_process_tick(struct task_struct *tsk, int user_tick)
+static void do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 {
-	cputime_t cputime;
-	__u64 timer, clock;
-	int rcu_user_flag;
+	struct thread_info *ti = task_thread_info(tsk);
+	__u64 timer, clock, user, system, steal;
 
 	timer = S390_lowcore.last_update_timer;
 	clock = S390_lowcore.last_update_clock;
@@ -44,59 +43,47 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 		      : "=m" (S390_lowcore.last_update_timer),
 		        "=m" (S390_lowcore.last_update_clock) );
 	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
-	S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock;
+	S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
 
-	cputime = S390_lowcore.user_timer >> 12;
-	rcu_user_flag = cputime != 0;
-	S390_lowcore.user_timer -= cputime << 12;
-	S390_lowcore.steal_clock -= cputime << 12;
-	account_user_time(tsk, cputime, cputime);
+	user = S390_lowcore.user_timer - ti->user_timer;
+	S390_lowcore.steal_timer -= user;
+	ti->user_timer = S390_lowcore.user_timer;
+	account_user_time(tsk, user, user);
 
-	cputime =  S390_lowcore.system_timer >> 12;
-	S390_lowcore.system_timer -= cputime << 12;
-	S390_lowcore.steal_clock -= cputime << 12;
+	system = S390_lowcore.system_timer - ti->system_timer;
+	S390_lowcore.steal_timer -= system;
+	ti->system_timer = S390_lowcore.system_timer;
 	if (idle_task(smp_processor_id()) != current)
-		account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime);
+		account_system_time(tsk, hardirq_offset, system, system);
 	else
-		account_idle_time(cputime);
+		account_idle_time(system);
 
-	cputime = S390_lowcore.steal_clock;
-	if ((__s64) cputime > 0) {
-		cputime >>= 12;
-		S390_lowcore.steal_clock -= cputime << 12;
+	steal = S390_lowcore.steal_timer;
+	if ((s64) steal > 0) {
+		S390_lowcore.steal_timer = 0;
 		if (idle_task(smp_processor_id()) != current)
-			account_steal_time(cputime);
+			account_steal_time(steal);
 		else
-			account_idle_time(cputime);
+			account_idle_time(steal);
 	}
 }
 
-/*
- * Update process times based on virtual cpu times stored by entry.S
- * to the lowcore fields user_timer, system_timer & steal_clock.
- */
-void account_vtime(struct task_struct *tsk)
+void account_vtime(struct task_struct *prev, struct task_struct *next)
 {
-	cputime_t cputime;
-	__u64 timer;
-
-	timer = S390_lowcore.last_update_timer;
-	asm volatile ("  STPT %0"    /* Store current cpu timer value */
-		      : "=m" (S390_lowcore.last_update_timer) );
-	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
-
-	cputime = S390_lowcore.user_timer >> 12;
-	S390_lowcore.user_timer -= cputime << 12;
-	S390_lowcore.steal_clock -= cputime << 12;
-	account_user_time(tsk, cputime, cputime);
+	struct thread_info *ti;
+
+	do_account_vtime(prev, 0);
+	ti = task_thread_info(prev);
+	ti->user_timer = S390_lowcore.user_timer;
+	ti->system_timer = S390_lowcore.system_timer;
+	ti = task_thread_info(next);
+	S390_lowcore.user_timer = ti->user_timer;
+	S390_lowcore.system_timer = ti->system_timer;
+}
 
-	cputime =  S390_lowcore.system_timer >> 12;
-	S390_lowcore.system_timer -= cputime << 12;
-	S390_lowcore.steal_clock -= cputime << 12;
-	if (idle_task(smp_processor_id()) != current)
-		account_system_time(tsk, 0, cputime, cputime);
-	else
-		account_idle_time(cputime);
+void account_process_tick(struct task_struct *tsk, int user_tick)
+{
+	do_account_vtime(tsk, HARDIRQ_OFFSET);
 }
 
 /*
@@ -105,21 +92,21 @@ void account_vtime(struct task_struct *tsk)
  */
 void account_system_vtime(struct task_struct *tsk)
 {
-	cputime_t cputime;
-	__u64 timer;
+	struct thread_info *ti = task_thread_info(tsk);
+	__u64 timer, system;
 
 	timer = S390_lowcore.last_update_timer;
 	asm volatile ("  STPT %0"    /* Store current cpu timer value */
 		      : "=m" (S390_lowcore.last_update_timer) );
 	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
 
-	cputime =  S390_lowcore.system_timer >> 12;
-	S390_lowcore.system_timer -= cputime << 12;
-	S390_lowcore.steal_clock -= cputime << 12;
+	system = S390_lowcore.system_timer - ti->system_timer;
+	S390_lowcore.steal_timer -= system;
+	ti->system_timer = S390_lowcore.system_timer;
 	if (in_irq() || idle_task(smp_processor_id()) != current)
-		account_system_time(tsk, 0, cputime, cputime);
+		account_system_time(tsk, 0, system, system);
 	else
-		account_idle_time(cputime);
+		account_idle_time(system);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
@@ -490,8 +477,8 @@ void init_cpu_vtimer(void)
 	/* kick the virtual timer */
 	S390_lowcore.exit_timer = VTIMER_MAX_SLICE;
 	S390_lowcore.last_update_timer = VTIMER_MAX_SLICE;
-	asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
 	asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock));
+	asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
 
 	/* enable cpu timer interrupts */
 	__ctl_set_bit(0,10);
-- 
cgit v0.10.2


From 6f43092441bda528dd38f2dc6c1e2522c5079fb7 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 31 Dec 2008 15:11:40 +0100
Subject: [PATCH] improve precision of idle time detection.

Increase the precision of the idle time calculation that is exported
to user space via /sys/devices/system/cpu/cpu<x>/idle_time_us

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index e5a6a9b..89456df 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -14,7 +14,6 @@
 
 struct s390_idle_data {
 	spinlock_t lock;
-	unsigned int in_idle;
 	unsigned long long idle_count;
 	unsigned long long idle_enter;
 	unsigned long long idle_time;
@@ -26,7 +25,7 @@ void s390_idle_leave(void);
 
 static inline void s390_idle_check(void)
 {
-	if ((&__get_cpu_var(s390_idle))->in_idle)
+	if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
 		s390_idle_leave();
 }
 
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 04f8c67..1e06436 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -38,6 +38,7 @@
 #include <linux/utsname.h>
 #include <linux/tick.h>
 #include <linux/elfcore.h>
+#include <linux/kernel_stat.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -79,30 +80,19 @@ DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
 	.lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
 };
 
-static int s390_idle_enter(void)
+void s390_idle_leave(void)
 {
 	struct s390_idle_data *idle;
+	unsigned long long idle_time;
 
 	idle = &__get_cpu_var(s390_idle);
+	idle_time = S390_lowcore.int_clock - idle->idle_enter;
 	spin_lock(&idle->lock);
+	idle->idle_time += idle_time;
+	idle->idle_enter = 0ULL;
 	idle->idle_count++;
-	idle->in_idle = 1;
-	idle->idle_enter = get_clock();
 	spin_unlock(&idle->lock);
-	vtime_stop_cpu_timer();
-	return NOTIFY_OK;
-}
-
-void s390_idle_leave(void)
-{
-	struct s390_idle_data *idle;
-
 	vtime_start_cpu_timer();
-	idle = &__get_cpu_var(s390_idle);
-	spin_lock(&idle->lock);
-	idle->idle_time += get_clock() - idle->idle_enter;
-	idle->in_idle = 0;
-	spin_unlock(&idle->lock);
 }
 
 extern void s390_handle_mcck(void);
@@ -111,16 +101,16 @@ extern void s390_handle_mcck(void);
  */
 static void default_idle(void)
 {
+	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
+	unsigned long addr;
+	psw_t psw;
+
 	/* CPU is going idle. */
 	local_irq_disable();
 	if (need_resched()) {
 		local_irq_enable();
 		return;
 	}
-	if (s390_idle_enter() == NOTIFY_BAD) {
-		local_irq_enable();
-		return;
-	}
 #ifdef CONFIG_HOTPLUG_CPU
 	if (cpu_is_offline(smp_processor_id())) {
 		preempt_enable_no_resched();
@@ -138,9 +128,42 @@ static void default_idle(void)
 	trace_hardirqs_on();
 	/* Don't trace preempt off for idle. */
 	stop_critical_timings();
+	vtime_stop_cpu_timer();
+
+	/*
+	 * The inline assembly is equivalent to
+	 *	idle->idle_enter = get_clock();
+	 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
+	 *			   PSW_MASK_IO | PSW_MASK_EXT);
+	 * The difference is that the inline assembly makes sure that
+	 * the stck instruction is right before the lpsw instruction.
+	 * This is done to increase the precision.
+	 */
+
 	/* Wait for external, I/O or machine check interrupt. */
-	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
-			PSW_MASK_IO | PSW_MASK_EXT);
+	psw.mask = psw_kernel_bits|PSW_MASK_WAIT|PSW_MASK_IO|PSW_MASK_EXT;
+#ifndef __s390x__
+	asm volatile(
+		"	basr	%0,0\n"
+		"0:	ahi	%0,1f-0b\n"
+		"	st	%0,4(%2)\n"
+		"	stck	0(%3)\n"
+		"	lpsw	0(%2)\n"
+		"1:"
+		: "=&d" (addr), "=m" (idle->idle_enter)
+		: "a" (&psw), "a" (&idle->idle_enter), "m" (psw)
+		: "memory", "cc");
+#else /* __s390x__ */
+	asm volatile(
+		"	larl	%0,1f\n"
+		"	stg	%0,8(%2)\n"
+		"	stck	0(%3)\n"
+		"	lpswe	0(%2)\n"
+		"1:"
+		: "=&d" (addr), "=m" (idle->idle_enter)
+		: "a" (&psw), "a" (&idle->idle_enter), "m" (psw)
+		: "memory", "cc");
+#endif /* __s390x__ */
 	start_critical_timings();
 }
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6fc7854..3979a6f 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -851,9 +851,11 @@ static ssize_t show_idle_count(struct sys_device *dev,
 	unsigned long long idle_count;
 
 	idle = &per_cpu(s390_idle, dev->id);
-	spin_lock_irq(&idle->lock);
+	spin_lock(&idle->lock);
 	idle_count = idle->idle_count;
-	spin_unlock_irq(&idle->lock);
+	if (idle->idle_enter)
+		idle_count++;
+	spin_unlock(&idle->lock);
 	return sprintf(buf, "%llu\n", idle_count);
 }
 static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -862,18 +864,17 @@ static ssize_t show_idle_time(struct sys_device *dev,
 				struct sysdev_attribute *attr, char *buf)
 {
 	struct s390_idle_data *idle;
-	unsigned long long new_time;
+	unsigned long long now, idle_time, idle_enter;
 
 	idle = &per_cpu(s390_idle, dev->id);
-	spin_lock_irq(&idle->lock);
-	if (idle->in_idle) {
-		new_time = get_clock();
-		idle->idle_time += new_time - idle->idle_enter;
-		idle->idle_enter = new_time;
-	}
-	new_time = idle->idle_time;
-	spin_unlock_irq(&idle->lock);
-	return sprintf(buf, "%llu\n", new_time >> 12);
+	spin_lock(&idle->lock);
+	now = get_clock();
+	idle_time = idle->idle_time;
+	idle_enter = idle->idle_enter;
+	if (idle_enter != 0ULL && idle_enter < now)
+		idle_time += now - idle_enter;
+	spin_unlock(&idle->lock);
+	return sprintf(buf, "%llu\n", idle_time >> 12);
 }
 static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
 
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 1254a4d..25d21fe 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -112,6 +112,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime);
 
 static inline void set_vtimer(__u64 expires)
 {
+	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
 	__u64 timer;
 
 	asm volatile ("  STPT %0\n"  /* Store current cpu timer value */
@@ -121,7 +122,7 @@ static inline void set_vtimer(__u64 expires)
 	S390_lowcore.last_update_timer = expires;
 
 	/* store expire time for this CPU timer */
-	__get_cpu_var(virt_cpu_timer).to_expire = expires;
+	vq->to_expire = expires;
 }
 
 void vtime_start_cpu_timer(void)
-- 
cgit v0.10.2


From 9cfb9b3c3a7361c793c031e9c3583b177ac5debd Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 31 Dec 2008 15:11:41 +0100
Subject: [PATCH] improve idle cputime accounting

Distinguish the cputime of the idle process where idle is actually using
cpu cycles from the cputime where idle is sleeping on an enabled wait psw.
The former is accounted as system time, the later as idle time.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index 89456df..d60a2ee 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -21,12 +21,12 @@ struct s390_idle_data {
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
 
-void s390_idle_leave(void);
+void vtime_start_cpu(void);
 
 static inline void s390_idle_check(void)
 {
 	if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
-		s390_idle_leave();
+		vtime_start_cpu();
 }
 
 #endif /* _ASM_S390_CPU_H_ */
diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h
index 61705d60..e4bcab7 100644
--- a/arch/s390/include/asm/timer.h
+++ b/arch/s390/include/asm/timer.h
@@ -23,20 +23,18 @@ struct vtimer_list {
 	__u64 expires;
 	__u64 interval;
 
-	spinlock_t lock;
-	unsigned long magic;
-
 	void (*function)(unsigned long);
 	unsigned long data;
 };
 
-/* the offset value will wrap after ca. 71 years */
+/* the vtimer value will wrap after ca. 71 years */
 struct vtimer_queue {
 	struct list_head list;
 	spinlock_t lock;
-	__u64 to_expire;	  /* current event expire time */
-	__u64 offset;		  /* list offset to zero */
-	__u64 idle;		  /* temp var for idle */
+	__u64 timer;		/* last programmed timer */
+	__u64 elapsed;		/* elapsed time of timer expire values */
+	__u64 idle;		/* temp var for idle */
+	int do_spt;		/* =1: reprogram cpu timer in idle */
 };
 
 extern void init_virt_timer(struct vtimer_list *timer);
@@ -48,8 +46,8 @@ extern int del_virt_timer(struct vtimer_list *timer);
 extern void init_cpu_vtimer(void);
 extern void vtime_init(void);
 
-extern void vtime_start_cpu_timer(void);
-extern void vtime_stop_cpu_timer(void);
+extern void vtime_stop_cpu(void);
+extern void vtime_start_leave(void);
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 55de521..1268aa2 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -583,8 +583,8 @@ kernel_per:
 
 	.globl io_int_handler
 io_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+16
 	SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
 	CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
@@ -723,8 +723,8 @@ io_notify_resume:
 
 	.globl	ext_int_handler
 ext_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+16
 	SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
 	CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
@@ -750,6 +750,7 @@ __critical_end:
 
 	.globl mcck_int_handler
 mcck_int_handler:
+	stck	__LC_INT_CLOCK
 	spt	__LC_CPU_TIMER_SAVE_AREA	# revalidate cpu timer
 	lm	%r0,%r15,__LC_GPREGS_SAVE_AREA	# revalidate gprs
 	SAVE_ALL_BASE __LC_SAVE_AREA+32
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 16bb4fd..ae83c19 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -559,8 +559,8 @@ kernel_per:
  */
 	.globl io_int_handler
 io_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+32
 	SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
 	CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
@@ -721,8 +721,8 @@ io_notify_resume:
  */
 	.globl	ext_int_handler
 ext_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+32
 	SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
 	CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
@@ -746,6 +746,7 @@ __critical_end:
  */
 	.globl mcck_int_handler
 mcck_int_handler:
+	stck	__LC_INT_CLOCK
 	la	%r1,4095		# revalidate r1
 	spt	__LC_CPU_TIMER_SAVE_AREA-4095(%r1)	# revalidate cpu timer
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 1e06436..b6110bd 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -46,7 +46,6 @@
 #include <asm/processor.h>
 #include <asm/irq.h>
 #include <asm/timer.h>
-#include <asm/cpu.h>
 #include "entry.h"
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -76,35 +75,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	return sf->gprs[8];
 }
 
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
-	.lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
-};
-
-void s390_idle_leave(void)
-{
-	struct s390_idle_data *idle;
-	unsigned long long idle_time;
-
-	idle = &__get_cpu_var(s390_idle);
-	idle_time = S390_lowcore.int_clock - idle->idle_enter;
-	spin_lock(&idle->lock);
-	idle->idle_time += idle_time;
-	idle->idle_enter = 0ULL;
-	idle->idle_count++;
-	spin_unlock(&idle->lock);
-	vtime_start_cpu_timer();
-}
-
 extern void s390_handle_mcck(void);
 /*
  * The idle loop on a S390...
  */
 static void default_idle(void)
 {
-	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
-	unsigned long addr;
-	psw_t psw;
-
 	/* CPU is going idle. */
 	local_irq_disable();
 	if (need_resched()) {
@@ -120,7 +96,6 @@ static void default_idle(void)
 	local_mcck_disable();
 	if (test_thread_flag(TIF_MCCK_PENDING)) {
 		local_mcck_enable();
-		s390_idle_leave();
 		local_irq_enable();
 		s390_handle_mcck();
 		return;
@@ -128,42 +103,9 @@ static void default_idle(void)
 	trace_hardirqs_on();
 	/* Don't trace preempt off for idle. */
 	stop_critical_timings();
-	vtime_stop_cpu_timer();
-
-	/*
-	 * The inline assembly is equivalent to
-	 *	idle->idle_enter = get_clock();
-	 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
-	 *			   PSW_MASK_IO | PSW_MASK_EXT);
-	 * The difference is that the inline assembly makes sure that
-	 * the stck instruction is right before the lpsw instruction.
-	 * This is done to increase the precision.
-	 */
-
-	/* Wait for external, I/O or machine check interrupt. */
-	psw.mask = psw_kernel_bits|PSW_MASK_WAIT|PSW_MASK_IO|PSW_MASK_EXT;
-#ifndef __s390x__
-	asm volatile(
-		"	basr	%0,0\n"
-		"0:	ahi	%0,1f-0b\n"
-		"	st	%0,4(%2)\n"
-		"	stck	0(%3)\n"
-		"	lpsw	0(%2)\n"
-		"1:"
-		: "=&d" (addr), "=m" (idle->idle_enter)
-		: "a" (&psw), "a" (&idle->idle_enter), "m" (psw)
-		: "memory", "cc");
-#else /* __s390x__ */
-	asm volatile(
-		"	larl	%0,1f\n"
-		"	stg	%0,8(%2)\n"
-		"	stck	0(%3)\n"
-		"	lpswe	0(%2)\n"
-		"1:"
-		: "=&d" (addr), "=m" (idle->idle_enter)
-		: "a" (&psw), "a" (&idle->idle_enter), "m" (psw)
-		: "memory", "cc");
-#endif /* __s390x__ */
+	/* Stop virtual timer and halt the cpu. */
+	vtime_stop_cpu();
+	/* Reenable preemption tracer. */
 	start_critical_timings();
 }
 
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index e019b41..a0d2d55 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -119,8 +119,8 @@ void do_extint(struct pt_regs *regs, unsigned short code)
 	struct pt_regs *old_regs;
 
 	old_regs = set_irq_regs(regs);
-	irq_enter();
 	s390_idle_check();
+	irq_enter();
 	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
 		/* Serve timer interrupts first. */
 		clock_comparator_work();
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 25d21fe..2fb36e4 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -23,10 +23,35 @@
 #include <asm/s390_ext.h>
 #include <asm/timer.h>
 #include <asm/irq_regs.h>
+#include <asm/cpu.h>
 
 static ext_int_info_t ext_int_info_timer;
+
 static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
 
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
+	.lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
+};
+
+static inline __u64 get_vtimer(void)
+{
+	__u64 timer;
+
+	asm volatile("STPT %0" : "=m" (timer));
+	return timer;
+}
+
+static inline void set_vtimer(__u64 expires)
+{
+	__u64 timer;
+
+	asm volatile ("  STPT %0\n"  /* Store current cpu timer value */
+		      "  SPT %1"     /* Set new value immediatly afterwards */
+		      : "=m" (timer) : "m" (expires) );
+	S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
+	S390_lowcore.last_update_timer = expires;
+}
+
 /*
  * Update process times based on virtual cpu times stored by entry.S
  * to the lowcore fields user_timer, system_timer & steal_clock.
@@ -53,18 +78,12 @@ static void do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 	system = S390_lowcore.system_timer - ti->system_timer;
 	S390_lowcore.steal_timer -= system;
 	ti->system_timer = S390_lowcore.system_timer;
-	if (idle_task(smp_processor_id()) != current)
-		account_system_time(tsk, hardirq_offset, system, system);
-	else
-		account_idle_time(system);
+	account_system_time(tsk, hardirq_offset, system, system);
 
 	steal = S390_lowcore.steal_timer;
 	if ((s64) steal > 0) {
 		S390_lowcore.steal_timer = 0;
-		if (idle_task(smp_processor_id()) != current)
-			account_steal_time(steal);
-		else
-			account_idle_time(steal);
+		account_steal_time(steal);
 	}
 }
 
@@ -96,80 +115,127 @@ void account_system_vtime(struct task_struct *tsk)
 	__u64 timer, system;
 
 	timer = S390_lowcore.last_update_timer;
-	asm volatile ("  STPT %0"    /* Store current cpu timer value */
-		      : "=m" (S390_lowcore.last_update_timer) );
+	S390_lowcore.last_update_timer = get_vtimer();
 	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
 
 	system = S390_lowcore.system_timer - ti->system_timer;
 	S390_lowcore.steal_timer -= system;
 	ti->system_timer = S390_lowcore.system_timer;
-	if (in_irq() || idle_task(smp_processor_id()) != current)
-		account_system_time(tsk, 0, system, system);
-	else
-		account_idle_time(system);
+	account_system_time(tsk, 0, system, system);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
-static inline void set_vtimer(__u64 expires)
+void vtime_start_cpu(void)
 {
+	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
 	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
-	__u64 timer;
-
-	asm volatile ("  STPT %0\n"  /* Store current cpu timer value */
-		      "  SPT %1"     /* Set new value immediatly afterwards */
-		      : "=m" (timer) : "m" (expires) );
-	S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
-	S390_lowcore.last_update_timer = expires;
-
-	/* store expire time for this CPU timer */
-	vq->to_expire = expires;
-}
-
-void vtime_start_cpu_timer(void)
-{
-	struct vtimer_queue *vt_list;
-
-	vt_list = &__get_cpu_var(virt_cpu_timer);
-
-	/* CPU timer interrupt is pending, don't reprogramm it */
-	if (vt_list->idle & 1LL<<63)
-		return;
+	__u64 idle_time, expires;
+
+	/* Account time spent with enabled wait psw loaded as idle time. */
+	idle_time = S390_lowcore.int_clock - idle->idle_enter;
+	account_idle_time(idle_time);
+	S390_lowcore.last_update_clock = S390_lowcore.int_clock;
+
+	/* Account system time spent going idle. */
+	S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
+	S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer;
+
+	/* Restart vtime CPU timer */
+	if (vq->do_spt) {
+		/* Program old expire value but first save progress. */
+		expires = vq->idle - S390_lowcore.async_enter_timer;
+		expires += get_vtimer();
+		set_vtimer(expires);
+	} else {
+		/* Don't account the CPU timer delta while the cpu was idle. */
+		vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer;
+	}
 
-	if (!list_empty(&vt_list->list))
-		set_vtimer(vt_list->idle);
+	spin_lock(&idle->lock);
+	idle->idle_time += idle_time;
+	idle->idle_enter = 0ULL;
+	idle->idle_count++;
+	spin_unlock(&idle->lock);
 }
 
-void vtime_stop_cpu_timer(void)
+void vtime_stop_cpu(void)
 {
-	struct vtimer_queue *vt_list;
-
-	vt_list = &__get_cpu_var(virt_cpu_timer);
-
-	/* nothing to do */
-	if (list_empty(&vt_list->list)) {
-		vt_list->idle = VTIMER_MAX_SLICE;
-		goto fire;
+	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
+	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
+	psw_t psw;
+
+	/* Wait for external, I/O or machine check interrupt. */
+	psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
+
+	/* Check if the CPU timer needs to be reprogrammed. */
+	if (vq->do_spt) {
+		__u64 vmax = VTIMER_MAX_SLICE;
+		/*
+		 * The inline assembly is equivalent to
+		 *	vq->idle = get_cpu_timer();
+		 *	set_cpu_timer(VTIMER_MAX_SLICE);
+		 *	idle->idle_enter = get_clock();
+		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
+		 *			   PSW_MASK_IO | PSW_MASK_EXT);
+		 * The difference is that the inline assembly makes sure that
+		 * the last three instruction are stpt, stck and lpsw in that
+		 * order. This is done to increase the precision.
+		 */
+		asm volatile(
+#ifndef CONFIG_64BIT
+			"	basr	1,0\n"
+			"0:	ahi	1,1f-0b\n"
+			"	st	1,4(%2)\n"
+#else /* CONFIG_64BIT */
+			"	larl	1,1f\n"
+			"	stg	1,8(%2)\n"
+#endif /* CONFIG_64BIT */
+			"	stpt	0(%4)\n"
+			"	spt	0(%5)\n"
+			"	stck	0(%3)\n"
+#ifndef CONFIG_64BIT
+			"	lpsw	0(%2)\n"
+#else /* CONFIG_64BIT */
+			"	lpswe	0(%2)\n"
+#endif /* CONFIG_64BIT */
+			"1:"
+			: "=m" (idle->idle_enter), "=m" (vq->idle)
+			: "a" (&psw), "a" (&idle->idle_enter),
+			  "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw)
+			: "memory", "cc", "1");
+	} else {
+		/*
+		 * The inline assembly is equivalent to
+		 *	vq->idle = get_cpu_timer();
+		 *	idle->idle_enter = get_clock();
+		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
+		 *			   PSW_MASK_IO | PSW_MASK_EXT);
+		 * The difference is that the inline assembly makes sure that
+		 * the last three instruction are stpt, stck and lpsw in that
+		 * order. This is done to increase the precision.
+		 */
+		asm volatile(
+#ifndef CONFIG_64BIT
+			"	basr	1,0\n"
+			"0:	ahi	1,1f-0b\n"
+			"	st	1,4(%2)\n"
+#else /* CONFIG_64BIT */
+			"	larl	1,1f\n"
+			"	stg	1,8(%2)\n"
+#endif /* CONFIG_64BIT */
+			"	stpt	0(%4)\n"
+			"	stck	0(%3)\n"
+#ifndef CONFIG_64BIT
+			"	lpsw	0(%2)\n"
+#else /* CONFIG_64BIT */
+			"	lpswe	0(%2)\n"
+#endif /* CONFIG_64BIT */
+			"1:"
+			: "=m" (idle->idle_enter), "=m" (vq->idle)
+			: "a" (&psw), "a" (&idle->idle_enter),
+			  "a" (&vq->idle), "m" (psw)
+			: "memory", "cc", "1");
 	}
-
-	/* store the actual expire value */
-	asm volatile ("STPT %0" : "=m" (vt_list->idle));
-
-	/*
-	 * If the CPU timer is negative we don't reprogramm
-	 * it because we will get instantly an interrupt.
-	 */
-	if (vt_list->idle & 1LL<<63)
-		return;
-
-	vt_list->offset += vt_list->to_expire - vt_list->idle;
-
-	/*
-	 * We cannot halt the CPU timer, we just write a value that
-	 * nearly never expires (only after 71 years) and re-write
-	 * the stored expire value if we continue the timer
-	 */
- fire:
-	set_vtimer(VTIMER_MAX_SLICE);
 }
 
 /*
@@ -195,30 +261,23 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
  */
 static void do_callbacks(struct list_head *cb_list)
 {
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 	struct vtimer_list *event, *tmp;
-	void (*fn)(unsigned long);
-	unsigned long data;
 
 	if (list_empty(cb_list))
 		return;
 
-	vt_list = &__get_cpu_var(virt_cpu_timer);
+	vq = &__get_cpu_var(virt_cpu_timer);
 
 	list_for_each_entry_safe(event, tmp, cb_list, entry) {
-		fn = event->function;
-		data = event->data;
-		fn(data);
-
-		if (!event->interval)
-			/* delete one shot timer */
-			list_del_init(&event->entry);
-		else {
-			/* move interval timer back to list */
-			spin_lock(&vt_list->lock);
-			list_del_init(&event->entry);
-			list_add_sorted(event, &vt_list->list);
-			spin_unlock(&vt_list->lock);
+		list_del_init(&event->entry);
+		(event->function)(event->data);
+		if (event->interval) {
+			/* Recharge interval timer */
+			event->expires = event->interval + vq->elapsed;
+			spin_lock(&vq->lock);
+			list_add_sorted(event, &vq->list);
+			spin_unlock(&vq->lock);
 		}
 	}
 }
@@ -228,64 +287,57 @@ static void do_callbacks(struct list_head *cb_list)
  */
 static void do_cpu_timer_interrupt(__u16 error_code)
 {
-	__u64 next, delta;
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 	struct vtimer_list *event, *tmp;
-	struct list_head *ptr;
-	/* the callback queue */
-	struct list_head cb_list;
+	struct list_head cb_list;	/* the callback queue */
+	__u64 elapsed, next;
 
 	INIT_LIST_HEAD(&cb_list);
-	vt_list = &__get_cpu_var(virt_cpu_timer);
+	vq = &__get_cpu_var(virt_cpu_timer);
 
 	/* walk timer list, fire all expired events */
-	spin_lock(&vt_list->lock);
-
-	if (vt_list->to_expire < VTIMER_MAX_SLICE)
-		vt_list->offset += vt_list->to_expire;
-
-	list_for_each_entry_safe(event, tmp, &vt_list->list, entry) {
-		if (event->expires > vt_list->offset)
-			/* found first unexpired event, leave */
-			break;
-
-		/* re-charge interval timer, we have to add the offset */
-		if (event->interval)
-			event->expires = event->interval + vt_list->offset;
-
-		/* move expired timer to the callback queue */
-		list_move_tail(&event->entry, &cb_list);
+	spin_lock(&vq->lock);
+
+	elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer);
+	BUG_ON((s64) elapsed < 0);
+	vq->elapsed = 0;
+	list_for_each_entry_safe(event, tmp, &vq->list, entry) {
+		if (event->expires < elapsed)
+			/* move expired timer to the callback queue */
+			list_move_tail(&event->entry, &cb_list);
+		else
+			event->expires -= elapsed;
 	}
-	spin_unlock(&vt_list->lock);
+	spin_unlock(&vq->lock);
+
+	vq->do_spt = list_empty(&cb_list);
 	do_callbacks(&cb_list);
 
 	/* next event is first in list */
-	spin_lock(&vt_list->lock);
-	if (!list_empty(&vt_list->list)) {
-		ptr = vt_list->list.next;
-		event = list_entry(ptr, struct vtimer_list, entry);
-		next = event->expires - vt_list->offset;
-
-		/* add the expired time from this interrupt handler
-		 * and the callback functions
-		 */
-		asm volatile ("STPT %0" : "=m" (delta));
-		delta = 0xffffffffffffffffLL - delta + 1;
-		vt_list->offset += delta;
-		next -= delta;
-	} else {
-		vt_list->offset = 0;
-		next = VTIMER_MAX_SLICE;
-	}
-	spin_unlock(&vt_list->lock);
-	set_vtimer(next);
+	next = VTIMER_MAX_SLICE;
+	spin_lock(&vq->lock);
+	if (!list_empty(&vq->list)) {
+		event = list_first_entry(&vq->list, struct vtimer_list, entry);
+		next = event->expires;
+	} else
+		vq->do_spt = 0;
+	spin_unlock(&vq->lock);
+	/*
+	 * To improve precision add the time spent by the
+	 * interrupt handler to the elapsed time.
+	 * Note: CPU timer counts down and we got an interrupt,
+	 *	 the current content is negative
+	 */
+	elapsed = S390_lowcore.async_enter_timer - get_vtimer();
+	set_vtimer(next - elapsed);
+	vq->timer = next - elapsed;
+	vq->elapsed = elapsed;
 }
 
 void init_virt_timer(struct vtimer_list *timer)
 {
 	timer->function = NULL;
 	INIT_LIST_HEAD(&timer->entry);
-	spin_lock_init(&timer->lock);
 }
 EXPORT_SYMBOL(init_virt_timer);
 
@@ -299,44 +351,40 @@ static inline int vtimer_pending(struct vtimer_list *timer)
  */
 static void internal_add_vtimer(struct vtimer_list *timer)
 {
+	struct vtimer_queue *vq;
 	unsigned long flags;
-	__u64 done;
-	struct vtimer_list *event;
-	struct vtimer_queue *vt_list;
+	__u64 left, expires;
 
-	vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
-	spin_lock_irqsave(&vt_list->lock, flags);
+	vq = &per_cpu(virt_cpu_timer, timer->cpu);
+	spin_lock_irqsave(&vq->lock, flags);
 
 	BUG_ON(timer->cpu != smp_processor_id());
 
-	/* if list is empty we only have to set the timer */
-	if (list_empty(&vt_list->list)) {
-		/* reset the offset, this may happen if the last timer was
-		 * just deleted by mod_virt_timer and the interrupt
-		 * didn't happen until here
-		 */
-		vt_list->offset = 0;
-		goto fire;
+	if (list_empty(&vq->list)) {
+		/* First timer on this cpu, just program it. */
+		list_add(&timer->entry, &vq->list);
+		set_vtimer(timer->expires);
+		vq->timer = timer->expires;
+		vq->elapsed = 0;
+	} else {
+		/* Check progress of old timers. */
+		expires = timer->expires;
+		left = get_vtimer();
+		if (likely((s64) expires < (s64) left)) {
+			/* The new timer expires before the current timer. */
+			set_vtimer(expires);
+			vq->elapsed += vq->timer - left;
+			vq->timer = expires;
+		} else {
+			vq->elapsed += vq->timer - left;
+			vq->timer = left;
+		}
+		/* Insert new timer into per cpu list. */
+		timer->expires += vq->elapsed;
+		list_add_sorted(timer, &vq->list);
 	}
 
-	/* save progress */
-	asm volatile ("STPT %0" : "=m" (done));
-
-	/* calculate completed work */
-	done = vt_list->to_expire - done + vt_list->offset;
-	vt_list->offset = 0;
-
-	list_for_each_entry(event, &vt_list->list, entry)
-		event->expires -= done;
-
- fire:
-	list_add_sorted(timer, &vt_list->list);
-
-	/* get first element, which is the next vtimer slice */
-	event = list_entry(vt_list->list.next, struct vtimer_list, entry);
-
-	set_vtimer(event->expires);
-	spin_unlock_irqrestore(&vt_list->lock, flags);
+	spin_unlock_irqrestore(&vq->lock, flags);
 	/* release CPU acquired in prepare_vtimer or mod_virt_timer() */
 	put_cpu();
 }
@@ -381,14 +429,15 @@ EXPORT_SYMBOL(add_virt_timer_periodic);
  * If we change a pending timer the function must be called on the CPU
  * where the timer is running on, e.g. by smp_call_function_single()
  *
- * The original mod_timer adds the timer if it is not pending. For compatibility
- * we do the same. The timer will be added on the current CPU as a oneshot timer.
+ * The original mod_timer adds the timer if it is not pending. For
+ * compatibility we do the same. The timer will be added on the current
+ * CPU as a oneshot timer.
  *
  * returns whether it has modified a pending timer (1) or not (0)
  */
 int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 {
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 	unsigned long flags;
 	int cpu;
 
@@ -404,17 +453,17 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 		return 1;
 
 	cpu = get_cpu();
-	vt_list = &per_cpu(virt_cpu_timer, cpu);
+	vq = &per_cpu(virt_cpu_timer, cpu);
 
 	/* check if we run on the right CPU */
 	BUG_ON(timer->cpu != cpu);
 
 	/* disable interrupts before test if timer is pending */
-	spin_lock_irqsave(&vt_list->lock, flags);
+	spin_lock_irqsave(&vq->lock, flags);
 
 	/* if timer isn't pending add it on the current CPU */
 	if (!vtimer_pending(timer)) {
-		spin_unlock_irqrestore(&vt_list->lock, flags);
+		spin_unlock_irqrestore(&vq->lock, flags);
 		/* we do not activate an interval timer with mod_virt_timer */
 		timer->interval = 0;
 		timer->expires = expires;
@@ -431,7 +480,7 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 		timer->interval = expires;
 
 	/* the timer can't expire anymore so we can release the lock */
-	spin_unlock_irqrestore(&vt_list->lock, flags);
+	spin_unlock_irqrestore(&vq->lock, flags);
 	internal_add_vtimer(timer);
 	return 1;
 }
@@ -445,25 +494,19 @@ EXPORT_SYMBOL(mod_virt_timer);
 int del_virt_timer(struct vtimer_list *timer)
 {
 	unsigned long flags;
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 
 	/* check if timer is pending */
 	if (!vtimer_pending(timer))
 		return 0;
 
-	vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
-	spin_lock_irqsave(&vt_list->lock, flags);
+	vq = &per_cpu(virt_cpu_timer, timer->cpu);
+	spin_lock_irqsave(&vq->lock, flags);
 
 	/* we don't interrupt a running timer, just let it expire! */
 	list_del_init(&timer->entry);
 
-	/* last timer removed */
-	if (list_empty(&vt_list->list)) {
-		vt_list->to_expire = 0;
-		vt_list->offset = 0;
-	}
-
-	spin_unlock_irqrestore(&vt_list->lock, flags);
+	spin_unlock_irqrestore(&vq->lock, flags);
 	return 1;
 }
 EXPORT_SYMBOL(del_virt_timer);
@@ -473,24 +516,19 @@ EXPORT_SYMBOL(del_virt_timer);
  */
 void init_cpu_vtimer(void)
 {
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 
 	/* kick the virtual timer */
-	S390_lowcore.exit_timer = VTIMER_MAX_SLICE;
-	S390_lowcore.last_update_timer = VTIMER_MAX_SLICE;
 	asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock));
-	asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
+	asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer));
+
+	/* initialize per cpu vtimer structure */
+	vq = &__get_cpu_var(virt_cpu_timer);
+	INIT_LIST_HEAD(&vq->list);
+	spin_lock_init(&vq->lock);
 
 	/* enable cpu timer interrupts */
 	__ctl_set_bit(0,10);
-
-	vt_list = &__get_cpu_var(virt_cpu_timer);
-	INIT_LIST_HEAD(&vt_list->list);
-	spin_lock_init(&vt_list->lock);
-	vt_list->to_expire = 0;
-	vt_list->offset = 0;
-	vt_list->idle = 0;
-
 }
 
 void __init vtime_init(void)
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 8a8df75..06b7182 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -632,8 +632,8 @@ do_IRQ (struct pt_regs *regs)
 	struct pt_regs *old_regs;
 
 	old_regs = set_irq_regs(regs);
-	irq_enter();
 	s390_idle_check();
+	irq_enter();
 	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
 		/* Serve timer interrupts first. */
 		clock_comparator_work();
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 834e9ee..92b0417 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -18,6 +18,7 @@
 #include <asm/etr.h>
 #include <asm/lowcore.h>
 #include <asm/cio.h>
+#include <asm/cpu.h>
 #include "s390mach.h"
 
 static struct semaphore m_sem;
@@ -369,6 +370,8 @@ s390_do_machine_check(struct pt_regs *regs)
 
 	lockdep_off();
 
+	s390_idle_check();
+
 	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
 	mcck = &__get_cpu_var(cpu_mcck);
 	umode = user_mode(regs);
-- 
cgit v0.10.2


From c742b31c03f37c5c499178f09f57381aa6c70131 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 31 Dec 2008 15:11:42 +0100
Subject: [PATCH] fast vdso implementation for CLOCK_THREAD_CPUTIME_ID

The extract cpu time instruction (ectg) instruction allows the user
process to get the current thread cputime without calling into the
kernel. The code that uses the instruction needs to switch to the
access registers mode to get access to the per-cpu info page that
contains the two base values that are needed to calculate the current
cputime from the CPU timer with the ectg instruction.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index a547817..ffdef5f 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -106,8 +106,10 @@
 #define __LC_IPLDEV                     0xDB8
 #define __LC_CURRENT			0xDD8
 #define __LC_INT_CLOCK			0xDE8
+#define __LC_VDSO_PER_CPU		0xE38
 #endif /* __s390x__ */
 
+#define __LC_PASTE			0xE40
 
 #define __LC_PANIC_MAGIC		0xE00
 #ifndef __s390x__
@@ -381,7 +383,12 @@ struct _lowcore
         /* whether the kernel died with panic() or not */
         __u32        panic_magic;              /* 0xe00 */
 
-	__u8         pad13[0x11b8-0xe04];      /* 0xe04 */
+	/* Per cpu primary space access list */
+	__u8	     pad_0xe04[0xe3c-0xe04];   /* 0xe04 */
+	__u32	     vdso_per_cpu_data;	       /* 0xe3c */
+	__u32	     paste[16];		       /* 0xe40 */
+
+	__u8	     pad13[0x11b8-0xe80];      /* 0xe80 */
 
 	/* 64 bit extparam used for pfault, diag 250 etc  */
 	__u64        ext_params2;               /* 0x11B8 */
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index a44f4fe..7bdd7c8 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -12,9 +12,9 @@
 #ifndef __ASSEMBLY__
 
 /*
- * Note about this structure:
+ * Note about the vdso_data and vdso_per_cpu_data structures:
  *
- * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this
+ * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
  * structure is supposed to be known only to the function in the vdso
  * itself and may change without notice.
  */
@@ -28,10 +28,21 @@ struct vdso_data {
 	__u64 wtom_clock_nsec;		/*				0x28 */
 	__u32 tz_minuteswest;		/* Minutes west of Greenwich	0x30 */
 	__u32 tz_dsttime;		/* Type of dst correction	0x34 */
+	__u32 ectg_available;
+};
+
+struct vdso_per_cpu_data {
+	__u64 ectg_timer_base;
+	__u64 ectg_user_time;
 };
 
 extern struct vdso_data *vdso_data;
 
+#ifdef CONFIG_64BIT
+int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore);
+void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore);
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e641f60..67a6001 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -48,6 +48,11 @@ int main(void)
 	DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
 	DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
 	DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
+	DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
+	DEFINE(__VDSO_ECTG_BASE,
+	       offsetof(struct vdso_per_cpu_data, ectg_timer_base));
+	DEFINE(__VDSO_ECTG_USER,
+	       offsetof(struct vdso_per_cpu_data, ectg_user_time));
 	/* constants used by the vdso */
 	DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
 	DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index ae83c19..c6fbde1 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 	.if !\sync
 	ni	\psworg+1,0xfd		# clear wait state bit
 	.endif
-	lmg	%r0,%r15,SP_R0(%r15)	# load gprs 0-15 of user
+	lg	%r14,__LC_VDSO_PER_CPU
+	lmg	%r0,%r13,SP_R0(%r15)	# load gprs 0-13 of user
 	stpt	__LC_EXIT_TIMER
+	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+	lmg	%r14,%r15,SP_R14(%r15)	# load grps 14-15 of user
 	lpswe	\psworg			# back to caller
 	.endm
 
@@ -980,23 +983,23 @@ cleanup_sysc_return:
 
 cleanup_sysc_leave:
 	clc	8(8,%r12),BASED(cleanup_sysc_leave_insn)
-	je	2f
-	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	je	3f
 	clc	8(8,%r12),BASED(cleanup_sysc_leave_insn+8)
-	je	2f
-	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
+	jhe	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0:	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
 	cghi	%r12,__LC_MCK_OLD_PSW
-	jne	0f
+	jne	1f
 	mvc	__LC_SAVE_AREA+64(32),SP_R12(%r15)
-	j	1f
-0:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
-1:	lmg	%r0,%r11,SP_R0(%r15)
+	j	2f
+1:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
+2:	lmg	%r0,%r11,SP_R0(%r15)
 	lg	%r15,SP_R15(%r15)
-2:	la	%r12,__LC_RETURN_PSW
+3:	la	%r12,__LC_RETURN_PSW
 	br	%r14
 cleanup_sysc_leave_insn:
 	.quad	sysc_done - 4
-	.quad	sysc_done - 8
+	.quad	sysc_done - 16
 
 cleanup_io_return:
 	mvc	__LC_RETURN_PSW(8),0(%r12)
@@ -1006,23 +1009,23 @@ cleanup_io_return:
 
 cleanup_io_leave:
 	clc	8(8,%r12),BASED(cleanup_io_leave_insn)
-	je	2f
-	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	je	3f
 	clc	8(8,%r12),BASED(cleanup_io_leave_insn+8)
-	je	2f
-	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
+	jhe	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0:	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
 	cghi	%r12,__LC_MCK_OLD_PSW
-	jne	0f
+	jne	1f
 	mvc	__LC_SAVE_AREA+64(32),SP_R12(%r15)
-	j	1f
-0:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
-1:	lmg	%r0,%r11,SP_R0(%r15)
+	j	2f
+1:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
+2:	lmg	%r0,%r11,SP_R0(%r15)
 	lg	%r15,SP_R15(%r15)
-2:	la	%r12,__LC_RETURN_PSW
+3:	la	%r12,__LC_RETURN_PSW
 	br	%r14
 cleanup_io_leave_insn:
 	.quad	io_done - 4
-	.quad	io_done - 8
+	.quad	io_done - 16
 
 /*
  * Integer constants
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 3ccd36b..f9f70aa 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -87,6 +87,8 @@ startup_continue:
 	lg	%r12,.Lparmaddr-.LPG1(%r13)	# pointer to parameter area
 					# move IPL device to lowcore
 	mvc	__LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
+	lghi	%r0,__LC_PASTE
+	stg	%r0,__LC_VDSO_PER_CPU
 #
 # Setup stack
 #
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b7a1efd..d825f49 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -427,6 +427,8 @@ setup_lowcore(void)
 		/* enable extended save area */
 		__ctl_set_bit(14, 29);
 	}
+#else
+	lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
 #endif
 	set_prefix((u32)(unsigned long) lc);
 }
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3979a6f..b3461e8 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -47,6 +47,7 @@
 #include <asm/lowcore.h>
 #include <asm/sclp.h>
 #include <asm/cpu.h>
+#include <asm/vdso.h>
 #include "entry.h"
 
 /*
@@ -506,6 +507,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
 			goto out;
 		lowcore->extended_save_area_addr = (u32) save_area;
 	}
+#else
+	if (vdso_alloc_per_cpu(cpu, lowcore))
+		goto out;
 #endif
 	lowcore_ptr[cpu] = lowcore;
 	return 0;
@@ -528,6 +532,8 @@ static void smp_free_lowcore(int cpu)
 #ifndef CONFIG_64BIT
 	if (MACHINE_HAS_IEEE)
 		free_page((unsigned long) lowcore->extended_save_area_addr);
+#else
+	vdso_free_per_cpu(cpu, lowcore);
 #endif
 	free_page(lowcore->panic_stack - PAGE_SIZE);
 	free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
@@ -670,6 +676,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
 	panic_stack = __get_free_page(GFP_KERNEL);
 	async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+	BUG_ON(!lowcore || !panic_stack || !async_stack);
 #ifndef CONFIG_64BIT
 	if (MACHINE_HAS_IEEE)
 		save_area = get_zeroed_page(GFP_KERNEL);
@@ -683,6 +690,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 #ifndef CONFIG_64BIT
 	if (MACHINE_HAS_IEEE)
 		lowcore->extended_save_area_addr = (u32) save_area;
+#else
+	BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore));
 #endif
 	set_prefix((u32)(unsigned long) lowcore);
 	local_mcck_enable();
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 10a6cce..25a6a82 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -31,9 +31,6 @@
 #include <asm/sections.h>
 #include <asm/vdso.h>
 
-/* Max supported size for symbol names */
-#define MAX_SYMNAME	64
-
 #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
 extern char vdso32_start, vdso32_end;
 static void *vdso32_kbase = &vdso32_start;
@@ -71,6 +68,119 @@ static union {
 struct vdso_data *vdso_data = &vdso_data_store.data;
 
 /*
+ * Setup vdso data page.
+ */
+static void vdso_init_data(struct vdso_data *vd)
+{
+	unsigned int facility_list;
+
+	facility_list = stfl();
+	vd->ectg_available = switch_amode && (facility_list & 1);
+}
+
+#ifdef CONFIG_64BIT
+/*
+ * Setup per cpu vdso data page.
+ */
+static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd)
+{
+}
+
+/*
+ * Allocate/free per cpu vdso data.
+ */
+#ifdef CONFIG_64BIT
+#define SEGMENT_ORDER	2
+#else
+#define SEGMENT_ORDER	1
+#endif
+
+int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
+{
+	unsigned long segment_table, page_table, page_frame;
+	u32 *psal, *aste;
+	int i;
+
+	lowcore->vdso_per_cpu_data = __LC_PASTE;
+
+	if (!switch_amode || !vdso_enabled)
+		return 0;
+
+	segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
+	page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	page_frame = get_zeroed_page(GFP_KERNEL);
+	if (!segment_table || !page_table || !page_frame)
+		goto out;
+
+	clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
+		    PAGE_SIZE << SEGMENT_ORDER);
+	clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
+		    256*sizeof(unsigned long));
+
+	*(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
+	*(unsigned long *) page_table = _PAGE_RO + page_frame;
+
+	psal = (u32 *) (page_table + 256*sizeof(unsigned long));
+	aste = psal + 32;
+
+	for (i = 4; i < 32; i += 4)
+		psal[i] = 0x80000000;
+
+	lowcore->paste[4] = (u32)(addr_t) psal;
+	psal[0] = 0x20000000;
+	psal[2] = (u32)(addr_t) aste;
+	*(unsigned long *) (aste + 2) = segment_table +
+		_ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
+	aste[4] = (u32)(addr_t) psal;
+	lowcore->vdso_per_cpu_data = page_frame;
+
+	vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame);
+	return 0;
+
+out:
+	free_page(page_frame);
+	free_page(page_table);
+	free_pages(segment_table, SEGMENT_ORDER);
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
+{
+	unsigned long segment_table, page_table, page_frame;
+	u32 *psal, *aste;
+
+	if (!switch_amode || !vdso_enabled)
+		return;
+
+	psal = (u32 *)(addr_t) lowcore->paste[4];
+	aste = (u32 *)(addr_t) psal[2];
+	segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
+	page_table = *(unsigned long *) segment_table;
+	page_frame = *(unsigned long *) page_table;
+
+	free_page(page_frame);
+	free_page(page_table);
+	free_pages(segment_table, SEGMENT_ORDER);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void __vdso_init_cr5(void *dummy)
+{
+	unsigned long cr5;
+
+	cr5 = offsetof(struct _lowcore, paste);
+	__ctl_load(cr5, 5, 5);
+}
+
+static void vdso_init_cr5(void)
+{
+	if (switch_amode && vdso_enabled)
+		on_each_cpu(__vdso_init_cr5, NULL, 1);
+}
+#endif /* CONFIG_64BIT */
+
+/*
  * This is called from binfmt_elf, we create the special vma for the
  * vDSO and insert it into the mm struct tree
  */
@@ -172,6 +282,9 @@ static int __init vdso_init(void)
 {
 	int i;
 
+	if (!vdso_enabled)
+		return 0;
+	vdso_init_data(vdso_data);
 #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
 	/* Calculate the size of the 32 bit vDSO */
 	vdso32_pages = ((&vdso32_end - &vdso32_start
@@ -208,6 +321,10 @@ static int __init vdso_init(void)
 	}
 	vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
 	vdso64_pagelist[vdso64_pages] = NULL;
+#ifndef CONFIG_SMP
+	BUG_ON(vdso_alloc_per_cpu(0, S390_lowcore));
+#endif
+	vdso_init_cr5();
 #endif /* CONFIG_64BIT */
 
 	get_page(virt_to_page(vdso_data));
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 488e31a..9ce8caa 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -22,7 +22,12 @@ __kernel_clock_getres:
 	cghi	%r2,CLOCK_REALTIME
 	je	0f
 	cghi	%r2,CLOCK_MONOTONIC
+	je	0f
+	cghi	%r2,-2		/* CLOCK_THREAD_CPUTIME_ID for this thread */
 	jne	2f
+	larl	%r5,_vdso_data
+	icm	%r0,15,__LC_ECTG_OK(%r5)
+	jz	2f
 0:	ltgr	%r3,%r3
 	jz	1f				/* res == NULL */
 	larl	%r1,3f
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 738a410..79dbfee 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -22,8 +22,10 @@ __kernel_clock_gettime:
 	larl	%r5,_vdso_data
 	cghi	%r2,CLOCK_REALTIME
 	je	4f
+	cghi	%r2,-2		/* CLOCK_THREAD_CPUTIME_ID for this thread */
+	je	9f
 	cghi	%r2,CLOCK_MONOTONIC
-	jne	9f
+	jne	12f
 
 	/* CLOCK_MONOTONIC */
 	ltgr	%r3,%r3
@@ -42,7 +44,7 @@ __kernel_clock_gettime:
 	alg	%r0,__VDSO_WTOM_SEC(%r5)
 	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
 	jne	0b
-	larl	%r5,10f
+	larl	%r5,13f
 1:	clg	%r1,0(%r5)
 	jl	2f
 	slg	%r1,0(%r5)
@@ -68,7 +70,7 @@ __kernel_clock_gettime:
 	lg	%r0,__VDSO_XTIME_SEC(%r5)
 	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
 	jne	5b
-	larl	%r5,10f
+	larl	%r5,13f
 6:	clg	%r1,0(%r5)
 	jl	7f
 	slg	%r1,0(%r5)
@@ -79,11 +81,38 @@ __kernel_clock_gettime:
 8:	lghi	%r2,0
 	br	%r14
 
+	/* CLOCK_THREAD_CPUTIME_ID for this thread */
+9:	icm	%r0,15,__VDSO_ECTG_OK(%r5)
+	jz	12f
+	ear	%r2,%a4
+	llilh	%r4,0x0100
+	sar	%a4,%r4
+	lghi	%r4,0
+	sacf	512				/* Magic ectg instruction */
+	.insn	ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
+	sacf	0
+	sar	%a4,%r2
+	algr	%r1,%r0				/* r1 = cputime as TOD value */
+	mghi	%r1,1000			/* convert to nanoseconds */
+	srlg	%r1,%r1,12			/* r1 = cputime in nanosec */
+	lgr	%r4,%r1
+	larl	%r5,13f
+	srlg	%r1,%r1,9			/* divide by 1000000000 */
+	mlg	%r0,8(%r5)
+	srlg	%r0,%r0,11			/* r0 = tv_sec */
+	stg	%r0,0(%r3)
+	msg	%r0,0(%r5)			/* calculate tv_nsec */
+	slgr	%r4,%r0				/* r4 = tv_nsec */
+	stg	%r4,8(%r3)
+	lghi	%r2,0
+	br	%r14
+
 	/* Fallback to system call */
-9:	lghi	%r1,__NR_clock_gettime
+12:	lghi	%r1,__NR_clock_gettime
 	svc	0
 	br	%r14
 
-10:	.quad	1000000000
+13:	.quad	1000000000
+14:	.quad	19342813113834067
 	.cfi_endproc
 	.size	__kernel_clock_gettime,.-__kernel_clock_gettime
-- 
cgit v0.10.2


From 2786b014ec893c301ea52ef9962e7cc60f89f9b3 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Mon, 22 Sep 2008 16:08:06 +0200
Subject: KVM: x86 emulator: consolidate push reg

This patch consolidate the emulation of push reg instruction.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ea05117..a391e21 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1415,13 +1415,7 @@ special_insn:
 		emulate_1op("dec", c->dst, ctxt->eflags);
 		break;
 	case 0x50 ... 0x57:  /* push reg */
-		c->dst.type  = OP_MEM;
-		c->dst.bytes = c->op_bytes;
-		c->dst.val = c->src.val;
-		register_address_increment(c, &c->regs[VCPU_REGS_RSP],
-					   -c->op_bytes);
-		c->dst.ptr = (void *) register_address(
-			c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]);
+		emulate_push(ctxt);
 		break;
 	case 0x58 ... 0x5f: /* pop reg */
 	pop_instruction:
-- 
cgit v0.10.2


From a26bf12afb608eb5a96192eaee35fc08ffbf85aa Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:45 +0200
Subject: KVM: VMX: include all IRQ window exits in statistics

irq_window_exits only tracks IRQ window exits due to user space
requests, nmi_window_exits include all exits. The latter makes more
sense, so let's adjust irq_window_exits accounting.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4018b0..ac34537 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2767,6 +2767,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 
 	KVMTRACE_0D(PEND_INTR, vcpu, handler);
+	++vcpu->stat.irq_window_exits;
 
 	/*
 	 * If the user space waits to inject interrupts, exit as soon as
@@ -2775,7 +2776,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	if (kvm_run->request_interrupt_window &&
 	    !vcpu->arch.irq_summary) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-		++vcpu->stat.irq_window_exits;
 		return 0;
 	}
 	return 1;
-- 
cgit v0.10.2


From e4a41889ece6c95f390a7fa3a94255ab62470968 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:46 +0200
Subject: KVM: VMX: Use INTR_TYPE_NMI_INTR instead of magic value

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ac34537..81cf12b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2492,7 +2492,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
 	}
 
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
 		return 1;  /* already handled by vmx_vcpu_run() */
 
 	if (is_no_device(intr_info)) {
@@ -3337,7 +3337,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	/* We need to handle NMIs before interrupts are enabled */
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
 	    (intr_info & INTR_INFO_VALID_MASK)) {
 		KVMTRACE_0D(NMI, vcpu, handler);
 		asm("int $2");
-- 
cgit v0.10.2


From 60637aacfd95c368e1fbc2157275d1b621b5dcdd Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:47 +0200
Subject: KVM: VMX: Support for NMI task gates

Properly set GUEST_INTR_STATE_NMI and reset nmi_injected when a
task-switch vmexit happened due to a task gate being used for handling
NMIs. Also avoid the false warning about valid vectoring info in
kvm_handle_exit.

Based on original patch by Gleb Natapov.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 81cf12b..8d0fc68 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2832,6 +2832,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long exit_qualification;
 	u16 tss_selector;
 	int reason;
@@ -2839,6 +2840,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	reason = (u32)exit_qualification >> 30;
+	if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
+	    (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+	    (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
+	    == INTR_TYPE_NMI_INTR) {
+		vcpu->arch.nmi_injected = false;
+		if (cpu_has_virtual_nmis())
+			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				      GUEST_INTR_STATE_NMI);
+	}
 	tss_selector = exit_qualification;
 
 	return kvm_task_switch(vcpu, tss_selector, reason);
@@ -3012,9 +3022,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
 			(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
-			exit_reason != EXIT_REASON_EPT_VIOLATION))
-		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
-		       "exit reason is 0x%x\n", __func__, exit_reason);
+			exit_reason != EXIT_REASON_EPT_VIOLATION &&
+			exit_reason != EXIT_REASON_TASK_SWITCH))
+		printk(KERN_WARNING "%s: unexpected, valid vectoring info "
+		       "(0x%x) and exit reason is 0x%x\n",
+		       __func__, vectoring_info, exit_reason);
 	if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
-- 
cgit v0.10.2


From 448fa4a9c5dbc6941dd19ed09692c588d815bb06 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:48 +0200
Subject: KVM: x86: Reset pending/inject NMI state on CPU reset

CPU reset invalidates pending or already injected NMIs, therefore reset
the related state variables.

Based on original patch by Gleb Natapov.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1f8ff2..1a71f67 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3925,6 +3925,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.nmi_pending = false;
+	vcpu->arch.nmi_injected = false;
+
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-- 
cgit v0.10.2


From 33f089ca5a61f7aead26e8e1866dfc961dd88a9e Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:49 +0200
Subject: KVM: VMX: refactor/fix IRQ and NMI injectability determination

There are currently two ways in VMX to check if an IRQ or NMI can be
injected:
 - vmx_{nmi|irq}_enabled and
 - vcpu.arch.{nmi|interrupt}_window_open.
Even worse, one test (at the end of vmx_vcpu_run) uses an inconsistent,
likely incorrect logic.

This patch consolidates and unifies the tests over
{nmi|interrupt}_window_open as cache + vmx_update_window_states
for updating the cache content.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8346be8..bfbbdea 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -327,6 +327,7 @@ struct kvm_vcpu_arch {
 
 	bool nmi_pending;
 	bool nmi_injected;
+	bool nmi_window_open;
 
 	u64 mtrr[0x100];
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8d0fc68..f0866e1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2362,6 +2362,21 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
 
+static void vmx_update_window_states(struct kvm_vcpu *vcpu)
+{
+	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+
+	vcpu->arch.nmi_window_open =
+		!(guest_intr & (GUEST_INTR_STATE_STI |
+				GUEST_INTR_STATE_MOV_SS |
+				GUEST_INTR_STATE_NMI));
+
+	vcpu->arch.interrupt_window_open =
+		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+		 !(guest_intr & (GUEST_INTR_STATE_STI |
+				 GUEST_INTR_STATE_MOV_SS)));
+}
+
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
 	int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2374,15 +2389,12 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 	kvm_queue_interrupt(vcpu, irq);
 }
 
-
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
 	u32 cpu_based_vm_exec_control;
 
-	vcpu->arch.interrupt_window_open =
-		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
+	vmx_update_window_states(vcpu);
 
 	if (vcpu->arch.interrupt_window_open &&
 	    vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -3075,22 +3087,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
-static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
-{
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-	return !(guest_intr & (GUEST_INTR_STATE_NMI |
-			       GUEST_INTR_STATE_MOV_SS |
-			       GUEST_INTR_STATE_STI));
-}
-
-static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
-{
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-	return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
-			       GUEST_INTR_STATE_STI)) &&
-		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
-}
-
 static void enable_intr_window(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.nmi_pending)
@@ -3159,11 +3155,13 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 {
 	update_tpr_threshold(vcpu);
 
+	vmx_update_window_states(vcpu);
+
 	if (cpu_has_virtual_nmis()) {
 		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
 			if (vcpu->arch.interrupt.pending) {
 				enable_nmi_window(vcpu);
-			} else if (vmx_nmi_enabled(vcpu)) {
+			} else if (vcpu->arch.nmi_window_open) {
 				vcpu->arch.nmi_pending = false;
 				vcpu->arch.nmi_injected = true;
 			} else {
@@ -3178,7 +3176,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 		}
 	}
 	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
-		if (vmx_irq_enabled(vcpu))
+		if (vcpu->arch.interrupt_window_open)
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 		else
 			enable_irq_window(vcpu);
@@ -3339,9 +3337,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
 
-	vcpu->arch.interrupt_window_open =
-		(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-		 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
+	vmx_update_window_states(vcpu);
 
 	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 	vmx->launched = 1;
-- 
cgit v0.10.2


From f460ee43e250b675376246b1c4c9fe9b9af4ab16 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:50 +0200
Subject: KVM: VMX: refactor IRQ and NMI window enabling

do_interrupt_requests and vmx_intr_assist go different way for
achieving the same: enabling the nmi/irq window start notification.
Unify their code over enable_{irq|nmi}_window, get rid of a redundant
call to enable_intr_window instead of direct enable_nmi_window
invocation and unroll enable_intr_window for both in-kernel and user
space irq injection accordingly.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f0866e1..440f56c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2389,30 +2389,42 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 	kvm_queue_interrupt(vcpu, irq);
 }
 
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
-				       struct kvm_run *kvm_run)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
 
-	vmx_update_window_states(vcpu);
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
 
-	if (vcpu->arch.interrupt_window_open &&
-	    vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
-		kvm_do_inject_irq(vcpu);
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
 
-	if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending)
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+	if (!cpu_has_virtual_nmis())
+		return;
 
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void do_interrupt_requests(struct kvm_vcpu *vcpu,
+				       struct kvm_run *kvm_run)
+{
+	vmx_update_window_states(vcpu);
+
+	if (vcpu->arch.interrupt_window_open) {
+		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+			kvm_do_inject_irq(vcpu);
+
+		if (vcpu->arch.interrupt.pending)
+			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+	}
 	if (!vcpu->arch.interrupt_window_open &&
 	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
-		/*
-		 * Interrupts blocked.  Wait for unblock.
-		 */
-		cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	else
-		cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+		enable_irq_window(vcpu);
 }
 
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -3066,35 +3078,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
 	vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	if (!cpu_has_virtual_nmis())
-		return;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_intr_window(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->arch.nmi_pending)
-		enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu))
-		enable_irq_window(vcpu);
-}
-
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -3165,13 +3148,16 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 				vcpu->arch.nmi_pending = false;
 				vcpu->arch.nmi_injected = true;
 			} else {
-				enable_intr_window(vcpu);
+				enable_nmi_window(vcpu);
 				return;
 			}
 		}
 		if (vcpu->arch.nmi_injected) {
 			vmx_inject_nmi(vcpu);
-			enable_intr_window(vcpu);
+			if (vcpu->arch.nmi_pending)
+				enable_nmi_window(vcpu);
+			else if (kvm_cpu_has_interrupt(vcpu))
+				enable_irq_window(vcpu);
 			return;
 		}
 	}
-- 
cgit v0.10.2


From 66a5a347c2690db4c0756524a8eb5a05e0437aa8 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:51 +0200
Subject: KVM: VMX: fix real-mode NMI support

Fix NMI injection in real-mode with the same pattern we perform IRQ
injection.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 440f56c..38d1385 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2358,6 +2358,19 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (vcpu->arch.rmode.active) {
+		vmx->rmode.irq.pending = true;
+		vmx->rmode.irq.vector = NMI_VECTOR;
+		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			     NMI_VECTOR | INTR_TYPE_SOFT_INTR |
+			     INTR_INFO_VALID_MASK);
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+		return;
+	}
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
-- 
cgit v0.10.2


From 23930f9521c9c4d4aa96cdb9d1e1703f3782bb94 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:52 +0200
Subject: KVM: x86: Enable NMI Watchdog via in-kernel PIT source

LINT0 of the LAPIC can be used to route PIT events as NMI watchdog ticks
into the guest. This patch aligns the in-kernel irqchip emulation with
the user space irqchip with already supports this feature. The trick is
to route PIT interrupts to all LAPIC's LVT0 lines.

Rebased and slightly polished patch originally posted by Sheng Yang.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 59ebd37..580cc1d 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -603,10 +603,25 @@ void kvm_free_pit(struct kvm *kvm)
 
 static void __inject_pit_timer_intr(struct kvm *kvm)
 {
+	struct kvm_vcpu *vcpu;
+	int i;
+
 	mutex_lock(&kvm->lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
 	mutex_unlock(&kvm->lock);
+
+	/*
+	 * Provides NMI watchdog support in IOAPIC mode.
+	 * The route is: PIT -> PIC -> LVT0 in NMI mode,
+	 * timer IRQs will continue to flow through the IOAPIC.
+	 */
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		vcpu = kvm->vcpus[i];
+		if (!vcpu)
+			continue;
+		kvm_apic_local_deliver(vcpu, APIC_LVT0);
+	}
 }
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index f17c8f5..71e37a5 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -87,6 +87,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s);
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type);
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0fc3cab..206cc11 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -380,6 +380,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		}
 		break;
 
+	case APIC_DM_EXTINT:
+		/*
+		 * Should only be called by kvm_apic_local_deliver() with LVT0,
+		 * before NMI watchdog was enabled. Already handled by
+		 * kvm_apic_accept_pic_intr().
+		 */
+		break;
+
 	default:
 		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
 		       delivery_mode);
@@ -743,10 +751,13 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
 		break;
 
+	case APIC_LVT0:
+		if (val == APIC_DM_NMI)
+			apic_debug("Receive NMI setting on APIC_LVT0 "
+				"for cpu %d\n", apic->vcpu->vcpu_id);
 	case APIC_LVTT:
 	case APIC_LVTTHMR:
 	case APIC_LVTPC:
-	case APIC_LVT0:
 	case APIC_LVT1:
 	case APIC_LVTERR:
 		/* TODO: Check vector */
@@ -961,12 +972,25 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static int __inject_apic_timer_irq(struct kvm_lapic *apic)
+int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type)
 {
-	int vector;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	int vector, mode, trig_mode;
+	u32 reg;
+
+	if (apic && apic_enabled(apic)) {
+		reg = apic_get_reg(apic, lvt_type);
+		vector = reg & APIC_VECTOR_MASK;
+		mode = reg & APIC_MODE_MASK;
+		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
+		return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+	}
+	return 0;
+}
 
-	vector = apic_lvt_vector(apic, APIC_LVTT);
-	return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
+static inline int __inject_apic_timer_irq(struct kvm_lapic *apic)
+{
+	return kvm_apic_local_deliver(apic->vcpu, APIC_LVTT);
 }
 
 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
-- 
cgit v0.10.2


From 0496fbb973ccc9477082e859ed0faab5acb805ba Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:53 +0200
Subject: KVM: x86: VCPU with pending NMI is runnabled

Ensure that a VCPU with pending NMIs is considered runnable.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1a71f67..1fa9a6d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4130,7 +4130,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
-	       || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
+	       || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
+	       || vcpu->arch.nmi_pending;
 }
 
 static void vcpu_kick_intr(void *info)
-- 
cgit v0.10.2


From 26df99c6c5807115f06d4e1abae397b7f5f3e00c Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:54 +0200
Subject: KVM: Kick NMI receiving VCPU

Kick the NMI receiving VCPU in case the triggering caller runs in a
different context.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 206cc11..304f9dd 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -354,6 +354,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 
 	case APIC_DM_NMI:
 		kvm_inject_nmi(vcpu);
+		kvm_vcpu_kick(vcpu);
 		break;
 
 	case APIC_DM_INIT:
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 53772bb..c8f939c 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -150,6 +150,7 @@ static int ioapic_inj_irq(struct kvm_ioapic *ioapic,
 static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
 {
 	kvm_inject_nmi(vcpu);
+	kvm_vcpu_kick(vcpu);
 }
 
 static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-- 
cgit v0.10.2


From c4abb7c9cde24b7351a47328ef866e6a2bbb1ad0 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:55 +0200
Subject: KVM: x86: Support for user space injected NMIs

Introduces the KVM_NMI IOCTL to the generic x86 part of KVM for
injecting NMIs from user space and also extends the statistic report
accordingly.

Based on the original patch by Sheng Yang.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bfbbdea..a40fa84 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -398,6 +398,7 @@ struct kvm_vcpu_stat {
 	u32 halt_exits;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
+	u32 request_nmi_exits;
 	u32 irq_exits;
 	u32 host_state_reload;
 	u32 efer_reload;
@@ -406,6 +407,7 @@ struct kvm_vcpu_stat {
 	u32 insn_emulation_fail;
 	u32 hypercalls;
 	u32 irq_injections;
+	u32 nmi_injections;
 };
 
 struct descriptor_table {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1fa9a6d..0797145 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -86,6 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
+	{ "request_nmi", VCPU_STAT(request_nmi_exits) },
 	{ "irq_exits", VCPU_STAT(irq_exits) },
 	{ "host_state_reload", VCPU_STAT(host_state_reload) },
 	{ "efer_reload", VCPU_STAT(efer_reload) },
@@ -93,6 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "insn_emulation", VCPU_STAT(insn_emulation) },
 	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
 	{ "irq_injections", VCPU_STAT(irq_injections) },
+	{ "nmi_injections", VCPU_STAT(nmi_injections) },
 	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
 	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -1318,6 +1320,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
+{
+	vcpu_load(vcpu);
+	kvm_inject_nmi(vcpu);
+	vcpu_put(vcpu);
+
+	return 0;
+}
+
 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
 					   struct kvm_tpr_access_ctl *tac)
 {
@@ -1377,6 +1388,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_NMI: {
+		r = kvm_vcpu_ioctl_nmi(vcpu);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	case KVM_SET_CPUID: {
 		struct kvm_cpuid __user *cpuid_arg = argp;
 		struct kvm_cpuid cpuid;
@@ -2812,18 +2830,37 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
 }
 
+/*
+ * Check if userspace requested a NMI window, and that the NMI window
+ * is open.
+ *
+ * No need to exit to userspace if we already have a NMI queued.
+ */
+static int dm_request_for_nmi_injection(struct kvm_vcpu *vcpu,
+					struct kvm_run *kvm_run)
+{
+	return (!vcpu->arch.nmi_pending &&
+		kvm_run->request_nmi_window &&
+		vcpu->arch.nmi_window_open);
+}
+
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 			      struct kvm_run *kvm_run)
 {
 	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (irqchip_in_kernel(vcpu->kvm)) {
 		kvm_run->ready_for_interrupt_injection = 1;
-	else
+		kvm_run->ready_for_nmi_injection = 1;
+	} else {
 		kvm_run->ready_for_interrupt_injection =
 					(vcpu->arch.interrupt_window_open &&
 					 vcpu->arch.irq_summary == 0);
+		kvm_run->ready_for_nmi_injection =
+					(vcpu->arch.nmi_window_open &&
+					 vcpu->arch.nmi_pending == 0);
+	}
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
@@ -2999,6 +3036,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 
 		if (r > 0) {
+			if (dm_request_for_nmi_injection(vcpu, kvm_run)) {
+				r = -EINTR;
+				kvm_run->exit_reason = KVM_EXIT_NMI;
+				++vcpu->stat.request_nmi_exits;
+			}
 			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
 				r = -EINTR;
 				kvm_run->exit_reason = KVM_EXIT_INTR;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f18b86f..44fd7fa 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -83,18 +83,22 @@ struct kvm_irqchip {
 #define KVM_EXIT_S390_SIEIC       13
 #define KVM_EXIT_S390_RESET       14
 #define KVM_EXIT_DCR              15
+#define KVM_EXIT_NMI              16
+#define KVM_EXIT_NMI_WINDOW_OPEN  17
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
 	/* in */
 	__u8 request_interrupt_window;
-	__u8 padding1[7];
+	__u8 request_nmi_window;
+	__u8 padding1[6];
 
 	/* out */
 	__u32 exit_reason;
 	__u8 ready_for_interrupt_injection;
 	__u8 if_flag;
-	__u8 padding2[2];
+	__u8 ready_for_nmi_injection;
+	__u8 padding2;
 
 	/* in (pre_kvm_run), out (post_kvm_run) */
 	__u64 cr8;
@@ -387,6 +391,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
 #endif
 #define KVM_CAP_IOMMU 18
+#define KVM_CAP_NMI 19
 
 /*
  * ioctls for VM fds
@@ -458,6 +463,8 @@ struct kvm_trace_rec {
 #define KVM_S390_INITIAL_RESET    _IO(KVMIO,  0x97)
 #define KVM_GET_MP_STATE          _IOR(KVMIO,  0x98, struct kvm_mp_state)
 #define KVM_SET_MP_STATE          _IOW(KVMIO,  0x99, struct kvm_mp_state)
+/* Available with KVM_CAP_NMI */
+#define KVM_NMI                   _IO(KVMIO,  0x9a)
 
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
-- 
cgit v0.10.2


From 487b391d6ea9b1d0e2e0440466fb3130e78c98d9 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:56 +0200
Subject: KVM: VMX: Provide support for user space injected NMIs

This patch adds the required bits to the VMX side for user space
injected NMIs. As with the preexisting in-kernel irqchip support, the
CPU must provide the "virtual NMI" feature for proper tracking of the
NMI blocking state.

Based on the original patch by Sheng Yang.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 38d1385..f16a62c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2360,6 +2360,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	++vcpu->stat.nmi_injections;
 	if (vcpu->arch.rmode.active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = NMI_VECTOR;
@@ -2428,6 +2429,30 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 {
 	vmx_update_window_states(vcpu);
 
+	if (cpu_has_virtual_nmis()) {
+		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+			if (vcpu->arch.nmi_window_open) {
+				vcpu->arch.nmi_pending = false;
+				vcpu->arch.nmi_injected = true;
+			} else {
+				enable_nmi_window(vcpu);
+				return;
+			}
+		}
+		if (vcpu->arch.nmi_injected) {
+			vmx_inject_nmi(vcpu);
+			if (vcpu->arch.nmi_pending
+			    || kvm_run->request_nmi_window)
+				enable_nmi_window(vcpu);
+			else if (vcpu->arch.irq_summary
+				 || kvm_run->request_interrupt_window)
+				enable_irq_window(vcpu);
+			return;
+		}
+		if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
+			enable_nmi_window(vcpu);
+	}
+
 	if (vcpu->arch.interrupt_window_open) {
 		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
 			kvm_do_inject_irq(vcpu);
@@ -2959,6 +2984,14 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 	++vcpu->stat.nmi_window_exits;
 
+	/*
+	 * If the user space waits to inject a NMI, exit as soon as possible
+	 */
+	if (kvm_run->request_nmi_window && !vcpu->arch.nmi_pending) {
+		kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
+		return 0;
+	}
+
 	return 1;
 }
 
-- 
cgit v0.10.2


From 3b86cd9967242f3f3d775ee015fb814a349ed5e6 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:57 +0200
Subject: KVM: VMX: work around lacking VNMI support

Older VMX supporting CPUs do not provide the "Virtual NMI" feature for
tracking the NMI-blocked state after injecting such events. For now
KVM is unable to inject NMIs on those CPUs.

Derived from Sheng Yang's suggestion to use the IRQ window notification
for detecting the end of NMI handlers, this patch implements virtual
NMI support without impact on the host's ability to receive real NMIs.
The downside is that the given approach requires some heuristics that
can cause NMI nesting in vary rare corner cases.

The approach works as follows:
 - inject NMI and set a software-based NMI-blocked flag
 - arm the IRQ window start notification whenever an NMI window is
   requested
 - if the guest exits due to an opening IRQ window, clear the emulated
   NMI-blocked flag
 - if the guest net execution time with NMI-blocked but without an IRQ
   window exceeds 1 second, force NMI-blocked reset and inject anyway

This approach covers most practical scenarios:
 - succeeding NMIs are seperated by at least one open IRQ window
 - the guest may spin with IRQs disabled (e.g. due to a bug), but
   leaving the NMI handler takes much less time than one second
 - the guest does not rely on strict ordering or timing of NMIs
   (would be problematic in virtualized environments anyway)

Successfully tested with the 'nmi n' monitor command, the kgdbts
testsuite on smp guests (additional patches required to add debug
register support to kvm) + the kernel's nmi_watchdog=1, and a Siemens-
specific board emulation (+ guest) that comes with its own NMI
watchdog mechanism.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f16a62c..2180109 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -90,6 +90,11 @@ struct vcpu_vmx {
 	} rmode;
 	int vpid;
 	bool emulation_required;
+
+	/* Support for vnmi-less CPUs */
+	int soft_vnmi_blocked;
+	ktime_t entry_time;
+	s64 vnmi_blocked_time;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -2230,6 +2235,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	vmx->vcpu.arch.rmode.active = 0;
 
+	vmx->soft_vnmi_blocked = 0;
+
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	kvm_set_cr8(&vmx->vcpu, 0);
 	msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2335,6 +2342,29 @@ out:
 	return ret;
 }
 
+static void enable_irq_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	if (!cpu_has_virtual_nmis()) {
+		enable_irq_window(vcpu);
+		return;
+	}
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2360,6 +2390,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	if (!cpu_has_virtual_nmis()) {
+		/*
+		 * Tracking the NMI-blocked state in software is built upon
+		 * finding the next open IRQ window. This, in turn, depends on
+		 * well-behaving guests: They have to keep IRQs disabled at
+		 * least as long as the NMI handler runs. Otherwise we may
+		 * cause NMI nesting, maybe breaking the guest. But as this is
+		 * highly unlikely, we can live with the residual risk.
+		 */
+		vmx->soft_vnmi_blocked = 1;
+		vmx->vnmi_blocked_time = 0;
+	}
+
 	++vcpu->stat.nmi_injections;
 	if (vcpu->arch.rmode.active) {
 		vmx->rmode.irq.pending = true;
@@ -2384,6 +2427,8 @@ static void vmx_update_window_states(struct kvm_vcpu *vcpu)
 		!(guest_intr & (GUEST_INTR_STATE_STI |
 				GUEST_INTR_STATE_MOV_SS |
 				GUEST_INTR_STATE_NMI));
+	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
+		vcpu->arch.nmi_window_open = 0;
 
 	vcpu->arch.interrupt_window_open =
 		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -2403,55 +2448,31 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 	kvm_queue_interrupt(vcpu, irq);
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	if (!cpu_has_virtual_nmis())
-		return;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
 	vmx_update_window_states(vcpu);
 
-	if (cpu_has_virtual_nmis()) {
-		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-			if (vcpu->arch.nmi_window_open) {
-				vcpu->arch.nmi_pending = false;
-				vcpu->arch.nmi_injected = true;
-			} else {
-				enable_nmi_window(vcpu);
-				return;
-			}
-		}
-		if (vcpu->arch.nmi_injected) {
-			vmx_inject_nmi(vcpu);
-			if (vcpu->arch.nmi_pending
-			    || kvm_run->request_nmi_window)
-				enable_nmi_window(vcpu);
-			else if (vcpu->arch.irq_summary
-				 || kvm_run->request_interrupt_window)
-				enable_irq_window(vcpu);
+	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+		if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+		} else {
+			enable_nmi_window(vcpu);
 			return;
 		}
-		if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
+	}
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.nmi_pending || kvm_run->request_nmi_window)
 			enable_nmi_window(vcpu);
+		else if (vcpu->arch.irq_summary
+			 || kvm_run->request_interrupt_window)
+			enable_irq_window(vcpu);
+		return;
 	}
+	if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
+		enable_nmi_window(vcpu);
 
 	if (vcpu->arch.interrupt_window_open) {
 		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -3097,6 +3118,37 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		printk(KERN_WARNING "%s: unexpected, valid vectoring info "
 		       "(0x%x) and exit reason is 0x%x\n",
 		       __func__, vectoring_info, exit_reason);
+
+	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
+		if (vcpu->arch.interrupt_window_open) {
+			vmx->soft_vnmi_blocked = 0;
+			vcpu->arch.nmi_window_open = 1;
+		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
+		    (kvm_run->request_nmi_window || vcpu->arch.nmi_pending)) {
+			/*
+			 * This CPU don't support us in finding the end of an
+			 * NMI-blocked window if the guest runs with IRQs
+			 * disabled. So we pull the trigger after 1 s of
+			 * futile waiting, but inform the user about this.
+			 */
+			printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+			       "state on VCPU %d after 1 s timeout\n",
+			       __func__, vcpu->vcpu_id);
+			vmx->soft_vnmi_blocked = 0;
+			vmx->vcpu.arch.nmi_window_open = 1;
+		}
+
+		/*
+		 * If the user space waits to inject an NNI, exit ASAP
+		 */
+		if (vcpu->arch.nmi_window_open && kvm_run->request_nmi_window
+		    && !vcpu->arch.nmi_pending) {
+			kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
+			++vcpu->stat.nmi_window_exits;
+			return 0;
+		}
+	}
+
 	if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -3146,7 +3198,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 		if (unblock_nmi && vector != DF_VECTOR)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
-	}
+	} else if (unlikely(vmx->soft_vnmi_blocked))
+		vmx->vnmi_blocked_time +=
+			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
 
 	idt_vectoring_info = vmx->idt_vectoring_info;
 	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3186,27 +3240,25 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 
 	vmx_update_window_states(vcpu);
 
-	if (cpu_has_virtual_nmis()) {
-		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-			if (vcpu->arch.interrupt.pending) {
-				enable_nmi_window(vcpu);
-			} else if (vcpu->arch.nmi_window_open) {
-				vcpu->arch.nmi_pending = false;
-				vcpu->arch.nmi_injected = true;
-			} else {
-				enable_nmi_window(vcpu);
-				return;
-			}
-		}
-		if (vcpu->arch.nmi_injected) {
-			vmx_inject_nmi(vcpu);
-			if (vcpu->arch.nmi_pending)
-				enable_nmi_window(vcpu);
-			else if (kvm_cpu_has_interrupt(vcpu))
-				enable_irq_window(vcpu);
+	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+		if (vcpu->arch.interrupt.pending) {
+			enable_nmi_window(vcpu);
+		} else if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+		} else {
+			enable_nmi_window(vcpu);
 			return;
 		}
 	}
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.nmi_pending)
+			enable_nmi_window(vcpu);
+		else if (kvm_cpu_has_interrupt(vcpu))
+			enable_irq_window(vcpu);
+		return;
+	}
 	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
 		if (vcpu->arch.interrupt_window_open)
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
@@ -3255,6 +3307,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 intr_info;
 
+	/* Record the guest's net vcpu time for enforced NMI injections. */
+	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
+		vmx->entry_time = ktime_get();
+
 	/* Handle invalid guest state instead of entering VMX */
 	if (vmx->emulation_required && emulate_invalid_guest_state) {
 		handle_invalid_guest_state(vcpu, kvm_run);
-- 
cgit v0.10.2


From 5f179287fa02723215eecf681d812b303c243973 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 7 Oct 2008 15:42:33 +0200
Subject: KVM: call kvm_arch_vcpu_reset() instead of the kvm_x86_ops callback

Call kvm_arch_vcpu_reset() instead of directly using arch callback.
The function does additional things.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0797145..a2c4b55 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3010,7 +3010,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		pr_debug("vcpu %d received sipi with vector # %x\n",
 			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
 		kvm_lapic_reset(vcpu);
-		r = kvm_x86_ops->vcpu_reset(vcpu);
+		r = kvm_arch_vcpu_reset(vcpu);
 		if (r)
 			return r;
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-- 
cgit v0.10.2


From b558bc0a25c82ef2a9d2683b0beb3e4b87cea20b Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:52 +0800
Subject: x86: Rename mtrr_state struct and macro names

Prepare for exporting them.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f..90db91e 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,9 +14,9 @@
 #include <asm/pat.h>
 #include "mtrr.h"
 
-struct mtrr_state {
-	struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
-	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
+struct mtrr_state_type {
+	struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+	mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
 	unsigned char enabled;
 	unsigned char have_fixed;
 	mtrr_type def_type;
@@ -35,7 +35,7 @@ static struct fixed_range_block fixed_range_blocks[] = {
 };
 
 static unsigned long smp_changes_mask;
-static struct mtrr_state mtrr_state = {};
+static struct mtrr_state_type mtrr_state = {};
 static int mtrr_state_set;
 u64 mtrr_tom2;
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1159e26..d6ec7ec 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
 
 u32 num_var_ranges = 0;
 
-unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 static DEFINE_MUTEX(mtrr_mutex);
 
 u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@ struct mtrr_value {
 	unsigned long	lsize;
 };
 
-static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
+static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
 
 static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 {
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2dc4ec6..9885382 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -11,8 +11,9 @@
 #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
 #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
 
-#define NUM_FIXED_RANGES 88
-#define MAX_VAR_RANGES 256
+#define MTRR_NUM_FIXED_RANGES 88
+#define MTRR_MAX_VAR_RANGES 256
+
 #define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
@@ -33,7 +34,7 @@
    an 8 bit field: */
 typedef u8 mtrr_type;
 
-extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 
 struct mtrr_ops {
 	u32	vendor;
-- 
cgit v0.10.2


From 932d27a7913fc6b3c64c6e6082628b0a1561dec9 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:53 +0800
Subject: x86: Export some definition of MTRR

For KVM can reuse the type define, and need them to support shadow MTRR.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 7c1e425..cb988aa 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -57,6 +57,31 @@ struct mtrr_gentry {
 };
 #endif /* !__i386__ */
 
+struct mtrr_var_range {
+	u32 base_lo;
+	u32 base_hi;
+	u32 mask_lo;
+	u32 mask_hi;
+};
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+   an 8 bit field: */
+typedef u8 mtrr_type;
+
+#define MTRR_NUM_FIXED_RANGES 88
+#define MTRR_MAX_VAR_RANGES 256
+
+struct mtrr_state_type {
+	struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+	mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
+	unsigned char enabled;
+	unsigned char have_fixed;
+	mtrr_type def_type;
+};
+
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
 /*  These are the various ioctls  */
 #define MTRRIOC_ADD_ENTRY        _IOW(MTRR_IOCTL_BASE,  0, struct mtrr_sentry)
 #define MTRRIOC_SET_ENTRY        _IOW(MTRR_IOCTL_BASE,  1, struct mtrr_sentry)
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 90db91e..b59ddcc 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,14 +14,6 @@
 #include <asm/pat.h>
 #include "mtrr.h"
 
-struct mtrr_state_type {
-	struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
-	mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
-	unsigned char enabled;
-	unsigned char have_fixed;
-	mtrr_type def_type;
-};
-
 struct fixed_range_block {
 	int base_msr; /* start address of an MTRR block */
 	int ranges;   /* number of MTRRs in this block  */
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = {
 };
 
 static unsigned long smp_changes_mask;
-static struct mtrr_state_type mtrr_state = {};
 static int mtrr_state_set;
 u64 mtrr_tom2;
 
+struct mtrr_state_type mtrr_state = {};
+EXPORT_SYMBOL_GPL(mtrr_state);
+
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
 
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 9885382..ffd6040 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,12 +8,6 @@
 #define MTRRcap_MSR     0x0fe
 #define MTRRdefType_MSR 0x2ff
 
-#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
-#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
-
-#define MTRR_NUM_FIXED_RANGES 88
-#define MTRR_MAX_VAR_RANGES 256
-
 #define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
@@ -30,10 +24,6 @@
 #define MTRR_CHANGE_MASK_VARIABLE  0x02
 #define MTRR_CHANGE_MASK_DEFTYPE   0x04
 
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
-   an 8 bit field: */
-typedef u8 mtrr_type;
-
 extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 
 struct mtrr_ops {
@@ -71,13 +61,6 @@ struct set_mtrr_context {
 	u32 ccr3;
 };
 
-struct mtrr_var_range {
-	u32 base_lo;
-	u32 base_hi;
-	u32 mask_lo;
-	u32 mask_hi;
-};
-
 void set_mtrr_done(struct set_mtrr_context *ctxt);
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
-- 
cgit v0.10.2


From 0bed3b568b68e5835ef5da888a372b9beabf7544 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:54 +0800
Subject: KVM: Improve MTRR structure

As well as reset mmu context when set MTRR.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a40fa84..8082e87 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -21,6 +21,7 @@
 
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
+#include <asm/mtrr.h>
 
 #define KVM_MAX_VCPUS 16
 #define KVM_MEMORY_SLOTS 32
@@ -86,6 +87,7 @@
 #define KVM_MIN_FREE_MMU_PAGES 5
 #define KVM_REFILL_PAGES 25
 #define KVM_MAX_CPUID_ENTRIES 40
+#define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
 extern spinlock_t kvm_lock;
@@ -329,7 +331,8 @@ struct kvm_vcpu_arch {
 	bool nmi_injected;
 	bool nmi_window_open;
 
-	u64 mtrr[0x100];
+	struct mtrr_state_type mtrr_state;
+	u32 pat;
 };
 
 struct kvm_mem_alias {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a2c4b55..f5b2334 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -39,6 +39,7 @@
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
+#include <asm/mtrr.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS						\
@@ -650,10 +651,38 @@ static bool msr_mtrr_valid(unsigned msr)
 
 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
+	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
 	if (!msr_mtrr_valid(msr))
 		return 1;
 
-	vcpu->arch.mtrr[msr - 0x200] = data;
+	if (msr == MSR_MTRRdefType) {
+		vcpu->arch.mtrr_state.def_type = data;
+		vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
+	} else if (msr == MSR_MTRRfix64K_00000)
+		p[0] = data;
+	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+		p[1 + msr - MSR_MTRRfix16K_80000] = data;
+	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+		p[3 + msr - MSR_MTRRfix4K_C0000] = data;
+	else if (msr == MSR_IA32_CR_PAT)
+		vcpu->arch.pat = data;
+	else {	/* Variable MTRRs */
+		int idx, is_mtrr_mask;
+		u64 *pt;
+
+		idx = (msr - 0x200) / 2;
+		is_mtrr_mask = msr - 0x200 - 2 * idx;
+		if (!is_mtrr_mask)
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+		else
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+		*pt = data;
+	}
+
+	kvm_mmu_reset_context(vcpu);
 	return 0;
 }
 
@@ -749,10 +778,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 
 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
+	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
 	if (!msr_mtrr_valid(msr))
 		return 1;
 
-	*pdata = vcpu->arch.mtrr[msr - 0x200];
+	if (msr == MSR_MTRRdefType)
+		*pdata = vcpu->arch.mtrr_state.def_type +
+			 (vcpu->arch.mtrr_state.enabled << 10);
+	else if (msr == MSR_MTRRfix64K_00000)
+		*pdata = p[0];
+	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+		*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
+	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+		*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
+	else if (msr == MSR_IA32_CR_PAT)
+		*pdata = vcpu->arch.pat;
+	else {	/* Variable MTRRs */
+		int idx, is_mtrr_mask;
+		u64 *pt;
+
+		idx = (msr - 0x200) / 2;
+		is_mtrr_mask = msr - 0x200 - 2 * idx;
+		if (!is_mtrr_mask)
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+		else
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+		*pdata = *pt;
+	}
+
 	return 0;
 }
 
@@ -3942,6 +3998,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	/* We do fxsave: this must be aligned. */
 	BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
 
+	vcpu->arch.mtrr_state.have_fixed = 1;
 	vcpu_load(vcpu);
 	r = kvm_arch_vcpu_reset(vcpu);
 	if (r == 0)
-- 
cgit v0.10.2


From 468d472f3f65100d5fb88c8d45043c85b874c294 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:55 +0800
Subject: KVM: VMX: Add PAT support for EPT

GUEST_PAT support is a new feature introduced by Intel Core i7 architecture.
With this, cpu would save/load guest and host PAT automatically, for EPT memory
type in guest depends on MSR_IA32_CR_PAT.

Also add save/restore for MSR_IA32_CR_PAT.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2180109..b4c95a5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -962,6 +962,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
 
 		break;
+	case MSR_IA32_CR_PAT:
+		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+			vmcs_write64(GUEST_IA32_PAT, data);
+			vcpu->arch.pat = data;
+			break;
+		}
+		/* Otherwise falls through to kvm_set_msr_common */
 	default:
 		vmx_load_host_state(vmx);
 		msr = find_msr_entry(vmx, msr_index);
@@ -1181,12 +1188,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
-	opt = 0;
+	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
 				&_vmexit_control) < 0)
 		return -EIO;
 
-	min = opt = 0;
+	min = 0;
+	opt = VM_ENTRY_LOAD_IA32_PAT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
 				&_vmentry_control) < 0)
 		return -EIO;
@@ -2092,8 +2100,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
  */
 static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
-	u32 host_sysenter_cs;
+	u32 host_sysenter_cs, msr_low, msr_high;
 	u32 junk;
+	u64 host_pat;
 	unsigned long a;
 	struct descriptor_table dt;
 	int i;
@@ -2181,6 +2190,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	rdmsrl(MSR_IA32_SYSENTER_EIP, a);
 	vmcs_writel(HOST_IA32_SYSENTER_EIP, a);   /* 22.2.3 */
 
+	if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
+		rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
+		host_pat = msr_low | ((u64) msr_high << 32);
+		vmcs_write64(HOST_IA32_PAT, host_pat);
+	}
+	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+		rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
+		host_pat = msr_low | ((u64) msr_high << 32);
+		/* Write the default value follow host pat */
+		vmcs_write64(GUEST_IA32_PAT, host_pat);
+		/* Keep arch.pat sync with GUEST_IA32_PAT */
+		vmx->vcpu.arch.pat = host_pat;
+	}
+
 	for (i = 0; i < NR_VMX_MSR; ++i) {
 		u32 index = vmx_msr_index[i];
 		u32 data_low, data_high;
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index ec5edc3..18598af 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -63,10 +63,13 @@
 
 #define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
 #define VM_EXIT_ACK_INTR_ON_EXIT                0x00008000
+#define VM_EXIT_SAVE_IA32_PAT			0x00040000
+#define VM_EXIT_LOAD_IA32_PAT			0x00080000
 
 #define VM_ENTRY_IA32E_MODE                     0x00000200
 #define VM_ENTRY_SMM                            0x00000400
 #define VM_ENTRY_DEACT_DUAL_MONITOR             0x00000800
+#define VM_ENTRY_LOAD_IA32_PAT			0x00004000
 
 /* VMCS Encodings */
 enum vmcs_field {
@@ -112,6 +115,8 @@ enum vmcs_field {
 	VMCS_LINK_POINTER_HIGH          = 0x00002801,
 	GUEST_IA32_DEBUGCTL             = 0x00002802,
 	GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+	GUEST_IA32_PAT			= 0x00002804,
+	GUEST_IA32_PAT_HIGH		= 0x00002805,
 	GUEST_PDPTR0                    = 0x0000280a,
 	GUEST_PDPTR0_HIGH               = 0x0000280b,
 	GUEST_PDPTR1                    = 0x0000280c,
@@ -120,6 +125,8 @@ enum vmcs_field {
 	GUEST_PDPTR2_HIGH               = 0x0000280f,
 	GUEST_PDPTR3                    = 0x00002810,
 	GUEST_PDPTR3_HIGH               = 0x00002811,
+	HOST_IA32_PAT			= 0x00002c00,
+	HOST_IA32_PAT_HIGH		= 0x00002c01,
 	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
 	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
 	EXCEPTION_BITMAP                = 0x00004004,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f5b2334..0edf753 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -452,7 +452,7 @@ static u32 msrs_to_save[] = {
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
 	MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
-	MSR_IA32_PERF_STATUS,
+	MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
 };
 
 static unsigned num_msrs_to_save;
-- 
cgit v0.10.2


From 74be52e3e6285fc6e872a2a7baea544106f399ea Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:56 +0800
Subject: KVM: Add local get_mtrr_type() to support MTRR

For EPT memory type support.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 410ddbc..ac2304f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1393,6 +1393,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 	return page;
 }
 
+/*
+ * The function is based on mtrr_type_lookup() in
+ * arch/x86/kernel/cpu/mtrr/generic.c
+ */
+static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
+			 u64 start, u64 end)
+{
+	int i;
+	u64 base, mask;
+	u8 prev_match, curr_match;
+	int num_var_ranges = KVM_NR_VAR_MTRR;
+
+	if (!mtrr_state->enabled)
+		return 0xFF;
+
+	/* Make end inclusive end, instead of exclusive */
+	end--;
+
+	/* Look in fixed ranges. Just return the type as per start */
+	if (mtrr_state->have_fixed && (start < 0x100000)) {
+		int idx;
+
+		if (start < 0x80000) {
+			idx = 0;
+			idx += (start >> 16);
+			return mtrr_state->fixed_ranges[idx];
+		} else if (start < 0xC0000) {
+			idx = 1 * 8;
+			idx += ((start - 0x80000) >> 14);
+			return mtrr_state->fixed_ranges[idx];
+		} else if (start < 0x1000000) {
+			idx = 3 * 8;
+			idx += ((start - 0xC0000) >> 12);
+			return mtrr_state->fixed_ranges[idx];
+		}
+	}
+
+	/*
+	 * Look in variable ranges
+	 * Look of multiple ranges matching this address and pick type
+	 * as per MTRR precedence
+	 */
+	if (!(mtrr_state->enabled & 2))
+		return mtrr_state->def_type;
+
+	prev_match = 0xFF;
+	for (i = 0; i < num_var_ranges; ++i) {
+		unsigned short start_state, end_state;
+
+		if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
+			continue;
+
+		base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
+		       (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
+		mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
+		       (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
+
+		start_state = ((start & mask) == (base & mask));
+		end_state = ((end & mask) == (base & mask));
+		if (start_state != end_state)
+			return 0xFE;
+
+		if ((start & mask) != (base & mask))
+			continue;
+
+		curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
+		if (prev_match == 0xFF) {
+			prev_match = curr_match;
+			continue;
+		}
+
+		if (prev_match == MTRR_TYPE_UNCACHABLE ||
+		    curr_match == MTRR_TYPE_UNCACHABLE)
+			return MTRR_TYPE_UNCACHABLE;
+
+		if ((prev_match == MTRR_TYPE_WRBACK &&
+		     curr_match == MTRR_TYPE_WRTHROUGH) ||
+		    (prev_match == MTRR_TYPE_WRTHROUGH &&
+		     curr_match == MTRR_TYPE_WRBACK)) {
+			prev_match = MTRR_TYPE_WRTHROUGH;
+			curr_match = MTRR_TYPE_WRTHROUGH;
+		}
+
+		if (prev_match != curr_match)
+			return MTRR_TYPE_UNCACHABLE;
+	}
+
+	if (prev_match != 0xFF)
+		return prev_match;
+
+	return mtrr_state->def_type;
+}
+
+static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	u8 mtrr;
+
+	mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
+			     (gfn << PAGE_SHIFT) + PAGE_SIZE);
+	if (mtrr == 0xfe || mtrr == 0xff)
+		mtrr = MTRR_TYPE_WRBACK;
+	return mtrr;
+}
+
 static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
 	unsigned index;
-- 
cgit v0.10.2


From 64d4d521757117aa5c1cfe79d3baa6cf57703f81 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:57 +0800
Subject: KVM: Enable MTRR for EPT

The effective memory type of EPT is the mixture of MSR_IA32_CR_PAT and memory
type field of EPT entry.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8082e87..93040b5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -483,6 +483,7 @@ struct kvm_x86_ops {
 
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
+	int (*get_mt_mask_shift)(void);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -496,7 +497,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
 void kvm_mmu_set_base_ptes(u64 base_pte);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask);
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ac2304f..09d05f5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask;	/* mutual exclusive with nx_mask */
 static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
+static u64 __read_mostly shadow_mt_mask;
 
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
 EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
 
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask)
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
 {
 	shadow_user_mask = user_mask;
 	shadow_accessed_mask = accessed_mask;
 	shadow_dirty_mask = dirty_mask;
 	shadow_nx_mask = nx_mask;
 	shadow_x_mask = x_mask;
+	shadow_mt_mask = mt_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -1546,6 +1548,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 {
 	u64 spte;
 	int ret = 0;
+	u64 mt_mask = shadow_mt_mask;
+
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
 	 * whether the guest actually used the pte (in order to detect
@@ -1564,6 +1568,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		spte |= shadow_user_mask;
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
+	if (mt_mask) {
+		mt_mask = get_memory_type(vcpu, gfn) <<
+			  kvm_x86_ops->get_mt_mask_shift();
+		spte |= mt_mask;
+	}
 
 	spte |= (u64)pfn << PAGE_SHIFT;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9c4ce65..05efc4e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1912,6 +1912,11 @@ static int get_npt_level(void)
 #endif
 }
 
+static int svm_get_mt_mask_shift(void)
+{
+	return 0;
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -1967,6 +1972,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
+	.get_mt_mask_shift = svm_get_mt_mask_shift,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b4c95a5..dae134f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3574,6 +3574,11 @@ static int get_ept_level(void)
 	return VMX_EPT_DEFAULT_GAW + 1;
 }
 
+static int vmx_get_mt_mask_shift(void)
+{
+	return VMX_EPT_MT_EPTE_SHIFT;
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -3629,6 +3634,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
+	.get_mt_mask_shift = vmx_get_mt_mask_shift,
 };
 
 static int __init vmx_init(void)
@@ -3685,10 +3691,10 @@ static int __init vmx_init(void)
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
 			VMX_EPT_WRITABLE_MASK |
-			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
 			VMX_EPT_IGMT_BIT);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
-				VMX_EPT_EXECUTABLE_MASK);
+				VMX_EPT_EXECUTABLE_MASK,
+				VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
 		kvm_enable_tdp();
 	} else
 		kvm_disable_tdp();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0edf753..f175b79 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2615,7 +2615,7 @@ int kvm_arch_init(void *opaque)
 	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
 	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-			PT_DIRTY_MASK, PT64_NX_MASK, 0);
+			PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
 	return 0;
 
 out:
-- 
cgit v0.10.2


From d73fa29a9b75b2af7f69dae276d2c602a23b329b Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Tue, 14 Oct 2008 15:59:10 +0800
Subject: KVM: Clean up kvm_x86_emulate.h

Remove one left improper comment of removed CR2.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 25179a2..16a0026 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -146,22 +146,18 @@ struct x86_emulate_ctxt {
 	/* Register state before/after emulation. */
 	struct kvm_vcpu *vcpu;
 
-	/* Linear faulting address (if emulating a page-faulting instruction) */
 	unsigned long eflags;
-
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
 	int mode;
-
 	u32 cs_base;
 
 	/* decode cache */
-
 	struct decode_cache decode;
 };
 
 /* Repeat String Operation Prefix */
-#define REPE_PREFIX  1
-#define REPNE_PREFIX    2
+#define REPE_PREFIX	1
+#define REPNE_PREFIX	2
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0	/* Real mode.             */
@@ -170,7 +166,7 @@ struct x86_emulate_ctxt {
 #define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode.    */
 
 /* Host execution mode. */
-#if defined(__i386__)
+#if defined(CONFIG_X86_32)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
 #elif defined(CONFIG_X86_64)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
-- 
cgit v0.10.2


From 291f26bc0f89518ad7ee3207c09eb8a743ac8fcc Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 16 Oct 2008 17:30:57 +0800
Subject: KVM: MMU: Extend kvm_mmu_page->slot_bitmap size

Otherwise set_bit() for private memory slot(above KVM_MEMORY_SLOTS) would
corrupted memory in 32bit host.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 93040b5..59c3ae1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -192,9 +192,11 @@ struct kvm_mmu_page {
 	u64 *spt;
 	/* hold the gfn of each spte inside spt */
 	gfn_t *gfns;
-	unsigned long slot_bitmap; /* One bit set per slot which has memory
-				    * in this shadow page.
-				    */
+	/*
+	 * One bit set per slot which has memory
+	 * in this shadow page.
+	 */
+	DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
 	bool unsync;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 09d05f5..8687758 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -789,7 +789,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	ASSERT(is_empty_shadow_page(sp->spt));
-	sp->slot_bitmap = 0;
+	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	sp->multimapped = 0;
 	sp->parent_pte = parent_pte;
 	--vcpu->kvm->arch.n_free_mmu_pages;
@@ -1364,7 +1364,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
 	struct kvm_mmu_page *sp = page_header(__pa(pte));
 
-	__set_bit(slot, &sp->slot_bitmap);
+	__set_bit(slot, sp->slot_bitmap);
 }
 
 static void mmu_convert_notrap(struct kvm_mmu_page *sp)
@@ -2564,7 +2564,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 		int i;
 		u64 *pt;
 
-		if (!test_bit(slot, &sp->slot_bitmap))
+		if (!test_bit(slot, sp->slot_bitmap))
 			continue;
 
 		pt = sp->spt;
-- 
cgit v0.10.2


From 6fe639792c7b8e462baeaac39ecc33541fd5da6e Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 16 Oct 2008 17:30:58 +0800
Subject: KVM: VMX: Move private memory slot position

PCI device assignment would map guest MMIO spaces as separate slot, so it is
possible that the device has more than 2 MMIO spaces and overwrite current
private memslot.

The patch move private memory slot to the top of userspace visible memory slots.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index dae134f..7623eb7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2513,7 +2513,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	int ret;
 	struct kvm_userspace_memory_region tss_mem = {
-		.slot = 8,
+		.slot = TSS_PRIVATE_MEMSLOT,
 		.guest_phys_addr = addr,
 		.memory_size = PAGE_SIZE * 3,
 		.flags = 0,
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 18598af..3db236c 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -338,8 +338,9 @@ enum vmcs_field {
 
 #define AR_RESERVD_MASK 0xfffe0f00
 
-#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	9
-#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	10
+#define TSS_PRIVATE_MEMSLOT			(KVM_MEMORY_SLOTS + 0)
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 1)
+#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 2)
 
 #define VMX_NR_VPIDS				(1 << 16)
 #define VMX_VPID_EXTENT_SINGLE_CONTEXT		1
-- 
cgit v0.10.2


From 291fd39bfc2089c2dae79cf2d7cfca81b14ca769 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Mon, 20 Oct 2008 13:11:58 +0200
Subject: KVM: x86 emulator: Add decode entries for 0x04 and 0x05 opcodes (add
 acc, imm)

Add decode entries for 0x04 and 0x05 (ADD) opcodes, execution is already
implemented.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index a391e21..57d7cc4 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -80,7 +80,7 @@ static u16 opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
+	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
 	/* 0x08 - 0x0F */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-- 
cgit v0.10.2


From 8fdb2351d51b040146f10a624387bbd102d851c0 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 20 Oct 2008 10:20:02 +0200
Subject: KVM: x86: Fix and refactor NMI watchdog emulation

This patch refactors the NMI watchdog delivery patch, consolidating
tests and providing a proper API for delivering watchdog events.

An included micro-optimization is to check only for apic_hw_enabled in
kvm_apic_local_deliver (the test for LVT mask is covering the
soft-disabled case already).

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Acked-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 580cc1d..b6fcf5a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -612,15 +612,18 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
 	mutex_unlock(&kvm->lock);
 
 	/*
-	 * Provides NMI watchdog support in IOAPIC mode.
-	 * The route is: PIT -> PIC -> LVT0 in NMI mode,
-	 * timer IRQs will continue to flow through the IOAPIC.
+	 * Provides NMI watchdog support via Virtual Wire mode.
+	 * The route is: PIT -> PIC -> LVT0 in NMI mode.
+	 *
+	 * Note: Our Virtual Wire implementation is simplified, only
+	 * propagating PIT interrupts to all VCPUs when they have set
+	 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
+	 * VCPU0, and only if its LVT0 is in EXTINT mode.
 	 */
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		vcpu = kvm->vcpus[i];
-		if (!vcpu)
-			continue;
-		kvm_apic_local_deliver(vcpu, APIC_LVT0);
+		if (vcpu)
+			kvm_apic_nmi_wd_deliver(vcpu);
 	}
 }
 
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 71e37a5..b9e9051 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -87,7 +87,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s);
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
-int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type);
+void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 304f9dd..0b0d413 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -973,14 +973,12 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type)
+static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 reg = apic_get_reg(apic, lvt_type);
 	int vector, mode, trig_mode;
-	u32 reg;
 
-	if (apic && apic_enabled(apic)) {
-		reg = apic_get_reg(apic, lvt_type);
+	if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
 		vector = reg & APIC_VECTOR_MASK;
 		mode = reg & APIC_MODE_MASK;
 		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
@@ -989,9 +987,12 @@ int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type)
 	return 0;
 }
 
-static inline int __inject_apic_timer_irq(struct kvm_lapic *apic)
+void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
 {
-	return kvm_apic_local_deliver(apic->vcpu, APIC_LVTT);
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (apic)
+		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
@@ -1086,9 +1087,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
-		atomic_read(&apic->timer.pending) > 0) {
-		if (__inject_apic_timer_irq(apic))
+	if (apic && atomic_read(&apic->timer.pending) > 0) {
+		if (kvm_apic_local_deliver(apic, APIC_LVTT))
 			atomic_dec(&apic->timer.pending);
 	}
 }
-- 
cgit v0.10.2


From cc6e462cd54e64858ea25816df87d033229efe56 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 20 Oct 2008 10:20:03 +0200
Subject: KVM: x86: Optimize NMI watchdog delivery

As suggested by Avi, this patch introduces a counter of VCPUs that have
LVT0 set to NMI mode. Only if the counter > 0, we push the PIT ticks via
all LAPIC LVT0 lines to enable NMI watchdog support.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Acked-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 59c3ae1..09e6c56 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -361,6 +361,7 @@ struct kvm_arch{
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
 	struct hlist_head irq_ack_notifier_list;
+	int vapics_in_nmi_mode;
 
 	int round_robin_prev_vcpu;
 	unsigned int tss_addr;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index b6fcf5a..e665d1c 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -620,11 +620,12 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
 	 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
 	 * VCPU0, and only if its LVT0 is in EXTINT mode.
 	 */
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		vcpu = kvm->vcpus[i];
-		if (vcpu)
-			kvm_apic_nmi_wd_deliver(vcpu);
-	}
+	if (kvm->arch.vapics_in_nmi_mode > 0)
+		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+			vcpu = kvm->vcpus[i];
+			if (vcpu)
+				kvm_apic_nmi_wd_deliver(vcpu);
+		}
 }
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0b0d413..afac68c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic)
 	return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
 }
 
+static inline int apic_lvt_nmi_mode(u32 lvt_val)
+{
+	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
+}
+
 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK | APIC_LVT_TIMER_PERIODIC,	/* LVTT */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
@@ -672,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic)
 					apic->timer.period)));
 }
 
+static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
+{
+	int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
+
+	if (apic_lvt_nmi_mode(lvt0_val)) {
+		if (!nmi_wd_enabled) {
+			apic_debug("Receive NMI setting on APIC_LVT0 "
+				   "for cpu %d\n", apic->vcpu->vcpu_id);
+			apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
+		}
+	} else if (nmi_wd_enabled)
+		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
+}
+
 static void apic_mmio_write(struct kvm_io_device *this,
 			    gpa_t address, int len, const void *data)
 {
@@ -753,9 +772,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		break;
 
 	case APIC_LVT0:
-		if (val == APIC_DM_NMI)
-			apic_debug("Receive NMI setting on APIC_LVT0 "
-				"for cpu %d\n", apic->vcpu->vcpu_id);
+		apic_manage_nmi_watchdog(apic, val);
 	case APIC_LVTT:
 	case APIC_LVTTHMR:
 	case APIC_LVTPC:
-- 
cgit v0.10.2


From e19e30effac03f5a005a8e42ed941a2a5dc62654 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 20 Oct 2008 16:07:10 +0800
Subject: KVM: IRQ ACK notifier should be used with in-kernel irqchip

Also remove unnecessary parameter of unregister irq ack notifier.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bb92be2..3a0fb77 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -316,8 +316,7 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				     struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 55ad76e..9fbbdea 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -58,12 +58,16 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
+	/* Must be called with in-kernel IRQ chip, otherwise it's nonsense */
+	ASSERT(irqchip_in_kernel(kvm));
+	ASSERT(kian);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
 }
 
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				     struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
 {
+	if (!kian)
+		return;
 	hlist_del(&kian->link);
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a87f45e..4f43abe 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -143,7 +143,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
-	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
 	kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 
 	if (cancel_work_sync(&assigned_dev->interrupt_work))
-- 
cgit v0.10.2


From b8222ad2e52fd2c0c4e5e1c53e65d131f911b767 Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Wed, 22 Oct 2008 16:39:47 +0530
Subject: KVM: x86: Fix typo in function name

get_segment_descritptor_dtable() contains an obvious type.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f175b79..ceeac88 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3373,9 +3373,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
 	kvm_desct->padding = 0;
 }
 
-static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
-					   u16 selector,
-					   struct descriptor_table *dtable)
+static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
+					  u16 selector,
+					  struct descriptor_table *dtable)
 {
 	if (selector & 1 << 2) {
 		struct kvm_segment kvm_seg;
@@ -3400,7 +3400,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 	struct descriptor_table dtable;
 	u16 index = selector >> 3;
 
-	get_segment_descritptor_dtable(vcpu, selector, &dtable);
+	get_segment_descriptor_dtable(vcpu, selector, &dtable);
 
 	if (dtable.limit < index * 8 + 7) {
 		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
@@ -3419,7 +3419,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 	struct descriptor_table dtable;
 	u16 index = selector >> 3;
 
-	get_segment_descritptor_dtable(vcpu, selector, &dtable);
+	get_segment_descriptor_dtable(vcpu, selector, &dtable);
 
 	if (dtable.limit < index * 8 + 7)
 		return 1;
-- 
cgit v0.10.2


From 25022acc3dd5f0b54071c7ba7c371860f2971b52 Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Mon, 27 Oct 2008 09:04:17 +0000
Subject: KVM: SVM: Set the 'g' bit of the cs selector for cross-vendor
 migration

The hardware does not set the 'g' bit of the cs selector and this breaks
migration from amd hosts to intel hosts. Set this bit if the segment
limit is beyond 1 MB.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 05efc4e..665008d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -772,6 +772,15 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 	var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
 	var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
 	var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
+
+	/*
+	 * SVM always stores 0 for the 'G' bit in the CS selector in
+	 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
+	 * Intel's VMENTRY has a check on the 'G' bit.
+	 */
+	if (seg == VCPU_SREG_CS)
+		var->g = s->limit > 0xfffff;
+
 	var->unusable = !var->present;
 }
 
-- 
cgit v0.10.2


From c0d09828c870f90c6bc72070ada281568f89c63b Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Mon, 27 Oct 2008 09:04:18 +0000
Subject: KVM: SVM: Set the 'busy' flag of the TR selector

The busy flag of the TR selector is not set by the hardware. This breaks
migration from amd hosts to intel hosts.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 665008d..743aebd7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -781,6 +781,13 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 	if (seg == VCPU_SREG_CS)
 		var->g = s->limit > 0xfffff;
 
+	/*
+	 * Work around a bug where the busy flag in the tr selector
+	 * isn't exposed
+	 */
+	if (seg == VCPU_SREG_TR)
+		var->type |= 0x2;
+
 	var->unusable = !var->present;
 }
 
-- 
cgit v0.10.2


From e93f36bcfaa9e899c595e1c446c784a69021854a Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Tue, 28 Oct 2008 10:51:30 +0100
Subject: KVM: allow emulator to adjust rip for emulated pio instructions

If we call the emulator we shouldn't call skip_emulated_instruction()
in the first place, since the emulator already computes the next rip
for us. Thus we move ->skip_emulated_instruction() out of
kvm_emulate_pio() and into handle_io() (and the svm equivalent). We
also replaced "return 0" by "break" in the "do_io:" case because now
the shadow register state needs to be committed. Otherwise eip will never
be updated.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 743aebd7..f0ad4d4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1115,6 +1115,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
 	down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
 
+	skip_emulated_instruction(&svm->vcpu);
 	return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7623eb7..816d231 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2687,6 +2687,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	rep = (exit_qualification & 32) != 0;
 	port = exit_qualification >> 16;
 
+	skip_emulated_instruction(vcpu);
 	return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ceeac88..38f79b6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2478,8 +2478,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	memcpy(vcpu->arch.pio_data, &val, 4);
 
-	kvm_x86_ops->skip_emulated_instruction(vcpu);
-
 	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
 	if (pio_dev) {
 		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 57d7cc4..8f60ace 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1772,7 +1772,7 @@ special_insn:
 			c->eip = saved_eip;
 			goto cannot_emulate;
 		}
-		return 0;
+		break;
 	case 0xf4:              /* hlt */
 		ctxt->vcpu->arch.halt_request = 1;
 		break;
-- 
cgit v0.10.2


From 1d5a4d9b92028d9fe77da34037bd5a1ebfecc733 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Wed, 29 Oct 2008 09:39:42 +0100
Subject: KVM: VMX: Handle mmio emulation when guest state is invalid

If emulate_invalid_guest_state is enabled, the emulator is called
when guest state is invalid.  Until now, we reported an mmio failure
when emulate_instruction() returned EMULATE_DO_MMIO.  This patch adds
the case where emulate_instruction() failed and an MMIO emulation
is needed.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 816d231..427dbc1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3052,16 +3052,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 	while (!guest_state_valid(vcpu)) {
 		err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
 
-		switch (err) {
-			case EMULATE_DONE:
-				break;
-			case EMULATE_DO_MMIO:
-				kvm_report_emulation_failure(vcpu, "mmio");
-				/* TODO: Handle MMIO */
-				return;
-			default:
-				kvm_report_emulation_failure(vcpu, "emulation failure");
-				return;
+		if (err == EMULATE_DO_MMIO)
+			break;
+
+		if (err != EMULATE_DONE) {
+			kvm_report_emulation_failure(vcpu, "emulation failure");
+			return;
 		}
 
 		if (signal_pending(current))
@@ -3073,8 +3069,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 	local_irq_disable();
 	preempt_disable();
 
-	/* Guest state should be valid now, no more emulation should be needed */
-	vmx->emulation_required = 0;
+	/* Guest state should be valid now except if we need to
+	 * emulate an MMIO */
+	if (guest_state_valid(vcpu))
+		vmx->emulation_required = 0;
 }
 
 /*
@@ -3121,6 +3119,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
 		    (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
 
+	/* If we need to emulate an MMIO from handle_invalid_guest_state
+	 * we just return 0 */
+	if (vmx->emulation_required && emulate_invalid_guest_state)
+		return 0;
+
 	/* Access CR3 don't cause VMExit in paging mode, so we need
 	 * to sync with guest real CR3. */
 	if (vm_need_ept() && is_paging(vcpu)) {
-- 
cgit v0.10.2


From a917f7af3905953329361d29b6db78eb17b4d44c Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Thu, 23 Oct 2008 14:56:44 +0800
Subject: KVM: ia64: Re-organize data sturure of guests' data area

1. Increase the size of data area to 64M
2. Support more vcpus and memory, 128 vcpus and 256G memory are supported
   for guests.
3. Add the boundary check for memory and vcpu allocation.

With this patch, kvm guest's data area looks as follow:
  *
  *            +----------------------+  ------- KVM_VM_DATA_SIZE
  *            |     vcpu[n]'s data   |   |     ___________________KVM_STK_OFFSET
  *            |                      |   |    /                   |
  *            |        ..........    |   |   /vcpu's struct&stack |
  *            |        ..........    |   |  /---------------------|---- 0
  *            |     vcpu[5]'s data   |   | /       vpd            |
  *            |     vcpu[4]'s data   |   |/-----------------------|
  *            |     vcpu[3]'s data   |   /         vtlb           |
  *            |     vcpu[2]'s data   |  /|------------------------|
  *            |     vcpu[1]'s data   |/  |         vhpt           |
  *            |     vcpu[0]'s data   |____________________________|
  *            +----------------------+   |
  *            |    memory dirty log  |   |
  *            +----------------------+   |
  *            |    vm's data struct  |   |
  *            +----------------------+   |
  *            |                      |   |
  *            |                      |   |
  *            |                      |   |
  *            |                      |   |
  *            |                      |   |
  *            |                      |   |
  *            |                      |   |
  *            |   vm's p2m table  |      |
  *            |                      |   |
  *            |                      |   |
  *            |                      |   |  |
  * vm's data->|                      |   |  |
  *            +----------------------+ ------- 0
  * To support large memory, needs to increase the size of p2m.
  * To support more vcpus, needs to ensure it has enough space to
  * hold vcpus' data.
  */

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index c60d324..678e264 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -23,17 +23,6 @@
 #ifndef __ASM_KVM_HOST_H
 #define __ASM_KVM_HOST_H
 
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/kvm.h>
-#include <linux/kvm_para.h>
-#include <linux/kvm_types.h>
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-
-#define KVM_MAX_VCPUS 4
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
@@ -52,68 +41,127 @@
 #define EXIT_REASON_PTC_G		8
 
 /*Define vmm address space and vm data space.*/
-#define KVM_VMM_SIZE (16UL<<20)
+#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
 #define KVM_VMM_SHIFT 24
-#define KVM_VMM_BASE 0xD000000000000000UL
-#define VMM_SIZE (8UL<<20)
+#define KVM_VMM_BASE 0xD000000000000000
+#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
 
 /*
  * Define vm_buffer, used by PAL Services, base address.
- * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M
+ * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
  */
 #define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
-#define KVM_VM_BUFFER_SIZE (8UL<<20)
-
-/*Define Virtual machine data layout.*/
-#define KVM_VM_DATA_SHIFT  24
-#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT)
-#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE)
-
-
-#define KVM_P2M_BASE    KVM_VM_DATA_BASE
-#define KVM_P2M_OFS     0
-#define KVM_P2M_SIZE    (8UL << 20)
-
-#define KVM_VHPT_BASE   (KVM_P2M_BASE + KVM_P2M_SIZE)
-#define KVM_VHPT_OFS    KVM_P2M_SIZE
-#define KVM_VHPT_BLOCK_SIZE   (2UL << 20)
-#define VHPT_SHIFT      18
-#define VHPT_SIZE       (1UL << VHPT_SHIFT)
-#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5))
-
-#define KVM_VTLB_BASE   (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE)
-#define KVM_VTLB_OFS    (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE)
-#define KVM_VTLB_BLOCK_SIZE   (1UL<<20)
-#define VTLB_SHIFT      17
-#define VTLB_SIZE       (1UL<<VTLB_SHIFT)
-#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5))
-
-#define KVM_VPD_BASE   (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE)
-#define KVM_VPD_OFS    (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE)
-#define KVM_VPD_BLOCK_SIZE   (2UL<<20)
-#define VPD_SHIFT       16
-#define VPD_SIZE        (1UL<<VPD_SHIFT)
-
-#define KVM_VCPU_BASE   (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE)
-#define KVM_VCPU_OFS    (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE)
-#define KVM_VCPU_BLOCK_SIZE   (2UL<<20)
-#define VCPU_SHIFT 18
-#define VCPU_SIZE (1UL<<VCPU_SHIFT)
-#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE
-
-#define KVM_VM_BASE     (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE)
-#define KVM_VM_OFS      (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE)
-#define KVM_VM_BLOCK_SIZE     (1UL<<19)
-
-#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE)
-#define KVM_MEM_DIRTY_LOG_OFS  (KVM_VM_OFS+KVM_VM_BLOCK_SIZE)
-#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19)
-
-/* Get vpd, vhpt, tlb, vcpu, base*/
-#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE)
-#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE)
-#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE)
-#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE)
+#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
+
+/*
+ * kvm guest's data area looks as follow:
+ *
+ *            +----------------------+	-------	KVM_VM_DATA_SIZE
+ *	      |	    vcpu[n]'s data   |	 |     ___________________KVM_STK_OFFSET
+ *     	      |			     |	 |    /			  |
+ *     	      |	       ..........    |	 |   /vcpu's struct&stack |
+ *     	      |	       ..........    |	 |  /---------------------|---- 0
+ *	      |	    vcpu[5]'s data   |	 | /	   vpd		  |
+ *	      |	    vcpu[4]'s data   |	 |/-----------------------|
+ *	      |	    vcpu[3]'s data   |	 /	   vtlb		  |
+ *	      |	    vcpu[2]'s data   |	/|------------------------|
+ *	      |	    vcpu[1]'s data   |/  |	   vhpt		  |
+ *	      |	    vcpu[0]'s data   |____________________________|
+ *            +----------------------+	 |
+ *	      |	   memory dirty log  |	 |
+ *            +----------------------+	 |
+ *	      |	   vm's data struct  |	 |
+ *            +----------------------+	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |	  vm's p2m table  |	 |
+ *	      |			     |	 |
+ *            |			     |	 |
+ *	      |			     |	 |  |
+ * vm's data->|			     |   |  |
+ *	      +----------------------+ ------- 0
+ * To support large memory, needs to increase the size of p2m.
+ * To support more vcpus, needs to ensure it has enough space to
+ * hold vcpus' data.
+ */
+
+#define KVM_VM_DATA_SHIFT	26
+#define KVM_VM_DATA_SIZE	(__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
+#define KVM_VM_DATA_BASE	(KVM_VMM_BASE + KVM_VM_DATA_SIZE)
+
+#define KVM_P2M_BASE		KVM_VM_DATA_BASE
+#define KVM_P2M_SIZE		(__IA64_UL_CONST(24) << 20)
+
+#define VHPT_SHIFT		16
+#define VHPT_SIZE		(__IA64_UL_CONST(1) << VHPT_SHIFT)
+#define VHPT_NUM_ENTRIES	(__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
+
+#define VTLB_SHIFT		16
+#define VTLB_SIZE		(__IA64_UL_CONST(1) << VTLB_SHIFT)
+#define VTLB_NUM_ENTRIES	(1UL << (VHPT_SHIFT-5))
+
+#define VPD_SHIFT		16
+#define VPD_SIZE		(__IA64_UL_CONST(1) << VPD_SHIFT)
+
+#define VCPU_STRUCT_SHIFT	16
+#define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
+
+#define KVM_STK_OFFSET		VCPU_STRUCT_SIZE
+
+#define KVM_VM_STRUCT_SHIFT	19
+#define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
+
+#define KVM_MEM_DIRY_LOG_SHIFT	19
+#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+/*Define the max vcpus and memory for Guests.*/
+#define KVM_MAX_VCPUS	(KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
+			KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
+#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/kvm.h>
+#include <linux/kvm_para.h>
+#include <linux/kvm_types.h>
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/page.h>
+
+struct kvm_vcpu_data {
+	char vcpu_vhpt[VHPT_SIZE];
+	char vcpu_vtlb[VTLB_SIZE];
+	char vcpu_vpd[VPD_SIZE];
+	char vcpu_struct[VCPU_STRUCT_SIZE];
+};
+
+struct kvm_vm_data {
+	char kvm_p2m[KVM_P2M_SIZE];
+	char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
+	char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
+	struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
+};
+
+#define VCPU_BASE(n)	KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, vcpu_data[n])
+#define VM_BASE		KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, kvm_vm_struct)
+#define KVM_MEM_DIRTY_LOG_BASE	KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
+
+#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
+#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
+#define VPD_BASE(n)  (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
+#define VCPU_STRUCT_BASE(n)	(VCPU_BASE(n) + \
+				offsetof(struct kvm_vcpu_data, vcpu_struct))
 
 /*IO section definitions*/
 #define IOREQ_READ      1
@@ -403,14 +451,13 @@ struct kvm_sal_data {
 };
 
 struct kvm_arch {
+	spinlock_t dirty_log_lock;
+
 	unsigned long	vm_base;
 	unsigned long	metaphysical_rr0;
 	unsigned long	metaphysical_rr4;
 	unsigned long	vmm_init_rr;
-	unsigned long	vhpt_base;
-	unsigned long	vtlb_base;
-	unsigned long 	vpd_base;
-	spinlock_t dirty_log_lock;
+
 	struct kvm_ioapic *vioapic;
 	struct kvm_vm_stat stat;
 	struct kvm_sal_data rdv_sal_data;
@@ -512,7 +559,7 @@ struct kvm_pt_regs {
 
 static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
 {
-	return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
+	return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
 }
 
 typedef int kvm_vmm_entry(void);
@@ -531,5 +578,6 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
 void kvm_sal_emul(struct kvm_vcpu *vcpu);
 
 static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
+#endif /* __ASSEMBLY__*/
 
 #endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index af1464f..43e45f6 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -698,27 +698,24 @@ out:
 	return r;
 }
 
-/*
- * Allocate 16M memory for every vm to hold its specific data.
- * Its memory map is defined in kvm_host.h.
- */
 static struct kvm *kvm_alloc_kvm(void)
 {
 
 	struct kvm *kvm;
 	uint64_t  vm_base;
 
+	BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
+
 	vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
 
 	if (!vm_base)
 		return ERR_PTR(-ENOMEM);
-	printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
 
-	/* Zero all pages before use! */
 	memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-
-	kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
+	kvm = (struct kvm *)(vm_base +
+			offsetof(struct kvm_vm_data, kvm_vm_struct));
 	kvm->arch.vm_base = vm_base;
+	printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
 
 	return kvm;
 }
@@ -760,21 +757,12 @@ static void kvm_build_io_pmt(struct kvm *kvm)
 
 static void kvm_init_vm(struct kvm *kvm)
 {
-	long vm_base;
-
 	BUG_ON(!kvm);
 
 	kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
 	kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
 	kvm->arch.vmm_init_rr = VMM_INIT_RR;
 
-	vm_base = kvm->arch.vm_base;
-	if (vm_base) {
-		kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
-		kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
-		kvm->arch.vpd_base  = vm_base + KVM_VPD_OFS;
-	}
-
 	/*
 	 *Fill P2M entries for MMIO/IO ranges
 	 */
@@ -864,7 +852,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 		goto out;
 	r = copy_from_user(vcpu + 1, regs->saved_stack +
 			sizeof(struct kvm_vcpu),
-			IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
+			KVM_STK_OFFSET - sizeof(struct kvm_vcpu));
 	if (r)
 		goto out;
 	vcpu->arch.exit_data =
@@ -1166,10 +1154,11 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 		/*Set entry address for first run.*/
 		regs->cr_iip = PALE_RESET_ENTRY;
 
-		/*Initilize itc offset for vcpus*/
+		/*Initialize itc offset for vcpus*/
 		itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
-		for (i = 0; i < MAX_VCPU_NUM; i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+		for (i = 0; i < KVM_MAX_VCPUS; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu +
+					sizeof(struct kvm_vcpu_data) * i);
 			v->arch.itc_offset = itc_offset;
 			v->arch.last_itc = 0;
 		}
@@ -1183,7 +1172,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.apic->vcpu = vcpu;
 
 	p_ctx->gr[1] = 0;
-	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
+	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
 	p_ctx->gr[13] = (unsigned long)vmm_vcpu;
 	p_ctx->psr = 0x1008522000UL;
 	p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
@@ -1218,12 +1207,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.hlt_timer.function = hlt_timer_fn;
 
 	vcpu->arch.last_run_cpu = -1;
-	vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
+	vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
 	vcpu->arch.vsa_base = kvm_vsa_base;
 	vcpu->arch.__gp = kvm_vmm_gp;
 	vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
-	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
-	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
+	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
+	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
 	init_ptce_info(vcpu);
 
 	r = 0;
@@ -1273,12 +1262,22 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	int r;
 	int cpu;
 
+	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
+
+	r = -EINVAL;
+	if (id >= KVM_MAX_VCPUS) {
+		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
+				KVM_MAX_VCPUS);
+		goto fail;
+	}
+
 	r = -ENOMEM;
 	if (!vm_base) {
 		printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
 		goto fail;
 	}
-	vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
+	vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
+					vcpu_data[id].vcpu_struct));
 	vcpu->kvm = kvm;
 
 	cpu = get_cpu();
@@ -1396,7 +1395,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 					sizeof(union context));
 	if (r)
 		goto out;
-	r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
+	r = copy_to_user(regs->saved_stack, (void *)vcpu, KVM_STK_OFFSET);
 	if (r)
 		goto out;
 	SAVE_REGS(mp_state);
@@ -1457,6 +1456,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
 	unsigned long base_gfn = memslot->base_gfn;
 
+	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
+		return -ENOMEM;
+
 	for (i = 0; i < npages; i++) {
 		pfn = gfn_to_pfn(kvm, base_gfn + i);
 		if (!kvm_is_mmio_pfn(pfn)) {
@@ -1631,8 +1633,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int r, i;
 	long n, base;
-	unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
-					+ KVM_MEM_DIRTY_LOG_OFS);
+	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
+			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
 
 	r = -EINVAL;
 	if (log->slot >= KVM_MEMORY_SLOTS)
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
index 2cc41d1..b2bcaa2 100644
--- a/arch/ia64/kvm/kvm_minstate.h
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -24,6 +24,8 @@
 #include <asm/asmmacro.h>
 #include <asm/types.h>
 #include <asm/kregs.h>
+#include <asm/kvm_host.h>
+
 #include "asm-offsets.h"
 
 #define KVM_MINSTATE_START_SAVE_MIN	     					\
@@ -33,7 +35,7 @@
 	addl r22 = VMM_RBS_OFFSET,r1;            /* compute base of RBS */	\
 	;;									\
 	lfetch.fault.excl.nt1 [r22];						\
-	addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1;  /* compute base of memory stack */  \
+	addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;  \
 	mov r23 = ar.bspstore;			/* save ar.bspstore */          \
 	;;									\
 	mov ar.bspstore = r22;				/* switch to kernel RBS */\
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
index e585c46..dd979e0 100644
--- a/arch/ia64/kvm/misc.h
+++ b/arch/ia64/kvm/misc.h
@@ -27,7 +27,8 @@
  */
 static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
 {
-	return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
+	return (uint64_t *)(kvm->arch.vm_base +
+				offsetof(struct kvm_vm_data, kvm_p2m));
 }
 
 static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index e44027c..a528d70 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -816,8 +816,9 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
 	unsigned long vitv = VCPU(vcpu, itv);
 
 	if (vcpu->vcpu_id == 0) {
-		for (i = 0; i < MAX_VCPU_NUM; i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+		for (i = 0; i < KVM_MAX_VCPUS; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu +
+					sizeof(struct kvm_vcpu_data) * i);
 			VMX(v, itc_offset) = itc_offset;
 			VMX(v, last_itc) = 0;
 		}
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index e22b933..6b6307a 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -183,8 +183,8 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
 	u64 i, dirty_pages = 1;
 	u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
 	spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
-	void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE)
-						+ KVM_MEM_DIRTY_LOG_OFS;
+	void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
+
 	dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
 
 	vmm_spin_lock(lock);
-- 
cgit v0.10.2


From 853dafb62b386a3a75808483a120998e734eb6e1 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Thu, 23 Oct 2008 15:03:38 +0800
Subject: KVM: ia64: Remove lock held by halted vcpu

Remove the lock protection for kvm halt logic, otherwise,
once other vcpus want to acquire the lock, and they have to
wait all vcpus are waken up from halt.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 43e45f6..70eb829 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -439,7 +439,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 		expires = div64_u64(itc_diff, cyc_per_usec);
 		kt = ktime_set(0, 1000 * expires);
 
-		down_read(&vcpu->kvm->slots_lock);
 		vcpu->arch.ht_active = 1;
 		hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
 
@@ -452,7 +451,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 			if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
 				vcpu->arch.mp_state =
 					KVM_MP_STATE_RUNNABLE;
-		up_read(&vcpu->kvm->slots_lock);
 
 		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
 			return -EINTR;
-- 
cgit v0.10.2


From 6eb55818c043b097c83828da8430fcb9a02fdb89 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Fri, 31 Oct 2008 12:37:41 +0800
Subject: KVM: Enable Function Level Reset for assigned device

Ideally, every assigned device should in a clear condition before and after
assignment, so that the former state of device won't affect later work.
Some devices provide a mechanism named Function Level Reset, which is
defined in PCI/PCI-e document. We should execute it before and after device
assignment.

(But sadly, the feature is new, and most device on the market now don't
support it. We are considering using D0/D3hot transmit to emulate it later,
but not that elegant and reliable as FLR itself.)

[Update: Reminded by Xiantao, execute FLR after we ensure that the device can
be assigned to the guest.]

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 38f79b6..9a4a39c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4148,8 +4148,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-	kvm_iommu_unmap_guest(kvm);
 	kvm_free_all_assigned_devices(kvm);
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4f43abe..1838052 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -152,6 +152,8 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 		 */
 		kvm_put_kvm(kvm);
 
+	pci_reset_function(assigned_dev->dev);
+
 	pci_release_regions(assigned_dev->dev);
 	pci_disable_device(assigned_dev->dev);
 	pci_dev_put(assigned_dev->dev);
@@ -283,6 +285,9 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 		       __func__);
 		goto out_disable;
 	}
+
+	pci_reset_function(dev);
+
 	match->assigned_dev_id = assigned_dev->assigned_dev_id;
 	match->host_busnr = assigned_dev->busnr;
 	match->host_devfn = assigned_dev->devfn;
-- 
cgit v0.10.2


From 2843099fee32a6020e1caa95c6026f28b5d43bff Mon Sep 17 00:00:00 2001
From: Izik Eidus <ieidus@redhat.com>
Date: Fri, 3 Oct 2008 17:40:32 +0300
Subject: KVM: MMU: Fix aliased gfns treated as unaliased

Some areas of kvm x86 mmu are using gfn offset inside a slot without
unaliasing the gfn first.  This patch makes sure that the gfn will be
unaliased and add gfn_to_memslot_unaliased() to save the calculating
of the gfn unaliasing in case we have it unaliased already.

Signed-off-by: Izik Eidus <ieidus@redhat.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09e6c56..99e3cc1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -617,6 +617,8 @@ void kvm_disable_tdp(void);
 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 int complete_pio(struct kvm_vcpu *vcpu);
 
+struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
+
 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 {
 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8687758..8904e8a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -386,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 {
 	int *write_count;
 
-	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+	gfn = unalias_gfn(kvm, gfn);
+	write_count = slot_largepage_idx(gfn,
+					 gfn_to_memslot_unaliased(kvm, gfn));
 	*write_count += 1;
 }
 
@@ -394,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
 {
 	int *write_count;
 
-	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+	gfn = unalias_gfn(kvm, gfn);
+	write_count = slot_largepage_idx(gfn,
+					 gfn_to_memslot_unaliased(kvm, gfn));
 	*write_count -= 1;
 	WARN_ON(*write_count < 0);
 }
 
 static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
 {
-	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+	struct kvm_memory_slot *slot;
 	int *largepage_idx;
 
+	gfn = unalias_gfn(kvm, gfn);
+	slot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (slot) {
 		largepage_idx = slot_largepage_idx(gfn, slot);
 		return *largepage_idx;
@@ -2973,8 +2979,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
 		if (sp->role.metaphysical)
 			continue;
 
-		slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
 		gfn = unalias_gfn(vcpu->kvm, sp->gfn);
+		slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
 		rmapp = &slot->rmap[gfn - slot->base_gfn];
 		if (*rmapp)
 			printk(KERN_ERR "%s: (%s) shadow page has writable"
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1838052..a65baa9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -923,7 +923,7 @@ int kvm_is_error_hva(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
+struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
 
@@ -936,11 +936,12 @@ static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 	}
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
 
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
 	gfn = unalias_gfn(kvm, gfn);
-	return __gfn_to_memslot(kvm, gfn);
+	return gfn_to_memslot_unaliased(kvm, gfn);
 }
 
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
@@ -964,7 +965,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memory_slot *slot;
 
 	gfn = unalias_gfn(kvm, gfn);
-	slot = __gfn_to_memslot(kvm, gfn);
+	slot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (!slot)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
@@ -1215,7 +1216,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	gfn = unalias_gfn(kvm, gfn);
-	memslot = __gfn_to_memslot(kvm, gfn);
+	memslot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
-- 
cgit v0.10.2


From a0d7b9f246074fab1f42678d203ef4ba281505f2 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:11 -0600
Subject: KVM: ppc: Move 440-specific TLB code into 44x_tlb.c

This will make it easier to provide implementations for other cores.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bb62ad8..4adb4a3 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -58,11 +58,11 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 extern int kvmppc_emulate_instruction(struct kvm_run *run,
                                       struct kvm_vcpu *vcpu);
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
+extern int kvmppc_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc);
 
 extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
                            u64 asid, u32 flags);
-extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
-                                  gva_t eend, u32 asid);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ad72c6f..dd75ab8 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -32,6 +32,34 @@
 
 static unsigned int kvmppc_tlb_44x_pos;
 
+#ifdef DEBUG
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_44x_tlbe *tlbe;
+	int i;
+
+	printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
+	printk("| %2s | %3s | %8s | %8s | %8s |\n",
+			"nr", "tid", "word0", "word1", "word2");
+
+	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+		tlbe = &vcpu->arch.guest_tlb[i];
+		if (tlbe->word0 & PPC44x_TLB_VALID)
+			printk(" G%2d |  %02X | %08X | %08X | %08X |\n",
+			       i, tlbe->tid, tlbe->word0, tlbe->word1,
+			       tlbe->word2);
+	}
+
+	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+		tlbe = &vcpu->arch.shadow_tlb[i];
+		if (tlbe->word0 & PPC44x_TLB_VALID)
+			printk(" S%2d | %02X | %08X | %08X | %08X |\n",
+			       i, tlbe->tid, tlbe->word0, tlbe->word1,
+			       tlbe->word2);
+	}
+}
+#endif
+
 static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
 {
 	/* Mask off reserved bits. */
@@ -191,8 +219,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 			handler);
 }
 
-void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
-                           gva_t eend, u32 asid)
+static void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
+                                  gva_t eend, u32 asid)
 {
 	unsigned int pid = !(asid & 0xff);
 	int i;
@@ -249,3 +277,109 @@ void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
 
 	vcpu->arch.shadow_pid = !usermode;
 }
+
+static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+                             const struct tlbe *tlbe)
+{
+	gpa_t gpa;
+
+	if (!get_tlb_v(tlbe))
+		return 0;
+
+	/* Does it match current guest AS? */
+	/* XXX what about IS != DS? */
+	if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+		return 0;
+
+	gpa = get_tlb_raddr(tlbe);
+	if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+		/* Mapping is not for RAM. */
+		return 0;
+
+	return 1;
+}
+
+int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
+{
+	u64 eaddr;
+	u64 raddr;
+	u64 asid;
+	u32 flags;
+	struct tlbe *tlbe;
+	unsigned int index;
+
+	index = vcpu->arch.gpr[ra];
+	if (index > PPC44x_TLB_SIZE) {
+		printk("%s: index %d\n", __func__, index);
+		kvmppc_dump_vcpu(vcpu);
+		return EMULATE_FAIL;
+	}
+
+	tlbe = &vcpu->arch.guest_tlb[index];
+
+	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
+	if (tlbe->word0 & PPC44x_TLB_VALID) {
+		eaddr = get_tlb_eaddr(tlbe);
+		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+		kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
+	}
+
+	switch (ws) {
+	case PPC44x_TLB_PAGEID:
+		tlbe->tid = vcpu->arch.mmucr & 0xff;
+		tlbe->word0 = vcpu->arch.gpr[rs];
+		break;
+
+	case PPC44x_TLB_XLAT:
+		tlbe->word1 = vcpu->arch.gpr[rs];
+		break;
+
+	case PPC44x_TLB_ATTRIB:
+		tlbe->word2 = vcpu->arch.gpr[rs];
+		break;
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	if (tlbe_is_host_safe(vcpu, tlbe)) {
+		eaddr = get_tlb_eaddr(tlbe);
+		raddr = get_tlb_raddr(tlbe);
+		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+		flags = tlbe->word2 & 0xffff;
+
+		/* Create a 4KB mapping on the host. If the guest wanted a
+		 * large page, only the first 4KB is mapped here and the rest
+		 * are mapped on the fly. */
+		kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
+	}
+
+	KVMTRACE_5D(GTLB_WRITE, vcpu, index,
+	            tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
+	            handler);
+
+	return EMULATE_DONE;
+}
+
+int kvmppc_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
+{
+	u32 ea;
+	int index;
+	unsigned int as = get_mmucr_sts(vcpu);
+	unsigned int pid = get_mmucr_stid(vcpu);
+
+	ea = vcpu->arch.gpr[rb];
+	if (ra)
+		ea += vcpu->arch.gpr[ra];
+
+	index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
+	if (rc) {
+		if (index < 0)
+			vcpu->arch.cr &= ~0x20000000;
+		else
+			vcpu->arch.cr |= 0x20000000;
+	}
+	vcpu->arch.gpr[rt] = index;
+
+	return EMULATE_DONE;
+}
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
index 7b2591e..c0f8532 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke_guest.c
@@ -111,32 +111,6 @@ const unsigned char priority_exception[] = {
 };
 
 
-void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
-{
-	struct tlbe *tlbe;
-	int i;
-
-	printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
-	printk("| %2s | %3s | %8s | %8s | %8s |\n",
-			"nr", "tid", "word0", "word1", "word2");
-
-	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		tlbe = &vcpu->arch.guest_tlb[i];
-		if (tlbe->word0 & PPC44x_TLB_VALID)
-			printk(" G%2d |  %02X | %08X | %08X | %08X |\n",
-			       i, tlbe->tid, tlbe->word0, tlbe->word1,
-			       tlbe->word2);
-	}
-
-	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		tlbe = &vcpu->arch.shadow_tlb[i];
-		if (tlbe->word0 & PPC44x_TLB_VALID)
-			printk(" S%2d | %02X | %08X | %08X | %08X |\n",
-			       i, tlbe->tid, tlbe->word0, tlbe->word1,
-			       tlbe->word2);
-	}
-}
-
 /* TODO: use vcpu_printf() */
 void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 0fce4fb..0ce8ed5 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -29,8 +29,6 @@
 #include <asm/byteorder.h>
 #include <asm/kvm_ppc.h>
 
-#include "44x_tlb.h"
-
 /* Instruction decoding */
 static inline unsigned int get_op(u32 inst)
 {
@@ -87,96 +85,6 @@ static inline unsigned int get_d(u32 inst)
 	return inst & 0xffff;
 }
 
-static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
-                             const struct tlbe *tlbe)
-{
-	gpa_t gpa;
-
-	if (!get_tlb_v(tlbe))
-		return 0;
-
-	/* Does it match current guest AS? */
-	/* XXX what about IS != DS? */
-	if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
-		return 0;
-
-	gpa = get_tlb_raddr(tlbe);
-	if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
-		/* Mapping is not for RAM. */
-		return 0;
-
-	return 1;
-}
-
-static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
-{
-	u64 eaddr;
-	u64 raddr;
-	u64 asid;
-	u32 flags;
-	struct tlbe *tlbe;
-	unsigned int ra;
-	unsigned int rs;
-	unsigned int ws;
-	unsigned int index;
-
-	ra = get_ra(inst);
-	rs = get_rs(inst);
-	ws = get_ws(inst);
-
-	index = vcpu->arch.gpr[ra];
-	if (index > PPC44x_TLB_SIZE) {
-		printk("%s: index %d\n", __func__, index);
-		kvmppc_dump_vcpu(vcpu);
-		return EMULATE_FAIL;
-	}
-
-	tlbe = &vcpu->arch.guest_tlb[index];
-
-	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
-	if (tlbe->word0 & PPC44x_TLB_VALID) {
-		eaddr = get_tlb_eaddr(tlbe);
-		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
-		kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
-	}
-
-	switch (ws) {
-	case PPC44x_TLB_PAGEID:
-		tlbe->tid = vcpu->arch.mmucr & 0xff;
-		tlbe->word0 = vcpu->arch.gpr[rs];
-		break;
-
-	case PPC44x_TLB_XLAT:
-		tlbe->word1 = vcpu->arch.gpr[rs];
-		break;
-
-	case PPC44x_TLB_ATTRIB:
-		tlbe->word2 = vcpu->arch.gpr[rs];
-		break;
-
-	default:
-		return EMULATE_FAIL;
-	}
-
-	if (tlbe_is_host_safe(vcpu, tlbe)) {
-		eaddr = get_tlb_eaddr(tlbe);
-		raddr = get_tlb_raddr(tlbe);
-		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
-		flags = tlbe->word2 & 0xffff;
-
-		/* Create a 4KB mapping on the host. If the guest wanted a
-		 * large page, only the first 4KB is mapped here and the rest
-		 * are mapped on the fly. */
-		kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
-	}
-
-	KVMTRACE_5D(GTLB_WRITE, vcpu, index,
-			tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
-			handler);
-
-	return EMULATE_DONE;
-}
-
 static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.tcr & TCR_DIE) {
@@ -222,6 +130,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	int rc;
 	int rs;
 	int rt;
+	int ws;
 	int sprn;
 	int dcrn;
 	enum emulation_result emulated = EMULATE_DONE;
@@ -630,33 +539,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			break;
 
 		case 978:                                       /* tlbwe */
-			emulated = kvmppc_emul_tlbwe(vcpu, inst);
+			ra = get_ra(inst);
+			rs = get_rs(inst);
+			ws = get_ws(inst);
+			emulated = kvmppc_emul_tlbwe(vcpu, ra, rs, ws);
 			break;
 
-		case 914:       {                               /* tlbsx */
-			int index;
-			unsigned int as = get_mmucr_sts(vcpu);
-			unsigned int pid = get_mmucr_stid(vcpu);
-
+		case 914:                                       /* tlbsx */
 			rt = get_rt(inst);
 			ra = get_ra(inst);
 			rb = get_rb(inst);
 			rc = get_rc(inst);
-
-			ea = vcpu->arch.gpr[rb];
-			if (ra)
-				ea += vcpu->arch.gpr[ra];
-
-			index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
-			if (rc) {
-				if (index < 0)
-					vcpu->arch.cr &= ~0x20000000;
-				else
-					vcpu->arch.cr |= 0x20000000;
-			}
-			vcpu->arch.gpr[rt] = index;
-
-			}
+			emulated = kvmppc_emul_tlbsx(vcpu, rt, ra, rb, rc);
 			break;
 
 		case 790:                                       /* lhbrx */
-- 
cgit v0.10.2


From 0f55dc481ea5c4f87fc0161cb1b8c6e2cafae8fc Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:12 -0600
Subject: KVM: ppc: Rename "struct tlbe" to "struct kvmppc_44x_tlbe"

This will ease ports to other cores.

Also remove unused "struct kvm_tlb" while we're at it.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 34b52b7..df73351 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,7 +64,7 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-struct tlbe {
+struct kvmppc_44x_tlbe {
 	u32 tid; /* Only the low 8 bits are used. */
 	u32 word0;
 	u32 word1;
@@ -76,9 +76,9 @@ struct kvm_arch {
 
 struct kvm_vcpu_arch {
 	/* Unmodified copy of the guest's TLB. */
-	struct tlbe guest_tlb[PPC44x_TLB_SIZE];
+	struct kvmppc_44x_tlbe guest_tlb[PPC44x_TLB_SIZE];
 	/* TLB that's actually used when the guest is running. */
-	struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
+	struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
 	/* Pages which are referenced in the shadow TLB. */
 	struct page *shadow_pages[PPC44x_TLB_SIZE];
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 4adb4a3..39daeaa 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -29,11 +29,6 @@
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
 
-struct kvm_tlb {
-	struct tlbe guest_tlb[PPC44x_TLB_SIZE];
-	struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
-};
-
 enum emulation_result {
 	EMULATE_DONE,         /* no further processing */
 	EMULATE_DO_MMIO,      /* kvm_run filled with MMIO request */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 661d07d..0264c97 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -357,7 +357,7 @@ int main(void)
 	DEFINE(PTE_SIZE, sizeof(pte_t));
 
 #ifdef CONFIG_KVM
-	DEFINE(TLBE_BYTES, sizeof(struct tlbe));
+	DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
 
 	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index dd75ab8..5152fe5 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -86,7 +86,7 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
 
 	/* XXX Replace loop with fancy data structures. */
 	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		struct tlbe *tlbe = &vcpu->arch.guest_tlb[i];
+		struct kvmppc_44x_tlbe *tlbe = &vcpu->arch.guest_tlb[i];
 		unsigned int tid;
 
 		if (eaddr < get_tlb_eaddr(tlbe))
@@ -111,7 +111,8 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
 	return -1;
 }
 
-struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+struct kvmppc_44x_tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu,
+                                               gva_t eaddr)
 {
 	unsigned int as = !!(vcpu->arch.msr & MSR_IS);
 	unsigned int index;
@@ -122,7 +123,8 @@ struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
 	return &vcpu->arch.guest_tlb[index];
 }
 
-struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+struct kvmppc_44x_tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu,
+                                               gva_t eaddr)
 {
 	unsigned int as = !!(vcpu->arch.msr & MSR_DS);
 	unsigned int index;
@@ -133,7 +135,7 @@ struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
 	return &vcpu->arch.guest_tlb[index];
 }
 
-static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
+static int kvmppc_44x_tlbe_is_writable(struct kvmppc_44x_tlbe *tlbe)
 {
 	return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
 }
@@ -141,7 +143,7 @@ static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
 static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
                                       unsigned int index)
 {
-	struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
+	struct kvmppc_44x_tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
 	struct page *page = vcpu->arch.shadow_pages[index];
 
 	if (get_tlb_v(stlbe)) {
@@ -171,7 +173,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
                     u32 flags)
 {
 	struct page *new_page;
-	struct tlbe *stlbe;
+	struct kvmppc_44x_tlbe *stlbe;
 	hpa_t hpaddr;
 	unsigned int victim;
 
@@ -227,7 +229,7 @@ static void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
 
 	/* XXX Replace loop with fancy data structures. */
 	for (i = 0; i <= tlb_44x_hwater; i++) {
-		struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
+		struct kvmppc_44x_tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
 		unsigned int tid;
 
 		if (!get_tlb_v(stlbe))
@@ -262,7 +264,7 @@ void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
 	if (vcpu->arch.swap_pid) {
 		/* XXX Replace loop with fancy data structures. */
 		for (i = 0; i <= tlb_44x_hwater; i++) {
-			struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
+			struct kvmppc_44x_tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
 
 			/* Future optimization: clear only userspace mappings. */
 			kvmppc_44x_shadow_release(vcpu, i);
@@ -279,7 +281,7 @@ void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
 }
 
 static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
-                             const struct tlbe *tlbe)
+                             const struct kvmppc_44x_tlbe *tlbe)
 {
 	gpa_t gpa;
 
@@ -305,7 +307,7 @@ int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 	u64 raddr;
 	u64 asid;
 	u32 flags;
-	struct tlbe *tlbe;
+	struct kvmppc_44x_tlbe *tlbe;
 	unsigned int index;
 
 	index = vcpu->arch.gpr[ra];
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 2ccd46b..e5b0a76 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,48 +25,50 @@
 
 extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
                                 unsigned int pid, unsigned int as);
-extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
-extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern struct kvmppc_44x_tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu,
+                                                      gva_t eaddr);
+extern struct kvmppc_44x_tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu,
+                                                      gva_t eaddr);
 
 /* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return (tlbe->word0 >> 4) & 0xf;
 }
 
-static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return tlbe->word0 & 0xfffffc00;
 }
 
-static inline gva_t get_tlb_bytes(const struct tlbe *tlbe)
+static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
 {
 	unsigned int pgsize = get_tlb_size(tlbe);
 	return 1 << 10 << (pgsize << 1);
 }
 
-static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
 }
 
-static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
 {
 	u64 word1 = tlbe->word1;
 	return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
 }
 
-static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return tlbe->tid & 0xff;
 }
 
-static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return (tlbe->word0 >> 8) & 0x1;
 }
 
-static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return (tlbe->word0 >> 9) & 0x1;
 }
@@ -81,7 +83,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
 	return (vcpu->arch.mmucr >> 16) & 0x1;
 }
 
-static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr)
+static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
 {
 	unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
 
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
index c0f8532..41bbf4c 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke_guest.c
@@ -307,7 +307,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 
 	case BOOKE_INTERRUPT_DTLB_MISS: {
-		struct tlbe *gtlbe;
+		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.fault_dear;
 		gfn_t gfn;
 
@@ -347,7 +347,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 
 	case BOOKE_INTERRUPT_ITLB_MISS: {
-		struct tlbe *gtlbe;
+		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.pc;
 		gfn_t gfn;
 
@@ -442,7 +442,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
+	struct kvmppc_44x_tlbe *tlbe = &vcpu->arch.guest_tlb[0];
 
 	tlbe->tid = 0;
 	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
@@ -553,7 +553,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                   struct kvm_translation *tr)
 {
-	struct tlbe *gtlbe;
+	struct kvmppc_44x_tlbe *gtlbe;
 	int index;
 	gva_t eaddr;
 	u8 pid;
-- 
cgit v0.10.2


From d9fbd03d240380826c0ec16f927242be24ff6265 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:13 -0600
Subject: KVM: ppc: combine booke_guest.c and booke_host.c

The division was somewhat artificial and cumbersome, and had no functional
benefit anyways: we can only guests built for the real host processor.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 53aaa66..ffed96f 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,7 +20,7 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	# We can only run on Book E hosts so far
-	select KVM_BOOKE_HOST
+	select KVM_BOOKE
 	---help---
 	  Support hosting virtualized guest machines. You will also
 	  need to select one or more of the processor modules below.
@@ -30,8 +30,8 @@ config KVM
 
 	  If unsure, say N.
 
-config KVM_BOOKE_HOST
-	bool "KVM host support for Book E PowerPC processors"
+config KVM_BOOKE
+	bool "KVM support for Book E PowerPC processors"
 	depends on KVM && 44x
 	---help---
 	  Provides host support for KVM on Book E PowerPC processors. Currently
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2a5d439..a7f8574 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,10 +8,10 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 common-objs-$(CONFIG_KVM_TRACE)  += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 
-kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o
+kvm-objs := $(common-objs-y) powerpc.o emulate.o
 obj-$(CONFIG_KVM) += kvm.o
 
 AFLAGS_booke_interrupts.o := -I$(obj)
 
-kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o
-obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
+kvm-booke-objs := booke.o booke_interrupts.o 44x_tlb.o
+obj-$(CONFIG_KVM_BOOKE) += kvm-booke.o
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
new file mode 100644
index 0000000..b1e90a1
--- /dev/null
+++ b/arch/powerpc/kvm/booke.c
@@ -0,0 +1,639 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/cputable.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+#include <asm/cacheflush.h>
+
+#include "44x_tlb.h"
+
+unsigned long kvmppc_booke_handlers;
+
+#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ "exits",      VCPU_STAT(sum_exits) },
+	{ "mmio",       VCPU_STAT(mmio_exits) },
+	{ "dcr",        VCPU_STAT(dcr_exits) },
+	{ "sig",        VCPU_STAT(signal_exits) },
+	{ "light",      VCPU_STAT(light_exits) },
+	{ "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
+	{ "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
+	{ "dtlb_r",     VCPU_STAT(dtlb_real_miss_exits) },
+	{ "dtlb_v",     VCPU_STAT(dtlb_virt_miss_exits) },
+	{ "sysc",       VCPU_STAT(syscall_exits) },
+	{ "isi",        VCPU_STAT(isi_exits) },
+	{ "dsi",        VCPU_STAT(dsi_exits) },
+	{ "inst_emu",   VCPU_STAT(emulated_inst_exits) },
+	{ "dec",        VCPU_STAT(dec_exits) },
+	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
+	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+	{ NULL }
+};
+
+static const u32 interrupt_msr_mask[16] = {
+	[BOOKE_INTERRUPT_CRITICAL]      = MSR_ME,
+	[BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
+	[BOOKE_INTERRUPT_DATA_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_INST_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_EXTERNAL]      = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_ALIGNMENT]     = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_PROGRAM]       = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_FP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_SYSCALL]       = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_AP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_DECREMENTER]   = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_FIT]           = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_WATCHDOG]      = MSR_ME,
+	[BOOKE_INTERRUPT_DTLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_ITLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_DEBUG]         = MSR_ME,
+};
+
+const unsigned char exception_priority[] = {
+	[BOOKE_INTERRUPT_DATA_STORAGE] = 0,
+	[BOOKE_INTERRUPT_INST_STORAGE] = 1,
+	[BOOKE_INTERRUPT_ALIGNMENT] = 2,
+	[BOOKE_INTERRUPT_PROGRAM] = 3,
+	[BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
+	[BOOKE_INTERRUPT_SYSCALL] = 5,
+	[BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
+	[BOOKE_INTERRUPT_DTLB_MISS] = 7,
+	[BOOKE_INTERRUPT_ITLB_MISS] = 8,
+	[BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
+	[BOOKE_INTERRUPT_DEBUG] = 10,
+	[BOOKE_INTERRUPT_CRITICAL] = 11,
+	[BOOKE_INTERRUPT_WATCHDOG] = 12,
+	[BOOKE_INTERRUPT_EXTERNAL] = 13,
+	[BOOKE_INTERRUPT_FIT] = 14,
+	[BOOKE_INTERRUPT_DECREMENTER] = 15,
+};
+
+const unsigned char priority_exception[] = {
+	BOOKE_INTERRUPT_DATA_STORAGE,
+	BOOKE_INTERRUPT_INST_STORAGE,
+	BOOKE_INTERRUPT_ALIGNMENT,
+	BOOKE_INTERRUPT_PROGRAM,
+	BOOKE_INTERRUPT_FP_UNAVAIL,
+	BOOKE_INTERRUPT_SYSCALL,
+	BOOKE_INTERRUPT_AP_UNAVAIL,
+	BOOKE_INTERRUPT_DTLB_MISS,
+	BOOKE_INTERRUPT_ITLB_MISS,
+	BOOKE_INTERRUPT_MACHINE_CHECK,
+	BOOKE_INTERRUPT_DEBUG,
+	BOOKE_INTERRUPT_CRITICAL,
+	BOOKE_INTERRUPT_WATCHDOG,
+	BOOKE_INTERRUPT_EXTERNAL,
+	BOOKE_INTERRUPT_FIT,
+	BOOKE_INTERRUPT_DECREMENTER,
+};
+
+
+/* TODO: use vcpu_printf() */
+void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	printk("pc:   %08x msr:  %08x\n", vcpu->arch.pc, vcpu->arch.msr);
+	printk("lr:   %08x ctr:  %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
+	printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
+
+	printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
+
+	for (i = 0; i < 32; i += 4) {
+		printk("gpr%02d: %08x %08x %08x %08x\n", i,
+		       vcpu->arch.gpr[i],
+		       vcpu->arch.gpr[i+1],
+		       vcpu->arch.gpr[i+2],
+		       vcpu->arch.gpr[i+3]);
+	}
+}
+
+/* Check if we are ready to deliver the interrupt */
+static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+{
+	int r;
+
+	switch (interrupt) {
+	case BOOKE_INTERRUPT_CRITICAL:
+		r = vcpu->arch.msr & MSR_CE;
+		break;
+	case BOOKE_INTERRUPT_MACHINE_CHECK:
+		r = vcpu->arch.msr & MSR_ME;
+		break;
+	case BOOKE_INTERRUPT_EXTERNAL:
+		r = vcpu->arch.msr & MSR_EE;
+		break;
+	case BOOKE_INTERRUPT_DECREMENTER:
+		r = vcpu->arch.msr & MSR_EE;
+		break;
+	case BOOKE_INTERRUPT_FIT:
+		r = vcpu->arch.msr & MSR_EE;
+		break;
+	case BOOKE_INTERRUPT_WATCHDOG:
+		r = vcpu->arch.msr & MSR_CE;
+		break;
+	case BOOKE_INTERRUPT_DEBUG:
+		r = vcpu->arch.msr & MSR_DE;
+		break;
+	default:
+		r = 1;
+	}
+
+	return r;
+}
+
+static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+{
+	switch (interrupt) {
+	case BOOKE_INTERRUPT_DECREMENTER:
+		vcpu->arch.tsr |= TSR_DIS;
+		break;
+	}
+
+	vcpu->arch.srr0 = vcpu->arch.pc;
+	vcpu->arch.srr1 = vcpu->arch.msr;
+	vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
+	kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
+}
+
+/* Check pending exceptions and deliver one, if possible. */
+void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
+{
+	unsigned long *pending = &vcpu->arch.pending_exceptions;
+	unsigned int exception;
+	unsigned int priority;
+
+	priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
+	while (priority <= BOOKE_MAX_INTERRUPT) {
+		exception = priority_exception[priority];
+		if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
+			kvmppc_clear_exception(vcpu, exception);
+			kvmppc_deliver_interrupt(vcpu, exception);
+			break;
+		}
+
+		priority = find_next_bit(pending,
+		                         BITS_PER_BYTE * sizeof(*pending),
+		                         priority + 1);
+	}
+}
+
+/**
+ * kvmppc_handle_exit
+ *
+ * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
+ */
+int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                       unsigned int exit_nr)
+{
+	enum emulation_result er;
+	int r = RESUME_HOST;
+
+	local_irq_enable();
+
+	run->exit_reason = KVM_EXIT_UNKNOWN;
+	run->ready_for_interrupt_injection = 1;
+
+	switch (exit_nr) {
+	case BOOKE_INTERRUPT_MACHINE_CHECK:
+		printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
+		kvmppc_dump_vcpu(vcpu);
+		r = RESUME_HOST;
+		break;
+
+	case BOOKE_INTERRUPT_EXTERNAL:
+	case BOOKE_INTERRUPT_DECREMENTER:
+		/* Since we switched IVPR back to the host's value, the host
+		 * handled this interrupt the moment we enabled interrupts.
+		 * Now we just offer it a chance to reschedule the guest. */
+
+		/* XXX At this point the TLB still holds our shadow TLB, so if
+		 * we do reschedule the host will fault over it. Perhaps we
+		 * should politely restore the host's entries to minimize
+		 * misses before ceding control. */
+		if (need_resched())
+			cond_resched();
+		if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
+			vcpu->stat.dec_exits++;
+		else
+			vcpu->stat.ext_intr_exits++;
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_PROGRAM:
+		if (vcpu->arch.msr & MSR_PR) {
+			/* Program traps generated by user-level software must be handled
+			 * by the guest kernel. */
+			vcpu->arch.esr = vcpu->arch.fault_esr;
+			kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+			r = RESUME_GUEST;
+			break;
+		}
+
+		er = kvmppc_emulate_instruction(run, vcpu);
+		switch (er) {
+		case EMULATE_DONE:
+			/* Future optimization: only reload non-volatiles if
+			 * they were actually modified by emulation. */
+			vcpu->stat.emulated_inst_exits++;
+			r = RESUME_GUEST_NV;
+			break;
+		case EMULATE_DO_DCR:
+			run->exit_reason = KVM_EXIT_DCR;
+			r = RESUME_HOST;
+			break;
+		case EMULATE_FAIL:
+			/* XXX Deliver Program interrupt to guest. */
+			printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
+			       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+			/* For debugging, encode the failing instruction and
+			 * report it to userspace. */
+			run->hw.hardware_exit_reason = ~0ULL << 32;
+			run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+			r = RESUME_HOST;
+			break;
+		default:
+			BUG();
+		}
+		break;
+
+	case BOOKE_INTERRUPT_FP_UNAVAIL:
+		kvmppc_queue_exception(vcpu, exit_nr);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_DATA_STORAGE:
+		vcpu->arch.dear = vcpu->arch.fault_dear;
+		vcpu->arch.esr = vcpu->arch.fault_esr;
+		kvmppc_queue_exception(vcpu, exit_nr);
+		vcpu->stat.dsi_exits++;
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_INST_STORAGE:
+		vcpu->arch.esr = vcpu->arch.fault_esr;
+		kvmppc_queue_exception(vcpu, exit_nr);
+		vcpu->stat.isi_exits++;
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_SYSCALL:
+		kvmppc_queue_exception(vcpu, exit_nr);
+		vcpu->stat.syscall_exits++;
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_DTLB_MISS: {
+		struct kvmppc_44x_tlbe *gtlbe;
+		unsigned long eaddr = vcpu->arch.fault_dear;
+		gfn_t gfn;
+
+		/* Check the guest TLB. */
+		gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
+		if (!gtlbe) {
+			/* The guest didn't have a mapping for it. */
+			kvmppc_queue_exception(vcpu, exit_nr);
+			vcpu->arch.dear = vcpu->arch.fault_dear;
+			vcpu->arch.esr = vcpu->arch.fault_esr;
+			vcpu->stat.dtlb_real_miss_exits++;
+			r = RESUME_GUEST;
+			break;
+		}
+
+		vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
+		gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			/* The guest TLB had a mapping, but the shadow TLB
+			 * didn't, and it is RAM. This could be because:
+			 * a) the entry is mapping the host kernel, or
+			 * b) the guest used a large mapping which we're faking
+			 * Either way, we need to satisfy the fault without
+			 * invoking the guest. */
+			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
+			               gtlbe->word2);
+			vcpu->stat.dtlb_virt_miss_exits++;
+			r = RESUME_GUEST;
+		} else {
+			/* Guest has mapped and accessed a page which is not
+			 * actually RAM. */
+			r = kvmppc_emulate_mmio(run, vcpu);
+		}
+
+		break;
+	}
+
+	case BOOKE_INTERRUPT_ITLB_MISS: {
+		struct kvmppc_44x_tlbe *gtlbe;
+		unsigned long eaddr = vcpu->arch.pc;
+		gfn_t gfn;
+
+		r = RESUME_GUEST;
+
+		/* Check the guest TLB. */
+		gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
+		if (!gtlbe) {
+			/* The guest didn't have a mapping for it. */
+			kvmppc_queue_exception(vcpu, exit_nr);
+			vcpu->stat.itlb_real_miss_exits++;
+			break;
+		}
+
+		vcpu->stat.itlb_virt_miss_exits++;
+
+		gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			/* The guest TLB had a mapping, but the shadow TLB
+			 * didn't. This could be because:
+			 * a) the entry is mapping the host kernel, or
+			 * b) the guest used a large mapping which we're faking
+			 * Either way, we need to satisfy the fault without
+			 * invoking the guest. */
+			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
+			               gtlbe->word2);
+		} else {
+			/* Guest mapped and leaped at non-RAM! */
+			kvmppc_queue_exception(vcpu,
+			                       BOOKE_INTERRUPT_MACHINE_CHECK);
+		}
+
+		break;
+	}
+
+	case BOOKE_INTERRUPT_DEBUG: {
+		u32 dbsr;
+
+		vcpu->arch.pc = mfspr(SPRN_CSRR0);
+
+		/* clear IAC events in DBSR register */
+		dbsr = mfspr(SPRN_DBSR);
+		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
+		mtspr(SPRN_DBSR, dbsr);
+
+		run->exit_reason = KVM_EXIT_DEBUG;
+		r = RESUME_HOST;
+		break;
+	}
+
+	default:
+		printk(KERN_EMERG "exit_nr %d\n", exit_nr);
+		BUG();
+	}
+
+	local_irq_disable();
+
+	kvmppc_check_and_deliver_interrupts(vcpu);
+
+	/* Do some exit accounting. */
+	vcpu->stat.sum_exits++;
+	if (!(r & RESUME_HOST)) {
+		/* To avoid clobbering exit_reason, only check for signals if
+		 * we aren't already exiting to userspace for some other
+		 * reason. */
+		if (signal_pending(current)) {
+			run->exit_reason = KVM_EXIT_INTR;
+			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
+
+			vcpu->stat.signal_exits++;
+		} else {
+			vcpu->stat.light_exits++;
+		}
+	} else {
+		switch (run->exit_reason) {
+		case KVM_EXIT_MMIO:
+			vcpu->stat.mmio_exits++;
+			break;
+		case KVM_EXIT_DCR:
+			vcpu->stat.dcr_exits++;
+			break;
+		case KVM_EXIT_INTR:
+			vcpu->stat.signal_exits++;
+			break;
+		}
+	}
+
+	return r;
+}
+
+/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_44x_tlbe *tlbe = &vcpu->arch.guest_tlb[0];
+
+	tlbe->tid = 0;
+	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
+	tlbe->word1 = 0;
+	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
+
+	tlbe++;
+	tlbe->tid = 0;
+	tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
+	tlbe->word1 = 0xef600000;
+	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
+	              | PPC44x_TLB_I | PPC44x_TLB_G;
+
+	vcpu->arch.pc = 0;
+	vcpu->arch.msr = 0;
+	vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
+
+	vcpu->arch.shadow_pid = 1;
+
+	/* Eye-catching number so we know if the guest takes an interrupt
+	 * before it's programmed its own IVPR. */
+	vcpu->arch.ivpr = 0x55550000;
+
+	/* Since the guest can directly access the timebase, it must know the
+	 * real timebase frequency. Accordingly, it must see the state of
+	 * CCR1[TCS]. */
+	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+
+	regs->pc = vcpu->arch.pc;
+	regs->cr = vcpu->arch.cr;
+	regs->ctr = vcpu->arch.ctr;
+	regs->lr = vcpu->arch.lr;
+	regs->xer = vcpu->arch.xer;
+	regs->msr = vcpu->arch.msr;
+	regs->srr0 = vcpu->arch.srr0;
+	regs->srr1 = vcpu->arch.srr1;
+	regs->pid = vcpu->arch.pid;
+	regs->sprg0 = vcpu->arch.sprg0;
+	regs->sprg1 = vcpu->arch.sprg1;
+	regs->sprg2 = vcpu->arch.sprg2;
+	regs->sprg3 = vcpu->arch.sprg3;
+	regs->sprg5 = vcpu->arch.sprg4;
+	regs->sprg6 = vcpu->arch.sprg5;
+	regs->sprg7 = vcpu->arch.sprg6;
+
+	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+		regs->gpr[i] = vcpu->arch.gpr[i];
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+
+	vcpu->arch.pc = regs->pc;
+	vcpu->arch.cr = regs->cr;
+	vcpu->arch.ctr = regs->ctr;
+	vcpu->arch.lr = regs->lr;
+	vcpu->arch.xer = regs->xer;
+	vcpu->arch.msr = regs->msr;
+	vcpu->arch.srr0 = regs->srr0;
+	vcpu->arch.srr1 = regs->srr1;
+	vcpu->arch.sprg0 = regs->sprg0;
+	vcpu->arch.sprg1 = regs->sprg1;
+	vcpu->arch.sprg2 = regs->sprg2;
+	vcpu->arch.sprg3 = regs->sprg3;
+	vcpu->arch.sprg5 = regs->sprg4;
+	vcpu->arch.sprg6 = regs->sprg5;
+	vcpu->arch.sprg7 = regs->sprg6;
+
+	for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
+		vcpu->arch.gpr[i] = regs->gpr[i];
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -ENOTSUPP;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+                                  struct kvm_translation *tr)
+{
+	struct kvmppc_44x_tlbe *gtlbe;
+	int index;
+	gva_t eaddr;
+	u8 pid;
+	u8 as;
+
+	eaddr = tr->linear_address;
+	pid = (tr->linear_address >> 32) & 0xff;
+	as = (tr->linear_address >> 40) & 0x1;
+
+	index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
+	if (index == -1) {
+		tr->valid = 0;
+		return 0;
+	}
+
+	gtlbe = &vcpu->arch.guest_tlb[index];
+
+	tr->physical_address = tlb_xlate(gtlbe, eaddr);
+	/* XXX what does "writeable" and "usermode" even mean? */
+	tr->valid = 1;
+
+	return 0;
+}
+
+static int kvmppc_booke_init(void)
+{
+	unsigned long ivor[16];
+	unsigned long max_ivor = 0;
+	int i;
+
+	/* We install our own exception handlers by hijacking IVPR. IVPR must
+	 * be 16-bit aligned, so we need a 64KB allocation. */
+	kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+	                                         VCPU_SIZE_ORDER);
+	if (!kvmppc_booke_handlers)
+		return -ENOMEM;
+
+	/* XXX make sure our handlers are smaller than Linux's */
+
+	/* Copy our interrupt handlers to match host IVORs. That way we don't
+	 * have to swap the IVORs on every guest/host transition. */
+	ivor[0] = mfspr(SPRN_IVOR0);
+	ivor[1] = mfspr(SPRN_IVOR1);
+	ivor[2] = mfspr(SPRN_IVOR2);
+	ivor[3] = mfspr(SPRN_IVOR3);
+	ivor[4] = mfspr(SPRN_IVOR4);
+	ivor[5] = mfspr(SPRN_IVOR5);
+	ivor[6] = mfspr(SPRN_IVOR6);
+	ivor[7] = mfspr(SPRN_IVOR7);
+	ivor[8] = mfspr(SPRN_IVOR8);
+	ivor[9] = mfspr(SPRN_IVOR9);
+	ivor[10] = mfspr(SPRN_IVOR10);
+	ivor[11] = mfspr(SPRN_IVOR11);
+	ivor[12] = mfspr(SPRN_IVOR12);
+	ivor[13] = mfspr(SPRN_IVOR13);
+	ivor[14] = mfspr(SPRN_IVOR14);
+	ivor[15] = mfspr(SPRN_IVOR15);
+
+	for (i = 0; i < 16; i++) {
+		if (ivor[i] > max_ivor)
+			max_ivor = ivor[i];
+
+		memcpy((void *)kvmppc_booke_handlers + ivor[i],
+		       kvmppc_handlers_start + i * kvmppc_handler_len,
+		       kvmppc_handler_len);
+	}
+	flush_icache_range(kvmppc_booke_handlers,
+	                   kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+
+	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+}
+
+static void __exit kvmppc_booke_exit(void)
+{
+	free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
+	kvm_exit();
+}
+
+module_init(kvmppc_booke_init)
+module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
deleted file mode 100644
index 41bbf4c..0000000
--- a/arch/powerpc/kvm/booke_guest.c
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright IBM Corp. 2007
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
- */
-
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/kvm_host.h>
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <asm/cputable.h>
-#include <asm/uaccess.h>
-#include <asm/kvm_ppc.h>
-
-#include "44x_tlb.h"
-
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
-
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	{ "exits",      VCPU_STAT(sum_exits) },
-	{ "mmio",       VCPU_STAT(mmio_exits) },
-	{ "dcr",        VCPU_STAT(dcr_exits) },
-	{ "sig",        VCPU_STAT(signal_exits) },
-	{ "light",      VCPU_STAT(light_exits) },
-	{ "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
-	{ "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
-	{ "dtlb_r",     VCPU_STAT(dtlb_real_miss_exits) },
-	{ "dtlb_v",     VCPU_STAT(dtlb_virt_miss_exits) },
-	{ "sysc",       VCPU_STAT(syscall_exits) },
-	{ "isi",        VCPU_STAT(isi_exits) },
-	{ "dsi",        VCPU_STAT(dsi_exits) },
-	{ "inst_emu",   VCPU_STAT(emulated_inst_exits) },
-	{ "dec",        VCPU_STAT(dec_exits) },
-	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
-	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
-	{ NULL }
-};
-
-static const u32 interrupt_msr_mask[16] = {
-	[BOOKE_INTERRUPT_CRITICAL]      = MSR_ME,
-	[BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
-	[BOOKE_INTERRUPT_DATA_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_INST_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_EXTERNAL]      = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_ALIGNMENT]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_PROGRAM]       = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_FP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_SYSCALL]       = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_AP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_DECREMENTER]   = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_FIT]           = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_WATCHDOG]      = MSR_ME,
-	[BOOKE_INTERRUPT_DTLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_ITLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_DEBUG]         = MSR_ME,
-};
-
-const unsigned char exception_priority[] = {
-	[BOOKE_INTERRUPT_DATA_STORAGE] = 0,
-	[BOOKE_INTERRUPT_INST_STORAGE] = 1,
-	[BOOKE_INTERRUPT_ALIGNMENT] = 2,
-	[BOOKE_INTERRUPT_PROGRAM] = 3,
-	[BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
-	[BOOKE_INTERRUPT_SYSCALL] = 5,
-	[BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
-	[BOOKE_INTERRUPT_DTLB_MISS] = 7,
-	[BOOKE_INTERRUPT_ITLB_MISS] = 8,
-	[BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
-	[BOOKE_INTERRUPT_DEBUG] = 10,
-	[BOOKE_INTERRUPT_CRITICAL] = 11,
-	[BOOKE_INTERRUPT_WATCHDOG] = 12,
-	[BOOKE_INTERRUPT_EXTERNAL] = 13,
-	[BOOKE_INTERRUPT_FIT] = 14,
-	[BOOKE_INTERRUPT_DECREMENTER] = 15,
-};
-
-const unsigned char priority_exception[] = {
-	BOOKE_INTERRUPT_DATA_STORAGE,
-	BOOKE_INTERRUPT_INST_STORAGE,
-	BOOKE_INTERRUPT_ALIGNMENT,
-	BOOKE_INTERRUPT_PROGRAM,
-	BOOKE_INTERRUPT_FP_UNAVAIL,
-	BOOKE_INTERRUPT_SYSCALL,
-	BOOKE_INTERRUPT_AP_UNAVAIL,
-	BOOKE_INTERRUPT_DTLB_MISS,
-	BOOKE_INTERRUPT_ITLB_MISS,
-	BOOKE_INTERRUPT_MACHINE_CHECK,
-	BOOKE_INTERRUPT_DEBUG,
-	BOOKE_INTERRUPT_CRITICAL,
-	BOOKE_INTERRUPT_WATCHDOG,
-	BOOKE_INTERRUPT_EXTERNAL,
-	BOOKE_INTERRUPT_FIT,
-	BOOKE_INTERRUPT_DECREMENTER,
-};
-
-
-/* TODO: use vcpu_printf() */
-void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
-{
-	int i;
-
-	printk("pc:   %08x msr:  %08x\n", vcpu->arch.pc, vcpu->arch.msr);
-	printk("lr:   %08x ctr:  %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
-	printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
-
-	printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
-
-	for (i = 0; i < 32; i += 4) {
-		printk("gpr%02d: %08x %08x %08x %08x\n", i,
-		       vcpu->arch.gpr[i],
-		       vcpu->arch.gpr[i+1],
-		       vcpu->arch.gpr[i+2],
-		       vcpu->arch.gpr[i+3]);
-	}
-}
-
-/* Check if we are ready to deliver the interrupt */
-static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
-{
-	int r;
-
-	switch (interrupt) {
-	case BOOKE_INTERRUPT_CRITICAL:
-		r = vcpu->arch.msr & MSR_CE;
-		break;
-	case BOOKE_INTERRUPT_MACHINE_CHECK:
-		r = vcpu->arch.msr & MSR_ME;
-		break;
-	case BOOKE_INTERRUPT_EXTERNAL:
-		r = vcpu->arch.msr & MSR_EE;
-		break;
-	case BOOKE_INTERRUPT_DECREMENTER:
-		r = vcpu->arch.msr & MSR_EE;
-		break;
-	case BOOKE_INTERRUPT_FIT:
-		r = vcpu->arch.msr & MSR_EE;
-		break;
-	case BOOKE_INTERRUPT_WATCHDOG:
-		r = vcpu->arch.msr & MSR_CE;
-		break;
-	case BOOKE_INTERRUPT_DEBUG:
-		r = vcpu->arch.msr & MSR_DE;
-		break;
-	default:
-		r = 1;
-	}
-
-	return r;
-}
-
-static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
-{
-	switch (interrupt) {
-	case BOOKE_INTERRUPT_DECREMENTER:
-		vcpu->arch.tsr |= TSR_DIS;
-		break;
-	}
-
-	vcpu->arch.srr0 = vcpu->arch.pc;
-	vcpu->arch.srr1 = vcpu->arch.msr;
-	vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
-	kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
-}
-
-/* Check pending exceptions and deliver one, if possible. */
-void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
-{
-	unsigned long *pending = &vcpu->arch.pending_exceptions;
-	unsigned int exception;
-	unsigned int priority;
-
-	priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
-	while (priority <= BOOKE_MAX_INTERRUPT) {
-		exception = priority_exception[priority];
-		if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
-			kvmppc_clear_exception(vcpu, exception);
-			kvmppc_deliver_interrupt(vcpu, exception);
-			break;
-		}
-
-		priority = find_next_bit(pending,
-		                         BITS_PER_BYTE * sizeof(*pending),
-		                         priority + 1);
-	}
-}
-
-/**
- * kvmppc_handle_exit
- *
- * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
- */
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                       unsigned int exit_nr)
-{
-	enum emulation_result er;
-	int r = RESUME_HOST;
-
-	local_irq_enable();
-
-	run->exit_reason = KVM_EXIT_UNKNOWN;
-	run->ready_for_interrupt_injection = 1;
-
-	switch (exit_nr) {
-	case BOOKE_INTERRUPT_MACHINE_CHECK:
-		printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
-		kvmppc_dump_vcpu(vcpu);
-		r = RESUME_HOST;
-		break;
-
-	case BOOKE_INTERRUPT_EXTERNAL:
-	case BOOKE_INTERRUPT_DECREMENTER:
-		/* Since we switched IVPR back to the host's value, the host
-		 * handled this interrupt the moment we enabled interrupts.
-		 * Now we just offer it a chance to reschedule the guest. */
-
-		/* XXX At this point the TLB still holds our shadow TLB, so if
-		 * we do reschedule the host will fault over it. Perhaps we
-		 * should politely restore the host's entries to minimize
-		 * misses before ceding control. */
-		if (need_resched())
-			cond_resched();
-		if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
-			vcpu->stat.dec_exits++;
-		else
-			vcpu->stat.ext_intr_exits++;
-		r = RESUME_GUEST;
-		break;
-
-	case BOOKE_INTERRUPT_PROGRAM:
-		if (vcpu->arch.msr & MSR_PR) {
-			/* Program traps generated by user-level software must be handled
-			 * by the guest kernel. */
-			vcpu->arch.esr = vcpu->arch.fault_esr;
-			kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
-			r = RESUME_GUEST;
-			break;
-		}
-
-		er = kvmppc_emulate_instruction(run, vcpu);
-		switch (er) {
-		case EMULATE_DONE:
-			/* Future optimization: only reload non-volatiles if
-			 * they were actually modified by emulation. */
-			vcpu->stat.emulated_inst_exits++;
-			r = RESUME_GUEST_NV;
-			break;
-		case EMULATE_DO_DCR:
-			run->exit_reason = KVM_EXIT_DCR;
-			r = RESUME_HOST;
-			break;
-		case EMULATE_FAIL:
-			/* XXX Deliver Program interrupt to guest. */
-			printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
-			       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
-			/* For debugging, encode the failing instruction and
-			 * report it to userspace. */
-			run->hw.hardware_exit_reason = ~0ULL << 32;
-			run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
-			r = RESUME_HOST;
-			break;
-		default:
-			BUG();
-		}
-		break;
-
-	case BOOKE_INTERRUPT_FP_UNAVAIL:
-		kvmppc_queue_exception(vcpu, exit_nr);
-		r = RESUME_GUEST;
-		break;
-
-	case BOOKE_INTERRUPT_DATA_STORAGE:
-		vcpu->arch.dear = vcpu->arch.fault_dear;
-		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_queue_exception(vcpu, exit_nr);
-		vcpu->stat.dsi_exits++;
-		r = RESUME_GUEST;
-		break;
-
-	case BOOKE_INTERRUPT_INST_STORAGE:
-		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_queue_exception(vcpu, exit_nr);
-		vcpu->stat.isi_exits++;
-		r = RESUME_GUEST;
-		break;
-
-	case BOOKE_INTERRUPT_SYSCALL:
-		kvmppc_queue_exception(vcpu, exit_nr);
-		vcpu->stat.syscall_exits++;
-		r = RESUME_GUEST;
-		break;
-
-	case BOOKE_INTERRUPT_DTLB_MISS: {
-		struct kvmppc_44x_tlbe *gtlbe;
-		unsigned long eaddr = vcpu->arch.fault_dear;
-		gfn_t gfn;
-
-		/* Check the guest TLB. */
-		gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
-		if (!gtlbe) {
-			/* The guest didn't have a mapping for it. */
-			kvmppc_queue_exception(vcpu, exit_nr);
-			vcpu->arch.dear = vcpu->arch.fault_dear;
-			vcpu->arch.esr = vcpu->arch.fault_esr;
-			vcpu->stat.dtlb_real_miss_exits++;
-			r = RESUME_GUEST;
-			break;
-		}
-
-		vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
-		gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
-
-		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
-			/* The guest TLB had a mapping, but the shadow TLB
-			 * didn't, and it is RAM. This could be because:
-			 * a) the entry is mapping the host kernel, or
-			 * b) the guest used a large mapping which we're faking
-			 * Either way, we need to satisfy the fault without
-			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
-			               gtlbe->word2);
-			vcpu->stat.dtlb_virt_miss_exits++;
-			r = RESUME_GUEST;
-		} else {
-			/* Guest has mapped and accessed a page which is not
-			 * actually RAM. */
-			r = kvmppc_emulate_mmio(run, vcpu);
-		}
-
-		break;
-	}
-
-	case BOOKE_INTERRUPT_ITLB_MISS: {
-		struct kvmppc_44x_tlbe *gtlbe;
-		unsigned long eaddr = vcpu->arch.pc;
-		gfn_t gfn;
-
-		r = RESUME_GUEST;
-
-		/* Check the guest TLB. */
-		gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
-		if (!gtlbe) {
-			/* The guest didn't have a mapping for it. */
-			kvmppc_queue_exception(vcpu, exit_nr);
-			vcpu->stat.itlb_real_miss_exits++;
-			break;
-		}
-
-		vcpu->stat.itlb_virt_miss_exits++;
-
-		gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
-
-		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
-			/* The guest TLB had a mapping, but the shadow TLB
-			 * didn't. This could be because:
-			 * a) the entry is mapping the host kernel, or
-			 * b) the guest used a large mapping which we're faking
-			 * Either way, we need to satisfy the fault without
-			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
-			               gtlbe->word2);
-		} else {
-			/* Guest mapped and leaped at non-RAM! */
-			kvmppc_queue_exception(vcpu,
-			                       BOOKE_INTERRUPT_MACHINE_CHECK);
-		}
-
-		break;
-	}
-
-	case BOOKE_INTERRUPT_DEBUG: {
-		u32 dbsr;
-
-		vcpu->arch.pc = mfspr(SPRN_CSRR0);
-
-		/* clear IAC events in DBSR register */
-		dbsr = mfspr(SPRN_DBSR);
-		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
-		mtspr(SPRN_DBSR, dbsr);
-
-		run->exit_reason = KVM_EXIT_DEBUG;
-		r = RESUME_HOST;
-		break;
-	}
-
-	default:
-		printk(KERN_EMERG "exit_nr %d\n", exit_nr);
-		BUG();
-	}
-
-	local_irq_disable();
-
-	kvmppc_check_and_deliver_interrupts(vcpu);
-
-	/* Do some exit accounting. */
-	vcpu->stat.sum_exits++;
-	if (!(r & RESUME_HOST)) {
-		/* To avoid clobbering exit_reason, only check for signals if
-		 * we aren't already exiting to userspace for some other
-		 * reason. */
-		if (signal_pending(current)) {
-			run->exit_reason = KVM_EXIT_INTR;
-			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-
-			vcpu->stat.signal_exits++;
-		} else {
-			vcpu->stat.light_exits++;
-		}
-	} else {
-		switch (run->exit_reason) {
-		case KVM_EXIT_MMIO:
-			vcpu->stat.mmio_exits++;
-			break;
-		case KVM_EXIT_DCR:
-			vcpu->stat.dcr_exits++;
-			break;
-		case KVM_EXIT_INTR:
-			vcpu->stat.signal_exits++;
-			break;
-		}
-	}
-
-	return r;
-}
-
-/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
-	struct kvmppc_44x_tlbe *tlbe = &vcpu->arch.guest_tlb[0];
-
-	tlbe->tid = 0;
-	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
-	tlbe->word1 = 0;
-	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
-
-	tlbe++;
-	tlbe->tid = 0;
-	tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
-	tlbe->word1 = 0xef600000;
-	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
-	              | PPC44x_TLB_I | PPC44x_TLB_G;
-
-	vcpu->arch.pc = 0;
-	vcpu->arch.msr = 0;
-	vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
-
-	vcpu->arch.shadow_pid = 1;
-
-	/* Eye-catching number so we know if the guest takes an interrupt
-	 * before it's programmed its own IVPR. */
-	vcpu->arch.ivpr = 0x55550000;
-
-	/* Since the guest can directly access the timebase, it must know the
-	 * real timebase frequency. Accordingly, it must see the state of
-	 * CCR1[TCS]. */
-	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
-
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-	int i;
-
-	regs->pc = vcpu->arch.pc;
-	regs->cr = vcpu->arch.cr;
-	regs->ctr = vcpu->arch.ctr;
-	regs->lr = vcpu->arch.lr;
-	regs->xer = vcpu->arch.xer;
-	regs->msr = vcpu->arch.msr;
-	regs->srr0 = vcpu->arch.srr0;
-	regs->srr1 = vcpu->arch.srr1;
-	regs->pid = vcpu->arch.pid;
-	regs->sprg0 = vcpu->arch.sprg0;
-	regs->sprg1 = vcpu->arch.sprg1;
-	regs->sprg2 = vcpu->arch.sprg2;
-	regs->sprg3 = vcpu->arch.sprg3;
-	regs->sprg5 = vcpu->arch.sprg4;
-	regs->sprg6 = vcpu->arch.sprg5;
-	regs->sprg7 = vcpu->arch.sprg6;
-
-	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
-		regs->gpr[i] = vcpu->arch.gpr[i];
-
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-	int i;
-
-	vcpu->arch.pc = regs->pc;
-	vcpu->arch.cr = regs->cr;
-	vcpu->arch.ctr = regs->ctr;
-	vcpu->arch.lr = regs->lr;
-	vcpu->arch.xer = regs->xer;
-	vcpu->arch.msr = regs->msr;
-	vcpu->arch.srr0 = regs->srr0;
-	vcpu->arch.srr1 = regs->srr1;
-	vcpu->arch.sprg0 = regs->sprg0;
-	vcpu->arch.sprg1 = regs->sprg1;
-	vcpu->arch.sprg2 = regs->sprg2;
-	vcpu->arch.sprg3 = regs->sprg3;
-	vcpu->arch.sprg5 = regs->sprg4;
-	vcpu->arch.sprg6 = regs->sprg5;
-	vcpu->arch.sprg7 = regs->sprg6;
-
-	for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
-		vcpu->arch.gpr[i] = regs->gpr[i];
-
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
-{
-	return -ENOTSUPP;
-}
-
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
-{
-	return -ENOTSUPP;
-}
-
-int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-	return -ENOTSUPP;
-}
-
-int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-	return -ENOTSUPP;
-}
-
-/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
-int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
-                                  struct kvm_translation *tr)
-{
-	struct kvmppc_44x_tlbe *gtlbe;
-	int index;
-	gva_t eaddr;
-	u8 pid;
-	u8 as;
-
-	eaddr = tr->linear_address;
-	pid = (tr->linear_address >> 32) & 0xff;
-	as = (tr->linear_address >> 40) & 0x1;
-
-	index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
-	if (index == -1) {
-		tr->valid = 0;
-		return 0;
-	}
-
-	gtlbe = &vcpu->arch.guest_tlb[index];
-
-	tr->physical_address = tlb_xlate(gtlbe, eaddr);
-	/* XXX what does "writeable" and "usermode" even mean? */
-	tr->valid = 1;
-
-	return 0;
-}
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
deleted file mode 100644
index b480341..0000000
--- a/arch/powerpc/kvm/booke_host.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#include <linux/errno.h>
-#include <linux/kvm_host.h>
-#include <linux/module.h>
-#include <asm/cacheflush.h>
-#include <asm/kvm_ppc.h>
-
-unsigned long kvmppc_booke_handlers;
-
-static int kvmppc_booke_init(void)
-{
-	unsigned long ivor[16];
-	unsigned long max_ivor = 0;
-	int i;
-
-	/* We install our own exception handlers by hijacking IVPR. IVPR must
-	 * be 16-bit aligned, so we need a 64KB allocation. */
-	kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-	                                         VCPU_SIZE_ORDER);
-	if (!kvmppc_booke_handlers)
-		return -ENOMEM;
-
-	/* XXX make sure our handlers are smaller than Linux's */
-
-	/* Copy our interrupt handlers to match host IVORs. That way we don't
-	 * have to swap the IVORs on every guest/host transition. */
-	ivor[0] = mfspr(SPRN_IVOR0);
-	ivor[1] = mfspr(SPRN_IVOR1);
-	ivor[2] = mfspr(SPRN_IVOR2);
-	ivor[3] = mfspr(SPRN_IVOR3);
-	ivor[4] = mfspr(SPRN_IVOR4);
-	ivor[5] = mfspr(SPRN_IVOR5);
-	ivor[6] = mfspr(SPRN_IVOR6);
-	ivor[7] = mfspr(SPRN_IVOR7);
-	ivor[8] = mfspr(SPRN_IVOR8);
-	ivor[9] = mfspr(SPRN_IVOR9);
-	ivor[10] = mfspr(SPRN_IVOR10);
-	ivor[11] = mfspr(SPRN_IVOR11);
-	ivor[12] = mfspr(SPRN_IVOR12);
-	ivor[13] = mfspr(SPRN_IVOR13);
-	ivor[14] = mfspr(SPRN_IVOR14);
-	ivor[15] = mfspr(SPRN_IVOR15);
-
-	for (i = 0; i < 16; i++) {
-		if (ivor[i] > max_ivor)
-			max_ivor = ivor[i];
-
-		memcpy((void *)kvmppc_booke_handlers + ivor[i],
-		       kvmppc_handlers_start + i * kvmppc_handler_len,
-		       kvmppc_handler_len);
-	}
-	flush_icache_range(kvmppc_booke_handlers,
-	                   kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
-
-	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
-}
-
-static void __exit kvmppc_booke_exit(void)
-{
-	free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
-	kvm_exit();
-}
-
-module_init(kvmppc_booke_init)
-module_exit(kvmppc_booke_exit)
-- 
cgit v0.10.2


From 9dd921cfea734409a931ccc6eafd7f09850311e9 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:14 -0600
Subject: KVM: ppc: Refactor powerpc.c to relocate 440-specific code

This introduces a set of core-provided hooks. For 440, some of these are
implemented by booke.c, with the rest in (the new) 44x.c.

Note that these hooks are link-time, not run-time. Since it is not possible to
build a single kernel for both e500 and 440 (for example), using function
pointers would only add overhead.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index df73351..f5850d7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -74,6 +74,9 @@ struct kvmppc_44x_tlbe {
 struct kvm_arch {
 };
 
+/* XXX Can't include mmu-44x.h because it redefines struct mm_context. */
+#define PPC44x_TLB_SIZE 64
+
 struct kvm_vcpu_arch {
 	/* Unmodified copy of the guest's TLB. */
 	struct kvmppc_44x_tlbe guest_tlb[PPC44x_TLB_SIZE];
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 39daeaa..96d5de9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -61,23 +61,6 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 
-/* XXX Book E specific */
-extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
-
-extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu);
-
-static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception)
-{
-	unsigned int priority = exception_priority[exception];
-	set_bit(priority, &vcpu->arch.pending_exceptions);
-}
-
-static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
-{
-	unsigned int priority = exception_priority[exception];
-	clear_bit(priority, &vcpu->arch.pending_exceptions);
-}
-
 /* Helper function for "full" MSR writes. No need to call this if only EE is
  * changing. */
 static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
@@ -99,6 +82,23 @@ static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
 	}
 }
 
+/* Core-specific hooks */
+
+extern int kvmppc_core_check_processor_compat(void);
+
+extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
+
+extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
+
+extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+                                       struct kvm_interrupt *irq);
+
 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
new file mode 100644
index 0000000..fcf8c7d
--- /dev/null
+++ b/arch/powerpc/kvm/44x.c
@@ -0,0 +1,123 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+
+#include "44x_tlb.h"
+
+/* Note: clearing MSR[DE] just means that the debug interrupt will not be
+ * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
+ * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
+ * will be delivered as an "imprecise debug event" (which is indicated by
+ * DBSR[IDE].
+ */
+static void kvm44x_disable_debug_interrupts(void)
+{
+	mtmsr(mfmsr() & ~MSR_DE);
+}
+
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+	kvm44x_disable_debug_interrupts();
+
+	mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
+	mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
+	mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
+	mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
+	mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
+	mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
+	mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
+	mtmsr(vcpu->arch.host_msr);
+}
+
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+	struct kvm_guest_debug *dbg = &vcpu->guest_debug;
+	u32 dbcr0 = 0;
+
+	vcpu->arch.host_msr = mfmsr();
+	kvm44x_disable_debug_interrupts();
+
+	/* Save host debug register state. */
+	vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
+	vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
+	vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
+	vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
+	vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
+	vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
+	vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
+
+	/* set registers up for guest */
+
+	if (dbg->bp[0]) {
+		mtspr(SPRN_IAC1, dbg->bp[0]);
+		dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
+	}
+	if (dbg->bp[1]) {
+		mtspr(SPRN_IAC2, dbg->bp[1]);
+		dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
+	}
+	if (dbg->bp[2]) {
+		mtspr(SPRN_IAC3, dbg->bp[2]);
+		dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
+	}
+	if (dbg->bp[3]) {
+		mtspr(SPRN_IAC4, dbg->bp[3]);
+		dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
+	}
+
+	mtspr(SPRN_DBCR0, dbcr0);
+	mtspr(SPRN_DBCR1, 0);
+	mtspr(SPRN_DBCR2, 0);
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	int i;
+
+	/* Mark every guest entry in the shadow TLB entry modified, so that they
+	 * will all be reloaded on the next vcpu run (instead of being
+	 * demand-faulted). */
+	for (i = 0; i <= tlb_44x_hwater; i++)
+		kvmppc_tlbe_set_modified(vcpu, i);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	/* Don't leave guest TLB entries resident when being de-scheduled. */
+	/* XXX It would be nice to differentiate between heavyweight exit and
+	 * sched_out here, since we could avoid the TLB flush for heavyweight
+	 * exits. */
+	_tlbia();
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+	int r;
+
+	if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
+		r = 0;
+	else
+		r = -ENOTSUPP;
+
+	return r;
+}
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index e5b0a76..357d79a 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -29,6 +29,7 @@ extern struct kvmppc_44x_tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu,
                                                       gva_t eaddr);
 extern struct kvmppc_44x_tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu,
                                                       gva_t eaddr);
+extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
 
 /* TLB helper functions */
 static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index ffed96f..37e9b3c 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -16,11 +16,9 @@ if VIRTUALIZATION
 
 config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
-	depends on 44x && EXPERIMENTAL
+	depends on EXPERIMENTAL
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
-	# We can only run on Book E hosts so far
-	select KVM_BOOKE
 	---help---
 	  Support hosting virtualized guest machines. You will also
 	  need to select one or more of the processor modules below.
@@ -30,12 +28,11 @@ config KVM
 
 	  If unsure, say N.
 
-config KVM_BOOKE
-	bool "KVM support for Book E PowerPC processors"
+config KVM_440
+	bool "KVM support for PowerPC 440 processors"
 	depends on KVM && 44x
 	---help---
-	  Provides host support for KVM on Book E PowerPC processors. Currently
-	  this works on 440 processors only.
+	  KVM can run unmodified 440 guest kernels on 440 host processors.
 
 config KVM_TRACE
 	bool "KVM trace support"
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index a7f8574..f5e3375 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -13,5 +13,5 @@ obj-$(CONFIG_KVM) += kvm.o
 
 AFLAGS_booke_interrupts.o := -I$(obj)
 
-kvm-booke-objs := booke.o booke_interrupts.o 44x_tlb.o
-obj-$(CONFIG_KVM_BOOKE) += kvm-booke.o
+kvm-440-objs := booke.o booke_interrupts.o 44x.o 44x_tlb.o
+obj-$(CONFIG_KVM_440) += kvm-440.o
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index b1e90a1..138014a 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -134,6 +134,40 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
 	}
 }
 
+static void kvmppc_booke_queue_exception(struct kvm_vcpu *vcpu, int exception)
+{
+	unsigned int priority = exception_priority[exception];
+	set_bit(priority, &vcpu->arch.pending_exceptions);
+}
+
+static void kvmppc_booke_clear_exception(struct kvm_vcpu *vcpu, int exception)
+{
+	unsigned int priority = exception_priority[exception];
+	clear_bit(priority, &vcpu->arch.pending_exceptions);
+}
+
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+}
+
+void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
+}
+
+int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
+{
+	unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
+	return test_bit(priority, &vcpu->arch.pending_exceptions);
+}
+
+void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+                                struct kvm_interrupt *irq)
+{
+	kvmppc_booke_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
+}
+
 /* Check if we are ready to deliver the interrupt */
 static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
 {
@@ -168,7 +202,7 @@ static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
 	return r;
 }
 
-static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+static void kvmppc_booke_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
 {
 	switch (interrupt) {
 	case BOOKE_INTERRUPT_DECREMENTER:
@@ -183,7 +217,7 @@ static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
 }
 
 /* Check pending exceptions and deliver one, if possible. */
-void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
 {
 	unsigned long *pending = &vcpu->arch.pending_exceptions;
 	unsigned int exception;
@@ -193,8 +227,8 @@ void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
 	while (priority <= BOOKE_MAX_INTERRUPT) {
 		exception = priority_exception[priority];
 		if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
-			kvmppc_clear_exception(vcpu, exception);
-			kvmppc_deliver_interrupt(vcpu, exception);
+			kvmppc_booke_clear_exception(vcpu, exception);
+			kvmppc_booke_deliver_interrupt(vcpu, exception);
 			break;
 		}
 
@@ -251,7 +285,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			/* Program traps generated by user-level software must be handled
 			 * by the guest kernel. */
 			vcpu->arch.esr = vcpu->arch.fault_esr;
-			kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+			kvmppc_booke_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
 			r = RESUME_GUEST;
 			break;
 		}
@@ -284,27 +318,27 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 
 	case BOOKE_INTERRUPT_FP_UNAVAIL:
-		kvmppc_queue_exception(vcpu, exit_nr);
+		kvmppc_booke_queue_exception(vcpu, exit_nr);
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_DATA_STORAGE:
 		vcpu->arch.dear = vcpu->arch.fault_dear;
 		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_queue_exception(vcpu, exit_nr);
+		kvmppc_booke_queue_exception(vcpu, exit_nr);
 		vcpu->stat.dsi_exits++;
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_INST_STORAGE:
 		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_queue_exception(vcpu, exit_nr);
+		kvmppc_booke_queue_exception(vcpu, exit_nr);
 		vcpu->stat.isi_exits++;
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_SYSCALL:
-		kvmppc_queue_exception(vcpu, exit_nr);
+		kvmppc_booke_queue_exception(vcpu, exit_nr);
 		vcpu->stat.syscall_exits++;
 		r = RESUME_GUEST;
 		break;
@@ -318,7 +352,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
 		if (!gtlbe) {
 			/* The guest didn't have a mapping for it. */
-			kvmppc_queue_exception(vcpu, exit_nr);
+			kvmppc_booke_queue_exception(vcpu, exit_nr);
 			vcpu->arch.dear = vcpu->arch.fault_dear;
 			vcpu->arch.esr = vcpu->arch.fault_esr;
 			vcpu->stat.dtlb_real_miss_exits++;
@@ -360,7 +394,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
 		if (!gtlbe) {
 			/* The guest didn't have a mapping for it. */
-			kvmppc_queue_exception(vcpu, exit_nr);
+			kvmppc_booke_queue_exception(vcpu, exit_nr);
 			vcpu->stat.itlb_real_miss_exits++;
 			break;
 		}
@@ -380,8 +414,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			               gtlbe->word2);
 		} else {
 			/* Guest mapped and leaped at non-RAM! */
-			kvmppc_queue_exception(vcpu,
-			                       BOOKE_INTERRUPT_MACHINE_CHECK);
+			kvmppc_booke_queue_exception(vcpu, BOOKE_INTERRUPT_MACHINE_CHECK);
 		}
 
 		break;
@@ -409,7 +442,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	local_irq_disable();
 
-	kvmppc_check_and_deliver_interrupts(vcpu);
+	kvmppc_core_deliver_interrupts(vcpu);
 
 	/* Do some exit accounting. */
 	vcpu->stat.sum_exits++;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 0ce8ed5..c5d2bfc 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -139,7 +139,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	switch (get_op(inst)) {
 	case 3:                                                 /* trap */
 		printk("trap!\n");
-		kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+		kvmppc_core_queue_program(vcpu);
 		advance = 0;
 		break;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8bef0ef..8d0aaf9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -99,14 +99,7 @@ void kvm_arch_hardware_unsetup(void)
 
 void kvm_arch_check_processor_compat(void *rtn)
 {
-	int r;
-
-	if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
-		r = 0;
-	else
-		r = -ENOTSUPP;
-
-	*(int *)rtn = r;
+	*(int *)rtn = kvmppc_core_check_processor_compat();
 }
 
 struct kvm *kvm_arch_create_vm(void)
@@ -212,16 +205,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
-
-	return test_bit(priority, &vcpu->arch.pending_exceptions);
+	return kvmppc_core_pending_dec(vcpu);
 }
 
 static void kvmppc_decrementer_func(unsigned long data)
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
 
-	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
+	kvmppc_core_queue_dec(vcpu);
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
@@ -242,96 +233,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvmppc_core_destroy_mmu(vcpu);
 }
 
-/* Note: clearing MSR[DE] just means that the debug interrupt will not be
- * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
- * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
- * will be delivered as an "imprecise debug event" (which is indicated by
- * DBSR[IDE].
- */
-static void kvmppc_disable_debug_interrupts(void)
-{
-	mtmsr(mfmsr() & ~MSR_DE);
-}
-
-static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu)
-{
-	kvmppc_disable_debug_interrupts();
-
-	mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
-	mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
-	mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
-	mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
-	mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
-	mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
-	mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
-	mtmsr(vcpu->arch.host_msr);
-}
-
-static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu)
-{
-	struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-	u32 dbcr0 = 0;
-
-	vcpu->arch.host_msr = mfmsr();
-	kvmppc_disable_debug_interrupts();
-
-	/* Save host debug register state. */
-	vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
-	vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
-	vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
-	vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
-	vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
-	vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
-	vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
-
-	/* set registers up for guest */
-
-	if (dbg->bp[0]) {
-		mtspr(SPRN_IAC1, dbg->bp[0]);
-		dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
-	}
-	if (dbg->bp[1]) {
-		mtspr(SPRN_IAC2, dbg->bp[1]);
-		dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
-	}
-	if (dbg->bp[2]) {
-		mtspr(SPRN_IAC3, dbg->bp[2]);
-		dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
-	}
-	if (dbg->bp[3]) {
-		mtspr(SPRN_IAC4, dbg->bp[3]);
-		dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
-	}
-
-	mtspr(SPRN_DBCR0, dbcr0);
-	mtspr(SPRN_DBCR1, 0);
-	mtspr(SPRN_DBCR2, 0);
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	int i;
-
 	if (vcpu->guest_debug.enabled)
-		kvmppc_load_guest_debug_registers(vcpu);
+		kvmppc_core_load_guest_debugstate(vcpu);
 
-	/* Mark every guest entry in the shadow TLB entry modified, so that they
-	 * will all be reloaded on the next vcpu run (instead of being
-	 * demand-faulted). */
-	for (i = 0; i <= tlb_44x_hwater; i++)
-		kvmppc_tlbe_set_modified(vcpu, i);
+	kvmppc_core_vcpu_load(vcpu, cpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->guest_debug.enabled)
-		kvmppc_restore_host_debug_state(vcpu);
+		kvmppc_core_load_host_debugstate(vcpu);
 
 	/* Don't leave guest TLB entries resident when being de-scheduled. */
 	/* XXX It would be nice to differentiate between heavyweight exit and
 	 * sched_out here, since we could avoid the TLB flush for heavyweight
 	 * exits. */
 	_tlbil_all();
+	kvmppc_core_vcpu_put(vcpu);
 }
 
 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -460,7 +380,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		vcpu->arch.dcr_needed = 0;
 	}
 
-	kvmppc_check_and_deliver_interrupts(vcpu);
+	kvmppc_core_deliver_interrupts(vcpu);
 
 	local_irq_disable();
 	kvm_guest_enter();
@@ -478,7 +398,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
-	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
+	kvmppc_core_queue_external(vcpu, irq);
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
-- 
cgit v0.10.2


From c381a04313e7c0fb04246b1ff711e0b5726de6c0 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:15 -0600
Subject: ppc: Create disassemble.h to extract instruction fields

This is used in a couple places in KVM, but isn't KVM-specific.

However, this patch doesn't modify other in-kernel emulation code:
- xmon uses a direct copy of ppc_opc.c from binutils
- emulate_instruction() doesn't need it because it can use a series
  of mask tests.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
new file mode 100644
index 0000000..9b198d1
--- /dev/null
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -0,0 +1,80 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __ASM_PPC_DISASSEMBLE_H__
+#define __ASM_PPC_DISASSEMBLE_H__
+
+#include <linux/types.h>
+
+static inline unsigned int get_op(u32 inst)
+{
+	return inst >> 26;
+}
+
+static inline unsigned int get_xop(u32 inst)
+{
+	return (inst >> 1) & 0x3ff;
+}
+
+static inline unsigned int get_sprn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_dcrn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_rt(u32 inst)
+{
+	return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_rs(u32 inst)
+{
+	return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_ra(u32 inst)
+{
+	return (inst >> 16) & 0x1f;
+}
+
+static inline unsigned int get_rb(u32 inst)
+{
+	return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_rc(u32 inst)
+{
+	return inst & 0x1;
+}
+
+static inline unsigned int get_ws(u32 inst)
+{
+	return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_d(u32 inst)
+{
+	return inst & 0xffff;
+}
+
+#endif /* __ASM_PPC_DISASSEMBLE_H__ */
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index c5d2bfc..5fd9cf77 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -28,62 +28,7 @@
 #include <asm/time.h>
 #include <asm/byteorder.h>
 #include <asm/kvm_ppc.h>
-
-/* Instruction decoding */
-static inline unsigned int get_op(u32 inst)
-{
-	return inst >> 26;
-}
-
-static inline unsigned int get_xop(u32 inst)
-{
-	return (inst >> 1) & 0x3ff;
-}
-
-static inline unsigned int get_sprn(u32 inst)
-{
-	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_dcrn(u32 inst)
-{
-	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_rt(u32 inst)
-{
-	return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_rs(u32 inst)
-{
-	return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_ra(u32 inst)
-{
-	return (inst >> 16) & 0x1f;
-}
-
-static inline unsigned int get_rb(u32 inst)
-{
-	return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_rc(u32 inst)
-{
-	return inst & 0x1;
-}
-
-static inline unsigned int get_ws(u32 inst)
-{
-	return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_d(u32 inst)
-{
-	return inst & 0xffff;
-}
+#include <asm/disassemble.h>
 
 static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 {
-- 
cgit v0.10.2


From 75f74f0dbe086c239b4b0cc5ed75b903ea3e663f Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:16 -0600
Subject: KVM: ppc: refactor instruction emulation into generic and
 core-specific pieces

Cores provide 3 emulation hooks, implemented for example in the new
4xx_emulate.c:
kvmppc_core_emulate_op
kvmppc_core_emulate_mtspr
kvmppc_core_emulate_mfspr

Strictly speaking the last two aren't necessary, but provide for more
informative error reporting ("unknown SPR").

Long term I'd like to have instruction decoding autogenerated from tables of
opcodes, and that way we could aggregate universal, Book E, and core-specific
instructions more easily and without redundant switch statements.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 96d5de9..aecf95d 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -53,35 +53,13 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 extern int kvmppc_emulate_instruction(struct kvm_run *run,
                                       struct kvm_vcpu *vcpu);
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
-extern int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
-extern int kvmppc_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc);
+extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 
 extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
                            u64 asid, u32 flags);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 
-/* Helper function for "full" MSR writes. No need to call this if only EE is
- * changing. */
-static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
-{
-	if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
-		kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
-
-	vcpu->arch.msr = new_msr;
-
-	if (vcpu->arch.msr & MSR_WE)
-		kvm_vcpu_block(vcpu);
-}
-
-static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
-{
-	if (vcpu->arch.pid != new_pid) {
-		vcpu->arch.pid = new_pid;
-		vcpu->arch.swap_pid = 1;
-	}
-}
-
 /* Core-specific hooks */
 
 extern int kvmppc_core_check_processor_compat(void);
@@ -99,6 +77,11 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
 
+extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                                  unsigned int op, int *advance);
+extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
+
 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
new file mode 100644
index 0000000..a634c0c
--- /dev/null
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -0,0 +1,335 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <asm/disassemble.h>
+
+#include "booke.h"
+#include "44x_tlb.h"
+
+#define OP_RFI      19
+
+#define XOP_RFI     50
+#define XOP_MFMSR   83
+#define XOP_WRTEE   131
+#define XOP_MTMSR   146
+#define XOP_WRTEEI  163
+#define XOP_MFDCR   323
+#define XOP_MTDCR   451
+#define XOP_TLBSX   914
+#define XOP_ICCCI   966
+#define XOP_TLBWE   978
+
+static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
+{
+	if (vcpu->arch.pid != new_pid) {
+		vcpu->arch.pid = new_pid;
+		vcpu->arch.swap_pid = 1;
+	}
+}
+
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pc = vcpu->arch.srr0;
+	kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                           unsigned int inst, int *advance)
+{
+	int emulated = EMULATE_DONE;
+	int dcrn;
+	int ra;
+	int rb;
+	int rc;
+	int rs;
+	int rt;
+	int ws;
+
+	switch (get_op(inst)) {
+
+	case OP_RFI:
+		switch (get_xop(inst)) {
+		case XOP_RFI:
+			kvmppc_emul_rfi(vcpu);
+			*advance = 0;
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+			break;
+		}
+		break;
+
+	case 31:
+		switch (get_xop(inst)) {
+
+		case XOP_MFMSR:
+			rt = get_rt(inst);
+			vcpu->arch.gpr[rt] = vcpu->arch.msr;
+			break;
+
+		case XOP_MTMSR:
+			rs = get_rs(inst);
+			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+			break;
+
+		case XOP_WRTEE:
+			rs = get_rs(inst);
+			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+							 | (vcpu->arch.gpr[rs] & MSR_EE);
+			break;
+
+		case XOP_WRTEEI:
+			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+							 | (inst & MSR_EE);
+			break;
+
+		case XOP_MFDCR:
+			dcrn = get_dcrn(inst);
+			rt = get_rt(inst);
+
+			/* The guest may access CPR0 registers to determine the timebase
+			 * frequency, and it must know the real host frequency because it
+			 * can directly access the timebase registers.
+			 *
+			 * It would be possible to emulate those accesses in userspace,
+			 * but userspace can really only figure out the end frequency.
+			 * We could decompose that into the factors that compute it, but
+			 * that's tricky math, and it's easier to just report the real
+			 * CPR0 values.
+			 */
+			switch (dcrn) {
+			case DCRN_CPR0_CONFIG_ADDR:
+				vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+				break;
+			case DCRN_CPR0_CONFIG_DATA:
+				local_irq_disable();
+				mtdcr(DCRN_CPR0_CONFIG_ADDR,
+					  vcpu->arch.cpr0_cfgaddr);
+				vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+				local_irq_enable();
+				break;
+			default:
+				run->dcr.dcrn = dcrn;
+				run->dcr.data =  0;
+				run->dcr.is_write = 0;
+				vcpu->arch.io_gpr = rt;
+				vcpu->arch.dcr_needed = 1;
+				emulated = EMULATE_DO_DCR;
+			}
+
+			break;
+
+		case XOP_MTDCR:
+			dcrn = get_dcrn(inst);
+			rs = get_rs(inst);
+
+			/* emulate some access in kernel */
+			switch (dcrn) {
+			case DCRN_CPR0_CONFIG_ADDR:
+				vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+				break;
+			default:
+				run->dcr.dcrn = dcrn;
+				run->dcr.data = vcpu->arch.gpr[rs];
+				run->dcr.is_write = 1;
+				vcpu->arch.dcr_needed = 1;
+				emulated = EMULATE_DO_DCR;
+			}
+
+			break;
+
+		case XOP_TLBWE:
+			ra = get_ra(inst);
+			rs = get_rs(inst);
+			ws = get_ws(inst);
+			emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
+			break;
+
+		case XOP_TLBSX:
+			rt = get_rt(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+			rc = get_rc(inst);
+			emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
+			break;
+
+		case XOP_ICCCI:
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+		}
+
+		break;
+
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	return emulated;
+}
+
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	switch (sprn) {
+	case SPRN_MMUCR:
+		vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+	case SPRN_PID:
+		kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+	case SPRN_CCR0:
+		vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+	case SPRN_CCR1:
+		vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+	case SPRN_DEAR:
+		vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+	case SPRN_ESR:
+		vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+	case SPRN_DBCR0:
+		vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+	case SPRN_DBCR1:
+		vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+	case SPRN_TSR:
+		vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+	case SPRN_TCR:
+		vcpu->arch.tcr = vcpu->arch.gpr[rs];
+		kvmppc_emulate_dec(vcpu);
+		break;
+
+	/* Note: SPRG4-7 are user-readable. These values are
+	 * loaded into the real SPRGs when resuming the
+	 * guest. */
+	case SPRN_SPRG4:
+		vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG5:
+		vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG6:
+		vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG7:
+		vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+	case SPRN_IVPR:
+		vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR0:
+		vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR1:
+		vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR2:
+		vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR3:
+		vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR4:
+		vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR5:
+		vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR6:
+		vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR7:
+		vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR8:
+		vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR9:
+		vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR10:
+		vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR11:
+		vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR12:
+		vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR13:
+		vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR14:
+		vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
+	case SPRN_IVOR15:
+		vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	return EMULATE_DONE;
+}
+
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	switch (sprn) {
+	/* 440 */
+	case SPRN_MMUCR:
+		vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+	case SPRN_CCR0:
+		vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+	case SPRN_CCR1:
+		vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
+
+	/* Book E */
+	case SPRN_PID:
+		vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+	case SPRN_IVPR:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+	case SPRN_DEAR:
+		vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+	case SPRN_ESR:
+		vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+	case SPRN_DBCR0:
+		vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+	case SPRN_DBCR1:
+		vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+
+	case SPRN_IVOR0:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
+	case SPRN_IVOR1:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
+	case SPRN_IVOR2:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
+	case SPRN_IVOR3:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
+	case SPRN_IVOR4:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
+	case SPRN_IVOR5:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
+	case SPRN_IVOR6:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
+	case SPRN_IVOR7:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
+	case SPRN_IVOR8:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
+	case SPRN_IVOR9:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
+	case SPRN_IVOR10:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
+	case SPRN_IVOR11:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
+	case SPRN_IVOR12:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
+	case SPRN_IVOR13:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
+	case SPRN_IVOR14:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
+	case SPRN_IVOR15:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
+	default:
+		return EMULATE_FAIL;
+	}
+
+	return EMULATE_DONE;
+}
+
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 5152fe5..bb6da13 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -301,7 +301,7 @@ static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
 	return 1;
 }
 
-int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
+int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 {
 	u64 eaddr;
 	u64 raddr;
@@ -363,7 +363,7 @@ int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 	return EMULATE_DONE;
 }
 
-int kvmppc_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
+int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
 {
 	u32 ea;
 	int index;
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 357d79a..b1029af 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -31,6 +31,10 @@ extern struct kvmppc_44x_tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu,
                                                       gva_t eaddr);
 extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
 
+extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
+                                 u8 rc);
+extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
+
 /* TLB helper functions */
 static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
 {
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index f5e3375..f045fad 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -13,5 +13,10 @@ obj-$(CONFIG_KVM) += kvm.o
 
 AFLAGS_booke_interrupts.o := -I$(obj)
 
-kvm-440-objs := booke.o booke_interrupts.o 44x.o 44x_tlb.o
+kvm-440-objs := \
+	booke.o \
+	booke_interrupts.o \
+	44x.o \
+	44x_tlb.o \
+	44x_emulate.o
 obj-$(CONFIG_KVM_440) += kvm-440.o
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 138014a..ea63009 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -29,6 +29,7 @@
 #include <asm/kvm_ppc.h>
 #include <asm/cacheflush.h>
 
+#include "booke.h"
 #include "44x_tlb.h"
 
 unsigned long kvmppc_booke_handlers;
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
new file mode 100644
index 0000000..f694a4b
--- /dev/null
+++ b/arch/powerpc/kvm/booke.h
@@ -0,0 +1,39 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __KVM_BOOKE_H__
+#define __KVM_BOOKE_H__
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+
+/* Helper function for "full" MSR writes. No need to call this if only EE is
+ * changing. */
+static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
+{
+	if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
+		kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
+
+	vcpu->arch.msr = new_msr;
+
+	if (vcpu->arch.msr & MSR_WE)
+		kvm_vcpu_block(vcpu);
+}
+
+#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 5fd9cf77..30a49f8 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,14 +23,13 @@
 #include <linux/string.h>
 #include <linux/kvm_host.h>
 
-#include <asm/dcr.h>
-#include <asm/dcr-regs.h>
+#include <asm/reg.h>
 #include <asm/time.h>
 #include <asm/byteorder.h>
 #include <asm/kvm_ppc.h>
 #include <asm/disassemble.h>
 
-static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
+void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.tcr & TCR_DIE) {
 		/* The decrementer ticks at the same rate as the timebase, so
@@ -46,12 +45,6 @@ static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.pc = vcpu->arch.srr0;
-	kvmppc_set_msr(vcpu, vcpu->arch.srr1);
-}
-
 /* XXX to do:
  * lhax
  * lhaux
@@ -66,18 +59,17 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
  *
  * XXX is_bigendian should depend on MMU mapping or MSR[LE]
  */
+/* XXX Should probably auto-generate instruction decoding for a particular core
+ * from opcode tables in the future. */
 int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	u32 inst = vcpu->arch.last_inst;
 	u32 ea;
 	int ra;
 	int rb;
-	int rc;
 	int rs;
 	int rt;
-	int ws;
 	int sprn;
-	int dcrn;
 	enum emulation_result emulated = EMULATE_DONE;
 	int advance = 1;
 
@@ -88,19 +80,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		advance = 0;
 		break;
 
-	case 19:
-		switch (get_xop(inst)) {
-		case 50:                                        /* rfi */
-			kvmppc_emul_rfi(vcpu);
-			advance = 0;
-			break;
-
-		default:
-			emulated = EMULATE_FAIL;
-			break;
-		}
-		break;
-
 	case 31:
 		switch (get_xop(inst)) {
 
@@ -109,27 +88,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
 			break;
 
-		case 83:                                        /* mfmsr */
-			rt = get_rt(inst);
-			vcpu->arch.gpr[rt] = vcpu->arch.msr;
-			break;
-
 		case 87:                                        /* lbzx */
 			rt = get_rt(inst);
 			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
 			break;
 
-		case 131:                                       /* wrtee */
-			rs = get_rs(inst);
-			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-			                 | (vcpu->arch.gpr[rs] & MSR_EE);
-			break;
-
-		case 146:                                       /* mtmsr */
-			rs = get_rs(inst);
-			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
-			break;
-
 		case 151:                                       /* stwx */
 			rs = get_rs(inst);
 			emulated = kvmppc_handle_store(run, vcpu,
@@ -137,11 +100,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			                               4, 1);
 			break;
 
-		case 163:                                       /* wrteei */
-			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-			                 | (inst & MSR_EE);
-			break;
-
 		case 215:                                       /* stbx */
 			rs = get_rs(inst);
 			emulated = kvmppc_handle_store(run, vcpu,
@@ -182,42 +140,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			vcpu->arch.gpr[ra] = ea;
 			break;
 
-		case 323:                                       /* mfdcr */
-			dcrn = get_dcrn(inst);
-			rt = get_rt(inst);
-
-			/* The guest may access CPR0 registers to determine the timebase
-			 * frequency, and it must know the real host frequency because it
-			 * can directly access the timebase registers.
-			 *
-			 * It would be possible to emulate those accesses in userspace,
-			 * but userspace can really only figure out the end frequency.
-			 * We could decompose that into the factors that compute it, but
-			 * that's tricky math, and it's easier to just report the real
-			 * CPR0 values.
-			 */
-			switch (dcrn) {
-			case DCRN_CPR0_CONFIG_ADDR:
-				vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
-				break;
-			case DCRN_CPR0_CONFIG_DATA:
-				local_irq_disable();
-				mtdcr(DCRN_CPR0_CONFIG_ADDR,
-				      vcpu->arch.cpr0_cfgaddr);
-				vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
-				local_irq_enable();
-				break;
-			default:
-				run->dcr.dcrn = dcrn;
-				run->dcr.data =  0;
-				run->dcr.is_write = 0;
-				vcpu->arch.io_gpr = rt;
-				vcpu->arch.dcr_needed = 1;
-				emulated = EMULATE_DO_DCR;
-			}
-
-			break;
-
 		case 339:                                       /* mfspr */
 			sprn = get_sprn(inst);
 			rt = get_rt(inst);
@@ -227,26 +149,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 				vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
 			case SPRN_SRR1:
 				vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
-			case SPRN_MMUCR:
-				vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
-			case SPRN_PID:
-				vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
-			case SPRN_IVPR:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
-			case SPRN_CCR0:
-				vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
-			case SPRN_CCR1:
-				vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
 			case SPRN_PVR:
 				vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
-			case SPRN_DEAR:
-				vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
-			case SPRN_ESR:
-				vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
-			case SPRN_DBCR0:
-				vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
-			case SPRN_DBCR1:
-				vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
 
 			/* Note: mftb and TBRL/TBWL are user-accessible, so
 			 * the guest can always access the real TB anyways.
@@ -267,42 +171,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			/* Note: SPRG4-7 are user-readable, so we don't get
 			 * a trap. */
 
-			case SPRN_IVOR0:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
-			case SPRN_IVOR1:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
-			case SPRN_IVOR2:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
-			case SPRN_IVOR3:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
-			case SPRN_IVOR4:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
-			case SPRN_IVOR5:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
-			case SPRN_IVOR6:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
-			case SPRN_IVOR7:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
-			case SPRN_IVOR8:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
-			case SPRN_IVOR9:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
-			case SPRN_IVOR10:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
-			case SPRN_IVOR11:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
-			case SPRN_IVOR12:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
-			case SPRN_IVOR13:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
-			case SPRN_IVOR14:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
-			case SPRN_IVOR15:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
-
 			default:
-				printk("mfspr: unknown spr %x\n", sprn);
-				vcpu->arch.gpr[rt] = 0;
+				emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
+				if (emulated == EMULATE_FAIL) {
+					printk("mfspr: unknown spr %x\n", sprn);
+					vcpu->arch.gpr[rt] = 0;
+				}
 				break;
 			}
 			break;
@@ -332,25 +206,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			vcpu->arch.gpr[ra] = ea;
 			break;
 
-		case 451:                                       /* mtdcr */
-			dcrn = get_dcrn(inst);
-			rs = get_rs(inst);
-
-			/* emulate some access in kernel */
-			switch (dcrn) {
-			case DCRN_CPR0_CONFIG_ADDR:
-				vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
-				break;
-			default:
-				run->dcr.dcrn = dcrn;
-				run->dcr.data = vcpu->arch.gpr[rs];
-				run->dcr.is_write = 1;
-				vcpu->arch.dcr_needed = 1;
-				emulated = EMULATE_DO_DCR;
-			}
-
-			break;
-
 		case 467:                                       /* mtspr */
 			sprn = get_sprn(inst);
 			rs = get_rs(inst);
@@ -359,22 +214,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 				vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
 			case SPRN_SRR1:
 				vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
-			case SPRN_MMUCR:
-				vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
-			case SPRN_PID:
-				kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
-			case SPRN_CCR0:
-				vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
-			case SPRN_CCR1:
-				vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
-			case SPRN_DEAR:
-				vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
-			case SPRN_ESR:
-				vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
-			case SPRN_DBCR0:
-				vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
-			case SPRN_DBCR1:
-				vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
 
 			/* XXX We need to context-switch the timebase for
 			 * watchdog and FIT. */
@@ -386,14 +225,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 				kvmppc_emulate_dec(vcpu);
 				break;
 
-			case SPRN_TSR:
-				vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
-
-			case SPRN_TCR:
-				vcpu->arch.tcr = vcpu->arch.gpr[rs];
-				kvmppc_emulate_dec(vcpu);
-				break;
-
 			case SPRN_SPRG0:
 				vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
 			case SPRN_SPRG1:
@@ -403,56 +234,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			case SPRN_SPRG3:
 				vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
 
-			/* Note: SPRG4-7 are user-readable. These values are
-			 * loaded into the real SPRGs when resuming the
-			 * guest. */
-			case SPRN_SPRG4:
-				vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
-			case SPRN_SPRG5:
-				vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
-			case SPRN_SPRG6:
-				vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
-			case SPRN_SPRG7:
-				vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
-
-			case SPRN_IVPR:
-				vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR0:
-				vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR1:
-				vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR2:
-				vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR3:
-				vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR4:
-				vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR5:
-				vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR6:
-				vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR7:
-				vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR8:
-				vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR9:
-				vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR10:
-				vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR11:
-				vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR12:
-				vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR13:
-				vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR14:
-				vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR15:
-				vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
-
 			default:
-				printk("mtspr: unknown spr %x\n", sprn);
-				emulated = EMULATE_FAIL;
+				emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
+				if (emulated == EMULATE_FAIL)
+					printk("mtspr: unknown spr %x\n", sprn);
 				break;
 			}
 			break;
@@ -483,21 +268,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			                               4, 0);
 			break;
 
-		case 978:                                       /* tlbwe */
-			ra = get_ra(inst);
-			rs = get_rs(inst);
-			ws = get_ws(inst);
-			emulated = kvmppc_emul_tlbwe(vcpu, ra, rs, ws);
-			break;
-
-		case 914:                                       /* tlbsx */
-			rt = get_rt(inst);
-			ra = get_ra(inst);
-			rb = get_rb(inst);
-			rc = get_rc(inst);
-			emulated = kvmppc_emul_tlbsx(vcpu, rt, ra, rb, rc);
-			break;
-
 		case 790:                                       /* lhbrx */
 			rt = get_rt(inst);
 			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
@@ -513,14 +283,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			                               2, 0);
 			break;
 
-		case 966:                                       /* iccci */
-			break;
-
 		default:
-			printk("unknown: op %d xop %d\n", get_op(inst),
-				get_xop(inst));
+			/* Attempt core-specific emulation below. */
 			emulated = EMULATE_FAIL;
-			break;
 		}
 		break;
 
@@ -603,9 +368,16 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		break;
 
 	default:
-		printk("unknown op %d\n", get_op(inst));
 		emulated = EMULATE_FAIL;
-		break;
+	}
+
+	if (emulated == EMULATE_FAIL) {
+		emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
+		if (emulated == EMULATE_FAIL) {
+			advance = 0;
+			printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
+			       "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
+		}
 	}
 
 	KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit);
-- 
cgit v0.10.2


From 5cbb5106f50b4515815cd32cf944958c0d4da83f Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:17 -0600
Subject: KVM: ppc: Move the last bits of 44x code out of booke.c

Needed to port to other Book E processors.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index aecf95d..d593325 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -62,7 +62,10 @@ extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 
 /* Core-specific hooks */
 
+extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_check_processor_compat(void);
+extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+                                      struct kvm_translation *tr);
 
 extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index fcf8c7d..f5d7028 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -121,3 +121,56 @@ int kvmppc_core_check_processor_compat(void)
 
 	return r;
 }
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_44x_tlbe *tlbe = &vcpu->arch.guest_tlb[0];
+
+	tlbe->tid = 0;
+	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
+	tlbe->word1 = 0;
+	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
+
+	tlbe++;
+	tlbe->tid = 0;
+	tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
+	tlbe->word1 = 0xef600000;
+	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
+	              | PPC44x_TLB_I | PPC44x_TLB_G;
+
+	/* Since the guest can directly access the timebase, it must know the
+	 * real timebase frequency. Accordingly, it must see the state of
+	 * CCR1[TCS]. */
+	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+
+	return 0;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+                               struct kvm_translation *tr)
+{
+	struct kvmppc_44x_tlbe *gtlbe;
+	int index;
+	gva_t eaddr;
+	u8 pid;
+	u8 as;
+
+	eaddr = tr->linear_address;
+	pid = (tr->linear_address >> 32) & 0xff;
+	as = (tr->linear_address >> 40) & 0x1;
+
+	index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
+	if (index == -1) {
+		tr->valid = 0;
+		return 0;
+	}
+
+	gtlbe = &vcpu->arch.guest_tlb[index];
+
+	tr->physical_address = tlb_xlate(gtlbe, eaddr);
+	/* XXX what does "writeable" and "usermode" even mean? */
+	tr->valid = 1;
+
+	return 0;
+}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ea63009..c619d1b 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -479,20 +479,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_44x_tlbe *tlbe = &vcpu->arch.guest_tlb[0];
-
-	tlbe->tid = 0;
-	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
-	tlbe->word1 = 0;
-	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
-
-	tlbe++;
-	tlbe->tid = 0;
-	tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
-	tlbe->word1 = 0xef600000;
-	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
-	              | PPC44x_TLB_I | PPC44x_TLB_G;
-
 	vcpu->arch.pc = 0;
 	vcpu->arch.msr = 0;
 	vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
@@ -503,12 +489,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	 * before it's programmed its own IVPR. */
 	vcpu->arch.ivpr = 0x55550000;
 
-	/* Since the guest can directly access the timebase, it must know the
-	 * real timebase frequency. Accordingly, it must see the state of
-	 * CCR1[TCS]. */
-	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
-
-	return 0;
+	return kvmppc_core_vcpu_setup(vcpu);
 }
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -586,33 +567,10 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return -ENOTSUPP;
 }
 
-/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                   struct kvm_translation *tr)
 {
-	struct kvmppc_44x_tlbe *gtlbe;
-	int index;
-	gva_t eaddr;
-	u8 pid;
-	u8 as;
-
-	eaddr = tr->linear_address;
-	pid = (tr->linear_address >> 32) & 0xff;
-	as = (tr->linear_address >> 40) & 0x1;
-
-	index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
-	if (index == -1) {
-		tr->valid = 0;
-		return 0;
-	}
-
-	gtlbe = &vcpu->arch.guest_tlb[index];
-
-	tr->physical_address = tlb_xlate(gtlbe, eaddr);
-	/* XXX what does "writeable" and "usermode" even mean? */
-	tr->valid = 1;
-
-	return 0;
+	return kvmppc_core_vcpu_translate(vcpu, tr);
 }
 
 static int kvmppc_booke_init(void)
-- 
cgit v0.10.2


From db93f5745d836f81cef0b4101a7c2685eeb55efb Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:18 -0600
Subject: KVM: ppc: create struct kvm_vcpu_44x and introduce container_of()
 accessor

This patch doesn't yet move all 44x-specific data into the new structure, but
is the first step down that path. In the future we may also want to create a
struct kvm_vcpu_booke.

Based on patch from Liu Yu <yu.liu@freescale.com>.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
new file mode 100644
index 0000000..dece093
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -0,0 +1,47 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __ASM_44X_H__
+#define __ASM_44X_H__
+
+#include <linux/kvm_host.h>
+
+/* XXX Can't include mmu-44x.h because it redefines struct mm_context. */
+#define PPC44x_TLB_SIZE 64
+
+struct kvmppc_vcpu_44x {
+	/* Unmodified copy of the guest's TLB. */
+	struct kvmppc_44x_tlbe guest_tlb[PPC44x_TLB_SIZE];
+	/* TLB that's actually used when the guest is running. */
+	struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
+	/* Pages which are referenced in the shadow TLB. */
+	struct page *shadow_pages[PPC44x_TLB_SIZE];
+
+	/* Track which TLB entries we've modified in the current exit. */
+	u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+
+	struct kvm_vcpu vcpu;
+};
+
+static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
+{
+	return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
+}
+
+#endif /* __ASM_44X_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index f5850d7..765d8ec 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -74,20 +74,7 @@ struct kvmppc_44x_tlbe {
 struct kvm_arch {
 };
 
-/* XXX Can't include mmu-44x.h because it redefines struct mm_context. */
-#define PPC44x_TLB_SIZE 64
-
 struct kvm_vcpu_arch {
-	/* Unmodified copy of the guest's TLB. */
-	struct kvmppc_44x_tlbe guest_tlb[PPC44x_TLB_SIZE];
-	/* TLB that's actually used when the guest is running. */
-	struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
-	/* Pages which are referenced in the shadow TLB. */
-	struct page *shadow_pages[PPC44x_TLB_SIZE];
-
-	/* Track which TLB entries we've modified in the current exit. */
-	u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
-
 	u32 host_stack;
 	u32 host_pid;
 	u32 host_dbcr0;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index d593325..976ecc4 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -62,6 +62,9 @@ extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 
 /* Core-specific hooks */
 
+extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
+                                                unsigned int id);
+extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_check_processor_compat(void);
 extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
@@ -85,6 +88,9 @@ extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
 extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
 
+extern int kvmppc_booke_init(void);
+extern void kvmppc_booke_exit(void);
+
 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 0264c97..393c7f3 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,9 +23,6 @@
 #include <linux/mm.h>
 #include <linux/suspend.h>
 #include <linux/hrtimer.h>
-#ifdef CONFIG_KVM
-#include <linux/kvm_host.h>
-#endif
 #ifdef CONFIG_PPC64
 #include <linux/time.h>
 #include <linux/hardirq.h>
@@ -51,6 +48,9 @@
 #ifdef CONFIG_PPC_ISERIES
 #include <asm/iseries/alpaca.h>
 #endif
+#ifdef CONFIG_KVM
+#include <asm/kvm_44x.h>
+#endif
 
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
 #include "head_booke.h"
@@ -359,10 +359,14 @@ int main(void)
 #ifdef CONFIG_KVM
 	DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
 
+	DEFINE(VCPU_TO_44X, offsetof(struct kvmppc_vcpu_44x, vcpu));
+	DEFINE(VCPU44x_SHADOW_TLB,
+	       offsetof(struct kvmppc_vcpu_44x, shadow_tlb));
+	DEFINE(VCPU44x_SHADOW_MOD,
+	       offsetof(struct kvmppc_vcpu_44x, shadow_tlb_mod));
+
 	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
-	DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
-	DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
 	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
 	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index f5d7028..22054b1 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -18,9 +18,13 @@
  */
 
 #include <linux/kvm_host.h>
+#include <linux/err.h>
+
 #include <asm/reg.h>
 #include <asm/cputable.h>
 #include <asm/tlbflush.h>
+#include <asm/kvm_44x.h>
+#include <asm/kvm_ppc.h>
 
 #include "44x_tlb.h"
 
@@ -124,7 +128,8 @@ int kvmppc_core_check_processor_compat(void)
 
 int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_44x_tlbe *tlbe = &vcpu->arch.guest_tlb[0];
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
 
 	tlbe->tid = 0;
 	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
@@ -150,6 +155,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
                                struct kvm_translation *tr)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	struct kvmppc_44x_tlbe *gtlbe;
 	int index;
 	gva_t eaddr;
@@ -166,7 +172,7 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
 		return 0;
 	}
 
-	gtlbe = &vcpu->arch.guest_tlb[index];
+	gtlbe = &vcpu_44x->guest_tlb[index];
 
 	tr->physical_address = tlb_xlate(gtlbe, eaddr);
 	/* XXX what does "writeable" and "usermode" even mean? */
@@ -174,3 +180,55 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
 
 	return 0;
 }
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x;
+	struct kvm_vcpu *vcpu;
+	int err;
+
+	vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu_44x) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	vcpu = &vcpu_44x->vcpu;
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_vcpu;
+
+	return vcpu;
+
+free_vcpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+out:
+	return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+
+	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+}
+
+static int kvmppc_44x_init(void)
+{
+	int r;
+
+	r = kvmppc_booke_init();
+	if (r)
+		return r;
+
+	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
+}
+
+static void kvmppc_44x_exit(void)
+{
+	kvmppc_booke_exit();
+}
+
+module_init(kvmppc_44x_init);
+module_exit(kvmppc_44x_exit);
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index bb6da13..8b65fbd 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -24,6 +24,7 @@
 #include <linux/highmem.h>
 #include <asm/mmu-44x.h>
 #include <asm/kvm_ppc.h>
+#include <asm/kvm_44x.h>
 
 #include "44x_tlb.h"
 
@@ -43,7 +44,7 @@ void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
 			"nr", "tid", "word0", "word1", "word2");
 
 	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		tlbe = &vcpu->arch.guest_tlb[i];
+		tlbe = &vcpu_44x->guest_tlb[i];
 		if (tlbe->word0 & PPC44x_TLB_VALID)
 			printk(" G%2d |  %02X | %08X | %08X | %08X |\n",
 			       i, tlbe->tid, tlbe->word0, tlbe->word1,
@@ -51,7 +52,7 @@ void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
 	}
 
 	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		tlbe = &vcpu->arch.shadow_tlb[i];
+		tlbe = &vcpu_44x->shadow_tlb[i];
 		if (tlbe->word0 & PPC44x_TLB_VALID)
 			printk(" S%2d | %02X | %08X | %08X | %08X |\n",
 			       i, tlbe->tid, tlbe->word0, tlbe->word1,
@@ -82,11 +83,12 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
 int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
                          unsigned int as)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
 
 	/* XXX Replace loop with fancy data structures. */
 	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		struct kvmppc_44x_tlbe *tlbe = &vcpu->arch.guest_tlb[i];
+		struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
 		unsigned int tid;
 
 		if (eaddr < get_tlb_eaddr(tlbe))
@@ -114,25 +116,27 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
 struct kvmppc_44x_tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu,
                                                gva_t eaddr)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	unsigned int as = !!(vcpu->arch.msr & MSR_IS);
 	unsigned int index;
 
 	index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 	if (index == -1)
 		return NULL;
-	return &vcpu->arch.guest_tlb[index];
+	return &vcpu_44x->guest_tlb[index];
 }
 
 struct kvmppc_44x_tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu,
                                                gva_t eaddr)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	unsigned int as = !!(vcpu->arch.msr & MSR_DS);
 	unsigned int index;
 
 	index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 	if (index == -1)
 		return NULL;
-	return &vcpu->arch.guest_tlb[index];
+	return &vcpu_44x->guest_tlb[index];
 }
 
 static int kvmppc_44x_tlbe_is_writable(struct kvmppc_44x_tlbe *tlbe)
@@ -143,8 +147,9 @@ static int kvmppc_44x_tlbe_is_writable(struct kvmppc_44x_tlbe *tlbe)
 static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
                                       unsigned int index)
 {
-	struct kvmppc_44x_tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
-	struct page *page = vcpu->arch.shadow_pages[index];
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[index];
+	struct page *page = vcpu_44x->shadow_pages[index];
 
 	if (get_tlb_v(stlbe)) {
 		if (kvmppc_44x_tlbe_is_writable(stlbe))
@@ -164,7 +169,9 @@ void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
 
 void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
 {
-    vcpu->arch.shadow_tlb_mod[i] = 1;
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+
+	vcpu_44x->shadow_tlb_mod[i] = 1;
 }
 
 /* Caller must ensure that the specified guest TLB entry is safe to insert into
@@ -172,6 +179,7 @@ void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
 void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
                     u32 flags)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	struct page *new_page;
 	struct kvmppc_44x_tlbe *stlbe;
 	hpa_t hpaddr;
@@ -182,7 +190,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 	victim = kvmppc_tlb_44x_pos++;
 	if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
 		kvmppc_tlb_44x_pos = 0;
-	stlbe = &vcpu->arch.shadow_tlb[victim];
+	stlbe = &vcpu_44x->shadow_tlb[victim];
 
 	/* Get reference to new page. */
 	new_page = gfn_to_page(vcpu->kvm, gfn);
@@ -196,7 +204,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 	/* Drop reference to old page. */
 	kvmppc_44x_shadow_release(vcpu, victim);
 
-	vcpu->arch.shadow_pages[victim] = new_page;
+	vcpu_44x->shadow_pages[victim] = new_page;
 
 	/* XXX Make sure (va, size) doesn't overlap any other
 	 * entries. 440x6 user manual says the result would be
@@ -224,12 +232,13 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 static void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
                                   gva_t eend, u32 asid)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	unsigned int pid = !(asid & 0xff);
 	int i;
 
 	/* XXX Replace loop with fancy data structures. */
 	for (i = 0; i <= tlb_44x_hwater; i++) {
-		struct kvmppc_44x_tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
+		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
 		unsigned int tid;
 
 		if (!get_tlb_v(stlbe))
@@ -259,12 +268,13 @@ static void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
  * switching address spaces. */
 void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
 
 	if (vcpu->arch.swap_pid) {
 		/* XXX Replace loop with fancy data structures. */
 		for (i = 0; i <= tlb_44x_hwater; i++) {
-			struct kvmppc_44x_tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
+			struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
 
 			/* Future optimization: clear only userspace mappings. */
 			kvmppc_44x_shadow_release(vcpu, i);
@@ -303,6 +313,7 @@ static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
 
 int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	u64 eaddr;
 	u64 raddr;
 	u64 asid;
@@ -317,7 +328,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 		return EMULATE_FAIL;
 	}
 
-	tlbe = &vcpu->arch.guest_tlb[index];
+	tlbe = &vcpu_44x->guest_tlb[index];
 
 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
 	if (tlbe->word0 & PPC44x_TLB_VALID) {
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index c619d1b..883e9db 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -573,7 +573,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	return kvmppc_core_vcpu_translate(vcpu, tr);
 }
 
-static int kvmppc_booke_init(void)
+int kvmppc_booke_init(void)
 {
 	unsigned long ivor[16];
 	unsigned long max_ivor = 0;
@@ -618,14 +618,11 @@ static int kvmppc_booke_init(void)
 	flush_icache_range(kvmppc_booke_handlers,
 	                   kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
 
-	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+	return 0;
 }
 
-static void __exit kvmppc_booke_exit(void)
+void __exit kvmppc_booke_exit(void)
 {
 	free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
 	kvm_exit();
 }
-
-module_init(kvmppc_booke_init)
-module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 95e165b..8d6929b 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -349,8 +349,8 @@ lightweight_exit:
 	lis	r5, tlb_44x_hwater@ha
 	lwz	r5, tlb_44x_hwater@l(r5)
 	mtctr	r5
-	addi	r9, r4, VCPU_SHADOW_TLB
-	addi	r5, r4, VCPU_SHADOW_MOD
+	addi	r9, r4, -VCPU_TO_44X + VCPU44x_SHADOW_TLB
+	addi	r5, r4, -VCPU_TO_44X + VCPU44x_SHADOW_MOD
 	li	r3, 0
 1:
 	lbzx	r7, r3, r5
@@ -377,7 +377,7 @@ lightweight_exit:
 	/* Clear bitmap of modified TLB entries */
 	li	r5, PPC44x_TLB_SIZE>>2
 	mtctr	r5
-	addi	r5, r4, VCPU_SHADOW_MOD - 4
+	addi	r5, r4, -VCPU_TO_44X + VCPU44x_SHADOW_MOD - 4
 	li	r6, 0
 1:
 	stwu	r6, 4(r5)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8d0aaf9..237f3ba 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -171,31 +171,12 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
-	struct kvm_vcpu *vcpu;
-	int err;
-
-	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
-	if (!vcpu) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	err = kvm_vcpu_init(vcpu, kvm, id);
-	if (err)
-		goto free_vcpu;
-
-	return vcpu;
-
-free_vcpu:
-	kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
-	return ERR_PTR(err);
+	return kvmppc_core_vcpu_create(kvm, id);
 }
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
-	kvm_vcpu_uninit(vcpu);
-	kmem_cache_free(kvm_vcpu_cache, vcpu);
+	kvmppc_core_vcpu_free(vcpu);
 }
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-- 
cgit v0.10.2


From 5cf8ca22146fa106f3bb865631ec04f5b499508f Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:19 -0600
Subject: KVM: ppc: adjust vcpu types to support 64-bit cores

However, some of these fields could be split into separate per-core structures
in the future.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 765d8ec..a4a7d5e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -84,32 +84,32 @@ struct kvm_vcpu_arch {
 	u32 host_msr;
 
 	u64 fpr[32];
-	u32 gpr[32];
+	ulong gpr[32];
 
-	u32 pc;
+	ulong pc;
 	u32 cr;
-	u32 ctr;
-	u32 lr;
-	u32 xer;
+	ulong ctr;
+	ulong lr;
+	ulong xer;
 
-	u32 msr;
+	ulong msr;
 	u32 mmucr;
-	u32 sprg0;
-	u32 sprg1;
-	u32 sprg2;
-	u32 sprg3;
-	u32 sprg4;
-	u32 sprg5;
-	u32 sprg6;
-	u32 sprg7;
-	u32 srr0;
-	u32 srr1;
-	u32 csrr0;
-	u32 csrr1;
-	u32 dsrr0;
-	u32 dsrr1;
-	u32 dear;
-	u32 esr;
+	ulong sprg0;
+	ulong sprg1;
+	ulong sprg2;
+	ulong sprg3;
+	ulong sprg4;
+	ulong sprg5;
+	ulong sprg6;
+	ulong sprg7;
+	ulong srr0;
+	ulong srr1;
+	ulong csrr0;
+	ulong csrr1;
+	ulong dsrr0;
+	ulong dsrr1;
+	ulong dear;
+	ulong esr;
 	u32 dec;
 	u32 decar;
 	u32 tbl;
@@ -117,7 +117,7 @@ struct kvm_vcpu_arch {
 	u32 tcr;
 	u32 tsr;
 	u32 ivor[16];
-	u32 ivpr;
+	ulong ivpr;
 	u32 pir;
 
 	u32 shadow_pid;
@@ -131,8 +131,8 @@ struct kvm_vcpu_arch {
 	u32 dbcr1;
 
 	u32 last_inst;
-	u32 fault_dear;
-	u32 fault_esr;
+	ulong fault_dear;
+	ulong fault_esr;
 	gpa_t paddr_accessed;
 
 	u8 io_gpr; /* GPR used as IO source/target */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 883e9db..b23cd54 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -120,14 +120,14 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
 {
 	int i;
 
-	printk("pc:   %08x msr:  %08x\n", vcpu->arch.pc, vcpu->arch.msr);
-	printk("lr:   %08x ctr:  %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
-	printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
+	printk("pc:   %08lx msr:  %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
+	printk("lr:   %08lx ctr:  %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
+	printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
 
 	printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
 
 	for (i = 0; i < 32; i += 4) {
-		printk("gpr%02d: %08x %08x %08x %08x\n", i,
+		printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
 		       vcpu->arch.gpr[i],
 		       vcpu->arch.gpr[i+1],
 		       vcpu->arch.gpr[i+2],
@@ -305,7 +305,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			break;
 		case EMULATE_FAIL:
 			/* XXX Deliver Program interrupt to guest. */
-			printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
+			printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
 			       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
 			/* For debugging, encode the failing instruction and
 			 * report it to userspace. */
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 30a49f8..814f1e6 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -380,7 +380,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		}
 	}
 
-	KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit);
+	KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
 
 	if (advance)
 		vcpu->arch.pc += 4; /* Advance past emulated instruction. */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 237f3ba..7ad150e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -256,14 +256,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
                                      struct kvm_run *run)
 {
-	u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+	ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
 	*gpr = run->dcr.data;
 }
 
 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                                       struct kvm_run *run)
 {
-	u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+	ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
 
 	if (run->mmio.len > sizeof(*gpr)) {
 		printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
-- 
cgit v0.10.2


From b8fd68ac8db1f926fdb2c7f196598a279461de53 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:20 -0600
Subject: KVM: ppc: fix set regs to take care of msr change

When changing some msr bits e.g. problem state we need to take special
care of that. We call the function in our mtmsr emulation (not needed for
wrtee[i]), but we don't call kvmppc_set_msr if we change msr via set_regs
ioctl.
It's a corner case we never hit so far, but I assume it should be
kvmppc_set_msr in our arch set regs function (I found it because it is also
a corner case when using pv support which would miss the update otherwise).

Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index b23cd54..dec3f50 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -528,7 +528,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	vcpu->arch.ctr = regs->ctr;
 	vcpu->arch.lr = regs->lr;
 	vcpu->arch.xer = regs->xer;
-	vcpu->arch.msr = regs->msr;
+	kvmppc_set_msr(vcpu, regs->msr);
 	vcpu->arch.srr0 = regs->srr0;
 	vcpu->arch.srr1 = regs->srr1;
 	vcpu->arch.sprg0 = regs->sprg0;
-- 
cgit v0.10.2


From 1b6766c7f3533c5d03668e11dd5617ae4a52e5a8 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:21 -0600
Subject: KVM: ppc: optimize kvm stat handling

Currently we use an unnecessary if&switch to detect some cases.
To be honest we don't need the ligh_exits counter anyway, because we can
calculate it out of others. Sum_exits can also be calculated, so we can
remove that too.
MMIO, DCR  and INTR can be counted on other places without these
additional control structures (The INTR case was never hit anyway).

The handling of BOOKE_INTERRUPT_EXTERNAL/BOOKE_INTERRUPT_DECREMENTER is
similar, but we can avoid the additional if when copying 3 lines of code.
I thought about a goto there to prevent duplicate lines, but rewriting three
lines should be better style than a goto cross switch/case statements (its
also not enough code to justify a new inline function).

Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index dec3f50..b285e3d 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -38,11 +38,9 @@ unsigned long kvmppc_booke_handlers;
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
-	{ "exits",      VCPU_STAT(sum_exits) },
 	{ "mmio",       VCPU_STAT(mmio_exits) },
 	{ "dcr",        VCPU_STAT(dcr_exits) },
 	{ "sig",        VCPU_STAT(signal_exits) },
-	{ "light",      VCPU_STAT(light_exits) },
 	{ "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
 	{ "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
 	{ "dtlb_r",     VCPU_STAT(dtlb_real_miss_exits) },
@@ -263,6 +261,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 
 	case BOOKE_INTERRUPT_EXTERNAL:
+		vcpu->stat.ext_intr_exits++;
+		if (need_resched())
+			cond_resched();
+		r = RESUME_GUEST;
+		break;
+
 	case BOOKE_INTERRUPT_DECREMENTER:
 		/* Since we switched IVPR back to the host's value, the host
 		 * handled this interrupt the moment we enabled interrupts.
@@ -272,12 +276,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		 * we do reschedule the host will fault over it. Perhaps we
 		 * should politely restore the host's entries to minimize
 		 * misses before ceding control. */
+		vcpu->stat.dec_exits++;
 		if (need_resched())
 			cond_resched();
-		if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
-			vcpu->stat.dec_exits++;
-		else
-			vcpu->stat.ext_intr_exits++;
 		r = RESUME_GUEST;
 		break;
 
@@ -301,6 +302,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			break;
 		case EMULATE_DO_DCR:
 			run->exit_reason = KVM_EXIT_DCR;
+			vcpu->stat.dcr_exits++;
 			r = RESUME_HOST;
 			break;
 		case EMULATE_FAIL:
@@ -379,6 +381,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			/* Guest has mapped and accessed a page which is not
 			 * actually RAM. */
 			r = kvmppc_emulate_mmio(run, vcpu);
+			vcpu->stat.mmio_exits++;
 		}
 
 		break;
@@ -445,8 +448,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	kvmppc_core_deliver_interrupts(vcpu);
 
-	/* Do some exit accounting. */
-	vcpu->stat.sum_exits++;
 	if (!(r & RESUME_HOST)) {
 		/* To avoid clobbering exit_reason, only check for signals if
 		 * we aren't already exiting to userspace for some other
@@ -454,22 +455,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		if (signal_pending(current)) {
 			run->exit_reason = KVM_EXIT_INTR;
 			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-
 			vcpu->stat.signal_exits++;
-		} else {
-			vcpu->stat.light_exits++;
-		}
-	} else {
-		switch (run->exit_reason) {
-		case KVM_EXIT_MMIO:
-			vcpu->stat.mmio_exits++;
-			break;
-		case KVM_EXIT_DCR:
-			vcpu->stat.dcr_exits++;
-			break;
-		case KVM_EXIT_INTR:
-			vcpu->stat.signal_exits++;
-			break;
 		}
 	}
 
-- 
cgit v0.10.2


From 9ab80843c01ac25139e635d018467e528729a317 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:22 -0600
Subject: KVM: ppc: optimize find first bit

Since we use a unsigned long here anyway we can use the optimized __ffs.

Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index b285e3d..0f06471 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -222,7 +222,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
 	unsigned int exception;
 	unsigned int priority;
 
-	priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
+	priority = __ffs(*pending);
 	while (priority <= BOOKE_MAX_INTERRUPT) {
 		exception = priority_exception[priority];
 		if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
-- 
cgit v0.10.2


From d4cf3892e50b8e35341086a4fe2bb8a3989b55d4 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:23 -0600
Subject: KVM: ppc: optimize irq delivery path

In kvmppc_deliver_interrupt is just one case left in the switch and it is a
rare one (less than 8%) when looking at the exit numbers. Therefore we can
at least drop the switch/case and if an if. I inserted an unlikely too, but
that's open for discussion.

In kvmppc_can_deliver_interrupt all frequent cases are in the default case.
I know compilers are smart but we can make it easier for them. By writing
down all options and removing the default case combined with the fact that
ithe values are constants 0..15 should allow the compiler to write an easy
jump table.
Modifying kvmppc_can_deliver_interrupt pointed me to the fact that gcc seems
to be unable to reduce priority_exception[x] to a build time constant.
Therefore I changed the usage of the translation arrays in the interrupt
delivery path completely. It is now using priority without translation to irq
on the full irq delivery path.
To be able to do that ivpr regs are stored by their priority now.

Additionally the decision made in kvmppc_can_deliver_interrupt is already
sufficient to get the value of interrupt_msr_mask[x]. Therefore we can replace
the 16x4byte array used here with a single 4byte variable (might still be one
miss, but the chance to find this in cache should be better than the right
entry of the whole array).

Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 976ecc4..844f683 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -36,9 +36,6 @@ enum emulation_result {
 	EMULATE_FAIL,         /* can't emulate this instruction */
 };
 
-extern const unsigned char exception_priority[];
-extern const unsigned char priority_exception[];
-
 extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 extern char kvmppc_handlers_start[];
 extern unsigned long kvmppc_handler_len;
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index a634c0c..9bc50ce 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -228,39 +228,56 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 		vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
 
 	case SPRN_IVPR:
-		vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR0:
-		vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR1:
-		vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR2:
-		vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR3:
-		vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR4:
-		vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR5:
-		vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR6:
-		vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR7:
-		vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR8:
-		vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR9:
-		vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR10:
-		vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR11:
-		vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR12:
-		vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR13:
-		vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR14:
-		vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+		break;
 	case SPRN_IVOR15:
-		vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+		break;
 
 	default:
 		return EMULATE_FAIL;
@@ -295,37 +312,54 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
 
 	case SPRN_IVOR0:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+		break;
 	case SPRN_IVOR1:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+		break;
 	case SPRN_IVOR2:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+		break;
 	case SPRN_IVOR3:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+		break;
 	case SPRN_IVOR4:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+		break;
 	case SPRN_IVOR5:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+		break;
 	case SPRN_IVOR6:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+		break;
 	case SPRN_IVOR7:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+		break;
 	case SPRN_IVOR8:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+		break;
 	case SPRN_IVOR9:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+		break;
 	case SPRN_IVOR10:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+		break;
 	case SPRN_IVOR11:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+		break;
 	case SPRN_IVOR12:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+		break;
 	case SPRN_IVOR13:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+		break;
 	case SPRN_IVOR14:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+		break;
 	case SPRN_IVOR15:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+		break;
+
 	default:
 		return EMULATE_FAIL;
 	}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 0f06471..ec59a67 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -55,64 +55,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
-static const u32 interrupt_msr_mask[16] = {
-	[BOOKE_INTERRUPT_CRITICAL]      = MSR_ME,
-	[BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
-	[BOOKE_INTERRUPT_DATA_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_INST_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_EXTERNAL]      = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_ALIGNMENT]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_PROGRAM]       = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_FP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_SYSCALL]       = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_AP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_DECREMENTER]   = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_FIT]           = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_WATCHDOG]      = MSR_ME,
-	[BOOKE_INTERRUPT_DTLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_ITLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_DEBUG]         = MSR_ME,
-};
-
-const unsigned char exception_priority[] = {
-	[BOOKE_INTERRUPT_DATA_STORAGE] = 0,
-	[BOOKE_INTERRUPT_INST_STORAGE] = 1,
-	[BOOKE_INTERRUPT_ALIGNMENT] = 2,
-	[BOOKE_INTERRUPT_PROGRAM] = 3,
-	[BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
-	[BOOKE_INTERRUPT_SYSCALL] = 5,
-	[BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
-	[BOOKE_INTERRUPT_DTLB_MISS] = 7,
-	[BOOKE_INTERRUPT_ITLB_MISS] = 8,
-	[BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
-	[BOOKE_INTERRUPT_DEBUG] = 10,
-	[BOOKE_INTERRUPT_CRITICAL] = 11,
-	[BOOKE_INTERRUPT_WATCHDOG] = 12,
-	[BOOKE_INTERRUPT_EXTERNAL] = 13,
-	[BOOKE_INTERRUPT_FIT] = 14,
-	[BOOKE_INTERRUPT_DECREMENTER] = 15,
-};
-
-const unsigned char priority_exception[] = {
-	BOOKE_INTERRUPT_DATA_STORAGE,
-	BOOKE_INTERRUPT_INST_STORAGE,
-	BOOKE_INTERRUPT_ALIGNMENT,
-	BOOKE_INTERRUPT_PROGRAM,
-	BOOKE_INTERRUPT_FP_UNAVAIL,
-	BOOKE_INTERRUPT_SYSCALL,
-	BOOKE_INTERRUPT_AP_UNAVAIL,
-	BOOKE_INTERRUPT_DTLB_MISS,
-	BOOKE_INTERRUPT_ITLB_MISS,
-	BOOKE_INTERRUPT_MACHINE_CHECK,
-	BOOKE_INTERRUPT_DEBUG,
-	BOOKE_INTERRUPT_CRITICAL,
-	BOOKE_INTERRUPT_WATCHDOG,
-	BOOKE_INTERRUPT_EXTERNAL,
-	BOOKE_INTERRUPT_FIT,
-	BOOKE_INTERRUPT_DECREMENTER,
-};
-
-
 /* TODO: use vcpu_printf() */
 void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
 {
@@ -133,103 +75,96 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void kvmppc_booke_queue_exception(struct kvm_vcpu *vcpu, int exception)
+static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
+                                       unsigned int priority)
 {
-	unsigned int priority = exception_priority[exception];
 	set_bit(priority, &vcpu->arch.pending_exceptions);
 }
 
-static void kvmppc_booke_clear_exception(struct kvm_vcpu *vcpu, int exception)
-{
-	unsigned int priority = exception_priority[exception];
-	clear_bit(priority, &vcpu->arch.pending_exceptions);
-}
-
 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
 {
-	kvmppc_booke_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
 }
 
 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
 {
-	kvmppc_booke_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
 }
 
 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
 {
-	unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
-	return test_bit(priority, &vcpu->arch.pending_exceptions);
+	return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
 }
 
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                 struct kvm_interrupt *irq)
 {
-	kvmppc_booke_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
 }
 
-/* Check if we are ready to deliver the interrupt */
-static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+/* Deliver the interrupt of the corresponding priority, if possible. */
+static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
+                                        unsigned int priority)
 {
-	int r;
-
-	switch (interrupt) {
-	case BOOKE_INTERRUPT_CRITICAL:
-		r = vcpu->arch.msr & MSR_CE;
+	int allowed = 0;
+	ulong msr_mask;
+
+	switch (priority) {
+	case BOOKE_IRQPRIO_PROGRAM:
+	case BOOKE_IRQPRIO_DTLB_MISS:
+	case BOOKE_IRQPRIO_ITLB_MISS:
+	case BOOKE_IRQPRIO_SYSCALL:
+	case BOOKE_IRQPRIO_DATA_STORAGE:
+	case BOOKE_IRQPRIO_INST_STORAGE:
+	case BOOKE_IRQPRIO_FP_UNAVAIL:
+	case BOOKE_IRQPRIO_AP_UNAVAIL:
+	case BOOKE_IRQPRIO_ALIGNMENT:
+		allowed = 1;
+		msr_mask = MSR_CE|MSR_ME|MSR_DE;
 		break;
-	case BOOKE_INTERRUPT_MACHINE_CHECK:
-		r = vcpu->arch.msr & MSR_ME;
+	case BOOKE_IRQPRIO_CRITICAL:
+	case BOOKE_IRQPRIO_WATCHDOG:
+		allowed = vcpu->arch.msr & MSR_CE;
+		msr_mask = MSR_ME;
 		break;
-	case BOOKE_INTERRUPT_EXTERNAL:
-		r = vcpu->arch.msr & MSR_EE;
-		break;
-	case BOOKE_INTERRUPT_DECREMENTER:
-		r = vcpu->arch.msr & MSR_EE;
+	case BOOKE_IRQPRIO_MACHINE_CHECK:
+		allowed = vcpu->arch.msr & MSR_ME;
+		msr_mask = 0;
 		break;
-	case BOOKE_INTERRUPT_FIT:
-		r = vcpu->arch.msr & MSR_EE;
+	case BOOKE_IRQPRIO_EXTERNAL:
+	case BOOKE_IRQPRIO_DECREMENTER:
+	case BOOKE_IRQPRIO_FIT:
+		allowed = vcpu->arch.msr & MSR_EE;
+		msr_mask = MSR_CE|MSR_ME|MSR_DE;
 		break;
-	case BOOKE_INTERRUPT_WATCHDOG:
-		r = vcpu->arch.msr & MSR_CE;
+	case BOOKE_IRQPRIO_DEBUG:
+		allowed = vcpu->arch.msr & MSR_DE;
+		msr_mask = MSR_ME;
 		break;
-	case BOOKE_INTERRUPT_DEBUG:
-		r = vcpu->arch.msr & MSR_DE;
-		break;
-	default:
-		r = 1;
 	}
 
-	return r;
-}
+	if (allowed) {
+		vcpu->arch.srr0 = vcpu->arch.pc;
+		vcpu->arch.srr1 = vcpu->arch.msr;
+		vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
+		kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
 
-static void kvmppc_booke_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
-{
-	switch (interrupt) {
-	case BOOKE_INTERRUPT_DECREMENTER:
-		vcpu->arch.tsr |= TSR_DIS;
-		break;
+		clear_bit(priority, &vcpu->arch.pending_exceptions);
 	}
 
-	vcpu->arch.srr0 = vcpu->arch.pc;
-	vcpu->arch.srr1 = vcpu->arch.msr;
-	vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
-	kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
+	return allowed;
 }
 
 /* Check pending exceptions and deliver one, if possible. */
 void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
 {
 	unsigned long *pending = &vcpu->arch.pending_exceptions;
-	unsigned int exception;
 	unsigned int priority;
 
 	priority = __ffs(*pending);
 	while (priority <= BOOKE_MAX_INTERRUPT) {
-		exception = priority_exception[priority];
-		if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
-			kvmppc_booke_clear_exception(vcpu, exception);
-			kvmppc_booke_deliver_interrupt(vcpu, exception);
+		if (kvmppc_booke_irqprio_deliver(vcpu, priority))
 			break;
-		}
 
 		priority = find_next_bit(pending,
 		                         BITS_PER_BYTE * sizeof(*pending),
@@ -287,7 +222,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			/* Program traps generated by user-level software must be handled
 			 * by the guest kernel. */
 			vcpu->arch.esr = vcpu->arch.fault_esr;
-			kvmppc_booke_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
 			r = RESUME_GUEST;
 			break;
 		}
@@ -321,27 +256,27 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 
 	case BOOKE_INTERRUPT_FP_UNAVAIL:
-		kvmppc_booke_queue_exception(vcpu, exit_nr);
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_DATA_STORAGE:
 		vcpu->arch.dear = vcpu->arch.fault_dear;
 		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_booke_queue_exception(vcpu, exit_nr);
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
 		vcpu->stat.dsi_exits++;
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_INST_STORAGE:
 		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_booke_queue_exception(vcpu, exit_nr);
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
 		vcpu->stat.isi_exits++;
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_SYSCALL:
-		kvmppc_booke_queue_exception(vcpu, exit_nr);
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
 		vcpu->stat.syscall_exits++;
 		r = RESUME_GUEST;
 		break;
@@ -355,7 +290,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
 		if (!gtlbe) {
 			/* The guest didn't have a mapping for it. */
-			kvmppc_booke_queue_exception(vcpu, exit_nr);
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
 			vcpu->arch.dear = vcpu->arch.fault_dear;
 			vcpu->arch.esr = vcpu->arch.fault_esr;
 			vcpu->stat.dtlb_real_miss_exits++;
@@ -398,7 +333,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
 		if (!gtlbe) {
 			/* The guest didn't have a mapping for it. */
-			kvmppc_booke_queue_exception(vcpu, exit_nr);
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
 			vcpu->stat.itlb_real_miss_exits++;
 			break;
 		}
@@ -418,7 +353,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			               gtlbe->word2);
 		} else {
 			/* Guest mapped and leaped at non-RAM! */
-			kvmppc_booke_queue_exception(vcpu, BOOKE_INTERRUPT_MACHINE_CHECK);
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
 		}
 
 		break;
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index f694a4b..48d905f 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -23,6 +23,24 @@
 #include <linux/types.h>
 #include <linux/kvm_host.h>
 
+/* interrupt priortity ordering */
+#define BOOKE_IRQPRIO_DATA_STORAGE 0
+#define BOOKE_IRQPRIO_INST_STORAGE 1
+#define BOOKE_IRQPRIO_ALIGNMENT 2
+#define BOOKE_IRQPRIO_PROGRAM 3
+#define BOOKE_IRQPRIO_FP_UNAVAIL 4
+#define BOOKE_IRQPRIO_SYSCALL 5
+#define BOOKE_IRQPRIO_AP_UNAVAIL 6
+#define BOOKE_IRQPRIO_DTLB_MISS 7
+#define BOOKE_IRQPRIO_ITLB_MISS 8
+#define BOOKE_IRQPRIO_MACHINE_CHECK 9
+#define BOOKE_IRQPRIO_DEBUG 10
+#define BOOKE_IRQPRIO_CRITICAL 11
+#define BOOKE_IRQPRIO_WATCHDOG 12
+#define BOOKE_IRQPRIO_EXTERNAL 13
+#define BOOKE_IRQPRIO_FIT 14
+#define BOOKE_IRQPRIO_DECREMENTER 15
+
 /* Helper function for "full" MSR writes. No need to call this if only EE is
  * changing. */
 static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
-- 
cgit v0.10.2


From fcfdbd266a41d3e41d17666de410a24995fde03a Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 5 Nov 2008 09:36:24 -0600
Subject: KVM: ppc: improve trap emulation

set ESR[PTR] when emulating a guest trap. This allows Linux guests to
properly handle WARN_ON() (i.e. detect that it's a non-fatal trap).

Also remove debugging printk in trap emulation.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 814f1e6..4c30fa0 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -74,8 +74,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	int advance = 1;
 
 	switch (get_op(inst)) {
-	case 3:                                                 /* trap */
-		printk("trap!\n");
+	case 3:                                             /* trap */
+		vcpu->arch.esr |= ESR_PTR;
 		kvmppc_core_queue_program(vcpu);
 		advance = 0;
 		break;
-- 
cgit v0.10.2


From 0853d2c1d849ef69884d2447d90d04007590b72b Mon Sep 17 00:00:00 2001
From: Nitin A Kamble <nitin.a.kamble@intel.com>
Date: Wed, 5 Nov 2008 15:37:36 -0800
Subject: KVM: Fix cpuid leaf 0xb loop termination

For cpuid leaf 0xb the bits 8-15 in ECX register define the end of counting
leaf.      The previous code was using bits 0-7 for this purpose, which is
a bug.

Signed-off-by: Nitin A Kamble <nitin.a.kamble@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9a4a39c..2889a0f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1276,7 +1276,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		/* read more entries until level_type is zero */
 		for (i = 1; *nent < maxnent; ++i) {
-			level_type = entry[i - 1].ecx & 0xff;
+			level_type = entry[i - 1].ecx & 0xff00;
 			if (!level_type)
 				break;
 			do_cpuid_1_ent(&entry[i], function, i);
-- 
cgit v0.10.2


From 0fdf8e59faa5c60e9d77c8e14abe3a0f8bfcf586 Mon Sep 17 00:00:00 2001
From: Nitin A Kamble <nitin.a.kamble@intel.com>
Date: Wed, 5 Nov 2008 15:56:21 -0800
Subject: KVM: Fix cpuid iteration on multiple leaves per eac

The code to traverse the cpuid data array list for counting type of leaves is
currently broken.

This patches fixes the 2 things in it.

 1. Set the 1st counting entry's flag KVM_CPUID_FLAG_STATE_READ_NEXT. Without
    it the code will never find a valid entry.

 2. Also the stop condition in the for loop while looking for the next unflaged
    entry is broken. It needs to stop when it find one matching entry;
    and in the case of count of 1, it will be the same entry found in this
    iteration.

Signed-Off-By: Nitin A Kamble <nitin.a.kamble@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2889a0f..7a2aeba 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1246,6 +1246,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		int t, times = entry->eax & 0xff;
 
 		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
 		for (t = 1; t < times && *nent < maxnent; ++t) {
 			do_cpuid_1_ent(&entry[t], function, 0);
 			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
@@ -2801,7 +2802,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
 
 	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
 	/* when no next entry is found, the current entry[i] is reselected */
-	for (j = i + 1; j == i; j = (j + 1) % nent) {
+	for (j = i + 1; ; j = (j + 1) % nent) {
 		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
 		if (ej->function == e->function) {
 			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
-- 
cgit v0.10.2


From 78749809222be5083e21bfe697b44ab797e5c0a8 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Fri, 7 Nov 2008 13:32:12 -0600
Subject: KVM: ensure that memslot userspace addresses are page-aligned

Bad page translation and silent guest failure ensue if the userspace address is
not page-aligned.  I hit this problem using large (host) pages with qemu,
because qemu currently has a hardcoded 4096-byte alignment for guest memory
allocations.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a65baa9..0a0a959 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -715,6 +715,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		goto out;
 	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
 		goto out;
+	if (mem->userspace_addr & (PAGE_SIZE - 1))
+		goto out;
 	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 		goto out;
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
-- 
cgit v0.10.2


From 74ef740da64fd82a14dbab6d7f43d798ecc1b6cc Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Fri, 7 Nov 2008 13:15:13 -0600
Subject: KVM: ppc: fix Kconfig constraints

Make sure that CONFIG_KVM cannot be selected without processor support
(currently, 440 is the only processor implementation available).

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 37e9b3c..e4ab1c7 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -15,25 +15,23 @@ menuconfig VIRTUALIZATION
 if VIRTUALIZATION
 
 config KVM
-	bool "Kernel-based Virtual Machine (KVM) support"
-	depends on EXPERIMENTAL
+	bool
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+
+config KVM_440
+	bool "KVM support for PowerPC 440 processors"
+	depends on EXPERIMENTAL && 44x
+	select KVM
 	---help---
-	  Support hosting virtualized guest machines. You will also
-	  need to select one or more of the processor modules below.
+	  Support running unmodified 440 guest kernels in virtual machines on
+	  440 host processors.
 
 	  This module provides access to the hardware capabilities through
 	  a character device node named /dev/kvm.
 
 	  If unsure, say N.
 
-config KVM_440
-	bool "KVM support for PowerPC 440 processors"
-	depends on KVM && 44x
-	---help---
-	  KVM can run unmodified 440 guest kernels on 440 host processors.
-
 config KVM_TRACE
 	bool "KVM trace support"
 	depends on KVM && MARKERS && SYSFS
-- 
cgit v0.10.2


From 30ed5bb685ab03c9bdf812502900b65087d61490 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Fri, 24 Oct 2008 11:47:57 +0800
Subject: KVM: ia64: Remove some macro definitions in asm-offsets.c.

Use kernel's corresponding macro instead.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
index 4e3dc13..0c3564a 100644
--- a/arch/ia64/kvm/asm-offsets.c
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -24,19 +24,10 @@
 
 #include <linux/autoconf.h>
 #include <linux/kvm_host.h>
+#include <linux/kbuild.h>
 
 #include "vcpu.h"
 
-#define task_struct kvm_vcpu
-
-#define DEFINE(sym, val) \
-	asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : :)
-
-#define OFFSET(_sym, _str, _mem) \
-    DEFINE(_sym, offsetof(_str, _mem));
-
 void foo(void)
 {
 	DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
-- 
cgit v0.10.2


From e7cacd40d20849f69c908f1290c714145073685a Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Tue, 11 Nov 2008 15:30:40 +0800
Subject: KVM: Fix kernel allocated memory slot

Commit 7fd49de9773fdcb7b75e823b21c1c5dc1e218c14 "KVM: ensure that memslot
userspace addresses are page-aligned" broke kernel space allocated memory
slot, for the userspace_addr is invalid.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0a0a959..4727c08 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -715,7 +715,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		goto out;
 	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
 		goto out;
-	if (mem->userspace_addr & (PAGE_SIZE - 1))
+	if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
 		goto out;
 	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 		goto out;
-- 
cgit v0.10.2


From bf5d4025c9fe8a64c5905c00bf4292319d634903 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Mon, 10 Nov 2008 14:57:34 -0600
Subject: KVM: ppc: use MMUCR accessor to obtain TID

We have an accessor; might as well use it.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 8b65fbd..260fa8b 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -339,7 +339,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 
 	switch (ws) {
 	case PPC44x_TLB_PAGEID:
-		tlbe->tid = vcpu->arch.mmucr & 0xff;
+		tlbe->tid = get_mmucr_stid(vcpu);
 		tlbe->word0 = vcpu->arch.gpr[rs];
 		break;
 
-- 
cgit v0.10.2


From df9b856c454e331bc394c80903fcdea19cae2a33 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Mon, 10 Nov 2008 14:57:35 -0600
Subject: KVM: ppc: use prefetchable mappings for guest memory

Bare metal Linux on 440 can "overmap" RAM in the kernel linear map, so that it
can use large (256MB) mappings even if memory isn't a multiple of 256MB. To
prevent the hardware prefetcher from loading from an invalid physical address
through that mapping, it's marked Guarded.

However, KVM must ensure that all guest mappings are backed by real physical
RAM (since a deliberate access through a guarded mapping could still cause a
machine check). Accordingly, we don't need to make our mappings guarded, so
let's allow prefetching as the designers intended.

Curiously this patch didn't affect performance at all on the quick test I
tried, but it's clearly the right thing to do anyways and may improve other
workloads.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 260fa8b..6fadbd6 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -28,6 +28,8 @@
 
 #include "44x_tlb.h"
 
+#define PPC44x_TLB_UATTR_MASK \
+	(PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
 #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
 #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
 
@@ -63,8 +65,8 @@ void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
 
 static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
 {
-	/* Mask off reserved bits. */
-	attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK;
+	/* We only care about the guest's permission and user bits. */
+	attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK;
 
 	if (!usermode) {
 		/* Guest is in supervisor mode, so we need to translate guest
@@ -76,6 +78,9 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
 	/* Make sure host can always access this memory. */
 	attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
 
+	/* WIMGE = 0b00100 */
+	attrib |= PPC44x_TLB_M;
+
 	return attrib;
 }
 
-- 
cgit v0.10.2


From fe4e771d5c37f0949047faf95d16a512b21406bf Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Mon, 10 Nov 2008 14:57:36 -0600
Subject: KVM: ppc: fix userspace mapping invalidation on context switch

We used to defer invalidating userspace TLB entries until jumping out of the
kernel. This was causing MMU weirdness most easily triggered by using a pipe in
the guest, e.g. "dmesg | tail". I believe the problem was that after the guest
kernel changed the PID (part of context switch), the old process's mappings
were still present, and so copy_to_user() on the "return to new process" path
ended up using stale mappings.

Testing with large pages (64K) exposed the problem, probably because with 4K
pages, pressure on the TLB faulted all process A's mappings out before the
guest kernel could insert any for process B.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index dece093..72e5939 100644
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -44,4 +44,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
 }
 
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
+
 #endif /* __ASM_44X_H__ */
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 9bc50ce..9ef79c7 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -21,6 +21,7 @@
 #include <asm/dcr.h>
 #include <asm/dcr-regs.h>
 #include <asm/disassemble.h>
+#include <asm/kvm_44x.h>
 
 #include "booke.h"
 #include "44x_tlb.h"
@@ -38,14 +39,6 @@
 #define XOP_ICCCI   966
 #define XOP_TLBWE   978
 
-static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
-{
-	if (vcpu->arch.pid != new_pid) {
-		vcpu->arch.pid = new_pid;
-		vcpu->arch.swap_pid = 1;
-	}
-}
-
 static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.pc = vcpu->arch.srr0;
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 6fadbd6..ee24618 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -268,31 +268,34 @@ static void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	}
 }
 
-/* Invalidate all mappings on the privilege switch after PID has been changed.
- * The guest always runs with PID=1, so we must clear the entire TLB when
- * switching address spaces. */
 void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
 {
+	vcpu->arch.shadow_pid = !usermode;
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
+{
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
 
-	if (vcpu->arch.swap_pid) {
-		/* XXX Replace loop with fancy data structures. */
-		for (i = 0; i <= tlb_44x_hwater; i++) {
-			struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
+	if (unlikely(vcpu->arch.pid == new_pid))
+		return;
+
+	vcpu->arch.pid = new_pid;
+
+	/* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
+	 * can't access guest kernel mappings (TID=1). When we switch to a new
+	 * guest PID, which will also use host PID=0, we must discard the old guest
+	 * userspace mappings. */
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_tlb); i++) {
+		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
 
-			/* Future optimization: clear only userspace mappings. */
+		if (get_tlb_tid(stlbe) == 0) {
 			kvmppc_44x_shadow_release(vcpu, i);
 			stlbe->word0 = 0;
 			kvmppc_tlbe_set_modified(vcpu, i);
-			KVMTRACE_5D(STLB_INVAL, vcpu, i,
-			            stlbe->tid, stlbe->word0, stlbe->word1,
-			            stlbe->word2, handler);
 		}
-		vcpu->arch.swap_pid = 0;
 	}
-
-	vcpu->arch.shadow_pid = !usermode;
 }
 
 static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
-- 
cgit v0.10.2


From 13673a90f1cf88296f726265cc7cf3ec76ecba30 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:13 -0200
Subject: KVM: VMX: move vmx.h to include/asm

vmx.h will be used by core code that is independent of KVM, so I am
moving it outside the arch/x86/kvm directory.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
new file mode 100644
index 0000000..3db236c
--- /dev/null
+++ b/arch/x86/include/asm/vmx.h
@@ -0,0 +1,367 @@
+#ifndef VMX_H
+#define VMX_H
+
+/*
+ * vmx.h: VMX Architecture related definitions
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * A few random additions are:
+ * Copyright (C) 2006 Qumranet
+ *    Avi Kivity <avi@qumranet.com>
+ *    Yaniv Kamay <yaniv@qumranet.com>
+ *
+ */
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_VIRTUAL_INTR_PENDING          0x00000004
+#define CPU_BASED_USE_TSC_OFFSETING             0x00000008
+#define CPU_BASED_HLT_EXITING                   0x00000080
+#define CPU_BASED_INVLPG_EXITING                0x00000200
+#define CPU_BASED_MWAIT_EXITING                 0x00000400
+#define CPU_BASED_RDPMC_EXITING                 0x00000800
+#define CPU_BASED_RDTSC_EXITING                 0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING		0x00008000
+#define CPU_BASED_CR3_STORE_EXITING		0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING              0x00080000
+#define CPU_BASED_CR8_STORE_EXITING             0x00100000
+#define CPU_BASED_TPR_SHADOW                    0x00200000
+#define CPU_BASED_VIRTUAL_NMI_PENDING		0x00400000
+#define CPU_BASED_MOV_DR_EXITING                0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING             0x01000000
+#define CPU_BASED_USE_IO_BITMAPS                0x02000000
+#define CPU_BASED_USE_MSR_BITMAPS               0x10000000
+#define CPU_BASED_MONITOR_EXITING               0x20000000
+#define CPU_BASED_PAUSE_EXITING                 0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS   0x80000000
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT               0x00000002
+#define SECONDARY_EXEC_ENABLE_VPID              0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
+
+
+#define PIN_BASED_EXT_INTR_MASK                 0x00000001
+#define PIN_BASED_NMI_EXITING                   0x00000008
+#define PIN_BASED_VIRTUAL_NMIS                  0x00000020
+
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
+#define VM_EXIT_ACK_INTR_ON_EXIT                0x00008000
+#define VM_EXIT_SAVE_IA32_PAT			0x00040000
+#define VM_EXIT_LOAD_IA32_PAT			0x00080000
+
+#define VM_ENTRY_IA32E_MODE                     0x00000200
+#define VM_ENTRY_SMM                            0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR             0x00000800
+#define VM_ENTRY_LOAD_IA32_PAT			0x00004000
+
+/* VMCS Encodings */
+enum vmcs_field {
+	VIRTUAL_PROCESSOR_ID            = 0x00000000,
+	GUEST_ES_SELECTOR               = 0x00000800,
+	GUEST_CS_SELECTOR               = 0x00000802,
+	GUEST_SS_SELECTOR               = 0x00000804,
+	GUEST_DS_SELECTOR               = 0x00000806,
+	GUEST_FS_SELECTOR               = 0x00000808,
+	GUEST_GS_SELECTOR               = 0x0000080a,
+	GUEST_LDTR_SELECTOR             = 0x0000080c,
+	GUEST_TR_SELECTOR               = 0x0000080e,
+	HOST_ES_SELECTOR                = 0x00000c00,
+	HOST_CS_SELECTOR                = 0x00000c02,
+	HOST_SS_SELECTOR                = 0x00000c04,
+	HOST_DS_SELECTOR                = 0x00000c06,
+	HOST_FS_SELECTOR                = 0x00000c08,
+	HOST_GS_SELECTOR                = 0x00000c0a,
+	HOST_TR_SELECTOR                = 0x00000c0c,
+	IO_BITMAP_A                     = 0x00002000,
+	IO_BITMAP_A_HIGH                = 0x00002001,
+	IO_BITMAP_B                     = 0x00002002,
+	IO_BITMAP_B_HIGH                = 0x00002003,
+	MSR_BITMAP                      = 0x00002004,
+	MSR_BITMAP_HIGH                 = 0x00002005,
+	VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
+	VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
+	VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
+	VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
+	VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
+	VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
+	TSC_OFFSET                      = 0x00002010,
+	TSC_OFFSET_HIGH                 = 0x00002011,
+	VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
+	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+	APIC_ACCESS_ADDR		= 0x00002014,
+	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
+	EPT_POINTER                     = 0x0000201a,
+	EPT_POINTER_HIGH                = 0x0000201b,
+	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
+	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
+	VMCS_LINK_POINTER               = 0x00002800,
+	VMCS_LINK_POINTER_HIGH          = 0x00002801,
+	GUEST_IA32_DEBUGCTL             = 0x00002802,
+	GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+	GUEST_IA32_PAT			= 0x00002804,
+	GUEST_IA32_PAT_HIGH		= 0x00002805,
+	GUEST_PDPTR0                    = 0x0000280a,
+	GUEST_PDPTR0_HIGH               = 0x0000280b,
+	GUEST_PDPTR1                    = 0x0000280c,
+	GUEST_PDPTR1_HIGH               = 0x0000280d,
+	GUEST_PDPTR2                    = 0x0000280e,
+	GUEST_PDPTR2_HIGH               = 0x0000280f,
+	GUEST_PDPTR3                    = 0x00002810,
+	GUEST_PDPTR3_HIGH               = 0x00002811,
+	HOST_IA32_PAT			= 0x00002c00,
+	HOST_IA32_PAT_HIGH		= 0x00002c01,
+	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
+	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
+	EXCEPTION_BITMAP                = 0x00004004,
+	PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
+	PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
+	CR3_TARGET_COUNT                = 0x0000400a,
+	VM_EXIT_CONTROLS                = 0x0000400c,
+	VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
+	VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
+	VM_ENTRY_CONTROLS               = 0x00004012,
+	VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
+	VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
+	VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
+	VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
+	TPR_THRESHOLD                   = 0x0000401c,
+	SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
+	VM_INSTRUCTION_ERROR            = 0x00004400,
+	VM_EXIT_REASON                  = 0x00004402,
+	VM_EXIT_INTR_INFO               = 0x00004404,
+	VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
+	IDT_VECTORING_INFO_FIELD        = 0x00004408,
+	IDT_VECTORING_ERROR_CODE        = 0x0000440a,
+	VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
+	VMX_INSTRUCTION_INFO            = 0x0000440e,
+	GUEST_ES_LIMIT                  = 0x00004800,
+	GUEST_CS_LIMIT                  = 0x00004802,
+	GUEST_SS_LIMIT                  = 0x00004804,
+	GUEST_DS_LIMIT                  = 0x00004806,
+	GUEST_FS_LIMIT                  = 0x00004808,
+	GUEST_GS_LIMIT                  = 0x0000480a,
+	GUEST_LDTR_LIMIT                = 0x0000480c,
+	GUEST_TR_LIMIT                  = 0x0000480e,
+	GUEST_GDTR_LIMIT                = 0x00004810,
+	GUEST_IDTR_LIMIT                = 0x00004812,
+	GUEST_ES_AR_BYTES               = 0x00004814,
+	GUEST_CS_AR_BYTES               = 0x00004816,
+	GUEST_SS_AR_BYTES               = 0x00004818,
+	GUEST_DS_AR_BYTES               = 0x0000481a,
+	GUEST_FS_AR_BYTES               = 0x0000481c,
+	GUEST_GS_AR_BYTES               = 0x0000481e,
+	GUEST_LDTR_AR_BYTES             = 0x00004820,
+	GUEST_TR_AR_BYTES               = 0x00004822,
+	GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
+	GUEST_ACTIVITY_STATE            = 0X00004826,
+	GUEST_SYSENTER_CS               = 0x0000482A,
+	HOST_IA32_SYSENTER_CS           = 0x00004c00,
+	CR0_GUEST_HOST_MASK             = 0x00006000,
+	CR4_GUEST_HOST_MASK             = 0x00006002,
+	CR0_READ_SHADOW                 = 0x00006004,
+	CR4_READ_SHADOW                 = 0x00006006,
+	CR3_TARGET_VALUE0               = 0x00006008,
+	CR3_TARGET_VALUE1               = 0x0000600a,
+	CR3_TARGET_VALUE2               = 0x0000600c,
+	CR3_TARGET_VALUE3               = 0x0000600e,
+	EXIT_QUALIFICATION              = 0x00006400,
+	GUEST_LINEAR_ADDRESS            = 0x0000640a,
+	GUEST_CR0                       = 0x00006800,
+	GUEST_CR3                       = 0x00006802,
+	GUEST_CR4                       = 0x00006804,
+	GUEST_ES_BASE                   = 0x00006806,
+	GUEST_CS_BASE                   = 0x00006808,
+	GUEST_SS_BASE                   = 0x0000680a,
+	GUEST_DS_BASE                   = 0x0000680c,
+	GUEST_FS_BASE                   = 0x0000680e,
+	GUEST_GS_BASE                   = 0x00006810,
+	GUEST_LDTR_BASE                 = 0x00006812,
+	GUEST_TR_BASE                   = 0x00006814,
+	GUEST_GDTR_BASE                 = 0x00006816,
+	GUEST_IDTR_BASE                 = 0x00006818,
+	GUEST_DR7                       = 0x0000681a,
+	GUEST_RSP                       = 0x0000681c,
+	GUEST_RIP                       = 0x0000681e,
+	GUEST_RFLAGS                    = 0x00006820,
+	GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
+	GUEST_SYSENTER_ESP              = 0x00006824,
+	GUEST_SYSENTER_EIP              = 0x00006826,
+	HOST_CR0                        = 0x00006c00,
+	HOST_CR3                        = 0x00006c02,
+	HOST_CR4                        = 0x00006c04,
+	HOST_FS_BASE                    = 0x00006c06,
+	HOST_GS_BASE                    = 0x00006c08,
+	HOST_TR_BASE                    = 0x00006c0a,
+	HOST_GDTR_BASE                  = 0x00006c0c,
+	HOST_IDTR_BASE                  = 0x00006c0e,
+	HOST_IA32_SYSENTER_ESP          = 0x00006c10,
+	HOST_IA32_SYSENTER_EIP          = 0x00006c12,
+	HOST_RSP                        = 0x00006c14,
+	HOST_RIP                        = 0x00006c16,
+};
+
+#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+
+#define EXIT_REASON_EXCEPTION_NMI       0
+#define EXIT_REASON_EXTERNAL_INTERRUPT  1
+#define EXIT_REASON_TRIPLE_FAULT        2
+
+#define EXIT_REASON_PENDING_INTERRUPT   7
+#define EXIT_REASON_NMI_WINDOW		8
+#define EXIT_REASON_TASK_SWITCH         9
+#define EXIT_REASON_CPUID               10
+#define EXIT_REASON_HLT                 12
+#define EXIT_REASON_INVLPG              14
+#define EXIT_REASON_RDPMC               15
+#define EXIT_REASON_RDTSC               16
+#define EXIT_REASON_VMCALL              18
+#define EXIT_REASON_VMCLEAR             19
+#define EXIT_REASON_VMLAUNCH            20
+#define EXIT_REASON_VMPTRLD             21
+#define EXIT_REASON_VMPTRST             22
+#define EXIT_REASON_VMREAD              23
+#define EXIT_REASON_VMRESUME            24
+#define EXIT_REASON_VMWRITE             25
+#define EXIT_REASON_VMOFF               26
+#define EXIT_REASON_VMON                27
+#define EXIT_REASON_CR_ACCESS           28
+#define EXIT_REASON_DR_ACCESS           29
+#define EXIT_REASON_IO_INSTRUCTION      30
+#define EXIT_REASON_MSR_READ            31
+#define EXIT_REASON_MSR_WRITE           32
+#define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_EPT_VIOLATION       48
+#define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_WBINVD		54
+
+/*
+ * Interruption-information format
+ */
+#define INTR_INFO_VECTOR_MASK           0xff            /* 7:0 */
+#define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
+#define INTR_INFO_DELIVER_CODE_MASK     0x800           /* 11 */
+#define INTR_INFO_UNBLOCK_NMI		0x1000		/* 12 */
+#define INTR_INFO_VALID_MASK            0x80000000      /* 31 */
+#define INTR_INFO_RESVD_BITS_MASK       0x7ffff000
+
+#define VECTORING_INFO_VECTOR_MASK           	INTR_INFO_VECTOR_MASK
+#define VECTORING_INFO_TYPE_MASK        	INTR_INFO_INTR_TYPE_MASK
+#define VECTORING_INFO_DELIVER_CODE_MASK    	INTR_INFO_DELIVER_CODE_MASK
+#define VECTORING_INFO_VALID_MASK       	INTR_INFO_VALID_MASK
+
+#define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
+#define INTR_TYPE_NMI_INTR		(2 << 8) /* NMI */
+#define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
+#define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
+
+/* GUEST_INTERRUPTIBILITY_INFO flags. */
+#define GUEST_INTR_STATE_STI		0x00000001
+#define GUEST_INTR_STATE_MOV_SS		0x00000002
+#define GUEST_INTR_STATE_SMI		0x00000004
+#define GUEST_INTR_STATE_NMI		0x00000008
+
+/*
+ * Exit Qualifications for MOV for Control Register Access
+ */
+#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control reg.*/
+#define CONTROL_REG_ACCESS_TYPE         0x30    /* 5:4, access type */
+#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose reg. */
+#define LMSW_SOURCE_DATA_SHIFT 16
+#define LMSW_SOURCE_DATA  (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */
+#define REG_EAX                         (0 << 8)
+#define REG_ECX                         (1 << 8)
+#define REG_EDX                         (2 << 8)
+#define REG_EBX                         (3 << 8)
+#define REG_ESP                         (4 << 8)
+#define REG_EBP                         (5 << 8)
+#define REG_ESI                         (6 << 8)
+#define REG_EDI                         (7 << 8)
+#define REG_R8                         (8 << 8)
+#define REG_R9                         (9 << 8)
+#define REG_R10                        (10 << 8)
+#define REG_R11                        (11 << 8)
+#define REG_R12                        (12 << 8)
+#define REG_R13                        (13 << 8)
+#define REG_R14                        (14 << 8)
+#define REG_R15                        (15 << 8)
+
+/*
+ * Exit Qualifications for MOV for Debug Register Access
+ */
+#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug reg. */
+#define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
+#define TYPE_MOV_TO_DR                  (0 << 4)
+#define TYPE_MOV_FROM_DR                (1 << 4)
+#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose reg. */
+
+
+/* segment AR */
+#define SEGMENT_AR_L_MASK (1 << 13)
+
+#define AR_TYPE_ACCESSES_MASK 1
+#define AR_TYPE_READABLE_MASK (1 << 1)
+#define AR_TYPE_WRITEABLE_MASK (1 << 2)
+#define AR_TYPE_CODE_MASK (1 << 3)
+#define AR_TYPE_MASK 0x0f
+#define AR_TYPE_BUSY_64_TSS 11
+#define AR_TYPE_BUSY_32_TSS 11
+#define AR_TYPE_BUSY_16_TSS 3
+#define AR_TYPE_LDT 2
+
+#define AR_UNUSABLE_MASK (1 << 16)
+#define AR_S_MASK (1 << 4)
+#define AR_P_MASK (1 << 7)
+#define AR_L_MASK (1 << 13)
+#define AR_DB_MASK (1 << 14)
+#define AR_G_MASK (1 << 15)
+#define AR_DPL_SHIFT 5
+#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
+
+#define AR_RESERVD_MASK 0xfffe0f00
+
+#define TSS_PRIVATE_MEMSLOT			(KVM_MEMORY_SLOTS + 0)
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 1)
+#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 2)
+
+#define VMX_NR_VPIDS				(1 << 16)
+#define VMX_VPID_EXTENT_SINGLE_CONTEXT		1
+#define VMX_VPID_EXTENT_ALL_CONTEXT		2
+
+#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR		0
+#define VMX_EPT_EXTENT_CONTEXT			1
+#define VMX_EPT_EXTENT_GLOBAL			2
+#define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
+#define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
+#define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
+#define VMX_EPT_DEFAULT_GAW			3
+#define VMX_EPT_MAX_GAW				0x4
+#define VMX_EPT_MT_EPTE_SHIFT			3
+#define VMX_EPT_GAW_EPTP_SHIFT			3
+#define VMX_EPT_DEFAULT_MT			0x6ull
+#define VMX_EPT_READABLE_MASK			0x1ull
+#define VMX_EPT_WRITABLE_MASK			0x2ull
+#define VMX_EPT_EXECUTABLE_MASK			0x4ull
+#define VMX_EPT_IGMT_BIT    			(1ull << 6)
+
+#define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
+
+#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8904e8a..fa3486d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -17,7 +17,6 @@
  *
  */
 
-#include "vmx.h"
 #include "mmu.h"
 
 #include <linux/kvm_host.h>
@@ -33,6 +32,7 @@
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
 #include <asm/io.h>
+#include <asm/vmx.h>
 
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 427dbc1..ec71f64 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -16,7 +16,6 @@
  */
 
 #include "irq.h"
-#include "vmx.h"
 #include "mmu.h"
 
 #include <linux/kvm_host.h>
@@ -31,6 +30,7 @@
 
 #include <asm/io.h>
 #include <asm/desc.h>
+#include <asm/vmx.h>
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
deleted file mode 100644
index 3db236c..0000000
--- a/arch/x86/kvm/vmx.h
+++ /dev/null
@@ -1,367 +0,0 @@
-#ifndef VMX_H
-#define VMX_H
-
-/*
- * vmx.h: VMX Architecture related definitions
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * A few random additions are:
- * Copyright (C) 2006 Qumranet
- *    Avi Kivity <avi@qumranet.com>
- *    Yaniv Kamay <yaniv@qumranet.com>
- *
- */
-
-/*
- * Definitions of Primary Processor-Based VM-Execution Controls.
- */
-#define CPU_BASED_VIRTUAL_INTR_PENDING          0x00000004
-#define CPU_BASED_USE_TSC_OFFSETING             0x00000008
-#define CPU_BASED_HLT_EXITING                   0x00000080
-#define CPU_BASED_INVLPG_EXITING                0x00000200
-#define CPU_BASED_MWAIT_EXITING                 0x00000400
-#define CPU_BASED_RDPMC_EXITING                 0x00000800
-#define CPU_BASED_RDTSC_EXITING                 0x00001000
-#define CPU_BASED_CR3_LOAD_EXITING		0x00008000
-#define CPU_BASED_CR3_STORE_EXITING		0x00010000
-#define CPU_BASED_CR8_LOAD_EXITING              0x00080000
-#define CPU_BASED_CR8_STORE_EXITING             0x00100000
-#define CPU_BASED_TPR_SHADOW                    0x00200000
-#define CPU_BASED_VIRTUAL_NMI_PENDING		0x00400000
-#define CPU_BASED_MOV_DR_EXITING                0x00800000
-#define CPU_BASED_UNCOND_IO_EXITING             0x01000000
-#define CPU_BASED_USE_IO_BITMAPS                0x02000000
-#define CPU_BASED_USE_MSR_BITMAPS               0x10000000
-#define CPU_BASED_MONITOR_EXITING               0x20000000
-#define CPU_BASED_PAUSE_EXITING                 0x40000000
-#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS   0x80000000
-/*
- * Definitions of Secondary Processor-Based VM-Execution Controls.
- */
-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
-#define SECONDARY_EXEC_ENABLE_EPT               0x00000002
-#define SECONDARY_EXEC_ENABLE_VPID              0x00000020
-#define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
-
-
-#define PIN_BASED_EXT_INTR_MASK                 0x00000001
-#define PIN_BASED_NMI_EXITING                   0x00000008
-#define PIN_BASED_VIRTUAL_NMIS                  0x00000020
-
-#define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
-#define VM_EXIT_ACK_INTR_ON_EXIT                0x00008000
-#define VM_EXIT_SAVE_IA32_PAT			0x00040000
-#define VM_EXIT_LOAD_IA32_PAT			0x00080000
-
-#define VM_ENTRY_IA32E_MODE                     0x00000200
-#define VM_ENTRY_SMM                            0x00000400
-#define VM_ENTRY_DEACT_DUAL_MONITOR             0x00000800
-#define VM_ENTRY_LOAD_IA32_PAT			0x00004000
-
-/* VMCS Encodings */
-enum vmcs_field {
-	VIRTUAL_PROCESSOR_ID            = 0x00000000,
-	GUEST_ES_SELECTOR               = 0x00000800,
-	GUEST_CS_SELECTOR               = 0x00000802,
-	GUEST_SS_SELECTOR               = 0x00000804,
-	GUEST_DS_SELECTOR               = 0x00000806,
-	GUEST_FS_SELECTOR               = 0x00000808,
-	GUEST_GS_SELECTOR               = 0x0000080a,
-	GUEST_LDTR_SELECTOR             = 0x0000080c,
-	GUEST_TR_SELECTOR               = 0x0000080e,
-	HOST_ES_SELECTOR                = 0x00000c00,
-	HOST_CS_SELECTOR                = 0x00000c02,
-	HOST_SS_SELECTOR                = 0x00000c04,
-	HOST_DS_SELECTOR                = 0x00000c06,
-	HOST_FS_SELECTOR                = 0x00000c08,
-	HOST_GS_SELECTOR                = 0x00000c0a,
-	HOST_TR_SELECTOR                = 0x00000c0c,
-	IO_BITMAP_A                     = 0x00002000,
-	IO_BITMAP_A_HIGH                = 0x00002001,
-	IO_BITMAP_B                     = 0x00002002,
-	IO_BITMAP_B_HIGH                = 0x00002003,
-	MSR_BITMAP                      = 0x00002004,
-	MSR_BITMAP_HIGH                 = 0x00002005,
-	VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
-	VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
-	VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
-	VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
-	VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
-	VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
-	TSC_OFFSET                      = 0x00002010,
-	TSC_OFFSET_HIGH                 = 0x00002011,
-	VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
-	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
-	APIC_ACCESS_ADDR		= 0x00002014,
-	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
-	EPT_POINTER                     = 0x0000201a,
-	EPT_POINTER_HIGH                = 0x0000201b,
-	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
-	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
-	VMCS_LINK_POINTER               = 0x00002800,
-	VMCS_LINK_POINTER_HIGH          = 0x00002801,
-	GUEST_IA32_DEBUGCTL             = 0x00002802,
-	GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
-	GUEST_IA32_PAT			= 0x00002804,
-	GUEST_IA32_PAT_HIGH		= 0x00002805,
-	GUEST_PDPTR0                    = 0x0000280a,
-	GUEST_PDPTR0_HIGH               = 0x0000280b,
-	GUEST_PDPTR1                    = 0x0000280c,
-	GUEST_PDPTR1_HIGH               = 0x0000280d,
-	GUEST_PDPTR2                    = 0x0000280e,
-	GUEST_PDPTR2_HIGH               = 0x0000280f,
-	GUEST_PDPTR3                    = 0x00002810,
-	GUEST_PDPTR3_HIGH               = 0x00002811,
-	HOST_IA32_PAT			= 0x00002c00,
-	HOST_IA32_PAT_HIGH		= 0x00002c01,
-	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
-	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
-	EXCEPTION_BITMAP                = 0x00004004,
-	PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
-	PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
-	CR3_TARGET_COUNT                = 0x0000400a,
-	VM_EXIT_CONTROLS                = 0x0000400c,
-	VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
-	VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
-	VM_ENTRY_CONTROLS               = 0x00004012,
-	VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
-	VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
-	VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
-	VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
-	TPR_THRESHOLD                   = 0x0000401c,
-	SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
-	VM_INSTRUCTION_ERROR            = 0x00004400,
-	VM_EXIT_REASON                  = 0x00004402,
-	VM_EXIT_INTR_INFO               = 0x00004404,
-	VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
-	IDT_VECTORING_INFO_FIELD        = 0x00004408,
-	IDT_VECTORING_ERROR_CODE        = 0x0000440a,
-	VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
-	VMX_INSTRUCTION_INFO            = 0x0000440e,
-	GUEST_ES_LIMIT                  = 0x00004800,
-	GUEST_CS_LIMIT                  = 0x00004802,
-	GUEST_SS_LIMIT                  = 0x00004804,
-	GUEST_DS_LIMIT                  = 0x00004806,
-	GUEST_FS_LIMIT                  = 0x00004808,
-	GUEST_GS_LIMIT                  = 0x0000480a,
-	GUEST_LDTR_LIMIT                = 0x0000480c,
-	GUEST_TR_LIMIT                  = 0x0000480e,
-	GUEST_GDTR_LIMIT                = 0x00004810,
-	GUEST_IDTR_LIMIT                = 0x00004812,
-	GUEST_ES_AR_BYTES               = 0x00004814,
-	GUEST_CS_AR_BYTES               = 0x00004816,
-	GUEST_SS_AR_BYTES               = 0x00004818,
-	GUEST_DS_AR_BYTES               = 0x0000481a,
-	GUEST_FS_AR_BYTES               = 0x0000481c,
-	GUEST_GS_AR_BYTES               = 0x0000481e,
-	GUEST_LDTR_AR_BYTES             = 0x00004820,
-	GUEST_TR_AR_BYTES               = 0x00004822,
-	GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
-	GUEST_ACTIVITY_STATE            = 0X00004826,
-	GUEST_SYSENTER_CS               = 0x0000482A,
-	HOST_IA32_SYSENTER_CS           = 0x00004c00,
-	CR0_GUEST_HOST_MASK             = 0x00006000,
-	CR4_GUEST_HOST_MASK             = 0x00006002,
-	CR0_READ_SHADOW                 = 0x00006004,
-	CR4_READ_SHADOW                 = 0x00006006,
-	CR3_TARGET_VALUE0               = 0x00006008,
-	CR3_TARGET_VALUE1               = 0x0000600a,
-	CR3_TARGET_VALUE2               = 0x0000600c,
-	CR3_TARGET_VALUE3               = 0x0000600e,
-	EXIT_QUALIFICATION              = 0x00006400,
-	GUEST_LINEAR_ADDRESS            = 0x0000640a,
-	GUEST_CR0                       = 0x00006800,
-	GUEST_CR3                       = 0x00006802,
-	GUEST_CR4                       = 0x00006804,
-	GUEST_ES_BASE                   = 0x00006806,
-	GUEST_CS_BASE                   = 0x00006808,
-	GUEST_SS_BASE                   = 0x0000680a,
-	GUEST_DS_BASE                   = 0x0000680c,
-	GUEST_FS_BASE                   = 0x0000680e,
-	GUEST_GS_BASE                   = 0x00006810,
-	GUEST_LDTR_BASE                 = 0x00006812,
-	GUEST_TR_BASE                   = 0x00006814,
-	GUEST_GDTR_BASE                 = 0x00006816,
-	GUEST_IDTR_BASE                 = 0x00006818,
-	GUEST_DR7                       = 0x0000681a,
-	GUEST_RSP                       = 0x0000681c,
-	GUEST_RIP                       = 0x0000681e,
-	GUEST_RFLAGS                    = 0x00006820,
-	GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
-	GUEST_SYSENTER_ESP              = 0x00006824,
-	GUEST_SYSENTER_EIP              = 0x00006826,
-	HOST_CR0                        = 0x00006c00,
-	HOST_CR3                        = 0x00006c02,
-	HOST_CR4                        = 0x00006c04,
-	HOST_FS_BASE                    = 0x00006c06,
-	HOST_GS_BASE                    = 0x00006c08,
-	HOST_TR_BASE                    = 0x00006c0a,
-	HOST_GDTR_BASE                  = 0x00006c0c,
-	HOST_IDTR_BASE                  = 0x00006c0e,
-	HOST_IA32_SYSENTER_ESP          = 0x00006c10,
-	HOST_IA32_SYSENTER_EIP          = 0x00006c12,
-	HOST_RSP                        = 0x00006c14,
-	HOST_RIP                        = 0x00006c16,
-};
-
-#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
-
-#define EXIT_REASON_EXCEPTION_NMI       0
-#define EXIT_REASON_EXTERNAL_INTERRUPT  1
-#define EXIT_REASON_TRIPLE_FAULT        2
-
-#define EXIT_REASON_PENDING_INTERRUPT   7
-#define EXIT_REASON_NMI_WINDOW		8
-#define EXIT_REASON_TASK_SWITCH         9
-#define EXIT_REASON_CPUID               10
-#define EXIT_REASON_HLT                 12
-#define EXIT_REASON_INVLPG              14
-#define EXIT_REASON_RDPMC               15
-#define EXIT_REASON_RDTSC               16
-#define EXIT_REASON_VMCALL              18
-#define EXIT_REASON_VMCLEAR             19
-#define EXIT_REASON_VMLAUNCH            20
-#define EXIT_REASON_VMPTRLD             21
-#define EXIT_REASON_VMPTRST             22
-#define EXIT_REASON_VMREAD              23
-#define EXIT_REASON_VMRESUME            24
-#define EXIT_REASON_VMWRITE             25
-#define EXIT_REASON_VMOFF               26
-#define EXIT_REASON_VMON                27
-#define EXIT_REASON_CR_ACCESS           28
-#define EXIT_REASON_DR_ACCESS           29
-#define EXIT_REASON_IO_INSTRUCTION      30
-#define EXIT_REASON_MSR_READ            31
-#define EXIT_REASON_MSR_WRITE           32
-#define EXIT_REASON_MWAIT_INSTRUCTION   36
-#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
-#define EXIT_REASON_APIC_ACCESS         44
-#define EXIT_REASON_EPT_VIOLATION       48
-#define EXIT_REASON_EPT_MISCONFIG       49
-#define EXIT_REASON_WBINVD		54
-
-/*
- * Interruption-information format
- */
-#define INTR_INFO_VECTOR_MASK           0xff            /* 7:0 */
-#define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
-#define INTR_INFO_DELIVER_CODE_MASK     0x800           /* 11 */
-#define INTR_INFO_UNBLOCK_NMI		0x1000		/* 12 */
-#define INTR_INFO_VALID_MASK            0x80000000      /* 31 */
-#define INTR_INFO_RESVD_BITS_MASK       0x7ffff000
-
-#define VECTORING_INFO_VECTOR_MASK           	INTR_INFO_VECTOR_MASK
-#define VECTORING_INFO_TYPE_MASK        	INTR_INFO_INTR_TYPE_MASK
-#define VECTORING_INFO_DELIVER_CODE_MASK    	INTR_INFO_DELIVER_CODE_MASK
-#define VECTORING_INFO_VALID_MASK       	INTR_INFO_VALID_MASK
-
-#define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
-#define INTR_TYPE_NMI_INTR		(2 << 8) /* NMI */
-#define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
-#define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
-
-/* GUEST_INTERRUPTIBILITY_INFO flags. */
-#define GUEST_INTR_STATE_STI		0x00000001
-#define GUEST_INTR_STATE_MOV_SS		0x00000002
-#define GUEST_INTR_STATE_SMI		0x00000004
-#define GUEST_INTR_STATE_NMI		0x00000008
-
-/*
- * Exit Qualifications for MOV for Control Register Access
- */
-#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control reg.*/
-#define CONTROL_REG_ACCESS_TYPE         0x30    /* 5:4, access type */
-#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose reg. */
-#define LMSW_SOURCE_DATA_SHIFT 16
-#define LMSW_SOURCE_DATA  (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */
-#define REG_EAX                         (0 << 8)
-#define REG_ECX                         (1 << 8)
-#define REG_EDX                         (2 << 8)
-#define REG_EBX                         (3 << 8)
-#define REG_ESP                         (4 << 8)
-#define REG_EBP                         (5 << 8)
-#define REG_ESI                         (6 << 8)
-#define REG_EDI                         (7 << 8)
-#define REG_R8                         (8 << 8)
-#define REG_R9                         (9 << 8)
-#define REG_R10                        (10 << 8)
-#define REG_R11                        (11 << 8)
-#define REG_R12                        (12 << 8)
-#define REG_R13                        (13 << 8)
-#define REG_R14                        (14 << 8)
-#define REG_R15                        (15 << 8)
-
-/*
- * Exit Qualifications for MOV for Debug Register Access
- */
-#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug reg. */
-#define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
-#define TYPE_MOV_TO_DR                  (0 << 4)
-#define TYPE_MOV_FROM_DR                (1 << 4)
-#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose reg. */
-
-
-/* segment AR */
-#define SEGMENT_AR_L_MASK (1 << 13)
-
-#define AR_TYPE_ACCESSES_MASK 1
-#define AR_TYPE_READABLE_MASK (1 << 1)
-#define AR_TYPE_WRITEABLE_MASK (1 << 2)
-#define AR_TYPE_CODE_MASK (1 << 3)
-#define AR_TYPE_MASK 0x0f
-#define AR_TYPE_BUSY_64_TSS 11
-#define AR_TYPE_BUSY_32_TSS 11
-#define AR_TYPE_BUSY_16_TSS 3
-#define AR_TYPE_LDT 2
-
-#define AR_UNUSABLE_MASK (1 << 16)
-#define AR_S_MASK (1 << 4)
-#define AR_P_MASK (1 << 7)
-#define AR_L_MASK (1 << 13)
-#define AR_DB_MASK (1 << 14)
-#define AR_G_MASK (1 << 15)
-#define AR_DPL_SHIFT 5
-#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
-
-#define AR_RESERVD_MASK 0xfffe0f00
-
-#define TSS_PRIVATE_MEMSLOT			(KVM_MEMORY_SLOTS + 0)
-#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 1)
-#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 2)
-
-#define VMX_NR_VPIDS				(1 << 16)
-#define VMX_VPID_EXTENT_SINGLE_CONTEXT		1
-#define VMX_VPID_EXTENT_ALL_CONTEXT		2
-
-#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR		0
-#define VMX_EPT_EXTENT_CONTEXT			1
-#define VMX_EPT_EXTENT_GLOBAL			2
-#define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
-#define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
-#define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
-#define VMX_EPT_DEFAULT_GAW			3
-#define VMX_EPT_MAX_GAW				0x4
-#define VMX_EPT_MT_EPTE_SHIFT			3
-#define VMX_EPT_GAW_EPTP_SHIFT			3
-#define VMX_EPT_DEFAULT_MT			0x6ull
-#define VMX_EPT_READABLE_MASK			0x1ull
-#define VMX_EPT_WRITABLE_MASK			0x2ull
-#define VMX_EPT_EXECUTABLE_MASK			0x4ull
-#define VMX_EPT_IGMT_BIT    			(1ull << 6)
-
-#define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
-
-#endif
-- 
cgit v0.10.2


From c2cedf7be2017e3264c93a4c0d75b1d96d0d7104 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:14 -0200
Subject: KVM: SVM: move svm.h to include/asm

svm.h will be used by core code that is independent of KVM, so I am
moving it outside the arch/x86/kvm directory.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
new file mode 100644
index 0000000..1b8afa7
--- /dev/null
+++ b/arch/x86/include/asm/svm.h
@@ -0,0 +1,328 @@
+#ifndef __SVM_H
+#define __SVM_H
+
+enum {
+	INTERCEPT_INTR,
+	INTERCEPT_NMI,
+	INTERCEPT_SMI,
+	INTERCEPT_INIT,
+	INTERCEPT_VINTR,
+	INTERCEPT_SELECTIVE_CR0,
+	INTERCEPT_STORE_IDTR,
+	INTERCEPT_STORE_GDTR,
+	INTERCEPT_STORE_LDTR,
+	INTERCEPT_STORE_TR,
+	INTERCEPT_LOAD_IDTR,
+	INTERCEPT_LOAD_GDTR,
+	INTERCEPT_LOAD_LDTR,
+	INTERCEPT_LOAD_TR,
+	INTERCEPT_RDTSC,
+	INTERCEPT_RDPMC,
+	INTERCEPT_PUSHF,
+	INTERCEPT_POPF,
+	INTERCEPT_CPUID,
+	INTERCEPT_RSM,
+	INTERCEPT_IRET,
+	INTERCEPT_INTn,
+	INTERCEPT_INVD,
+	INTERCEPT_PAUSE,
+	INTERCEPT_HLT,
+	INTERCEPT_INVLPG,
+	INTERCEPT_INVLPGA,
+	INTERCEPT_IOIO_PROT,
+	INTERCEPT_MSR_PROT,
+	INTERCEPT_TASK_SWITCH,
+	INTERCEPT_FERR_FREEZE,
+	INTERCEPT_SHUTDOWN,
+	INTERCEPT_VMRUN,
+	INTERCEPT_VMMCALL,
+	INTERCEPT_VMLOAD,
+	INTERCEPT_VMSAVE,
+	INTERCEPT_STGI,
+	INTERCEPT_CLGI,
+	INTERCEPT_SKINIT,
+	INTERCEPT_RDTSCP,
+	INTERCEPT_ICEBP,
+	INTERCEPT_WBINVD,
+	INTERCEPT_MONITOR,
+	INTERCEPT_MWAIT,
+	INTERCEPT_MWAIT_COND,
+};
+
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+	u16 intercept_cr_read;
+	u16 intercept_cr_write;
+	u16 intercept_dr_read;
+	u16 intercept_dr_write;
+	u32 intercept_exceptions;
+	u64 intercept;
+	u8 reserved_1[44];
+	u64 iopm_base_pa;
+	u64 msrpm_base_pa;
+	u64 tsc_offset;
+	u32 asid;
+	u8 tlb_ctl;
+	u8 reserved_2[3];
+	u32 int_ctl;
+	u32 int_vector;
+	u32 int_state;
+	u8 reserved_3[4];
+	u32 exit_code;
+	u32 exit_code_hi;
+	u64 exit_info_1;
+	u64 exit_info_2;
+	u32 exit_int_info;
+	u32 exit_int_info_err;
+	u64 nested_ctl;
+	u8 reserved_4[16];
+	u32 event_inj;
+	u32 event_inj_err;
+	u64 nested_cr3;
+	u64 lbr_ctl;
+	u8 reserved_5[832];
+};
+
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+struct __attribute__ ((__packed__)) vmcb_seg {
+	u16 selector;
+	u16 attrib;
+	u32 limit;
+	u64 base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+	struct vmcb_seg es;
+	struct vmcb_seg cs;
+	struct vmcb_seg ss;
+	struct vmcb_seg ds;
+	struct vmcb_seg fs;
+	struct vmcb_seg gs;
+	struct vmcb_seg gdtr;
+	struct vmcb_seg ldtr;
+	struct vmcb_seg idtr;
+	struct vmcb_seg tr;
+	u8 reserved_1[43];
+	u8 cpl;
+	u8 reserved_2[4];
+	u64 efer;
+	u8 reserved_3[112];
+	u64 cr4;
+	u64 cr3;
+	u64 cr0;
+	u64 dr7;
+	u64 dr6;
+	u64 rflags;
+	u64 rip;
+	u8 reserved_4[88];
+	u64 rsp;
+	u8 reserved_5[24];
+	u64 rax;
+	u64 star;
+	u64 lstar;
+	u64 cstar;
+	u64 sfmask;
+	u64 kernel_gs_base;
+	u64 sysenter_cs;
+	u64 sysenter_esp;
+	u64 sysenter_eip;
+	u64 cr2;
+	u8 reserved_6[32];
+	u64 g_pat;
+	u64 dbgctl;
+	u64 br_from;
+	u64 br_to;
+	u64 last_excp_from;
+	u64 last_excp_to;
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+	struct vmcb_control_area control;
+	struct vmcb_save_area save;
+};
+
+#define SVM_CPUID_FEATURE_SHIFT 2
+#define SVM_CPUID_FUNC 0x8000000a
+
+#define MSR_EFER_SVME_MASK (1ULL << 12)
+#define MSR_VM_CR       0xc0010114
+#define MSR_VM_HSAVE_PA 0xc0010117ULL
+
+#define SVM_VM_CR_SVM_DISABLE 4
+
+#define SVM_SELECTOR_S_SHIFT 4
+#define SVM_SELECTOR_DPL_SHIFT 5
+#define SVM_SELECTOR_P_SHIFT 7
+#define SVM_SELECTOR_AVL_SHIFT 8
+#define SVM_SELECTOR_L_SHIFT 9
+#define SVM_SELECTOR_DB_SHIFT 10
+#define SVM_SELECTOR_G_SHIFT 11
+
+#define SVM_SELECTOR_TYPE_MASK (0xf)
+#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
+#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
+#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
+#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
+#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
+#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
+#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
+
+#define SVM_SELECTOR_WRITE_MASK (1 << 1)
+#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
+#define SVM_SELECTOR_CODE_MASK (1 << 3)
+
+#define INTERCEPT_CR0_MASK 1
+#define INTERCEPT_CR3_MASK (1 << 3)
+#define INTERCEPT_CR4_MASK (1 << 4)
+#define INTERCEPT_CR8_MASK (1 << 8)
+
+#define INTERCEPT_DR0_MASK 1
+#define INTERCEPT_DR1_MASK (1 << 1)
+#define INTERCEPT_DR2_MASK (1 << 2)
+#define INTERCEPT_DR3_MASK (1 << 3)
+#define INTERCEPT_DR4_MASK (1 << 4)
+#define INTERCEPT_DR5_MASK (1 << 5)
+#define INTERCEPT_DR6_MASK (1 << 6)
+#define INTERCEPT_DR7_MASK (1 << 7)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+
+#define	SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define	SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define	SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define	SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+
+#define	SVM_EXIT_READ_CR0 	0x000
+#define	SVM_EXIT_READ_CR3 	0x003
+#define	SVM_EXIT_READ_CR4 	0x004
+#define	SVM_EXIT_READ_CR8 	0x008
+#define	SVM_EXIT_WRITE_CR0 	0x010
+#define	SVM_EXIT_WRITE_CR3 	0x013
+#define	SVM_EXIT_WRITE_CR4 	0x014
+#define	SVM_EXIT_WRITE_CR8 	0x018
+#define	SVM_EXIT_READ_DR0 	0x020
+#define	SVM_EXIT_READ_DR1 	0x021
+#define	SVM_EXIT_READ_DR2 	0x022
+#define	SVM_EXIT_READ_DR3 	0x023
+#define	SVM_EXIT_READ_DR4 	0x024
+#define	SVM_EXIT_READ_DR5 	0x025
+#define	SVM_EXIT_READ_DR6 	0x026
+#define	SVM_EXIT_READ_DR7 	0x027
+#define	SVM_EXIT_WRITE_DR0 	0x030
+#define	SVM_EXIT_WRITE_DR1 	0x031
+#define	SVM_EXIT_WRITE_DR2 	0x032
+#define	SVM_EXIT_WRITE_DR3 	0x033
+#define	SVM_EXIT_WRITE_DR4 	0x034
+#define	SVM_EXIT_WRITE_DR5 	0x035
+#define	SVM_EXIT_WRITE_DR6 	0x036
+#define	SVM_EXIT_WRITE_DR7 	0x037
+#define SVM_EXIT_EXCP_BASE      0x040
+#define SVM_EXIT_INTR		0x060
+#define SVM_EXIT_NMI		0x061
+#define SVM_EXIT_SMI		0x062
+#define SVM_EXIT_INIT		0x063
+#define SVM_EXIT_VINTR		0x064
+#define SVM_EXIT_CR0_SEL_WRITE	0x065
+#define SVM_EXIT_IDTR_READ	0x066
+#define SVM_EXIT_GDTR_READ	0x067
+#define SVM_EXIT_LDTR_READ	0x068
+#define SVM_EXIT_TR_READ	0x069
+#define SVM_EXIT_IDTR_WRITE	0x06a
+#define SVM_EXIT_GDTR_WRITE	0x06b
+#define SVM_EXIT_LDTR_WRITE	0x06c
+#define SVM_EXIT_TR_WRITE	0x06d
+#define SVM_EXIT_RDTSC		0x06e
+#define SVM_EXIT_RDPMC		0x06f
+#define SVM_EXIT_PUSHF		0x070
+#define SVM_EXIT_POPF		0x071
+#define SVM_EXIT_CPUID		0x072
+#define SVM_EXIT_RSM		0x073
+#define SVM_EXIT_IRET		0x074
+#define SVM_EXIT_SWINT		0x075
+#define SVM_EXIT_INVD		0x076
+#define SVM_EXIT_PAUSE		0x077
+#define SVM_EXIT_HLT		0x078
+#define SVM_EXIT_INVLPG		0x079
+#define SVM_EXIT_INVLPGA	0x07a
+#define SVM_EXIT_IOIO		0x07b
+#define SVM_EXIT_MSR		0x07c
+#define SVM_EXIT_TASK_SWITCH	0x07d
+#define SVM_EXIT_FERR_FREEZE	0x07e
+#define SVM_EXIT_SHUTDOWN	0x07f
+#define SVM_EXIT_VMRUN		0x080
+#define SVM_EXIT_VMMCALL	0x081
+#define SVM_EXIT_VMLOAD		0x082
+#define SVM_EXIT_VMSAVE		0x083
+#define SVM_EXIT_STGI		0x084
+#define SVM_EXIT_CLGI		0x085
+#define SVM_EXIT_SKINIT		0x086
+#define SVM_EXIT_RDTSCP		0x087
+#define SVM_EXIT_ICEBP		0x088
+#define SVM_EXIT_WBINVD		0x089
+#define SVM_EXIT_MONITOR	0x08a
+#define SVM_EXIT_MWAIT		0x08b
+#define SVM_EXIT_MWAIT_COND	0x08c
+#define SVM_EXIT_NPF  		0x400
+
+#define SVM_EXIT_ERR		-1
+
+#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
+
+#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
+#define SVM_VMRUN  ".byte 0x0f, 0x01, 0xd8"
+#define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
+#define SVM_CLGI   ".byte 0x0f, 0x01, 0xdd"
+#define SVM_STGI   ".byte 0x0f, 0x01, 0xdc"
+#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
+
+#endif
+
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 65ef0fc..8e5ee99 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -7,7 +7,7 @@
 #include <linux/kvm_host.h>
 #include <asm/msr.h>
 
-#include "svm.h"
+#include <asm/svm.h>
 
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/svm.h b/arch/x86/kvm/svm.h
deleted file mode 100644
index 1b8afa7..0000000
--- a/arch/x86/kvm/svm.h
+++ /dev/null
@@ -1,328 +0,0 @@
-#ifndef __SVM_H
-#define __SVM_H
-
-enum {
-	INTERCEPT_INTR,
-	INTERCEPT_NMI,
-	INTERCEPT_SMI,
-	INTERCEPT_INIT,
-	INTERCEPT_VINTR,
-	INTERCEPT_SELECTIVE_CR0,
-	INTERCEPT_STORE_IDTR,
-	INTERCEPT_STORE_GDTR,
-	INTERCEPT_STORE_LDTR,
-	INTERCEPT_STORE_TR,
-	INTERCEPT_LOAD_IDTR,
-	INTERCEPT_LOAD_GDTR,
-	INTERCEPT_LOAD_LDTR,
-	INTERCEPT_LOAD_TR,
-	INTERCEPT_RDTSC,
-	INTERCEPT_RDPMC,
-	INTERCEPT_PUSHF,
-	INTERCEPT_POPF,
-	INTERCEPT_CPUID,
-	INTERCEPT_RSM,
-	INTERCEPT_IRET,
-	INTERCEPT_INTn,
-	INTERCEPT_INVD,
-	INTERCEPT_PAUSE,
-	INTERCEPT_HLT,
-	INTERCEPT_INVLPG,
-	INTERCEPT_INVLPGA,
-	INTERCEPT_IOIO_PROT,
-	INTERCEPT_MSR_PROT,
-	INTERCEPT_TASK_SWITCH,
-	INTERCEPT_FERR_FREEZE,
-	INTERCEPT_SHUTDOWN,
-	INTERCEPT_VMRUN,
-	INTERCEPT_VMMCALL,
-	INTERCEPT_VMLOAD,
-	INTERCEPT_VMSAVE,
-	INTERCEPT_STGI,
-	INTERCEPT_CLGI,
-	INTERCEPT_SKINIT,
-	INTERCEPT_RDTSCP,
-	INTERCEPT_ICEBP,
-	INTERCEPT_WBINVD,
-	INTERCEPT_MONITOR,
-	INTERCEPT_MWAIT,
-	INTERCEPT_MWAIT_COND,
-};
-
-
-struct __attribute__ ((__packed__)) vmcb_control_area {
-	u16 intercept_cr_read;
-	u16 intercept_cr_write;
-	u16 intercept_dr_read;
-	u16 intercept_dr_write;
-	u32 intercept_exceptions;
-	u64 intercept;
-	u8 reserved_1[44];
-	u64 iopm_base_pa;
-	u64 msrpm_base_pa;
-	u64 tsc_offset;
-	u32 asid;
-	u8 tlb_ctl;
-	u8 reserved_2[3];
-	u32 int_ctl;
-	u32 int_vector;
-	u32 int_state;
-	u8 reserved_3[4];
-	u32 exit_code;
-	u32 exit_code_hi;
-	u64 exit_info_1;
-	u64 exit_info_2;
-	u32 exit_int_info;
-	u32 exit_int_info_err;
-	u64 nested_ctl;
-	u8 reserved_4[16];
-	u32 event_inj;
-	u32 event_inj_err;
-	u64 nested_cr3;
-	u64 lbr_ctl;
-	u8 reserved_5[832];
-};
-
-
-#define TLB_CONTROL_DO_NOTHING 0
-#define TLB_CONTROL_FLUSH_ALL_ASID 1
-
-#define V_TPR_MASK 0x0f
-
-#define V_IRQ_SHIFT 8
-#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
-
-#define V_INTR_PRIO_SHIFT 16
-#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
-
-#define V_IGN_TPR_SHIFT 20
-#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
-
-#define V_INTR_MASKING_SHIFT 24
-#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
-
-#define SVM_INTERRUPT_SHADOW_MASK 1
-
-#define SVM_IOIO_STR_SHIFT 2
-#define SVM_IOIO_REP_SHIFT 3
-#define SVM_IOIO_SIZE_SHIFT 4
-#define SVM_IOIO_ASIZE_SHIFT 7
-
-#define SVM_IOIO_TYPE_MASK 1
-#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
-#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
-#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
-#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
-
-struct __attribute__ ((__packed__)) vmcb_seg {
-	u16 selector;
-	u16 attrib;
-	u32 limit;
-	u64 base;
-};
-
-struct __attribute__ ((__packed__)) vmcb_save_area {
-	struct vmcb_seg es;
-	struct vmcb_seg cs;
-	struct vmcb_seg ss;
-	struct vmcb_seg ds;
-	struct vmcb_seg fs;
-	struct vmcb_seg gs;
-	struct vmcb_seg gdtr;
-	struct vmcb_seg ldtr;
-	struct vmcb_seg idtr;
-	struct vmcb_seg tr;
-	u8 reserved_1[43];
-	u8 cpl;
-	u8 reserved_2[4];
-	u64 efer;
-	u8 reserved_3[112];
-	u64 cr4;
-	u64 cr3;
-	u64 cr0;
-	u64 dr7;
-	u64 dr6;
-	u64 rflags;
-	u64 rip;
-	u8 reserved_4[88];
-	u64 rsp;
-	u8 reserved_5[24];
-	u64 rax;
-	u64 star;
-	u64 lstar;
-	u64 cstar;
-	u64 sfmask;
-	u64 kernel_gs_base;
-	u64 sysenter_cs;
-	u64 sysenter_esp;
-	u64 sysenter_eip;
-	u64 cr2;
-	u8 reserved_6[32];
-	u64 g_pat;
-	u64 dbgctl;
-	u64 br_from;
-	u64 br_to;
-	u64 last_excp_from;
-	u64 last_excp_to;
-};
-
-struct __attribute__ ((__packed__)) vmcb {
-	struct vmcb_control_area control;
-	struct vmcb_save_area save;
-};
-
-#define SVM_CPUID_FEATURE_SHIFT 2
-#define SVM_CPUID_FUNC 0x8000000a
-
-#define MSR_EFER_SVME_MASK (1ULL << 12)
-#define MSR_VM_CR       0xc0010114
-#define MSR_VM_HSAVE_PA 0xc0010117ULL
-
-#define SVM_VM_CR_SVM_DISABLE 4
-
-#define SVM_SELECTOR_S_SHIFT 4
-#define SVM_SELECTOR_DPL_SHIFT 5
-#define SVM_SELECTOR_P_SHIFT 7
-#define SVM_SELECTOR_AVL_SHIFT 8
-#define SVM_SELECTOR_L_SHIFT 9
-#define SVM_SELECTOR_DB_SHIFT 10
-#define SVM_SELECTOR_G_SHIFT 11
-
-#define SVM_SELECTOR_TYPE_MASK (0xf)
-#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
-#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
-#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
-#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
-#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
-#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
-#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
-
-#define SVM_SELECTOR_WRITE_MASK (1 << 1)
-#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
-#define SVM_SELECTOR_CODE_MASK (1 << 3)
-
-#define INTERCEPT_CR0_MASK 1
-#define INTERCEPT_CR3_MASK (1 << 3)
-#define INTERCEPT_CR4_MASK (1 << 4)
-#define INTERCEPT_CR8_MASK (1 << 8)
-
-#define INTERCEPT_DR0_MASK 1
-#define INTERCEPT_DR1_MASK (1 << 1)
-#define INTERCEPT_DR2_MASK (1 << 2)
-#define INTERCEPT_DR3_MASK (1 << 3)
-#define INTERCEPT_DR4_MASK (1 << 4)
-#define INTERCEPT_DR5_MASK (1 << 5)
-#define INTERCEPT_DR6_MASK (1 << 6)
-#define INTERCEPT_DR7_MASK (1 << 7)
-
-#define SVM_EVTINJ_VEC_MASK 0xff
-
-#define SVM_EVTINJ_TYPE_SHIFT 8
-#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_VALID (1 << 31)
-#define SVM_EVTINJ_VALID_ERR (1 << 11)
-
-#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
-
-#define	SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
-#define	SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
-#define	SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
-#define	SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
-
-#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
-#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
-
-#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
-#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
-
-#define	SVM_EXIT_READ_CR0 	0x000
-#define	SVM_EXIT_READ_CR3 	0x003
-#define	SVM_EXIT_READ_CR4 	0x004
-#define	SVM_EXIT_READ_CR8 	0x008
-#define	SVM_EXIT_WRITE_CR0 	0x010
-#define	SVM_EXIT_WRITE_CR3 	0x013
-#define	SVM_EXIT_WRITE_CR4 	0x014
-#define	SVM_EXIT_WRITE_CR8 	0x018
-#define	SVM_EXIT_READ_DR0 	0x020
-#define	SVM_EXIT_READ_DR1 	0x021
-#define	SVM_EXIT_READ_DR2 	0x022
-#define	SVM_EXIT_READ_DR3 	0x023
-#define	SVM_EXIT_READ_DR4 	0x024
-#define	SVM_EXIT_READ_DR5 	0x025
-#define	SVM_EXIT_READ_DR6 	0x026
-#define	SVM_EXIT_READ_DR7 	0x027
-#define	SVM_EXIT_WRITE_DR0 	0x030
-#define	SVM_EXIT_WRITE_DR1 	0x031
-#define	SVM_EXIT_WRITE_DR2 	0x032
-#define	SVM_EXIT_WRITE_DR3 	0x033
-#define	SVM_EXIT_WRITE_DR4 	0x034
-#define	SVM_EXIT_WRITE_DR5 	0x035
-#define	SVM_EXIT_WRITE_DR6 	0x036
-#define	SVM_EXIT_WRITE_DR7 	0x037
-#define SVM_EXIT_EXCP_BASE      0x040
-#define SVM_EXIT_INTR		0x060
-#define SVM_EXIT_NMI		0x061
-#define SVM_EXIT_SMI		0x062
-#define SVM_EXIT_INIT		0x063
-#define SVM_EXIT_VINTR		0x064
-#define SVM_EXIT_CR0_SEL_WRITE	0x065
-#define SVM_EXIT_IDTR_READ	0x066
-#define SVM_EXIT_GDTR_READ	0x067
-#define SVM_EXIT_LDTR_READ	0x068
-#define SVM_EXIT_TR_READ	0x069
-#define SVM_EXIT_IDTR_WRITE	0x06a
-#define SVM_EXIT_GDTR_WRITE	0x06b
-#define SVM_EXIT_LDTR_WRITE	0x06c
-#define SVM_EXIT_TR_WRITE	0x06d
-#define SVM_EXIT_RDTSC		0x06e
-#define SVM_EXIT_RDPMC		0x06f
-#define SVM_EXIT_PUSHF		0x070
-#define SVM_EXIT_POPF		0x071
-#define SVM_EXIT_CPUID		0x072
-#define SVM_EXIT_RSM		0x073
-#define SVM_EXIT_IRET		0x074
-#define SVM_EXIT_SWINT		0x075
-#define SVM_EXIT_INVD		0x076
-#define SVM_EXIT_PAUSE		0x077
-#define SVM_EXIT_HLT		0x078
-#define SVM_EXIT_INVLPG		0x079
-#define SVM_EXIT_INVLPGA	0x07a
-#define SVM_EXIT_IOIO		0x07b
-#define SVM_EXIT_MSR		0x07c
-#define SVM_EXIT_TASK_SWITCH	0x07d
-#define SVM_EXIT_FERR_FREEZE	0x07e
-#define SVM_EXIT_SHUTDOWN	0x07f
-#define SVM_EXIT_VMRUN		0x080
-#define SVM_EXIT_VMMCALL	0x081
-#define SVM_EXIT_VMLOAD		0x082
-#define SVM_EXIT_VMSAVE		0x083
-#define SVM_EXIT_STGI		0x084
-#define SVM_EXIT_CLGI		0x085
-#define SVM_EXIT_SKINIT		0x086
-#define SVM_EXIT_RDTSCP		0x087
-#define SVM_EXIT_ICEBP		0x088
-#define SVM_EXIT_WBINVD		0x089
-#define SVM_EXIT_MONITOR	0x08a
-#define SVM_EXIT_MWAIT		0x08b
-#define SVM_EXIT_MWAIT_COND	0x08c
-#define SVM_EXIT_NPF  		0x400
-
-#define SVM_EXIT_ERR		-1
-
-#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
-
-#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
-#define SVM_VMRUN  ".byte 0x0f, 0x01, 0xd8"
-#define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
-#define SVM_CLGI   ".byte 0x0f, 0x01, 0xdd"
-#define SVM_STGI   ".byte 0x0f, 0x01, 0xdc"
-#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
-
-#endif
-
-- 
cgit v0.10.2


From eca70fc5671b226966dfb7ee9953d59199288566 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:15 -0200
Subject: KVM: VMX: move ASM_VMX_* definitions from asm/kvm_host.h to asm/vmx.h

Those definitions will be used by code outside KVM, so move it outside
of a KVM-specific source file.

Those definitions are used only on kvm/vmx.c, that already includes
asm/vmx.h, so they can be moved safely.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 99e3cc1..f58f7eb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -714,18 +714,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
-#define ASM_VMX_VMCLEAR_RAX       ".byte 0x66, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
-#define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
-#define ASM_VMX_VMPTRLD_RAX       ".byte 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMREAD_RDX_RAX    ".byte 0x0f, 0x78, 0xd0"
-#define ASM_VMX_VMWRITE_RAX_RDX   ".byte 0x0f, 0x79, 0xd0"
-#define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
-#define ASM_VMX_VMXOFF            ".byte 0x0f, 0x01, 0xc4"
-#define ASM_VMX_VMXON_RAX         ".byte 0xf3, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_INVEPT		  ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
-#define ASM_VMX_INVVPID		  ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
-
 #define MSR_IA32_TIME_STAMP_COUNTER		0x010
 
 #define TSS_IOPB_BASE_OFFSET 0x66
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 3db236c..d0238e6 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -364,4 +364,19 @@ enum vmcs_field {
 
 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
 
+
+#define ASM_VMX_VMCLEAR_RAX       ".byte 0x66, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
+#define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
+#define ASM_VMX_VMPTRLD_RAX       ".byte 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMREAD_RDX_RAX    ".byte 0x0f, 0x78, 0xd0"
+#define ASM_VMX_VMWRITE_RAX_RDX   ".byte 0x0f, 0x79, 0xd0"
+#define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
+#define ASM_VMX_VMXOFF            ".byte 0x0f, 0x01, 0xc4"
+#define ASM_VMX_VMXON_RAX         ".byte 0xf3, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_INVEPT		  ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
+#define ASM_VMX_INVVPID		  ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
+
+
+
 #endif
-- 
cgit v0.10.2


From 6210e37b122583643da335c0389f74098713e5ca Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:16 -0200
Subject: KVM: VMX: move cpu_has_kvm_support() to an inline on asm/virtext.h

It will be used by core code on kdump and reboot, to disable
vmx if needed.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
new file mode 100644
index 0000000..298b6a0
--- /dev/null
+++ b/arch/x86/include/asm/virtext.h
@@ -0,0 +1,31 @@
+/* CPU virtualization extensions handling
+ *
+ * This should carry the code for handling CPU virtualization extensions
+ * that needs to live in the kernel core.
+ *
+ * Author: Eduardo Habkost <ehabkost@redhat.com>
+ *
+ * Copyright (C) 2008, Red Hat Inc.
+ *
+ * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+#ifndef _ASM_X86_VIRTEX_H
+#define _ASM_X86_VIRTEX_H
+
+#include <asm/processor.h>
+#include <asm/system.h>
+
+/*
+ * VMX functions:
+ */
+
+static inline int cpu_has_vmx(void)
+{
+	unsigned long ecx = cpuid_ecx(1);
+	return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
+}
+
+#endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ec71f64..defaeeb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -31,6 +31,7 @@
 #include <asm/io.h>
 #include <asm/desc.h>
 #include <asm/vmx.h>
+#include <asm/virtext.h>
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
@@ -1044,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu)
 
 static __init int cpu_has_kvm_support(void)
 {
-	unsigned long ecx = cpuid_ecx(1);
-	return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
+	return cpu_has_vmx();
 }
 
 static __init int vmx_disabled_by_bios(void)
-- 
cgit v0.10.2


From 1e9931146c748420343aeefadb3bb17bd1c14a37 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:17 -0200
Subject: x86: asm/virtext.h: add cpu_vmxoff() inline function

Unfortunately we can't use exactly the same code from vmx
hardware_disable(), because the KVM function uses the
__kvm_handle_fault_on_reboot() tricks.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 298b6a0..7dee5b5 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -18,6 +18,8 @@
 #include <asm/processor.h>
 #include <asm/system.h>
 
+#include <asm/vmx.h>
+
 /*
  * VMX functions:
  */
@@ -28,4 +30,17 @@ static inline int cpu_has_vmx(void)
 	return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
 }
 
+
+/** Disable VMX on the current CPU
+ *
+ * vmxoff causes a undefined-opcode exception if vmxon was not run
+ * on the CPU previously. Only call this function if you know VMX
+ * is enabled.
+ */
+static inline void cpu_vmxoff(void)
+{
+	asm volatile (ASM_VMX_VMXOFF : : : "cc");
+	write_cr4(read_cr4() & ~X86_CR4_VMXE);
+}
+
 #endif /* _ASM_X86_VIRTEX_H */
-- 
cgit v0.10.2


From 710ff4a855d0f3bf74b5b4a20328e2858a8a2968 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:18 -0200
Subject: KVM: VMX: extract kvm_cpu_vmxoff() from hardware_disable()

Along with some comments on why it is different from the core cpu_vmxoff()
function.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index defaeeb..f5958a7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1091,13 +1091,22 @@ static void vmclear_local_vcpus(void)
 		__vcpu_clear(vmx);
 }
 
-static void hardware_disable(void *garbage)
+
+/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
+ * tricks.
+ */
+static void kvm_cpu_vmxoff(void)
 {
-	vmclear_local_vcpus();
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
+static void hardware_disable(void *garbage)
+{
+	vmclear_local_vcpus();
+	kvm_cpu_vmxoff();
+}
+
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
 				      u32 msr, u32 *result)
 {
-- 
cgit v0.10.2


From 6aa07a0d77f6aafbe69e4e8609ffaf2b7ee1b591 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:19 -0200
Subject: x86: cpu_emergency_vmxoff() function

Add cpu_emergency_vmxoff() and its friends: cpu_vmx_enabled() and
__cpu_emergency_vmxoff().

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 7dee5b5..6bcf0acb 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -43,4 +43,27 @@ static inline void cpu_vmxoff(void)
 	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
+static inline int cpu_vmx_enabled(void)
+{
+	return read_cr4() & X86_CR4_VMXE;
+}
+
+/** Disable VMX if it is enabled on the current CPU
+ *
+ * You shouldn't call this if cpu_has_vmx() returns 0.
+ */
+static inline void __cpu_emergency_vmxoff(void)
+{
+	if (cpu_vmx_enabled())
+		cpu_vmxoff();
+}
+
+/** Disable VMX if it is supported and enabled on the current CPU
+ */
+static inline void cpu_emergency_vmxoff(void)
+{
+	if (cpu_has_vmx())
+		__cpu_emergency_vmxoff();
+}
+
 #endif /* _ASM_X86_VIRTEX_H */
-- 
cgit v0.10.2


From 63d1142f8f69e39468bc6079ab2239e902828134 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:20 -0200
Subject: KVM: SVM: move has_svm() code to asm/virtext.h

Use a trick to keep the printk()s on has_svm() working as before. gcc
will take care of not generating code for the 'msg' stuff when the
function is called with a NULL msg argument.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 6bcf0acb..6f0d409 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -19,6 +19,7 @@
 #include <asm/system.h>
 
 #include <asm/vmx.h>
+#include <asm/svm.h>
 
 /*
  * VMX functions:
@@ -66,4 +67,44 @@ static inline void cpu_emergency_vmxoff(void)
 		__cpu_emergency_vmxoff();
 }
 
+
+
+
+/*
+ * SVM functions:
+ */
+
+/** Check if the CPU has SVM support
+ *
+ * You can use the 'msg' arg to get a message describing the problem,
+ * if the function returns zero. Simply pass NULL if you are not interested
+ * on the messages; gcc should take care of not generating code for
+ * the messages on this case.
+ */
+static inline int cpu_has_svm(const char **msg)
+{
+	uint32_t eax, ebx, ecx, edx;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+		if (msg)
+			*msg = "not amd";
+		return 0;
+	}
+
+	cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+	if (eax < SVM_CPUID_FUNC) {
+		if (msg)
+			*msg = "can't execute cpuid_8000000a";
+		return 0;
+	}
+
+	cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+	if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
+		if (msg)
+			*msg = "svm not available";
+		return 0;
+	}
+	return 1;
+}
+
 #endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f0ad4d4..0667c6d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,8 @@
 
 #include <asm/desc.h>
 
+#include <asm/virtext.h>
+
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
 MODULE_AUTHOR("Qumranet");
@@ -245,24 +247,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 
 static int has_svm(void)
 {
-	uint32_t eax, ebx, ecx, edx;
-
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
-		printk(KERN_INFO "has_svm: not amd\n");
-		return 0;
-	}
+	const char *msg;
 
-	cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
-	if (eax < SVM_CPUID_FUNC) {
-		printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
+	if (!cpu_has_svm(&msg)) {
+		printk(KERN_INFO "has_svn: %s\n", msg);
 		return 0;
 	}
 
-	cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
-	if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
-		printk(KERN_DEBUG "has_svm: svm not available\n");
-		return 0;
-	}
 	return 1;
 }
 
-- 
cgit v0.10.2


From 2c8dceebb238680d5577500f8283397d41ca5590 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:21 -0200
Subject: KVM: SVM: move svm_hardware_disable() code to asm/virtext.h

Create cpu_svm_disable() function.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 6f0d409..2cfe363 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -107,4 +107,18 @@ static inline int cpu_has_svm(const char **msg)
 	return 1;
 }
 
+
+/** Disable SVM on the current CPU
+ *
+ * You should call this only if cpu_has_svm() returned true.
+ */
+static inline void cpu_svm_disable(void)
+{
+	uint64_t efer;
+
+	wrmsrl(MSR_VM_HSAVE_PA, 0);
+	rdmsrl(MSR_EFER, efer);
+	wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+}
+
 #endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0667c6d..1452851 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -259,11 +259,7 @@ static int has_svm(void)
 
 static void svm_hardware_disable(void *garbage)
 {
-	uint64_t efer;
-
-	wrmsrl(MSR_VM_HSAVE_PA, 0);
-	rdmsrl(MSR_EFER, efer);
-	wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+	cpu_svm_disable();
 }
 
 static void svm_hardware_enable(void *garbage)
-- 
cgit v0.10.2


From 0f3e9eeba0ea212bbea88790729d054b700ab91e Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:22 -0200
Subject: x86: cpu_emergency_svm_disable() function

This function can be used by the reboot or kdump code to forcibly
disable SVM on the CPU.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 2cfe363..5936362 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -121,4 +121,12 @@ static inline void cpu_svm_disable(void)
 	wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
 }
 
+/** Makes sure SVM is disabled, if it is supported on the CPU
+ */
+static inline void cpu_emergency_svm_disable(void)
+{
+	if (cpu_has_svm(NULL))
+		cpu_svm_disable();
+}
+
 #endif /* _ASM_X86_VIRTEX_H */
-- 
cgit v0.10.2


From 2340b62f77c782c305e6ae7748675a638436d1ef Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:23 -0200
Subject: kdump: forcibly disable VMX and SVM on machine_crash_shutdown()

We need to disable virtualization extensions on all CPUs before booting
the kdump kernel, otherwise the kdump kernel booting will fail, and
rebooting after the kdump kernel did its task may also fail.

We do it using cpu_emergency_vmxoff() and cpu_emergency_svm_disable(),
that should always work, because those functions check if the CPUs
support SVM or VMX before doing their tasks.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index d84a852..c689d19 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
 #include <linux/kdebug.h>
 #include <asm/smp.h>
 #include <asm/reboot.h>
+#include <asm/virtext.h>
 
 #include <mach_ipi.h>
 
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
 #endif
 	crash_save_cpu(regs, cpu);
 
+	/* Disable VMX or SVM if needed.
+	 *
+	 * We need to disable virtualization on all CPUs.
+	 * Having VMX or SVM enabled on any CPU may break rebooting
+	 * after the kdump kernel has finished its task.
+	 */
+	cpu_emergency_vmxoff();
+	cpu_emergency_svm_disable();
+
 	disable_local_APIC();
 }
 
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	local_irq_disable();
 
 	kdump_nmi_shootdown_cpus();
+
+	/* Booting kdump kernel with VMX or SVM enabled won't work,
+	 * because (among other limitations) we can't disable paging
+	 * with the virt flags.
+	 */
+	cpu_emergency_vmxoff();
+	cpu_emergency_svm_disable();
+
 	lapic_shutdown();
 #if defined(CONFIG_X86_IO_APIC)
 	disable_IO_APIC();
-- 
cgit v0.10.2


From d176720d34c72f7a8474a12204add93e54fe3ef1 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:24 -0200
Subject: x86: disable VMX on all CPUs on reboot

On emergency_restart, we may need to use an NMI to disable virtualization
on all CPUs. We do that using nmi_shootdown_cpus() if VMX is enabled.

Note: With this patch, we will run the NMI stuff only when the CPU where
emergency_restart() was called has VMX enabled. This should work on most
cases because KVM enables VMX on all CPUs, but we may miss the small
window where KVM is doing that. Also, I don't know if all code using
VMX out there always enable VMX on all CPUs like KVM does. We have two
other alternatives for that:

a) Have an API that all code that enables VMX on any CPU should use
   to tell the kernel core that it is going to enable VMX on the CPUs.
b) Always call nmi_shootdown_cpus() if the CPU supports VMX. This is
   a bit intrusive and more risky, as it would run nmi_shootdown_cpus()
   on emergency_reboot() even on systems where virtualization is never
   enabled.

Finding a proper point to hook the nmi_shootdown_cpus() call isn't
trivial, as the non-emergency machine_restart() (that doesn't need the
NMI tricks) uses machine_emergency_restart() directly.

The solution to make this work without adding a new function or argument
to machine_ops was setting a 'reboot_emergency' flag that tells if
native_machine_emergency_restart() needs to do the virt cleanup or not.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718d..72e0e4e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
 #include <asm/proto.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
+#include <asm/virtext.h>
 
 #ifdef CONFIG_X86_32
 # include <linux/dmi.h>
@@ -39,6 +40,12 @@ int reboot_force;
 static int reboot_cpu = -1;
 #endif
 
+/* This is set if we need to go through the 'emergency' path.
+ * When machine_emergency_restart() is called, we may be on
+ * an inconsistent state and won't be able to do a clean cleanup
+ */
+static int reboot_emergency;
+
 /* This is set by the PCI code if either type 1 or type 2 PCI is detected */
 bool port_cf9_safe = false;
 
@@ -368,6 +375,48 @@ static inline void kb_wait(void)
 	}
 }
 
+static void vmxoff_nmi(int cpu, struct die_args *args)
+{
+	cpu_emergency_vmxoff();
+}
+
+/* Use NMIs as IPIs to tell all CPUs to disable virtualization
+ */
+static void emergency_vmx_disable_all(void)
+{
+	/* Just make sure we won't change CPUs while doing this */
+	local_irq_disable();
+
+	/* We need to disable VMX on all CPUs before rebooting, otherwise
+	 * we risk hanging up the machine, because the CPU ignore INIT
+	 * signals when VMX is enabled.
+	 *
+	 * We can't take any locks and we may be on an inconsistent
+	 * state, so we use NMIs as IPIs to tell the other CPUs to disable
+	 * VMX and halt.
+	 *
+	 * For safety, we will avoid running the nmi_shootdown_cpus()
+	 * stuff unnecessarily, but we don't have a way to check
+	 * if other CPUs have VMX enabled. So we will call it only if the
+	 * CPU we are running on has VMX enabled.
+	 *
+	 * We will miss cases where VMX is not enabled on all CPUs. This
+	 * shouldn't do much harm because KVM always enable VMX on all
+	 * CPUs anyway. But we can miss it on the small window where KVM
+	 * is still enabling VMX.
+	 */
+	if (cpu_has_vmx() && cpu_vmx_enabled()) {
+		/* Disable VMX on this CPU.
+		 */
+		cpu_vmxoff();
+
+		/* Halt and disable VMX on the other CPUs */
+		nmi_shootdown_cpus(vmxoff_nmi);
+
+	}
+}
+
+
 void __attribute__((weak)) mach_reboot_fixups(void)
 {
 }
@@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void)
 {
 	int i;
 
+	if (reboot_emergency)
+		emergency_vmx_disable_all();
+
 	/* Tell the BIOS if we want cold or warm reboot */
 	*((unsigned short *)__va(0x472)) = reboot_mode;
 
@@ -482,13 +534,19 @@ void native_machine_shutdown(void)
 #endif
 }
 
+static void __machine_emergency_restart(int emergency)
+{
+	reboot_emergency = emergency;
+	machine_ops.emergency_restart();
+}
+
 static void native_machine_restart(char *__unused)
 {
 	printk("machine restart\n");
 
 	if (!reboot_force)
 		machine_shutdown();
-	machine_emergency_restart();
+	__machine_emergency_restart(0);
 }
 
 static void native_machine_halt(void)
@@ -532,7 +590,7 @@ void machine_shutdown(void)
 
 void machine_emergency_restart(void)
 {
-	machine_ops.emergency_restart();
+	__machine_emergency_restart(1);
 }
 
 void machine_restart(char *cmd)
-- 
cgit v0.10.2


From 7d637978151511148912fe2ea2bac9f9c64f5c35 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Fri, 21 Nov 2008 20:58:11 +0800
Subject: KVM: ia64: Define printk function for kvm-intel module

kvm-intel module is relocated to an isolated address space
with kernel, so it can't call host kernel's printk for debug
purpose. In the module, we implement the printk to output debug
info of vmm.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 678e264..0560f3f 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -39,6 +39,7 @@
 #define EXIT_REASON_EXTERNAL_INTERRUPT	6
 #define EXIT_REASON_IPI			7
 #define EXIT_REASON_PTC_G		8
+#define EXIT_REASON_DEBUG		20
 
 /*Define vmm address space and vm data space.*/
 #define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
@@ -126,6 +127,8 @@
 			KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
 #define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
 
+#define VMM_LOG_LEN 256
+
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/kvm.h>
@@ -437,6 +440,7 @@ struct kvm_vcpu_arch {
 
 	unsigned long opcode;
 	unsigned long cause;
+	char log_buf[VMM_LOG_LEN];
 	union context host;
 	union context guest;
 };
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 92cef66..76464dc 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_KVM) += kvm.o
 
 CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
 kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
-	vtlb.o process.o
+	vtlb.o process.o kvm_lib.o
 #Add link memcpy and memset to avoid possible structure assignment error
 kvm-intel-objs += memcpy.o memset.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 70eb829..b4d24e2 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -474,6 +474,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
 	return 1;
 }
 
+static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
+				struct kvm_run *kvm_run)
+{
+	printk("VMM: %s", vcpu->arch.log_buf);
+	return 1;
+}
+
 static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
 		struct kvm_run *kvm_run) = {
 	[EXIT_REASON_VM_PANIC]              = handle_vm_error,
@@ -485,6 +492,7 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
 	[EXIT_REASON_IPI]		    = handle_ipi,
 	[EXIT_REASON_PTC_G]		    = handle_global_purge,
+	[EXIT_REASON_DEBUG]		    = handle_vcpu_debug,
 
 };
 
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
new file mode 100644
index 0000000..a85cb61
--- /dev/null
+++ b/arch/ia64/kvm/kvm_lib.c
@@ -0,0 +1,15 @@
+/*
+ * kvm_lib.c: Compile some libraries for kvm-intel module.
+ *
+ *	Just include kernel's library, and disable symbols export.
+ * 	Copyright (C) 2008, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#undef CONFIG_MODULES
+#include "../../../lib/vsprintf.c"
+#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 2275bf4..9577795 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -62,5 +62,31 @@ void vmm_spin_unlock(spinlock_t *lock)
 {
 	_vmm_raw_spin_unlock(lock);
 }
+
+static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	long psr;
+
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_DEBUG;
+	vmm_transition(vcpu);
+	local_irq_restore(psr);
+}
+
+asmlinkage int printk(const char *fmt, ...)
+{
+	struct kvm_vcpu *vcpu = current_vcpu;
+	va_list args;
+	int r;
+
+	memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
+	va_start(args, fmt);
+	r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
+	va_end(args);
+	vcpu_debug_exit(vcpu);
+	return r;
+}
+
 module_init(kvm_vmm_init)
 module_exit(kvm_vmm_exit)
-- 
cgit v0.10.2


From 5e2be19832ccf93bf731a1758ec9fabf48414584 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Fri, 21 Nov 2008 10:46:12 +0800
Subject: KVM: ia64: Add some debug points to provide crash infomation

Use printk infrastructure to print out some debug info once VM crashes.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 7f1a858..21f63ff 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -66,31 +66,25 @@ void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
 
 	switch (addr) {
 	case PIB_OFST_INTA:
-		/*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
-		panic_vm(v);
+		panic_vm(v, "Undefined write on PIB INTA\n");
 		break;
 	case PIB_OFST_XTP:
 		if (length == 1) {
 			vlsapic_write_xtp(v, val);
 		} else {
-			/*panic_domain(NULL,
-			"Undefined write on PIB XTP\n");*/
-			panic_vm(v);
+			panic_vm(v, "Undefined write on PIB XTP\n");
 		}
 		break;
 	default:
 		if (PIB_LOW_HALF(addr)) {
-			/*lower half */
+			/*Lower half */
 			if (length != 8)
-				/*panic_domain(NULL,
-				"Can't LHF write with size %ld!\n",
-				length);*/
-				panic_vm(v);
+				panic_vm(v, "Can't LHF write with size %ld!\n",
+						length);
 			else
 				vlsapic_write_ipi(v, addr, val);
-		} else {   /*	upper half
-				printk("IPI-UHF write %lx\n",addr);*/
-			panic_vm(v);
+		} else {   /*Upper half */
+			panic_vm(v, "IPI-UHF write %lx\n", addr);
 		}
 		break;
 	}
@@ -108,22 +102,18 @@ unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
 		if (length == 1) /* 1 byte load */
 			; /* There is no i8259, there is no INTA access*/
 		else
-			/*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
-			panic_vm(v);
+			panic_vm(v, "Undefined read on PIB INTA\n");
 
 		break;
 	case PIB_OFST_XTP:
 		if (length == 1) {
 			result = VLSAPIC_XTP(v);
-			/* printk("read xtp %lx\n", result); */
 		} else {
-			/*panic_domain(NULL,
-			"Undefined read on PIB XTP\n");*/
-			panic_vm(v);
+			panic_vm(v, "Undefined read on PIB XTP\n");
 		}
 		break;
 	default:
-		panic_vm(v);
+		panic_vm(v, "Undefined addr access for lsapic!\n");
 		break;
 	}
 	return result;
@@ -162,7 +152,7 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
 			/* it's necessary to ensure zero extending */
 			*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
 	} else
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Unhandled mmio access returned!\n");
 out:
 	local_irq_restore(psr);
 	return ;
@@ -324,7 +314,9 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
 		return;
 	} else {
 		inst_type = -1;
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Unsupported MMIO access instruction! \
+				Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
+				bundle.i64[0], bundle.i64[1]);
 	}
 
 	size = 1 << size;
@@ -335,7 +327,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
 		if (inst_type == SL_INTEGER)
 			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
 		else
-			panic_vm(vcpu);
+			panic_vm(vcpu, "Unsupported instruction type!\n");
 
 	}
 	vcpu_increment_iip(vcpu);
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 8008173..cefc349 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -527,7 +527,8 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
 	vector = vec2off[vec];
 
 	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
+						"with psr.ic = 0\n", vector);
 		return;
 	}
 
@@ -586,7 +587,7 @@ static void set_pal_call_result(struct kvm_vcpu *vcpu)
 		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
 		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
 	} else
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Mis-set for exit reason!\n");
 }
 
 static void set_sal_call_data(struct kvm_vcpu *vcpu)
@@ -614,7 +615,7 @@ static void set_sal_call_result(struct kvm_vcpu *vcpu)
 		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
 		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
 	} else
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Mis-set for exit reason!\n");
 }
 
 void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
@@ -680,7 +681,7 @@ static void generate_exirq(struct kvm_vcpu *vcpu)
 	vpsr = VCPU(vcpu, vpsr);
 	isr = vpsr & IA64_PSR_RI;
 	if (!(vpsr & IA64_PSR_IC))
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
 	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
 }
 
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index a528d70..ecd526b 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -1651,7 +1651,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
 	 * Otherwise panic
 	 */
 	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
+				& vpsr.is=0\n");
 
 	/*
 	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
@@ -2104,7 +2105,7 @@ void kvm_init_all_rr(struct kvm_vcpu *vcpu)
 
 	if (is_physical_mode(vcpu)) {
 		if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
-			panic_vm(vcpu);
+			panic_vm(vcpu, "Machine Status conflicts!\n");
 
 		ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
 		ia64_dv_serialize_data();
@@ -2153,10 +2154,70 @@ int vmm_entry(void)
 	return 0;
 }
 
-void panic_vm(struct kvm_vcpu *v)
-{
+static void kvm_show_registers(struct kvm_pt_regs *regs)
+{
+	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+	struct kvm_vcpu *vcpu = current_vcpu;
+	if (vcpu != NULL)
+		printk("vcpu 0x%p vcpu %d\n",
+		       vcpu, vcpu->vcpu_id);
+
+	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]\n",
+	       regs->cr_ipsr, regs->cr_ifs, ip);
+
+	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+	printk("rnat: %016lx bspstore: %016lx pr  : %016lx\n",
+	       regs->ar_rnat, regs->ar_bspstore, regs->pr);
+	printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+	       regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+	printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
+	printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0,
+							regs->b6, regs->b7);
+	printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
+	       regs->f6.u.bits[1], regs->f6.u.bits[0],
+	       regs->f7.u.bits[1], regs->f7.u.bits[0]);
+	printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
+	       regs->f8.u.bits[1], regs->f8.u.bits[0],
+	       regs->f9.u.bits[1], regs->f9.u.bits[0]);
+	printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
+	       regs->f10.u.bits[1], regs->f10.u.bits[0],
+	       regs->f11.u.bits[1], regs->f11.u.bits[0]);
+
+	printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1,
+							regs->r2, regs->r3);
+	printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8,
+							regs->r9, regs->r10);
+	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
+							regs->r12, regs->r13);
+	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
+							regs->r15, regs->r16);
+	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
+							regs->r18, regs->r19);
+	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
+							regs->r21, regs->r22);
+	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
+							regs->r24, regs->r25);
+	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
+							regs->r27, regs->r28);
+	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
+							regs->r30, regs->r31);
+
+}
+
+void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	struct kvm_pt_regs *regs = vcpu_regs(v);
 	struct exit_ctl_data *p = &v->arch.exit_data;
-
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	printk(buf);
+	kvm_show_registers(regs);
 	p->exit_reason = EXIT_REASON_VM_PANIC;
 	vmm_transition(v);
 	/*Never to return*/
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index e9b2a4e..0dad842 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -737,7 +737,7 @@ void kvm_init_vtlb(struct kvm_vcpu *v);
 void kvm_init_vhpt(struct kvm_vcpu *v);
 void thash_init(struct thash_cb *hcb, u64 sz);
 
-void panic_vm(struct kvm_vcpu *v);
+void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
 
 extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
 		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 9577795..d3dc0b0 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -20,6 +20,7 @@
  */
 
 
+#include<linux/kernel.h>
 #include<linux/module.h>
 #include<asm/fpswa.h>
 
-- 
cgit v0.10.2


From 9f7d5bb5e2abf5316bb17eb3e7751dbafa09e5cf Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Fri, 21 Nov 2008 17:16:07 +0800
Subject: KVM: ia64: Add handler for crashed vmm

Since vmm runs in an isolated address space and it is just a copy
of host's kvm-intel module, so once vmm crashes, we just crash all guests
running on it instead of crashing whole kernel.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index cefc349..552d077 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -942,8 +942,20 @@ static void vcpu_do_resume(struct kvm_vcpu *vcpu)
 	ia64_set_pta(vcpu->arch.vhpt.pta.val);
 }
 
+static void vmm_sanity_check(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
+		panic_vm(vcpu, "Failed to do vmm sanity check,"
+			"it maybe caused by crashed vmm!!\n\n");
+	}
+}
+
 static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
 {
+	vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/
+
 	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
 		vcpu_do_resume(vcpu);
 		return;
@@ -969,3 +981,11 @@ void vmm_transition(struct kvm_vcpu *vcpu)
 						1, 0, 0, 0, 0, 0);
 	kvm_do_resume_op(vcpu);
 }
+
+void vmm_panic_handler(u64 vec)
+{
+	struct kvm_vcpu *vcpu = current_vcpu;
+	vmm_sanity = 0;
+	panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
+			vec2off[vec]);
+}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index 0dad842..b2f12a5 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -741,5 +741,8 @@ void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
 
 extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
 		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+
+extern long vmm_sanity;
+
 #endif
 #endif	/* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index d3dc0b0..9eee5c0 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -32,6 +32,8 @@ MODULE_LICENSE("GPL");
 extern char kvm_ia64_ivt;
 extern fpswa_interface_t *vmm_fpswa_interface;
 
+long vmm_sanity = 1;
+
 struct kvm_vmm_info vmm_info = {
 	.module	     = THIS_MODULE,
 	.vmm_entry   = vmm_entry,
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index c1d7251..50b4646 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -70,14 +70,12 @@
 # define PSR_DEFAULT_BITS   0
 #endif
 
-
 #define KVM_FAULT(n)    \
     kvm_fault_##n:;          \
     mov r19=n;;          \
-    br.sptk.many kvm_fault_##n;         \
+    br.sptk.many kvm_vmm_panic;         \
     ;;                  \
 
-
 #define KVM_REFLECT(n)    \
     mov r31=pr;           \
     mov r19=n;       /* prepare to save predicates */ \
@@ -85,17 +83,26 @@
     ;;      \
     tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
 (p7)br.sptk.many kvm_dispatch_reflection;        \
-    br.sptk.many kvm_panic;      \
-
+    br.sptk.many kvm_vmm_panic;      \
 
-GLOBAL_ENTRY(kvm_panic)
-    br.sptk.many kvm_panic
+GLOBAL_ENTRY(kvm_vmm_panic)
+    KVM_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,1,0
+    mov out0=r15
+    adds r3=8,r2                // set up second base pointer
     ;;
-END(kvm_panic)
-
-
-
-
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15) ssm psr.i               // restore psr.i
+    addl r14=@gprel(ia64_leave_hypervisor),gp
+    ;;
+    KVM_SAVE_REST
+    mov rp=r14
+    ;;
+    br.call.sptk.many b6=vmm_panic_handler;
+END(kvm_vmm_panic)
 
     .section .text.ivt,"ax"
 
-- 
cgit v0.10.2


From 8fe0736763a07fbea56213ea105a0c2ee098e6fc Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Fri, 21 Nov 2008 21:04:37 +0800
Subject: KVM: ia64: Clean up vmm_ivt.S using tab to indent every line

Using tab for indentation for vmm_ivt.S.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index 50b4646..3ef1a01 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -1,5 +1,5 @@
 /*
- * /ia64/kvm_ivt.S
+ * arch/ia64/kvm/vmm_ivt.S
  *
  * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
  *      Stephane Eranian <eranian@hpl.hp.com>
@@ -71,37 +71,37 @@
 #endif
 
 #define KVM_FAULT(n)    \
-    kvm_fault_##n:;          \
-    mov r19=n;;          \
-    br.sptk.many kvm_vmm_panic;         \
-    ;;                  \
+	kvm_fault_##n:;          \
+	mov r19=n;;          \
+	br.sptk.many kvm_vmm_panic;         \
+	;;                  \
 
 #define KVM_REFLECT(n)    \
-    mov r31=pr;           \
-    mov r19=n;       /* prepare to save predicates */ \
-    mov r29=cr.ipsr;      \
-    ;;      \
-    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
-(p7)br.sptk.many kvm_dispatch_reflection;        \
-    br.sptk.many kvm_vmm_panic;      \
+	mov r31=pr;           \
+	mov r19=n;       /* prepare to save predicates */ \
+	mov r29=cr.ipsr;      \
+	;;      \
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
+(p7)	br.sptk.many kvm_dispatch_reflection;        \
+	br.sptk.many kvm_vmm_panic;      \
 
 GLOBAL_ENTRY(kvm_vmm_panic)
-    KVM_SAVE_MIN_WITH_COVER_R19
-    alloc r14=ar.pfs,0,0,1,0
-    mov out0=r15
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15) ssm psr.i               // restore psr.i
-    addl r14=@gprel(ia64_leave_hypervisor),gp
-    ;;
-    KVM_SAVE_REST
-    mov rp=r14
-    ;;
-    br.call.sptk.many b6=vmm_panic_handler;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,1,0
+	mov out0=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i    // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	br.call.sptk.many b6=vmm_panic_handler;
 END(kvm_vmm_panic)
 
     .section .text.ivt,"ax"
@@ -112,308 +112,307 @@ kvm_ia64_ivt:
 ///////////////////////////////////////////////////////////////
 // 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
 ENTRY(kvm_vhpt_miss)
-    KVM_FAULT(0)
+	KVM_FAULT(0)
 END(kvm_vhpt_miss)
 
-
     .org kvm_ia64_ivt+0x400
 ////////////////////////////////////////////////////////////////
 // 0x0400 Entry 1 (size 64 bundles) ITLB (21)
 ENTRY(kvm_itlb_miss)
-    mov r31 = pr
-    mov r29=cr.ipsr;
-    ;;
-    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-    (p6) br.sptk kvm_alt_itlb_miss
-    mov r19 = 1
-    br.sptk kvm_itlb_miss_dispatch
-    KVM_FAULT(1);
+	mov r31 = pr
+	mov r29=cr.ipsr;
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)	br.sptk kvm_alt_itlb_miss
+	mov r19 = 1
+	br.sptk kvm_itlb_miss_dispatch
+	KVM_FAULT(1);
 END(kvm_itlb_miss)
 
     .org kvm_ia64_ivt+0x0800
 //////////////////////////////////////////////////////////////////
 // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
 ENTRY(kvm_dtlb_miss)
-    mov r31 = pr
-    mov r29=cr.ipsr;
-    ;;
-    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)br.sptk kvm_alt_dtlb_miss
-    br.sptk kvm_dtlb_miss_dispatch
+	mov r31 = pr
+	mov r29=cr.ipsr;
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)	br.sptk kvm_alt_dtlb_miss
+	br.sptk kvm_dtlb_miss_dispatch
 END(kvm_dtlb_miss)
 
      .org kvm_ia64_ivt+0x0c00
 ////////////////////////////////////////////////////////////////////
 // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
 ENTRY(kvm_alt_itlb_miss)
-    mov r16=cr.ifa    // get address that caused the TLB miss
-    ;;
-    movl r17=PAGE_KERNEL
-    mov r24=cr.ipsr
-    movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-    ;;
-    and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-    ;;
-    or r19=r17,r19      // insert PTE control bits into r19
-    ;;
-    movl r20=IA64_GRANULE_SHIFT<<2
-    ;;
-    mov cr.itir=r20
-    ;;
-    itc.i r19		// insert the TLB entry
-    mov pr=r31,-1
-    rfi
+	mov r16=cr.ifa    // get address that caused the TLB miss
+	;;
+	movl r17=PAGE_KERNEL
+	mov r24=cr.ipsr
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	;;
+	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+	;;
+	or r19=r17,r19      // insert PTE control bits into r19
+	;;
+	movl r20=IA64_GRANULE_SHIFT<<2
+	;;
+	mov cr.itir=r20
+	;;
+	itc.i r19		// insert the TLB entry
+	mov pr=r31,-1
+	rfi
 END(kvm_alt_itlb_miss)
 
     .org kvm_ia64_ivt+0x1000
 /////////////////////////////////////////////////////////////////////
 // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
 ENTRY(kvm_alt_dtlb_miss)
-    mov r16=cr.ifa		// get address that caused the TLB miss
-    ;;
-    movl r17=PAGE_KERNEL
-    movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-    mov r24=cr.ipsr
-    ;;
-    and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-    ;;
-    or r19=r19,r17	// insert PTE control bits into r19
-    ;;
-    movl r20=IA64_GRANULE_SHIFT<<2
-    ;;
-    mov cr.itir=r20
-    ;;
-    itc.d r19		// insert the TLB entry
-    mov pr=r31,-1
-    rfi
+	mov r16=cr.ifa		// get address that caused the TLB miss
+	;;
+	movl r17=PAGE_KERNEL
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	mov r24=cr.ipsr
+	;;
+	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+	;;
+	or r19=r19,r17	// insert PTE control bits into r19
+	;;
+	movl r20=IA64_GRANULE_SHIFT<<2
+	;;
+	mov cr.itir=r20
+	;;
+	itc.d r19		// insert the TLB entry
+	mov pr=r31,-1
+	rfi
 END(kvm_alt_dtlb_miss)
 
     .org kvm_ia64_ivt+0x1400
 //////////////////////////////////////////////////////////////////////
 // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
 ENTRY(kvm_nested_dtlb_miss)
-    KVM_FAULT(5)
+	KVM_FAULT(5)
 END(kvm_nested_dtlb_miss)
 
     .org kvm_ia64_ivt+0x1800
 /////////////////////////////////////////////////////////////////////
 // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
 ENTRY(kvm_ikey_miss)
-    KVM_REFLECT(6)
+	KVM_REFLECT(6)
 END(kvm_ikey_miss)
 
     .org kvm_ia64_ivt+0x1c00
 /////////////////////////////////////////////////////////////////////
 // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
 ENTRY(kvm_dkey_miss)
-    KVM_REFLECT(7)
+	KVM_REFLECT(7)
 END(kvm_dkey_miss)
 
     .org kvm_ia64_ivt+0x2000
 ////////////////////////////////////////////////////////////////////
 // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
 ENTRY(kvm_dirty_bit)
-    KVM_REFLECT(8)
+	KVM_REFLECT(8)
 END(kvm_dirty_bit)
 
     .org kvm_ia64_ivt+0x2400
 ////////////////////////////////////////////////////////////////////
 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
 ENTRY(kvm_iaccess_bit)
-    KVM_REFLECT(9)
+	KVM_REFLECT(9)
 END(kvm_iaccess_bit)
 
     .org kvm_ia64_ivt+0x2800
 ///////////////////////////////////////////////////////////////////
 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
 ENTRY(kvm_daccess_bit)
-    KVM_REFLECT(10)
+	KVM_REFLECT(10)
 END(kvm_daccess_bit)
 
     .org kvm_ia64_ivt+0x2c00
 /////////////////////////////////////////////////////////////////
 // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
 ENTRY(kvm_break_fault)
-    mov r31=pr
-    mov r19=11
-    mov r29=cr.ipsr
-    ;;
-    KVM_SAVE_MIN_WITH_COVER_R19
-    ;;
-    alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
-    mov out0=cr.ifa
-    mov out2=cr.isr     // FIXME: pity to make this slow access twice
-    mov out3=cr.iim     // FIXME: pity to make this slow access twice
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15)ssm psr.i               // restore psr.i
-    addl r14=@gprel(ia64_leave_hypervisor),gp
-    ;;
-    KVM_SAVE_REST
-    mov rp=r14
-    ;;
-    adds out1=16,sp
-    br.call.sptk.many b6=kvm_ia64_handle_break
-    ;;
+	mov r31=pr
+	mov r19=11
+	mov r29=cr.ipsr
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	;;
+	alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
+	mov out0=cr.ifa
+	mov out2=cr.isr     // FIXME: pity to make this slow access twice
+	mov out3=cr.iim     // FIXME: pity to make this slow access twice
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i         // guarantee that interruption collection is on
+	;;
+	//(p15)ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out1=16,sp
+	br.call.sptk.many b6=kvm_ia64_handle_break
+	;;
 END(kvm_break_fault)
 
     .org kvm_ia64_ivt+0x3000
 /////////////////////////////////////////////////////////////////
 // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
 ENTRY(kvm_interrupt)
-    mov r31=pr		// prepare to save predicates
-    mov r19=12
-    mov r29=cr.ipsr
-    ;;
-    tbit.z p6,p7=r29,IA64_PSR_VM_BIT
-    tbit.z p0,p15=r29,IA64_PSR_I_BIT
-    ;;
-(p7) br.sptk kvm_dispatch_interrupt
-    ;;
-    mov r27=ar.rsc		/* M */
-    mov r20=r1			/* A */
-    mov r25=ar.unat		/* M */
-    mov r26=ar.pfs		/* I */
-    mov r28=cr.iip		/* M */
-    cover			/* B (or nothing) */
-    ;;
-    mov r1=sp
-    ;;
-    invala			/* M */
-    mov r30=cr.ifs
-    ;;
-    addl r1=-VMM_PT_REGS_SIZE,r1
-    ;;
-    adds r17=2*L1_CACHE_BYTES,r1	/* really: biggest cache-line size */
-    adds r16=PT(CR_IPSR),r1
-    ;;
-    lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
-    st8 [r16]=r29			/* save cr.ipsr */
-    ;;
-    lfetch.fault.excl.nt1 [r17]
-    mov r29=b0
-    ;;
-    adds r16=PT(R8),r1  	/* initialize first base pointer */
-    adds r17=PT(R9),r1  	/* initialize second base pointer */
-    mov r18=r0      		/* make sure r18 isn't NaT */
-    ;;
+	mov r31=pr		// prepare to save predicates
+	mov r19=12
+	mov r29=cr.ipsr
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+	tbit.z p0,p15=r29,IA64_PSR_I_BIT
+	;;
+(p7)	br.sptk kvm_dispatch_interrupt
+	;;
+	mov r27=ar.rsc		/* M */
+	mov r20=r1			/* A */
+	mov r25=ar.unat		/* M */
+	mov r26=ar.pfs		/* I */
+	mov r28=cr.iip		/* M */
+	cover			/* B (or nothing) */
+	;;
+	mov r1=sp
+	;;
+	invala			/* M */
+	mov r30=cr.ifs
+	;;
+	addl r1=-VMM_PT_REGS_SIZE,r1
+	;;
+	adds r17=2*L1_CACHE_BYTES,r1	/* really: biggest cache-line size */
+	adds r16=PT(CR_IPSR),r1
+	;;
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+	st8 [r16]=r29			/* save cr.ipsr */
+	;;
+	lfetch.fault.excl.nt1 [r17]
+	mov r29=b0
+	;;
+	adds r16=PT(R8),r1  	/* initialize first base pointer */
+	adds r17=PT(R9),r1  	/* initialize second base pointer */
+	mov r18=r0      		/* make sure r18 isn't NaT */
+	;;
 .mem.offset 0,0; st8.spill [r16]=r8,16
 .mem.offset 8,0; st8.spill [r17]=r9,16
         ;;
 .mem.offset 0,0; st8.spill [r16]=r10,24
 .mem.offset 8,0; st8.spill [r17]=r11,24
         ;;
-    st8 [r16]=r28,16		/* save cr.iip */
-    st8 [r17]=r30,16		/* save cr.ifs */
-    mov r8=ar.fpsr		/* M */
-    mov r9=ar.csd
-    mov r10=ar.ssd
-    movl r11=FPSR_DEFAULT	/* L-unit */
-    ;;
-    st8 [r16]=r25,16		/* save ar.unat */
-    st8 [r17]=r26,16		/* save ar.pfs */
-    shl r18=r18,16		/* compute ar.rsc to be used for "loadrs" */
-    ;;
-    st8 [r16]=r27,16		/* save ar.rsc */
-    adds r17=16,r17		/* skip over ar_rnat field */
-    ;;
-    st8 [r17]=r31,16		/* save predicates */
-    adds r16=16,r16		/* skip over ar_bspstore field */
-    ;;
-    st8 [r16]=r29,16		/* save b0 */
-    st8 [r17]=r18,16		/* save ar.rsc value for "loadrs" */
-    ;;
+	st8 [r16]=r28,16		/* save cr.iip */
+	st8 [r17]=r30,16		/* save cr.ifs */
+	mov r8=ar.fpsr		/* M */
+	mov r9=ar.csd
+	mov r10=ar.ssd
+	movl r11=FPSR_DEFAULT	/* L-unit */
+	;;
+	st8 [r16]=r25,16		/* save ar.unat */
+	st8 [r17]=r26,16		/* save ar.pfs */
+	shl r18=r18,16		/* compute ar.rsc to be used for "loadrs" */
+	;;
+	st8 [r16]=r27,16		/* save ar.rsc */
+	adds r17=16,r17		/* skip over ar_rnat field */
+	;;
+	st8 [r17]=r31,16		/* save predicates */
+	adds r16=16,r16		/* skip over ar_bspstore field */
+	;;
+	st8 [r16]=r29,16		/* save b0 */
+	st8 [r17]=r18,16		/* save ar.rsc value for "loadrs" */
+	;;
 .mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
 .mem.offset 8,0; st8.spill [r17]=r12,16
-    adds r12=-16,r1
-    /* switch to kernel memory stack (with 16 bytes of scratch) */
-    ;;
+	adds r12=-16,r1
+	/* switch to kernel memory stack (with 16 bytes of scratch) */
+	;;
 .mem.offset 0,0; st8.spill [r16]=r13,16
 .mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r16]=r15,16
 .mem.offset 8,0; st8.spill [r17]=r14,16
-    dep r14=-1,r0,60,4
-    ;;
+	dep r14=-1,r0,60,4
+	;;
 .mem.offset 0,0; st8.spill [r16]=r2,16
 .mem.offset 8,0; st8.spill [r17]=r3,16
-    adds r2=VMM_PT_REGS_R16_OFFSET,r1
-    adds r14 = VMM_VCPU_GP_OFFSET,r13
-    ;;
-    mov r8=ar.ccv
-    ld8 r14 = [r14]
-    ;;
-    mov r1=r14       /* establish kernel global pointer */
-    ;;                                          \
-    bsw.1
-    ;;
-    alloc r14=ar.pfs,0,0,1,0	// must be first in an insn group
-    mov out0=r13
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i
-    ;;
-    //(p15) ssm psr.i
-    adds r3=8,r2		// set up second base pointer for SAVE_REST
-    srlz.i			// ensure everybody knows psr.ic is back on
-    ;;
+	adds r2=VMM_PT_REGS_R16_OFFSET,r1
+	adds r14 = VMM_VCPU_GP_OFFSET,r13
+	;;
+	mov r8=ar.ccv
+	ld8 r14 = [r14]
+	;;
+	mov r1=r14       /* establish kernel global pointer */
+	;;                                          \
+	bsw.1
+	;;
+	alloc r14=ar.pfs,0,0,1,0	// must be first in an insn group
+	mov out0=r13
+	;;
+	ssm psr.ic
+	;;
+	srlz.i
+	;;
+	//(p15) ssm psr.i
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	srlz.i			// ensure everybody knows psr.ic is back on
+	;;
 .mem.offset 0,0; st8.spill [r2]=r16,16
 .mem.offset 8,0; st8.spill [r3]=r17,16
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r2]=r18,16
 .mem.offset 8,0; st8.spill [r3]=r19,16
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r2]=r20,16
 .mem.offset 8,0; st8.spill [r3]=r21,16
-    mov r18=b6
-    ;;
+	mov r18=b6
+	;;
 .mem.offset 0,0; st8.spill [r2]=r22,16
 .mem.offset 8,0; st8.spill [r3]=r23,16
-    mov r19=b7
-    ;;
+	mov r19=b7
+	;;
 .mem.offset 0,0; st8.spill [r2]=r24,16
 .mem.offset 8,0; st8.spill [r3]=r25,16
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r2]=r26,16
 .mem.offset 8,0; st8.spill [r3]=r27,16
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r2]=r28,16
 .mem.offset 8,0; st8.spill [r3]=r29,16
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r2]=r30,16
 .mem.offset 8,0; st8.spill [r3]=r31,32
-    ;;
-    mov ar.fpsr=r11       /* M-unit */
-    st8 [r2]=r8,8         /* ar.ccv */
-    adds r24=PT(B6)-PT(F7),r3
-    ;;
-    stf.spill [r2]=f6,32
-    stf.spill [r3]=f7,32
-    ;;
-    stf.spill [r2]=f8,32
-    stf.spill [r3]=f9,32
-    ;;
-    stf.spill [r2]=f10
-    stf.spill [r3]=f11
-    adds r25=PT(B7)-PT(F11),r3
-    ;;
-    st8 [r24]=r18,16       /* b6 */
-    st8 [r25]=r19,16       /* b7 */
-    ;;
-    st8 [r24]=r9           /* ar.csd */
-    st8 [r25]=r10          /* ar.ssd */
-    ;;
-    srlz.d		// make sure we see the effect of cr.ivr
-    addl r14=@gprel(ia64_leave_nested),gp
-    ;;
-    mov rp=r14
-    br.call.sptk.many b6=kvm_ia64_handle_irq
-    ;;
+	;;
+	mov ar.fpsr=r11       /* M-unit */
+	st8 [r2]=r8,8         /* ar.ccv */
+	adds r24=PT(B6)-PT(F7),r3
+	;;
+	stf.spill [r2]=f6,32
+	stf.spill [r3]=f7,32
+	;;
+	stf.spill [r2]=f8,32
+	stf.spill [r3]=f9,32
+	;;
+	stf.spill [r2]=f10
+	stf.spill [r3]=f11
+	adds r25=PT(B7)-PT(F11),r3
+	;;
+	st8 [r24]=r18,16       /* b6 */
+	st8 [r25]=r19,16       /* b7 */
+	;;
+	st8 [r24]=r9           /* ar.csd */
+	st8 [r25]=r10          /* ar.ssd */
+	;;
+	srlz.d		// make sure we see the effect of cr.ivr
+	addl r14=@gprel(ia64_leave_nested),gp
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=kvm_ia64_handle_irq
+	;;
 END(kvm_interrupt)
 
     .global kvm_dispatch_vexirq
@@ -421,387 +420,385 @@ END(kvm_interrupt)
 //////////////////////////////////////////////////////////////////////
 // 0x3400 Entry 13 (size 64 bundles) Reserved
 ENTRY(kvm_virtual_exirq)
-    mov r31=pr
-    mov r19=13
-    mov r30 =r0
-    ;;
+	mov r31=pr
+	mov r19=13
+	mov r30 =r0
+	;;
 kvm_dispatch_vexirq:
-    cmp.eq p6,p0 = 1,r30
-    ;;
-(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
-    ;;
-(p6)ld8 r1 = [r29]
-    ;;
-    KVM_SAVE_MIN_WITH_COVER_R19
-    alloc r14=ar.pfs,0,0,1,0
-    mov out0=r13
-
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15) ssm psr.i               // restore psr.i
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    KVM_SAVE_REST
-    addl r14=@gprel(ia64_leave_hypervisor),gp
-    ;;
-    mov rp=r14
-    br.call.sptk.many b6=kvm_vexirq
+	cmp.eq p6,p0 = 1,r30
+	;;
+(p6)	add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+(p6)	ld8 r1 = [r29]
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,1,0
+	mov out0=r13
+
+	ssm psr.ic
+	;;
+	srlz.i // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	adds r3=8,r2                // set up second base pointer
+	;;
+	KVM_SAVE_REST
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=kvm_vexirq
 END(kvm_virtual_exirq)
 
     .org kvm_ia64_ivt+0x3800
 /////////////////////////////////////////////////////////////////////
 // 0x3800 Entry 14 (size 64 bundles) Reserved
-    KVM_FAULT(14)
-    // this code segment is from 2.6.16.13
-
+	KVM_FAULT(14)
+	// this code segment is from 2.6.16.13
 
     .org kvm_ia64_ivt+0x3c00
 ///////////////////////////////////////////////////////////////////////
 // 0x3c00 Entry 15 (size 64 bundles) Reserved
-    KVM_FAULT(15)
-
+	KVM_FAULT(15)
 
     .org kvm_ia64_ivt+0x4000
 ///////////////////////////////////////////////////////////////////////
 // 0x4000 Entry 16 (size 64 bundles) Reserved
-    KVM_FAULT(16)
+	KVM_FAULT(16)
 
     .org kvm_ia64_ivt+0x4400
 //////////////////////////////////////////////////////////////////////
 // 0x4400 Entry 17 (size 64 bundles) Reserved
-    KVM_FAULT(17)
+	KVM_FAULT(17)
 
     .org kvm_ia64_ivt+0x4800
 //////////////////////////////////////////////////////////////////////
 // 0x4800 Entry 18 (size 64 bundles) Reserved
-    KVM_FAULT(18)
+	KVM_FAULT(18)
 
     .org kvm_ia64_ivt+0x4c00
 //////////////////////////////////////////////////////////////////////
 // 0x4c00 Entry 19 (size 64 bundles) Reserved
-    KVM_FAULT(19)
+	KVM_FAULT(19)
 
     .org kvm_ia64_ivt+0x5000
 //////////////////////////////////////////////////////////////////////
 // 0x5000 Entry 20 (size 16 bundles) Page Not Present
 ENTRY(kvm_page_not_present)
-    KVM_REFLECT(20)
+	KVM_REFLECT(20)
 END(kvm_page_not_present)
 
     .org kvm_ia64_ivt+0x5100
 ///////////////////////////////////////////////////////////////////////
 // 0x5100 Entry 21 (size 16 bundles) Key Permission vector
 ENTRY(kvm_key_permission)
-    KVM_REFLECT(21)
+	KVM_REFLECT(21)
 END(kvm_key_permission)
 
     .org kvm_ia64_ivt+0x5200
 //////////////////////////////////////////////////////////////////////
 // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
 ENTRY(kvm_iaccess_rights)
-    KVM_REFLECT(22)
+	KVM_REFLECT(22)
 END(kvm_iaccess_rights)
 
     .org kvm_ia64_ivt+0x5300
 //////////////////////////////////////////////////////////////////////
 // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
 ENTRY(kvm_daccess_rights)
-    KVM_REFLECT(23)
+	KVM_REFLECT(23)
 END(kvm_daccess_rights)
 
     .org kvm_ia64_ivt+0x5400
 /////////////////////////////////////////////////////////////////////
 // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
 ENTRY(kvm_general_exception)
-   KVM_REFLECT(24)
-   KVM_FAULT(24)
+	KVM_REFLECT(24)
+	KVM_FAULT(24)
 END(kvm_general_exception)
 
     .org kvm_ia64_ivt+0x5500
 //////////////////////////////////////////////////////////////////////
 // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
 ENTRY(kvm_disabled_fp_reg)
-    KVM_REFLECT(25)
+	KVM_REFLECT(25)
 END(kvm_disabled_fp_reg)
 
     .org kvm_ia64_ivt+0x5600
 ////////////////////////////////////////////////////////////////////
 // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
 ENTRY(kvm_nat_consumption)
-    KVM_REFLECT(26)
+	KVM_REFLECT(26)
 END(kvm_nat_consumption)
 
     .org kvm_ia64_ivt+0x5700
 /////////////////////////////////////////////////////////////////////
 // 0x5700 Entry 27 (size 16 bundles) Speculation (40)
 ENTRY(kvm_speculation_vector)
-    KVM_REFLECT(27)
+	KVM_REFLECT(27)
 END(kvm_speculation_vector)
 
     .org kvm_ia64_ivt+0x5800
 /////////////////////////////////////////////////////////////////////
 // 0x5800 Entry 28 (size 16 bundles) Reserved
-    KVM_FAULT(28)
+	KVM_FAULT(28)
 
     .org kvm_ia64_ivt+0x5900
 ///////////////////////////////////////////////////////////////////
 // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
 ENTRY(kvm_debug_vector)
-    KVM_FAULT(29)
+	KVM_FAULT(29)
 END(kvm_debug_vector)
 
     .org kvm_ia64_ivt+0x5a00
 ///////////////////////////////////////////////////////////////
 // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
 ENTRY(kvm_unaligned_access)
-    KVM_REFLECT(30)
+	KVM_REFLECT(30)
 END(kvm_unaligned_access)
 
     .org kvm_ia64_ivt+0x5b00
 //////////////////////////////////////////////////////////////////////
 // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
 ENTRY(kvm_unsupported_data_reference)
-    KVM_REFLECT(31)
+	KVM_REFLECT(31)
 END(kvm_unsupported_data_reference)
 
     .org kvm_ia64_ivt+0x5c00
 ////////////////////////////////////////////////////////////////////
 // 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
 ENTRY(kvm_floating_point_fault)
-    KVM_REFLECT(32)
+	KVM_REFLECT(32)
 END(kvm_floating_point_fault)
 
     .org kvm_ia64_ivt+0x5d00
 /////////////////////////////////////////////////////////////////////
 // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
 ENTRY(kvm_floating_point_trap)
-    KVM_REFLECT(33)
+	KVM_REFLECT(33)
 END(kvm_floating_point_trap)
 
     .org kvm_ia64_ivt+0x5e00
 //////////////////////////////////////////////////////////////////////
 // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
 ENTRY(kvm_lower_privilege_trap)
-    KVM_REFLECT(34)
+	KVM_REFLECT(34)
 END(kvm_lower_privilege_trap)
 
     .org kvm_ia64_ivt+0x5f00
 //////////////////////////////////////////////////////////////////////
 // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
 ENTRY(kvm_taken_branch_trap)
-    KVM_REFLECT(35)
+	KVM_REFLECT(35)
 END(kvm_taken_branch_trap)
 
     .org kvm_ia64_ivt+0x6000
 ////////////////////////////////////////////////////////////////////
 // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
 ENTRY(kvm_single_step_trap)
-    KVM_REFLECT(36)
+	KVM_REFLECT(36)
 END(kvm_single_step_trap)
     .global kvm_virtualization_fault_back
     .org kvm_ia64_ivt+0x6100
 /////////////////////////////////////////////////////////////////////
 // 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
 ENTRY(kvm_virtualization_fault)
-    mov r31=pr
-    adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-    ;;
-    st8 [r16] = r1
-    adds r17 = VMM_VCPU_GP_OFFSET, r21
-    ;;
-    ld8 r1 = [r17]
-    cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
-    cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
-    cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
-    cmp.eq p9,p0=EVENT_RSM,r24
-    cmp.eq p10,p0=EVENT_SSM,r24
-    cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
-    cmp.eq p12,p0=EVENT_THASH,r24
-    (p6) br.dptk.many kvm_asm_mov_from_ar
-    (p7) br.dptk.many kvm_asm_mov_from_rr
-    (p8) br.dptk.many kvm_asm_mov_to_rr
-    (p9) br.dptk.many kvm_asm_rsm
-    (p10) br.dptk.many kvm_asm_ssm
-    (p11) br.dptk.many kvm_asm_mov_to_psr
-    (p12) br.dptk.many kvm_asm_thash
-    ;;
+	mov r31=pr
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	st8 [r16] = r1
+	adds r17 = VMM_VCPU_GP_OFFSET, r21
+	;;
+	ld8 r1 = [r17]
+	cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
+	cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
+	cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
+	cmp.eq p9,p0=EVENT_RSM,r24
+	cmp.eq p10,p0=EVENT_SSM,r24
+	cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
+	cmp.eq p12,p0=EVENT_THASH,r24
+(p6)	br.dptk.many kvm_asm_mov_from_ar
+(p7)	br.dptk.many kvm_asm_mov_from_rr
+(p8)	br.dptk.many kvm_asm_mov_to_rr
+(p9)	br.dptk.many kvm_asm_rsm
+(p10)	br.dptk.many kvm_asm_ssm
+(p11)	br.dptk.many kvm_asm_mov_to_psr
+(p12)	br.dptk.many kvm_asm_thash
+	;;
 kvm_virtualization_fault_back:
-    adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-    ;;
-    ld8 r1 = [r16]
-    ;;
-    mov r19=37
-    adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-    adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-    ;;
-    st8 [r16] = r24
-    st8 [r17] = r25
-    ;;
-    cmp.ne p6,p0=EVENT_RFI, r24
-    (p6) br.sptk kvm_dispatch_virtualization_fault
-    ;;
-    adds r18=VMM_VPD_BASE_OFFSET,r21
-    ;;
-    ld8 r18=[r18]
-    ;;
-    adds r18=VMM_VPD_VIFS_OFFSET,r18
-    ;;
-    ld8 r18=[r18]
-    ;;
-    tbit.z p6,p0=r18,63
-    (p6) br.sptk kvm_dispatch_virtualization_fault
-    ;;
-    //if vifs.v=1 desert current register frame
-    alloc r18=ar.pfs,0,0,0,0
-    br.sptk kvm_dispatch_virtualization_fault
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	ld8 r1 = [r16]
+	;;
+	mov r19=37
+	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+	;;
+	st8 [r16] = r24
+	st8 [r17] = r25
+	;;
+	cmp.ne p6,p0=EVENT_RFI, r24
+(p6)	br.sptk kvm_dispatch_virtualization_fault
+	;;
+	adds r18=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r18=[r18]
+	;;
+	adds r18=VMM_VPD_VIFS_OFFSET,r18
+	;;
+	ld8 r18=[r18]
+	;;
+	tbit.z p6,p0=r18,63
+(p6)	br.sptk kvm_dispatch_virtualization_fault
+	;;
+//if vifs.v=1 desert current register frame
+	alloc r18=ar.pfs,0,0,0,0
+	br.sptk kvm_dispatch_virtualization_fault
 END(kvm_virtualization_fault)
 
     .org kvm_ia64_ivt+0x6200
 //////////////////////////////////////////////////////////////
 // 0x6200 Entry 38 (size 16 bundles) Reserved
-    KVM_FAULT(38)
+	KVM_FAULT(38)
 
     .org kvm_ia64_ivt+0x6300
 /////////////////////////////////////////////////////////////////
 // 0x6300 Entry 39 (size 16 bundles) Reserved
-    KVM_FAULT(39)
+	KVM_FAULT(39)
 
     .org kvm_ia64_ivt+0x6400
 /////////////////////////////////////////////////////////////////
 // 0x6400 Entry 40 (size 16 bundles) Reserved
-    KVM_FAULT(40)
+	KVM_FAULT(40)
 
     .org kvm_ia64_ivt+0x6500
 //////////////////////////////////////////////////////////////////
 // 0x6500 Entry 41 (size 16 bundles) Reserved
-    KVM_FAULT(41)
+	KVM_FAULT(41)
 
     .org kvm_ia64_ivt+0x6600
 //////////////////////////////////////////////////////////////////
 // 0x6600 Entry 42 (size 16 bundles) Reserved
-    KVM_FAULT(42)
+	KVM_FAULT(42)
 
     .org kvm_ia64_ivt+0x6700
 //////////////////////////////////////////////////////////////////
 // 0x6700 Entry 43 (size 16 bundles) Reserved
-    KVM_FAULT(43)
+	KVM_FAULT(43)
 
     .org kvm_ia64_ivt+0x6800
 //////////////////////////////////////////////////////////////////
 // 0x6800 Entry 44 (size 16 bundles) Reserved
-    KVM_FAULT(44)
+	KVM_FAULT(44)
 
     .org kvm_ia64_ivt+0x6900
 ///////////////////////////////////////////////////////////////////
 // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
 //(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
 ENTRY(kvm_ia32_exception)
-    KVM_FAULT(45)
+	KVM_FAULT(45)
 END(kvm_ia32_exception)
 
     .org kvm_ia64_ivt+0x6a00
 ////////////////////////////////////////////////////////////////////
 // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
 ENTRY(kvm_ia32_intercept)
-    KVM_FAULT(47)
+	KVM_FAULT(47)
 END(kvm_ia32_intercept)
 
     .org kvm_ia64_ivt+0x6c00
 /////////////////////////////////////////////////////////////////////
 // 0x6c00 Entry 48 (size 16 bundles) Reserved
-    KVM_FAULT(48)
+	KVM_FAULT(48)
 
     .org kvm_ia64_ivt+0x6d00
 //////////////////////////////////////////////////////////////////////
 // 0x6d00 Entry 49 (size 16 bundles) Reserved
-    KVM_FAULT(49)
+	KVM_FAULT(49)
 
     .org kvm_ia64_ivt+0x6e00
 //////////////////////////////////////////////////////////////////////
 // 0x6e00 Entry 50 (size 16 bundles) Reserved
-    KVM_FAULT(50)
+	KVM_FAULT(50)
 
     .org kvm_ia64_ivt+0x6f00
 /////////////////////////////////////////////////////////////////////
 // 0x6f00 Entry 51 (size 16 bundles) Reserved
-    KVM_FAULT(52)
+	KVM_FAULT(52)
 
     .org kvm_ia64_ivt+0x7100
 ////////////////////////////////////////////////////////////////////
 // 0x7100 Entry 53 (size 16 bundles) Reserved
-    KVM_FAULT(53)
+	KVM_FAULT(53)
 
     .org kvm_ia64_ivt+0x7200
 /////////////////////////////////////////////////////////////////////
 // 0x7200 Entry 54 (size 16 bundles) Reserved
-    KVM_FAULT(54)
+	KVM_FAULT(54)
 
     .org kvm_ia64_ivt+0x7300
 ////////////////////////////////////////////////////////////////////
 // 0x7300 Entry 55 (size 16 bundles) Reserved
-    KVM_FAULT(55)
+	KVM_FAULT(55)
 
     .org kvm_ia64_ivt+0x7400
 ////////////////////////////////////////////////////////////////////
 // 0x7400 Entry 56 (size 16 bundles) Reserved
-    KVM_FAULT(56)
+	KVM_FAULT(56)
 
     .org kvm_ia64_ivt+0x7500
 /////////////////////////////////////////////////////////////////////
 // 0x7500 Entry 57 (size 16 bundles) Reserved
-    KVM_FAULT(57)
+	KVM_FAULT(57)
 
     .org kvm_ia64_ivt+0x7600
 /////////////////////////////////////////////////////////////////////
 // 0x7600 Entry 58 (size 16 bundles) Reserved
-    KVM_FAULT(58)
+	KVM_FAULT(58)
 
     .org kvm_ia64_ivt+0x7700
 ////////////////////////////////////////////////////////////////////
 // 0x7700 Entry 59 (size 16 bundles) Reserved
-    KVM_FAULT(59)
+	KVM_FAULT(59)
 
     .org kvm_ia64_ivt+0x7800
 ////////////////////////////////////////////////////////////////////
 // 0x7800 Entry 60 (size 16 bundles) Reserved
-    KVM_FAULT(60)
+	KVM_FAULT(60)
 
     .org kvm_ia64_ivt+0x7900
 /////////////////////////////////////////////////////////////////////
 // 0x7900 Entry 61 (size 16 bundles) Reserved
-    KVM_FAULT(61)
+	KVM_FAULT(61)
 
     .org kvm_ia64_ivt+0x7a00
 /////////////////////////////////////////////////////////////////////
 // 0x7a00 Entry 62 (size 16 bundles) Reserved
-    KVM_FAULT(62)
+	KVM_FAULT(62)
 
     .org kvm_ia64_ivt+0x7b00
 /////////////////////////////////////////////////////////////////////
 // 0x7b00 Entry 63 (size 16 bundles) Reserved
-    KVM_FAULT(63)
+	KVM_FAULT(63)
 
     .org kvm_ia64_ivt+0x7c00
 ////////////////////////////////////////////////////////////////////
 // 0x7c00 Entry 64 (size 16 bundles) Reserved
-    KVM_FAULT(64)
+	KVM_FAULT(64)
 
     .org kvm_ia64_ivt+0x7d00
 /////////////////////////////////////////////////////////////////////
 // 0x7d00 Entry 65 (size 16 bundles) Reserved
-    KVM_FAULT(65)
+	KVM_FAULT(65)
 
     .org kvm_ia64_ivt+0x7e00
 /////////////////////////////////////////////////////////////////////
 // 0x7e00 Entry 66 (size 16 bundles) Reserved
-    KVM_FAULT(66)
+	KVM_FAULT(66)
 
     .org kvm_ia64_ivt+0x7f00
 ////////////////////////////////////////////////////////////////////
 // 0x7f00 Entry 67 (size 16 bundles) Reserved
-    KVM_FAULT(67)
+	KVM_FAULT(67)
 
     .org kvm_ia64_ivt+0x8000
 // There is no particular reason for this code to be here, other than that
@@ -811,132 +808,128 @@ END(kvm_ia32_intercept)
 
 
 ENTRY(kvm_dtlb_miss_dispatch)
-    mov r19 = 2
-    KVM_SAVE_MIN_WITH_COVER_R19
-    alloc r14=ar.pfs,0,0,3,0
-    mov out0=cr.ifa
-    mov out1=r15
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15) ssm psr.i               // restore psr.i
-    addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-    ;;
-    KVM_SAVE_REST
-    KVM_SAVE_EXTRA
-    mov rp=r14
-    ;;
-    adds out2=16,r12
-    br.call.sptk.many b6=kvm_page_fault
+	mov r19 = 2
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,3,0
+	mov out0=cr.ifa
+	mov out1=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i     // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+	;;
+	KVM_SAVE_REST
+	KVM_SAVE_EXTRA
+	mov rp=r14
+	;;
+	adds out2=16,r12
+	br.call.sptk.many b6=kvm_page_fault
 END(kvm_dtlb_miss_dispatch)
 
 ENTRY(kvm_itlb_miss_dispatch)
 
-    KVM_SAVE_MIN_WITH_COVER_R19
-    alloc r14=ar.pfs,0,0,3,0
-    mov out0=cr.ifa
-    mov out1=r15
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15) ssm psr.i               // restore psr.i
-    addl r14=@gprel(ia64_leave_hypervisor),gp
-    ;;
-    KVM_SAVE_REST
-    mov rp=r14
-    ;;
-    adds out2=16,r12
-    br.call.sptk.many b6=kvm_page_fault
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,3,0
+	mov out0=cr.ifa
+	mov out1=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i   // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out2=16,r12
+	br.call.sptk.many b6=kvm_page_fault
 END(kvm_itlb_miss_dispatch)
 
 ENTRY(kvm_dispatch_reflection)
-    /*
-     * Input:
-     *  psr.ic: off
-     *  r19:    intr type (offset into ivt, see ia64_int.h)
-     *  r31:    contains saved predicates (pr)
-     */
-    KVM_SAVE_MIN_WITH_COVER_R19
-    alloc r14=ar.pfs,0,0,5,0
-    mov out0=cr.ifa
-    mov out1=cr.isr
-    mov out2=cr.iim
-    mov out3=r15
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15) ssm psr.i               // restore psr.i
-    addl r14=@gprel(ia64_leave_hypervisor),gp
-    ;;
-    KVM_SAVE_REST
-    mov rp=r14
-    ;;
-    adds out4=16,r12
-    br.call.sptk.many b6=reflect_interruption
+/*
+ * Input:
+ *  psr.ic: off
+ *  r19:    intr type (offset into ivt, see ia64_int.h)
+ *  r31:    contains saved predicates (pr)
+ */
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,5,0
+	mov out0=cr.ifa
+	mov out1=cr.isr
+	mov out2=cr.iim
+	mov out3=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i   // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out4=16,r12
+	br.call.sptk.many b6=reflect_interruption
 END(kvm_dispatch_reflection)
 
 ENTRY(kvm_dispatch_virtualization_fault)
-    adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-    adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-    ;;
-    st8 [r16] = r24
-    st8 [r17] = r25
-    ;;
-    KVM_SAVE_MIN_WITH_COVER_R19
-    ;;
-    alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
-    mov out0=r13        //vcpu
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15) ssm psr.i               // restore psr.i
-    addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-    ;;
-    KVM_SAVE_REST
-    KVM_SAVE_EXTRA
-    mov rp=r14
-    ;;
-    adds out1=16,sp         //regs
-    br.call.sptk.many b6=kvm_emulate
+	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+	;;
+	st8 [r16] = r24
+	st8 [r17] = r25
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	;;
+	alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
+	mov out0=r13        //vcpu
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i    // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+	;;
+	KVM_SAVE_REST
+	KVM_SAVE_EXTRA
+	mov rp=r14
+	;;
+	adds out1=16,sp         //regs
+	br.call.sptk.many b6=kvm_emulate
 END(kvm_dispatch_virtualization_fault)
 
 
 ENTRY(kvm_dispatch_interrupt)
-    KVM_SAVE_MIN_WITH_COVER_R19	// uses r31; defines r2 and r3
-    ;;
-    alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
-    //mov out0=cr.ivr		// pass cr.ivr as first arg
-    adds r3=8,r2		// set up second base pointer for SAVE_REST
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i
-    ;;
-    //(p15) ssm psr.i
-    addl r14=@gprel(ia64_leave_hypervisor),gp
-    ;;
-    KVM_SAVE_REST
-    mov rp=r14
-    ;;
-    mov out0=r13		// pass pointer to pt_regs as second arg
-    br.call.sptk.many b6=kvm_ia64_handle_irq
+	KVM_SAVE_MIN_WITH_COVER_R19	// uses r31; defines r2 and r3
+	;;
+	alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	;;
+	ssm psr.ic
+	;;
+	srlz.i
+	;;
+	//(p15) ssm psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	mov out0=r13		// pass pointer to pt_regs as second arg
+	br.call.sptk.many b6=kvm_ia64_handle_irq
 END(kvm_dispatch_interrupt)
 
-
-
-
 GLOBAL_ENTRY(ia64_leave_nested)
 	rsm psr.i
 	;;
@@ -1015,7 +1008,7 @@ GLOBAL_ENTRY(ia64_leave_nested)
 	;;
 	ldf.fill f11=[r2]
 //	mov r18=r13
-//    mov r21=r13
+//	mov r21=r13
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 	;;
@@ -1065,138 +1058,135 @@ GLOBAL_ENTRY(ia64_leave_nested)
 	rfi
 END(ia64_leave_nested)
 
-
-
 GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
-    /*
-     * work.need_resched etc. mustn't get changed
-     *by this CPU before it returns to
-    ;;
-     * user- or fsys-mode, hence we disable interrupts early on:
-     */
-    adds r2 = PT(R4)+16,r12
-    adds r3 = PT(R5)+16,r12
-    adds r8 = PT(EML_UNAT)+16,r12
-    ;;
-    ld8 r8 = [r8]
-    ;;
-    mov ar.unat=r8
-    ;;
-    ld8.fill r4=[r2],16    //load r4
-    ld8.fill r5=[r3],16    //load r5
-    ;;
-    ld8.fill r6=[r2]    //load r6
-    ld8.fill r7=[r3]    //load r7
-    ;;
+/*
+ * work.need_resched etc. mustn't get changed
+ *by this CPU before it returns to
+ * user- or fsys-mode, hence we disable interrupts early on:
+ */
+	adds r2 = PT(R4)+16,r12
+	adds r3 = PT(R5)+16,r12
+	adds r8 = PT(EML_UNAT)+16,r12
+	;;
+	ld8 r8 = [r8]
+	;;
+	mov ar.unat=r8
+	;;
+	ld8.fill r4=[r2],16    //load r4
+	ld8.fill r5=[r3],16    //load r5
+	;;
+	ld8.fill r6=[r2]    //load r6
+	ld8.fill r7=[r3]    //load r7
+	;;
 END(ia64_leave_hypervisor_prepare)
 //fall through
 GLOBAL_ENTRY(ia64_leave_hypervisor)
-    rsm psr.i
-    ;;
-    br.call.sptk.many b0=leave_hypervisor_tail
-    ;;
-    adds r20=PT(PR)+16,r12
-    adds r8=PT(EML_UNAT)+16,r12
-    ;;
-    ld8 r8=[r8]
-    ;;
-    mov ar.unat=r8
-    ;;
-    lfetch [r20],PT(CR_IPSR)-PT(PR)
-    adds r2 = PT(B6)+16,r12
-    adds r3 = PT(B7)+16,r12
-    ;;
-    lfetch [r20]
-    ;;
-    ld8 r24=[r2],16        /* B6 */
-    ld8 r25=[r3],16        /* B7 */
-    ;;
-    ld8 r26=[r2],16        /* ar_csd */
-    ld8 r27=[r3],16        /* ar_ssd */
-    mov b6 = r24
-    ;;
-    ld8.fill r8=[r2],16
-    ld8.fill r9=[r3],16
-    mov b7 = r25
-    ;;
-    mov ar.csd = r26
-    mov ar.ssd = r27
-    ;;
-    ld8.fill r10=[r2],PT(R15)-PT(R10)
-    ld8.fill r11=[r3],PT(R14)-PT(R11)
-    ;;
-    ld8.fill r15=[r2],PT(R16)-PT(R15)
-    ld8.fill r14=[r3],PT(R17)-PT(R14)
-    ;;
-    ld8.fill r16=[r2],16
-    ld8.fill r17=[r3],16
-    ;;
-    ld8.fill r18=[r2],16
-    ld8.fill r19=[r3],16
-    ;;
-    ld8.fill r20=[r2],16
-    ld8.fill r21=[r3],16
-    ;;
-    ld8.fill r22=[r2],16
-    ld8.fill r23=[r3],16
-    ;;
-    ld8.fill r24=[r2],16
-    ld8.fill r25=[r3],16
-    ;;
-    ld8.fill r26=[r2],16
-    ld8.fill r27=[r3],16
-    ;;
-    ld8.fill r28=[r2],16
-    ld8.fill r29=[r3],16
-    ;;
-    ld8.fill r30=[r2],PT(F6)-PT(R30)
-    ld8.fill r31=[r3],PT(F7)-PT(R31)
-    ;;
-    rsm psr.i | psr.ic
-    // initiate turning off of interrupt and interruption collection
-    invala          // invalidate ALAT
-    ;;
-    srlz.i          // ensure interruption collection is off
-    ;;
-    bsw.0
-    ;;
-    adds r16 = PT(CR_IPSR)+16,r12
-    adds r17 = PT(CR_IIP)+16,r12
-    mov r21=r13		// get current
-    ;;
-    ld8 r31=[r16],16    // load cr.ipsr
-    ld8 r30=[r17],16    // load cr.iip
-    ;;
-    ld8 r29=[r16],16    // load cr.ifs
-    ld8 r28=[r17],16    // load ar.unat
-    ;;
-    ld8 r27=[r16],16    // load ar.pfs
-    ld8 r26=[r17],16    // load ar.rsc
-    ;;
-    ld8 r25=[r16],16    // load ar.rnat
-    ld8 r24=[r17],16    // load ar.bspstore
-    ;;
-    ld8 r23=[r16],16    // load predicates
-    ld8 r22=[r17],16    // load b0
-    ;;
-    ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
-    ld8.fill r1=[r17],16    //load r1
-    ;;
-    ld8.fill r12=[r16],16    //load r12
-    ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
-    ;;
-    ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
-    ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
-    ;;
-    ld8.fill r3=[r16]	//load r3
-    ld8 r18=[r17]	//load ar_ccv
-    ;;
-    mov ar.fpsr=r19
-    mov ar.ccv=r18
-    shr.u r18=r20,16
-    ;;
+	rsm psr.i
+	;;
+	br.call.sptk.many b0=leave_hypervisor_tail
+	;;
+	adds r20=PT(PR)+16,r12
+	adds r8=PT(EML_UNAT)+16,r12
+	;;
+	ld8 r8=[r8]
+	;;
+	mov ar.unat=r8
+	;;
+	lfetch [r20],PT(CR_IPSR)-PT(PR)
+	adds r2 = PT(B6)+16,r12
+	adds r3 = PT(B7)+16,r12
+	;;
+	lfetch [r20]
+	;;
+	ld8 r24=[r2],16        /* B6 */
+	ld8 r25=[r3],16        /* B7 */
+	;;
+	ld8 r26=[r2],16        /* ar_csd */
+	ld8 r27=[r3],16        /* ar_ssd */
+	mov b6 = r24
+	;;
+	ld8.fill r8=[r2],16
+	ld8.fill r9=[r3],16
+	mov b7 = r25
+	;;
+	mov ar.csd = r26
+	mov ar.ssd = r27
+	;;
+	ld8.fill r10=[r2],PT(R15)-PT(R10)
+	ld8.fill r11=[r3],PT(R14)-PT(R11)
+	;;
+	ld8.fill r15=[r2],PT(R16)-PT(R15)
+	ld8.fill r14=[r3],PT(R17)-PT(R14)
+	;;
+	ld8.fill r16=[r2],16
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	;;
+	ld8.fill r22=[r2],16
+	ld8.fill r23=[r3],16
+	;;
+	ld8.fill r24=[r2],16
+	ld8.fill r25=[r3],16
+	;;
+	ld8.fill r26=[r2],16
+	ld8.fill r27=[r3],16
+	;;
+	ld8.fill r28=[r2],16
+	ld8.fill r29=[r3],16
+	;;
+	ld8.fill r30=[r2],PT(F6)-PT(R30)
+	ld8.fill r31=[r3],PT(F7)-PT(R31)
+	;;
+	rsm psr.i | psr.ic
+	// initiate turning off of interrupt and interruption collection
+	invala          // invalidate ALAT
+	;;
+	srlz.i          // ensure interruption collection is off
+	;;
+	bsw.0
+	;;
+	adds r16 = PT(CR_IPSR)+16,r12
+	adds r17 = PT(CR_IIP)+16,r12
+	mov r21=r13		// get current
+	;;
+	ld8 r31=[r16],16    // load cr.ipsr
+	ld8 r30=[r17],16    // load cr.iip
+	;;
+	ld8 r29=[r16],16    // load cr.ifs
+	ld8 r28=[r17],16    // load ar.unat
+	;;
+	ld8 r27=[r16],16    // load ar.pfs
+	ld8 r26=[r17],16    // load ar.rsc
+	;;
+	ld8 r25=[r16],16    // load ar.rnat
+	ld8 r24=[r17],16    // load ar.bspstore
+	;;
+	ld8 r23=[r16],16    // load predicates
+	ld8 r22=[r17],16    // load b0
+	;;
+	ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16    //load r1
+	;;
+	ld8.fill r12=[r16],16    //load r12
+	ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
+	;;
+	ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
+	ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
+	;;
+	ld8.fill r3=[r16]	//load r3
+	ld8 r18=[r17]	//load ar_ccv
+	;;
+	mov ar.fpsr=r19
+	mov ar.ccv=r18
+	shr.u r18=r20,16
+	;;
 kvm_rbs_switch:
-    mov r19=96
+	mov r19=96
 
 kvm_dont_preserve_current_frame:
 /*
@@ -1208,76 +1198,76 @@ kvm_dont_preserve_current_frame:
 #   define pReturn	p7
 #   define Nregs	14
 
-    alloc loc0=ar.pfs,2,Nregs-2,2,0
-    shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
-    sub r19=r19,r18		// r19 = (physStackedSize + 8) - dirtySize
-    ;;
-    mov ar.rsc=r20		// load ar.rsc to be used for "loadrs"
-    shladd in0=loc1,3,r19
-    mov in1=0
-    ;;
-    TEXT_ALIGN(32)
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	shr.u loc1=r18,9	// RNaTslots <= floor(dirtySize / (64*8))
+	sub r19=r19,r18		// r19 = (physStackedSize + 8) - dirtySize
+	;;
+	mov ar.rsc=r20		// load ar.rsc to be used for "loadrs"
+	shladd in0=loc1,3,r19
+	mov in1=0
+	;;
+	TEXT_ALIGN(32)
 kvm_rse_clear_invalid:
-    alloc loc0=ar.pfs,2,Nregs-2,2,0
-    cmp.lt pRecurse,p0=Nregs*8,in0
-    // if more than Nregs regs left to clear, (re)curse
-    add out0=-Nregs*8,in0
-    add out1=1,in1		// increment recursion count
-    mov loc1=0
-    mov loc2=0
-    ;;
-    mov loc3=0
-    mov loc4=0
-    mov loc5=0
-    mov loc6=0
-    mov loc7=0
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	cmp.lt pRecurse,p0=Nregs*8,in0
+	// if more than Nregs regs left to clear, (re)curse
+	add out0=-Nregs*8,in0
+	add out1=1,in1		// increment recursion count
+	mov loc1=0
+	mov loc2=0
+	;;
+	mov loc3=0
+	mov loc4=0
+	mov loc5=0
+	mov loc6=0
+	mov loc7=0
 (pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
-    ;;
-    mov loc8=0
-    mov loc9=0
-    cmp.ne pReturn,p0=r0,in1
-    // if recursion count != 0, we need to do a br.ret
-    mov loc10=0
-    mov loc11=0
+	;;
+	mov loc8=0
+	mov loc9=0
+	cmp.ne pReturn,p0=r0,in1
+	// if recursion count != 0, we need to do a br.ret
+	mov loc10=0
+	mov loc11=0
 (pReturn) br.ret.dptk.many b0
 
 #	undef pRecurse
 #	undef pReturn
 
 // loadrs has already been shifted
-    alloc r16=ar.pfs,0,0,0,0    // drop current register frame
-    ;;
-    loadrs
-    ;;
-    mov ar.bspstore=r24
-    ;;
-    mov ar.unat=r28
-    mov ar.rnat=r25
-    mov ar.rsc=r26
-    ;;
-    mov cr.ipsr=r31
-    mov cr.iip=r30
-    mov cr.ifs=r29
-    mov ar.pfs=r27
-    adds r18=VMM_VPD_BASE_OFFSET,r21
-    ;;
-    ld8 r18=[r18]   //vpd
-    adds r17=VMM_VCPU_ISR_OFFSET,r21
-    ;;
-    ld8 r17=[r17]
-    adds r19=VMM_VPD_VPSR_OFFSET,r18
-    ;;
-    ld8 r19=[r19]        //vpsr
-    mov r25=r18
-    adds r16= VMM_VCPU_GP_OFFSET,r21
-    ;;
-    ld8 r16= [r16] // Put gp in r24
-    movl r24=@gprel(ia64_vmm_entry)  // calculate return address
-    ;;
-    add  r24=r24,r16
-    ;;
-    br.sptk.many  kvm_vps_sync_write       // call the service
-    ;;
+	alloc r16=ar.pfs,0,0,0,0    // drop current register frame
+	;;
+	loadrs
+	;;
+	mov ar.bspstore=r24
+	;;
+	mov ar.unat=r28
+	mov ar.rnat=r25
+	mov ar.rsc=r26
+	;;
+	mov cr.ipsr=r31
+	mov cr.iip=r30
+	mov cr.ifs=r29
+	mov ar.pfs=r27
+	adds r18=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r18=[r18]   //vpd
+	adds r17=VMM_VCPU_ISR_OFFSET,r21
+	;;
+	ld8 r17=[r17]
+	adds r19=VMM_VPD_VPSR_OFFSET,r18
+	;;
+	ld8 r19=[r19]        //vpsr
+	mov r25=r18
+	adds r16= VMM_VCPU_GP_OFFSET,r21
+	;;
+	ld8 r16= [r16] // Put gp in r24
+	movl r24=@gprel(ia64_vmm_entry)  // calculate return address
+	;;
+	add  r24=r24,r16
+	;;
+	br.sptk.many  kvm_vps_sync_write       // call the service
+	;;
 END(ia64_leave_hypervisor)
 // fall through
 GLOBAL_ENTRY(ia64_vmm_entry)
@@ -1290,16 +1280,14 @@ GLOBAL_ENTRY(ia64_vmm_entry)
  *  r22:b0
  *  r23:predicate
  */
-    mov r24=r22
-    mov r25=r18
-    tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
-    (p1) br.cond.sptk.few kvm_vps_resume_normal
-    (p2) br.cond.sptk.many kvm_vps_resume_handler
-    ;;
+	mov r24=r22
+	mov r25=r18
+	tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
+(p1) 	br.cond.sptk.few kvm_vps_resume_normal
+(p2)	br.cond.sptk.many kvm_vps_resume_handler
+	;;
 END(ia64_vmm_entry)
 
-
-
 /*
  * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
  *                  u64 arg3, u64 arg4, u64 arg5,
@@ -1317,88 +1305,88 @@ psrsave =   loc2
 entry   =   loc3
 hostret =   r24
 
-    alloc   pfssave=ar.pfs,4,4,0,0
-    mov rpsave=rp
-    adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
-    ;;
-    ld8 entry=[entry]
-1:  mov hostret=ip
-    mov r25=in1         // copy arguments
-    mov r26=in2
-    mov r27=in3
-    mov psrsave=psr
-    ;;
-    tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
-    tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
-    ;;
-    add hostret=2f-1b,hostret   // calculate return address
-    add entry=entry,in0
-    ;;
-    rsm psr.i | psr.ic
-    ;;
-    srlz.i
-    mov b6=entry
-    br.cond.sptk b6         // call the service
+	alloc   pfssave=ar.pfs,4,4,0,0
+	mov rpsave=rp
+	adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
+	;;
+	ld8 entry=[entry]
+1:	mov hostret=ip
+	mov r25=in1         // copy arguments
+	mov r26=in2
+	mov r27=in3
+	mov psrsave=psr
+	;;
+	tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
+	tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
+	;;
+	add hostret=2f-1b,hostret   // calculate return address
+	add entry=entry,in0
+	;;
+	rsm psr.i | psr.ic
+	;;
+	srlz.i
+	mov b6=entry
+	br.cond.sptk b6         // call the service
 2:
-    // Architectural sequence for enabling interrupts if necessary
+// Architectural sequence for enabling interrupts if necessary
 (p7)    ssm psr.ic
-    ;;
+	;;
 (p7)    srlz.i
-    ;;
+	;;
 //(p6)    ssm psr.i
-    ;;
-    mov rp=rpsave
-    mov ar.pfs=pfssave
-    mov r8=r31
-    ;;
-    srlz.d
-    br.ret.sptk rp
+	;;
+	mov rp=rpsave
+	mov ar.pfs=pfssave
+	mov r8=r31
+	;;
+	srlz.d
+	br.ret.sptk rp
 
 END(ia64_call_vsa)
 
 #define  INIT_BSPSTORE  ((4<<30)-(12<<20)-0x100)
 
 GLOBAL_ENTRY(vmm_reset_entry)
-    //set up ipsr, iip, vpd.vpsr, dcr
-    // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
-    // For DCR: all bits 0
-    bsw.0
-    ;;
-    mov r21 =r13
-    adds r14=-VMM_PT_REGS_SIZE, r12
-    ;;
-    movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
-    movl r10=0x8000000000000000
-    adds r16=PT(CR_IIP), r14
-    adds r20=PT(R1), r14
-    ;;
-    rsm psr.ic | psr.i
-    ;;
-    srlz.i
-    ;;
-    mov ar.rsc = 0
-    ;;
-    flushrs
-    ;;
-    mov ar.bspstore = 0
-    // clear BSPSTORE
-    ;;
-    mov cr.ipsr=r6
-    mov cr.ifs=r10
-    ld8 r4 = [r16] // Set init iip for first run.
-    ld8 r1 = [r20]
-    ;;
-    mov cr.iip=r4
-    adds r16=VMM_VPD_BASE_OFFSET,r13
-    ;;
-    ld8 r18=[r16]
-    ;;
-    adds r19=VMM_VPD_VPSR_OFFSET,r18
-    ;;
-    ld8 r19=[r19]
-    mov r17=r0
-    mov r22=r0
-    mov r23=r0
-    br.cond.sptk ia64_vmm_entry
-    br.ret.sptk  b0
+	//set up ipsr, iip, vpd.vpsr, dcr
+	// For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
+	// For DCR: all bits 0
+	bsw.0
+	;;
+	mov r21 =r13
+	adds r14=-VMM_PT_REGS_SIZE, r12
+	;;
+	movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
+	movl r10=0x8000000000000000
+	adds r16=PT(CR_IIP), r14
+	adds r20=PT(R1), r14
+	;;
+	rsm psr.ic | psr.i
+	;;
+	srlz.i
+	;;
+	mov ar.rsc = 0
+	;;
+	flushrs
+	;;
+	mov ar.bspstore = 0
+	// clear BSPSTORE
+	;;
+	mov cr.ipsr=r6
+	mov cr.ifs=r10
+	ld8 r4 = [r16] // Set init iip for first run.
+	ld8 r1 = [r20]
+	;;
+	mov cr.iip=r4
+	adds r16=VMM_VPD_BASE_OFFSET,r13
+	;;
+	ld8 r18=[r16]
+	;;
+	adds r19=VMM_VPD_VPSR_OFFSET,r18
+	;;
+	ld8 r19=[r19]
+	mov r17=r0
+	mov r22=r0
+	mov r23=r0
+	br.cond.sptk ia64_vmm_entry
+	br.ret.sptk  b0
 END(vmm_reset_entry)
-- 
cgit v0.10.2


From df203ec9a77a7236cb90456664d714423b98a977 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 23 Nov 2008 18:08:57 +0200
Subject: KVM: VMX: Conditionally request interrupt window after injecting irq

If we're injecting an interrupt, and another one is pending, request
an interrupt window notification so we don't have excess latency on the
second interrupt.

This shouldn't happen in practice since an EOI will be issued, giving a second
chance to request an interrupt window, but...

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f5958a7..7ea4855 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3304,6 +3304,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.interrupt.pending) {
 		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
 		kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
+		if (kvm_cpu_has_interrupt(vcpu))
+			enable_irq_window(vcpu);
 	}
 }
 
-- 
cgit v0.10.2


From 423cd25a5ade17b8a5cc85e6f0a0f37028d2c4a2 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@redhat.com>
Date: Mon, 24 Nov 2008 15:45:23 -0200
Subject: x86: KVM guest: sign kvmclock as paravirt

Currently, we only set the KVM paravirt signature in case
of CONFIG_KVM_GUEST. However, it is possible to have it turned
off, while CONFIG_KVM_CLOCK is turned on. This is also a paravirt
case, and should be shown accordingly.

Signed-off-by: Glauber Costa <glommer@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e169ae9..b38e801 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -194,5 +194,7 @@ void __init kvmclock_init(void)
 #endif
 		kvm_get_preset_lpj();
 		clocksource_register(&kvm_clock);
+		pv_info.paravirt_enabled = 1;
+		pv_info.name = "KVM";
 	}
 }
-- 
cgit v0.10.2


From 342ffb93006e537fb8cb215b923ce69943a1e820 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:49 +0800
Subject: KVM: Move ack notifier register and IRQ sourcd ID request

Distinguish common part for device assignment and INTx part, perparing for
refactor later.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4727c08..8966fd1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -192,16 +192,31 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 		return -EINVAL;
 	}
 
-	if (match->irq_requested) {
+	if (!match->irq_requested) {
+		INIT_WORK(&match->interrupt_work,
+				kvm_assigned_dev_interrupt_work_handler);
+		if (irqchip_in_kernel(kvm)) {
+			/* Register ack nofitier */
+			match->ack_notifier.gsi = -1;
+			match->ack_notifier.irq_acked =
+					kvm_assigned_dev_ack_irq;
+			kvm_register_irq_ack_notifier(kvm,
+					&match->ack_notifier);
+
+			/* Request IRQ source ID */
+			r = kvm_request_irq_source_id(kvm);
+			if (r < 0)
+				goto out_release;
+			else
+				match->irq_source_id = r;
+		}
+	} else {
 		match->guest_irq = assigned_irq->guest_irq;
 		match->ack_notifier.gsi = assigned_irq->guest_irq;
 		mutex_unlock(&kvm->lock);
 		return 0;
 	}
 
-	INIT_WORK(&match->interrupt_work,
-		  kvm_assigned_dev_interrupt_work_handler);
-
 	if (irqchip_in_kernel(kvm)) {
 		if (!capable(CAP_SYS_RAWIO)) {
 			r = -EPERM;
@@ -214,13 +229,6 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 			match->host_irq = match->dev->irq;
 		match->guest_irq = assigned_irq->guest_irq;
 		match->ack_notifier.gsi = assigned_irq->guest_irq;
-		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
-		r = kvm_request_irq_source_id(kvm);
-		if (r < 0)
-			goto out_release;
-		else
-			match->irq_source_id = r;
 
 		/* Even though this is PCI, we don't want to use shared
 		 * interrupts. Sharing host devices with guest-assigned devices
-- 
cgit v0.10.2


From 00e3ed39e2e25ffb3417ce1bec8f4b78ed4b85e7 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:50 +0800
Subject: KVM: Separate update irq to a single function

Separate INTx enabling part to a independence function, so that we can add MSI
enabling part easily.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8966fd1..ef2f03c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -176,6 +176,41 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
 	}
 }
 
+static int assigned_device_update_intx(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *adev,
+			struct kvm_assigned_irq *airq)
+{
+	if (adev->irq_requested) {
+		adev->guest_irq = airq->guest_irq;
+		adev->ack_notifier.gsi = airq->guest_irq;
+		return 0;
+	}
+
+	if (irqchip_in_kernel(kvm)) {
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+
+		if (airq->host_irq)
+			adev->host_irq = airq->host_irq;
+		else
+			adev->host_irq = adev->dev->irq;
+		adev->guest_irq = airq->guest_irq;
+		adev->ack_notifier.gsi = airq->guest_irq;
+
+		/* Even though this is PCI, we don't want to use shared
+		 * interrupts. Sharing host devices with guest-assigned devices
+		 * on the same interrupt line is not a happy situation: there
+		 * are going to be long delays in accepting, acking, etc.
+		 */
+		if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
+				0, "kvm_assigned_intx_device", (void *)adev))
+			return -EIO;
+	}
+
+	adev->irq_requested = true;
+	return 0;
+}
+
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 				   struct kvm_assigned_irq
 				   *assigned_irq)
@@ -210,39 +245,12 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 			else
 				match->irq_source_id = r;
 		}
-	} else {
-		match->guest_irq = assigned_irq->guest_irq;
-		match->ack_notifier.gsi = assigned_irq->guest_irq;
-		mutex_unlock(&kvm->lock);
-		return 0;
 	}
 
-	if (irqchip_in_kernel(kvm)) {
-		if (!capable(CAP_SYS_RAWIO)) {
-			r = -EPERM;
-			goto out_release;
-		}
-
-		if (assigned_irq->host_irq)
-			match->host_irq = assigned_irq->host_irq;
-		else
-			match->host_irq = match->dev->irq;
-		match->guest_irq = assigned_irq->guest_irq;
-		match->ack_notifier.gsi = assigned_irq->guest_irq;
-
-		/* Even though this is PCI, we don't want to use shared
-		 * interrupts. Sharing host devices with guest-assigned devices
-		 * on the same interrupt line is not a happy situation: there
-		 * are going to be long delays in accepting, acking, etc.
-		 */
-		if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
-				"kvm_assigned_device", (void *)match)) {
-			r = -EIO;
-			goto out_release;
-		}
-	}
+	r = assigned_device_update_intx(kvm, match, assigned_irq);
+	if (r)
+		goto out_release;
 
-	match->irq_requested = true;
 	mutex_unlock(&kvm->lock);
 	return r;
 out_release:
-- 
cgit v0.10.2


From 4f906c19ae29397409bedabf7bbe5cb42ad90332 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:51 +0800
Subject: KVM: Replace irq_requested with more generic irq_requested_type

Separate guest irq type and host irq type, for we can support guest using INTx
with host using MSI (but not opposite combination).

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3a0fb77..c3d4b96 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -307,7 +307,9 @@ struct kvm_assigned_dev_kernel {
 	int host_devfn;
 	int host_irq;
 	int guest_irq;
-	int irq_requested;
+#define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
+#define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
+	unsigned long irq_requested_type;
 	int irq_source_id;
 	struct pci_dev *dev;
 	struct kvm *kvm;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ef2f03c..638de47 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -140,7 +140,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 				     struct kvm_assigned_dev_kernel
 				     *assigned_dev)
 {
-	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
+	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested_type)
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
@@ -180,7 +180,7 @@ static int assigned_device_update_intx(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *adev,
 			struct kvm_assigned_irq *airq)
 {
-	if (adev->irq_requested) {
+	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX) {
 		adev->guest_irq = airq->guest_irq;
 		adev->ack_notifier.gsi = airq->guest_irq;
 		return 0;
@@ -207,7 +207,8 @@ static int assigned_device_update_intx(struct kvm *kvm,
 			return -EIO;
 	}
 
-	adev->irq_requested = true;
+	adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
+				   KVM_ASSIGNED_DEV_HOST_INTX;
 	return 0;
 }
 
@@ -227,7 +228,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 		return -EINVAL;
 	}
 
-	if (!match->irq_requested) {
+	if (!match->irq_requested_type) {
 		INIT_WORK(&match->interrupt_work,
 				kvm_assigned_dev_interrupt_work_handler);
 		if (irqchip_in_kernel(kvm)) {
-- 
cgit v0.10.2


From fbac7818d8fba7e1df9f4b209777f3b67b953dd3 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:52 +0800
Subject: KVM: Clean up assigned_device_update_irq

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 638de47..2089f8b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -180,11 +180,11 @@ static int assigned_device_update_intx(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *adev,
 			struct kvm_assigned_irq *airq)
 {
-	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX) {
-		adev->guest_irq = airq->guest_irq;
-		adev->ack_notifier.gsi = airq->guest_irq;
+	adev->guest_irq = airq->guest_irq;
+	adev->ack_notifier.gsi = airq->guest_irq;
+
+	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
 		return 0;
-	}
 
 	if (irqchip_in_kernel(kvm)) {
 		if (!capable(CAP_SYS_RAWIO))
@@ -194,8 +194,6 @@ static int assigned_device_update_intx(struct kvm *kvm,
 			adev->host_irq = airq->host_irq;
 		else
 			adev->host_irq = adev->dev->irq;
-		adev->guest_irq = airq->guest_irq;
-		adev->ack_notifier.gsi = airq->guest_irq;
 
 		/* Even though this is PCI, we don't want to use shared
 		 * interrupts. Sharing host devices with guest-assigned devices
-- 
cgit v0.10.2


From 0937c48d075ddd59ae2c12a6fa8308b9c7a63753 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:53 +0800
Subject: KVM: Add fields for MSI device assignment

Prepared for kvm_arch_assigned_device_msi_dispatch().

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 44fd7fa..bb283c3 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -507,10 +507,17 @@ struct kvm_assigned_irq {
 	__u32 guest_irq;
 	__u32 flags;
 	union {
+		struct {
+			__u32 addr_lo;
+			__u32 addr_hi;
+			__u32 data;
+		} guest_msi;
 		__u32 reserved[12];
 	};
 };
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
+#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c3d4b96..8091a4d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 #include <linux/preempt.h>
 #include <linux/marker.h>
+#include <linux/msi.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -307,8 +308,11 @@ struct kvm_assigned_dev_kernel {
 	int host_devfn;
 	int host_irq;
 	int guest_irq;
+	struct msi_msg guest_msi;
 #define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
+#define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
 #define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
+#define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
 	unsigned long irq_requested_type;
 	int irq_source_id;
 	struct pci_dev *dev;
-- 
cgit v0.10.2


From 68b76f51675809c8ce200a86276c3c7266f17a64 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:54 +0800
Subject: KVM: Export ioapic_get_delivery_bitmask

It would be used for MSI in device assignment, for MSI dispatch.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index c8f939c..23b81cf 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -153,8 +153,8 @@ static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
 	kvm_vcpu_kick(vcpu);
 }
 
-static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-				       u8 dest_mode)
+u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+				    u8 dest_mode)
 {
 	u32 mask = 0;
 	int i;
@@ -208,7 +208,8 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 		     "vector=%x trig_mode=%x\n",
 		     dest, dest_mode, delivery_mode, vector, trig_mode);
 
-	deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
+	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest,
+							  dest_mode);
 	if (!deliver_bitmask) {
 		ioapic_debug("no target on destination\n");
 		return 0;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index cd7ae76..49c9581 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -85,5 +85,7 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
+u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+				u8 dest_mode);
 
 #endif
-- 
cgit v0.10.2


From f64769eb05565c74d7fce6fa75d65924f9cdaf79 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:55 +0800
Subject: KVM: Add assigned_device_msi_dispatch()

The function is used to dispatch MSI to lapic according to MSI message
address and message data.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2089f8b..228c1d1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,10 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
+#ifdef CONFIG_X86
+#include <asm/msidef.h>
+#endif
+
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 #include "coalesced_mmio.h"
 #endif
@@ -78,6 +82,57 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 bool kvm_rebooting;
 
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
+
+#ifdef CONFIG_X86
+static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
+{
+	int vcpu_id;
+	struct kvm_vcpu *vcpu;
+	struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm);
+	int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK)
+			>> MSI_ADDR_DEST_ID_SHIFT;
+	int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK)
+			>> MSI_DATA_VECTOR_SHIFT;
+	int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
+				(unsigned long *)&dev->guest_msi.address_lo);
+	int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
+				(unsigned long *)&dev->guest_msi.data);
+	int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
+				(unsigned long *)&dev->guest_msi.data);
+	u32 deliver_bitmask;
+
+	BUG_ON(!ioapic);
+
+	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
+				dest_id, dest_mode);
+	/* IOAPIC delivery mode value is the same as MSI here */
+	switch (delivery_mode) {
+	case IOAPIC_LOWEST_PRIORITY:
+		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
+				deliver_bitmask);
+		if (vcpu != NULL)
+			kvm_apic_set_irq(vcpu, vector, trig_mode);
+		else
+			printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
+		break;
+	case IOAPIC_FIXED:
+		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+			if (!(deliver_bitmask & (1 << vcpu_id)))
+				continue;
+			deliver_bitmask &= ~(1 << vcpu_id);
+			vcpu = ioapic->kvm->vcpus[vcpu_id];
+			if (vcpu)
+				kvm_apic_set_irq(vcpu, vector, trig_mode);
+		}
+		break;
+	default:
+		printk(KERN_INFO "kvm: unsupported MSI delivery mode\n");
+	}
+}
+#else
+static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {}
+#endif
+
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
 						      int assigned_dev_id)
 {
-- 
cgit v0.10.2


From 6b9cc7fd469869bed38831c5adac3f59dc25eaf5 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:56 +0800
Subject: KVM: Enable MSI for device assignment

We enable guest MSI and host MSI support in this patch. The userspace want to
enable MSI should set KVM_DEV_IRQ_ASSIGN_ENABLE_MSI in the assigned_irq's flag.
Function would return -ENOTTY if can't enable MSI, userspace shouldn't set MSI
Enable bit when KVM_ASSIGN_IRQ return -ENOTTY with
KVM_DEV_IRQ_ASSIGN_ENABLE_MSI.

Userspace can tell the support of MSI device from #ifdef KVM_CAP_DEVICE_MSI.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index bb283c3..0997e6f 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -392,6 +392,9 @@ struct kvm_trace_rec {
 #endif
 #define KVM_CAP_IOMMU 18
 #define KVM_CAP_NMI 19
+#if defined(CONFIG_X86)
+#define KVM_CAP_DEVICE_MSI 20
+#endif
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 228c1d1..bf36ae9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -159,9 +159,15 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 	 * finer-grained lock, update this
 	 */
 	mutex_lock(&assigned_dev->kvm->lock);
-	kvm_set_irq(assigned_dev->kvm,
-		    assigned_dev->irq_source_id,
-		    assigned_dev->guest_irq, 1);
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX)
+		kvm_set_irq(assigned_dev->kvm,
+			    assigned_dev->irq_source_id,
+			    assigned_dev->guest_irq, 1);
+	else if (assigned_dev->irq_requested_type &
+				KVM_ASSIGNED_DEV_GUEST_MSI) {
+		assigned_device_msi_dispatch(assigned_dev);
+		enable_irq(assigned_dev->host_irq);
+	}
 	mutex_unlock(&assigned_dev->kvm->lock);
 	kvm_put_kvm(assigned_dev->kvm);
 }
@@ -197,6 +203,8 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 {
 	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested_type)
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+		pci_disable_msi(assigned_dev->dev);
 
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
 	kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
@@ -242,6 +250,11 @@ static int assigned_device_update_intx(struct kvm *kvm,
 		return 0;
 
 	if (irqchip_in_kernel(kvm)) {
+		if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
+			free_irq(adev->host_irq, (void *)kvm);
+			pci_disable_msi(adev->dev);
+		}
+
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 
@@ -265,6 +278,41 @@ static int assigned_device_update_intx(struct kvm *kvm,
 	return 0;
 }
 
+#ifdef CONFIG_X86
+static int assigned_device_update_msi(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *adev,
+			struct kvm_assigned_irq *airq)
+{
+	int r;
+
+	/* x86 don't care upper address of guest msi message addr */
+	adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
+	adev->guest_msi.data = airq->guest_msi.data;
+	adev->ack_notifier.gsi = -1;
+
+	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+		return 0;
+
+	if (irqchip_in_kernel(kvm)) {
+		if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
+			free_irq(adev->host_irq, (void *)adev);
+
+		r = pci_enable_msi(adev->dev);
+		if (r)
+			return r;
+
+		adev->host_irq = adev->dev->irq;
+		if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
+				"kvm_assigned_msi_device", (void *)adev))
+			return -EIO;
+	}
+
+	adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI |
+				   KVM_ASSIGNED_DEV_HOST_MSI;
+	return 0;
+}
+#endif
+
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 				   struct kvm_assigned_irq
 				   *assigned_irq)
@@ -301,9 +349,30 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 		}
 	}
 
-	r = assigned_device_update_intx(kvm, match, assigned_irq);
-	if (r)
-		goto out_release;
+	if (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
+#ifdef CONFIG_X86
+		r = assigned_device_update_msi(kvm, match, assigned_irq);
+		if (r) {
+			printk(KERN_WARNING "kvm: failed to enable "
+					"MSI device!\n");
+			goto out_release;
+		}
+#else
+		r = -ENOTTY;
+#endif
+	} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
+		/* Host device IRQ 0 means don't support INTx */
+		printk(KERN_WARNING "kvm: wait device to enable MSI!\n");
+		r = 0;
+	} else {
+		/* Non-sharing INTx mode */
+		r = assigned_device_update_intx(kvm, match, assigned_irq);
+		if (r) {
+			printk(KERN_WARNING "kvm: failed to enable "
+					"INTx device!\n");
+			goto out_release;
+		}
+	}
 
 	mutex_unlock(&kvm->lock);
 	return r;
-- 
cgit v0.10.2


From 5319c662522db8995ff9276ba9d80549c64b294a Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:57 +0800
Subject: KVM: MSI to INTx translate

Now we use MSI as default one, and translate MSI to INTx when guest need
INTx rather than MSI. For legacy device, we provide support for non-sharing
host IRQ.

Provide a parameter msi2intx for this method. The value is true by default in
x86 architecture.

We can't guarantee this mode can work on every device, but for most of us
tested, it works. If your device encounter some trouble with this mode, you can
try set msi2intx modules parameter to 0. If the device is OK with msi2intx=0,
then please report it to KVM mailing list or me. We may prepare a blacklist for
the device that can't work in this mode.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bf36ae9..54d25e6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -64,6 +64,9 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+static int msi2intx = 1;
+module_param(msi2intx, bool, 0);
+
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -250,7 +253,8 @@ static int assigned_device_update_intx(struct kvm *kvm,
 		return 0;
 
 	if (irqchip_in_kernel(kvm)) {
-		if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
+		if (!msi2intx &&
+		    adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
 			free_irq(adev->host_irq, (void *)kvm);
 			pci_disable_msi(adev->dev);
 		}
@@ -285,21 +289,33 @@ static int assigned_device_update_msi(struct kvm *kvm,
 {
 	int r;
 
-	/* x86 don't care upper address of guest msi message addr */
-	adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
-	adev->guest_msi.data = airq->guest_msi.data;
-	adev->ack_notifier.gsi = -1;
+	if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
+		/* x86 don't care upper address of guest msi message addr */
+		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
+		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
+		adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
+		adev->guest_msi.data = airq->guest_msi.data;
+		adev->ack_notifier.gsi = -1;
+	} else if (msi2intx) {
+		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
+		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
+		adev->guest_irq = airq->guest_irq;
+		adev->ack_notifier.gsi = airq->guest_irq;
+	}
 
 	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
 		return 0;
 
 	if (irqchip_in_kernel(kvm)) {
-		if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
-			free_irq(adev->host_irq, (void *)adev);
-
-		r = pci_enable_msi(adev->dev);
-		if (r)
-			return r;
+		if (!msi2intx) {
+			if (adev->irq_requested_type &
+					KVM_ASSIGNED_DEV_HOST_INTX)
+				free_irq(adev->host_irq, (void *)adev);
+
+			r = pci_enable_msi(adev->dev);
+			if (r)
+				return r;
+		}
 
 		adev->host_irq = adev->dev->irq;
 		if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
@@ -307,8 +323,10 @@ static int assigned_device_update_msi(struct kvm *kvm,
 			return -EIO;
 	}
 
-	adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI |
-				   KVM_ASSIGNED_DEV_HOST_MSI;
+	if (!msi2intx)
+		adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
+
+	adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
 	return 0;
 }
 #endif
@@ -346,10 +364,19 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 				goto out_release;
 			else
 				match->irq_source_id = r;
+
+#ifdef CONFIG_X86
+			/* Determine host device irq type, we can know the
+			 * result from dev->msi_enabled */
+			if (msi2intx)
+				pci_enable_msi(match->dev);
+#endif
 		}
 	}
 
-	if (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
+	if ((!msi2intx &&
+	     (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
+	    (msi2intx && match->dev->msi_enabled)) {
 #ifdef CONFIG_X86
 		r = assigned_device_update_msi(kvm, match, assigned_irq);
 		if (r) {
@@ -362,8 +389,16 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 #endif
 	} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
 		/* Host device IRQ 0 means don't support INTx */
-		printk(KERN_WARNING "kvm: wait device to enable MSI!\n");
-		r = 0;
+		if (!msi2intx) {
+			printk(KERN_WARNING
+			       "kvm: wait device to enable MSI!\n");
+			r = 0;
+		} else {
+			printk(KERN_WARNING
+			       "kvm: failed to enable MSI device!\n");
+			r = -ENOTTY;
+			goto out_release;
+		}
 	} else {
 		/* Non-sharing INTx mode */
 		r = assigned_device_update_intx(kvm, match, assigned_irq);
@@ -2209,6 +2244,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
+#ifndef CONFIG_X86
+	msi2intx = 0;
+#endif
 
 	return 0;
 
-- 
cgit v0.10.2


From ecc5589f19a52e7e6501fe449047b19087ae11bb Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 25 Nov 2008 15:58:07 +0100
Subject: KVM: MMU: optimize set_spte for page sync

The write protect verification in set_spte is unnecessary for page sync.

Its guaranteed that, if the unsync spte was writable, the target page
does not have a write protected shadow (if it had, the spte would have
been write protected under mmu_lock by rmap_write_protect before).

Same reasoning applies to mark_page_dirty: the gfn has been marked as
dirty via the pagefault path.

The cost of hash table and memslot lookups are quite significant if the
workload is pagetable write intensive resulting in increased mmu_lock
contention.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fa3486d..dd20b19 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1593,6 +1593,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 
 		spte |= PT_WRITABLE_MASK;
 
+		/*
+		 * Optimization: for pte sync, if spte was writable the hash
+		 * lookup is unnecessary (and expensive). Write protection
+		 * is responsibility of mmu_get_page / kvm_sync_page.
+		 * Same reasoning can be applied to dirty page accounting.
+		 */
+		if (!can_unsync && is_writeble_pte(*shadow_pte))
+			goto set_pte;
+
 		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %lx, marking ro\n",
 				 __func__, gfn);
-- 
cgit v0.10.2


From dda96d8f1b3de692cce09969ce28fe22e58e5acf Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 26 Nov 2008 15:14:10 +0200
Subject: KVM: x86 emulator: reduce duplication in one operand emulation thunks

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 8f60ace..5f87d3e 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -359,6 +359,12 @@ static u16 group2_table[] = {
 	"andl %"_msk",%"_LO32 _tmp"; "		\
 	"orl  %"_LO32 _tmp",%"_sav"; "
 
+#ifdef CONFIG_X86_64
+#define ON64(x) x
+#else
+#define ON64(x)
+#endif
+
 /* Raw emulation: instruction has two explicit operands. */
 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
 	do { 								    \
@@ -425,42 +431,27 @@ static u16 group2_table[] = {
 	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
 			     "w", "r", _LO32, "r", "", "r")
 
-/* Instruction has only one explicit operand (no source operand). */
-#define emulate_1op(_op, _dst, _eflags)                                    \
+#define __emulate_1op(_op, _dst, _eflags, _suffix)			\
 	do {								\
 		unsigned long _tmp;					\
 									\
+		__asm__ __volatile__ (					\
+			_PRE_EFLAGS("0", "3", "2")			\
+			_op _suffix " %1; "				\
+			_POST_EFLAGS("0", "3", "2")			\
+			: "=m" (_eflags), "+m" ((_dst).val),		\
+			  "=&r" (_tmp)					\
+			: "i" (EFLAGS_MASK));				\
+	} while (0)
+
+/* Instruction has only one explicit operand (no source operand). */
+#define emulate_1op(_op, _dst, _eflags)                                    \
+	do {								\
 		switch ((_dst).bytes) {				        \
-		case 1:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0", "3", "2")		\
-				_op"b %1; "				\
-				_POST_EFLAGS("0", "3", "2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK));			\
-			break;						\
-		case 2:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0", "3", "2")		\
-				_op"w %1; "				\
-				_POST_EFLAGS("0", "3", "2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK));			\
-			break;						\
-		case 4:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0", "3", "2")		\
-				_op"l %1; "				\
-				_POST_EFLAGS("0", "3", "2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK));			\
-			break;						\
-		case 8:							\
-			__emulate_1op_8byte(_op, _dst, _eflags);	\
-			break;						\
+		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
+		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
+		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
+		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
 		}							\
 	} while (0)
 
@@ -476,19 +467,8 @@ static u16 group2_table[] = {
 			: _qy ((_src).val), "i" (EFLAGS_MASK));		\
 	} while (0)
 
-#define __emulate_1op_8byte(_op, _dst, _eflags)                           \
-	do {								  \
-		__asm__ __volatile__ (					  \
-			_PRE_EFLAGS("0", "3", "2")			  \
-			_op"q %1; "					  \
-			_POST_EFLAGS("0", "3", "2")			  \
-			: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
-			: "i" (EFLAGS_MASK));				  \
-	} while (0)
-
 #elif defined(__i386__)
 #define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
-#define __emulate_1op_8byte(_op, _dst, _eflags)
 #endif				/* __i386__ */
 
 /* Fetch next part of the instruction being emulated. */
-- 
cgit v0.10.2


From 6b7ad61ffb9ca110add6f7fb36cc8a4dd89696a4 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 26 Nov 2008 15:30:45 +0200
Subject: KVM: x86 emulator: consolidate emulation of two operand instructions

No need to repeat the same assembly block over and over.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 5f87d3e..a11af6f 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -365,49 +365,42 @@ static u16 group2_table[] = {
 #define ON64(x)
 #endif
 
+#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix)	\
+	do {								\
+		__asm__ __volatile__ (					\
+			_PRE_EFLAGS("0", "4", "2")			\
+			_op _suffix " %"_x"3,%1; "			\
+			_POST_EFLAGS("0", "4", "2")			\
+			: "=m" (_eflags), "=m" ((_dst).val),		\
+			  "=&r" (_tmp)					\
+			: _y ((_src).val), "i" (EFLAGS_MASK));		\
+	} while (0);
+
+
 /* Raw emulation: instruction has two explicit operands. */
 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
-	do { 								    \
-		unsigned long _tmp;					    \
-									    \
-		switch ((_dst).bytes) {					    \
-		case 2:							    \
-			__asm__ __volatile__ (				    \
-				_PRE_EFLAGS("0", "4", "2")		    \
-				_op"w %"_wx"3,%1; "			    \
-				_POST_EFLAGS("0", "4", "2")		    \
-				: "=m" (_eflags), "=m" ((_dst).val),        \
-				  "=&r" (_tmp)				    \
-				: _wy ((_src).val), "i" (EFLAGS_MASK));     \
-			break;						    \
-		case 4:							    \
-			__asm__ __volatile__ (				    \
-				_PRE_EFLAGS("0", "4", "2")		    \
-				_op"l %"_lx"3,%1; "			    \
-				_POST_EFLAGS("0", "4", "2")		    \
-				: "=m" (_eflags), "=m" ((_dst).val),	    \
-				  "=&r" (_tmp)				    \
-				: _ly ((_src).val), "i" (EFLAGS_MASK));     \
-			break;						    \
-		case 8:							    \
-			__emulate_2op_8byte(_op, _src, _dst,		    \
-					    _eflags, _qx, _qy);		    \
-			break;						    \
-		}							    \
+	do {								\
+		unsigned long _tmp;					\
+									\
+		switch ((_dst).bytes) {					\
+		case 2:							\
+			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
+			break;						\
+		case 4:							\
+			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
+			break;						\
+		case 8:							\
+			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
+			break;						\
+		}							\
 	} while (0)
 
 #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
 	do {								     \
-		unsigned long __tmp;					     \
+		unsigned long _tmp;					     \
 		switch ((_dst).bytes) {				             \
 		case 1:							     \
-			__asm__ __volatile__ (				     \
-				_PRE_EFLAGS("0", "4", "2")		     \
-				_op"b %"_bx"3,%1; "			     \
-				_POST_EFLAGS("0", "4", "2")		     \
-				: "=m" (_eflags), "=m" ((_dst).val),	     \
-				  "=&r" (__tmp)				     \
-				: _by ((_src).val), "i" (EFLAGS_MASK));      \
+			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b");  \
 			break;						     \
 		default:						     \
 			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
@@ -455,22 +448,6 @@ static u16 group2_table[] = {
 		}							\
 	} while (0)
 
-/* Emulate an instruction with quadword operands (x86/64 only). */
-#if defined(CONFIG_X86_64)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)           \
-	do {								  \
-		__asm__ __volatile__ (					  \
-			_PRE_EFLAGS("0", "4", "2")			  \
-			_op"q %"_qx"3,%1; "				  \
-			_POST_EFLAGS("0", "4", "2")			  \
-			: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
-			: _qy ((_src).val), "i" (EFLAGS_MASK));		\
-	} while (0)
-
-#elif defined(__i386__)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
-#endif				/* __i386__ */
-
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch(_type, _size, _eip)                                  \
 ({	unsigned long _x;						\
-- 
cgit v0.10.2


From d329c035e754156ffabcb64ff75d05bb8e2ddbf5 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Wed, 26 Nov 2008 14:50:27 +0100
Subject: KVM: s390: Fix refcounting and allow module unload

Currently it is impossible to unload the kvm module on s390.
This patch fixes kvm_arch_destroy_vm to release all cpus.
This make it possible to unload the module.

In addition we stop messing with the module refcount in arch code.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8b00eb2..3db9e5d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -185,8 +185,6 @@ struct kvm *kvm_arch_create_vm(void)
 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
 	VM_EVENT(kvm, 3, "%s", "vm created");
 
-	try_module_get(THIS_MODULE);
-
 	return kvm;
 out_nodbf:
 	free_page((unsigned long)(kvm->arch.sca));
@@ -196,13 +194,32 @@ out_nokvm:
 	return ERR_PTR(rc);
 }
 
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+	free_page((unsigned long)(vcpu->arch.sie_block));
+	kfree(vcpu);
+}
+
+static void kvm_free_vcpus(struct kvm *kvm)
+{
+	unsigned int i;
+
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (kvm->vcpus[i]) {
+			kvm_arch_vcpu_destroy(kvm->vcpus[i]);
+			kvm->vcpus[i] = NULL;
+		}
+	}
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-	debug_unregister(kvm->arch.dbf);
+	kvm_free_vcpus(kvm);
 	kvm_free_physmem(kvm);
 	free_page((unsigned long)(kvm->arch.sca));
+	debug_unregister(kvm->arch.dbf);
 	kfree(kvm);
-	module_put(THIS_MODULE);
 }
 
 /* Section: vcpu related */
@@ -308,8 +325,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
 		 vcpu->arch.sie_block);
 
-	try_module_get(THIS_MODULE);
-
 	return vcpu;
 out_free_cpu:
 	kfree(vcpu);
@@ -317,14 +332,6 @@ out_nomem:
 	return ERR_PTR(rc);
 }
 
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-	VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
-	free_page((unsigned long)(vcpu->arch.sie_block));
-	kfree(vcpu);
-	module_put(THIS_MODULE);
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	/* kvm common code refers to this, but never calls it */
-- 
cgit v0.10.2


From 6692cef30b7caf7525ae99670cddbaf28f1f9d40 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Wed, 26 Nov 2008 14:51:08 +0100
Subject: KVM: s390: Fix memory leak of vcpu->run

The s390 backend of kvm never calls kvm_vcpu_uninit. This causes
a memory leak of vcpu->run pages.
Lets call kvm_vcpu_uninit in kvm_arch_vcpu_destroy to free
the vcpu->run.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3db9e5d..76f05dd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -198,6 +198,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
 	free_page((unsigned long)(vcpu->arch.sie_block));
+	kvm_vcpu_uninit(vcpu);
 	kfree(vcpu);
 }
 
@@ -230,8 +231,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
-	/* kvm common code refers to this, but does'nt call it */
-	BUG();
+	/* Nothing todo */
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-- 
cgit v0.10.2


From b82091824ee4970adf92d5cd6d57b12273171625 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 26 Nov 2008 19:59:06 +0800
Subject: KVM: Prevent trace call into unloaded module text

Add marker_synchronize_unregister() before module unloading.
This prevents possible trace calls into unloaded module text.

Signed-off-by: Wu Fengguang <wfg@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index 41dcc84..f598744 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -252,6 +252,7 @@ void kvm_trace_cleanup(void)
 			struct kvm_trace_probe *p = &kvm_trace_probes[i];
 			marker_probe_unregister(p->name, p->probe_func, p);
 		}
+		marker_synchronize_unregister();
 
 		relay_close(kt->rchan);
 		debugfs_remove(kt->lost_file);
-- 
cgit v0.10.2


From faa5a3ae39483aefc46a78299c811194f953af27 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 27 Nov 2008 17:36:41 +0200
Subject: KVM: x86 emulator: Extract 'pop' sequence into a function

Switch 'pop r/m' instruction to use the new function.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index a11af6f..2555762 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1057,20 +1057,33 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
 					       c->regs[VCPU_REGS_RSP]);
 }
 
-static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
-				struct x86_emulate_ops *ops)
+static int emulate_pop(struct x86_emulate_ctxt *ctxt,
+		       struct x86_emulate_ops *ops)
 {
 	struct decode_cache *c = &ctxt->decode;
 	int rc;
 
 	rc = ops->read_std(register_address(c, ss_base(ctxt),
 					    c->regs[VCPU_REGS_RSP]),
-			   &c->dst.val, c->dst.bytes, ctxt->vcpu);
+			   &c->src.val, c->src.bytes, ctxt->vcpu);
 	if (rc != 0)
 		return rc;
 
-	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes);
+	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
+	return rc;
+}
 
+static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
+				struct x86_emulate_ops *ops)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+
+	c->src.bytes = c->dst.bytes;
+	rc = emulate_pop(ctxt, ops);
+	if (rc != 0)
+		return rc;
+	c->dst.val = c->src.val;
 	return 0;
 }
 
-- 
cgit v0.10.2


From 781d0edc5fc5cfe7491a0c5081734e62f6dc66ee Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 27 Nov 2008 18:00:28 +0200
Subject: KVM: x86 emulator: allow pop from mmio

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 2555762..70242f5f 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1063,9 +1063,9 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
 	struct decode_cache *c = &ctxt->decode;
 	int rc;
 
-	rc = ops->read_std(register_address(c, ss_base(ctxt),
-					    c->regs[VCPU_REGS_RSP]),
-			   &c->src.val, c->src.bytes, ctxt->vcpu);
+	rc = ops->read_emulated(register_address(c, ss_base(ctxt),
+						 c->regs[VCPU_REGS_RSP]),
+				&c->src.val, c->src.bytes, ctxt->vcpu);
 	if (rc != 0)
 		return rc;
 
-- 
cgit v0.10.2


From 8a09b6877f3100207b3572e7e12ea796493fe914 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 27 Nov 2008 18:06:33 +0200
Subject: KVM: x86 emulator: switch 'pop reg' instruction to emulate_pop()

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 70242f5f..702de98 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1389,14 +1389,11 @@ special_insn:
 		break;
 	case 0x58 ... 0x5f: /* pop reg */
 	pop_instruction:
-		if ((rc = ops->read_std(register_address(c, ss_base(ctxt),
-			c->regs[VCPU_REGS_RSP]), c->dst.ptr,
-			c->op_bytes, ctxt->vcpu)) != 0)
+		c->src.bytes = c->op_bytes;
+		rc = emulate_pop(ctxt, ops);
+		if (rc != 0)
 			goto done;
-
-		register_address_increment(c, &c->regs[VCPU_REGS_RSP],
-					   c->op_bytes);
-		c->dst.type = OP_NONE;	/* Disable writeback. */
+		c->dst.val = c->src.val;
 		break;
 	case 0x63:		/* movsxd */
 		if (ctxt->mode != X86EMUL_MODE_PROT64)
-- 
cgit v0.10.2


From cf5de4f886116871c2ae2eee53524edd741a68ae Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Fri, 28 Nov 2008 00:14:07 +0200
Subject: KVM: x86 emulator: fix ret emulation

'ret' did not set the operand type or size for the destination, so
writeback ignored it.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 702de98..72ae86b 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1650,7 +1650,9 @@ special_insn:
 		emulate_grp2(ctxt);
 		break;
 	case 0xc3: /* ret */
+		c->dst.type = OP_REG;
 		c->dst.ptr = &c->eip;
+		c->dst.bytes = c->op_bytes;
 		goto pop_instruction;
 	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
 	mov:
-- 
cgit v0.10.2


From 2b48cc75b21431037d6f902b9d583b1aff198490 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sat, 29 Nov 2008 20:36:13 +0200
Subject: KVM: x86 emulator: fix popf emulation

Set operand type and size to get correct writeback behavior.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 72ae86b..e8c87cc 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1552,7 +1552,9 @@ special_insn:
 		emulate_push(ctxt);
 		break;
 	case 0x9d: /* popf */
+		c->dst.type = OP_REG;
 		c->dst.ptr = (unsigned long *) &ctxt->eflags;
+		c->dst.bytes = c->op_bytes;
 		goto pop_instruction;
 	case 0xa0 ... 0xa1:	/* mov */
 		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
-- 
cgit v0.10.2


From f3fd92fbdb7663bd889c136842afc3851351ea8f Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sat, 29 Nov 2008 20:38:12 +0200
Subject: KVM: Remove extraneous semicolon after do/while

Notices by Guillaume Thouvenin.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index e8c87cc..69b330b 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -374,7 +374,7 @@ static u16 group2_table[] = {
 			: "=m" (_eflags), "=m" ((_dst).val),		\
 			  "=&r" (_tmp)					\
 			: _y ((_src).val), "i" (EFLAGS_MASK));		\
-	} while (0);
+	} while (0)
 
 
 /* Raw emulation: instruction has two explicit operands. */
-- 
cgit v0.10.2


From e8ba5d311d0c4420e84f40ff50f83981f5864a9a Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Fri, 28 Nov 2008 17:02:42 +0100
Subject: KVM: fix sparse warning

Impact: make global function static

  virt/kvm/kvm_main.c:85:6: warning: symbol 'kvm_rebooting' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 54d25e6..8dab7ce 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -82,7 +82,7 @@ struct dentry *kvm_debugfs_dir;
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
 
-bool kvm_rebooting;
+static bool kvm_rebooting;
 
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
 
-- 
cgit v0.10.2


From efff9e538f6bfa8ee2ca03f7e9a55d98df115186 Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Fri, 28 Nov 2008 17:02:06 +0100
Subject: KVM: VMX: fix sparse warning

Impact: make global function static

  arch/x86/kvm/vmx.c:134:3: warning: symbol 'vmx_capability' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7ea4855..e446f23 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -128,7 +128,7 @@ static struct vmcs_config {
 	u32 vmentry_ctrl;
 } vmcs_config;
 
-struct vmx_capability {
+static struct vmx_capability {
 	u32 ept;
 	u32 vpid;
 } vmx_capability;
-- 
cgit v0.10.2


From 844c7a9ff404d8fc88bb77b06461644621d2c985 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 1 Dec 2008 13:57:45 +0000
Subject: KVM: remove the IRQ ACK notifier assertions

We will obviously never pass a NULL struct kvm_irq_ack_notifier* to
this functions. They are always embedded in the assigned device
structure, so the assertion add nothing.

The irqchip_in_kernel() assertion is very out of place - clearly
this little abstraction needs to know nothing about the upper
layer details.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 9fbbdea..973df99 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -58,9 +58,6 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
-	/* Must be called with in-kernel IRQ chip, otherwise it's nonsense */
-	ASSERT(irqchip_in_kernel(kvm));
-	ASSERT(kian);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
 }
 
-- 
cgit v0.10.2


From fdd897e6b5253a45b633f7d334cf3e150bbaf386 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 1 Dec 2008 13:57:46 +0000
Subject: KVM: make kvm_unregister_irq_ack_notifier() safe

We never pass a NULL notifier pointer here, but we may well
pass a notifier struct which hasn't previously been
registered.

Guard against this by using hlist_del_init() which will
not do anything if the node hasn't been added to the list
and, when removing the node, will ensure that a subsequent
call to hlist_del_init() will be fine too.

Fixes an oops seen when an assigned device is freed before
and IRQ is assigned to it.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 973df99..db75045 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -63,9 +63,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
 
 void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
 {
-	if (!kian)
-		return;
-	hlist_del(&kian->link);
+	hlist_del_init(&kian->link);
 }
 
 /* The caller must hold kvm->lock mutex */
-- 
cgit v0.10.2


From f29b2673d3fc7ae38ec22922e9cdc75ee37386b5 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 1 Dec 2008 13:57:47 +0000
Subject: KVM: don't free an unallocated irq source id

Set assigned_dev->irq_source_id to -1 so that we can avoid freeing
a source ID which we never allocated.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8dab7ce..63fd882 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -210,7 +210,10 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 		pci_disable_msi(assigned_dev->dev);
 
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
-	kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+
+	if (assigned_dev->irq_source_id != -1)
+		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+	assigned_dev->irq_source_id = -1;
 
 	if (cancel_work_sync(&assigned_dev->interrupt_work))
 		/* We had pending work. That means we will have to take
@@ -466,7 +469,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	match->host_busnr = assigned_dev->busnr;
 	match->host_devfn = assigned_dev->devfn;
 	match->dev = dev;
-
+	match->irq_source_id = -1;
 	match->kvm = kvm;
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
-- 
cgit v0.10.2


From 61552367b2ce5e9bea6b6af670ec80aea386f34e Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 1 Dec 2008 13:57:48 +0000
Subject: KVM: add KVM_USERSPACE_IRQ_SOURCE_ID assertions

Make sure kvm_request_irq_source_id() never returns
KVM_USERSPACE_IRQ_SOURCE_ID.

Likewise, check that kvm_free_irq_source_id() never accepts
KVM_USERSPACE_IRQ_SOURCE_ID.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index db75045..aa5d1e5 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -72,11 +72,15 @@ int kvm_request_irq_source_id(struct kvm *kvm)
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
 	int irq_source_id = find_first_zero_bit(bitmap,
 				sizeof(kvm->arch.irq_sources_bitmap));
+
 	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
-		irq_source_id = -EFAULT;
-	} else
-		set_bit(irq_source_id, bitmap);
+		return -EFAULT;
+	}
+
+	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+	set_bit(irq_source_id, bitmap);
+
 	return irq_source_id;
 }
 
@@ -84,7 +88,9 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 {
 	int i;
 
-	if (irq_source_id <= 0 ||
+	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+
+	if (irq_source_id < 0 ||
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
 		return;
-- 
cgit v0.10.2


From 4a643be8c9b8d3c1ae8f5ccd377daaa85bd57e0c Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 1 Dec 2008 13:57:49 +0000
Subject: KVM: split out kvm_free_assigned_irq()

Split out the logic corresponding to undoing assign_irq() and
clean it up a bit.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 63fd882..e41d39d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -200,14 +200,11 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 	enable_irq(dev->host_irq);
 }
 
-static void kvm_free_assigned_device(struct kvm *kvm,
-				     struct kvm_assigned_dev_kernel
-				     *assigned_dev)
+static void kvm_free_assigned_irq(struct kvm *kvm,
+				  struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested_type)
-		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
-		pci_disable_msi(assigned_dev->dev);
+	if (!irqchip_in_kernel(kvm))
+		return;
 
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
 
@@ -215,12 +212,30 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 	assigned_dev->irq_source_id = -1;
 
+	if (!assigned_dev->irq_requested_type)
+		return;
+
 	if (cancel_work_sync(&assigned_dev->interrupt_work))
 		/* We had pending work. That means we will have to take
 		 * care of kvm_put_kvm.
 		 */
 		kvm_put_kvm(kvm);
 
+	free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+		pci_disable_msi(assigned_dev->dev);
+
+	assigned_dev->irq_requested_type = 0;
+}
+
+
+static void kvm_free_assigned_device(struct kvm *kvm,
+				     struct kvm_assigned_dev_kernel
+				     *assigned_dev)
+{
+	kvm_free_assigned_irq(kvm, assigned_dev);
+
 	pci_reset_function(assigned_dev->dev);
 
 	pci_release_regions(assigned_dev->dev);
-- 
cgit v0.10.2


From 891686188f69d330f7eeeec8e6642ccfb7453106 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Tue, 2 Dec 2008 15:51:53 -0600
Subject: KVM: ppc: support large host pages

KVM on 440 has always been able to handle large guest mappings with 4K host
pages -- we must, since the guest kernel uses 256MB mappings.

This patch makes KVM work when the host has large pages too (tested with 64K).

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 844f683..5bb2926 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -52,8 +52,8 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 
-extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
-                           u64 asid, u32 flags);
+extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+                           u64 asid, u32 flags, u32 max_bytes);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ee24618..d49dc66 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -28,6 +28,13 @@
 
 #include "44x_tlb.h"
 
+#ifndef PPC44x_TLBE_SIZE
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
+#endif
+
+#define PAGE_SIZE_4K (1<<12)
+#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
+
 #define PPC44x_TLB_UATTR_MASK \
 	(PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
 #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
@@ -179,15 +186,26 @@ void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
 	vcpu_44x->shadow_tlb_mod[i] = 1;
 }
 
-/* Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB. */
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
-                    u32 flags)
+/**
+ * kvmppc_mmu_map -- create a host mapping for guest memory
+ *
+ * If the guest wanted a larger page than the host supports, only the first
+ * host page is mapped here and the rest are demand faulted.
+ *
+ * If the guest wanted a smaller page than the host page size, we map only the
+ * guest-size page (i.e. not a full host page mapping).
+ *
+ * Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB.
+ */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
+                    u32 flags, u32 max_bytes)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	struct page *new_page;
 	struct kvmppc_44x_tlbe *stlbe;
 	hpa_t hpaddr;
+	gfn_t gfn;
 	unsigned int victim;
 
 	/* Future optimization: don't overwrite the TLB entry containing the
@@ -198,6 +216,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 	stlbe = &vcpu_44x->shadow_tlb[victim];
 
 	/* Get reference to new page. */
+	gfn = gpaddr >> PAGE_SHIFT;
 	new_page = gfn_to_page(vcpu->kvm, gfn);
 	if (is_error_page(new_page)) {
 		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
@@ -220,10 +239,25 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 	stlbe->tid = !(asid & 0xff);
 
 	/* Force TS=1 for all guest mappings. */
-	/* For now we hardcode 4KB mappings, but it will be important to
-	 * use host large pages in the future. */
-	stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
-	               | PPC44x_TLB_4K;
+	stlbe->word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
+
+	if (max_bytes >= PAGE_SIZE) {
+		/* Guest mapping is larger than or equal to host page size. We can use
+		 * a "native" host mapping. */
+		stlbe->word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
+	} else {
+		/* Guest mapping is smaller than host page size. We must restrict the
+		 * size of the mapping to be at most the smaller of the two, but for
+		 * simplicity we fall back to a 4K mapping (this is probably what the
+		 * guest is using anyways). */
+		stlbe->word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
+
+		/* 'hpaddr' is a host page, which is larger than the mapping we're
+		 * inserting here. To compensate, we must add the in-page offset to the
+		 * sub-page. */
+		hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
+	}
+
 	stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
 	stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
 	                                            vcpu->arch.msr & MSR_PR);
@@ -322,10 +356,8 @@ static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
 int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	u64 eaddr;
-	u64 raddr;
+	gva_t eaddr;
 	u64 asid;
-	u32 flags;
 	struct kvmppc_44x_tlbe *tlbe;
 	unsigned int index;
 
@@ -364,15 +396,22 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 	}
 
 	if (tlbe_is_host_safe(vcpu, tlbe)) {
+		gpa_t gpaddr;
+		u32 flags;
+		u32 bytes;
+
 		eaddr = get_tlb_eaddr(tlbe);
-		raddr = get_tlb_raddr(tlbe);
+		gpaddr = get_tlb_raddr(tlbe);
+
+		/* Use the advertised page size to mask effective and real addrs. */
+		bytes = get_tlb_bytes(tlbe);
+		eaddr &= ~(bytes - 1);
+		gpaddr &= ~(bytes - 1);
+
 		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
 		flags = tlbe->word2 & 0xffff;
 
-		/* Create a 4KB mapping on the host. If the guest wanted a
-		 * large page, only the first 4KB is mapped here and the rest
-		 * are mapped on the fly. */
-		kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
+		kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes);
 	}
 
 	KVMTRACE_5D(GTLB_WRITE, vcpu, index,
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ec59a67..924c7b4 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -308,8 +308,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * b) the guest used a large mapping which we're faking
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
-			               gtlbe->word2);
+			kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
+			               gtlbe->word2, get_tlb_bytes(gtlbe));
 			vcpu->stat.dtlb_virt_miss_exits++;
 			r = RESUME_GUEST;
 		} else {
@@ -325,6 +325,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case BOOKE_INTERRUPT_ITLB_MISS: {
 		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.pc;
+		gpa_t gpaddr;
 		gfn_t gfn;
 
 		r = RESUME_GUEST;
@@ -340,7 +341,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 		vcpu->stat.itlb_virt_miss_exits++;
 
-		gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
+		gpaddr = tlb_xlate(gtlbe, eaddr);
+		gfn = gpaddr >> PAGE_SHIFT;
 
 		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
 			/* The guest TLB had a mapping, but the shadow TLB
@@ -349,8 +351,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * b) the guest used a large mapping which we're faking
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
-			               gtlbe->word2);
+			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
+			               gtlbe->word2, get_tlb_bytes(gtlbe));
 		} else {
 			/* Guest mapped and leaped at non-RAM! */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
-- 
cgit v0.10.2


From c0ca609c5f874f7d6ae8e180afe79317e1943d22 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Tue, 2 Dec 2008 15:51:54 -0600
Subject: powerpc/44x: declare tlb_44x_index for use in C code

KVM currently ignores the host's round robin TLB eviction selection, instead
maintaining its own TLB state and its own round robin index. However, by
participating in the normal 44x TLB selection, we can drop the alternate TLB
processing in KVM. This results in a significant performance improvement,
since that processing currently must be done on *every* guest exit.

Accordingly, KVM needs to be able to access and increment tlb_44x_index.
(KVM on 440 cannot be a module, so there is no need to export this symbol.)

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 8a97cfb..27cc6fd 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -56,6 +56,7 @@
 #ifndef __ASSEMBLY__
 
 extern unsigned int tlb_44x_hwater;
+extern unsigned int tlb_44x_index;
 
 typedef struct {
 	unsigned int	id;
-- 
cgit v0.10.2


From 7924bd41097ae8991c6d38cef8b1e4058e30d198 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Tue, 2 Dec 2008 15:51:55 -0600
Subject: KVM: ppc: directly insert shadow mappings into the hardware TLB

Formerly, we used to maintain a per-vcpu shadow TLB and on every entry to the
guest would load this array into the hardware TLB. This consumed 1280 bytes of
memory (64 entries of 16 bytes plus a struct page pointer each), and also
required some assembly to loop over the array on every entry.

Instead of saving a copy in memory, we can just store shadow mappings directly
into the hardware TLB, accepting that the host kernel will clobber these as
part of the normal 440 TLB round robin. When we do that we need less than half
the memory, and we have decreased the exit handling time for all guest exits,
at the cost of increased number of TLB misses because the host overwrites some
guest entries.

These savings will be increased on processors with larger TLBs or which
implement intelligent flush instructions like tlbivax (which will avoid the
need to walk arrays in software).

In addition to that and to the code simplification, we have a greater chance of
leaving other host userspace mappings in the TLB, instead of forcing all
subsequent tasks to re-fault all their mappings.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index 72e5939..e770ea2 100644
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -22,19 +22,25 @@
 
 #include <linux/kvm_host.h>
 
-/* XXX Can't include mmu-44x.h because it redefines struct mm_context. */
 #define PPC44x_TLB_SIZE 64
 
+/* If the guest is expecting it, this can be as large as we like; we'd just
+ * need to find some way of advertising it. */
+#define KVM44x_GUEST_TLB_SIZE 64
+
+struct kvmppc_44x_shadow_ref {
+	struct page *page;
+	u16 gtlb_index;
+	u8 writeable;
+	u8 tid;
+};
+
 struct kvmppc_vcpu_44x {
 	/* Unmodified copy of the guest's TLB. */
-	struct kvmppc_44x_tlbe guest_tlb[PPC44x_TLB_SIZE];
-	/* TLB that's actually used when the guest is running. */
-	struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
-	/* Pages which are referenced in the shadow TLB. */
-	struct page *shadow_pages[PPC44x_TLB_SIZE];
-
-	/* Track which TLB entries we've modified in the current exit. */
-	u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+	struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
+
+	/* References to guest pages in the hardware TLB. */
+	struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
 
 	struct kvm_vcpu vcpu;
 };
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 5bb2926..36d2a50 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -53,7 +53,8 @@ extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 
 extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
-                           u64 asid, u32 flags, u32 max_bytes);
+                           u64 asid, u32 flags, u32 max_bytes,
+                           unsigned int gtlb_idx);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 393c7f3..ba39526 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -359,12 +359,6 @@ int main(void)
 #ifdef CONFIG_KVM
 	DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
 
-	DEFINE(VCPU_TO_44X, offsetof(struct kvmppc_vcpu_44x, vcpu));
-	DEFINE(VCPU44x_SHADOW_TLB,
-	       offsetof(struct kvmppc_vcpu_44x, shadow_tlb));
-	DEFINE(VCPU44x_SHADOW_MOD,
-	       offsetof(struct kvmppc_vcpu_44x, shadow_tlb_mod));
-
 	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 22054b1..05d72fc 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -96,21 +96,14 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	int i;
-
-	/* Mark every guest entry in the shadow TLB entry modified, so that they
-	 * will all be reloaded on the next vcpu run (instead of being
-	 * demand-faulted). */
-	for (i = 0; i <= tlb_44x_hwater; i++)
-		kvmppc_tlbe_set_modified(vcpu, i);
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	/* Don't leave guest TLB entries resident when being de-scheduled. */
-	/* XXX It would be nice to differentiate between heavyweight exit and
-	 * sched_out here, since we could avoid the TLB flush for heavyweight
-	 * exits. */
+	/* XXX Since every guest uses TS=1 TID=0/1 mappings, we can't leave any TLB
+	 * entries around when we're descheduled, so we must completely flush the
+	 * TLB of all guest mappings. On the other hand, if there is only one
+	 * guest, this flush is completely unnecessary. */
 	_tlbia();
 }
 
@@ -130,6 +123,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
+	int i;
 
 	tlbe->tid = 0;
 	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
@@ -148,6 +142,9 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 	 * CCR1[TCS]. */
 	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
 
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
+		vcpu_44x->shadow_refs[i].gtlb_index = -1;
+
 	return 0;
 }
 
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index d49dc66..2981ebe 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -22,6 +22,8 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
+
+#include <asm/tlbflush.h>
 #include <asm/mmu-44x.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_44x.h>
@@ -40,8 +42,6 @@
 #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
 #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
 
-static unsigned int kvmppc_tlb_44x_pos;
-
 #ifdef DEBUG
 void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
 {
@@ -52,24 +52,49 @@ void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
 	printk("| %2s | %3s | %8s | %8s | %8s |\n",
 			"nr", "tid", "word0", "word1", "word2");
 
-	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
 		tlbe = &vcpu_44x->guest_tlb[i];
 		if (tlbe->word0 & PPC44x_TLB_VALID)
 			printk(" G%2d |  %02X | %08X | %08X | %08X |\n",
 			       i, tlbe->tid, tlbe->word0, tlbe->word1,
 			       tlbe->word2);
 	}
-
-	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		tlbe = &vcpu_44x->shadow_tlb[i];
-		if (tlbe->word0 & PPC44x_TLB_VALID)
-			printk(" S%2d | %02X | %08X | %08X | %08X |\n",
-			       i, tlbe->tid, tlbe->word0, tlbe->word1,
-			       tlbe->word2);
-	}
 }
 #endif
 
+static inline void kvmppc_44x_tlbie(unsigned int index)
+{
+	/* 0 <= index < 64, so the V bit is clear and we can use the index as
+	 * word0. */
+	asm volatile(
+		"tlbwe %[index], %[index], 0\n"
+	:
+	: [index] "r"(index)
+	);
+}
+
+static inline void kvmppc_44x_tlbwe(unsigned int index,
+                                    struct kvmppc_44x_tlbe *stlbe)
+{
+	unsigned long tmp;
+
+	asm volatile(
+		"mfspr %[tmp], %[sprn_mmucr]\n"
+		"rlwimi %[tmp], %[tid], 0, 0xff\n"
+		"mtspr %[sprn_mmucr], %[tmp]\n"
+		"tlbwe %[word0], %[index], 0\n"
+		"tlbwe %[word1], %[index], 1\n"
+		"tlbwe %[word2], %[index], 2\n"
+		: [tmp]   "=&r"(tmp)
+		: [word0] "r"(stlbe->word0),
+		  [word1] "r"(stlbe->word1),
+		  [word2] "r"(stlbe->word2),
+		  [tid]   "r"(stlbe->tid),
+		  [index] "r"(index),
+		  [sprn_mmucr] "i"(SPRN_MMUCR)
+	);
+}
+
 static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
 {
 	/* We only care about the guest's permission and user bits. */
@@ -99,7 +124,7 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
 	int i;
 
 	/* XXX Replace loop with fancy data structures. */
-	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
 		struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
 		unsigned int tid;
 
@@ -125,65 +150,53 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
 	return -1;
 }
 
-struct kvmppc_44x_tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu,
-                                               gva_t eaddr)
+int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	unsigned int as = !!(vcpu->arch.msr & MSR_IS);
-	unsigned int index;
 
-	index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
-	if (index == -1)
-		return NULL;
-	return &vcpu_44x->guest_tlb[index];
+	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 }
 
-struct kvmppc_44x_tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu,
-                                               gva_t eaddr)
+int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	unsigned int as = !!(vcpu->arch.msr & MSR_DS);
-	unsigned int index;
 
-	index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
-	if (index == -1)
-		return NULL;
-	return &vcpu_44x->guest_tlb[index];
+	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 }
 
-static int kvmppc_44x_tlbe_is_writable(struct kvmppc_44x_tlbe *tlbe)
+static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
+                                      unsigned int stlb_index)
 {
-	return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
-}
+	struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
 
-static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
-                                      unsigned int index)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[index];
-	struct page *page = vcpu_44x->shadow_pages[index];
+	if (!ref->page)
+		return;
 
-	if (get_tlb_v(stlbe)) {
-		if (kvmppc_44x_tlbe_is_writable(stlbe))
-			kvm_release_page_dirty(page);
-		else
-			kvm_release_page_clean(page);
-	}
-}
+	/* Discard from the TLB. */
+	/* Note: we could actually invalidate a host mapping, if the host overwrote
+	 * this TLB entry since we inserted a guest mapping. */
+	kvmppc_44x_tlbie(stlb_index);
 
-void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
-{
-	int i;
+	/* Now release the page. */
+	if (ref->writeable)
+		kvm_release_page_dirty(ref->page);
+	else
+		kvm_release_page_clean(ref->page);
 
-	for (i = 0; i <= tlb_44x_hwater; i++)
-		kvmppc_44x_shadow_release(vcpu, i);
+	ref->page = NULL;
+
+	/* XXX set tlb_44x_index to stlb_index? */
+
+	KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
 }
 
-void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
+void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int i;
 
-	vcpu_44x->shadow_tlb_mod[i] = 1;
+	for (i = 0; i <= tlb_44x_hwater; i++)
+		kvmppc_44x_shadow_release(vcpu_44x, i);
 }
 
 /**
@@ -199,21 +212,24 @@ void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
  * the shadow TLB.
  */
 void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
-                    u32 flags, u32 max_bytes)
+                    u32 flags, u32 max_bytes, unsigned int gtlb_index)
 {
+	struct kvmppc_44x_tlbe stlbe;
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_shadow_ref *ref;
 	struct page *new_page;
-	struct kvmppc_44x_tlbe *stlbe;
 	hpa_t hpaddr;
 	gfn_t gfn;
 	unsigned int victim;
 
-	/* Future optimization: don't overwrite the TLB entry containing the
-	 * current PC (or stack?). */
-	victim = kvmppc_tlb_44x_pos++;
-	if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
-		kvmppc_tlb_44x_pos = 0;
-	stlbe = &vcpu_44x->shadow_tlb[victim];
+	/* Select TLB entry to clobber. Indirectly guard against races with the TLB
+	 * miss handler by disabling interrupts. */
+	local_irq_disable();
+	victim = ++tlb_44x_index;
+	if (victim > tlb_44x_hwater)
+		victim = 0;
+	tlb_44x_index = victim;
+	local_irq_enable();
 
 	/* Get reference to new page. */
 	gfn = gpaddr >> PAGE_SHIFT;
@@ -225,10 +241,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
 	}
 	hpaddr = page_to_phys(new_page);
 
-	/* Drop reference to old page. */
-	kvmppc_44x_shadow_release(vcpu, victim);
-
-	vcpu_44x->shadow_pages[victim] = new_page;
+	/* Invalidate any previous shadow mappings. */
+	kvmppc_44x_shadow_release(vcpu_44x, victim);
 
 	/* XXX Make sure (va, size) doesn't overlap any other
 	 * entries. 440x6 user manual says the result would be
@@ -236,21 +250,19 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
 
 	/* XXX what about AS? */
 
-	stlbe->tid = !(asid & 0xff);
-
 	/* Force TS=1 for all guest mappings. */
-	stlbe->word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
+	stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
 
 	if (max_bytes >= PAGE_SIZE) {
 		/* Guest mapping is larger than or equal to host page size. We can use
 		 * a "native" host mapping. */
-		stlbe->word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
+		stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
 	} else {
 		/* Guest mapping is smaller than host page size. We must restrict the
 		 * size of the mapping to be at most the smaller of the two, but for
 		 * simplicity we fall back to a 4K mapping (this is probably what the
 		 * guest is using anyways). */
-		stlbe->word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
+		stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
 
 		/* 'hpaddr' is a host page, which is larger than the mapping we're
 		 * inserting here. To compensate, we must add the in-page offset to the
@@ -258,47 +270,36 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
 		hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
 	}
 
-	stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
-	stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
+	stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
+	stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
 	                                            vcpu->arch.msr & MSR_PR);
-	kvmppc_tlbe_set_modified(vcpu, victim);
-
-	KVMTRACE_5D(STLB_WRITE, vcpu, victim,
-			stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2,
-			handler);
+	stlbe.tid = !(asid & 0xff);
+
+	/* Keep track of the reference so we can properly release it later. */
+	ref = &vcpu_44x->shadow_refs[victim];
+	ref->page = new_page;
+	ref->gtlb_index = gtlb_index;
+	ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
+	ref->tid = stlbe.tid;
+
+	/* Insert shadow mapping into hardware TLB. */
+	kvmppc_44x_tlbwe(victim, &stlbe);
+	KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
+	            stlbe.word2, handler);
 }
 
-static void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
-                                  gva_t eend, u32 asid)
+/* For a particular guest TLB entry, invalidate the corresponding host TLB
+ * mappings and release the host pages. */
+static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
+                                  unsigned int gtlb_index)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	unsigned int pid = !(asid & 0xff);
 	int i;
 
-	/* XXX Replace loop with fancy data structures. */
-	for (i = 0; i <= tlb_44x_hwater; i++) {
-		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
-		unsigned int tid;
-
-		if (!get_tlb_v(stlbe))
-			continue;
-
-		if (eend < get_tlb_eaddr(stlbe))
-			continue;
-
-		if (eaddr > get_tlb_end(stlbe))
-			continue;
-
-		tid = get_tlb_tid(stlbe);
-		if (tid && (tid != pid))
-			continue;
-
-		kvmppc_44x_shadow_release(vcpu, i);
-		stlbe->word0 = 0;
-		kvmppc_tlbe_set_modified(vcpu, i);
-		KVMTRACE_5D(STLB_INVAL, vcpu, i,
-				stlbe->tid, stlbe->word0, stlbe->word1,
-				stlbe->word2, handler);
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
+		struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
+		if (ref->gtlb_index == gtlb_index)
+			kvmppc_44x_shadow_release(vcpu_44x, i);
 	}
 }
 
@@ -321,14 +322,11 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
 	 * can't access guest kernel mappings (TID=1). When we switch to a new
 	 * guest PID, which will also use host PID=0, we must discard the old guest
 	 * userspace mappings. */
-	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_tlb); i++) {
-		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
-
-		if (get_tlb_tid(stlbe) == 0) {
-			kvmppc_44x_shadow_release(vcpu, i);
-			stlbe->word0 = 0;
-			kvmppc_tlbe_set_modified(vcpu, i);
-		}
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
+		struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
+
+		if (ref->tid == 0)
+			kvmppc_44x_shadow_release(vcpu_44x, i);
 	}
 }
 
@@ -356,26 +354,21 @@ static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
 int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	gva_t eaddr;
-	u64 asid;
 	struct kvmppc_44x_tlbe *tlbe;
-	unsigned int index;
+	unsigned int gtlb_index;
 
-	index = vcpu->arch.gpr[ra];
-	if (index > PPC44x_TLB_SIZE) {
-		printk("%s: index %d\n", __func__, index);
+	gtlb_index = vcpu->arch.gpr[ra];
+	if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
+		printk("%s: index %d\n", __func__, gtlb_index);
 		kvmppc_dump_vcpu(vcpu);
 		return EMULATE_FAIL;
 	}
 
-	tlbe = &vcpu_44x->guest_tlb[index];
+	tlbe = &vcpu_44x->guest_tlb[gtlb_index];
 
-	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
-	if (tlbe->word0 & PPC44x_TLB_VALID) {
-		eaddr = get_tlb_eaddr(tlbe);
-		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
-		kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
-	}
+	/* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
+	if (tlbe->word0 & PPC44x_TLB_VALID)
+		kvmppc_44x_invalidate(vcpu, gtlb_index);
 
 	switch (ws) {
 	case PPC44x_TLB_PAGEID:
@@ -396,6 +389,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 	}
 
 	if (tlbe_is_host_safe(vcpu, tlbe)) {
+		u64 asid;
+		gva_t eaddr;
 		gpa_t gpaddr;
 		u32 flags;
 		u32 bytes;
@@ -411,12 +406,11 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
 		flags = tlbe->word2 & 0xffff;
 
-		kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes);
+		kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
 	}
 
-	KVMTRACE_5D(GTLB_WRITE, vcpu, index,
-	            tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
-	            handler);
+	KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
+	            tlbe->word1, tlbe->word2, handler);
 
 	return EMULATE_DONE;
 }
@@ -424,7 +418,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
 {
 	u32 ea;
-	int index;
+	int gtlb_index;
 	unsigned int as = get_mmucr_sts(vcpu);
 	unsigned int pid = get_mmucr_stid(vcpu);
 
@@ -432,14 +426,14 @@ int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
 	if (ra)
 		ea += vcpu->arch.gpr[ra];
 
-	index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
+	gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
 	if (rc) {
-		if (index < 0)
+		if (gtlb_index < 0)
 			vcpu->arch.cr &= ~0x20000000;
 		else
 			vcpu->arch.cr |= 0x20000000;
 	}
-	vcpu->arch.gpr[rt] = index;
+	vcpu->arch.gpr[rt] = gtlb_index;
 
 	return EMULATE_DONE;
 }
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index b1029af..772191f 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,11 +25,8 @@
 
 extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
                                 unsigned int pid, unsigned int as);
-extern struct kvmppc_44x_tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu,
-                                                      gva_t eaddr);
-extern struct kvmppc_44x_tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu,
-                                                      gva_t eaddr);
-extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
+extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
 
 extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
                                  u8 rc);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 924c7b4..eb24383 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -24,10 +24,12 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
+
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
 #include <asm/cacheflush.h>
+#include <asm/kvm_44x.h>
 
 #include "booke.h"
 #include "44x_tlb.h"
@@ -207,10 +209,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		 * handled this interrupt the moment we enabled interrupts.
 		 * Now we just offer it a chance to reschedule the guest. */
 
-		/* XXX At this point the TLB still holds our shadow TLB, so if
-		 * we do reschedule the host will fault over it. Perhaps we
-		 * should politely restore the host's entries to minimize
-		 * misses before ceding control. */
 		vcpu->stat.dec_exits++;
 		if (need_resched())
 			cond_resched();
@@ -281,14 +279,17 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		r = RESUME_GUEST;
 		break;
 
+	/* XXX move to a 440-specific file. */
 	case BOOKE_INTERRUPT_DTLB_MISS: {
+		struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.fault_dear;
+		int gtlb_index;
 		gfn_t gfn;
 
 		/* Check the guest TLB. */
-		gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
-		if (!gtlbe) {
+		gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
 			vcpu->arch.dear = vcpu->arch.fault_dear;
@@ -298,6 +299,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			break;
 		}
 
+		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
 		vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
 		gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
 
@@ -309,7 +311,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
 			kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
-			               gtlbe->word2, get_tlb_bytes(gtlbe));
+			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
 			vcpu->stat.dtlb_virt_miss_exits++;
 			r = RESUME_GUEST;
 		} else {
@@ -322,17 +324,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 	}
 
+	/* XXX move to a 440-specific file. */
 	case BOOKE_INTERRUPT_ITLB_MISS: {
+		struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.pc;
 		gpa_t gpaddr;
 		gfn_t gfn;
+		int gtlb_index;
 
 		r = RESUME_GUEST;
 
 		/* Check the guest TLB. */
-		gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
-		if (!gtlbe) {
+		gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
 			vcpu->stat.itlb_real_miss_exits++;
@@ -341,6 +346,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 		vcpu->stat.itlb_virt_miss_exits++;
 
+		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
 		gpaddr = tlb_xlate(gtlbe, eaddr);
 		gfn = gpaddr >> PAGE_SHIFT;
 
@@ -352,7 +358,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
 			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
-			               gtlbe->word2, get_tlb_bytes(gtlbe));
+			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
 		} else {
 			/* Guest mapped and leaped at non-RAM! */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 8d6929b..eb21868 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -335,54 +335,6 @@ lightweight_exit:
 	lwz	r3, VCPU_SHADOW_PID(r4)
 	mtspr	SPRN_PID, r3
 
-	/* Prevent all asynchronous TLB updates. */
-	mfmsr	r5
-	lis	r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
-	ori	r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
-	andc	r6, r5, r6
-	mtmsr	r6
-
-	/* Load the guest mappings, leaving the host's "pinned" kernel mappings
-	 * in place. */
-	mfspr	r10, SPRN_MMUCR			/* Save host MMUCR. */
-	li	r5, PPC44x_TLB_SIZE
-	lis	r5, tlb_44x_hwater@ha
-	lwz	r5, tlb_44x_hwater@l(r5)
-	mtctr	r5
-	addi	r9, r4, -VCPU_TO_44X + VCPU44x_SHADOW_TLB
-	addi	r5, r4, -VCPU_TO_44X + VCPU44x_SHADOW_MOD
-	li	r3, 0
-1:
-	lbzx	r7, r3, r5
-	cmpwi	r7, 0
-	beq	3f
-
-	/* Load guest entry. */
-	mulli	r11, r3, TLBE_BYTES
-	add	r11, r11, r9
-	lwz	r7, 0(r11)
-	mtspr	SPRN_MMUCR, r7
-	lwz	r7, 4(r11)
-	tlbwe	r7, r3, PPC44x_TLB_PAGEID
-	lwz	r7, 8(r11)
-	tlbwe	r7, r3, PPC44x_TLB_XLAT
-	lwz	r7, 12(r11)
-	tlbwe	r7, r3, PPC44x_TLB_ATTRIB
-3:
-	addi	r3, r3, 1                       /* Increment index. */
-	bdnz	1b
-
-	mtspr	SPRN_MMUCR, r10			/* Restore host MMUCR. */
-
-	/* Clear bitmap of modified TLB entries */
-	li	r5, PPC44x_TLB_SIZE>>2
-	mtctr	r5
-	addi	r5, r4, -VCPU_TO_44X + VCPU44x_SHADOW_MOD - 4
-	li	r6, 0
-1:
-	stwu	r6, 4(r5)
-	bdnz	1b
-
 	iccci	0, 0 /* XXX hack */
 
 	/* Load some guest volatiles. */
-- 
cgit v0.10.2


From c5fbdffbda79254047ec83b09c1a61a3655d052a Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Tue, 2 Dec 2008 15:51:56 -0600
Subject: KVM: ppc: save and restore guest mappings on context switch

Store shadow TLB entries in memory, but only use it on host context switch
(instead of every guest entry). This improves performance for most workloads on
440 by reducing the guest TLB miss rate.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index e770ea2..f49031b 100644
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -42,6 +42,10 @@ struct kvmppc_vcpu_44x {
 	/* References to guest pages in the hardware TLB. */
 	struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
 
+	/* State of the shadow TLB at guest context switch time. */
+	struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
+	u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+
 	struct kvm_vcpu vcpu;
 };
 
@@ -51,5 +55,7 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
 }
 
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
 
 #endif /* __ASM_44X_H__ */
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 05d72fc..a66bec5 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -96,15 +96,12 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+	kvmppc_44x_tlb_load(vcpu);
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	/* XXX Since every guest uses TS=1 TID=0/1 mappings, we can't leave any TLB
-	 * entries around when we're descheduled, so we must completely flush the
-	 * TLB of all guest mappings. On the other hand, if there is only one
-	 * guest, this flush is completely unnecessary. */
-	_tlbia();
+	kvmppc_44x_tlb_put(vcpu);
 }
 
 int kvmppc_core_check_processor_compat(void)
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 2981ebe..ff16d0e 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -73,6 +73,25 @@ static inline void kvmppc_44x_tlbie(unsigned int index)
 	);
 }
 
+static inline void kvmppc_44x_tlbre(unsigned int index,
+                                    struct kvmppc_44x_tlbe *tlbe)
+{
+	asm volatile(
+		"tlbre %[word0], %[index], 0\n"
+		"mfspr %[tid], %[sprn_mmucr]\n"
+		"andi. %[tid], %[tid], 0xff\n"
+		"tlbre %[word1], %[index], 1\n"
+		"tlbre %[word2], %[index], 2\n"
+		: [word0] "=r"(tlbe->word0),
+		  [word1] "=r"(tlbe->word1),
+		  [word2] "=r"(tlbe->word2),
+		  [tid]   "=r"(tlbe->tid)
+		: [index] "r"(index),
+		  [sprn_mmucr] "i"(SPRN_MMUCR)
+		: "cc"
+	);
+}
+
 static inline void kvmppc_44x_tlbwe(unsigned int index,
                                     struct kvmppc_44x_tlbe *stlbe)
 {
@@ -116,6 +135,44 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
 	return attrib;
 }
 
+/* Load shadow TLB back into hardware. */
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int i;
+
+	for (i = 0; i <= tlb_44x_hwater; i++) {
+		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
+
+		if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
+			kvmppc_44x_tlbwe(i, stlbe);
+	}
+}
+
+static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
+                                         unsigned int i)
+{
+	vcpu_44x->shadow_tlb_mod[i] = 1;
+}
+
+/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int i;
+
+	for (i = 0; i <= tlb_44x_hwater; i++) {
+		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
+
+		if (vcpu_44x->shadow_tlb_mod[i])
+			kvmppc_44x_tlbre(i, stlbe);
+
+		if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
+			kvmppc_44x_tlbie(i);
+	}
+}
+
+
 /* Search the guest TLB for a matching entry. */
 int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
                          unsigned int as)
@@ -283,6 +340,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
 	ref->tid = stlbe.tid;
 
 	/* Insert shadow mapping into hardware TLB. */
+	kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
 	kvmppc_44x_tlbwe(victim, &stlbe);
 	KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
 	            stlbe.word2, handler);
-- 
cgit v0.10.2


From 73e75b416ffcfa3a84952d8e389a0eca080f00e1 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Tue, 2 Dec 2008 15:51:57 -0600
Subject: KVM: ppc: Implement in-kernel exit timing statistics

Existing KVM statistics are either just counters (kvm_stat) reported for
KVM generally or trace based aproaches like kvm_trace.
For KVM on powerpc we had the need to track the timings of the different exit
types. While this could be achieved parsing data created with a kvm_trace
extension this adds too much overhead (at least on embedded PowerPC) slowing
down the workloads we wanted to measure.

Therefore this patch adds a in-kernel exit timing statistic to the powerpc kvm
code. These statistic is available per vm&vcpu under the kvm debugfs directory.
As this statistic is low, but still some overhead it can be enabled via a
.config entry and should be off by default.

Since this patch touched all powerpc kvm_stat code anyway this code is now
merged and simplified together with the exit timing statistic code (still
working with exit timing disabled in .config).

Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index a4a7d5e..2f5b49f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -71,6 +71,49 @@ struct kvmppc_44x_tlbe {
 	u32 word2;
 };
 
+enum kvm_exit_types {
+	MMIO_EXITS,
+	DCR_EXITS,
+	SIGNAL_EXITS,
+	ITLB_REAL_MISS_EXITS,
+	ITLB_VIRT_MISS_EXITS,
+	DTLB_REAL_MISS_EXITS,
+	DTLB_VIRT_MISS_EXITS,
+	SYSCALL_EXITS,
+	ISI_EXITS,
+	DSI_EXITS,
+	EMULATED_INST_EXITS,
+	EMULATED_MTMSRWE_EXITS,
+	EMULATED_WRTEE_EXITS,
+	EMULATED_MTSPR_EXITS,
+	EMULATED_MFSPR_EXITS,
+	EMULATED_MTMSR_EXITS,
+	EMULATED_MFMSR_EXITS,
+	EMULATED_TLBSX_EXITS,
+	EMULATED_TLBWE_EXITS,
+	EMULATED_RFI_EXITS,
+	DEC_EXITS,
+	EXT_INTR_EXITS,
+	HALT_WAKEUP,
+	USR_PR_INST,
+	FP_UNAVAIL,
+	DEBUG_EXITS,
+	TIMEINGUEST,
+	__NUMBER_OF_KVM_EXIT_TYPES
+};
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+/* allow access to big endian 32bit upper/lower parts and 64bit var */
+struct exit_timing {
+	union {
+		u64 tv64;
+		struct {
+			u32 tbu, tbl;
+		} tv32;
+	};
+};
+#endif
+
 struct kvm_arch {
 };
 
@@ -130,6 +173,19 @@ struct kvm_vcpu_arch {
 	u32 dbcr0;
 	u32 dbcr1;
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	struct exit_timing timing_exit;
+	struct exit_timing timing_last_enter;
+	u32 last_exit_type;
+	u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_last_exit;
+	struct dentry *debugfs_exit_timing;
+#endif
+
 	u32 last_inst;
 	ulong fault_dear;
 	ulong fault_esr;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index ba39526..9937fe4 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -383,5 +383,16 @@ int main(void)
 	DEFINE(PTE_T_LOG2, PTE_T_LOG2);
 #endif
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
+						arch.timing_exit.tv32.tbu));
+	DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
+						arch.timing_exit.tv32.tbl));
+	DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
+					arch.timing_last_enter.tv32.tbu));
+	DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
+					arch.timing_last_enter.tv32.tbl));
+#endif
+
 	return 0;
 }
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 9ef79c7..69f88d53 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -22,6 +22,7 @@
 #include <asm/dcr-regs.h>
 #include <asm/disassemble.h>
 #include <asm/kvm_44x.h>
+#include "timing.h"
 
 #include "booke.h"
 #include "44x_tlb.h"
@@ -58,11 +59,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	int ws;
 
 	switch (get_op(inst)) {
-
 	case OP_RFI:
 		switch (get_xop(inst)) {
 		case XOP_RFI:
 			kvmppc_emul_rfi(vcpu);
+			kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
 			*advance = 0;
 			break;
 
@@ -78,10 +79,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		case XOP_MFMSR:
 			rt = get_rt(inst);
 			vcpu->arch.gpr[rt] = vcpu->arch.msr;
+			kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
 			break;
 
 		case XOP_MTMSR:
 			rs = get_rs(inst);
+			kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
 			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
 			break;
 
@@ -89,11 +92,13 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			rs = get_rs(inst);
 			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
 							 | (vcpu->arch.gpr[rs] & MSR_EE);
+			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
 			break;
 
 		case XOP_WRTEEI:
 			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
 							 | (inst & MSR_EE);
+			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
 			break;
 
 		case XOP_MFDCR:
@@ -127,6 +132,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				run->dcr.is_write = 0;
 				vcpu->arch.io_gpr = rt;
 				vcpu->arch.dcr_needed = 1;
+				account_exit(vcpu, DCR_EXITS);
 				emulated = EMULATE_DO_DCR;
 			}
 
@@ -146,6 +152,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				run->dcr.data = vcpu->arch.gpr[rs];
 				run->dcr.is_write = 1;
 				vcpu->arch.dcr_needed = 1;
+				account_exit(vcpu, DCR_EXITS);
 				emulated = EMULATE_DO_DCR;
 			}
 
@@ -276,6 +283,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 		return EMULATE_FAIL;
 	}
 
+	kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
 	return EMULATE_DONE;
 }
 
@@ -357,6 +365,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		return EMULATE_FAIL;
 	}
 
+	kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
 	return EMULATE_DONE;
 }
 
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ff16d0e..9a34b8e 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -27,6 +27,7 @@
 #include <asm/mmu-44x.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_44x.h>
+#include "timing.h"
 
 #include "44x_tlb.h"
 
@@ -470,6 +471,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 	KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
 	            tlbe->word1, tlbe->word2, handler);
 
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
 	return EMULATE_DONE;
 }
 
@@ -493,5 +495,6 @@ int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
 	}
 	vcpu->arch.gpr[rt] = gtlb_index;
 
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
 	return EMULATE_DONE;
 }
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index e4ab1c7..6dbdc48 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -32,6 +32,17 @@ config KVM_440
 
 	  If unsure, say N.
 
+config KVM_EXIT_TIMING
+	bool "Detailed exit timing"
+	depends on KVM
+	---help---
+	  Calculate elapsed time for every exit/enter cycle. A per-vcpu
+	  report is available in debugfs kvm/vm#_vcpu#_timing.
+	  The overhead is relatively small, however it is not recommended for
+	  production environments.
+
+	  If unsure, say N.
+
 config KVM_TRACE
 	bool "KVM trace support"
 	depends on KVM && MARKERS && SYSFS
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index f045fad..df7ba59 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -9,6 +9,7 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 common-objs-$(CONFIG_KVM_TRACE)  += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 
 kvm-objs := $(common-objs-y) powerpc.o emulate.o
+obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
 obj-$(CONFIG_KVM) += kvm.o
 
 AFLAGS_booke_interrupts.o := -I$(obj)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index eb24383..0f17124 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -28,6 +28,7 @@
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
+#include "timing.h"
 #include <asm/cacheflush.h>
 #include <asm/kvm_44x.h>
 
@@ -185,6 +186,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	enum emulation_result er;
 	int r = RESUME_HOST;
 
+	/* update before a new last_exit_type is rewritten */
+	kvmppc_update_timing_stats(vcpu);
+
 	local_irq_enable();
 
 	run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -198,7 +202,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 
 	case BOOKE_INTERRUPT_EXTERNAL:
-		vcpu->stat.ext_intr_exits++;
+		account_exit(vcpu, EXT_INTR_EXITS);
 		if (need_resched())
 			cond_resched();
 		r = RESUME_GUEST;
@@ -208,8 +212,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		/* Since we switched IVPR back to the host's value, the host
 		 * handled this interrupt the moment we enabled interrupts.
 		 * Now we just offer it a chance to reschedule the guest. */
-
-		vcpu->stat.dec_exits++;
+		account_exit(vcpu, DEC_EXITS);
 		if (need_resched())
 			cond_resched();
 		r = RESUME_GUEST;
@@ -222,20 +225,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			vcpu->arch.esr = vcpu->arch.fault_esr;
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
 			r = RESUME_GUEST;
+			account_exit(vcpu, USR_PR_INST);
 			break;
 		}
 
 		er = kvmppc_emulate_instruction(run, vcpu);
 		switch (er) {
 		case EMULATE_DONE:
+			/* don't overwrite subtypes, just account kvm_stats */
+			account_exit_stat(vcpu, EMULATED_INST_EXITS);
 			/* Future optimization: only reload non-volatiles if
 			 * they were actually modified by emulation. */
-			vcpu->stat.emulated_inst_exits++;
 			r = RESUME_GUEST_NV;
 			break;
 		case EMULATE_DO_DCR:
 			run->exit_reason = KVM_EXIT_DCR;
-			vcpu->stat.dcr_exits++;
 			r = RESUME_HOST;
 			break;
 		case EMULATE_FAIL:
@@ -255,6 +259,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	case BOOKE_INTERRUPT_FP_UNAVAIL:
 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+		account_exit(vcpu, FP_UNAVAIL);
 		r = RESUME_GUEST;
 		break;
 
@@ -262,20 +267,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		vcpu->arch.dear = vcpu->arch.fault_dear;
 		vcpu->arch.esr = vcpu->arch.fault_esr;
 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
-		vcpu->stat.dsi_exits++;
+		account_exit(vcpu, DSI_EXITS);
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_INST_STORAGE:
 		vcpu->arch.esr = vcpu->arch.fault_esr;
 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
-		vcpu->stat.isi_exits++;
+		account_exit(vcpu, ISI_EXITS);
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_SYSCALL:
 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
-		vcpu->stat.syscall_exits++;
+		account_exit(vcpu, SYSCALL_EXITS);
 		r = RESUME_GUEST;
 		break;
 
@@ -294,7 +299,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
 			vcpu->arch.dear = vcpu->arch.fault_dear;
 			vcpu->arch.esr = vcpu->arch.fault_esr;
-			vcpu->stat.dtlb_real_miss_exits++;
+			account_exit(vcpu, DTLB_REAL_MISS_EXITS);
 			r = RESUME_GUEST;
 			break;
 		}
@@ -312,13 +317,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * invoking the guest. */
 			kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
 			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
-			vcpu->stat.dtlb_virt_miss_exits++;
+			account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
 			r = RESUME_GUEST;
 		} else {
 			/* Guest has mapped and accessed a page which is not
 			 * actually RAM. */
 			r = kvmppc_emulate_mmio(run, vcpu);
-			vcpu->stat.mmio_exits++;
+			account_exit(vcpu, MMIO_EXITS);
 		}
 
 		break;
@@ -340,11 +345,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
-			vcpu->stat.itlb_real_miss_exits++;
+			account_exit(vcpu, ITLB_REAL_MISS_EXITS);
 			break;
 		}
 
-		vcpu->stat.itlb_virt_miss_exits++;
+		account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
 
 		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
 		gpaddr = tlb_xlate(gtlbe, eaddr);
@@ -378,6 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		mtspr(SPRN_DBSR, dbsr);
 
 		run->exit_reason = KVM_EXIT_DEBUG;
+		account_exit(vcpu, DEBUG_EXITS);
 		r = RESUME_HOST;
 		break;
 	}
@@ -398,7 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		if (signal_pending(current)) {
 			run->exit_reason = KVM_EXIT_INTR;
 			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-			vcpu->stat.signal_exits++;
+			account_exit(vcpu, SIGNAL_EXITS);
 		}
 	}
 
@@ -418,6 +424,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	 * before it's programmed its own IVPR. */
 	vcpu->arch.ivpr = 0x55550000;
 
+	kvmppc_init_timing_stats(vcpu);
+
 	return kvmppc_core_vcpu_setup(vcpu);
 }
 
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 48d905f..cf7c94c 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -22,6 +22,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_host.h>
+#include "timing.h"
 
 /* interrupt priortity ordering */
 #define BOOKE_IRQPRIO_DATA_STORAGE 0
@@ -50,8 +51,10 @@ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 
 	vcpu->arch.msr = new_msr;
 
-	if (vcpu->arch.msr & MSR_WE)
+	if (vcpu->arch.msr & MSR_WE) {
 		kvm_vcpu_block(vcpu);
+		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
+	};
 }
 
 #endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index eb21868..084ebcd 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -107,6 +107,18 @@ _GLOBAL(kvmppc_resume_host)
 	li	r6, 1
 	slw	r6, r6, r5
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	/* save exit time */
+1:
+	mfspr	r7, SPRN_TBRU
+	mfspr	r8, SPRN_TBRL
+	mfspr	r9, SPRN_TBRU
+	cmpw	r9, r7
+	bne	1b
+	stw	r8, VCPU_TIMING_EXIT_TBL(r4)
+	stw	r9, VCPU_TIMING_EXIT_TBU(r4)
+#endif
+
 	/* Save the faulting instruction and all GPRs for emulation. */
 	andi.	r7, r6, NEED_INST_MASK
 	beq	..skip_inst_copy
@@ -375,6 +387,18 @@ lightweight_exit:
 	lwz	r3, VCPU_SPRG7(r4)
 	mtspr	SPRN_SPRG7, r3
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	/* save enter time */
+1:
+	mfspr	r6, SPRN_TBRU
+	mfspr	r7, SPRN_TBRL
+	mfspr	r8, SPRN_TBRU
+	cmpw	r8, r6
+	bne	1b
+	stw	r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
+	stw	r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+#endif
+
 	/* Finish loading guest volatiles and jump to guest. */
 	lwz	r3, VCPU_CTR(r4)
 	mtctr	r3
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 4c30fa0..d1d38da 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -28,6 +28,7 @@
 #include <asm/byteorder.h>
 #include <asm/kvm_ppc.h>
 #include <asm/disassemble.h>
+#include "timing.h"
 
 void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 {
@@ -73,6 +74,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	enum emulation_result emulated = EMULATE_DONE;
 	int advance = 1;
 
+	/* this default type might be overwritten by subcategories */
+	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
+
 	switch (get_op(inst)) {
 	case 3:                                             /* trap */
 		vcpu->arch.esr |= ESR_PTR;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 7ad150e..1deda37 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -28,9 +28,9 @@
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
 #include <asm/tlbflush.h>
+#include "timing.h"
 #include "../mm/mmu_decl.h"
 
-
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	return gfn;
@@ -171,11 +171,15 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
-	return kvmppc_core_vcpu_create(kvm, id);
+	struct kvm_vcpu *vcpu;
+	vcpu = kvmppc_core_vcpu_create(kvm, id);
+	kvmppc_create_vcpu_debugfs(vcpu, id);
+	return vcpu;
 }
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+	kvmppc_remove_vcpu_debugfs(vcpu);
 	kvmppc_core_vcpu_free(vcpu);
 }
 
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
new file mode 100644
index 0000000..f42d272
--- /dev/null
+++ b/arch/powerpc/kvm/timing.c
@@ -0,0 +1,262 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include "timing.h"
+#include <asm/time.h>
+#include <asm-generic/div64.h>
+
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	/* pause guest execution to avoid concurrent updates */
+	local_irq_disable();
+	mutex_lock(&vcpu->mutex);
+
+	vcpu->arch.last_exit_type = 0xDEAD;
+	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+		vcpu->arch.timing_count_type[i] = 0;
+		vcpu->arch.timing_max_duration[i] = 0;
+		vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF;
+		vcpu->arch.timing_sum_duration[i] = 0;
+		vcpu->arch.timing_sum_quad_duration[i] = 0;
+	}
+	vcpu->arch.timing_last_exit = 0;
+	vcpu->arch.timing_exit.tv64 = 0;
+	vcpu->arch.timing_last_enter.tv64 = 0;
+
+	mutex_unlock(&vcpu->mutex);
+	local_irq_enable();
+}
+
+static void add_exit_timing(struct kvm_vcpu *vcpu,
+					u64 duration, int type)
+{
+	u64 old;
+
+	do_div(duration, tb_ticks_per_usec);
+	if (unlikely(duration > 0xFFFFFFFF)) {
+		printk(KERN_ERR"%s - duration too big -> overflow"
+			" duration %lld type %d exit #%d\n",
+			__func__, duration, type,
+			vcpu->arch.timing_count_type[type]);
+		return;
+	}
+
+	vcpu->arch.timing_count_type[type]++;
+
+	/* sum */
+	old = vcpu->arch.timing_sum_duration[type];
+	vcpu->arch.timing_sum_duration[type] += duration;
+	if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
+		printk(KERN_ERR"%s - wrap adding sum of durations"
+			" old %lld new %lld type %d exit # of type %d\n",
+			__func__, old, vcpu->arch.timing_sum_duration[type],
+			type, vcpu->arch.timing_count_type[type]);
+	}
+
+	/* square sum */
+	old = vcpu->arch.timing_sum_quad_duration[type];
+	vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
+	if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
+		printk(KERN_ERR"%s - wrap adding sum of squared durations"
+			" old %lld new %lld type %d exit # of type %d\n",
+			__func__, old,
+			vcpu->arch.timing_sum_quad_duration[type],
+			type, vcpu->arch.timing_count_type[type]);
+	}
+
+	/* set min/max */
+	if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
+		vcpu->arch.timing_min_duration[type] = duration;
+	if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
+		vcpu->arch.timing_max_duration[type] = duration;
+}
+
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
+{
+	u64 exit = vcpu->arch.timing_last_exit;
+	u64 enter = vcpu->arch.timing_last_enter.tv64;
+
+	/* save exit time, used next exit when the reenter time is known */
+	vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
+
+	if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0))
+		return; /* skip incomplete cycle (e.g. after reset) */
+
+	/* update statistics for average and standard deviation */
+	add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type);
+	/* enter -> timing_last_exit is time spent in guest - log this too */
+	add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter),
+			TIMEINGUEST);
+}
+
+static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
+	[MMIO_EXITS] = 			"MMIO",
+	[DCR_EXITS] =			"DCR",
+	[SIGNAL_EXITS] =		"SIGNAL",
+	[ITLB_REAL_MISS_EXITS] =	"ITLBREAL",
+	[ITLB_VIRT_MISS_EXITS] =	"ITLBVIRT",
+	[DTLB_REAL_MISS_EXITS] =	"DTLBREAL",
+	[DTLB_VIRT_MISS_EXITS] =	"DTLBVIRT",
+	[SYSCALL_EXITS] =		"SYSCALL",
+	[ISI_EXITS] =			"ISI",
+	[DSI_EXITS] =			"DSI",
+	[EMULATED_INST_EXITS] =		"EMULINST",
+	[EMULATED_MTMSRWE_EXITS] =	"EMUL_WAIT",
+	[EMULATED_WRTEE_EXITS] =	"EMUL_WRTEE",
+	[EMULATED_MTSPR_EXITS] =	"EMUL_MTSPR",
+	[EMULATED_MFSPR_EXITS] =	"EMUL_MFSPR",
+	[EMULATED_MTMSR_EXITS] =	"EMUL_MTMSR",
+	[EMULATED_MFMSR_EXITS] =	"EMUL_MFMSR",
+	[EMULATED_TLBSX_EXITS] =	"EMUL_TLBSX",
+	[EMULATED_TLBWE_EXITS] =	"EMUL_TLBWE",
+	[EMULATED_RFI_EXITS] =		"EMUL_RFI",
+	[DEC_EXITS] =			"DEC",
+	[EXT_INTR_EXITS] =		"EXTINT",
+	[HALT_WAKEUP] =			"HALT",
+	[USR_PR_INST] =			"USR_PR_INST",
+	[FP_UNAVAIL] =			"FP_UNAVAIL",
+	[DEBUG_EXITS] =			"DEBUG",
+	[TIMEINGUEST] =			"TIMEINGUEST"
+};
+
+static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
+{
+	struct kvm_vcpu *vcpu = m->private;
+	int i;
+	u64 min, max;
+
+	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+		if (vcpu->arch.timing_min_duration[i] == 0xFFFFFFFF)
+			min = 0;
+		else
+			min = vcpu->arch.timing_min_duration[i];
+		if (vcpu->arch.timing_max_duration[i] == 0)
+			max = 0;
+		else
+			max = vcpu->arch.timing_max_duration[i];
+
+		seq_printf(m, "%12s: count %10d min %10lld "
+			"max %10lld sum %20lld sum_quad %20lld\n",
+			kvm_exit_names[i], vcpu->arch.timing_count_type[i],
+			vcpu->arch.timing_min_duration[i],
+			vcpu->arch.timing_max_duration[i],
+			vcpu->arch.timing_sum_duration[i],
+			vcpu->arch.timing_sum_quad_duration[i]);
+	}
+	return 0;
+}
+
+static ssize_t kvmppc_exit_timing_write(struct file *file,
+				       const char __user *user_buf,
+				       size_t count, loff_t *ppos)
+{
+	size_t len;
+	int err;
+	const char __user *p;
+	char c;
+
+	len = 0;
+	p = user_buf;
+	while (len < count) {
+		if (get_user(c, p++))
+			err = -EFAULT;
+		if (c == 0 || c == '\n')
+			break;
+		len++;
+	}
+
+	if (len > 1) {
+		err = -EINVAL;
+		goto done;
+	}
+
+	if (copy_from_user(&c, user_buf, sizeof(c))) {
+		err = -EFAULT;
+		goto done;
+	}
+
+	if (c == 'c') {
+		struct seq_file *seqf = (struct seq_file *)file->private_data;
+		struct kvm_vcpu *vcpu = seqf->private;
+		/* write does not affect out buffers previsously generated with
+		 * show. Seq file is locked here to prevent races of init with
+		 * a show call */
+		mutex_lock(&seqf->lock);
+		kvmppc_init_timing_stats(vcpu);
+		mutex_unlock(&seqf->lock);
+		err = count;
+	} else {
+		err = -EINVAL;
+		goto done;
+	}
+
+done:
+	return err;
+}
+
+static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, kvmppc_exit_timing_show, inode->i_private);
+}
+
+static struct file_operations kvmppc_exit_timing_fops = {
+	.owner   = THIS_MODULE,
+	.open    = kvmppc_exit_timing_open,
+	.read    = seq_read,
+	.write   = kvmppc_exit_timing_write,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
+{
+	static char dbg_fname[50];
+	struct dentry *debugfs_file;
+
+	snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%03u_timing",
+		 current->pid, id);
+	debugfs_file = debugfs_create_file(dbg_fname, 0666,
+					kvm_debugfs_dir, vcpu,
+					&kvmppc_exit_timing_fops);
+
+	if (!debugfs_file) {
+		printk(KERN_ERR"%s: error creating debugfs file %s\n",
+			__func__, dbg_fname);
+		return;
+	}
+
+	vcpu->arch.debugfs_exit_timing = debugfs_file;
+}
+
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.debugfs_exit_timing) {
+		debugfs_remove(vcpu->arch.debugfs_exit_timing);
+		vcpu->arch.debugfs_exit_timing = NULL;
+	}
+}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
new file mode 100644
index 0000000..1af7181
--- /dev/null
+++ b/arch/powerpc/kvm/timing.h
@@ -0,0 +1,102 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#ifndef __POWERPC_KVM_EXITTIMING_H__
+#define __POWERPC_KVM_EXITTIMING_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_host.h>
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
+
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
+{
+	vcpu->arch.last_exit_type = type;
+}
+
+#else
+/* if exit timing is not configured there is no need to build the c file */
+static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
+						unsigned int id) {}
+static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
+#endif /* CONFIG_KVM_EXIT_TIMING */
+
+/* account the exit in kvm_stats */
+static inline void account_exit_stat(struct kvm_vcpu *vcpu, int type)
+{
+	/* type has to be known at build time for optimization */
+	BUILD_BUG_ON(__builtin_constant_p(type));
+	switch (type) {
+	case EXT_INTR_EXITS:
+		vcpu->stat.ext_intr_exits++;
+		break;
+	case DEC_EXITS:
+		vcpu->stat.dec_exits++;
+		break;
+	case EMULATED_INST_EXITS:
+		vcpu->stat.emulated_inst_exits++;
+		break;
+	case DCR_EXITS:
+		vcpu->stat.dcr_exits++;
+		break;
+	case DSI_EXITS:
+		vcpu->stat.dsi_exits++;
+		break;
+	case ISI_EXITS:
+		vcpu->stat.isi_exits++;
+		break;
+	case SYSCALL_EXITS:
+		vcpu->stat.syscall_exits++;
+		break;
+	case DTLB_REAL_MISS_EXITS:
+		vcpu->stat.dtlb_real_miss_exits++;
+		break;
+	case DTLB_VIRT_MISS_EXITS:
+		vcpu->stat.dtlb_virt_miss_exits++;
+		break;
+	case MMIO_EXITS:
+		vcpu->stat.mmio_exits++;
+		break;
+	case ITLB_REAL_MISS_EXITS:
+		vcpu->stat.itlb_real_miss_exits++;
+		break;
+	case ITLB_VIRT_MISS_EXITS:
+		vcpu->stat.itlb_virt_miss_exits++;
+		break;
+	case SIGNAL_EXITS:
+		vcpu->stat.signal_exits++;
+		break;
+	}
+}
+
+/* wrapper to set exit time and account for it in kvm_stats */
+static inline void account_exit(struct kvm_vcpu *vcpu, int type)
+{
+	kvmppc_set_exit_type(vcpu, type);
+	account_exit_stat(vcpu, type);
+}
+
+#endif /* __POWERPC_KVM_EXITTIMING_H__ */
-- 
cgit v0.10.2


From 7b7015914b30ad8d9136d41412c5129b9bc9af70 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Tue, 2 Dec 2008 15:51:58 -0600
Subject: KVM: ppc: mostly cosmetic updates to the exit timing accounting code

The only significant changes were to kvmppc_exit_timing_write() and
kvmppc_exit_timing_show(), both of which were dramatically simplified.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 2f5b49f..c1e436f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -102,9 +102,8 @@ enum kvm_exit_types {
 	__NUMBER_OF_KVM_EXIT_TYPES
 };
 
-#ifdef CONFIG_KVM_EXIT_TIMING
 /* allow access to big endian 32bit upper/lower parts and 64bit var */
-struct exit_timing {
+struct kvmppc_exit_timing {
 	union {
 		u64 tv64;
 		struct {
@@ -112,7 +111,6 @@ struct exit_timing {
 		} tv32;
 	};
 };
-#endif
 
 struct kvm_arch {
 };
@@ -174,8 +172,8 @@ struct kvm_vcpu_arch {
 	u32 dbcr1;
 
 #ifdef CONFIG_KVM_EXIT_TIMING
-	struct exit_timing timing_exit;
-	struct exit_timing timing_last_enter;
+	struct kvmppc_exit_timing timing_exit;
+	struct kvmppc_exit_timing timing_last_enter;
 	u32 last_exit_type;
 	u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
 	u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 69f88d53..82489a7 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -132,7 +132,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				run->dcr.is_write = 0;
 				vcpu->arch.io_gpr = rt;
 				vcpu->arch.dcr_needed = 1;
-				account_exit(vcpu, DCR_EXITS);
+				kvmppc_account_exit(vcpu, DCR_EXITS);
 				emulated = EMULATE_DO_DCR;
 			}
 
@@ -152,7 +152,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				run->dcr.data = vcpu->arch.gpr[rs];
 				run->dcr.is_write = 1;
 				vcpu->arch.dcr_needed = 1;
-				account_exit(vcpu, DCR_EXITS);
+				kvmppc_account_exit(vcpu, DCR_EXITS);
 				emulated = EMULATE_DO_DCR;
 			}
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 0f17124..35485dd 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -202,7 +202,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 
 	case BOOKE_INTERRUPT_EXTERNAL:
-		account_exit(vcpu, EXT_INTR_EXITS);
+		kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
 		if (need_resched())
 			cond_resched();
 		r = RESUME_GUEST;
@@ -212,7 +212,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		/* Since we switched IVPR back to the host's value, the host
 		 * handled this interrupt the moment we enabled interrupts.
 		 * Now we just offer it a chance to reschedule the guest. */
-		account_exit(vcpu, DEC_EXITS);
+		kvmppc_account_exit(vcpu, DEC_EXITS);
 		if (need_resched())
 			cond_resched();
 		r = RESUME_GUEST;
@@ -225,7 +225,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			vcpu->arch.esr = vcpu->arch.fault_esr;
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
 			r = RESUME_GUEST;
-			account_exit(vcpu, USR_PR_INST);
+			kvmppc_account_exit(vcpu, USR_PR_INST);
 			break;
 		}
 
@@ -233,7 +233,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		switch (er) {
 		case EMULATE_DONE:
 			/* don't overwrite subtypes, just account kvm_stats */
-			account_exit_stat(vcpu, EMULATED_INST_EXITS);
+			kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
 			/* Future optimization: only reload non-volatiles if
 			 * they were actually modified by emulation. */
 			r = RESUME_GUEST_NV;
@@ -259,7 +259,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	case BOOKE_INTERRUPT_FP_UNAVAIL:
 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
-		account_exit(vcpu, FP_UNAVAIL);
+		kvmppc_account_exit(vcpu, FP_UNAVAIL);
 		r = RESUME_GUEST;
 		break;
 
@@ -267,20 +267,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		vcpu->arch.dear = vcpu->arch.fault_dear;
 		vcpu->arch.esr = vcpu->arch.fault_esr;
 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
-		account_exit(vcpu, DSI_EXITS);
+		kvmppc_account_exit(vcpu, DSI_EXITS);
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_INST_STORAGE:
 		vcpu->arch.esr = vcpu->arch.fault_esr;
 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
-		account_exit(vcpu, ISI_EXITS);
+		kvmppc_account_exit(vcpu, ISI_EXITS);
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_SYSCALL:
 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
-		account_exit(vcpu, SYSCALL_EXITS);
+		kvmppc_account_exit(vcpu, SYSCALL_EXITS);
 		r = RESUME_GUEST;
 		break;
 
@@ -299,7 +299,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
 			vcpu->arch.dear = vcpu->arch.fault_dear;
 			vcpu->arch.esr = vcpu->arch.fault_esr;
-			account_exit(vcpu, DTLB_REAL_MISS_EXITS);
+			kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
 			r = RESUME_GUEST;
 			break;
 		}
@@ -317,13 +317,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * invoking the guest. */
 			kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
 			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
-			account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
+			kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
 			r = RESUME_GUEST;
 		} else {
 			/* Guest has mapped and accessed a page which is not
 			 * actually RAM. */
 			r = kvmppc_emulate_mmio(run, vcpu);
-			account_exit(vcpu, MMIO_EXITS);
+			kvmppc_account_exit(vcpu, MMIO_EXITS);
 		}
 
 		break;
@@ -345,11 +345,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
-			account_exit(vcpu, ITLB_REAL_MISS_EXITS);
+			kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
 			break;
 		}
 
-		account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
+		kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
 
 		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
 		gpaddr = tlb_xlate(gtlbe, eaddr);
@@ -383,7 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		mtspr(SPRN_DBSR, dbsr);
 
 		run->exit_reason = KVM_EXIT_DEBUG;
-		account_exit(vcpu, DEBUG_EXITS);
+		kvmppc_account_exit(vcpu, DEBUG_EXITS);
 		r = RESUME_HOST;
 		break;
 	}
@@ -404,7 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		if (signal_pending(current)) {
 			run->exit_reason = KVM_EXIT_INTR;
 			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-			account_exit(vcpu, SIGNAL_EXITS);
+			kvmppc_account_exit(vcpu, SIGNAL_EXITS);
 		}
 	}
 
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index f42d272..47ee603 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -12,7 +12,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright IBM Corp. 2007
+ * Copyright IBM Corp. 2008
  *
  * Authors: Hollis Blanchard <hollisb@us.ibm.com>
  *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
@@ -24,10 +24,11 @@
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 
-#include "timing.h"
 #include <asm/time.h>
 #include <asm-generic/div64.h>
 
+#include "timing.h"
+
 void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -52,8 +53,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
 	local_irq_enable();
 }
 
-static void add_exit_timing(struct kvm_vcpu *vcpu,
-					u64 duration, int type)
+static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
 {
 	u64 old;
 
@@ -115,54 +115,46 @@ void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
 }
 
 static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
-	[MMIO_EXITS] = 			"MMIO",
-	[DCR_EXITS] =			"DCR",
-	[SIGNAL_EXITS] =		"SIGNAL",
-	[ITLB_REAL_MISS_EXITS] =	"ITLBREAL",
-	[ITLB_VIRT_MISS_EXITS] =	"ITLBVIRT",
-	[DTLB_REAL_MISS_EXITS] =	"DTLBREAL",
-	[DTLB_VIRT_MISS_EXITS] =	"DTLBVIRT",
-	[SYSCALL_EXITS] =		"SYSCALL",
-	[ISI_EXITS] =			"ISI",
-	[DSI_EXITS] =			"DSI",
-	[EMULATED_INST_EXITS] =		"EMULINST",
-	[EMULATED_MTMSRWE_EXITS] =	"EMUL_WAIT",
-	[EMULATED_WRTEE_EXITS] =	"EMUL_WRTEE",
-	[EMULATED_MTSPR_EXITS] =	"EMUL_MTSPR",
-	[EMULATED_MFSPR_EXITS] =	"EMUL_MFSPR",
-	[EMULATED_MTMSR_EXITS] =	"EMUL_MTMSR",
-	[EMULATED_MFMSR_EXITS] =	"EMUL_MFMSR",
-	[EMULATED_TLBSX_EXITS] =	"EMUL_TLBSX",
-	[EMULATED_TLBWE_EXITS] =	"EMUL_TLBWE",
-	[EMULATED_RFI_EXITS] =		"EMUL_RFI",
-	[DEC_EXITS] =			"DEC",
-	[EXT_INTR_EXITS] =		"EXTINT",
-	[HALT_WAKEUP] =			"HALT",
-	[USR_PR_INST] =			"USR_PR_INST",
-	[FP_UNAVAIL] =			"FP_UNAVAIL",
-	[DEBUG_EXITS] =			"DEBUG",
-	[TIMEINGUEST] =			"TIMEINGUEST"
+	[MMIO_EXITS] =              "MMIO",
+	[DCR_EXITS] =               "DCR",
+	[SIGNAL_EXITS] =            "SIGNAL",
+	[ITLB_REAL_MISS_EXITS] =    "ITLBREAL",
+	[ITLB_VIRT_MISS_EXITS] =    "ITLBVIRT",
+	[DTLB_REAL_MISS_EXITS] =    "DTLBREAL",
+	[DTLB_VIRT_MISS_EXITS] =    "DTLBVIRT",
+	[SYSCALL_EXITS] =           "SYSCALL",
+	[ISI_EXITS] =               "ISI",
+	[DSI_EXITS] =               "DSI",
+	[EMULATED_INST_EXITS] =     "EMULINST",
+	[EMULATED_MTMSRWE_EXITS] =  "EMUL_WAIT",
+	[EMULATED_WRTEE_EXITS] =    "EMUL_WRTEE",
+	[EMULATED_MTSPR_EXITS] =    "EMUL_MTSPR",
+	[EMULATED_MFSPR_EXITS] =    "EMUL_MFSPR",
+	[EMULATED_MTMSR_EXITS] =    "EMUL_MTMSR",
+	[EMULATED_MFMSR_EXITS] =    "EMUL_MFMSR",
+	[EMULATED_TLBSX_EXITS] =    "EMUL_TLBSX",
+	[EMULATED_TLBWE_EXITS] =    "EMUL_TLBWE",
+	[EMULATED_RFI_EXITS] =      "EMUL_RFI",
+	[DEC_EXITS] =               "DEC",
+	[EXT_INTR_EXITS] =          "EXTINT",
+	[HALT_WAKEUP] =             "HALT",
+	[USR_PR_INST] =             "USR_PR_INST",
+	[FP_UNAVAIL] =              "FP_UNAVAIL",
+	[DEBUG_EXITS] =             "DEBUG",
+	[TIMEINGUEST] =             "TIMEINGUEST"
 };
 
 static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
 {
 	struct kvm_vcpu *vcpu = m->private;
 	int i;
-	u64 min, max;
+
+	seq_printf(m, "%s", "type	count	min	max	sum	sum_squared\n");
 
 	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
-		if (vcpu->arch.timing_min_duration[i] == 0xFFFFFFFF)
-			min = 0;
-		else
-			min = vcpu->arch.timing_min_duration[i];
-		if (vcpu->arch.timing_max_duration[i] == 0)
-			max = 0;
-		else
-			max = vcpu->arch.timing_max_duration[i];
-
-		seq_printf(m, "%12s: count %10d min %10lld "
-			"max %10lld sum %20lld sum_quad %20lld\n",
-			kvm_exit_names[i], vcpu->arch.timing_count_type[i],
+		seq_printf(m, "%12s	%10d	%10lld	%10lld	%20lld	%20lld\n",
+			kvm_exit_names[i],
+			vcpu->arch.timing_count_type[i],
 			vcpu->arch.timing_min_duration[i],
 			vcpu->arch.timing_max_duration[i],
 			vcpu->arch.timing_sum_duration[i],
@@ -171,31 +163,19 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
 	return 0;
 }
 
+/* Write 'c' to clear the timing statistics. */
 static ssize_t kvmppc_exit_timing_write(struct file *file,
 				       const char __user *user_buf,
 				       size_t count, loff_t *ppos)
 {
-	size_t len;
-	int err;
-	const char __user *p;
+	int err = -EINVAL;
 	char c;
 
-	len = 0;
-	p = user_buf;
-	while (len < count) {
-		if (get_user(c, p++))
-			err = -EFAULT;
-		if (c == 0 || c == '\n')
-			break;
-		len++;
-	}
-
-	if (len > 1) {
-		err = -EINVAL;
+	if (count > 1) {
 		goto done;
 	}
 
-	if (copy_from_user(&c, user_buf, sizeof(c))) {
+	if (get_user(c, user_buf)) {
 		err = -EFAULT;
 		goto done;
 	}
@@ -203,16 +183,13 @@ static ssize_t kvmppc_exit_timing_write(struct file *file,
 	if (c == 'c') {
 		struct seq_file *seqf = (struct seq_file *)file->private_data;
 		struct kvm_vcpu *vcpu = seqf->private;
-		/* write does not affect out buffers previsously generated with
-		 * show. Seq file is locked here to prevent races of init with
+		/* Write does not affect our buffers previously generated with
+		 * show. seq_file is locked here to prevent races of init with
 		 * a show call */
 		mutex_lock(&seqf->lock);
 		kvmppc_init_timing_stats(vcpu);
 		mutex_unlock(&seqf->lock);
 		err = count;
-	} else {
-		err = -EINVAL;
-		goto done;
 	}
 
 done:
@@ -238,7 +215,7 @@ void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
 	static char dbg_fname[50];
 	struct dentry *debugfs_file;
 
-	snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%03u_timing",
+	snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
 		 current->pid, id);
 	debugfs_file = debugfs_create_file(dbg_fname, 0666,
 					kvm_debugfs_dir, vcpu,
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index 1af7181..bb13b1f 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -45,7 +45,7 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
 #endif /* CONFIG_KVM_EXIT_TIMING */
 
 /* account the exit in kvm_stats */
-static inline void account_exit_stat(struct kvm_vcpu *vcpu, int type)
+static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
 {
 	/* type has to be known at build time for optimization */
 	BUILD_BUG_ON(__builtin_constant_p(type));
@@ -93,10 +93,10 @@ static inline void account_exit_stat(struct kvm_vcpu *vcpu, int type)
 }
 
 /* wrapper to set exit time and account for it in kvm_stats */
-static inline void account_exit(struct kvm_vcpu *vcpu, int type)
+static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
 {
 	kvmppc_set_exit_type(vcpu, type);
-	account_exit_stat(vcpu, type);
+	kvmppc_account_exit_stat(vcpu, type);
 }
 
 #endif /* __POWERPC_KVM_EXITTIMING_H__ */
-- 
cgit v0.10.2


From 6f89724829cfd4ad6771a92fd4b8d59c90c7220c Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@redhat.com>
Date: Wed, 3 Dec 2008 13:40:51 -0200
Subject: KVM: Really remove a slot when a user ask us so

Right now, KVM does not remove a slot when we do a
register ioctl for size 0 (would be the expected behaviour).

Instead, we only mark it as empty, but keep all bitmaps
and allocated data structures present. It completely
nullifies our chances of reusing that same slot again
for mapping a different piece of memory.

In this patch, we destroy rmaps, and vfree() the
pointers that used to hold the dirty bitmap, rmap
and lpage_info structures.

Signed-off-by: Glauber Costa <glommer@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e41d39d..fd9cc79 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1020,7 +1020,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		goto out_free;
 	}
 
-	kvm_free_physmem_slot(&old, &new);
+	kvm_free_physmem_slot(&old, npages ? &new : NULL);
+	/* Slot deletion case: we have to update the current slot */
+	if (!npages)
+		*memslot = old;
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
 	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
-- 
cgit v0.10.2


From 45ed60b371aeae6ed80f7e9d594a5e6412edc176 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Thu, 4 Dec 2008 14:25:38 +0100
Subject: KVM: x86 emulator: Extend the opcode descriptor

Extend the opcode descriptor to 32 bits. This is needed by the
introduction of a new Src2 operand type.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 69b330b..7a07ca4 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -76,7 +76,7 @@ enum {
 	Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
 };
 
-static u16 opcode_table[256] = {
+static u32 opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -195,7 +195,7 @@ static u16 opcode_table[256] = {
 	ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
 };
 
-static u16 twobyte_table[256] = {
+static u32 twobyte_table[256] = {
 	/* 0x00 - 0x0F */
 	0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
 	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
@@ -253,7 +253,7 @@ static u16 twobyte_table[256] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
-static u16 group_table[] = {
+static u32 group_table[] = {
 	[Group1_80*8] =
 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
@@ -297,7 +297,7 @@ static u16 group_table[] = {
 	SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
 };
 
-static u16 group2_table[] = {
+static u32 group2_table[] = {
 	[Group7*8] =
 	SrcNone | ModRM, 0, 0, 0,
 	SrcNone | ModRM | DstMem | Mov, 0,
-- 
cgit v0.10.2


From 0dc8d10f7d848b63c8d32cf6fd31ba7def792ac9 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Thu, 4 Dec 2008 14:26:42 +0100
Subject: KVM: x86 emulator: add Src2 decode set

Instruction like shld has three operands, so we need to add a Src2
decode set. We start with Src2None, Src2CL, and Src2ImmByte, Src2One to
support shld/shrd and we will expand it later.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 16a0026..6a15973 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -123,6 +123,7 @@ struct decode_cache {
 	u8 ad_bytes;
 	u8 rex_prefix;
 	struct operand src;
+	struct operand src2;
 	struct operand dst;
 	bool has_seg_override;
 	u8 seg_override;
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 7a07ca4..7f5cd62 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -70,6 +70,12 @@
 #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
 #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
 #define GroupMask   0xff        /* Group number stored in bits 0:7 */
+/* Source 2 operand type */
+#define Src2None    (0<<29)
+#define Src2CL      (1<<29)
+#define Src2ImmByte (2<<29)
+#define Src2One     (3<<29)
+#define Src2Mask    (7<<29)
 
 enum {
 	Group1_80, Group1_81, Group1_82, Group1_83,
@@ -1000,6 +1006,29 @@ done_prefixes:
 		break;
 	}
 
+	/*
+	 * Decode and fetch the second source operand: register, memory
+	 * or immediate.
+	 */
+	switch (c->d & Src2Mask) {
+	case Src2None:
+		break;
+	case Src2CL:
+		c->src2.bytes = 1;
+		c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
+		break;
+	case Src2ImmByte:
+		c->src2.type = OP_IMM;
+		c->src2.ptr = (unsigned long *)c->eip;
+		c->src2.bytes = 1;
+		c->src2.val = insn_fetch(u8, 1, c->eip);
+		break;
+	case Src2One:
+		c->src2.bytes = 1;
+		c->src2.val = 1;
+		break;
+	}
+
 	/* Decode and fetch the destination operand: register or memory. */
 	switch (c->d & DstMask) {
 	case ImplicitOps:
-- 
cgit v0.10.2


From bfcadf83ec5aafe600e73dd427d997db7bcc1d12 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Thu, 4 Dec 2008 14:27:38 +0100
Subject: KVM: x86 emulator: add a new "implied 1" Src decode type

Add SrcOne operand type when we need to decode an implied '1' like with
regular shift instruction

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 7f5cd62..0c75306 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -58,6 +58,7 @@
 #define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
 #define SrcImm      (5<<4)	/* Immediate operand. */
 #define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
+#define SrcOne      (7<<4)	/* Implied '1' */
 #define SrcMask     (7<<4)
 /* Generic ModRM decode. */
 #define ModRM       (1<<7)
@@ -1004,6 +1005,10 @@ done_prefixes:
 		c->src.bytes = 1;
 		c->src.val = insn_fetch(s8, 1, c->eip);
 		break;
+	case SrcOne:
+		c->src.bytes = 1;
+		c->src.val = 1;
+		break;
 	}
 
 	/*
-- 
cgit v0.10.2


From d175226a5f54817ba427368c6b739aefa7780fb2 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Thu, 4 Dec 2008 14:29:00 +0100
Subject: KVM: x86 emulator: add the assembler code for three operands

Add the assembler code for instruction with three operands and one
operand is stored in ECX register

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 0c75306..9ae6d5b 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -431,6 +431,45 @@ static u32 group2_table[] = {
 	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
 			     "w", "r", _LO32, "r", "", "r")
 
+/* Instruction has three operands and one operand is stored in ECX register */
+#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) 	\
+	do {									\
+		unsigned long _tmp;						\
+		_type _clv  = (_cl).val;  					\
+		_type _srcv = (_src).val;    					\
+		_type _dstv = (_dst).val;					\
+										\
+		__asm__ __volatile__ (						\
+			_PRE_EFLAGS("0", "5", "2")				\
+			_op _suffix " %4,%1 \n"					\
+			_POST_EFLAGS("0", "5", "2")				\
+			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
+			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
+			); 							\
+										\
+		(_cl).val  = (unsigned long) _clv;				\
+		(_src).val = (unsigned long) _srcv;				\
+		(_dst).val = (unsigned long) _dstv;				\
+	} while (0)
+
+#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)				\
+	do {									\
+		switch ((_dst).bytes) {						\
+		case 2:								\
+			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
+						"w", unsigned short);         	\
+			break;							\
+		case 4: 							\
+			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
+						"l", unsigned int);           	\
+			break;							\
+		case 8:								\
+			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
+						"q", unsigned long));  		\
+			break;							\
+		}								\
+	} while (0)
+
 #define __emulate_1op(_op, _dst, _eflags, _suffix)			\
 	do {								\
 		unsigned long _tmp;					\
-- 
cgit v0.10.2


From 9bf8ea42fe22d7d1c48044148fa658cb9083d49c Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Thu, 4 Dec 2008 14:30:13 +0100
Subject: KVM: x86 emulator: add the emulation of shld and shrd instructions

Add emulation of shld and shrd instructions

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 9ae6d5b..219dc31 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -237,9 +237,14 @@ static u32 twobyte_table[256] = {
 	/* 0x90 - 0x9F */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xA0 - 0xA7 */
-	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
+	0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+	DstMem | SrcReg | Src2ImmByte | ModRM,
+	DstMem | SrcReg | Src2CL | ModRM, 0, 0,
 	/* 0xA8 - 0xAF */
-	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0,
+	0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+	DstMem | SrcReg | Src2ImmByte | ModRM,
+	DstMem | SrcReg | Src2CL | ModRM,
+	ModRM, 0,
 	/* 0xB0 - 0xB7 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
 	    DstMem | SrcReg | ModRM | BitOp,
@@ -2037,12 +2042,20 @@ twobyte_insn:
 		c->src.val &= (c->dst.bytes << 3) - 1;
 		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
 		break;
+	case 0xa4: /* shld imm8, r, r/m */
+	case 0xa5: /* shld cl, r, r/m */
+		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
+		break;
 	case 0xab:
 	      bts:		/* bts */
 		/* only subword offset */
 		c->src.val &= (c->dst.bytes << 3) - 1;
 		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
 		break;
+	case 0xac: /* shrd imm8, r, r/m */
+	case 0xad: /* shrd cl, r, r/m */
+		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
+		break;
 	case 0xae:              /* clflush */
 		break;
 	case 0xb0 ... 0xb1:	/* cmpxchg */
-- 
cgit v0.10.2


From fbce554e940a983d005e29849636d0ef54b3eb18 Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Thu, 4 Dec 2008 11:11:40 +0000
Subject: KVM: x86 emulator: Fix handling of VMMCALL instruction

The VMMCALL instruction doesn't get recognised and isn't processed
by the emulator.

This is seen on an Intel host that tries to execute the VMMCALL
instruction after a guest live migrates from an AMD host.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 219dc31..d174db7 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -311,7 +311,7 @@ static u32 group_table[] = {
 
 static u32 group2_table[] = {
 	[Group7*8] =
-	SrcNone | ModRM, 0, 0, 0,
+	SrcNone | ModRM, 0, 0, SrcNone | ModRM,
 	SrcNone | ModRM | DstMem | Mov, 0,
 	SrcMem16 | ModRM | Mov, 0,
 };
-- 
cgit v0.10.2


From 60c8aec6e2c9923492dabbd6b67e34692bd26c20 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 1 Dec 2008 22:32:02 -0200
Subject: KVM: MMU: use page array in unsync walk

Instead of invoking the handler directly collect pages into
an array so the caller can work with it.

Simplifies TLB flush collapsing.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f58f7eb..93d0aed 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -200,7 +200,7 @@ struct kvm_mmu_page {
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
 	bool unsync;
-	bool unsync_children;
+	unsigned int unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
 		struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dd20b19..7ce92f7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -908,8 +908,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte)
 	struct kvm_mmu_page *sp = page_header(__pa(spte));
 
 	index = spte - sp->spt;
-	__set_bit(index, sp->unsync_child_bitmap);
-	sp->unsync_children = 1;
+	if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
+		sp->unsync_children++;
+	WARN_ON(!sp->unsync_children);
 }
 
 static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
@@ -936,7 +937,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
 
 static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
-	sp->unsync_children = 1;
 	kvm_mmu_update_parents_unsync(sp);
 	return 1;
 }
@@ -967,18 +967,41 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
 }
 
+#define KVM_PAGE_ARRAY_NR 16
+
+struct kvm_mmu_pages {
+	struct mmu_page_and_offset {
+		struct kvm_mmu_page *sp;
+		unsigned int idx;
+	} page[KVM_PAGE_ARRAY_NR];
+	unsigned int nr;
+};
+
 #define for_each_unsync_children(bitmap, idx)		\
 	for (idx = find_first_bit(bitmap, 512);		\
 	     idx < 512;					\
 	     idx = find_next_bit(bitmap, 512, idx+1))
 
-static int mmu_unsync_walk(struct kvm_mmu_page *sp,
-			   struct kvm_unsync_walk *walker)
+int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
+		   int idx)
 {
-	int i, ret;
+	int i;
 
-	if (!sp->unsync_children)
-		return 0;
+	if (sp->unsync)
+		for (i=0; i < pvec->nr; i++)
+			if (pvec->page[i].sp == sp)
+				return 0;
+
+	pvec->page[pvec->nr].sp = sp;
+	pvec->page[pvec->nr].idx = idx;
+	pvec->nr++;
+	return (pvec->nr == KVM_PAGE_ARRAY_NR);
+}
+
+static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
+			   struct kvm_mmu_pages *pvec)
+{
+	int i, ret, nr_unsync_leaf = 0;
 
 	for_each_unsync_children(sp->unsync_child_bitmap, i) {
 		u64 ent = sp->spt[i];
@@ -988,17 +1011,22 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 			child = page_header(ent & PT64_BASE_ADDR_MASK);
 
 			if (child->unsync_children) {
-				ret = mmu_unsync_walk(child, walker);
-				if (ret)
+				if (mmu_pages_add(pvec, child, i))
+					return -ENOSPC;
+
+				ret = __mmu_unsync_walk(child, pvec);
+				if (!ret)
+					__clear_bit(i, sp->unsync_child_bitmap);
+				else if (ret > 0)
+					nr_unsync_leaf += ret;
+				else
 					return ret;
-				__clear_bit(i, sp->unsync_child_bitmap);
 			}
 
 			if (child->unsync) {
-				ret = walker->entry(child, walker);
-				__clear_bit(i, sp->unsync_child_bitmap);
-				if (ret)
-					return ret;
+				nr_unsync_leaf++;
+				if (mmu_pages_add(pvec, child, i))
+					return -ENOSPC;
 			}
 		}
 	}
@@ -1006,7 +1034,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 	if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
 		sp->unsync_children = 0;
 
-	return 0;
+	return nr_unsync_leaf;
+}
+
+static int mmu_unsync_walk(struct kvm_mmu_page *sp,
+			   struct kvm_mmu_pages *pvec)
+{
+	if (!sp->unsync_children)
+		return 0;
+
+	mmu_pages_add(pvec, sp, 0);
+	return __mmu_unsync_walk(sp, pvec);
 }
 
 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
@@ -1056,30 +1094,81 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	return 0;
 }
 
-struct sync_walker {
-	struct kvm_vcpu *vcpu;
-	struct kvm_unsync_walk walker;
+struct mmu_page_path {
+	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
+	unsigned int idx[PT64_ROOT_LEVEL-1];
 };
 
-static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+#define for_each_sp(pvec, sp, parents, i)			\
+		for (i = mmu_pages_next(&pvec, &parents, -1),	\
+			sp = pvec.page[i].sp;			\
+			i < pvec.nr && ({ sp = pvec.page[i].sp; 1;});	\
+			i = mmu_pages_next(&pvec, &parents, i))
+
+int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
+		   int i)
 {
-	struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
-						     walker);
-	struct kvm_vcpu *vcpu = sync_walk->vcpu;
+	int n;
+
+	for (n = i+1; n < pvec->nr; n++) {
+		struct kvm_mmu_page *sp = pvec->page[n].sp;
+
+		if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
+			parents->idx[0] = pvec->page[n].idx;
+			return n;
+		}
 
-	kvm_sync_page(vcpu, sp);
-	return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
+		parents->parent[sp->role.level-2] = sp;
+		parents->idx[sp->role.level-1] = pvec->page[n].idx;
+	}
+
+	return n;
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+void mmu_pages_clear_parents(struct mmu_page_path *parents)
 {
-	struct sync_walker walker = {
-		.walker = { .entry = mmu_sync_fn, },
-		.vcpu = vcpu,
-	};
+	struct kvm_mmu_page *sp;
+	unsigned int level = 0;
+
+	do {
+		unsigned int idx = parents->idx[level];
+
+		sp = parents->parent[level];
+		if (!sp)
+			return;
+
+		--sp->unsync_children;
+		WARN_ON((int)sp->unsync_children < 0);
+		__clear_bit(idx, sp->unsync_child_bitmap);
+		level++;
+	} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
+}
+
+static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
+			       struct mmu_page_path *parents,
+			       struct kvm_mmu_pages *pvec)
+{
+	parents->parent[parent->role.level-1] = NULL;
+	pvec->nr = 0;
+}
 
-	while (mmu_unsync_walk(sp, &walker.walker))
+static void mmu_sync_children(struct kvm_vcpu *vcpu,
+			      struct kvm_mmu_page *parent)
+{
+	int i;
+	struct kvm_mmu_page *sp;
+	struct mmu_page_path parents;
+	struct kvm_mmu_pages pages;
+
+	kvm_mmu_pages_init(parent, &parents, &pages);
+	while (mmu_unsync_walk(parent, &pages)) {
+		for_each_sp(pages, sp, parents, i) {
+			kvm_sync_page(vcpu, sp);
+			mmu_pages_clear_parents(&parents);
+		}
 		cond_resched_lock(&vcpu->kvm->mmu_lock);
+		kvm_mmu_pages_init(parent, &parents, &pages);
+	}
 }
 
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@@ -1245,33 +1334,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 	}
 }
 
-struct zap_walker {
-	struct kvm_unsync_walk walker;
-	struct kvm *kvm;
-	int zapped;
-};
-
-static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
-{
-	struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
-						     walker);
-	kvm_mmu_zap_page(zap_walk->kvm, sp);
-	zap_walk->zapped = 1;
-	return 0;
-}
-
-static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
+static int mmu_zap_unsync_children(struct kvm *kvm,
+				   struct kvm_mmu_page *parent)
 {
-	struct zap_walker walker = {
-		.walker = { .entry = mmu_zap_fn, },
-		.kvm = kvm,
-		.zapped = 0,
-	};
+	int i, zapped = 0;
+	struct mmu_page_path parents;
+	struct kvm_mmu_pages pages;
 
-	if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+	if (parent->role.level == PT_PAGE_TABLE_LEVEL)
 		return 0;
-	mmu_unsync_walk(sp, &walker.walker);
-	return walker.zapped;
+
+	kvm_mmu_pages_init(parent, &parents, &pages);
+	while (mmu_unsync_walk(parent, &pages)) {
+		struct kvm_mmu_page *sp;
+
+		for_each_sp(pages, sp, parents, i) {
+			kvm_mmu_zap_page(kvm, sp);
+			mmu_pages_clear_parents(&parents);
+		}
+		zapped += pages.nr;
+		kvm_mmu_pages_init(parent, &parents, &pages);
+	}
+
+	return zapped;
 }
 
 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
-- 
cgit v0.10.2


From b1a368218ad5b6e62380c8f206f16e6f18bf154c Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 1 Dec 2008 22:32:03 -0200
Subject: KVM: MMU: collapse remote TLB flushes on root sync

Collapse remote TLB flushes on root sync.

kernbench is 2.7% faster on 4-way guest. Improvements have been seen
with other loads such as AIM7.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7ce92f7..58c35de 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -621,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
 	return NULL;
 }
 
-static void rmap_write_protect(struct kvm *kvm, u64 gfn)
+static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 {
 	unsigned long *rmapp;
 	u64 *spte;
@@ -667,8 +667,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 		spte = rmap_next(kvm, rmapp, spte);
 	}
 
-	if (write_protected)
-		kvm_flush_remote_tlbs(kvm);
+	return write_protected;
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -1083,7 +1082,8 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		return 1;
 	}
 
-	rmap_write_protect(vcpu->kvm, sp->gfn);
+	if (rmap_write_protect(vcpu->kvm, sp->gfn))
+		kvm_flush_remote_tlbs(vcpu->kvm);
 	kvm_unlink_unsync_page(vcpu->kvm, sp);
 	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
 		kvm_mmu_zap_page(vcpu->kvm, sp);
@@ -1162,6 +1162,14 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
 
 	kvm_mmu_pages_init(parent, &parents, &pages);
 	while (mmu_unsync_walk(parent, &pages)) {
+		int protected = 0;
+
+		for_each_sp(pages, sp, parents, i)
+			protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
+
+		if (protected)
+			kvm_flush_remote_tlbs(vcpu->kvm);
+
 		for_each_sp(pages, sp, parents, i) {
 			kvm_sync_page(vcpu, sp);
 			mmu_pages_clear_parents(&parents);
@@ -1226,7 +1234,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	sp->role = role;
 	hlist_add_head(&sp->hash_link, bucket);
 	if (!metaphysical) {
-		rmap_write_protect(vcpu->kvm, gfn);
+		if (rmap_write_protect(vcpu->kvm, gfn))
+			kvm_flush_remote_tlbs(vcpu->kvm);
 		account_shadowed(vcpu->kvm, gfn);
 	}
 	if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
-- 
cgit v0.10.2


From 6cffe8ca4a2adf1ac5003d9cad08fe4434d6eee0 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 1 Dec 2008 22:32:04 -0200
Subject: KVM: MMU: skip global pgtables on sync due to cr3 switch

Skip syncing global pages on cr3 switch (but not on cr4/cr0). This is
important for Linux 32-bit guests with PAE, where the kmap page is
marked as global.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 93d0aed..65b1ed2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -182,6 +182,8 @@ struct kvm_mmu_page {
 	struct list_head link;
 	struct hlist_node hash_link;
 
+	struct list_head oos_link;
+
 	/*
 	 * The following two entries are used to key the shadow page in the
 	 * hash table.
@@ -200,6 +202,7 @@ struct kvm_mmu_page {
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
 	bool unsync;
+	bool global;
 	unsigned int unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
@@ -356,6 +359,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct list_head oos_global_pages;
 	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
@@ -385,6 +389,7 @@ struct kvm_vm_stat {
 	u32 mmu_recycled;
 	u32 mmu_cache_miss;
 	u32 mmu_unsync;
+	u32 mmu_unsync_global;
 	u32 remote_tlb_flush;
 	u32 lpages;
 };
@@ -603,6 +608,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 58c35de..cbac9e4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -793,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
+	INIT_LIST_HEAD(&sp->oos_link);
 	ASSERT(is_empty_shadow_page(sp->spt));
 	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	sp->multimapped = 0;
+	sp->global = 1;
 	sp->parent_pte = parent_pte;
 	--vcpu->kvm->arch.n_free_mmu_pages;
 	return sp;
@@ -1066,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 	return NULL;
 }
 
+static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	list_del(&sp->oos_link);
+	--kvm->stat.mmu_unsync_global;
+}
+
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	WARN_ON(!sp->unsync);
 	sp->unsync = 0;
+	if (sp->global)
+		kvm_unlink_unsync_global(kvm, sp);
 	--kvm->stat.mmu_unsync;
 }
 
@@ -1615,9 +1625,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		if (s->role.word != sp->role.word)
 			return 1;
 	}
-	kvm_mmu_mark_parents_unsync(vcpu, sp);
 	++vcpu->kvm->stat.mmu_unsync;
 	sp->unsync = 1;
+
+	if (sp->global) {
+		list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
+		++vcpu->kvm->stat.mmu_unsync_global;
+	} else
+		kvm_mmu_mark_parents_unsync(vcpu, sp);
+
 	mmu_convert_notrap(sp);
 	return 0;
 }
@@ -1643,12 +1659,21 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int largepage,
-		    gfn_t gfn, pfn_t pfn, bool speculative,
+		    int global, gfn_t gfn, pfn_t pfn, bool speculative,
 		    bool can_unsync)
 {
 	u64 spte;
 	int ret = 0;
 	u64 mt_mask = shadow_mt_mask;
+	struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
+
+	if (!global && sp->global) {
+		sp->global = 0;
+		if (sp->unsync) {
+			kvm_unlink_unsync_global(vcpu->kvm, sp);
+			kvm_mmu_mark_parents_unsync(vcpu, sp);
+		}
+	}
 
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
@@ -1717,8 +1742,8 @@ set_pte:
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
-			 int *ptwrite, int largepage, gfn_t gfn,
-			 pfn_t pfn, bool speculative)
+			 int *ptwrite, int largepage, int global,
+			 gfn_t gfn, pfn_t pfn, bool speculative)
 {
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1751,7 +1776,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		}
 	}
 	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-		      dirty, largepage, gfn, pfn, speculative, true)) {
+		      dirty, largepage, global, gfn, pfn, speculative, true)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -1808,7 +1833,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk,
 	    || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
 		mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
 			     0, walk->write, 1, &walk->pt_write,
-			     walk->largepage, gfn, walk->pfn, false);
+			     walk->largepage, 0, gfn, walk->pfn, false);
 		++vcpu->stat.pf_fixed;
 		return 1;
 	}
@@ -1995,6 +2020,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 	}
 }
 
+static void mmu_sync_global(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_mmu_page *sp, *n;
+
+	list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
+		kvm_sync_page(vcpu, sp);
+}
+
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
 	spin_lock(&vcpu->kvm->mmu_lock);
@@ -2002,6 +2036,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
+void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
+{
+	spin_lock(&vcpu->kvm->mmu_lock);
+	mmu_sync_global(vcpu);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+}
+
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
 {
 	return vaddr;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 84eee43..e644d81 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -274,7 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 		return;
 	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
-		     gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
+		     gpte & PT_DIRTY_MASK, NULL, largepage,
+		     gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
 		     pfn, true);
 }
 
@@ -301,8 +302,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
 		mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
 			     sw->user_fault, sw->write_fault,
 			     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
-			     sw->ptwrite, sw->largepage, gw->gfn, sw->pfn,
-			     false);
+			     sw->ptwrite, sw->largepage,
+			     gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
+			     gw->gfn, sw->pfn, false);
 		sw->sptep = sptep;
 		return 1;
 	}
@@ -580,7 +582,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		nr_present++;
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
-			 is_dirty_pte(gpte), 0, gfn,
+			 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
 			 spte_to_pfn(sp->spt[i]), true, false);
 	}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7a2aeba..774db00 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -104,6 +104,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "mmu_recycled", VM_STAT(mmu_recycled) },
 	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
 	{ "mmu_unsync", VM_STAT(mmu_unsync) },
+	{ "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
 	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 	{ "largepages", VM_STAT(lpages) },
 	{ NULL }
@@ -315,6 +316,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	kvm_x86_ops->set_cr0(vcpu, cr0);
 	vcpu->arch.cr0 = cr0;
 
+	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 	return;
 }
@@ -358,6 +360,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	}
 	kvm_x86_ops->set_cr4(vcpu, cr4);
 	vcpu->arch.cr4 = cr4;
+	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -4113,6 +4116,7 @@ struct  kvm *kvm_arch_create_vm(void)
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+	INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
 	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
-- 
cgit v0.10.2


From ad218f85e388e8ca816ff09d91c246cd014c53a8 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 1 Dec 2008 22:32:05 -0200
Subject: KVM: MMU: prepopulate the shadow on invlpg

If the guest executes invlpg, peek into the pagetable and attempt to
prepopulate the shadow entry.

Also stop dirty fault updates from interfering with the fork detector.

2% improvement on RHEL3/AIM7.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 65b1ed2..97215a4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -602,7 +602,8 @@ unsigned long segment_base(u16 selector);
 
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes);
+		       const u8 *new, int bytes,
+		       bool guest_initiated);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cbac9e4..863baf7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2441,7 +2441,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes)
+		       const u8 *new, int bytes,
+		       bool guest_initiated)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	struct kvm_mmu_page *sp;
@@ -2467,15 +2468,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, "pre pte write");
-	if (gfn == vcpu->arch.last_pt_write_gfn
-	    && !last_updated_pte_accessed(vcpu)) {
-		++vcpu->arch.last_pt_write_count;
-		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = 1;
-	} else {
-		vcpu->arch.last_pt_write_gfn = gfn;
-		vcpu->arch.last_pt_write_count = 1;
-		vcpu->arch.last_pte_updated = NULL;
+	if (guest_initiated) {
+		if (gfn == vcpu->arch.last_pt_write_gfn
+		    && !last_updated_pte_accessed(vcpu)) {
+			++vcpu->arch.last_pt_write_count;
+			if (vcpu->arch.last_pt_write_count >= 3)
+				flooded = 1;
+		} else {
+			vcpu->arch.last_pt_write_gfn = gfn;
+			vcpu->arch.last_pt_write_count = 1;
+			vcpu->arch.last_pte_updated = NULL;
+		}
 	}
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
@@ -2615,9 +2618,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
-	spin_lock(&vcpu->kvm->mmu_lock);
 	vcpu->arch.mmu.invlpg(vcpu, gva);
-	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_flush_tlb(vcpu);
 	++vcpu->stat.invlpg;
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index e644d81..d206401 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -82,6 +82,7 @@ struct shadow_walker {
 	int *ptwrite;
 	pfn_t pfn;
 	u64 *sptep;
+	gpa_t pte_gpa;
 };
 
 static gfn_t gpte_to_gfn(pt_element_t gpte)
@@ -222,7 +223,7 @@ walk:
 		if (ret)
 			goto walk;
 		pte |= PT_DIRTY_MASK;
-		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
 		walker->ptes[walker->level - 1] = pte;
 	}
 
@@ -468,8 +469,15 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
 				      struct kvm_vcpu *vcpu, u64 addr,
 				      u64 *sptep, int level)
 {
+	struct shadow_walker *sw =
+		container_of(_sw, struct shadow_walker, walker);
 
 	if (level == PT_PAGE_TABLE_LEVEL) {
+		struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+		sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
+		sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
 		if (is_shadow_present_pte(*sptep))
 			rmap_remove(vcpu->kvm, sptep);
 		set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
@@ -482,11 +490,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
 
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
+	pt_element_t gpte;
 	struct shadow_walker walker = {
 		.walker = { .entry = FNAME(shadow_invlpg_entry), },
+		.pte_gpa = -1,
 	};
 
+	spin_lock(&vcpu->kvm->mmu_lock);
 	walk_shadow(&walker.walker, vcpu, gva);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	if (walker.pte_gpa == -1)
+		return;
+	if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
+				  sizeof(pt_element_t)))
+		return;
+	if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
+		if (mmu_topup_memory_caches(vcpu))
+			return;
+		kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
+				  sizeof(pt_element_t), 0);
+	}
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 774db00..ba10287 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2046,7 +2046,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 	if (ret < 0)
 		return 0;
-	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+	kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
 	return 1;
 }
 
-- 
cgit v0.10.2


From e93353c93a3ba4215633ce930784f40a4e94e3f9 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Fri, 5 Dec 2008 18:36:45 -0200
Subject: x86: KVM guest: kvm_get_tsc_khz: return khz, not lpj

kvm_get_tsc_khz() currently returns the previously-calculated preset_lpj
value, but it is in loops-per-jiffy, not kHz. The current code works
correctly only when HZ=1000.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index b38e801..652fce6d 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
  */
 static unsigned long kvm_get_tsc_khz(void)
 {
-	return preset_lpj;
+	struct pvclock_vcpu_time_info *src;
+	src = &per_cpu(hv_clock, 0);
+	return pvclock_tsc_khz(src);
 }
 
 static void kvm_get_preset_lpj(void)
 {
-	struct pvclock_vcpu_time_info *src;
 	unsigned long khz;
 	u64 lpj;
 
-	src = &per_cpu(hv_clock, 0);
-	khz = pvclock_tsc_khz(src);
+	khz = kvm_get_tsc_khz();
 
 	lpj = ((u64)khz * 1000);
 	do_div(lpj, HZ);
-- 
cgit v0.10.2


From e3a2a0d4e5ace731e60e2eff4fb7056ecb34adc1 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 2 Dec 2008 11:16:03 +0100
Subject: anon_inodes: use fops->owner for module refcount

There is an imbalance for anonymous inodes. If the fops->owner field is set,
the module reference count of owner is decreases on release.
("filp_close" --> "__fput" ---> "fops_put")

On the other hand, anon_inode_getfd does not increase the module reference
count of owner. This causes two problems:

- if owner is set, the module refcount goes negative
- if owner is not set, the module can be unloaded while code is running

This patch changes anon_inode_getfd to be symmetric regarding fops->owner
handling.

I have checked all existing users of anon_inode_getfd. Noone sets fops->owner,
thats why nobody has seen the module refcount negative. The refcounting was
tested with a patched and unpatched KVM module.(see patch 2/2) I also did an
epoll_open/close test.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index c16d9be..3bbdb9d 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -79,9 +79,12 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
 	if (IS_ERR(anon_inode_inode))
 		return -ENODEV;
 
+	if (fops->owner && !try_module_get(fops->owner))
+		return -ENOENT;
+
 	error = get_unused_fd_flags(flags);
 	if (error < 0)
-		return error;
+		goto err_module;
 	fd = error;
 
 	/*
@@ -128,6 +131,8 @@ err_dput:
 	dput(dentry);
 err_put_unused_fd:
 	put_unused_fd(fd);
+err_module:
+	module_put(fops->owner);
 	return error;
 }
 EXPORT_SYMBOL_GPL(anon_inode_getfd);
-- 
cgit v0.10.2


From 3d3aab1b973b01bd2a1aa46307e94a1380b1d802 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 2 Dec 2008 11:17:32 +0100
Subject: KVM: set owner of cpu and vm file operations

There is a race between a "close of the file descriptors" and module
unload in the kvm module.

You can easily trigger this problem by applying this debug patch:
>--- kvm.orig/virt/kvm/kvm_main.c
>+++ kvm/virt/kvm/kvm_main.c
>@@ -648,10 +648,14 @@ void kvm_free_physmem(struct kvm *kvm)
>                kvm_free_physmem_slot(&kvm->memslots[i], NULL);
> }
>
>+#include <linux/delay.h>
> static void kvm_destroy_vm(struct kvm *kvm)
> {
>        struct mm_struct *mm = kvm->mm;
>
>+       printk("off1\n");
>+       msleep(5000);
>+       printk("off2\n");
>        spin_lock(&kvm_lock);
>        list_del(&kvm->vm_list);
>        spin_unlock(&kvm_lock);

and killing the userspace, followed by an rmmod.

The problem is that kvm_destroy_vm can run while the module count
is 0. That means, you can remove the module while kvm_destroy_vm
is running. But kvm_destroy_vm is part of the module text. This
causes a kerneloops. The race exists without the msleep but is much
harder to trigger.

This patch requires the fix for anon_inodes (anon_inodes: use fops->owner
for module refcount).
With this patch, we can set the owner of all anonymous KVM inodes file
operations. The VFS will then control the KVM module refcount as long as there
is an open file. kvm_destroy_vm will be called by the release function of the
last closed file - before the VFS drops the module refcount.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fd9cc79..484c903 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1498,7 +1498,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static const struct file_operations kvm_vcpu_fops = {
+static struct file_operations kvm_vcpu_fops = {
 	.release        = kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
 	.compat_ioctl   = kvm_vcpu_ioctl,
@@ -1892,7 +1892,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static const struct file_operations kvm_vm_fops = {
+static struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
 	.compat_ioctl   = kvm_vm_ioctl,
@@ -2256,6 +2256,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 	}
 
 	kvm_chardev_ops.owner = module;
+	kvm_vm_fops.owner = module;
+	kvm_vcpu_fops.owner = module;
 
 	r = misc_register(&kvm_dev);
 	if (r) {
-- 
cgit v0.10.2


From 498468961ed6f62a306eb90c49125776c526fa40 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 8 Dec 2008 20:26:24 +1030
Subject: KVM: Extract core of kvm_flush_remote_tlbs/kvm_reload_remote_mmus

Avi said:
> Wow, code duplication from Rusty. Things must be bad.

Something about glass houses comes to mind.  But instead, a patch.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 484c903..ba4275d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -552,10 +552,11 @@ static void ack_flush(void *_completed)
 {
 }
 
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
 	int i, cpu, me;
 	cpumask_t cpus;
+	bool called = false;
 	struct kvm_vcpu *vcpu;
 
 	me = get_cpu();
@@ -564,45 +565,30 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 		vcpu = kvm->vcpus[i];
 		if (!vcpu)
 			continue;
-		if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+		if (test_and_set_bit(req, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
 		if (cpu != -1 && cpu != me)
 			cpu_set(cpu, cpus);
 	}
-	if (cpus_empty(cpus))
-		goto out;
-	++kvm->stat.remote_tlb_flush;
-	smp_call_function_mask(cpus, ack_flush, NULL, 1);
-out:
+	if (!cpus_empty(cpus)) {
+		smp_call_function_mask(cpus, ack_flush, NULL, 1);
+		called = true;
+	}
 	put_cpu();
+	return called;
 }
 
-void kvm_reload_remote_mmus(struct kvm *kvm)
+void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	int i, cpu, me;
-	cpumask_t cpus;
-	struct kvm_vcpu *vcpu;
-
-	me = get_cpu();
-	cpus_clear(cpus);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		vcpu = kvm->vcpus[i];
-		if (!vcpu)
-			continue;
-		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
-			continue;
-		cpu = vcpu->cpu;
-		if (cpu != -1 && cpu != me)
-			cpu_set(cpu, cpus);
-	}
-	if (cpus_empty(cpus))
-		goto out;
-	smp_call_function_mask(cpus, ack_flush, NULL, 1);
-out:
-	put_cpu();
+	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+		++kvm->stat.remote_tlb_flush;
 }
 
+void kvm_reload_remote_mmus(struct kvm *kvm)
+{
+	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+}
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
-- 
cgit v0.10.2


From 6ef7a1bc45f80fe0a263119d404688c596ea5031 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 8 Dec 2008 20:28:04 +1030
Subject: KVM: use modern cpumask primitives, no cpumask_t on stack

We're getting rid on on-stack cpumasks for large NR_CPUS.

1) Use cpumask_var_t/alloc_cpumask_var.
2) smp_call_function_mask -> smp_call_function_many
3) cpus_clear, cpus_empty, cpu_set -> cpumask_clear, cpumask_empty,
   cpumask_set_cpu.

This actually generates slightly smaller code than the old one with
CONFIG_CPUMASKS_OFFSTACK=n.  (gcc knows that cpus cannot be NULL in
that case, where cpumask_var_t is cpumask_t[1]).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ba4275d..2d6ca79 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -555,12 +555,14 @@ static void ack_flush(void *_completed)
 static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
 	int i, cpu, me;
-	cpumask_t cpus;
-	bool called = false;
+	cpumask_var_t cpus;
+	bool called = true;
 	struct kvm_vcpu *vcpu;
 
+	if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
+		cpumask_clear(cpus);
+
 	me = get_cpu();
-	cpus_clear(cpus);
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		vcpu = kvm->vcpus[i];
 		if (!vcpu)
@@ -568,14 +570,17 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 		if (test_and_set_bit(req, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
-		if (cpu != -1 && cpu != me)
-			cpu_set(cpu, cpus);
-	}
-	if (!cpus_empty(cpus)) {
-		smp_call_function_mask(cpus, ack_flush, NULL, 1);
-		called = true;
+		if (cpus != NULL && cpu != -1 && cpu != me)
+			cpumask_set_cpu(cpu, cpus);
 	}
+	if (unlikely(cpus == NULL))
+		smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
+	else if (!cpumask_empty(cpus))
+		smp_call_function_many(cpus, ack_flush, NULL, 1);
+	else
+		called = false;
 	put_cpu();
+	free_cpumask_var(cpus);
 	return called;
 }
 
-- 
cgit v0.10.2


From 7f59f492da722eb3551bbe1f8f4450a21896f05d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sun, 7 Dec 2008 21:25:45 +1030
Subject: KVM: use cpumask_var_t for cpus_hardware_enabled

This changes cpus_hardware_enabled from a cpumask_t to a cpumask_var_t:
equivalent for CONFIG_CPUMASKS_OFFSTACK=n, otherwise dynamically allocated.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2d6ca79..e7644b9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -70,7 +70,7 @@ module_param(msi2intx, bool, 0);
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
-static cpumask_t cpus_hardware_enabled;
+static cpumask_var_t cpus_hardware_enabled;
 
 struct kmem_cache *kvm_vcpu_cache;
 EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
@@ -1965,9 +1965,9 @@ static void hardware_enable(void *junk)
 {
 	int cpu = raw_smp_processor_id();
 
-	if (cpu_isset(cpu, cpus_hardware_enabled))
+	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
-	cpu_set(cpu, cpus_hardware_enabled);
+	cpumask_set_cpu(cpu, cpus_hardware_enabled);
 	kvm_arch_hardware_enable(NULL);
 }
 
@@ -1975,9 +1975,9 @@ static void hardware_disable(void *junk)
 {
 	int cpu = raw_smp_processor_id();
 
-	if (!cpu_isset(cpu, cpus_hardware_enabled))
+	if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
-	cpu_clear(cpu, cpus_hardware_enabled);
+	cpumask_clear_cpu(cpu, cpus_hardware_enabled);
 	kvm_arch_hardware_disable(NULL);
 }
 
@@ -2211,9 +2211,14 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	bad_pfn = page_to_pfn(bad_page);
 
+	if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
+		r = -ENOMEM;
+		goto out_free_0;
+	}
+
 	r = kvm_arch_hardware_setup();
 	if (r < 0)
-		goto out_free_0;
+		goto out_free_0a;
 
 	for_each_online_cpu(cpu) {
 		smp_call_function_single(cpu,
@@ -2277,6 +2282,8 @@ out_free_2:
 	on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
 	kvm_arch_hardware_unsetup();
+out_free_0a:
+	free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
 	__free_page(bad_page);
 out:
@@ -2300,6 +2307,7 @@ void kvm_exit(void)
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	kvm_exit_debug();
+	free_cpumask_var(cpus_hardware_enabled);
 	__free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
-- 
cgit v0.10.2


From 1a811b6167089bcdb84284f2dc9fd0b4d0f1899d Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 8 Dec 2008 18:25:27 +0200
Subject: KVM: Advertise the bug in memory region destruction as fixed

Userspace might need to act differently.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0997e6f..4880776 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -395,6 +395,8 @@ struct kvm_trace_rec {
 #if defined(CONFIG_X86)
 #define KVM_CAP_DEVICE_MSI 20
 #endif
+/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
+#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e7644b9..e066eb1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1905,6 +1905,17 @@ static int kvm_dev_ioctl_create_vm(void)
 	return fd;
 }
 
+static long kvm_dev_ioctl_check_extension_generic(long arg)
+{
+	switch (arg) {
+	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+		return 1;
+	default:
+		break;
+	}
+	return kvm_dev_ioctl_check_extension(arg);
+}
+
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {
@@ -1924,7 +1935,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_dev_ioctl_create_vm();
 		break;
 	case KVM_CHECK_EXTENSION:
-		r = kvm_dev_ioctl_check_extension(arg);
+		r = kvm_dev_ioctl_check_extension_generic(arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
 		r = -EINVAL;
-- 
cgit v0.10.2


From ca9edaee1aea34ebd9adb48910aba0b3d64b1b22 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 8 Dec 2008 18:29:29 +0200
Subject: KVM: Consolidate userspace memory capability reporting into common
 code

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index b4d24e2..d2eb969 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -180,7 +180,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
-	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_MP_STATE:
 
 		r = 1;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1deda37..2822c8c 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -137,9 +137,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	int r;
 
 	switch (ext) {
-	case KVM_CAP_USER_MEMORY:
-		r = 1;
-		break;
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 76f05dd..be84971 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -113,8 +113,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
 int kvm_dev_ioctl_check_extension(long ext)
 {
 	switch (ext) {
-	case KVM_CAP_USER_MEMORY:
-		return 1;
 	default:
 		return 0;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ba10287..10302d3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -964,7 +964,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IRQCHIP:
 	case KVM_CAP_HLT:
 	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
-	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
 	case KVM_CAP_CLOCKSOURCE:
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e066eb1..eb70ca6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1908,6 +1908,7 @@ static int kvm_dev_ioctl_create_vm(void)
 static long kvm_dev_ioctl_check_extension_generic(long arg)
 {
 	switch (arg) {
+	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 		return 1;
 	default:
-- 
cgit v0.10.2


From eb64f1e8cd5c3cae912db30a77d062367f7a11a6 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 9 Dec 2008 16:07:22 +0100
Subject: KVM: MMU: check for present pdptr shadow page in walk_shadow

walk_shadow assumes the caller verified validity of the pdptr pointer in
question, which is not the case for the invlpg handler.

Fixes oops during Solaris 10 install.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 863baf7..641c078 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1269,6 +1269,8 @@ static int walk_shadow(struct kvm_shadow_walk *walker,
 	if (level == PT32E_ROOT_LEVEL) {
 		shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
 		shadow_addr &= PT64_BASE_ADDR_MASK;
+		if (!shadow_addr)
+			return 1;
 		--level;
 	}
 
-- 
cgit v0.10.2


From defaf1587c5d7dff828f6f11c8941e5bcef00f50 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Tue, 2 Dec 2008 12:16:33 +0000
Subject: KVM: fix handling of ACK from shared guest IRQ

If an assigned device shares a guest irq with an emulated
device then we currently interpret an ack generated by the
emulated device as originating from the assigned device
leading to e.g. "Unbalanced enable for IRQ 4347" from the
enable_irq() in kvm_assigned_dev_ack_irq().

The fix is fairly simple - don't enable the physical device
irq unless it was previously disabled.

Of course, this can still lead to a situation where a
non-assigned device ACK can cause the physical device irq to
be reenabled before the device was serviced. However, being
level sensitive, the interrupt will merely be regenerated.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8091a4d..eafabd5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -307,6 +307,7 @@ struct kvm_assigned_dev_kernel {
 	int host_busnr;
 	int host_devfn;
 	int host_irq;
+	bool host_irq_disabled;
 	int guest_irq;
 	struct msi_msg guest_msi;
 #define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index eb70ca6..fc6127c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -170,6 +170,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 				KVM_ASSIGNED_DEV_GUEST_MSI) {
 		assigned_device_msi_dispatch(assigned_dev);
 		enable_irq(assigned_dev->host_irq);
+		assigned_dev->host_irq_disabled = false;
 	}
 	mutex_unlock(&assigned_dev->kvm->lock);
 	kvm_put_kvm(assigned_dev->kvm);
@@ -181,8 +182,12 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
 	kvm_get_kvm(assigned_dev->kvm);
+
 	schedule_work(&assigned_dev->interrupt_work);
+
 	disable_irq_nosync(irq);
+	assigned_dev->host_irq_disabled = true;
+
 	return IRQ_HANDLED;
 }
 
@@ -196,8 +201,16 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 
 	dev = container_of(kian, struct kvm_assigned_dev_kernel,
 			   ack_notifier);
+
 	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
-	enable_irq(dev->host_irq);
+
+	/* The guest irq may be shared so this ack may be
+	 * from another device.
+	 */
+	if (dev->host_irq_disabled) {
+		enable_irq(dev->host_irq);
+		dev->host_irq_disabled = false;
+	}
 }
 
 static void kvm_free_assigned_irq(struct kvm *kvm,
-- 
cgit v0.10.2


From 264ff01d55b456932cef03082448b41d2edeb6a1 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 24 Nov 2008 12:26:19 +0100
Subject: KVM: VMX: Fix pending NMI-vs.-IRQ race for user space irqchip

As with the kernel irqchip, don't allow an NMI to stomp over an already
injected IRQ; instead wait for the IRQ injection to be completed.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e446f23..487e1dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2486,7 +2486,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	vmx_update_window_states(vcpu);
 
 	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-		if (vcpu->arch.nmi_window_open) {
+		if (vcpu->arch.interrupt.pending) {
+			enable_nmi_window(vcpu);
+		} else if (vcpu->arch.nmi_window_open) {
 			vcpu->arch.nmi_pending = false;
 			vcpu->arch.nmi_injected = true;
 		} else {
-- 
cgit v0.10.2


From 4531220b71f0399e71cda0c4cf749e7281a7416a Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Thu, 11 Dec 2008 16:54:54 +0100
Subject: KVM: x86: Rework user space NMI injection as KVM_CAP_USER_NMI

There is no point in doing the ready_for_nmi_injection/
request_nmi_window dance with user space. First, we don't do this for
in-kernel irqchip anyway, while the code path is the same as for user
space irqchip mode. And second, there is nothing to loose if a pending
NMI is overwritten by another one (in contrast to IRQs where we have to
save the number). Actually, there is even the risk of raising spurious
NMIs this way because the reason for the held-back NMI might already be
handled while processing the first one.

Therefore this patch creates a simplified user space NMI injection
interface, exporting it under KVM_CAP_USER_NMI and dropping the old
KVM_CAP_NMI capability. And this time we also take care to provide the
interface only on archs supporting NMIs via KVM (right now only x86).

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 487e1dc..6259d74 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2498,15 +2498,13 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	}
 	if (vcpu->arch.nmi_injected) {
 		vmx_inject_nmi(vcpu);
-		if (vcpu->arch.nmi_pending || kvm_run->request_nmi_window)
+		if (vcpu->arch.nmi_pending)
 			enable_nmi_window(vcpu);
 		else if (vcpu->arch.irq_summary
 			 || kvm_run->request_interrupt_window)
 			enable_irq_window(vcpu);
 		return;
 	}
-	if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
-		enable_nmi_window(vcpu);
 
 	if (vcpu->arch.interrupt_window_open) {
 		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -3040,14 +3038,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 	++vcpu->stat.nmi_window_exits;
 
-	/*
-	 * If the user space waits to inject a NMI, exit as soon as possible
-	 */
-	if (kvm_run->request_nmi_window && !vcpu->arch.nmi_pending) {
-		kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
-		return 0;
-	}
-
 	return 1;
 }
 
@@ -3162,7 +3152,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			vmx->soft_vnmi_blocked = 0;
 			vcpu->arch.nmi_window_open = 1;
 		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
-		    (kvm_run->request_nmi_window || vcpu->arch.nmi_pending)) {
+			   vcpu->arch.nmi_pending) {
 			/*
 			 * This CPU don't support us in finding the end of an
 			 * NMI-blocked window if the guest runs with IRQs
@@ -3175,16 +3165,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			vmx->soft_vnmi_blocked = 0;
 			vmx->vcpu.arch.nmi_window_open = 1;
 		}
-
-		/*
-		 * If the user space waits to inject an NNI, exit ASAP
-		 */
-		if (vcpu->arch.nmi_window_open && kvm_run->request_nmi_window
-		    && !vcpu->arch.nmi_pending) {
-			kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
-			++vcpu->stat.nmi_window_exits;
-			return 0;
-		}
 	}
 
 	if (exit_reason < kvm_vmx_max_exit_handlers
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 10302d3..0e6aa81 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2887,37 +2887,18 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
 }
 
-/*
- * Check if userspace requested a NMI window, and that the NMI window
- * is open.
- *
- * No need to exit to userspace if we already have a NMI queued.
- */
-static int dm_request_for_nmi_injection(struct kvm_vcpu *vcpu,
-					struct kvm_run *kvm_run)
-{
-	return (!vcpu->arch.nmi_pending &&
-		kvm_run->request_nmi_window &&
-		vcpu->arch.nmi_window_open);
-}
-
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 			      struct kvm_run *kvm_run)
 {
 	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	if (irqchip_in_kernel(vcpu->kvm)) {
+	if (irqchip_in_kernel(vcpu->kvm))
 		kvm_run->ready_for_interrupt_injection = 1;
-		kvm_run->ready_for_nmi_injection = 1;
-	} else {
+	else
 		kvm_run->ready_for_interrupt_injection =
 					(vcpu->arch.interrupt_window_open &&
 					 vcpu->arch.irq_summary == 0);
-		kvm_run->ready_for_nmi_injection =
-					(vcpu->arch.nmi_window_open &&
-					 vcpu->arch.nmi_pending == 0);
-	}
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
@@ -3093,11 +3074,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 
 		if (r > 0) {
-			if (dm_request_for_nmi_injection(vcpu, kvm_run)) {
-				r = -EINTR;
-				kvm_run->exit_reason = KVM_EXIT_NMI;
-				++vcpu->stat.request_nmi_exits;
-			}
 			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
 				r = -EINTR;
 				kvm_run->exit_reason = KVM_EXIT_INTR;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 4880776..35525ac 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -84,21 +84,18 @@ struct kvm_irqchip {
 #define KVM_EXIT_S390_RESET       14
 #define KVM_EXIT_DCR              15
 #define KVM_EXIT_NMI              16
-#define KVM_EXIT_NMI_WINDOW_OPEN  17
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
 	/* in */
 	__u8 request_interrupt_window;
-	__u8 request_nmi_window;
-	__u8 padding1[6];
+	__u8 padding1[7];
 
 	/* out */
 	__u32 exit_reason;
 	__u8 ready_for_interrupt_injection;
 	__u8 if_flag;
-	__u8 ready_for_nmi_injection;
-	__u8 padding2;
+	__u8 padding2[2];
 
 	/* in (pre_kvm_run), out (post_kvm_run) */
 	__u64 cr8;
@@ -391,12 +388,14 @@ struct kvm_trace_rec {
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
 #endif
 #define KVM_CAP_IOMMU 18
-#define KVM_CAP_NMI 19
 #if defined(CONFIG_X86)
 #define KVM_CAP_DEVICE_MSI 20
 #endif
 /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
+#if defined(CONFIG_X86)
+#define KVM_CAP_USER_NMI 22
+#endif
 
 /*
  * ioctls for VM fds
-- 
cgit v0.10.2


From 042b26edf0bc1b0f03238a71aed71cca4593848c Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Tue, 16 Dec 2008 16:45:47 +0100
Subject: KVM: ia64: Fix kvm_arch_vcpu_ioctl_[gs]et_regs()

Fix kvm_arch_vcpu_ioctl_[gs]et_regs() to do something meaningful on
ia64. Old versions could never have worked since they required
pointers to be set in the ioctl payload which were never being set by
the ioctl handler for get_regs.

In addition reserve extra space for future extensions.

The change of layout of struct kvm_regs doesn't require adding a new
CAP since get/set regs never worked on ia64 until now.

This version doesn't support copying the KVM kernel stack in/out of
the kernel. This should be implemented in a seperate ioctl call if
ever needed.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by : Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index f38472a..68aa6da 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -166,8 +166,6 @@ struct saved_vpd {
 };
 
 struct kvm_regs {
-	char *saved_guest;
-	char *saved_stack;
 	struct saved_vpd vpd;
 	/*Arch-regs*/
 	int mp_state;
@@ -200,6 +198,10 @@ struct kvm_regs {
 	unsigned long fp_psr;       /*used for lazy float register */
 	unsigned long saved_gp;
 	/*for phycial  emulation */
+
+	union context saved_guest;
+
+	unsigned long reserved[64];	/* for future use */
 };
 
 struct kvm_sregs {
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d2eb969..0f5ebd9 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -831,9 +831,8 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-	int i;
 	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-	int r;
+	int i;
 
 	vcpu_load(vcpu);
 
@@ -850,18 +849,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
 	vpd->vpr = regs->vpd.vpr;
 
-	r = -EFAULT;
-	r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
-						sizeof(union context));
-	if (r)
-		goto out;
-	r = copy_from_user(vcpu + 1, regs->saved_stack +
-			sizeof(struct kvm_vcpu),
-			KVM_STK_OFFSET - sizeof(struct kvm_vcpu));
-	if (r)
-		goto out;
-	vcpu->arch.exit_data =
-		((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
+	memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
 
 	RESTORE_REGS(mp_state);
 	RESTORE_REGS(vmm_rr);
@@ -895,9 +883,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	set_bit(KVM_REQ_RESUME, &vcpu->requests);
 
 	vcpu_put(vcpu);
-	r = 0;
-out:
-	return r;
+
+	return 0;
 }
 
 long kvm_arch_vm_ioctl(struct file *filp,
@@ -1378,9 +1365,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-	int i;
-	int r;
 	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	int i;
+
 	vcpu_load(vcpu);
 
 	for (i = 0; i < 16; i++) {
@@ -1395,14 +1382,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	regs->vpd.vpsr = vpd->vpsr;
 	regs->vpd.vpr = vpd->vpr;
 
-	r = -EFAULT;
-	r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
-					sizeof(union context));
-	if (r)
-		goto out;
-	r = copy_to_user(regs->saved_stack, (void *)vcpu, KVM_STK_OFFSET);
-	if (r)
-		goto out;
+	memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
+
 	SAVE_REGS(mp_state);
 	SAVE_REGS(vmm_rr);
 	memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
@@ -1430,10 +1411,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	SAVE_REGS(metaphysical_saved_rr4);
 	SAVE_REGS(fp_psr);
 	SAVE_REGS(saved_gp);
+
 	vcpu_put(vcpu);
-	r = 0;
-out:
-	return r;
+	return 0;
 }
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-- 
cgit v0.10.2


From fe634fd46ff643d98fdbcd153847e08c3c076e6e Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Wed, 17 Dec 2008 09:38:14 +0800
Subject: MAINTAINERS: Maintainership changes for kvm/ia64

Anthony Xu no longer works on kvm.

Cc:  "Luck, Tony"  <tony.luck@intel.com>
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index ceb32ee..4209b71 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2541,8 +2541,6 @@ W:	http://kvm.qumranet.com
 S:	Supported
 
 KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
-P:	Anthony Xu
-M:	anthony.xu@intel.com
 P:	Xiantao Zhang
 M:	xiantao.zhang@intel.com
 L:	kvm-ia64@vger.kernel.org
-- 
cgit v0.10.2


From 25e2343246fe135fce672f41abe61e9d2c38caac Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 21 Dec 2008 18:31:10 +0200
Subject: KVM: MMU: Don't treat a global pte as such if cr4.pge is cleared

The pte.g bit is meaningless if global pages are disabled; deferring
mmu page synchronization on these ptes will lead to the guest using stale
shadow ptes.

Fixes Vista x86 smp bootloader failure.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 641c078..d50ebac 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1669,6 +1669,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	u64 mt_mask = shadow_mt_mask;
 	struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
 
+	if (!(vcpu->arch.cr4 & X86_CR4_PGE))
+		global = 0;
 	if (!global && sp->global) {
 		sp->global = 0;
 		if (sp->unsync) {
-- 
cgit v0.10.2


From 3f353858c98dbe0240dac558a89870f4600f81bb Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 21 Dec 2008 22:48:32 +0200
Subject: KVM: Add locking to virtual i8259 interrupt controller

While most accesses to the i8259 are with the kvm mutex taken, the call
to kvm_pic_read_irq() is not.  We can't easily take the kvm mutex there
since the function is called with interrupts disabled.

Fix by adding a spinlock to the virtual interrupt controller.  Since we
can't send an IPI under the spinlock (we also take the same spinlock in
an irq disabled context), we defer the IPI until the spinlock is released.
Similarly, we defer irq ack notifications until after spinlock release to
avoid lock recursion.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 17e41e1..179dcb0 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -26,10 +26,40 @@
  *   Port from Qemu.
  */
 #include <linux/mm.h>
+#include <linux/bitops.h>
 #include "irq.h"
 
 #include <linux/kvm_host.h>
 
+static void pic_lock(struct kvm_pic *s)
+{
+	spin_lock(&s->lock);
+}
+
+static void pic_unlock(struct kvm_pic *s)
+{
+	struct kvm *kvm = s->kvm;
+	unsigned acks = s->pending_acks;
+	bool wakeup = s->wakeup_needed;
+	struct kvm_vcpu *vcpu;
+
+	s->pending_acks = 0;
+	s->wakeup_needed = false;
+
+	spin_unlock(&s->lock);
+
+	while (acks) {
+		kvm_notify_acked_irq(kvm, __ffs(acks));
+		acks &= acks - 1;
+	}
+
+	if (wakeup) {
+		vcpu = s->kvm->vcpus[0];
+		if (vcpu)
+			kvm_vcpu_kick(vcpu);
+	}
+}
+
 static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 {
 	s->isr &= ~(1 << irq);
@@ -136,17 +166,21 @@ static void pic_update_irq(struct kvm_pic *s)
 
 void kvm_pic_update_irq(struct kvm_pic *s)
 {
+	pic_lock(s);
 	pic_update_irq(s);
+	pic_unlock(s);
 }
 
 void kvm_pic_set_irq(void *opaque, int irq, int level)
 {
 	struct kvm_pic *s = opaque;
 
+	pic_lock(s);
 	if (irq >= 0 && irq < PIC_NUM_PINS) {
 		pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
 		pic_update_irq(s);
 	}
+	pic_unlock(s);
 }
 
 /*
@@ -172,6 +206,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 	int irq, irq2, intno;
 	struct kvm_pic *s = pic_irqchip(kvm);
 
+	pic_lock(s);
 	irq = pic_get_irq(&s->pics[0]);
 	if (irq >= 0) {
 		pic_intack(&s->pics[0], irq);
@@ -196,6 +231,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 		intno = s->pics[0].irq_base + irq;
 	}
 	pic_update_irq(s);
+	pic_unlock(s);
 	kvm_notify_acked_irq(kvm, irq);
 
 	return intno;
@@ -203,7 +239,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-	int irq, irqbase;
+	int irq, irqbase, n;
 	struct kvm *kvm = s->pics_state->irq_request_opaque;
 	struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
 
@@ -214,8 +250,10 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 
 	for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
 		if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
-			if (s->irr & (1 << irq) || s->isr & (1 << irq))
-				kvm_notify_acked_irq(kvm, irq+irqbase);
+			if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
+				n = irq + irqbase;
+				s->pics_state->pending_acks |= 1 << n;
+			}
 	}
 	s->last_irr = 0;
 	s->irr = 0;
@@ -406,6 +444,7 @@ static void picdev_write(struct kvm_io_device *this,
 			printk(KERN_ERR "PIC: non byte write\n");
 		return;
 	}
+	pic_lock(s);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -418,6 +457,7 @@ static void picdev_write(struct kvm_io_device *this,
 		elcr_ioport_write(&s->pics[addr & 1], addr, data);
 		break;
 	}
+	pic_unlock(s);
 }
 
 static void picdev_read(struct kvm_io_device *this,
@@ -431,6 +471,7 @@ static void picdev_read(struct kvm_io_device *this,
 			printk(KERN_ERR "PIC: non byte read\n");
 		return;
 	}
+	pic_lock(s);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -444,6 +485,7 @@ static void picdev_read(struct kvm_io_device *this,
 		break;
 	}
 	*(unsigned char *)val = data;
+	pic_unlock(s);
 }
 
 /*
@@ -459,7 +501,7 @@ static void pic_irq_request(void *opaque, int level)
 	s->output = level;
 	if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
 		s->pics[0].isr_ack &= ~(1 << irq);
-		kvm_vcpu_kick(vcpu);
+		s->wakeup_needed = true;
 	}
 }
 
@@ -469,6 +511,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 	s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
 	if (!s)
 		return NULL;
+	spin_lock_init(&s->lock);
+	s->kvm = kvm;
 	s->pics[0].elcr_mask = 0xf8;
 	s->pics[1].elcr_mask = 0xde;
 	s->irq_request = pic_irq_request;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index b9e9051..2bf32a0 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -25,6 +25,7 @@
 #include <linux/mm_types.h>
 #include <linux/hrtimer.h>
 #include <linux/kvm_host.h>
+#include <linux/spinlock.h>
 
 #include "iodev.h"
 #include "ioapic.h"
@@ -59,6 +60,10 @@ struct kvm_kpic_state {
 };
 
 struct kvm_pic {
+	spinlock_t lock;
+	bool wakeup_needed;
+	unsigned pending_acks;
+	struct kvm *kvm;
 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
 	irq_request_func *irq_request;
 	void *irq_request_opaque;
-- 
cgit v0.10.2


From 87917239204d67a316cb89751750f86c9ed3640b Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 22 Dec 2008 18:49:30 -0200
Subject: KVM: MMU: handle large host sptes on invlpg/resync

The invlpg and sync walkers lack knowledge of large host sptes,
descending to non-existant pagetable level.

Stop at directory level in such case.

Fixes SMP Windows XP with hugepages.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d50ebac..83f11c7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1007,7 +1007,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 	for_each_unsync_children(sp->unsync_child_bitmap, i) {
 		u64 ent = sp->spt[i];
 
-		if (is_shadow_present_pte(ent)) {
+		if (is_shadow_present_pte(ent) && !is_large_pte(ent)) {
 			struct kvm_mmu_page *child;
 			child = page_header(ent & PT64_BASE_ADDR_MASK);
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d206401..9fd78b6 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -472,14 +472,19 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
 	struct shadow_walker *sw =
 		container_of(_sw, struct shadow_walker, walker);
 
-	if (level == PT_PAGE_TABLE_LEVEL) {
+	/* FIXME: properly handle invlpg on large guest pages */
+	if (level == PT_PAGE_TABLE_LEVEL ||
+	    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
 		struct kvm_mmu_page *sp = page_header(__pa(sptep));
 
 		sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
 		sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 
-		if (is_shadow_present_pte(*sptep))
+		if (is_shadow_present_pte(*sptep)) {
 			rmap_remove(vcpu->kvm, sptep);
+			if (is_large_pte(*sptep))
+				--vcpu->kvm->stat.lpages;
+		}
 		set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
 		return 1;
 	}
-- 
cgit v0.10.2


From 794db26f20b7dbb879f4e1911221e1959818dfdb Mon Sep 17 00:00:00 2001
From: Wim Van Sebroeck <wim@iguana.be>
Date: Wed, 15 Oct 2008 11:44:40 +0000
Subject: [WATCHDOG] ib700wdt - add timeout parameter

Add the timeout module parameter to ib700wdt.c

Signed-off-by: Wim Van Sebroeck <wim@iguana.be>

diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c
index 317ef2b..4bef3dd 100644
--- a/drivers/watchdog/ib700wdt.c
+++ b/drivers/watchdog/ib700wdt.c
@@ -91,32 +91,16 @@ static char expect_close;
  *
  */
 
-static int wd_times[] = {
-	30,	/* 0x0 */
-	28,	/* 0x1 */
-	26,	/* 0x2 */
-	24,	/* 0x3 */
-	22,	/* 0x4 */
-	20,	/* 0x5 */
-	18,	/* 0x6 */
-	16,	/* 0x7 */
-	14,	/* 0x8 */
-	12,	/* 0x9 */
-	10,	/* 0xA */
-	8,	/* 0xB */
-	6,	/* 0xC */
-	4,	/* 0xD */
-	2,	/* 0xE */
-	0,	/* 0xF */
-};
-
 #define WDT_STOP 0x441
 #define WDT_START 0x443
 
 /* Default timeout */
-#define WD_TIMO 0		/* 30 seconds +/- 20%, from table */
-
-static int wd_margin = WD_TIMO;
+#define WATCHDOG_TIMEOUT 30		/* 30 seconds +/- 20% */
+static int timeout = WATCHDOG_TIMEOUT;	/* in seconds */
+module_param(timeout, int, 0);
+MODULE_PARM_DESC(timeout,
+	"Watchdog timeout in seconds. 0<= timeout <=30, default="
+		__MODULE_STRING(WATCHDOG_TIMEOUT) ".");
 
 static int nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, int, 0);
@@ -131,6 +115,8 @@ MODULE_PARM_DESC(nowayout,
 
 static void ibwdt_ping(void)
 {
+	int wd_margin = 15 - ((timeout + 1) / 2);
+
 	spin_lock(&ibwdt_lock);
 
 	/* Write a watchdog value */
@@ -148,15 +134,10 @@ static void ibwdt_disable(void)
 
 static int ibwdt_set_heartbeat(int t)
 {
-	int i;
-
-	if ((t < 0) || (t > 30))
+	if (t < 0 || t > 30)
 		return -EINVAL;
 
-	for (i = 0x0F; i > -1; i--)
-		if (wd_times[i] >= t)
-			break;
-	wd_margin = i;
+	timeout = t;
 	return 0;
 }
 
@@ -240,7 +221,7 @@ static long ibwdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		/* Fall */
 
 	case WDIOC_GETTIMEOUT:
-		return put_user(wd_times[wd_margin], p);
+		return put_user(timeout, p);
 
 	default:
 		return -ENOTTY;
@@ -317,6 +298,14 @@ static int __devinit ibwdt_probe(struct platform_device *dev)
 		goto out_nostartreg;
 	}
 
+	/* Check that the heartbeat value is within it's range ;
+	 * if not reset to the default */
+	if (ibwdt_set_heartbeat(timeout)) {
+		ibwdt_set_heartbeat(WATCHDOG_TIMEOUT);
+		printk(KERN_INFO PFX
+			"timeout value must be 0<=x<=30, using %d\n", timeout);
+	}
+
 	res = misc_register(&ibwdt_miscdev);
 	if (res) {
 		printk(KERN_ERR PFX "failed to register misc device\n");
-- 
cgit v0.10.2


From 4c6e63bd177a28ca9154ae8c1bab00a387c350c4 Mon Sep 17 00:00:00 2001
From: Wim Van Sebroeck <wim@iguana.be>
Date: Wed, 22 Oct 2008 08:59:25 +0000
Subject: [WATCHDOG] Add SMSC SCH311x Watchdog Timer.

Add a watchdog driver for the hardware watchdog timer on the
SMSC SCH3112, SCH3114 and SCH3116 Super IO chipset.

Tested-by: Marco Chiappero <marco@absence.it>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 4fd3fa5..81f7021 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -551,6 +551,18 @@ config CPU5_WDT
 	  To compile this driver as a module, choose M here: the
 	  module will be called cpu5wdt.
 
+config SMSC_SCH311X_WDT
+	tristate "SMSC SCH311X Watchdog Timer"
+	depends on X86
+	---help---
+	  This is the driver for the hardware watchdog timer on the
+	  SMSC SCH3112, SCH3114 and SCH3116 Super IO chipset
+	  (LPC IO with 8042 KBC, Reset Generation, HWM and multiple
+	  serial ports).
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called sch311x_wdt.
+
 config SMSC37B787_WDT
 	tristate "Winbond SMsC37B787 Watchdog Timer"
 	depends on X86
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index e352bbb..0cd4786 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_60XX_WDT) += sbc60xxwdt.o
 obj-$(CONFIG_SBC8360_WDT) += sbc8360.o
 obj-$(CONFIG_SBC7240_WDT) += sbc7240_wdt.o
 obj-$(CONFIG_CPU5_WDT) += cpu5wdt.o
+obj-$(CONFIG_SMSC_SCH311X_WDT) += sch311x_wdt.o
 obj-$(CONFIG_SMSC37B787_WDT) += smsc37b787_wdt.o
 obj-$(CONFIG_W83627HF_WDT) += w83627hf_wdt.o
 obj-$(CONFIG_W83697HF_WDT) += w83697hf_wdt.o
diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c
new file mode 100644
index 0000000..569eb29
--- /dev/null
+++ b/drivers/watchdog/sch311x_wdt.c
@@ -0,0 +1,578 @@
+/*
+ *	sch311x_wdt.c - Driver for the SCH311x Super-I/O chips
+ *			integrated watchdog.
+ *
+ *	(c) Copyright 2008 Wim Van Sebroeck <wim@iguana.be>.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Neither Wim Van Sebroeck nor Iguana vzw. admit liability nor
+ *	provide warranty for any of this software. This material is
+ *	provided "AS-IS" and at no charge.
+ */
+
+/*
+ *	Includes, defines, variables, module parameters, ...
+ */
+
+/* Includes */
+#include <linux/module.h>		/* For module specific items */
+#include <linux/moduleparam.h>		/* For new moduleparam's */
+#include <linux/types.h>		/* For standard types (like size_t) */
+#include <linux/errno.h>		/* For the -ENODEV/... values */
+#include <linux/kernel.h>		/* For printk/... */
+#include <linux/miscdevice.h>		/* For MODULE_ALIAS_MISCDEV
+							(WATCHDOG_MINOR) */
+#include <linux/watchdog.h>		/* For the watchdog specific items */
+#include <linux/init.h>			/* For __init/__exit/... */
+#include <linux/fs.h>			/* For file operations */
+#include <linux/platform_device.h>	/* For platform_driver framework */
+#include <linux/ioport.h>		/* For io-port access */
+#include <linux/spinlock.h>		/* For spin_lock/spin_unlock/... */
+#include <linux/uaccess.h>		/* For copy_to_user/put_user/... */
+#include <linux/io.h>			/* For inb/outb/... */
+
+/* Module and version information */
+#define DRV_NAME	"sch311x_wdt"
+#define PFX		DRV_NAME ": "
+
+/* Runtime registers */
+#define RESGEN			0x1d
+#define GP60			0x47
+#define WDT_TIME_OUT		0x65
+#define WDT_VAL			0x66
+#define WDT_CFG			0x67
+#define WDT_CTRL		0x68
+
+/* internal variables */
+static unsigned long sch311x_wdt_is_open;
+static char sch311x_wdt_expect_close;
+static struct platform_device *sch311x_wdt_pdev;
+
+static int sch311x_ioports[] = { 0x2e, 0x4e, 0x162e, 0x164e, 0x00 };
+
+static struct {	/* The devices private data */
+	/* the Runtime Register base address */
+	unsigned short runtime_reg;
+	/* The card's boot status */
+	int boot_status;
+	/* the lock for io operations */
+	spinlock_t io_lock;
+} sch311x_wdt_data;
+
+/* Module load parameters */
+static unsigned short force_id;
+module_param(force_id, ushort, 0);
+MODULE_PARM_DESC(force_id, "Override the detected device ID");
+
+static unsigned short therm_trip;
+module_param(therm_trip, ushort, 0);
+MODULE_PARM_DESC(therm_trip, "Should a ThermTrip trigger the reset generator");
+
+#define WATCHDOG_TIMEOUT 60		/* 60 sec default timeout */
+static int timeout = WATCHDOG_TIMEOUT;	/* in seconds */
+module_param(timeout, int, 0);
+MODULE_PARM_DESC(timeout,
+	"Watchdog timeout in seconds. 1<= timeout <=15300, default="
+		__MODULE_STRING(WATCHDOG_TIMEOUT) ".");
+
+static int nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, int, 0);
+MODULE_PARM_DESC(nowayout,
+	"Watchdog cannot be stopped once started (default="
+		__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+/*
+ *	Super-IO functions
+ */
+
+static inline void sch311x_sio_enter(int sio_config_port)
+{
+	outb(0x55, sio_config_port);
+}
+
+static inline void sch311x_sio_exit(int sio_config_port)
+{
+	outb(0xaa, sio_config_port);
+}
+
+static inline int sch311x_sio_inb(int sio_config_port, int reg)
+{
+	outb(reg, sio_config_port);
+	return inb(sio_config_port + 1);
+}
+
+static inline void sch311x_sio_outb(int sio_config_port, int reg, int val)
+{
+	outb(reg, sio_config_port);
+	outb(val, sio_config_port + 1);
+}
+
+/*
+ *	Watchdog Operations
+ */
+
+static void sch311x_wdt_set_timeout(int t)
+{
+	unsigned char timeout_unit = 0x80;
+
+	/* When new timeout is bigger then 255 seconds, we will use minutes */
+	if (t > 255) {
+		timeout_unit = 0;
+		t /= 60;
+	}
+
+	/* -- Watchdog Timeout --
+	 * Bit 0-6 (Reserved)
+	 * Bit 7   WDT Time-out Value Units Select
+	 *         (0 = Minutes, 1 = Seconds)
+	 */
+	outb(timeout_unit, sch311x_wdt_data.runtime_reg + WDT_TIME_OUT);
+
+	/* -- Watchdog Timer Time-out Value --
+	 * Bit 0-7 Binary coded units (0=Disabled, 1..255)
+	 */
+	outb(t, sch311x_wdt_data.runtime_reg + WDT_VAL);
+}
+
+static void sch311x_wdt_start(void)
+{
+	spin_lock(&sch311x_wdt_data.io_lock);
+
+	/* set watchdog's timeout */
+	sch311x_wdt_set_timeout(timeout);
+	/* enable the watchdog */
+	/* -- General Purpose I/O Bit 6.0 --
+	 * Bit 0,   In/Out: 0 = Output, 1 = Input
+	 * Bit 1,   Polarity: 0 = No Invert, 1 = Invert
+	 * Bit 2-3, Function select: 00 = GPI/O, 01 = LED1, 11 = WDT,
+	 *                           10 = Either Edge Triggered Intr.4
+	 * Bit 4-6  (Reserved)
+	 * Bit 7,   Output Type: 0 = Push Pull Bit, 1 = Open Drain
+	 */
+	outb(0x0e, sch311x_wdt_data.runtime_reg + GP60);
+
+	spin_unlock(&sch311x_wdt_data.io_lock);
+
+}
+
+static void sch311x_wdt_stop(void)
+{
+	spin_lock(&sch311x_wdt_data.io_lock);
+
+	/* stop the watchdog */
+	outb(0x01, sch311x_wdt_data.runtime_reg + GP60);
+	/* disable timeout by setting it to 0 */
+	sch311x_wdt_set_timeout(0);
+
+	spin_unlock(&sch311x_wdt_data.io_lock);
+}
+
+static void sch311x_wdt_keepalive(void)
+{
+	spin_lock(&sch311x_wdt_data.io_lock);
+	sch311x_wdt_set_timeout(timeout);
+	spin_unlock(&sch311x_wdt_data.io_lock);
+}
+
+static int sch311x_wdt_set_heartbeat(int t)
+{
+	if (t < 1 || t > (255*60))
+		return -EINVAL;
+
+	/* When new timeout is bigger then 255 seconds,
+	 * we will round up to minutes (with a max of 255) */
+	if (t > 255)
+		t = (((t - 1) / 60) + 1) * 60;
+
+	timeout = t;
+	return 0;
+}
+
+static void sch311x_wdt_get_status(int *status)
+{
+	unsigned char new_status;
+
+	*status = 0;
+
+	spin_lock(&sch311x_wdt_data.io_lock);
+
+	/* -- Watchdog timer control --
+	 * Bit 0   Status Bit: 0 = Timer counting, 1 = Timeout occured
+	 * Bit 1   Reserved
+	 * Bit 2   Force Timeout: 1 = Forces WD timeout event (self-cleaning)
+	 * Bit 3   P20 Force Timeout enabled:
+	 *          0 = P20 activity does not generate the WD timeout event
+	 *          1 = P20 Allows rising edge of P20, from the keyboard
+	 *              controller, to force the WD timeout event.
+	 * Bit 4-7 Reserved
+	 */
+	new_status = inb(sch311x_wdt_data.runtime_reg + WDT_CTRL);
+	if (new_status & 0x01)
+		*status |= WDIOF_CARDRESET;
+
+	spin_unlock(&sch311x_wdt_data.io_lock);
+}
+
+/*
+ *	/dev/watchdog handling
+ */
+
+static ssize_t sch311x_wdt_write(struct file *file, const char __user *buf,
+						size_t count, loff_t *ppos)
+{
+	if (count) {
+		if (!nowayout) {
+			size_t i;
+
+			sch311x_wdt_expect_close = 0;
+
+			for (i = 0; i != count; i++) {
+				char c;
+				if (get_user(c, buf + i))
+					return -EFAULT;
+				if (c == 'V')
+					sch311x_wdt_expect_close = 42;
+			}
+		}
+		sch311x_wdt_keepalive();
+	}
+	return count;
+}
+
+static long sch311x_wdt_ioctl(struct file *file, unsigned int cmd,
+							unsigned long arg)
+{
+	int status;
+	int new_timeout;
+	void __user *argp = (void __user *)arg;
+	int __user *p = argp;
+	static struct watchdog_info ident = {
+		.options		= WDIOF_KEEPALIVEPING |
+					  WDIOF_SETTIMEOUT |
+					  WDIOF_MAGICCLOSE,
+		.firmware_version	= 1,
+		.identity		= DRV_NAME,
+	};
+
+	switch (cmd) {
+	case WDIOC_GETSUPPORT:
+		if (copy_to_user(argp, &ident, sizeof(ident)))
+			return -EFAULT;
+		break;
+
+	case WDIOC_GETSTATUS:
+	{
+		sch311x_wdt_get_status(&status);
+		return put_user(status, p);
+	}
+	case WDIOC_GETBOOTSTATUS:
+		return put_user(sch311x_wdt_data.boot_status, p);
+
+	case WDIOC_SETOPTIONS:
+	{
+		int options, retval = -EINVAL;
+
+		if (get_user(options, p))
+			return -EFAULT;
+		if (options & WDIOS_DISABLECARD) {
+			sch311x_wdt_stop();
+			retval = 0;
+		}
+		if (options & WDIOS_ENABLECARD) {
+			sch311x_wdt_start();
+			retval = 0;
+		}
+		return retval;
+	}
+	case WDIOC_KEEPALIVE:
+		sch311x_wdt_keepalive();
+		break;
+
+	case WDIOC_SETTIMEOUT:
+		if (get_user(new_timeout, p))
+			return -EFAULT;
+		if (sch311x_wdt_set_heartbeat(new_timeout))
+			return -EINVAL;
+		sch311x_wdt_keepalive();
+		/* Fall */
+	case WDIOC_GETTIMEOUT:
+		return put_user(timeout, p);
+	default:
+		return -ENOTTY;
+	}
+	return 0;
+}
+
+static int sch311x_wdt_open(struct inode *inode, struct file *file)
+{
+	if (test_and_set_bit(0, &sch311x_wdt_is_open))
+		return -EBUSY;
+	/*
+	 *	Activate
+	 */
+	sch311x_wdt_start();
+	return nonseekable_open(inode, file);
+}
+
+static int sch311x_wdt_close(struct inode *inode, struct file *file)
+{
+	if (sch311x_wdt_expect_close == 42) {
+		sch311x_wdt_stop();
+	} else {
+		printk(KERN_CRIT PFX
+				"Unexpected close, not stopping watchdog!\n");
+		sch311x_wdt_keepalive();
+	}
+	clear_bit(0, &sch311x_wdt_is_open);
+	sch311x_wdt_expect_close = 0;
+	return 0;
+}
+
+/*
+ *	Kernel Interfaces
+ */
+
+static const struct file_operations sch311x_wdt_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.write		= sch311x_wdt_write,
+	.unlocked_ioctl	= sch311x_wdt_ioctl,
+	.open		= sch311x_wdt_open,
+	.release	= sch311x_wdt_close,
+};
+
+static struct miscdevice sch311x_wdt_miscdev = {
+	.minor	= WATCHDOG_MINOR,
+	.name	= "watchdog",
+	.fops	= &sch311x_wdt_fops,
+};
+
+/*
+ *	Init & exit routines
+ */
+
+static int __devinit sch311x_wdt_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	unsigned char val;
+	int err;
+
+	spin_lock_init(&sch311x_wdt_data.io_lock);
+
+	if (!request_region(sch311x_wdt_data.runtime_reg + RESGEN, 1,
+								DRV_NAME)) {
+		dev_err(dev, "Failed to request region 0x%04x-0x%04x.\n",
+			sch311x_wdt_data.runtime_reg + RESGEN,
+			sch311x_wdt_data.runtime_reg + RESGEN);
+		err = -EBUSY;
+		goto exit;
+	}
+
+	if (!request_region(sch311x_wdt_data.runtime_reg + GP60, 1, DRV_NAME)) {
+		dev_err(dev, "Failed to request region 0x%04x-0x%04x.\n",
+			sch311x_wdt_data.runtime_reg + GP60,
+			sch311x_wdt_data.runtime_reg + GP60);
+		err = -EBUSY;
+		goto exit_release_region;
+	}
+
+	if (!request_region(sch311x_wdt_data.runtime_reg + WDT_TIME_OUT, 4,
+								DRV_NAME)) {
+		dev_err(dev, "Failed to request region 0x%04x-0x%04x.\n",
+			sch311x_wdt_data.runtime_reg + WDT_TIME_OUT,
+			sch311x_wdt_data.runtime_reg + WDT_CTRL);
+		err = -EBUSY;
+		goto exit_release_region2;
+	}
+
+	/* Make sure that the watchdog is not running */
+	sch311x_wdt_stop();
+
+	/* Disable keyboard and mouse interaction and interrupt */
+	/* -- Watchdog timer configuration --
+	 * Bit 0   Reserved
+	 * Bit 1   Keyboard enable: 0* = No Reset, 1 = Reset WDT upon KBD Intr.
+	 * Bit 2   Mouse enable: 0* = No Reset, 1 = Reset WDT upon Mouse Intr
+	 * Bit 3   Reserved
+	 * Bit 4-7 WDT Interrupt Mapping: (0000* = Disabled,
+	 *            0001=IRQ1, 0010=(Invalid), 0011=IRQ3 to 1111=IRQ15)
+	 */
+	outb(0, sch311x_wdt_data.runtime_reg + WDT_CFG);
+
+	/* Check that the heartbeat value is within it's range ;
+	 * if not reset to the default */
+	if (sch311x_wdt_set_heartbeat(timeout)) {
+		sch311x_wdt_set_heartbeat(WATCHDOG_TIMEOUT);
+		dev_info(dev, "timeout value must be 1<=x<=15300, using %d\n",
+			timeout);
+	}
+
+	/* Get status at boot */
+	sch311x_wdt_get_status(&sch311x_wdt_data.boot_status);
+
+	/* enable watchdog */
+	/* -- Reset Generator --
+	 * Bit 0   Enable Watchdog Timer Generation: 0* = Enabled, 1 = Disabled
+	 * Bit 1   Thermtrip Source Select: O* = No Source, 1 = Source
+	 * Bit 2   WDT2_CTL: WDT input bit
+	 * Bit 3-7 Reserved
+	 */
+	outb(0, sch311x_wdt_data.runtime_reg + RESGEN);
+	val = therm_trip ? 0x06 : 0x04;
+	outb(val, sch311x_wdt_data.runtime_reg + RESGEN);
+
+	err = misc_register(&sch311x_wdt_miscdev);
+	if (err != 0) {
+		dev_err(dev, "cannot register miscdev on minor=%d (err=%d)\n",
+							WATCHDOG_MINOR, err);
+		goto exit_release_region3;
+	}
+
+	sch311x_wdt_miscdev.parent = dev;
+
+	dev_info(dev,
+		"SMSC SCH311x WDT initialized. timeout=%d sec (nowayout=%d)\n",
+		timeout, nowayout);
+
+	return 0;
+
+exit_release_region3:
+	release_region(sch311x_wdt_data.runtime_reg + WDT_TIME_OUT, 4);
+exit_release_region2:
+	release_region(sch311x_wdt_data.runtime_reg + GP60, 1);
+exit_release_region:
+	release_region(sch311x_wdt_data.runtime_reg + RESGEN, 1);
+	sch311x_wdt_data.runtime_reg = 0;
+exit:
+	return err;
+}
+
+static int __devexit sch311x_wdt_remove(struct platform_device *pdev)
+{
+	/* Stop the timer before we leave */
+	if (!nowayout)
+		sch311x_wdt_stop();
+
+	/* Deregister */
+	misc_deregister(&sch311x_wdt_miscdev);
+	release_region(sch311x_wdt_data.runtime_reg + WDT_TIME_OUT, 4);
+	release_region(sch311x_wdt_data.runtime_reg + GP60, 1);
+	release_region(sch311x_wdt_data.runtime_reg + RESGEN, 1);
+	sch311x_wdt_data.runtime_reg = 0;
+	return 0;
+}
+
+static void sch311x_wdt_shutdown(struct platform_device *dev)
+{
+	/* Turn the WDT off if we have a soft shutdown */
+	sch311x_wdt_stop();
+}
+
+#define sch311x_wdt_suspend NULL
+#define sch311x_wdt_resume  NULL
+
+static struct platform_driver sch311x_wdt_driver = {
+	.probe		= sch311x_wdt_probe,
+	.remove		= __devexit_p(sch311x_wdt_remove),
+	.shutdown	= sch311x_wdt_shutdown,
+	.suspend	= sch311x_wdt_suspend,
+	.resume		= sch311x_wdt_resume,
+	.driver		= {
+		.owner = THIS_MODULE,
+		.name = DRV_NAME,
+	},
+};
+
+static int __init sch311x_detect(int sio_config_port, unsigned short *addr)
+{
+	int err = 0, reg;
+	unsigned short base_addr;
+	unsigned char dev_id;
+
+	sch311x_sio_enter(sio_config_port);
+
+	/* Check device ID. We currently know about:
+	 * SCH3112 (0x7c), SCH3114 (0x7d), and SCH3116 (0x7f). */
+	reg = force_id ? force_id : sch311x_sio_inb(sio_config_port, 0x20);
+	if (!(reg == 0x7c || reg == 0x7d || reg == 0x7f)) {
+		err = -ENODEV;
+		goto exit;
+	}
+	dev_id = reg == 0x7c ? 2 : reg == 0x7d ? 4 : 6;
+
+	/* Select logical device A (runtime registers) */
+	sch311x_sio_outb(sio_config_port, 0x07, 0x0a);
+
+	/* Check if Logical Device Register is currently active */
+	if (sch311x_sio_inb(sio_config_port, 0x30) && 0x01 == 0)
+		printk(KERN_INFO PFX "Seems that LDN 0x0a is not active...\n");
+
+	/* Get the base address of the runtime registers */
+	base_addr = (sch311x_sio_inb(sio_config_port, 0x60) << 8) |
+			   sch311x_sio_inb(sio_config_port, 0x61);
+	if (!base_addr) {
+		printk(KERN_ERR PFX "Base address not set.\n");
+		err = -ENODEV;
+		goto exit;
+	}
+	*addr = base_addr;
+
+	printk(KERN_INFO PFX "Found an SMSC SCH311%d chip at 0x%04x\n",
+		dev_id, base_addr);
+
+exit:
+	sch311x_sio_exit(sio_config_port);
+	return err;
+}
+
+static int __init sch311x_wdt_init(void)
+{
+	int err, i, found = 0;
+	unsigned short addr = 0;
+
+	for (i = 0; !found && sch311x_ioports[i]; i++)
+		if (sch311x_detect(sch311x_ioports[i], &addr) == 0)
+			found++;
+
+	if (!found)
+		return -ENODEV;
+
+	sch311x_wdt_data.runtime_reg = addr;
+
+	err = platform_driver_register(&sch311x_wdt_driver);
+	if (err)
+		return err;
+
+	sch311x_wdt_pdev = platform_device_register_simple(DRV_NAME, addr,
+								NULL, 0);
+
+	if (IS_ERR(sch311x_wdt_pdev)) {
+		err = PTR_ERR(sch311x_wdt_pdev);
+		goto unreg_platform_driver;
+	}
+
+	return 0;
+
+unreg_platform_driver:
+	platform_driver_unregister(&sch311x_wdt_driver);
+	return err;
+}
+
+static void __exit sch311x_wdt_exit(void)
+{
+	platform_device_unregister(sch311x_wdt_pdev);
+	platform_driver_unregister(&sch311x_wdt_driver);
+}
+
+module_init(sch311x_wdt_init);
+module_exit(sch311x_wdt_exit);
+
+MODULE_AUTHOR("Wim Van Sebroeck <wim@iguana.be>");
+MODULE_DESCRIPTION("SMSC SCH311x WatchDog Timer Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
+
-- 
cgit v0.10.2


From 006948bafece27265dce72d3158b12af3ff67fce Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 6 Nov 2008 10:56:21 +0000
Subject: [WATCHDOG] Add support for the WM8350 watchdog

This driver implements support for the watchdog functionality provided
by the Wolfson Microelectronics WM8350, a multi-function audio and
power management subsystem intended for use in embedded systems. It is
based on a driver originally written by Graeme Gregory, though it has
been extensively modified since then.

Use of a GPIO to kick the watchdog is not yet supported.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 81f7021..ec68c74 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -55,6 +55,13 @@ config SOFT_WATCHDOG
 	  To compile this driver as a module, choose M here: the
 	  module will be called softdog.
 
+config WM8350_WATCHDOG
+	tristate "WM8350 watchdog"
+	depends on MFD_WM8350
+	help
+	  Support for the watchdog in the WM8350 AudioPlus PMIC.  When
+	  the watchdog triggers the system will be reset.
+
 # ALPHA Architecture
 
 # ARM Architecture
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 0cd4786..c19b866 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -134,4 +134,5 @@ obj-$(CONFIG_WATCHDOG_CP1XXX)		+= cpwd.o
 # XTENSA Architecture
 
 # Architecture Independant
+obj-$(CONFIG_WM8350_WATCHDOG) += wm8350_wdt.o
 obj-$(CONFIG_SOFT_WATCHDOG) += softdog.o
diff --git a/drivers/watchdog/wm8350_wdt.c b/drivers/watchdog/wm8350_wdt.c
new file mode 100644
index 0000000..2bc0d4d
--- /dev/null
+++ b/drivers/watchdog/wm8350_wdt.c
@@ -0,0 +1,329 @@
+/*
+ * Watchdog driver for the wm8350
+ *
+ * Copyright (C) 2007, 2008 Wolfson Microelectronics <linux@wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+#include <linux/uaccess.h>
+#include <linux/mfd/wm8350/core.h>
+
+static int nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, int, 0);
+MODULE_PARM_DESC(nowayout,
+		 "Watchdog cannot be stopped once started (default="
+		 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+static unsigned long wm8350_wdt_users;
+static struct miscdevice wm8350_wdt_miscdev;
+static int wm8350_wdt_expect_close;
+static DEFINE_MUTEX(wdt_mutex);
+
+static struct {
+	int time;  /* Seconds */
+	u16 val;   /* To be set in WM8350_SYSTEM_CONTROL_2 */
+} wm8350_wdt_cfgs[] = {
+	{ 1, 0x02 },
+	{ 2, 0x04 },
+	{ 4, 0x05 },
+};
+
+static struct wm8350 *get_wm8350(void)
+{
+	return dev_get_drvdata(wm8350_wdt_miscdev.parent);
+}
+
+static int wm8350_wdt_set_timeout(struct wm8350 *wm8350, u16 value)
+{
+	int ret;
+	u16 reg;
+
+	mutex_lock(&wdt_mutex);
+	wm8350_reg_unlock(wm8350);
+
+	reg = wm8350_reg_read(wm8350, WM8350_SYSTEM_CONTROL_2);
+	reg &= ~WM8350_WDOG_TO_MASK;
+	reg |= value;
+	ret = wm8350_reg_write(wm8350, WM8350_SYSTEM_CONTROL_2, reg);
+
+	wm8350_reg_lock(wm8350);
+	mutex_unlock(&wdt_mutex);
+
+	return ret;
+}
+
+static int wm8350_wdt_start(struct wm8350 *wm8350)
+{
+	int ret;
+	u16 reg;
+
+	mutex_lock(&wdt_mutex);
+	wm8350_reg_unlock(wm8350);
+
+	reg = wm8350_reg_read(wm8350, WM8350_SYSTEM_CONTROL_2);
+	reg &= ~WM8350_WDOG_MODE_MASK;
+	reg |= 0x20;
+	ret = wm8350_reg_write(wm8350, WM8350_SYSTEM_CONTROL_2, reg);
+
+	wm8350_reg_lock(wm8350);
+	mutex_unlock(&wdt_mutex);
+
+	return ret;
+}
+
+static int wm8350_wdt_stop(struct wm8350 *wm8350)
+{
+	int ret;
+	u16 reg;
+
+	mutex_lock(&wdt_mutex);
+	wm8350_reg_unlock(wm8350);
+
+	reg = wm8350_reg_read(wm8350, WM8350_SYSTEM_CONTROL_2);
+	reg &= ~WM8350_WDOG_MODE_MASK;
+	ret = wm8350_reg_write(wm8350, WM8350_SYSTEM_CONTROL_2, reg);
+
+	wm8350_reg_lock(wm8350);
+	mutex_unlock(&wdt_mutex);
+
+	return ret;
+}
+
+static int wm8350_wdt_kick(struct wm8350 *wm8350)
+{
+	int ret;
+	u16 reg;
+
+	mutex_lock(&wdt_mutex);
+
+	reg = wm8350_reg_read(wm8350, WM8350_SYSTEM_CONTROL_2);
+	ret = wm8350_reg_write(wm8350, WM8350_SYSTEM_CONTROL_2, reg);
+
+	mutex_unlock(&wdt_mutex);
+
+	return ret;
+}
+
+static int wm8350_wdt_open(struct inode *inode, struct file *file)
+{
+	struct wm8350 *wm8350 = get_wm8350();
+	int ret;
+
+	if (!wm8350)
+		return -ENODEV;
+
+	if (test_and_set_bit(0, &wm8350_wdt_users))
+		return -EBUSY;
+
+	ret = wm8350_wdt_start(wm8350);
+	if (ret != 0)
+		return ret;
+
+	return nonseekable_open(inode, file);
+}
+
+static int wm8350_wdt_release(struct inode *inode, struct file *file)
+{
+	struct wm8350 *wm8350 = get_wm8350();
+
+	if (wm8350_wdt_expect_close)
+		wm8350_wdt_stop(wm8350);
+	else {
+		dev_warn(wm8350->dev, "Watchdog device closed uncleanly\n");
+		wm8350_wdt_kick(wm8350);
+	}
+
+	clear_bit(0, &wm8350_wdt_users);
+
+	return 0;
+}
+
+static ssize_t wm8350_wdt_write(struct file *file,
+				const char __user *data, size_t count,
+				loff_t *ppos)
+{
+	struct wm8350 *wm8350 = get_wm8350();
+	size_t i;
+
+	if (count) {
+		wm8350_wdt_kick(wm8350);
+
+		if (!nowayout) {
+			/* In case it was set long ago */
+			wm8350_wdt_expect_close = 0;
+
+			/* scan to see whether or not we got the magic
+			   character */
+			for (i = 0; i != count; i++) {
+				char c;
+				if (get_user(c, data + i))
+					return -EFAULT;
+				if (c == 'V')
+					wm8350_wdt_expect_close = 42;
+			}
+		}
+	}
+	return count;
+}
+
+static struct watchdog_info ident = {
+	.options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
+	.identity = "WM8350 Watchdog",
+};
+
+static long wm8350_wdt_ioctl(struct file *file, unsigned int cmd,
+			     unsigned long arg)
+{
+	struct wm8350 *wm8350 = get_wm8350();
+	int ret = -ENOTTY, time, i;
+	void __user *argp = (void __user *)arg;
+	int __user *p = argp;
+	u16 reg;
+
+	switch (cmd) {
+	case WDIOC_GETSUPPORT:
+		ret = copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0;
+		break;
+
+	case WDIOC_GETSTATUS:
+	case WDIOC_GETBOOTSTATUS:
+		ret = put_user(0, p);
+		break;
+
+	case WDIOC_SETOPTIONS:
+	{
+		int options;
+
+		if (get_user(options, p))
+			return -EFAULT;
+
+		ret = -EINVAL;
+
+		/* Setting both simultaneously means at least one must fail */
+		if (options == WDIOS_DISABLECARD)
+			ret = wm8350_wdt_start(wm8350);
+
+		if (options == WDIOS_ENABLECARD)
+			ret = wm8350_wdt_stop(wm8350);
+		break;
+	}
+
+	case WDIOC_KEEPALIVE:
+		ret = wm8350_wdt_kick(wm8350);
+		break;
+
+	case WDIOC_SETTIMEOUT:
+		ret = get_user(time, p);
+		if (ret)
+			break;
+
+		if (time == 0) {
+			if (nowayout)
+				ret = -EINVAL;
+			else
+				wm8350_wdt_stop(wm8350);
+			break;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(wm8350_wdt_cfgs); i++)
+			if (wm8350_wdt_cfgs[i].time == time)
+				break;
+		if (i == ARRAY_SIZE(wm8350_wdt_cfgs))
+			ret = -EINVAL;
+		else
+			ret = wm8350_wdt_set_timeout(wm8350,
+						     wm8350_wdt_cfgs[i].val);
+		break;
+
+	case WDIOC_GETTIMEOUT:
+		reg = wm8350_reg_read(wm8350, WM8350_SYSTEM_CONTROL_2);
+		reg &= WM8350_WDOG_TO_MASK;
+		for (i = 0; i < ARRAY_SIZE(wm8350_wdt_cfgs); i++)
+			if (wm8350_wdt_cfgs[i].val == reg)
+				break;
+		if (i == ARRAY_SIZE(wm8350_wdt_cfgs)) {
+			dev_warn(wm8350->dev,
+				 "Unknown watchdog configuration: %x\n", reg);
+			ret = -EINVAL;
+		} else
+			ret = put_user(wm8350_wdt_cfgs[i].time, p);
+
+	}
+
+	return ret;
+}
+
+static const struct file_operations wm8350_wdt_fops = {
+	.owner = THIS_MODULE,
+	.llseek = no_llseek,
+	.write = wm8350_wdt_write,
+	.unlocked_ioctl = wm8350_wdt_ioctl,
+	.open = wm8350_wdt_open,
+	.release = wm8350_wdt_release,
+};
+
+static struct miscdevice wm8350_wdt_miscdev = {
+	.minor = WATCHDOG_MINOR,
+	.name = "watchdog",
+	.fops = &wm8350_wdt_fops,
+};
+
+static int wm8350_wdt_probe(struct platform_device *pdev)
+{
+	struct wm8350 *wm8350 = platform_get_drvdata(pdev);
+
+	if (!wm8350) {
+		dev_err(wm8350->dev, "No driver data supplied\n");
+		return -ENODEV;
+	}
+
+	/* Default to 4s timeout */
+	wm8350_wdt_set_timeout(wm8350, 0x05);
+
+	wm8350_wdt_miscdev.parent = &pdev->dev;
+
+	return misc_register(&wm8350_wdt_miscdev);
+}
+
+static int __exit wm8350_wdt_remove(struct platform_device *pdev)
+{
+	misc_deregister(&wm8350_wdt_miscdev);
+
+	return 0;
+}
+
+static struct platform_driver wm8350_wdt_driver = {
+	.probe = wm8350_wdt_probe,
+	.remove = wm8350_wdt_remove,
+	.driver = {
+		.name = "wm8350-wdt",
+	},
+};
+
+static int __init wm8350_wdt_init(void)
+{
+	return platform_driver_register(&wm8350_wdt_driver);
+}
+module_init(wm8350_wdt_init);
+
+static void __exit wm8350_wdt_exit(void)
+{
+	platform_driver_unregister(&wm8350_wdt_driver);
+}
+module_exit(wm8350_wdt_exit);
+
+MODULE_AUTHOR("Mark Brown");
+MODULE_DESCRIPTION("WM8350 Watchdog");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:wm8350-wdt");
-- 
cgit v0.10.2


From 092f82edbe96d0a08e1d10436927e89fa101fe0d Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 28 Sep 2008 16:15:56 -0700
Subject: pci: use pci_ioremap_bar() in drivers/mmc

Use the new pci_ioremap_bar() function in drivers/mmc.
pci_ioremap_bar() just takes a pci device and a bar number, with the goal
of making it really hard to get wrong, while also having a central place
to stick sanity checks.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 9bd7026..f07255c 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -545,7 +545,7 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot(
 	}
 
 	addr = pci_resource_start(pdev, bar);
-	host->ioaddr = ioremap_nocache(addr, pci_resource_len(pdev, bar));
+	host->ioaddr = pci_ioremap_bar(pdev, bar);
 	if (!host->ioaddr) {
 		dev_err(&pdev->dev, "failed to remap registers\n");
 		goto release;
-- 
cgit v0.10.2


From b7a03210b7b381e06f71751cb9addfae7704489c Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 22 Oct 2008 17:09:00 -0700
Subject: mmc: trivial annotation of 'blocks'

sg_init_one is reading a be32, annotate as such.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 3d067c3..903c8aa 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -145,7 +145,7 @@ struct mmc_blk_request {
 static u32 mmc_sd_num_wr_blocks(struct mmc_card *card)
 {
 	int err;
-	u32 blocks;
+	__be32 blocks;
 
 	struct mmc_request mrq;
 	struct mmc_command cmd;
@@ -204,9 +204,7 @@ static u32 mmc_sd_num_wr_blocks(struct mmc_card *card)
 	if (cmd.error || data.error)
 		return (u32)-1;
 
-	blocks = ntohl(blocks);
-
-	return blocks;
+	return ntohl(blocks);
 }
 
 static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
-- 
cgit v0.10.2


From 35ff8554d12ecc80a46ea0d9bce34fe28733ff38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Piel?= <eric.piel@tremplin-utc.net>
Date: Sat, 22 Nov 2008 19:29:29 +0100
Subject: sdhci: activate led support also when module

CONFIG_LEDS_CLASS is defined only if led-class is built-in, otherwise
when it is a module the option is called CONFIG_LEDS_CLASS_MODULE. Led
support should also be activated in this case.

Signed-off-by: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 4d010a9..3b1b54f 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -149,7 +149,7 @@ static void sdhci_deactivate_led(struct sdhci_host *host)
 	writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL);
 }
 
-#ifdef CONFIG_LEDS_CLASS
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 static void sdhci_led_control(struct led_classdev *led,
 	enum led_brightness brightness)
 {
@@ -994,7 +994,7 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 
 	WARN_ON(host->mrq != NULL);
 
-#ifndef CONFIG_LEDS_CLASS
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 	sdhci_activate_led(host);
 #endif
 
@@ -1201,7 +1201,7 @@ static void sdhci_tasklet_finish(unsigned long param)
 	host->cmd = NULL;
 	host->data = NULL;
 
-#ifndef CONFIG_LEDS_CLASS
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 	sdhci_deactivate_led(host);
 #endif
 
@@ -1717,7 +1717,7 @@ int sdhci_add_host(struct sdhci_host *host)
 	sdhci_dumpregs(host);
 #endif
 
-#ifdef CONFIG_LEDS_CLASS
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 	host->led.name = mmc_hostname(mmc);
 	host->led.brightness = LED_OFF;
 	host->led.default_trigger = mmc_hostname(mmc);
@@ -1739,7 +1739,7 @@ int sdhci_add_host(struct sdhci_host *host)
 
 	return 0;
 
-#ifdef CONFIG_LEDS_CLASS
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 reset:
 	sdhci_reset(host, SDHCI_RESET_ALL);
 	free_irq(host->irq, host);
@@ -1775,7 +1775,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 
 	mmc_remove_host(host->mmc);
 
-#ifdef CONFIG_LEDS_CLASS
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 	led_classdev_unregister(&host->led);
 #endif
 
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 31f4b15..3efba23 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -220,7 +220,7 @@ struct sdhci_host {
 	struct mmc_host		*mmc;		/* MMC structure */
 	u64			dma_mask;	/* custom DMA mask */
 
-#ifdef CONFIG_LEDS_CLASS
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 	struct led_classdev	led;		/* LED control */
 #endif
 
-- 
cgit v0.10.2


From b30f8af3358b5c66be223e3a9f3d11b3d02b4a8f Mon Sep 17 00:00:00 2001
From: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Date: Mon, 17 Nov 2008 14:35:21 +0200
Subject: mmc: Add 8-bit bus width support

Signed-off-by: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index fdd7c76..c232d11 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -434,13 +434,24 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 * Activate wide bus (if supported).
 	 */
 	if ((card->csd.mmca_vsn >= CSD_SPEC_VER_4) &&
-		(host->caps & MMC_CAP_4_BIT_DATA)) {
+	    (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) {
+		unsigned ext_csd_bit, bus_width;
+
+		if (host->caps & MMC_CAP_8_BIT_DATA) {
+			ext_csd_bit = EXT_CSD_BUS_WIDTH_8;
+			bus_width = MMC_BUS_WIDTH_8;
+		} else {
+			ext_csd_bit = EXT_CSD_BUS_WIDTH_4;
+			bus_width = MMC_BUS_WIDTH_4;
+		}
+
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-			EXT_CSD_BUS_WIDTH, EXT_CSD_BUS_WIDTH_4);
+				 EXT_CSD_BUS_WIDTH, ext_csd_bit);
+
 		if (err)
 			goto free_card;
 
-		mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+		mmc_set_bus_width(card->host, bus_width);
 	}
 
 	if (!oldcard)
@@ -624,4 +635,3 @@ err:
 
 	return err;
 }
-
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index f842f23..4e45725 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -41,6 +41,7 @@ struct mmc_ios {
 
 #define MMC_BUS_WIDTH_1		0
 #define MMC_BUS_WIDTH_4		2
+#define MMC_BUS_WIDTH_8		3
 
 	unsigned char	timing;			/* timing specification used */
 
@@ -116,6 +117,7 @@ struct mmc_host {
 #define MMC_CAP_SDIO_IRQ	(1 << 3)	/* Can signal pending SDIO IRQs */
 #define MMC_CAP_SPI		(1 << 4)	/* Talks only SPI protocols */
 #define MMC_CAP_NEEDS_POLL	(1 << 5)	/* Needs polling for card-detection */
+#define MMC_CAP_8_BIT_DATA	(1 << 6)	/* Can the host do 8 bit transfers */
 
 	/* host specific block data */
 	unsigned int		max_seg_size;	/* see blk_queue_max_segment_size */
-- 
cgit v0.10.2


From 0527a60c2b6bd7ab20e82cc5e488659e20eaaacd Mon Sep 17 00:00:00 2001
From: "philipl@overt.org" <philipl@overt.org>
Date: Sun, 30 Nov 2008 20:27:50 -0500
Subject: ricoh_mmc: Handle newer models of Ricoh controllers

The latest generation of laptops are shipping with a newer
model of Ricoh chip where the firewire controller is the
primary PCI function but a cardbus controller is also present.

The existing code assumes that if a cardbus controller is,
present, then it must be the one to manipulate - but the real
rule is that you manipulate PCI function 0. This patch adds an
additional constraint that the target must be function 0.

Signed-off-by: Philip Langdale <philipl@overt.org>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/host/ricoh_mmc.c b/drivers/mmc/host/ricoh_mmc.c
index a16d760..be9e7b3 100644
--- a/drivers/mmc/host/ricoh_mmc.c
+++ b/drivers/mmc/host/ricoh_mmc.c
@@ -11,9 +11,10 @@
 
 /*
  * This is a conceptually ridiculous driver, but it is required by the way
- * the Ricoh multi-function R5C832 works. This chip implements firewire
- * and four different memory card controllers. Two of those controllers are
- * an SDHCI controller and a proprietary MMC controller. The linux SDHCI
+ * the Ricoh multi-function chips (R5CXXX) work. These chips implement
+ * the four main memory card controllers (SD, MMC, MS, xD) and one or both
+ * of cardbus or firewire. It happens that they implement SD and MMC
+ * support as separate controllers (and PCI functions). The linux SDHCI
  * driver supports MMC cards but the chip detects MMC cards in hardware
  * and directs them to the MMC controller - so the SDHCI driver never sees
  * them. To get around this, we must disable the useless MMC controller.
@@ -21,8 +22,10 @@
  * a detection event occurs immediately, even if the MMC card is already
  * in the reader.
  *
- * The relevant registers live on the firewire function, so this is unavoidably
- * ugly. Such is life.
+ * It seems to be the case that the relevant PCI registers to deactivate the
+ * MMC controller live on PCI function 0, which might be the cardbus controller
+ * or the firewire controller, depending on the particular chip in question. As
+ * such, it makes what this driver has to do unavoidably ugly. Such is life.
  */
 
 #include <linux/pci.h>
@@ -143,6 +146,7 @@ static int __devinit ricoh_mmc_probe(struct pci_dev *pdev,
 		pci_get_device(PCI_VENDOR_ID_RICOH,
 			PCI_DEVICE_ID_RICOH_RL5C476, fw_dev))) {
 		if (PCI_SLOT(pdev->devfn) == PCI_SLOT(fw_dev->devfn) &&
+		    PCI_FUNC(fw_dev->devfn) == 0 &&
 		    pdev->bus == fw_dev->bus) {
 			if (ricoh_mmc_disable(fw_dev) != 0)
 				return -ENODEV;
@@ -160,6 +164,7 @@ static int __devinit ricoh_mmc_probe(struct pci_dev *pdev,
 	    (fw_dev = pci_get_device(PCI_VENDOR_ID_RICOH,
 					PCI_DEVICE_ID_RICOH_R5C832, fw_dev))) {
 		if (PCI_SLOT(pdev->devfn) == PCI_SLOT(fw_dev->devfn) &&
+		    PCI_FUNC(fw_dev->devfn) == 0 &&
 		    pdev->bus == fw_dev->bus) {
 			if (ricoh_mmc_disable(fw_dev) != 0)
 				return -ENODEV;
@@ -172,7 +177,7 @@ static int __devinit ricoh_mmc_probe(struct pci_dev *pdev,
 
 	if (!ctrlfound) {
 		printk(KERN_WARNING DRIVER_NAME
-		       ": Main firewire function not found. Cannot disable controller.\n");
+		       ": Main Ricoh function not found. Cannot disable controller.\n");
 		return -ENODEV;
 	}
 
-- 
cgit v0.10.2


From 86e8286a0e48663e1e86a5884b30a6d05de2993a Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 26 Nov 2008 22:54:17 +0300
Subject: mmc: Add mmc_vddrange_to_ocrmask() helper function

This function sets the OCR mask bits according to provided voltage
ranges. Will be used by the mmc_spi OpenFirmware bindings.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index f7284b9..5f288ae 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -20,6 +20,7 @@
 #include <linux/err.h>
 #include <linux/leds.h>
 #include <linux/scatterlist.h>
+#include <linux/log2.h>
 
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
@@ -448,6 +449,80 @@ void mmc_set_bus_width(struct mmc_host *host, unsigned int width)
 	mmc_set_ios(host);
 }
 
+/**
+ * mmc_vdd_to_ocrbitnum - Convert a voltage to the OCR bit number
+ * @vdd:	voltage (mV)
+ * @low_bits:	prefer low bits in boundary cases
+ *
+ * This function returns the OCR bit number according to the provided @vdd
+ * value. If conversion is not possible a negative errno value returned.
+ *
+ * Depending on the @low_bits flag the function prefers low or high OCR bits
+ * on boundary voltages. For example,
+ * with @low_bits = true, 3300 mV translates to ilog2(MMC_VDD_32_33);
+ * with @low_bits = false, 3300 mV translates to ilog2(MMC_VDD_33_34);
+ *
+ * Any value in the [1951:1999] range translates to the ilog2(MMC_VDD_20_21).
+ */
+static int mmc_vdd_to_ocrbitnum(int vdd, bool low_bits)
+{
+	const int max_bit = ilog2(MMC_VDD_35_36);
+	int bit;
+
+	if (vdd < 1650 || vdd > 3600)
+		return -EINVAL;
+
+	if (vdd >= 1650 && vdd <= 1950)
+		return ilog2(MMC_VDD_165_195);
+
+	if (low_bits)
+		vdd -= 1;
+
+	/* Base 2000 mV, step 100 mV, bit's base 8. */
+	bit = (vdd - 2000) / 100 + 8;
+	if (bit > max_bit)
+		return max_bit;
+	return bit;
+}
+
+/**
+ * mmc_vddrange_to_ocrmask - Convert a voltage range to the OCR mask
+ * @vdd_min:	minimum voltage value (mV)
+ * @vdd_max:	maximum voltage value (mV)
+ *
+ * This function returns the OCR mask bits according to the provided @vdd_min
+ * and @vdd_max values. If conversion is not possible the function returns 0.
+ *
+ * Notes wrt boundary cases:
+ * This function sets the OCR bits for all boundary voltages, for example
+ * [3300:3400] range is translated to MMC_VDD_32_33 | MMC_VDD_33_34 |
+ * MMC_VDD_34_35 mask.
+ */
+u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max)
+{
+	u32 mask = 0;
+
+	if (vdd_max < vdd_min)
+		return 0;
+
+	/* Prefer high bits for the boundary vdd_max values. */
+	vdd_max = mmc_vdd_to_ocrbitnum(vdd_max, false);
+	if (vdd_max < 0)
+		return 0;
+
+	/* Prefer low bits for the boundary vdd_min values. */
+	vdd_min = mmc_vdd_to_ocrbitnum(vdd_min, true);
+	if (vdd_min < 0)
+		return 0;
+
+	/* Fill the mask, from max bit to min bit. */
+	while (vdd_max >= vdd_min)
+		mask |= 1 << vdd_max--;
+
+	return mask;
+}
+EXPORT_SYMBOL(mmc_vddrange_to_ocrmask);
+
 /*
  * Mask off any voltages we don't support and select
  * the lowest voltage
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 143cebf..7ac8b50 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -151,4 +151,6 @@ static inline void mmc_claim_host(struct mmc_host *host)
 	__mmc_claim_host(host, NULL);
 }
 
+extern u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max);
+
 #endif
-- 
cgit v0.10.2


From 504f191f25b1671802246bac06c9f59f94f0b7de Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Thu, 16 Oct 2008 12:55:25 +0300
Subject: mmc_block: print better error messages

Add command response and card status to error
messages.

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 903c8aa..cc9b3ab 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -207,6 +207,23 @@ static u32 mmc_sd_num_wr_blocks(struct mmc_card *card)
 	return ntohl(blocks);
 }
 
+static u32 get_card_status(struct mmc_card *card, struct request *req)
+{
+	struct mmc_command cmd;
+	int err;
+
+	memset(&cmd, 0, sizeof(struct mmc_command));
+	cmd.opcode = MMC_SEND_STATUS;
+	if (!mmc_host_is_spi(card->host))
+		cmd.arg = card->rca << 16;
+	cmd.flags = MMC_RSP_SPI_R2 | MMC_RSP_R1 | MMC_CMD_AC;
+	err = mmc_wait_for_cmd(card->host, &cmd, 0);
+	if (err)
+		printk(KERN_ERR "%s: error %d sending status comand",
+		       req->rq_disk->disk_name, err);
+	return cmd.resp[0];
+}
+
 static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 {
 	struct mmc_blk_data *md = mq->data;
@@ -218,7 +235,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 
 	do {
 		struct mmc_command cmd;
-		u32 readcmd, writecmd;
+		u32 readcmd, writecmd, status = 0;
 
 		memset(&brq, 0, sizeof(struct mmc_blk_request));
 		brq.mrq.cmd = &brq.cmd;
@@ -273,19 +290,32 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		 * until later as we need to wait for the card to leave
 		 * programming mode even when things go wrong.
 		 */
+		if (brq.cmd.error || brq.data.error || brq.stop.error)
+			status = get_card_status(card, req);
+
 		if (brq.cmd.error) {
-			printk(KERN_ERR "%s: error %d sending read/write command\n",
-			       req->rq_disk->disk_name, brq.cmd.error);
+			printk(KERN_ERR "%s: error %d sending read/write "
+			       "command, response %#x, card status %#x\n",
+			       req->rq_disk->disk_name, brq.cmd.error,
+			       brq.cmd.resp[0], status);
 		}
 
 		if (brq.data.error) {
-			printk(KERN_ERR "%s: error %d transferring data\n",
-			       req->rq_disk->disk_name, brq.data.error);
+			if (brq.data.error == -ETIMEDOUT && brq.mrq.stop)
+				/* 'Stop' response contains card status */
+				status = brq.mrq.stop->resp[0];
+			printk(KERN_ERR "%s: error %d transferring data,"
+			       " sector %u, nr %u, card status %#x\n",
+			       req->rq_disk->disk_name, brq.data.error,
+			       (unsigned)req->sector,
+			       (unsigned)req->nr_sectors, status);
 		}
 
 		if (brq.stop.error) {
-			printk(KERN_ERR "%s: error %d sending stop command\n",
-			       req->rq_disk->disk_name, brq.stop.error);
+			printk(KERN_ERR "%s: error %d sending stop command, "
+			       "response %#x, card status %#x\n",
+			       req->rq_disk->disk_name, brq.stop.error,
+			       brq.stop.resp[0], status);
 		}
 
 		if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
-- 
cgit v0.10.2


From ca4f10563929b932ed8970fda41a7f99385e4b0b Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Sat, 13 Dec 2008 21:21:33 +0100
Subject: mmc: balanc pci_iomap with pci_iounmap

balance pci_iomap with pci_iounmap, not iounmap

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/host/sdricoh_cs.c b/drivers/mmc/host/sdricoh_cs.c
index 1df44d9..d4748a8 100644
--- a/drivers/mmc/host/sdricoh_cs.c
+++ b/drivers/mmc/host/sdricoh_cs.c
@@ -463,7 +463,7 @@ static int sdricoh_init_mmc(struct pci_dev *pci_dev,
 
 err:
 	if (iobase)
-		iounmap(iobase);
+		pci_iounmap(pci_dev, iobase);
 	if (mmc)
 		mmc_free_host(mmc);
 
-- 
cgit v0.10.2


From f9134319c81c6c56e0ddf38e7adac2492b243d9b Mon Sep 17 00:00:00 2001
From: Pierre Ossman <drzeus@drzeus.cx>
Date: Sun, 21 Dec 2008 17:01:48 +0100
Subject: sdhci: handle built-in sdhci with modular leds class

As reported by Randy Dunlap, having sdhci built-in and LEDs class
as a module resulted in undefined symbols. Change the code to handle
that case properly (by not having LEDs class support in sdhci).

Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 3b1b54f..6b2d1f9 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -30,6 +30,11 @@
 #define DBG(f, x...) \
 	pr_debug(DRIVER_NAME " [%s()]: " f, __func__,## x)
 
+#if defined(CONFIG_LEDS_CLASS) || (defined(CONFIG_LEDS_CLASS_MODULE) && \
+	defined(CONFIG_MMC_SDHCI_MODULE))
+#define SDHCI_USE_LEDS_CLASS
+#endif
+
 static unsigned int debug_quirks = 0;
 
 static void sdhci_prepare_data(struct sdhci_host *, struct mmc_data *);
@@ -149,7 +154,7 @@ static void sdhci_deactivate_led(struct sdhci_host *host)
 	writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL);
 }
 
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#ifdef SDHCI_USE_LEDS_CLASS
 static void sdhci_led_control(struct led_classdev *led,
 	enum led_brightness brightness)
 {
@@ -994,7 +999,7 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 
 	WARN_ON(host->mrq != NULL);
 
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#ifndef SDHCI_USE_LEDS_CLASS
 	sdhci_activate_led(host);
 #endif
 
@@ -1201,7 +1206,7 @@ static void sdhci_tasklet_finish(unsigned long param)
 	host->cmd = NULL;
 	host->data = NULL;
 
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#ifndef SDHCI_USE_LEDS_CLASS
 	sdhci_deactivate_led(host);
 #endif
 
@@ -1717,7 +1722,7 @@ int sdhci_add_host(struct sdhci_host *host)
 	sdhci_dumpregs(host);
 #endif
 
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#ifdef SDHCI_USE_LEDS_CLASS
 	host->led.name = mmc_hostname(mmc);
 	host->led.brightness = LED_OFF;
 	host->led.default_trigger = mmc_hostname(mmc);
@@ -1739,7 +1744,7 @@ int sdhci_add_host(struct sdhci_host *host)
 
 	return 0;
 
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#ifdef SDHCI_USE_LEDS_CLASS
 reset:
 	sdhci_reset(host, SDHCI_RESET_ALL);
 	free_irq(host->irq, host);
@@ -1775,7 +1780,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 
 	mmc_remove_host(host->mmc);
 
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#ifdef SDHCI_USE_LEDS_CLASS
 	led_classdev_unregister(&host->led);
 #endif
 
-- 
cgit v0.10.2


From a0d045cac9bcb3e9a9796d596415f7ffb64852e2 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Tue, 16 Dec 2008 16:13:09 +0100
Subject: drivers/mmc: Move a dereference below a NULL test

In each case, if the NULL test is necessary, then the dereference should be
moved below the NULL test.

The semantic patch that makes this change is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
type T;
expression E;
identifier i,fld;
statement S;
@@

- T i = E->fld;
+ T i;
  ... when != E
      when != i
  if (E == NULL) S
+ i = E->fld;
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index 95430b8..6a7a619 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -224,7 +224,7 @@ static inline void tmio_mmc_data_irq(struct tmio_mmc_host *host)
 {
 	void __iomem *ctl = host->ctl;
 	struct mmc_data *data = host->data;
-	struct mmc_command *stop = data->stop;
+	struct mmc_command *stop;
 
 	host->data = NULL;
 
@@ -232,6 +232,7 @@ static inline void tmio_mmc_data_irq(struct tmio_mmc_host *host)
 		pr_debug("Spurious data end IRQ\n");
 		return;
 	}
+	stop = data->stop;
 
 	/* FIXME - return correct transfer count on errors */
 	if (!data->error)
-- 
cgit v0.10.2


From 6a79e391df295bd7c2aa1309ea5031f361c197fd Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Wed, 31 Dec 2008 18:21:17 +0100
Subject: mmc_block: ensure all sectors that do not have errors are read

If a card encounters an ECC error while reading a sector it will
timeout.  Instead of reporting the entire I/O request as having
an error, redo the I/O one sector at a time so that all readable
sectors are provided to the upper layers.

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index cc9b3ab..45b1f43 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -229,7 +229,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	struct mmc_blk_data *md = mq->data;
 	struct mmc_card *card = md->queue.card;
 	struct mmc_blk_request brq;
-	int ret = 1;
+	int ret = 1, disable_multi = 0;
 
 	mmc_claim_host(card->host);
 
@@ -251,6 +251,14 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
 		brq.data.blocks = req->nr_sectors;
 
+		/*
+		 * After a read error, we redo the request one sector at a time
+		 * in order to accurately determine which sectors can be read
+		 * successfully.
+		 */
+		if (disable_multi && brq.data.blocks > 1)
+			brq.data.blocks = 1;
+
 		if (brq.data.blocks > 1) {
 			/* SPI multiblock writes terminate using a special
 			 * token, not a STOP_TRANSMISSION request.
@@ -279,6 +287,25 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		brq.data.sg = mq->sg;
 		brq.data.sg_len = mmc_queue_map_sg(mq);
 
+		/*
+		 * Adjust the sg list so it is the same size as the
+		 * request.
+		 */
+		if (brq.data.blocks != req->nr_sectors) {
+			int i, data_size = brq.data.blocks << 9;
+			struct scatterlist *sg;
+
+			for_each_sg(brq.data.sg, sg, brq.data.sg_len, i) {
+				data_size -= sg->length;
+				if (data_size <= 0) {
+					sg->length += data_size;
+					i++;
+					break;
+				}
+			}
+			brq.data.sg_len = i;
+		}
+
 		mmc_queue_bounce_pre(mq);
 
 		mmc_wait_for_req(card->host, &brq.mrq);
@@ -290,8 +317,16 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		 * until later as we need to wait for the card to leave
 		 * programming mode even when things go wrong.
 		 */
-		if (brq.cmd.error || brq.data.error || brq.stop.error)
+		if (brq.cmd.error || brq.data.error || brq.stop.error) {
+			if (brq.data.blocks > 1 && rq_data_dir(req) == READ) {
+				/* Redo read one sector at a time */
+				printk(KERN_WARNING "%s: retrying using single "
+				       "block read\n", req->rq_disk->disk_name);
+				disable_multi = 1;
+				continue;
+			}
 			status = get_card_status(card, req);
+		}
 
 		if (brq.cmd.error) {
 			printk(KERN_ERR "%s: error %d sending read/write "
@@ -348,8 +383,20 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 #endif
 		}
 
-		if (brq.cmd.error || brq.data.error || brq.stop.error)
+		if (brq.cmd.error || brq.stop.error || brq.data.error) {
+			if (rq_data_dir(req) == READ) {
+				/*
+				 * After an error, we redo I/O one sector at a
+				 * time, so we only reach here after trying to
+				 * read a single sector.
+				 */
+				spin_lock_irq(&md->lock);
+				ret = __blk_end_request(req, -EIO, brq.data.blksz);
+				spin_unlock_irq(&md->lock);
+				continue;
+			}
 			goto cmd_err;
+		}
 
 		/*
 		 * A block was successfully transferred.
@@ -371,25 +418,20 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	 * If the card is not SD, we can still ok written sectors
 	 * as reported by the controller (which might be less than
 	 * the real number of written sectors, but never more).
-	 *
-	 * For reads we just fail the entire chunk as that should
-	 * be safe in all cases.
 	 */
-	if (rq_data_dir(req) != READ) {
-		if (mmc_card_sd(card)) {
-			u32 blocks;
+	if (mmc_card_sd(card)) {
+		u32 blocks;
 
-			blocks = mmc_sd_num_wr_blocks(card);
-			if (blocks != (u32)-1) {
-				spin_lock_irq(&md->lock);
-				ret = __blk_end_request(req, 0, blocks << 9);
-				spin_unlock_irq(&md->lock);
-			}
-		} else {
+		blocks = mmc_sd_num_wr_blocks(card);
+		if (blocks != (u32)-1) {
 			spin_lock_irq(&md->lock);
-			ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
+			ret = __blk_end_request(req, 0, blocks << 9);
 			spin_unlock_irq(&md->lock);
 		}
+	} else {
+		spin_lock_irq(&md->lock);
+		ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
+		spin_unlock_irq(&md->lock);
 	}
 
 	mmc_release_host(card->host);
-- 
cgit v0.10.2


From c00a46abd4d45a67ff62f4ff6d4f839dff38b877 Mon Sep 17 00:00:00 2001
From: Vernon Sauder <vernoninhand@gmail.com>
Date: Mon, 29 Dec 2008 19:21:28 -0500
Subject: pxamci: fix dma_unmap_sg length

dma_unmap_sg should be given the same length as dma_map_sg, not the
value returned from dma_map_sg

Signed-off-by: Vernon Sauder <vsauder@inhand.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index f88cc74..3c5483b 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -283,7 +283,7 @@ static int pxamci_data_done(struct pxamci_host *host, unsigned int stat)
 		return 0;
 
 	DCSR(host->dma) = 0;
-	dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->dma_len,
+	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
 		     host->dma_dir);
 
 	if (stat & STAT_READ_TIME_OUT)
-- 
cgit v0.10.2


From 9c43df57910bbba540a6cb5c9132302a9ea5f41a Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 30 Dec 2008 18:15:28 +0300
Subject: mmc_spi: Add support for OpenFirmware bindings

The support is implemented via platform data accessors, new module
(of_mmc_spi) will be created automatically when the driver compiles
on OpenFirmware platforms. Link-time dependency will load the module
automatically.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index c794cc5..f485328 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -19,6 +19,9 @@ obj-$(CONFIG_MMC_AT91)		+= at91_mci.o
 obj-$(CONFIG_MMC_ATMELMCI)	+= atmel-mci.o
 obj-$(CONFIG_MMC_TIFM_SD)	+= tifm_sd.o
 obj-$(CONFIG_MMC_SPI)		+= mmc_spi.o
+ifeq ($(CONFIG_OF),y)
+obj-$(CONFIG_MMC_SPI)		+= of_mmc_spi.o
+endif
 obj-$(CONFIG_MMC_S3C)   	+= s3cmci.o
 obj-$(CONFIG_MMC_SDRICOH_CS)	+= sdricoh_cs.o
 obj-$(CONFIG_MMC_TMIO)		+= tmio_mmc.o
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index ad00e16..87e211d 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1285,7 +1285,7 @@ static int mmc_spi_probe(struct spi_device *spi)
 	/* Platform data is used to hook up things like card sensing
 	 * and power switching gpios.
 	 */
-	host->pdata = spi->dev.platform_data;
+	host->pdata = mmc_spi_get_pdata(spi);
 	if (host->pdata)
 		mmc->ocr_avail = host->pdata->ocr_mask;
 	if (!mmc->ocr_avail) {
@@ -1368,6 +1368,7 @@ fail_glue_init:
 
 fail_nobuf1:
 	mmc_free_host(mmc);
+	mmc_spi_put_pdata(spi);
 	dev_set_drvdata(&spi->dev, NULL);
 
 nomem:
@@ -1402,6 +1403,7 @@ static int __devexit mmc_spi_remove(struct spi_device *spi)
 
 		spi->max_speed_hz = mmc->f_max;
 		mmc_free_host(mmc);
+		mmc_spi_put_pdata(spi);
 		dev_set_drvdata(&spi->dev, NULL);
 	}
 	return 0;
diff --git a/drivers/mmc/host/of_mmc_spi.c b/drivers/mmc/host/of_mmc_spi.c
new file mode 100644
index 0000000..fb2921f
--- /dev/null
+++ b/drivers/mmc/host/of_mmc_spi.c
@@ -0,0 +1,149 @@
+/*
+ * OpenFirmware bindings for the MMC-over-SPI driver
+ *
+ * Copyright (c) MontaVista Software, Inc. 2008.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/gpio.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/mmc_spi.h>
+#include <linux/mmc/core.h>
+#include <linux/mmc/host.h>
+
+enum {
+	CD_GPIO = 0,
+	WP_GPIO,
+	NUM_GPIOS,
+};
+
+struct of_mmc_spi {
+	int gpios[NUM_GPIOS];
+	bool alow_gpios[NUM_GPIOS];
+	struct mmc_spi_platform_data pdata;
+};
+
+static struct of_mmc_spi *to_of_mmc_spi(struct device *dev)
+{
+	return container_of(dev->platform_data, struct of_mmc_spi, pdata);
+}
+
+static int of_mmc_spi_read_gpio(struct device *dev, int gpio_num)
+{
+	struct of_mmc_spi *oms = to_of_mmc_spi(dev);
+	bool active_low = oms->alow_gpios[gpio_num];
+	bool value = gpio_get_value(oms->gpios[gpio_num]);
+
+	return active_low ^ value;
+}
+
+static int of_mmc_spi_get_cd(struct device *dev)
+{
+	return of_mmc_spi_read_gpio(dev, CD_GPIO);
+}
+
+static int of_mmc_spi_get_ro(struct device *dev)
+{
+	return of_mmc_spi_read_gpio(dev, WP_GPIO);
+}
+
+struct mmc_spi_platform_data *mmc_spi_get_pdata(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct device_node *np = dev_archdata_get_node(&dev->archdata);
+	struct of_mmc_spi *oms;
+	const u32 *voltage_ranges;
+	int num_ranges;
+	int i;
+	int ret = -EINVAL;
+
+	if (dev->platform_data || !np)
+		return dev->platform_data;
+
+	oms = kzalloc(sizeof(*oms), GFP_KERNEL);
+	if (!oms)
+		return NULL;
+
+	voltage_ranges = of_get_property(np, "voltage-ranges", &num_ranges);
+	num_ranges = num_ranges / sizeof(*voltage_ranges) / 2;
+	if (!voltage_ranges || !num_ranges) {
+		dev_err(dev, "OF: voltage-ranges unspecified\n");
+		goto err_ocr;
+	}
+
+	for (i = 0; i < num_ranges; i++) {
+		const int j = i * 2;
+		u32 mask;
+
+		mask = mmc_vddrange_to_ocrmask(voltage_ranges[j],
+					       voltage_ranges[j + 1]);
+		if (!mask) {
+			ret = -EINVAL;
+			dev_err(dev, "OF: voltage-range #%d is invalid\n", i);
+			goto err_ocr;
+		}
+		oms->pdata.ocr_mask |= mask;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(oms->gpios); i++) {
+		enum of_gpio_flags gpio_flags;
+
+		oms->gpios[i] = of_get_gpio_flags(np, i, &gpio_flags);
+		if (!gpio_is_valid(oms->gpios[i]))
+			continue;
+
+		ret = gpio_request(oms->gpios[i], dev->bus_id);
+		if (ret < 0) {
+			oms->gpios[i] = -EINVAL;
+			continue;
+		}
+
+		if (gpio_flags & OF_GPIO_ACTIVE_LOW)
+			oms->alow_gpios[i] = true;
+	}
+
+	if (gpio_is_valid(oms->gpios[CD_GPIO]))
+		oms->pdata.get_cd = of_mmc_spi_get_cd;
+	if (gpio_is_valid(oms->gpios[WP_GPIO]))
+		oms->pdata.get_ro = of_mmc_spi_get_ro;
+
+	/* We don't support interrupts yet, let's poll. */
+	oms->pdata.caps |= MMC_CAP_NEEDS_POLL;
+
+	dev->platform_data = &oms->pdata;
+	return dev->platform_data;
+err_ocr:
+	kfree(oms);
+	return NULL;
+}
+EXPORT_SYMBOL(mmc_spi_get_pdata);
+
+void mmc_spi_put_pdata(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct device_node *np = dev_archdata_get_node(&dev->archdata);
+	struct of_mmc_spi *oms = to_of_mmc_spi(dev);
+	int i;
+
+	if (!dev->platform_data || !np)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(oms->gpios); i++) {
+		if (gpio_is_valid(oms->gpios[i]))
+			gpio_free(oms->gpios[i]);
+	}
+	kfree(oms);
+	dev->platform_data = NULL;
+}
+EXPORT_SYMBOL(mmc_spi_put_pdata);
diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h
index a3626ae..0f4eb16 100644
--- a/include/linux/spi/mmc_spi.h
+++ b/include/linux/spi/mmc_spi.h
@@ -1,9 +1,10 @@
 #ifndef __LINUX_SPI_MMC_SPI_H
 #define __LINUX_SPI_MMC_SPI_H
 
+#include <linux/device.h>
+#include <linux/spi/spi.h>
 #include <linux/interrupt.h>
 
-struct device;
 struct mmc_host;
 
 /* Put this in platform_data of a device being used to manage an MMC/SD
@@ -41,4 +42,16 @@ struct mmc_spi_platform_data {
 	void (*setpower)(struct device *, unsigned int maskval);
 };
 
+#ifdef CONFIG_OF
+extern struct mmc_spi_platform_data *mmc_spi_get_pdata(struct spi_device *spi);
+extern void mmc_spi_put_pdata(struct spi_device *spi);
+#else
+static inline struct mmc_spi_platform_data *
+mmc_spi_get_pdata(struct spi_device *spi)
+{
+	return spi->dev.platform_data;
+}
+static inline void mmc_spi_put_pdata(struct spi_device *spi) {}
+#endif /* CONFIG_OF */
+
 #endif /* __LINUX_SPI_MMC_SPI_H */
-- 
cgit v0.10.2


From f6e10b865c3ea56bdaa8c6ecfee313b997900dbb Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 31 Dec 2008 09:50:30 -0800
Subject: mmc: warn about voltage mismatches

Get rid of a silent failure mode when the MMC/SD host doesn't
support the voltages needed to operate a given card, by
adding a warning.  A 3.3V host and a 3.0V card, for example,
no longer need to mysteriously just not work at all.

This isn't the best diagnostic; ideally it would also tell
what voltage the card and host support (and not just by
dumping the bitmasks).

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 5f288ae..df6ce4a 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -542,6 +542,8 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr)
 		host->ios.vdd = bit;
 		mmc_set_ios(host);
 	} else {
+		pr_warning("%s: host doesn't support card's voltages\n",
+				mmc_hostname(host));
 		ocr = 0;
 	}
 
-- 
cgit v0.10.2


From be6d3e56a6b9b3a4ee44a0685e39e595073c6f0d Mon Sep 17 00:00:00 2001
From: Kentaro Takeda <takedakn@nttdata.co.jp>
Date: Wed, 17 Dec 2008 13:24:15 +0900
Subject: introduce new LSM hooks where vfsmount is available.

Add new LSM hooks for path-based checks.  Call them on directory-modifying
operations at the points where we still know the vfsmount involved.

Signed-off-by: Kentaro Takeda <takedakn@nttdata.co.jp>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Toshiharu Harada <haradats@nttdata.co.jp>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index af3783f..ab441af 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1556,6 +1556,9 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 		 * Refuse to truncate files with mandatory locks held on them.
 		 */
 		error = locks_verify_locked(inode);
+		if (!error)
+			error = security_path_truncate(&nd->path, 0,
+					       ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
 		if (!error) {
 			DQUOT_INIT(inode);
 
@@ -1586,7 +1589,11 @@ static int __open_namei_create(struct nameidata *nd, struct path *path,
 
 	if (!IS_POSIXACL(dir->d_inode))
 		mode &= ~current->fs->umask;
+	error = security_path_mknod(&nd->path, path->dentry, mode, 0);
+	if (error)
+		goto out_unlock;
 	error = vfs_create(dir->d_inode, path->dentry, mode, nd);
+out_unlock:
 	mutex_unlock(&dir->d_inode->i_mutex);
 	dput(nd->path.dentry);
 	nd->path.dentry = path->dentry;
@@ -1999,6 +2006,9 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_mknod(&nd.path, dentry, mode, dev);
+	if (error)
+		goto out_drop_write;
 	switch (mode & S_IFMT) {
 		case 0: case S_IFREG:
 			error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
@@ -2011,6 +2021,7 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
 			error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
 			break;
 	}
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(dentry);
@@ -2070,7 +2081,11 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_mkdir(&nd.path, dentry, mode);
+	if (error)
+		goto out_drop_write;
 	error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(dentry);
@@ -2180,7 +2195,11 @@ static long do_rmdir(int dfd, const char __user *pathname)
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto exit3;
+	error = security_path_rmdir(&nd.path, dentry);
+	if (error)
+		goto exit4;
 	error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
+exit4:
 	mnt_drop_write(nd.path.mnt);
 exit3:
 	dput(dentry);
@@ -2265,7 +2284,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 		error = mnt_want_write(nd.path.mnt);
 		if (error)
 			goto exit2;
+		error = security_path_unlink(&nd.path, dentry);
+		if (error)
+			goto exit3;
 		error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+exit3:
 		mnt_drop_write(nd.path.mnt);
 	exit2:
 		dput(dentry);
@@ -2346,7 +2369,11 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_symlink(&nd.path, dentry, from);
+	if (error)
+		goto out_drop_write;
 	error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(dentry);
@@ -2443,7 +2470,11 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_link(old_path.dentry, &nd.path, new_dentry);
+	if (error)
+		goto out_drop_write;
 	error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(new_dentry);
@@ -2679,8 +2710,13 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
 	error = mnt_want_write(oldnd.path.mnt);
 	if (error)
 		goto exit5;
+	error = security_path_rename(&oldnd.path, old_dentry,
+				     &newnd.path, new_dentry);
+	if (error)
+		goto exit6;
 	error = vfs_rename(old_dir->d_inode, old_dentry,
 				   new_dir->d_inode, new_dentry);
+exit6:
 	mnt_drop_write(oldnd.path.mnt);
 exit5:
 	dput(new_dentry);
diff --git a/fs/open.c b/fs/open.c
index c0a426d..1cd7d40 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -272,6 +272,8 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)
 		goto put_write_and_out;
 
 	error = locks_verify_truncate(inode, NULL, length);
+	if (!error)
+		error = security_path_truncate(&path, length, 0);
 	if (!error) {
 		DQUOT_INIT(inode);
 		error = do_truncate(path.dentry, length, 0, NULL);
@@ -329,6 +331,9 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 
 	error = locks_verify_truncate(inode, file, length);
 	if (!error)
+		error = security_path_truncate(&file->f_path, length,
+					       ATTR_MTIME|ATTR_CTIME);
+	if (!error)
 		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
 out_putf:
 	fput(file);
diff --git a/include/linux/security.h b/include/linux/security.h
index 3416cb8..b92b5e4 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -335,17 +335,37 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@dir contains the inode structure of the parent directory of the new link.
  *	@new_dentry contains the dentry structure for the new link.
  *	Return 0 if permission is granted.
+ * @path_link:
+ *	Check permission before creating a new hard link to a file.
+ *	@old_dentry contains the dentry structure for an existing link
+ *	to the file.
+ *	@new_dir contains the path structure of the parent directory of
+ *	the new link.
+ *	@new_dentry contains the dentry structure for the new link.
+ *	Return 0 if permission is granted.
  * @inode_unlink:
  *	Check the permission to remove a hard link to a file.
  *	@dir contains the inode structure of parent directory of the file.
  *	@dentry contains the dentry structure for file to be unlinked.
  *	Return 0 if permission is granted.
+ * @path_unlink:
+ *	Check the permission to remove a hard link to a file.
+ *	@dir contains the path structure of parent directory of the file.
+ *	@dentry contains the dentry structure for file to be unlinked.
+ *	Return 0 if permission is granted.
  * @inode_symlink:
  *	Check the permission to create a symbolic link to a file.
  *	@dir contains the inode structure of parent directory of the symbolic link.
  *	@dentry contains the dentry structure of the symbolic link.
  *	@old_name contains the pathname of file.
  *	Return 0 if permission is granted.
+ * @path_symlink:
+ *	Check the permission to create a symbolic link to a file.
+ *	@dir contains the path structure of parent directory of
+ *	the symbolic link.
+ *	@dentry contains the dentry structure of the symbolic link.
+ *	@old_name contains the pathname of file.
+ *	Return 0 if permission is granted.
  * @inode_mkdir:
  *	Check permissions to create a new directory in the existing directory
  *	associated with inode strcture @dir.
@@ -353,11 +373,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@dentry contains the dentry structure of new directory.
  *	@mode contains the mode of new directory.
  *	Return 0 if permission is granted.
+ * @path_mkdir:
+ *	Check permissions to create a new directory in the existing directory
+ *	associated with path strcture @path.
+ *	@dir containst the path structure of parent of the directory
+ *	to be created.
+ *	@dentry contains the dentry structure of new directory.
+ *	@mode contains the mode of new directory.
+ *	Return 0 if permission is granted.
  * @inode_rmdir:
  *	Check the permission to remove a directory.
  *	@dir contains the inode structure of parent of the directory to be removed.
  *	@dentry contains the dentry structure of directory to be removed.
  *	Return 0 if permission is granted.
+ * @path_rmdir:
+ *	Check the permission to remove a directory.
+ *	@dir contains the path structure of parent of the directory to be
+ *	removed.
+ *	@dentry contains the dentry structure of directory to be removed.
+ *	Return 0 if permission is granted.
  * @inode_mknod:
  *	Check permissions when creating a special file (or a socket or a fifo
  *	file created via the mknod system call).  Note that if mknod operation
@@ -368,6 +402,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@mode contains the mode of the new file.
  *	@dev contains the device number.
  *	Return 0 if permission is granted.
+ * @path_mknod:
+ *	Check permissions when creating a file. Note that this hook is called
+ *	even if mknod operation is being done for a regular file.
+ *	@dir contains the path structure of parent of the new file.
+ *	@dentry contains the dentry structure of the new file.
+ *	@mode contains the mode of the new file.
+ *	@dev contains the undecoded device number. Use new_decode_dev() to get
+ *	the decoded device number.
+ *	Return 0 if permission is granted.
  * @inode_rename:
  *	Check for permission to rename a file or directory.
  *	@old_dir contains the inode structure for parent of the old link.
@@ -375,6 +418,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@new_dir contains the inode structure for parent of the new link.
  *	@new_dentry contains the dentry structure of the new link.
  *	Return 0 if permission is granted.
+ * @path_rename:
+ *	Check for permission to rename a file or directory.
+ *	@old_dir contains the path structure for parent of the old link.
+ *	@old_dentry contains the dentry structure of the old link.
+ *	@new_dir contains the path structure for parent of the new link.
+ *	@new_dentry contains the dentry structure of the new link.
+ *	Return 0 if permission is granted.
  * @inode_readlink:
  *	Check the permission to read the symbolic link.
  *	@dentry contains the dentry structure for the file link.
@@ -403,6 +453,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@dentry contains the dentry structure for the file.
  *	@attr is the iattr structure containing the new file attributes.
  *	Return 0 if permission is granted.
+ * @path_truncate:
+ *	Check permission before truncating a file.
+ *	@path contains the path structure for the file.
+ *	@length is the new length of the file.
+ *	@time_attrs is the flags passed to do_truncate().
+ *	Return 0 if permission is granted.
  * @inode_getattr:
  *	Check permission before obtaining file attributes.
  *	@mnt is the vfsmount where the dentry was looked up
@@ -1331,6 +1387,22 @@ struct security_operations {
 				   struct super_block *newsb);
 	int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts);
 
+#ifdef CONFIG_SECURITY_PATH
+	int (*path_unlink) (struct path *dir, struct dentry *dentry);
+	int (*path_mkdir) (struct path *dir, struct dentry *dentry, int mode);
+	int (*path_rmdir) (struct path *dir, struct dentry *dentry);
+	int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode,
+			   unsigned int dev);
+	int (*path_truncate) (struct path *path, loff_t length,
+			      unsigned int time_attrs);
+	int (*path_symlink) (struct path *dir, struct dentry *dentry,
+			     const char *old_name);
+	int (*path_link) (struct dentry *old_dentry, struct path *new_dir,
+			  struct dentry *new_dentry);
+	int (*path_rename) (struct path *old_dir, struct dentry *old_dentry,
+			    struct path *new_dir, struct dentry *new_dentry);
+#endif
+
 	int (*inode_alloc_security) (struct inode *inode);
 	void (*inode_free_security) (struct inode *inode);
 	int (*inode_init_security) (struct inode *inode, struct inode *dir,
@@ -2705,6 +2777,71 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi
 
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 
+#ifdef CONFIG_SECURITY_PATH
+int security_path_unlink(struct path *dir, struct dentry *dentry);
+int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode);
+int security_path_rmdir(struct path *dir, struct dentry *dentry);
+int security_path_mknod(struct path *dir, struct dentry *dentry, int mode,
+			unsigned int dev);
+int security_path_truncate(struct path *path, loff_t length,
+			   unsigned int time_attrs);
+int security_path_symlink(struct path *dir, struct dentry *dentry,
+			  const char *old_name);
+int security_path_link(struct dentry *old_dentry, struct path *new_dir,
+		       struct dentry *new_dentry);
+int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
+			 struct path *new_dir, struct dentry *new_dentry);
+#else	/* CONFIG_SECURITY_PATH */
+static inline int security_path_unlink(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static inline int security_path_mkdir(struct path *dir, struct dentry *dentry,
+				      int mode)
+{
+	return 0;
+}
+
+static inline int security_path_rmdir(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static inline int security_path_mknod(struct path *dir, struct dentry *dentry,
+				      int mode, unsigned int dev)
+{
+	return 0;
+}
+
+static inline int security_path_truncate(struct path *path, loff_t length,
+					 unsigned int time_attrs)
+{
+	return 0;
+}
+
+static inline int security_path_symlink(struct path *dir, struct dentry *dentry,
+					const char *old_name)
+{
+	return 0;
+}
+
+static inline int security_path_link(struct dentry *old_dentry,
+				     struct path *new_dir,
+				     struct dentry *new_dentry)
+{
+	return 0;
+}
+
+static inline int security_path_rename(struct path *old_dir,
+				       struct dentry *old_dentry,
+				       struct path *new_dir,
+				       struct dentry *new_dentry)
+{
+	return 0;
+}
+#endif	/* CONFIG_SECURITY_PATH */
+
 #ifdef CONFIG_KEYS
 #ifdef CONFIG_SECURITY
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c6250d0..d1b8982 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -836,7 +836,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		err = mnt_want_write(nd.path.mnt);
 		if (err)
 			goto out_mknod_dput;
+		err = security_path_mknod(&nd.path, dentry, mode, 0);
+		if (err)
+			goto out_mknod_drop_write;
 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
+out_mknod_drop_write:
 		mnt_drop_write(nd.path.mnt);
 		if (err)
 			goto out_mknod_dput;
diff --git a/security/Kconfig b/security/Kconfig
index d9f47ce..9438535 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -81,6 +81,15 @@ config SECURITY_NETWORK_XFRM
 	  IPSec.
 	  If you are unsure how to answer this question, answer N.
 
+config SECURITY_PATH
+	bool "Security hooks for pathname based access control"
+	depends on SECURITY
+	help
+	  This enables the security hooks for pathname based access control.
+	  If enabled, a security module can use these hooks to
+	  implement pathname based access controls.
+	  If you are unsure how to answer this question, answer N.
+
 config SECURITY_FILE_CAPABILITIES
 	bool "File POSIX Capabilities"
 	default n
diff --git a/security/capability.c b/security/capability.c
index 2dce66f..c545bd1 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -263,6 +263,53 @@ static void cap_inode_getsecid(const struct inode *inode, u32 *secid)
 	*secid = 0;
 }
 
+#ifdef CONFIG_SECURITY_PATH
+static int cap_path_mknod(struct path *dir, struct dentry *dentry, int mode,
+			  unsigned int dev)
+{
+	return 0;
+}
+
+static int cap_path_mkdir(struct path *dir, struct dentry *dentry, int mode)
+{
+	return 0;
+}
+
+static int cap_path_rmdir(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static int cap_path_unlink(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static int cap_path_symlink(struct path *dir, struct dentry *dentry,
+			    const char *old_name)
+{
+	return 0;
+}
+
+static int cap_path_link(struct dentry *old_dentry, struct path *new_dir,
+			 struct dentry *new_dentry)
+{
+	return 0;
+}
+
+static int cap_path_rename(struct path *old_path, struct dentry *old_dentry,
+			   struct path *new_path, struct dentry *new_dentry)
+{
+	return 0;
+}
+
+static int cap_path_truncate(struct path *path, loff_t length,
+			     unsigned int time_attrs)
+{
+	return 0;
+}
+#endif
+
 static int cap_file_permission(struct file *file, int mask)
 {
 	return 0;
@@ -883,6 +930,16 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, inode_setsecurity);
 	set_to_cap_if_null(ops, inode_listsecurity);
 	set_to_cap_if_null(ops, inode_getsecid);
+#ifdef CONFIG_SECURITY_PATH
+	set_to_cap_if_null(ops, path_mknod);
+	set_to_cap_if_null(ops, path_mkdir);
+	set_to_cap_if_null(ops, path_rmdir);
+	set_to_cap_if_null(ops, path_unlink);
+	set_to_cap_if_null(ops, path_symlink);
+	set_to_cap_if_null(ops, path_link);
+	set_to_cap_if_null(ops, path_rename);
+	set_to_cap_if_null(ops, path_truncate);
+#endif
 	set_to_cap_if_null(ops, file_permission);
 	set_to_cap_if_null(ops, file_alloc_security);
 	set_to_cap_if_null(ops, file_free_security);
diff --git a/security/security.c b/security/security.c
index d85dbb3..678d4d0 100644
--- a/security/security.c
+++ b/security/security.c
@@ -355,6 +355,72 @@ int security_inode_init_security(struct inode *inode, struct inode *dir,
 }
 EXPORT_SYMBOL(security_inode_init_security);
 
+#ifdef CONFIG_SECURITY_PATH
+int security_path_mknod(struct path *path, struct dentry *dentry, int mode,
+			unsigned int dev)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_mknod(path, dentry, mode, dev);
+}
+EXPORT_SYMBOL(security_path_mknod);
+
+int security_path_mkdir(struct path *path, struct dentry *dentry, int mode)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_mkdir(path, dentry, mode);
+}
+
+int security_path_rmdir(struct path *path, struct dentry *dentry)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_rmdir(path, dentry);
+}
+
+int security_path_unlink(struct path *path, struct dentry *dentry)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_unlink(path, dentry);
+}
+
+int security_path_symlink(struct path *path, struct dentry *dentry,
+			  const char *old_name)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_symlink(path, dentry, old_name);
+}
+
+int security_path_link(struct dentry *old_dentry, struct path *new_dir,
+		       struct dentry *new_dentry)
+{
+	if (unlikely(IS_PRIVATE(old_dentry->d_inode)))
+		return 0;
+	return security_ops->path_link(old_dentry, new_dir, new_dentry);
+}
+
+int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
+			 struct path *new_dir, struct dentry *new_dentry)
+{
+	if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
+		     (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
+		return 0;
+	return security_ops->path_rename(old_dir, old_dentry, new_dir,
+					 new_dentry);
+}
+
+int security_path_truncate(struct path *path, loff_t length,
+			   unsigned int time_attrs)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_truncate(path, length, time_attrs);
+}
+#endif
+
 int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
 {
 	if (unlikely(IS_PRIVATE(dir)))
-- 
cgit v0.10.2


From e2b689d82c0394e5239a3557a217f19e2f47f1be Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Thu, 4 Dec 2008 11:17:47 +0000
Subject: fs: reorder struct inotify_device on 64bits to remove padding

Reorder struct inotify_device to remove 8 bytes of padding on 64bit
builds, reducing size to 128 bytes . Therefore allocating from a smaller
slab & using one fewer cachelines.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>

----
Hi,
patch against 2.6.28-rc7.
built & tested on AMDX2 desktop.

I've not been able to send this to the listed inotify maintainers, I
just get mail failures. So I guessed filesystem was the best home for
it, hope that's ok.

regards
Richard
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index e2425bb..400f806 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -76,10 +76,10 @@ struct inotify_device {
 	struct mutex		ev_mutex;	/* protects event queue */
 	struct mutex		up_mutex;	/* synchronizes watch updates */
 	struct list_head 	events;		/* list of queued events */
-	atomic_t		count;		/* reference count */
 	struct user_struct	*user;		/* user who opened this dev */
 	struct inotify_handle	*ih;		/* inotify handle */
 	struct fasync_struct    *fa;            /* async notification */
+	atomic_t		count;		/* reference count */
 	unsigned int		queue_size;	/* size of the queue (bytes) */
 	unsigned int		event_count;	/* number of pending events */
 	unsigned int		max_events;	/* maximum number of events */
-- 
cgit v0.10.2


From c2452f32786159ed85f0e4b21fec09258f822fc8 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Mon, 1 Dec 2008 09:33:43 +0100
Subject: shrink struct dentry

struct dentry is one of the most critical structures in the kernel. So it's
sad to see it going neglected.

With CONFIG_PROFILING turned on (which is probably the common case at least
for distros and kernel developers), sizeof(struct dcache) == 208 here
(64-bit). This gives 19 objects per slab.

I packed d_mounted into a hole, and took another 4 bytes off the inline
name length to take the padding out from the end of the structure. This
shinks it to 200 bytes. I could have gone the other way and increased the
length to 40, but I'm aiming for a magic number, read on...

I then got rid of the d_cookie pointer. This shrinks it to 192 bytes. Rant:
why was this ever a good idea? The cookie system should increase its hash
size or use a tree or something if lookups are a problem. Also the "fast
dcookie lookups" in oprofile should be moved into the dcookie code -- how
can oprofile possibly care about the dcookie_mutex? It gets dropped after
get_dcookie() returns so it can't be providing any sort of protection.

At 192 bytes, 21 objects fit into a 4K page, saving about 3MB on my system
with ~140 000 entries allocated. 192 is also a multiple of 64, so we get
nice cacheline alignment on 64 and 32 byte line systems -- any given dentry
will now require 3 cachelines to touch all fields wheras previously it
would require 4.

I know the inline name size was chosen quite carefully, however with the
reduction in cacheline footprint, it should actually be just about as fast
to do a name lookup for a 36 character name as it was before the patch (and
faster for other sizes). The memory footprint savings for names which are
<= 32 or > 36 bytes long should more than make up for the memory cost for
33-36 byte names.

Performance is a feature...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 2949126..6b793ae 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -297,7 +297,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
 {
 	unsigned long cookie;
 
-	if (path->dentry->d_cookie)
+	if (path->dentry->d_flags & DCACHE_COOKIE)
 		return (unsigned long)path->dentry;
 	get_dcookie(path, &cookie);
 	return cookie;
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 737bd94..65e8294 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -200,7 +200,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
 {
 	unsigned long cookie;
 
-	if (path->dentry->d_cookie)
+	if (path->dentry->d_flags & DCACHE_COOKIE)
 		return (unsigned long)path->dentry;
 	get_dcookie(path, &cookie);
 	return cookie;
diff --git a/fs/dcache.c b/fs/dcache.c
index a1d86c7..fd244c7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -34,7 +34,6 @@
 #include <linux/bootmem.h>
 #include "internal.h"
 
-
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
@@ -948,9 +947,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
 	dentry->d_mounted = 0;
-#ifdef CONFIG_PROFILING
-	dentry->d_cookie = NULL;
-#endif
 	INIT_HLIST_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 855d4b1..180e9fe 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -93,10 +93,15 @@ static struct dcookie_struct *alloc_dcookie(struct path *path)
 {
 	struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache,
 							GFP_KERNEL);
+	struct dentry *d;
 	if (!dcs)
 		return NULL;
 
-	path->dentry->d_cookie = dcs;
+	d = path->dentry;
+	spin_lock(&d->d_lock);
+	d->d_flags |= DCACHE_COOKIE;
+	spin_unlock(&d->d_lock);
+
 	dcs->path = *path;
 	path_get(path);
 	hash_dcookie(dcs);
@@ -119,14 +124,14 @@ int get_dcookie(struct path *path, unsigned long *cookie)
 		goto out;
 	}
 
-	dcs = path->dentry->d_cookie;
-
-	if (!dcs)
+	if (path->dentry->d_flags & DCACHE_COOKIE) {
+		dcs = find_dcookie((unsigned long)path->dentry);
+	} else {
 		dcs = alloc_dcookie(path);
-
-	if (!dcs) {
-		err = -ENOMEM;
-		goto out;
+		if (!dcs) {
+			err = -ENOMEM;
+			goto out;
+		}
 	}
 
 	*cookie = dcookie_value(dcs);
@@ -251,7 +256,12 @@ out_kmem:
 
 static void free_dcookie(struct dcookie_struct * dcs)
 {
-	dcs->path.dentry->d_cookie = NULL;
+	struct dentry *d = dcs->path.dentry;
+
+	spin_lock(&d->d_lock);
+	d->d_flags &= ~DCACHE_COOKIE;
+	spin_unlock(&d->d_lock);
+
 	path_put(&dcs->path);
 	kmem_cache_free(dcookie_cache, dcs);
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index a37359d..c66d224 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -75,14 +75,22 @@ full_name_hash(const unsigned char *name, unsigned int len)
 	return end_name_hash(hash);
 }
 
-struct dcookie_struct;
-
-#define DNAME_INLINE_LEN_MIN 36
+/*
+ * Try to keep struct dentry aligned on 64 byte cachelines (this will
+ * give reasonable cacheline footprint with larger lines without the
+ * large memory footprint increase).
+ */
+#ifdef CONFIG_64BIT
+#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */
+#else
+#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */
+#endif
 
 struct dentry {
 	atomic_t d_count;
 	unsigned int d_flags;		/* protected by d_lock */
 	spinlock_t d_lock;		/* per dentry lock */
+	int d_mounted;
 	struct inode *d_inode;		/* Where the name belongs to - NULL is
 					 * negative */
 	/*
@@ -107,10 +115,7 @@ struct dentry {
 	struct dentry_operations *d_op;
 	struct super_block *d_sb;	/* The root of the dentry tree */
 	void *d_fsdata;			/* fs-specific data */
-#ifdef CONFIG_PROFILING
-	struct dcookie_struct *d_cookie; /* cookie, if any */
-#endif
-	int d_mounted;
+
 	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
 };
 
@@ -177,6 +182,8 @@ d_iput:		no		no		no       yes
 
 #define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched */
 
+#define DCACHE_COOKIE		0x0040	/* For use by dcookie subsystem */
+
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
-- 
cgit v0.10.2


From dded4f4d5048e64a01cf52eed4d27c8cb2600525 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 1 Dec 2008 14:34:50 -0800
Subject: include: linux/fs.h: put declarations in __KERNEL__

include/linux/fs.h contains externs for a bunch of variables.  That obviously
belongs under ifdef __KERNEL__.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 001ded4..c5e4c5c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -21,7 +21,6 @@
 
 /* Fixed constants first: */
 #undef NR_OPEN
-extern int sysctl_nr_open;
 #define INR_OPEN 1024		/* Initial setting for nfile rlimits */
 
 #define BLOCK_SIZE_BITS 10
@@ -38,21 +37,13 @@ struct files_stat_struct {
 	int nr_free_files;	/* read only */
 	int max_files;		/* tunable */
 };
-extern struct files_stat_struct files_stat;
-extern int get_max_files(void);
 
 struct inodes_stat_t {
 	int nr_inodes;
 	int nr_unused;
 	int dummy[5];		/* padding for sysctl ABI compatibility */
 };
-extern struct inodes_stat_t inodes_stat;
 
-extern int leases_enable, lease_break_time;
-
-#ifdef CONFIG_DNOTIFY
-extern int dir_notify_enable;
-#endif
 
 #define NR_FILE  8192	/* this can well be larger on a larger system */
 
@@ -330,6 +321,15 @@ extern void __init inode_init(void);
 extern void __init inode_init_early(void);
 extern void __init files_init(unsigned long);
 
+extern struct files_stat_struct files_stat;
+extern int get_max_files(void);
+extern int sysctl_nr_open;
+extern struct inodes_stat_t inodes_stat;
+extern int leases_enable, lease_break_time;
+#ifdef CONFIG_DNOTIFY
+extern int dir_notify_enable;
+#endif
+
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create);
-- 
cgit v0.10.2


From 5cc4a0341a1295ea56b2e62eb70d96d8fdb94ded Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Mon, 1 Dec 2008 14:34:51 -0800
Subject: fs/namespace.c: drop code after return

The extra semicolon serves no purpose.

Signed-off-by: Julia Lawall <julia@diku.dk>
Reviewed-by: Richard Genoud <richard.genoud@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namespace.c b/fs/namespace.c
index 1c09cab..a40685d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1990,7 +1990,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	if (!new_ns->root) {
 		up_write(&namespace_sem);
 		kfree(new_ns);
-		return ERR_PTR(-ENOMEM);;
+		return ERR_PTR(-ENOMEM);
 	}
 	spin_lock(&vfsmount_lock);
 	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
-- 
cgit v0.10.2


From a17d5232de7b53d34229de79ec22f4bb04adb7e4 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 19 Dec 2008 20:47:10 +0000
Subject: eCryptfs: check readlink result was not an error before using it

The result from readlink is being used to index into the link name
buffer without checking whether it is a valid length. If readlink
returns an error this will fault or cause memory corruption.

Cc: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Cc: Dustin Kirkland <kirkland@canonical.com>
Cc: ecryptfs-devel@lists.launchpad.net
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Acked-by: Michael Halcrow <mhalcrow@us.ibm.com>
Acked-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 89209f0..5e78fc1 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -673,10 +673,11 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
 			"dentry->d_name.name = [%s]\n", dentry->d_name.name);
 	rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
-	buf[rc] = '\0';
 	set_fs(old_fs);
 	if (rc < 0)
 		goto out_free;
+	else
+		buf[rc] = '\0';
 	rc = 0;
 	nd_set_link(nd, buf);
 	goto out;
-- 
cgit v0.10.2


From 035146851cfa2fe24c1d9dc7637bb009ad06b2f7 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 19 Dec 2008 20:47:11 +0000
Subject: vfs: introduce helper function to safely NUL-terminate symlinks

A number of filesystems were potentially triggering kernel bugs due to
corrupted symlink names on disk. This function helps safely terminate
the names.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 99eb803..fc2e035 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -94,4 +94,9 @@ static inline char *nd_get_link(struct nameidata *nd)
 	return nd->saved_names[nd->depth];
 }
 
+static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
+{
+	((char *) name)[min(len, maxlen)] = '\0';
+}
+
 #endif /* _LINUX_NAMEI_H */
-- 
cgit v0.10.2


From ebd09abbd9699f328165aee50a070403fbf55a37 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 19 Dec 2008 20:47:12 +0000
Subject: vfs: ensure page symlinks are NUL-terminated

On-disk data corruption could cause a page link to have its i_size set
to PAGE_SIZE (or a multiple thereof) and its contents all non-NUL.
NUL-terminate the link name to ensure this doesn't cause further
problems for the kernel.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index ab441af..9ed5e28 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2786,13 +2786,16 @@ int vfs_follow_link(struct nameidata *nd, const char *link)
 /* get the link contents into pagecache */
 static char *page_getlink(struct dentry * dentry, struct page **ppage)
 {
-	struct page * page;
+	char *kaddr;
+	struct page *page;
 	struct address_space *mapping = dentry->d_inode->i_mapping;
 	page = read_mapping_page(mapping, 0, NULL);
 	if (IS_ERR(page))
 		return (char*)page;
 	*ppage = page;
-	return kmap(page);
+	kaddr = kmap(page);
+	nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1);
+	return kaddr;
 }
 
 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
-- 
cgit v0.10.2


From 8d6d0c4da2dbbe0a69fea3692146af39f139f8b4 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 19 Dec 2008 20:47:13 +0000
Subject: ext2: ensure fast symlinks are NUL-terminated

Ensure fast symlink targets are NUL-terminated, even if corrupted
on-disk.

Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 7658b33..02b39a5 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -32,6 +32,7 @@
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
 #include <linux/fiemap.h>
+#include <linux/namei.h>
 #include "ext2.h"
 #include "acl.h"
 #include "xip.h"
@@ -1286,9 +1287,11 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 		else
 			inode->i_mapping->a_ops = &ext2_aops;
 	} else if (S_ISLNK(inode->i_mode)) {
-		if (ext2_inode_is_fast_symlink(inode))
+		if (ext2_inode_is_fast_symlink(inode)) {
 			inode->i_op = &ext2_fast_symlink_inode_operations;
-		else {
+			nd_terminate_link(ei->i_data, inode->i_size,
+				sizeof(ei->i_data) - 1);
+		} else {
 			inode->i_op = &ext2_symlink_inode_operations;
 			if (test_opt(inode->i_sb, NOBH))
 				inode->i_mapping->a_ops = &ext2_nobh_aops;
-- 
cgit v0.10.2


From b5ed3112b5f74c8ec1c7aa03a76c596635e85197 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 19 Dec 2008 20:47:14 +0000
Subject: ext3: ensure fast symlinks are NUL-terminated

Ensure fast symlink targets are NUL-terminated, even if corrupted
on-disk.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: linux-ext4@vger.kernel.org
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f8424ad..c4bdccf 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -37,6 +37,7 @@
 #include <linux/uio.h>
 #include <linux/bio.h>
 #include <linux/fiemap.h>
+#include <linux/namei.h>
 #include "xattr.h"
 #include "acl.h"
 
@@ -2817,9 +2818,11 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 		inode->i_op = &ext3_dir_inode_operations;
 		inode->i_fop = &ext3_dir_operations;
 	} else if (S_ISLNK(inode->i_mode)) {
-		if (ext3_inode_is_fast_symlink(inode))
+		if (ext3_inode_is_fast_symlink(inode)) {
 			inode->i_op = &ext3_fast_symlink_inode_operations;
-		else {
+			nd_terminate_link(ei->i_data, inode->i_size,
+				sizeof(ei->i_data) - 1);
+		} else {
 			inode->i_op = &ext3_symlink_inode_operations;
 			ext3_set_aops(inode);
 		}
-- 
cgit v0.10.2


From e83c1397cafc4e44f868289db5e417463c0d09a4 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 19 Dec 2008 20:47:15 +0000
Subject: ext4: ensure fast symlinks are NUL-terminated

Ensure fast symlink targets are NUL-terminated, even if corrupted
on-disk.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: adilger@sun.com
Cc: linux-ext4@vger.kernel.org
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be21a5a..7c3325e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -34,6 +34,7 @@
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
 #include <linux/mpage.h>
+#include <linux/namei.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
 #include "ext4_jbd2.h"
@@ -4164,9 +4165,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		inode->i_op = &ext4_dir_inode_operations;
 		inode->i_fop = &ext4_dir_operations;
 	} else if (S_ISLNK(inode->i_mode)) {
-		if (ext4_inode_is_fast_symlink(inode))
+		if (ext4_inode_is_fast_symlink(inode)) {
 			inode->i_op = &ext4_fast_symlink_inode_operations;
-		else {
+			nd_terminate_link(ei->i_data, inode->i_size,
+				sizeof(ei->i_data) - 1);
+		} else {
 			inode->i_op = &ext4_symlink_inode_operations;
 			ext4_set_aops(inode);
 		}
-- 
cgit v0.10.2


From 21acaf8e8da00235be59a3e489d5fa2a8721cafc Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 19 Dec 2008 20:47:16 +0000
Subject: sysv: ensure fast symlinks are NUL-terminated

Ensure fast symlink targets are NUL-terminated, even if corrupted
on-disk.

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index df0d435..3d81bf5 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -27,6 +27,7 @@
 #include <linux/init.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
+#include <linux/namei.h>
 #include <asm/byteorder.h>
 #include "sysv.h"
 
@@ -163,8 +164,11 @@ void sysv_set_inode(struct inode *inode, dev_t rdev)
 		if (inode->i_blocks) {
 			inode->i_op = &sysv_symlink_inode_operations;
 			inode->i_mapping->a_ops = &sysv_aops;
-		} else
+		} else {
 			inode->i_op = &sysv_fast_symlink_inode_operations;
+			nd_terminate_link(SYSV_I(inode)->i_data, inode->i_size,
+				sizeof(SYSV_I(inode)->i_data) - 1);
+		}
 	} else
 		init_special_inode(inode, inode->i_mode, rdev);
 }
-- 
cgit v0.10.2


From a63d0ff31a136bdf52350c4e6c2929eaf47ea2b2 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 19 Dec 2008 20:47:17 +0000
Subject: freevxfs: ensure fast symlinks are NUL-terminated

Ensure fast symlink targets are NUL-terminated, even if corrupted
on-disk.

Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 9f3f2ce..03a6ea5 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -325,8 +325,10 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
 		if (!VXFS_ISIMMED(vip)) {
 			ip->i_op = &page_symlink_inode_operations;
 			ip->i_mapping->a_ops = &vxfs_aops;
-		} else
+		} else {
 			ip->i_op = &vxfs_immed_symlink_iops;
+			vip->vii_immed.vi_immed[ip->i_size] = '\0';
+		}
 	} else
 		init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev));
 
-- 
cgit v0.10.2


From 7df5fa06de89a4ac311957e0cb9c1d87552b4325 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 19 Dec 2008 20:47:18 +0000
Subject: befs: ensure fast symlinks are NUL-terminated

Ensure fast symlink targets are NUL-terminated, even if corrupted
on-disk.

Cc: Sergey S. Kostyliov <rathamahata@php4.ru>
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index b6dfee3..d06cb02 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -378,7 +378,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 		inode->i_size = 0;
 		inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE;
 		strncpy(befs_ino->i_data.symlink, raw_inode->data.symlink,
-			BEFS_SYMLINK_LEN);
+			BEFS_SYMLINK_LEN - 1);
+		befs_ino->i_data.symlink[BEFS_SYMLINK_LEN - 1] = '\0';
 	} else {
 		int num_blks;
 
@@ -477,6 +478,8 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 			kfree(link);
 			befs_error(sb, "Failed to read entire long symlink");
 			link = ERR_PTR(-EIO);
+		} else {
+			link[len - 1] = '\0';
 		}
 	} else {
 		link = befs_ino->i_data.symlink;
-- 
cgit v0.10.2


From dc711ca35f9d95a1eec02118e0c298b5e3068315 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 3 Nov 2008 15:03:50 -0500
Subject: fix switch_names() breakage in short-to-short case

We want ->name.len to match the resulting name on *both*
source and target

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index fd244c7..eeafc14 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1616,8 +1616,11 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
 			 */
 			memcpy(dentry->d_iname, target->d_name.name,
 					target->d_name.len + 1);
+			dentry->d_name.len = target->d_name.len;
+			return;
 		}
 	}
+	do_switch(dentry->d_name.len, target->d_name.len);
 }
 
 /*
@@ -1677,7 +1680,6 @@ already_unhashed:
 
 	/* Switch the names.. */
 	switch_names(dentry, target);
-	do_switch(dentry->d_name.len, target->d_name.len);
 	do_switch(dentry->d_name.hash, target->d_name.hash);
 
 	/* ... and switch the parents */
@@ -1787,7 +1789,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
 	struct dentry *dparent, *aparent;
 
 	switch_names(dentry, anon);
-	do_switch(dentry->d_name.len, anon->d_name.len);
 	do_switch(dentry->d_name.hash, anon->d_name.hash);
 
 	dparent = dentry->d_parent;
-- 
cgit v0.10.2


From be42c4c433c2c0d3f1583c08908fead00d36d222 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Mon, 1 Dec 2008 14:34:58 -0800
Subject: correct wrong function name of d_put in kernel document and source
 comment

no function named d_put(), it should be dput().

Impact: fix document and comment, no functionality changed

Signed-off-by: Zhao Lei <zhaolei@cn.fuijtsu.com>
Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 5579bda..041cb77 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -931,7 +931,7 @@ manipulate dentries:
   d_lookup: look up a dentry given its parent and path name component
 	It looks up the child of that given name from the dcache
 	hash table. If it is found, the reference count is incremented
-	and the dentry is returned. The caller must use d_put()
+	and the dentry is returned. The caller must use dput()
 	to free the dentry when it finishes using it.
 
 For further information on dentry locking, please refer to the document
diff --git a/fs/dcache.c b/fs/dcache.c
index eeafc14..c231a63 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1332,7 +1332,7 @@ err_out:
  *
  * Searches the children of the parent dentry for the name in question. If
  * the dentry is found its reference count is incremented and the dentry
- * is returned. The caller must use d_put to free the entry when it has
+ * is returned. The caller must use dput to free the entry when it has
  * finished using it. %NULL is returned on failure.
  *
  * __d_lookup is dcache_lock free. The hash list is protected using RCU.
-- 
cgit v0.10.2


From 52afeefb9dac9287429642189996426a2bfd6a25 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Mon, 1 Dec 2008 14:35:00 -0800
Subject: expand some comments (d_path / seq_path)

Explain that you really need to use the return value of d_path rather than
the buffer you passed into it.

Also fix the comment for seq_path(), the function arguments changed
recently but the comment hadn't been updated in sync.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index c231a63..bdb3f50 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1908,7 +1908,8 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
  * Convert a dentry into an ASCII path name. If the entry has been deleted
  * the string " (deleted)" is appended. Note that this is ambiguous.
  *
- * Returns the buffer or an error code if the path was too long.
+ * Returns a pointer into the buffer or an error code if the
+ * path was too long.
  *
  * "buflen" should be positive. Caller holds the dcache_lock.
  *
@@ -1984,7 +1985,10 @@ Elong:
  * Convert a dentry into an ASCII path name. If the entry has been deleted
  * the string " (deleted)" is appended. Note that this is ambiguous.
  *
- * Returns the buffer or an error code if the path was too long.
+ * Returns a pointer into the buffer or an error code if the path was
+ * too long. Note: Callers should use the returned pointer, not the passed
+ * in buffer, to use the name! The implementation often starts at an offset
+ * into the buffer, and may leave 0 bytes at the start.
  *
  * "buflen" should be positive.
  */
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 16c2115..99d8b8c 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -389,8 +389,14 @@ char *mangle_path(char *s, char *p, char *esc)
 }
 EXPORT_SYMBOL(mangle_path);
 
-/*
- * return the absolute path of 'dentry' residing in mount 'mnt'.
+/**
+ * seq_path - seq_file interface to print a pathname
+ * @m: the seq_file handle
+ * @path: the struct path to print
+ * @esc: set of characters to escape in the output
+ *
+ * return the absolute path of 'path', as represented by the
+ * dentry / mnt pair in the path parameter.
  */
 int seq_path(struct seq_file *m, struct path *path, char *esc)
 {
-- 
cgit v0.10.2


From 66f221875dc10813aa2f06c83ad60d0eb1356406 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 5 Nov 2008 15:04:29 +0100
Subject: remove incorrect comment in inode_permission

We now pass on all MAY_ flags to the filesystems permission routines,
so remove the comment stating the contrary.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index 9ed5e28..631cfdd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -247,7 +247,6 @@ int inode_permission(struct inode *inode, int mask)
 			return -EACCES;
 	}
 
-	/* Ordinary permission routines do not understand MAY_APPEND. */
 	if (inode->i_op && inode->i_op->permission)
 		retval = inode->i_op->permission(inode, mask);
 	else
-- 
cgit v0.10.2


From b4091d5f6fde28ab762e1094a1a26d81f3badfa5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 5 Nov 2008 15:07:21 +0100
Subject: kill walk_init_root

walk_init_root is a tiny helper that is marked __always_inline, has just
one caller and an unused argument.  Just merge it into the caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index 631cfdd..d4d0b59 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -526,18 +526,6 @@ out_unlock:
 	return result;
 }
 
-/* SMP-safe */
-static __always_inline void
-walk_init_root(const char *name, struct nameidata *nd)
-{
-	struct fs_struct *fs = current->fs;
-
-	read_lock(&fs->lock);
-	nd->path = fs->root;
-	path_get(&fs->root);
-	read_unlock(&fs->lock);
-}
-
 /*
  * Wrapper to retry pathname resolution whenever the underlying
  * file system returns an ESTALE.
@@ -575,9 +563,16 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
 		goto fail;
 
 	if (*link == '/') {
+		struct fs_struct *fs = current->fs;
+
 		path_put(&nd->path);
-		walk_init_root(link, nd);
+
+		read_lock(&fs->lock);
+		nd->path = fs->root;
+		path_get(&fs->root);
+		read_unlock(&fs->lock);
 	}
+
 	res = link_path_walk(link, nd);
 	if (nd->depth || res || nd->last_type!=LAST_NORM)
 		return res;
-- 
cgit v0.10.2


From 3fb64190aa3c23c10e6e9fd0124ac030115c99bf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Oct 2008 09:58:10 +0200
Subject: pass a struct path * to may_open

No need for the nameidata in may_open - a struct path is enough.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index d4d0b59..5cc0dc9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1487,9 +1487,9 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	return error;
 }
 
-int may_open(struct nameidata *nd, int acc_mode, int flag)
+int may_open(struct path *path, int acc_mode, int flag)
 {
-	struct dentry *dentry = nd->path.dentry;
+	struct dentry *dentry = path->dentry;
 	struct inode *inode = dentry->d_inode;
 	int error;
 
@@ -1510,13 +1510,13 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 	if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 	    	flag &= ~O_TRUNC;
 	} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
-		if (nd->path.mnt->mnt_flags & MNT_NODEV)
+		if (path->mnt->mnt_flags & MNT_NODEV)
 			return -EACCES;
 
 		flag &= ~O_TRUNC;
 	}
 
-	error = vfs_permission(nd, acc_mode);
+	error = inode_permission(inode, acc_mode);
 	if (error)
 		return error;
 	/*
@@ -1551,7 +1551,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 		 */
 		error = locks_verify_locked(inode);
 		if (!error)
-			error = security_path_truncate(&nd->path, 0,
+			error = security_path_truncate(path, 0,
 					       ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
 		if (!error) {
 			DQUOT_INIT(inode);
@@ -1594,7 +1594,7 @@ out_unlock:
 	if (error)
 		return error;
 	/* Don't check for write permission, don't truncate */
-	return may_open(nd, 0, flag & ~O_TRUNC);
+	return may_open(&nd->path, 0, flag & ~O_TRUNC);
 }
 
 /*
@@ -1780,7 +1780,7 @@ ok:
 		if (error)
 			goto exit;
 	}
-	error = may_open(&nd, acc_mode, flag);
+	error = may_open(&nd.path, acc_mode, flag);
 	if (error) {
 		if (will_write)
 			mnt_drop_write(nd.path.mnt);
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index b1acbd6..b274519 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -38,9 +38,10 @@ static struct file *do_open(char *name, int flags)
 		return ERR_PTR(error);
 
 	if (flags == O_RDWR)
-		error = may_open(&nd,MAY_READ|MAY_WRITE,FMODE_READ|FMODE_WRITE);
+		error = may_open(&nd.path, MAY_READ|MAY_WRITE,
+					   FMODE_READ|FMODE_WRITE);
 	else
-		error = may_open(&nd, MAY_WRITE, FMODE_WRITE);
+		error = may_open(&nd.path, MAY_WRITE, FMODE_WRITE);
 
 	if (!error)
 		return dentry_open(nd.path.dentry, nd.path.mnt, flags,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c5e4c5c..3468df5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1869,7 +1869,7 @@ extern void free_write_pipe(struct file *);
 
 extern struct file *do_filp_open(int dfd, const char *pathname,
 		int open_flag, int mode);
-extern int may_open(struct nameidata *, int, int);
+extern int may_open(struct path *, int, int);
 
 extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
 extern struct file * open_exec(const char *);
-- 
cgit v0.10.2


From cb23beb55100171646e69e248fb45f10db6e99a4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Oct 2008 09:59:29 +0200
Subject: kill vfs_permission

With all the nameidata removal there's no point anymore for this helper.
Of the three callers left two will go away with the next lookup series
anyway.

Also add proper kerneldoc to inode_permission as this is the main
permission check routine now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/exec.c b/fs/exec.c
index 02d2e12..dfbf700 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -127,7 +127,8 @@ asmlinkage long sys_uselib(const char __user * library)
 	if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
 		goto exit;
 
-	error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN);
+	error = inode_permission(nd.path.dentry->d_inode,
+				 MAY_READ | MAY_EXEC | MAY_OPEN);
 	if (error)
 		goto exit;
 
@@ -680,7 +681,7 @@ struct file *open_exec(const char *name)
 	if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
 		goto out_path_put;
 
-	err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
+	err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
 	if (err)
 		goto out_path_put;
 
diff --git a/fs/namei.c b/fs/namei.c
index 5cc0dc9..3f88e04 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -226,6 +226,16 @@ int generic_permission(struct inode *inode, int mask,
 	return -EACCES;
 }
 
+/**
+ * inode_permission  -  check for access rights to a given inode
+ * @inode:	inode to check permission on
+ * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ *
+ * Used to check for read/write/execute permissions on an inode.
+ * We use "fsuid" for this, letting us set arbitrary permissions
+ * for filesystem access without changing the "normal" uids which
+ * are used for other things.
+ */
 int inode_permission(struct inode *inode, int mask)
 {
 	int retval;
@@ -264,21 +274,6 @@ int inode_permission(struct inode *inode, int mask)
 }
 
 /**
- * vfs_permission  -  check for access rights to a given path
- * @nd:		lookup result that describes the path
- * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
- *
- * Used to check for read/write/execute permissions on a path.
- * We use "fsuid" for this, letting us set arbitrary permissions
- * for filesystem access without changing the "normal" uids which
- * are used for other things.
- */
-int vfs_permission(struct nameidata *nd, int mask)
-{
-	return inode_permission(nd->path.dentry->d_inode, mask);
-}
-
-/**
  * file_permission  -  check for additional access rights to a given file
  * @file:	file to check access rights for
  * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
@@ -288,7 +283,7 @@ int vfs_permission(struct nameidata *nd, int mask)
  *
  * Note:
  *	Do not use this function in new code.  All access checks should
- *	be done using vfs_permission().
+ *	be done using inode_permission().
  */
 int file_permission(struct file *file, int mask)
 {
@@ -853,7 +848,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 		nd->flags |= LOOKUP_CONTINUE;
 		err = exec_permission_lite(inode);
 		if (err == -EAGAIN)
-			err = vfs_permission(nd, MAY_EXEC);
+			err = inode_permission(nd->path.dentry->d_inode,
+					       MAY_EXEC);
  		if (err)
 			break;
 
@@ -2882,7 +2878,6 @@ EXPORT_SYMBOL(path_lookup);
 EXPORT_SYMBOL(kern_path);
 EXPORT_SYMBOL(vfs_path_lookup);
 EXPORT_SYMBOL(inode_permission);
-EXPORT_SYMBOL(vfs_permission);
 EXPORT_SYMBOL(file_permission);
 EXPORT_SYMBOL(unlock_rename);
 EXPORT_SYMBOL(vfs_create);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3468df5..fd61598 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1212,7 +1212,6 @@ extern void unlock_super(struct super_block *);
 /*
  * VFS helper functions..
  */
-extern int vfs_permission(struct nameidata *, int);
 extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
 extern int vfs_mkdir(struct inode *, struct dentry *, int);
 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
-- 
cgit v0.10.2


From 18d8fda7c3c9439be04d7ea2e82da2513b121acb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 26 Dec 2008 00:35:37 -0500
Subject: take init_fs to saner place

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c
index 1f76218..c2938e5 100644
--- a/arch/alpha/kernel/init_task.c
+++ b/arch/alpha/kernel/init_task.c
@@ -8,7 +8,6 @@
 #include <asm/uaccess.h>
 
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c
index 0bbf806..e859af3 100644
--- a/arch/arm/kernel/init_task.c
+++ b/arch/arm/kernel/init_task.c
@@ -12,7 +12,6 @@
 
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c
index 4405846..993d56e 100644
--- a/arch/avr32/kernel/init_task.c
+++ b/arch/avr32/kernel/init_task.c
@@ -13,7 +13,6 @@
 
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c
index 6bdba7b..2c228c0 100644
--- a/arch/blackfin/kernel/init_task.c
+++ b/arch/blackfin/kernel/init_task.c
@@ -33,7 +33,6 @@
 #include <linux/mqueue.h>
 #include <linux/fs.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index 5933656..60816e8 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -37,7 +37,6 @@
  * setup.
  */
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c
index e219881..29429a8 100644
--- a/arch/frv/kernel/init_task.c
+++ b/arch/frv/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include <asm/pgtable.h>
 
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c
index 93a4899..cb5dc55 100644
--- a/arch/h8300/kernel/init_task.c
+++ b/arch/h8300/kernel/init_task.c
@@ -12,7 +12,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index 9d7e1c6..5b0e830 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -17,7 +17,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c
index 0d658db..016885c 100644
--- a/arch/m32r/kernel/init_task.c
+++ b/arch/m32r/kernel/init_task.c
@@ -11,7 +11,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 3042c2b..632ce01 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,7 +40,6 @@
  * alignment requirements and potentially different initial
  * setup.
  */
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c
index 344c01a..fe282de 100644
--- a/arch/m68knommu/kernel/init_task.c
+++ b/arch/m68knommu/kernel/init_task.c
@@ -12,7 +12,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c
index d72487a..149cd91 100644
--- a/arch/mips/kernel/init_task.c
+++ b/arch/mips/kernel/init_task.c
@@ -9,7 +9,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c
index af16f6e..5ac3566 100644
--- a/arch/mn10300/kernel/init_task.c
+++ b/arch/mn10300/kernel/init_task.c
@@ -18,7 +18,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c
index f5941c0..1e25a45 100644
--- a/arch/parisc/kernel/init_task.c
+++ b/arch/parisc/kernel/init_task.c
@@ -34,7 +34,6 @@
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
index 4c85b8d..688b329 100644
--- a/arch/powerpc/kernel/init_task.c
+++ b/arch/powerpc/kernel/init_task.c
@@ -7,7 +7,6 @@
 #include <linux/mqueue.h>
 #include <asm/uaccess.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c
index e807168..7db95c0 100644
--- a/arch/s390/kernel/init_task.c
+++ b/arch/s390/kernel/init_task.c
@@ -16,7 +16,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c
index b151a25..80c35ff 100644
--- a/arch/sh/kernel/init_task.c
+++ b/arch/sh/kernel/init_task.c
@@ -7,7 +7,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct pt_regs fake_swapper_regs;
diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c
index 62126e4..f28cb82 100644
--- a/arch/sparc/kernel/init_task.c
+++ b/arch/sparc/kernel/init_task.c
@@ -8,7 +8,6 @@
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
index 910eda8..806d381 100644
--- a/arch/um/kernel/init_task.c
+++ b/arch/um/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include "linux/mqueue.h"
 #include "asm/uaccess.h"
 
-static struct fs_struct init_fs = INIT_FS;
 struct mm_struct init_mm = INIT_MM(init_mm);
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index d399180..df3bf26 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include <asm/pgtable.h>
 #include <asm/desc.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c
index 3df469d..e07f5c9 100644
--- a/arch/xtensa/kernel/init_task.c
+++ b/arch/xtensa/kernel/init_task.c
@@ -21,7 +21,6 @@
 
 #include <asm/uaccess.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/fs/namei.c b/fs/namei.c
index 3f88e04..e203691 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2893,3 +2893,10 @@ EXPORT_SYMBOL(vfs_symlink);
 EXPORT_SYMBOL(vfs_unlink);
 EXPORT_SYMBOL(dentry_unhash);
 EXPORT_SYMBOL(generic_readlink);
+
+/* to be mentioned only in INIT_TASK */
+struct fs_struct init_fs = {
+	.count		= ATOMIC_INIT(1),
+	.lock		= RW_LOCK_UNLOCKED,
+	.umask		= 0022,
+};
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 9e5a06e..a97c053 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -10,12 +10,6 @@ struct fs_struct {
 	struct path root, pwd;
 };
 
-#define INIT_FS {				\
-	.count		= ATOMIC_INIT(1),	\
-	.lock		= RW_LOCK_UNLOCKED,	\
-	.umask		= 0022, \
-}
-
 extern struct kmem_cache *fs_cachep;
 
 extern void exit_fs(struct task_struct *);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 959f552..2f3c2d4 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -12,6 +12,7 @@
 #include <net/net_namespace.h>
 
 extern struct files_struct init_files;
+extern struct fs_struct init_fs;
 
 #define INIT_KIOCTX(name, which_mm) \
 {							\
-- 
cgit v0.10.2


From 1239f26c05899f1f3c541b41e719c59d58038786 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 10 Dec 2008 18:37:28 -0500
Subject: make INIT_FS use the __RW_LOCK_UNLOCKED initialization

[AV: rediffed on top of unification of init_fs]
Initialization of init_fs still uses the deprecated RW_LOCK_UNLOCKED macro.
This patch updates it to use the __RW_LOCK_UNLOCKED(lock) macro.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index e203691..dd5c9f0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2897,6 +2897,6 @@ EXPORT_SYMBOL(generic_readlink);
 /* to be mentioned only in INIT_TASK */
 struct fs_struct init_fs = {
 	.count		= ATOMIC_INIT(1),
-	.lock		= RW_LOCK_UNLOCKED,
+	.lock		= __RW_LOCK_UNLOCKED(init_fs.lock),
 	.umask		= 0022,
 };
-- 
cgit v0.10.2


From fd659fd6275d3426d7967da1f0e3638bbbd2fedb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 10 Dec 2008 09:35:45 -0800
Subject: fix f_count description in Documentation/filesystems/files.txt

Documentation/filesystems/files.txt was not updated when
f_count became an atomic_long_t.
atomic_long_inc_not_zero() is now used instead of atomic_inc_not_zero()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/Documentation/filesystems/files.txt b/Documentation/filesystems/files.txt
index bb0142f..ac2facc 100644
--- a/Documentation/filesystems/files.txt
+++ b/Documentation/filesystems/files.txt
@@ -76,13 +76,13 @@ the fdtable structure -
 5. Handling of the file structures is special. Since the look-up
    of the fd (fget()/fget_light()) are lock-free, it is possible
    that look-up may race with the last put() operation on the
-   file structure. This is avoided using atomic_inc_not_zero()
+   file structure. This is avoided using atomic_long_inc_not_zero()
    on ->f_count :
 
 	rcu_read_lock();
 	file = fcheck_files(files, fd);
 	if (file) {
-		if (atomic_inc_not_zero(&file->f_count))
+		if (atomic_long_inc_not_zero(&file->f_count))
 			*fput_needed = 1;
 		else
 		/* Didn't get the reference, someone's freed */
@@ -92,7 +92,7 @@ the fdtable structure -
 	....
 	return file;
 
-   atomic_inc_not_zero() detects if refcounts is already zero or
+   atomic_long_inc_not_zero() detects if refcounts is already zero or
    goes to zero during increment. If it does, we fail
    fget()/fget_light().
 
-- 
cgit v0.10.2


From b6b3fdead251d432f32f2cfce2a893ab8a658110 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 10 Dec 2008 09:35:45 -0800
Subject: filp_cachep can be static in fs/file_table.c

Instead of creating the "filp" kmem_cache in vfs_caches_init(),
we can do it a litle be later in files_init(), so that filp_cachep
is static to fs/file_table.c

Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index bdb3f50..e88c23b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2314,9 +2314,6 @@ static void __init dcache_init(void)
 /* SLAB cache for __getname() consumers */
 struct kmem_cache *names_cachep __read_mostly;
 
-/* SLAB cache for file structures */
-struct kmem_cache *filp_cachep __read_mostly;
-
 EXPORT_SYMBOL(d_genocide);
 
 void __init vfs_caches_init_early(void)
@@ -2338,9 +2335,6 @@ void __init vfs_caches_init(unsigned long mempages)
 	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
 
-	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
-
 	dcache_init();
 	inode_init();
 	files_init(mempages);
diff --git a/fs/file_table.c b/fs/file_table.c
index 0fbcacc..bbeeac6 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -32,6 +32,9 @@ struct files_stat_struct files_stat = {
 /* public. Not pretty! */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
 
+/* SLAB cache for file structures */
+static struct kmem_cache *filp_cachep __read_mostly;
+
 static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 
 static inline void file_free_rcu(struct rcu_head *head)
@@ -397,7 +400,12 @@ too_bad:
 void __init files_init(unsigned long mempages)
 { 
 	int n; 
-	/* One file with associated inode and dcache is very roughly 1K. 
+
+	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+
+	/*
+	 * One file with associated inode and dcache is very roughly 1K.
 	 * Per default don't use more than 10% of our memory for files. 
 	 */ 
 
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 4aab6f1..09d6c5b 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -57,8 +57,6 @@ struct files_struct {
 
 #define files_fdtable(files) (rcu_dereference((files)->fdt))
 
-extern struct kmem_cache *filp_cachep;
-
 struct file_operations;
 struct vfsmount;
 struct dentry;
-- 
cgit v0.10.2


From 6badd79bd002788aaec27b50a74ab69ef65ab8ee Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 26 Dec 2008 00:57:40 -0500
Subject: kill ->dir_notify()

Remove the hopelessly misguided ->dir_notify().  The only instance (cifs)
has been broken by design from the very beginning; the objects it creates
are never destroyed, keep references to struct file they can outlive, nothing
that could possibly evict them exists on close(2) path *and* no locking
whatsoever is done to prevent races with close(), should the previous, er,
deficiencies someday be dealt with.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 23d2f44..ccec553 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -394,7 +394,6 @@ prototypes:
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
 			unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
-	int (*dir_notify)(struct file *, unsigned long);
 };
 
 locking rules:
@@ -424,7 +423,6 @@ sendfile:		no
 sendpage:		no
 get_unmapped_area:	no
 check_flags:		no
-dir_notify:		no
 
 ->llseek() locking has moved from llseek to the individual llseek
 implementations.  If your fs is not using generic_file_llseek, you
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 041cb77..ef19afa 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -733,7 +733,6 @@ struct file_operations {
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
-	int (*dir_notify)(struct file *filp, unsigned long arg);
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
@@ -800,8 +799,6 @@ otherwise noted.
 
   check_flags: called by the fcntl(2) system call for F_SETFL command
 
-  dir_notify: called by the fcntl(2) system call for F_NOTIFY command
-
   flock: called by the flock(2) system call
 
   splice_write: called by the VFS to splice data from a pipe to a file. This
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 5f1538c..a05287a 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -132,11 +132,6 @@ static int bad_file_check_flags(int flags)
 	return -EIO;
 }
 
-static int bad_file_dir_notify(struct file *file, unsigned long arg)
-{
-	return -EIO;
-}
-
 static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl)
 {
 	return -EIO;
@@ -179,7 +174,6 @@ static const struct file_operations bad_file_ops =
 	.sendpage	= bad_file_sendpage,
 	.get_unmapped_area = bad_file_get_unmapped_area,
 	.check_flags	= bad_file_check_flags,
-	.dir_notify	= bad_file_dir_notify,
 	.flock		= bad_file_flock,
 	.splice_write	= bad_file_splice_write,
 	.splice_read	= bad_file_splice_read,
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 6ba43fb..9948c00 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o
 
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
 	  link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
-	  md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o \
+	  md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
 	  readdir.o ioctl.o sess.o export.o cifsacl.o
 
 cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0005a19..13ea532 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -747,7 +747,6 @@ const struct file_operations cifs_file_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -768,7 +767,6 @@ const struct file_operations cifs_file_direct_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 	.llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -789,7 +787,6 @@ const struct file_operations cifs_file_nobrl_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -809,7 +806,6 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 	.llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -818,9 +814,6 @@ const struct file_operations cifs_dir_ops = {
 	.readdir = cifs_readdir,
 	.release = cifs_closedir,
 	.read    = generic_read_dir,
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
 	.unlocked_ioctl  = cifs_ioctl,
 	.llseek = generic_file_llseek,
 };
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 2ce04c7..7ac4818 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -76,7 +76,6 @@ extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
 extern const struct file_operations cifs_dir_ops;
 extern int cifs_dir_open(struct inode *inode, struct file *file);
 extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir);
-extern int cifs_dir_notify(struct file *, unsigned long arg);
 
 /* Functions related to dir entries */
 extern struct dentry_operations cifs_dentry_ops;
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
deleted file mode 100644
index 5a57581..0000000
--- a/fs/cifs/fcntl.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- *   fs/cifs/fcntl.c
- *
- *   vfs operations that deal with the file control API
- *
- *   Copyright (C) International Business Machines  Corp., 2003,2004
- *   Author(s): Steve French (sfrench@us.ibm.com)
- *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include "cifsglob.h"
-#include "cifsproto.h"
-#include "cifs_unicode.h"
-#include "cifs_debug.h"
-#include "cifsfs.h"
-
-static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
-{
-	__u32 cifs_ntfy_flags = 0;
-
-	/* No way on Linux VFS to ask to monitor xattr
-	changes (and no stream support either */
-	if (fcntl_notify_flags & DN_ACCESS)
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
-	if (fcntl_notify_flags & DN_MODIFY) {
-		/* What does this mean on directories? */
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE |
-			FILE_NOTIFY_CHANGE_SIZE;
-	}
-	if (fcntl_notify_flags & DN_CREATE) {
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION |
-			FILE_NOTIFY_CHANGE_LAST_WRITE;
-	}
-	if (fcntl_notify_flags & DN_DELETE)
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE;
-	if (fcntl_notify_flags & DN_RENAME) {
-		/* BB review this - checking various server behaviors */
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME |
-			FILE_NOTIFY_CHANGE_FILE_NAME;
-	}
-	if (fcntl_notify_flags & DN_ATTRIB) {
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY |
-			FILE_NOTIFY_CHANGE_ATTRIBUTES;
-	}
-/*	if (fcntl_notify_flags & DN_MULTISHOT) {
-		cifs_ntfy_flags |= ;
-	} */ /* BB fixme - not sure how to handle this with CIFS yet */
-
-	return cifs_ntfy_flags;
-}
-
-int cifs_dir_notify(struct file *file, unsigned long arg)
-{
-	int xid;
-	int rc = -EINVAL;
-	int oplock = 0;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	char *full_path = NULL;
-	__u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES;
-	__u16 netfid;
-
-	if (experimEnabled == 0)
-		return 0;
-
-	xid = GetXid();
-	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-	pTcon = cifs_sb->tcon;
-
-	full_path = build_path_from_dentry(file->f_path.dentry);
-
-	if (full_path == NULL) {
-		rc = -ENOMEM;
-	} else {
-		cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg));
-		rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
-			GENERIC_READ | SYNCHRONIZE, 0 /* create options */,
-			&netfid, &oplock, NULL, cifs_sb->local_nls,
-			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-		/* BB fixme - add this handle to a notify handle list */
-		if (rc) {
-			cFYI(1, ("Could not open directory for notify"));
-		} else {
-			filter = convert_to_cifs_notify_flags(arg);
-			if (filter != 0) {
-				rc = CIFSSMBNotify(xid, pTcon,
-					0 /* no subdirs */, netfid,
-					filter, file, arg & DN_MULTISHOT,
-					cifs_sb->local_nls);
-			} else {
-				rc = -EINVAL;
-			}
-			/* BB add code to close file eventually (at unmount
-			it would close automatically but may be a way
-			to do it easily when inode freed or when
-			notify info is cleared/changed */
-			cFYI(1, ("notify rc %d", rc));
-		}
-	}
-
-	FreeXid(xid);
-	return rc;
-}
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 676073b..b0aa2cd 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -115,9 +115,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	dn->dn_next = inode->i_dnotify;
 	inode->i_dnotify = dn;
 	spin_unlock(&inode->i_lock);
-
-	if (filp->f_op && filp->f_op->dir_notify)
-		return filp->f_op->dir_notify(filp, arg);
 	return 0;
 
 out_free:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fd61598..be16ce0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1309,7 +1309,6 @@ struct file_operations {
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
-	int (*dir_notify)(struct file *filp, unsigned long arg);
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
-- 
cgit v0.10.2


From c2acf7b90821785fe812cc0aa05148e5a1f84204 Mon Sep 17 00:00:00 2001
From: Denis ChengRq <crquan@gmail.com>
Date: Mon, 1 Dec 2008 14:34:56 -0800
Subject: fs/block_dev.c: __read_mostly improvement and sb_is_blkdev_sb
 utilization

- iget5_locked in bdget really needs blockdev_superblock, instead of
  bd_mnt, so bd_mnt could be just a local variable;

- blockdev_superblock really needs __read_mostly, while local var bd_mnt
  not;

- make use of sb_is_blkdev_sb in bd_forget, instead of direct reference
  to blockdev_superblock.

Signed-off-by: Denis ChengRq <crquan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 99e0ae1..349a26c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -326,12 +326,13 @@ static struct file_system_type bd_type = {
 	.kill_sb	= kill_anon_super,
 };
 
-static struct vfsmount *bd_mnt __read_mostly;
-struct super_block *blockdev_superblock;
+struct super_block *blockdev_superblock __read_mostly;
 
 void __init bdev_cache_init(void)
 {
 	int err;
+	struct vfsmount *bd_mnt;
+
 	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
 			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
 				SLAB_MEM_SPREAD|SLAB_PANIC),
@@ -373,7 +374,7 @@ struct block_device *bdget(dev_t dev)
 	struct block_device *bdev;
 	struct inode *inode;
 
-	inode = iget5_locked(bd_mnt->mnt_sb, hash(dev),
+	inode = iget5_locked(blockdev_superblock, hash(dev),
 			bdev_test, bdev_set, &dev);
 
 	if (!inode)
@@ -463,7 +464,7 @@ void bd_forget(struct inode *inode)
 
 	spin_lock(&bdev_lock);
 	if (inode->i_bdev) {
-		if (inode->i_sb != blockdev_superblock)
+		if (!sb_is_blkdev_sb(inode->i_sb))
 			bdev = inode->i_bdev;
 		__bd_forget(inode);
 	}
-- 
cgit v0.10.2


From 272eb01485dda98e3b8910c7c1a53d597616b0a0 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 17 Dec 2008 13:59:41 -0500
Subject: filesystem notification: create fs/notify to contain all fs
 notification

Creating a generic filesystem notification interface, fsnotify, which will be
used by inotify, dnotify, and eventually fanotify is really starting to
clutter the fs directory.  This patch simply moves inotify and dnotify into
fs/notify/inotify and fs/notify/dnotify respectively to make both current fs/
and future notification tidier.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/Kconfig b/fs/Kconfig
index 522469a..ff0e819 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -270,44 +270,7 @@ config OCFS2_COMPAT_JBD
 
 endif # BLOCK
 
-config DNOTIFY
-	bool "Dnotify support"
-	default y
-	help
-	  Dnotify is a directory-based per-fd file change notification system
-	  that uses signals to communicate events to user-space.  There exist
-	  superior alternatives, but some applications may still rely on
-	  dnotify.
-
-	  If unsure, say Y.
-
-config INOTIFY
-	bool "Inotify file change notification support"
-	default y
-	---help---
-	  Say Y here to enable inotify support.  Inotify is a file change
-	  notification system and a replacement for dnotify.  Inotify fixes
-	  numerous shortcomings in dnotify and introduces several new features
-	  including multiple file events, one-shot support, and unmount
-	  notification.
-
-	  For more information, see <file:Documentation/filesystems/inotify.txt>
-
-	  If unsure, say Y.
-
-config INOTIFY_USER
-	bool "Inotify support for userspace"
-	depends on INOTIFY
-	default y
-	---help---
-	  Say Y here to enable inotify support for userspace, including the
-	  associated system calls.  Inotify allows monitoring of both files and
-	  directories via a single open fd.  Events are read from the file
-	  descriptor, which is also select()- and poll()-able.
-
-	  For more information, see <file:Documentation/filesystems/inotify.txt>
-
-	  If unsure, say Y.
+source "fs/notify/Kconfig"
 
 config QUOTA
 	bool "Quota support"
diff --git a/fs/Makefile b/fs/Makefile
index d9f8afe..e6f423d 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -20,8 +20,7 @@ obj-y +=	no-block.o
 endif
 
 obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
-obj-$(CONFIG_INOTIFY)		+= inotify.o
-obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
+obj-y				+= notify/
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
 obj-$(CONFIG_ANON_INODES)	+= anon_inodes.o
 obj-$(CONFIG_SIGNALFD)		+= signalfd.o
@@ -57,8 +56,6 @@ obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
 obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
 obj-$(CONFIG_QUOTACTL)		+= quota.o
 
-obj-$(CONFIG_DNOTIFY)		+= dnotify.o
-
 obj-$(CONFIG_PROC_FS)		+= proc/
 obj-y				+= partitions/
 obj-$(CONFIG_SYSFS)		+= sysfs/
diff --git a/fs/dnotify.c b/fs/dnotify.c
deleted file mode 100644
index b0aa2cd..0000000
--- a/fs/dnotify.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Directory notifications for Linux.
- *
- * Copyright (C) 2000,2001,2002 Stephen Rothwell
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/dnotify.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/fdtable.h>
-
-int dir_notify_enable __read_mostly = 1;
-
-static struct kmem_cache *dn_cache __read_mostly;
-
-static void redo_inode_mask(struct inode *inode)
-{
-	unsigned long new_mask;
-	struct dnotify_struct *dn;
-
-	new_mask = 0;
-	for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next)
-		new_mask |= dn->dn_mask & ~DN_MULTISHOT;
-	inode->i_dnotify_mask = new_mask;
-}
-
-void dnotify_flush(struct file *filp, fl_owner_t id)
-{
-	struct dnotify_struct *dn;
-	struct dnotify_struct **prev;
-	struct inode *inode;
-
-	inode = filp->f_path.dentry->d_inode;
-	if (!S_ISDIR(inode->i_mode))
-		return;
-	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
-	while ((dn = *prev) != NULL) {
-		if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
-			*prev = dn->dn_next;
-			redo_inode_mask(inode);
-			kmem_cache_free(dn_cache, dn);
-			break;
-		}
-		prev = &dn->dn_next;
-	}
-	spin_unlock(&inode->i_lock);
-}
-
-int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
-{
-	struct dnotify_struct *dn;
-	struct dnotify_struct *odn;
-	struct dnotify_struct **prev;
-	struct inode *inode;
-	fl_owner_t id = current->files;
-	struct file *f;
-	int error = 0;
-
-	if ((arg & ~DN_MULTISHOT) == 0) {
-		dnotify_flush(filp, id);
-		return 0;
-	}
-	if (!dir_notify_enable)
-		return -EINVAL;
-	inode = filp->f_path.dentry->d_inode;
-	if (!S_ISDIR(inode->i_mode))
-		return -ENOTDIR;
-	dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
-	if (dn == NULL)
-		return -ENOMEM;
-	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
-	while ((odn = *prev) != NULL) {
-		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
-			odn->dn_fd = fd;
-			odn->dn_mask |= arg;
-			inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-			goto out_free;
-		}
-		prev = &odn->dn_next;
-	}
-
-	rcu_read_lock();
-	f = fcheck(fd);
-	rcu_read_unlock();
-	/* we'd lost the race with close(), sod off silently */
-	/* note that inode->i_lock prevents reordering problems
-	 * between accesses to descriptor table and ->i_dnotify */
-	if (f != filp)
-		goto out_free;
-
-	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-	if (error)
-		goto out_free;
-
-	dn->dn_mask = arg;
-	dn->dn_fd = fd;
-	dn->dn_filp = filp;
-	dn->dn_owner = id;
-	inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-	dn->dn_next = inode->i_dnotify;
-	inode->i_dnotify = dn;
-	spin_unlock(&inode->i_lock);
-	return 0;
-
-out_free:
-	spin_unlock(&inode->i_lock);
-	kmem_cache_free(dn_cache, dn);
-	return error;
-}
-
-void __inode_dir_notify(struct inode *inode, unsigned long event)
-{
-	struct dnotify_struct *	dn;
-	struct dnotify_struct **prev;
-	struct fown_struct *	fown;
-	int			changed = 0;
-
-	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
-	while ((dn = *prev) != NULL) {
-		if ((dn->dn_mask & event) == 0) {
-			prev = &dn->dn_next;
-			continue;
-		}
-		fown = &dn->dn_filp->f_owner;
-		send_sigio(fown, dn->dn_fd, POLL_MSG);
-		if (dn->dn_mask & DN_MULTISHOT)
-			prev = &dn->dn_next;
-		else {
-			*prev = dn->dn_next;
-			changed = 1;
-			kmem_cache_free(dn_cache, dn);
-		}
-	}
-	if (changed)
-		redo_inode_mask(inode);
-	spin_unlock(&inode->i_lock);
-}
-
-EXPORT_SYMBOL(__inode_dir_notify);
-
-/*
- * This is hopelessly wrong, but unfixable without API changes.  At
- * least it doesn't oops the kernel...
- *
- * To safely access ->d_parent we need to keep d_move away from it.  Use the
- * dentry's d_lock for this.
- */
-void dnotify_parent(struct dentry *dentry, unsigned long event)
-{
-	struct dentry *parent;
-
-	if (!dir_notify_enable)
-		return;
-
-	spin_lock(&dentry->d_lock);
-	parent = dentry->d_parent;
-	if (parent->d_inode->i_dnotify_mask & event) {
-		dget(parent);
-		spin_unlock(&dentry->d_lock);
-		__inode_dir_notify(parent->d_inode, event);
-		dput(parent);
-	} else {
-		spin_unlock(&dentry->d_lock);
-	}
-}
-EXPORT_SYMBOL_GPL(dnotify_parent);
-
-static int __init dnotify_init(void)
-{
-	dn_cache = kmem_cache_create("dnotify_cache",
-		sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL);
-	return 0;
-}
-
-module_init(dnotify_init)
diff --git a/fs/inotify.c b/fs/inotify.c
deleted file mode 100644
index dae3f28..0000000
--- a/fs/inotify.c
+++ /dev/null
@@ -1,913 +0,0 @@
-/*
- * fs/inotify.c - inode-based file event notifications
- *
- * Authors:
- *	John McCutchan	<ttb@tentacle.dhs.org>
- *	Robert Love	<rml@novell.com>
- *
- * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
- *
- * Copyright (C) 2005 John McCutchan
- * Copyright 2006 Hewlett-Packard Development Company, L.P.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/writeback.h>
-#include <linux/inotify.h>
-
-static atomic_t inotify_cookie;
-
-/*
- * Lock ordering:
- *
- * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
- * iprune_mutex (synchronize shrink_icache_memory())
- * 	inode_lock (protects the super_block->s_inodes list)
- * 	inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
- * 		inotify_handle->mutex (protects inotify_handle and watches->h_list)
- *
- * The inode->inotify_mutex and inotify_handle->mutex and held during execution
- * of a caller's event handler.  Thus, the caller must not hold any locks
- * taken in their event handler while calling any of the published inotify
- * interfaces.
- */
-
-/*
- * Lifetimes of the three main data structures--inotify_handle, inode, and
- * inotify_watch--are managed by reference count.
- *
- * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
- * Additional references can bump the count via get_inotify_handle() and drop
- * the count via put_inotify_handle().
- *
- * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
- * to remove_watch_no_event().  Additional references can bump the count via
- * get_inotify_watch() and drop the count via put_inotify_watch().  The caller
- * is reponsible for the final put after receiving IN_IGNORED, or when using
- * IN_ONESHOT after receiving the first event.  Inotify does the final put if
- * inotify_destroy() is called.
- *
- * inode: Pinned so long as the inode is associated with a watch, from
- * inotify_add_watch() to the final put_inotify_watch().
- */
-
-/*
- * struct inotify_handle - represents an inotify instance
- *
- * This structure is protected by the mutex 'mutex'.
- */
-struct inotify_handle {
-	struct idr		idr;		/* idr mapping wd -> watch */
-	struct mutex		mutex;		/* protects this bad boy */
-	struct list_head	watches;	/* list of watches */
-	atomic_t		count;		/* reference count */
-	u32			last_wd;	/* the last wd allocated */
-	const struct inotify_operations *in_ops; /* inotify caller operations */
-};
-
-static inline void get_inotify_handle(struct inotify_handle *ih)
-{
-	atomic_inc(&ih->count);
-}
-
-static inline void put_inotify_handle(struct inotify_handle *ih)
-{
-	if (atomic_dec_and_test(&ih->count)) {
-		idr_destroy(&ih->idr);
-		kfree(ih);
-	}
-}
-
-/**
- * get_inotify_watch - grab a reference to an inotify_watch
- * @watch: watch to grab
- */
-void get_inotify_watch(struct inotify_watch *watch)
-{
-	atomic_inc(&watch->count);
-}
-EXPORT_SYMBOL_GPL(get_inotify_watch);
-
-int pin_inotify_watch(struct inotify_watch *watch)
-{
-	struct super_block *sb = watch->inode->i_sb;
-	spin_lock(&sb_lock);
-	if (sb->s_count >= S_BIAS) {
-		atomic_inc(&sb->s_active);
-		spin_unlock(&sb_lock);
-		atomic_inc(&watch->count);
-		return 1;
-	}
-	spin_unlock(&sb_lock);
-	return 0;
-}
-
-/**
- * put_inotify_watch - decrements the ref count on a given watch.  cleans up
- * watch references if the count reaches zero.  inotify_watch is freed by
- * inotify callers via the destroy_watch() op.
- * @watch: watch to release
- */
-void put_inotify_watch(struct inotify_watch *watch)
-{
-	if (atomic_dec_and_test(&watch->count)) {
-		struct inotify_handle *ih = watch->ih;
-
-		iput(watch->inode);
-		ih->in_ops->destroy_watch(watch);
-		put_inotify_handle(ih);
-	}
-}
-EXPORT_SYMBOL_GPL(put_inotify_watch);
-
-void unpin_inotify_watch(struct inotify_watch *watch)
-{
-	struct super_block *sb = watch->inode->i_sb;
-	put_inotify_watch(watch);
-	deactivate_super(sb);
-}
-
-/*
- * inotify_handle_get_wd - returns the next WD for use by the given handle
- *
- * Callers must hold ih->mutex.  This function can sleep.
- */
-static int inotify_handle_get_wd(struct inotify_handle *ih,
-				 struct inotify_watch *watch)
-{
-	int ret;
-
-	do {
-		if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL)))
-			return -ENOSPC;
-		ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
-	} while (ret == -EAGAIN);
-
-	if (likely(!ret))
-		ih->last_wd = watch->wd;
-
-	return ret;
-}
-
-/*
- * inotify_inode_watched - returns nonzero if there are watches on this inode
- * and zero otherwise.  We call this lockless, we do not care if we race.
- */
-static inline int inotify_inode_watched(struct inode *inode)
-{
-	return !list_empty(&inode->inotify_watches);
-}
-
-/*
- * Get child dentry flag into synch with parent inode.
- * Flag should always be clear for negative dentrys.
- */
-static void set_dentry_child_flags(struct inode *inode, int watched)
-{
-	struct dentry *alias;
-
-	spin_lock(&dcache_lock);
-	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
-		struct dentry *child;
-
-		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
-			if (!child->d_inode)
-				continue;
-
-			spin_lock(&child->d_lock);
-			if (watched)
-				child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
-			else
-				child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
-			spin_unlock(&child->d_lock);
-		}
-	}
-	spin_unlock(&dcache_lock);
-}
-
-/*
- * inotify_find_handle - find the watch associated with the given inode and
- * handle
- *
- * Callers must hold inode->inotify_mutex.
- */
-static struct inotify_watch *inode_find_handle(struct inode *inode,
-					       struct inotify_handle *ih)
-{
-	struct inotify_watch *watch;
-
-	list_for_each_entry(watch, &inode->inotify_watches, i_list) {
-		if (watch->ih == ih)
-			return watch;
-	}
-
-	return NULL;
-}
-
-/*
- * remove_watch_no_event - remove watch without the IN_IGNORED event.
- *
- * Callers must hold both inode->inotify_mutex and ih->mutex.
- */
-static void remove_watch_no_event(struct inotify_watch *watch,
-				  struct inotify_handle *ih)
-{
-	list_del(&watch->i_list);
-	list_del(&watch->h_list);
-
-	if (!inotify_inode_watched(watch->inode))
-		set_dentry_child_flags(watch->inode, 0);
-
-	idr_remove(&ih->idr, watch->wd);
-}
-
-/**
- * inotify_remove_watch_locked - Remove a watch from both the handle and the
- * inode.  Sends the IN_IGNORED event signifying that the inode is no longer
- * watched.  May be invoked from a caller's event handler.
- * @ih: inotify handle associated with watch
- * @watch: watch to remove
- *
- * Callers must hold both inode->inotify_mutex and ih->mutex.
- */
-void inotify_remove_watch_locked(struct inotify_handle *ih,
-				 struct inotify_watch *watch)
-{
-	remove_watch_no_event(watch, ih);
-	ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
-}
-EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
-
-/* Kernel API for producing events */
-
-/*
- * inotify_d_instantiate - instantiate dcache entry for inode
- */
-void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
-{
-	struct dentry *parent;
-
-	if (!inode)
-		return;
-
-	spin_lock(&entry->d_lock);
-	parent = entry->d_parent;
-	if (parent->d_inode && inotify_inode_watched(parent->d_inode))
-		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
-	spin_unlock(&entry->d_lock);
-}
-
-/*
- * inotify_d_move - dcache entry has been moved
- */
-void inotify_d_move(struct dentry *entry)
-{
-	struct dentry *parent;
-
-	parent = entry->d_parent;
-	if (inotify_inode_watched(parent->d_inode))
-		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
-	else
-		entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
-}
-
-/**
- * inotify_inode_queue_event - queue an event to all watches on this inode
- * @inode: inode event is originating from
- * @mask: event mask describing this event
- * @cookie: cookie for synchronization, or zero
- * @name: filename, if any
- * @n_inode: inode associated with name
- */
-void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
-			       const char *name, struct inode *n_inode)
-{
-	struct inotify_watch *watch, *next;
-
-	if (!inotify_inode_watched(inode))
-		return;
-
-	mutex_lock(&inode->inotify_mutex);
-	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
-		u32 watch_mask = watch->mask;
-		if (watch_mask & mask) {
-			struct inotify_handle *ih= watch->ih;
-			mutex_lock(&ih->mutex);
-			if (watch_mask & IN_ONESHOT)
-				remove_watch_no_event(watch, ih);
-			ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
-						 name, n_inode);
-			mutex_unlock(&ih->mutex);
-		}
-	}
-	mutex_unlock(&inode->inotify_mutex);
-}
-EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
-
-/**
- * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
- * @dentry: the dentry in question, we queue against this dentry's parent
- * @mask: event mask describing this event
- * @cookie: cookie for synchronization, or zero
- * @name: filename, if any
- */
-void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
-				       u32 cookie, const char *name)
-{
-	struct dentry *parent;
-	struct inode *inode;
-
-	if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
-		return;
-
-	spin_lock(&dentry->d_lock);
-	parent = dentry->d_parent;
-	inode = parent->d_inode;
-
-	if (inotify_inode_watched(inode)) {
-		dget(parent);
-		spin_unlock(&dentry->d_lock);
-		inotify_inode_queue_event(inode, mask, cookie, name,
-					  dentry->d_inode);
-		dput(parent);
-	} else
-		spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
-
-/**
- * inotify_get_cookie - return a unique cookie for use in synchronizing events.
- */
-u32 inotify_get_cookie(void)
-{
-	return atomic_inc_return(&inotify_cookie);
-}
-EXPORT_SYMBOL_GPL(inotify_get_cookie);
-
-/**
- * inotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
- * @list: list of inodes being unmounted (sb->s_inodes)
- *
- * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
- * We temporarily drop inode_lock, however, and CAN block.
- */
-void inotify_unmount_inodes(struct list_head *list)
-{
-	struct inode *inode, *next_i, *need_iput = NULL;
-
-	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
-		struct inotify_watch *watch, *next_w;
-		struct inode *need_iput_tmp;
-		struct list_head *watches;
-
-		/*
-		 * If i_count is zero, the inode cannot have any watches and
-		 * doing an __iget/iput with MS_ACTIVE clear would actually
-		 * evict all inodes with zero i_count from icache which is
-		 * unnecessarily violent and may in fact be illegal to do.
-		 */
-		if (!atomic_read(&inode->i_count))
-			continue;
-
-		/*
-		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING, or
-		 * I_WILL_FREE which is fine because by that point the inode
-		 * cannot have any associated watches.
-		 */
-		if (inode->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))
-			continue;
-
-		need_iput_tmp = need_iput;
-		need_iput = NULL;
-		/* In case inotify_remove_watch_locked() drops a reference. */
-		if (inode != need_iput_tmp)
-			__iget(inode);
-		else
-			need_iput_tmp = NULL;
-		/* In case the dropping of a reference would nuke next_i. */
-		if ((&next_i->i_sb_list != list) &&
-				atomic_read(&next_i->i_count) &&
-				!(next_i->i_state & (I_CLEAR | I_FREEING |
-					I_WILL_FREE))) {
-			__iget(next_i);
-			need_iput = next_i;
-		}
-
-		/*
-		 * We can safely drop inode_lock here because we hold
-		 * references on both inode and next_i.  Also no new inodes
-		 * will be added since the umount has begun.  Finally,
-		 * iprune_mutex keeps shrink_icache_memory() away.
-		 */
-		spin_unlock(&inode_lock);
-
-		if (need_iput_tmp)
-			iput(need_iput_tmp);
-
-		/* for each watch, send IN_UNMOUNT and then remove it */
-		mutex_lock(&inode->inotify_mutex);
-		watches = &inode->inotify_watches;
-		list_for_each_entry_safe(watch, next_w, watches, i_list) {
-			struct inotify_handle *ih= watch->ih;
-			get_inotify_watch(watch);
-			mutex_lock(&ih->mutex);
-			ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
-						 NULL, NULL);
-			inotify_remove_watch_locked(ih, watch);
-			mutex_unlock(&ih->mutex);
-			put_inotify_watch(watch);
-		}
-		mutex_unlock(&inode->inotify_mutex);
-		iput(inode);		
-
-		spin_lock(&inode_lock);
-	}
-}
-EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
-
-/**
- * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
- * @inode: inode that is about to be removed
- */
-void inotify_inode_is_dead(struct inode *inode)
-{
-	struct inotify_watch *watch, *next;
-
-	mutex_lock(&inode->inotify_mutex);
-	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
-		struct inotify_handle *ih = watch->ih;
-		mutex_lock(&ih->mutex);
-		inotify_remove_watch_locked(ih, watch);
-		mutex_unlock(&ih->mutex);
-	}
-	mutex_unlock(&inode->inotify_mutex);
-}
-EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
-
-/* Kernel Consumer API */
-
-/**
- * inotify_init - allocate and initialize an inotify instance
- * @ops: caller's inotify operations
- */
-struct inotify_handle *inotify_init(const struct inotify_operations *ops)
-{
-	struct inotify_handle *ih;
-
-	ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
-	if (unlikely(!ih))
-		return ERR_PTR(-ENOMEM);
-
-	idr_init(&ih->idr);
-	INIT_LIST_HEAD(&ih->watches);
-	mutex_init(&ih->mutex);
-	ih->last_wd = 0;
-	ih->in_ops = ops;
-	atomic_set(&ih->count, 0);
-	get_inotify_handle(ih);
-
-	return ih;
-}
-EXPORT_SYMBOL_GPL(inotify_init);
-
-/**
- * inotify_init_watch - initialize an inotify watch
- * @watch: watch to initialize
- */
-void inotify_init_watch(struct inotify_watch *watch)
-{
-	INIT_LIST_HEAD(&watch->h_list);
-	INIT_LIST_HEAD(&watch->i_list);
-	atomic_set(&watch->count, 0);
-	get_inotify_watch(watch); /* initial get */
-}
-EXPORT_SYMBOL_GPL(inotify_init_watch);
-
-/*
- * Watch removals suck violently.  To kick the watch out we need (in this
- * order) inode->inotify_mutex and ih->mutex.  That's fine if we have
- * a hold on inode; however, for all other cases we need to make damn sure
- * we don't race with umount.  We can *NOT* just grab a reference to a
- * watch - inotify_unmount_inodes() will happily sail past it and we'll end
- * with reference to inode potentially outliving its superblock.  Ideally
- * we just want to grab an active reference to superblock if we can; that
- * will make sure we won't go into inotify_umount_inodes() until we are
- * done.  Cleanup is just deactivate_super().  However, that leaves a messy
- * case - what if we *are* racing with umount() and active references to
- * superblock can't be acquired anymore?  We can bump ->s_count, grab
- * ->s_umount, which will almost certainly wait until the superblock is shut
- * down and the watch in question is pining for fjords.  That's fine, but
- * there is a problem - we might have hit the window between ->s_active
- * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
- * is past the point of no return and is heading for shutdown) and the
- * moment when deactivate_super() acquires ->s_umount.  We could just do
- * drop_super() yield() and retry, but that's rather antisocial and this
- * stuff is luser-triggerable.  OTOH, having grabbed ->s_umount and having
- * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
- * that we won't race with inotify_umount_inodes().  So we could grab a
- * reference to watch and do the rest as above, just with drop_super() instead
- * of deactivate_super(), right?  Wrong.  We had to drop ih->mutex before we
- * could grab ->s_umount.  So the watch could've been gone already.
- *
- * That still can be dealt with - we need to save watch->wd, do idr_find()
- * and compare its result with our pointer.  If they match, we either have
- * the damn thing still alive or we'd lost not one but two races at once,
- * the watch had been killed and a new one got created with the same ->wd
- * at the same address.  That couldn't have happened in inotify_destroy(),
- * but inotify_rm_wd() could run into that.  Still, "new one got created"
- * is not a problem - we have every right to kill it or leave it alone,
- * whatever's more convenient.
- *
- * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
- * "grab it and kill it" check.  If it's been our original watch, we are
- * fine, if it's a newcomer - nevermind, just pretend that we'd won the
- * race and kill the fscker anyway; we are safe since we know that its
- * superblock won't be going away.
- *
- * And yes, this is far beyond mere "not very pretty"; so's the entire
- * concept of inotify to start with.
- */
-
-/**
- * pin_to_kill - pin the watch down for removal
- * @ih: inotify handle
- * @watch: watch to kill
- *
- * Called with ih->mutex held, drops it.  Possible return values:
- * 0 - nothing to do, it has died
- * 1 - remove it, drop the reference and deactivate_super()
- * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
- * that variant, since it involved a lot of PITA, but that's the best that
- * could've been done.
- */
-static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
-{
-	struct super_block *sb = watch->inode->i_sb;
-	s32 wd = watch->wd;
-
-	spin_lock(&sb_lock);
-	if (sb->s_count >= S_BIAS) {
-		atomic_inc(&sb->s_active);
-		spin_unlock(&sb_lock);
-		get_inotify_watch(watch);
-		mutex_unlock(&ih->mutex);
-		return 1;	/* the best outcome */
-	}
-	sb->s_count++;
-	spin_unlock(&sb_lock);
-	mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
-	down_read(&sb->s_umount);
-	if (likely(!sb->s_root)) {
-		/* fs is already shut down; the watch is dead */
-		drop_super(sb);
-		return 0;
-	}
-	/* raced with the final deactivate_super() */
-	mutex_lock(&ih->mutex);
-	if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
-		/* the watch is dead */
-		mutex_unlock(&ih->mutex);
-		drop_super(sb);
-		return 0;
-	}
-	/* still alive or freed and reused with the same sb and wd; kill */
-	get_inotify_watch(watch);
-	mutex_unlock(&ih->mutex);
-	return 2;
-}
-
-static void unpin_and_kill(struct inotify_watch *watch, int how)
-{
-	struct super_block *sb = watch->inode->i_sb;
-	put_inotify_watch(watch);
-	switch (how) {
-	case 1:
-		deactivate_super(sb);
-		break;
-	case 2:
-		drop_super(sb);
-	}
-}
-
-/**
- * inotify_destroy - clean up and destroy an inotify instance
- * @ih: inotify handle
- */
-void inotify_destroy(struct inotify_handle *ih)
-{
-	/*
-	 * Destroy all of the watches for this handle. Unfortunately, not very
-	 * pretty.  We cannot do a simple iteration over the list, because we
-	 * do not know the inode until we iterate to the watch.  But we need to
-	 * hold inode->inotify_mutex before ih->mutex.  The following works.
-	 *
-	 * AV: it had to become even uglier to start working ;-/
-	 */
-	while (1) {
-		struct inotify_watch *watch;
-		struct list_head *watches;
-		struct super_block *sb;
-		struct inode *inode;
-		int how;
-
-		mutex_lock(&ih->mutex);
-		watches = &ih->watches;
-		if (list_empty(watches)) {
-			mutex_unlock(&ih->mutex);
-			break;
-		}
-		watch = list_first_entry(watches, struct inotify_watch, h_list);
-		sb = watch->inode->i_sb;
-		how = pin_to_kill(ih, watch);
-		if (!how)
-			continue;
-
-		inode = watch->inode;
-		mutex_lock(&inode->inotify_mutex);
-		mutex_lock(&ih->mutex);
-
-		/* make sure we didn't race with another list removal */
-		if (likely(idr_find(&ih->idr, watch->wd))) {
-			remove_watch_no_event(watch, ih);
-			put_inotify_watch(watch);
-		}
-
-		mutex_unlock(&ih->mutex);
-		mutex_unlock(&inode->inotify_mutex);
-		unpin_and_kill(watch, how);
-	}
-
-	/* free this handle: the put matching the get in inotify_init() */
-	put_inotify_handle(ih);
-}
-EXPORT_SYMBOL_GPL(inotify_destroy);
-
-/**
- * inotify_find_watch - find an existing watch for an (ih,inode) pair
- * @ih: inotify handle
- * @inode: inode to watch
- * @watchp: pointer to existing inotify_watch
- *
- * Caller must pin given inode (via nameidata).
- */
-s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
-		       struct inotify_watch **watchp)
-{
-	struct inotify_watch *old;
-	int ret = -ENOENT;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	old = inode_find_handle(inode, ih);
-	if (unlikely(old)) {
-		get_inotify_watch(old); /* caller must put watch */
-		*watchp = old;
-		ret = old->wd;
-	}
-
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(inotify_find_watch);
-
-/**
- * inotify_find_update_watch - find and update the mask of an existing watch
- * @ih: inotify handle
- * @inode: inode's watch to update
- * @mask: mask of events to watch
- *
- * Caller must pin given inode (via nameidata).
- */
-s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
-			      u32 mask)
-{
-	struct inotify_watch *old;
-	int mask_add = 0;
-	int ret;
-
-	if (mask & IN_MASK_ADD)
-		mask_add = 1;
-
-	/* don't allow invalid bits: we don't want flags set */
-	mask &= IN_ALL_EVENTS | IN_ONESHOT;
-	if (unlikely(!mask))
-		return -EINVAL;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	/*
-	 * Handle the case of re-adding a watch on an (inode,ih) pair that we
-	 * are already watching.  We just update the mask and return its wd.
-	 */
-	old = inode_find_handle(inode, ih);
-	if (unlikely(!old)) {
-		ret = -ENOENT;
-		goto out;
-	}
-
-	if (mask_add)
-		old->mask |= mask;
-	else
-		old->mask = mask;
-	ret = old->wd;
-out:
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(inotify_find_update_watch);
-
-/**
- * inotify_add_watch - add a watch to an inotify instance
- * @ih: inotify handle
- * @watch: caller allocated watch structure
- * @inode: inode to watch
- * @mask: mask of events to watch
- *
- * Caller must pin given inode (via nameidata).
- * Caller must ensure it only calls inotify_add_watch() once per watch.
- * Calls inotify_handle_get_wd() so may sleep.
- */
-s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
-		      struct inode *inode, u32 mask)
-{
-	int ret = 0;
-	int newly_watched;
-
-	/* don't allow invalid bits: we don't want flags set */
-	mask &= IN_ALL_EVENTS | IN_ONESHOT;
-	if (unlikely(!mask))
-		return -EINVAL;
-	watch->mask = mask;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	/* Initialize a new watch */
-	ret = inotify_handle_get_wd(ih, watch);
-	if (unlikely(ret))
-		goto out;
-	ret = watch->wd;
-
-	/* save a reference to handle and bump the count to make it official */
-	get_inotify_handle(ih);
-	watch->ih = ih;
-
-	/*
-	 * Save a reference to the inode and bump the ref count to make it
-	 * official.  We hold a reference to nameidata, which makes this safe.
-	 */
-	watch->inode = igrab(inode);
-
-	/* Add the watch to the handle's and the inode's list */
-	newly_watched = !inotify_inode_watched(inode);
-	list_add(&watch->h_list, &ih->watches);
-	list_add(&watch->i_list, &inode->inotify_watches);
-	/*
-	 * Set child flags _after_ adding the watch, so there is no race
-	 * windows where newly instantiated children could miss their parent's
-	 * watched flag.
-	 */
-	if (newly_watched)
-		set_dentry_child_flags(inode, 1);
-
-out:
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(inotify_add_watch);
-
-/**
- * inotify_clone_watch - put the watch next to existing one
- * @old: already installed watch
- * @new: new watch
- *
- * Caller must hold the inotify_mutex of inode we are dealing with;
- * it is expected to remove the old watch before unlocking the inode.
- */
-s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
-{
-	struct inotify_handle *ih = old->ih;
-	int ret = 0;
-
-	new->mask = old->mask;
-	new->ih = ih;
-
-	mutex_lock(&ih->mutex);
-
-	/* Initialize a new watch */
-	ret = inotify_handle_get_wd(ih, new);
-	if (unlikely(ret))
-		goto out;
-	ret = new->wd;
-
-	get_inotify_handle(ih);
-
-	new->inode = igrab(old->inode);
-
-	list_add(&new->h_list, &ih->watches);
-	list_add(&new->i_list, &old->inode->inotify_watches);
-out:
-	mutex_unlock(&ih->mutex);
-	return ret;
-}
-
-void inotify_evict_watch(struct inotify_watch *watch)
-{
-	get_inotify_watch(watch);
-	mutex_lock(&watch->ih->mutex);
-	inotify_remove_watch_locked(watch->ih, watch);
-	mutex_unlock(&watch->ih->mutex);
-}
-
-/**
- * inotify_rm_wd - remove a watch from an inotify instance
- * @ih: inotify handle
- * @wd: watch descriptor to remove
- *
- * Can sleep.
- */
-int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
-{
-	struct inotify_watch *watch;
-	struct super_block *sb;
-	struct inode *inode;
-	int how;
-
-	mutex_lock(&ih->mutex);
-	watch = idr_find(&ih->idr, wd);
-	if (unlikely(!watch)) {
-		mutex_unlock(&ih->mutex);
-		return -EINVAL;
-	}
-	sb = watch->inode->i_sb;
-	how = pin_to_kill(ih, watch);
-	if (!how)
-		return 0;
-
-	inode = watch->inode;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	/* make sure that we did not race */
-	if (likely(idr_find(&ih->idr, wd) == watch))
-		inotify_remove_watch_locked(ih, watch);
-
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-	unpin_and_kill(watch, how);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(inotify_rm_wd);
-
-/**
- * inotify_rm_watch - remove a watch from an inotify instance
- * @ih: inotify handle
- * @watch: watch to remove
- *
- * Can sleep.
- */
-int inotify_rm_watch(struct inotify_handle *ih,
-		     struct inotify_watch *watch)
-{
-	return inotify_rm_wd(ih, watch->wd);
-}
-EXPORT_SYMBOL_GPL(inotify_rm_watch);
-
-/*
- * inotify_setup - core initialization function
- */
-static int __init inotify_setup(void)
-{
-	atomic_set(&inotify_cookie, 0);
-
-	return 0;
-}
-
-module_init(inotify_setup);
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
deleted file mode 100644
index 400f806..0000000
--- a/fs/inotify_user.c
+++ /dev/null
@@ -1,778 +0,0 @@
-/*
- * fs/inotify_user.c - inotify support for userspace
- *
- * Authors:
- *	John McCutchan	<ttb@tentacle.dhs.org>
- *	Robert Love	<rml@novell.com>
- *
- * Copyright (C) 2005 John McCutchan
- * Copyright 2006 Hewlett-Packard Development Company, L.P.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/inotify.h>
-#include <linux/syscalls.h>
-#include <linux/magic.h>
-
-#include <asm/ioctls.h>
-
-static struct kmem_cache *watch_cachep __read_mostly;
-static struct kmem_cache *event_cachep __read_mostly;
-
-static struct vfsmount *inotify_mnt __read_mostly;
-
-/* these are configurable via /proc/sys/fs/inotify/ */
-static int inotify_max_user_instances __read_mostly;
-static int inotify_max_user_watches __read_mostly;
-static int inotify_max_queued_events __read_mostly;
-
-/*
- * Lock ordering:
- *
- * inotify_dev->up_mutex (ensures we don't re-add the same watch)
- * 	inode->inotify_mutex (protects inode's watch list)
- * 		inotify_handle->mutex (protects inotify_handle's watch list)
- * 			inotify_dev->ev_mutex (protects device's event queue)
- */
-
-/*
- * Lifetimes of the main data structures:
- *
- * inotify_device: Lifetime is managed by reference count, from
- * sys_inotify_init() until release.  Additional references can bump the count
- * via get_inotify_dev() and drop the count via put_inotify_dev().
- *
- * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
- * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
- * first event, or to inotify_destroy().
- */
-
-/*
- * struct inotify_device - represents an inotify instance
- *
- * This structure is protected by the mutex 'mutex'.
- */
-struct inotify_device {
-	wait_queue_head_t 	wq;		/* wait queue for i/o */
-	struct mutex		ev_mutex;	/* protects event queue */
-	struct mutex		up_mutex;	/* synchronizes watch updates */
-	struct list_head 	events;		/* list of queued events */
-	struct user_struct	*user;		/* user who opened this dev */
-	struct inotify_handle	*ih;		/* inotify handle */
-	struct fasync_struct    *fa;            /* async notification */
-	atomic_t		count;		/* reference count */
-	unsigned int		queue_size;	/* size of the queue (bytes) */
-	unsigned int		event_count;	/* number of pending events */
-	unsigned int		max_events;	/* maximum number of events */
-};
-
-/*
- * struct inotify_kernel_event - An inotify event, originating from a watch and
- * queued for user-space.  A list of these is attached to each instance of the
- * device.  In read(), this list is walked and all events that can fit in the
- * buffer are returned.
- *
- * Protected by dev->ev_mutex of the device in which we are queued.
- */
-struct inotify_kernel_event {
-	struct inotify_event	event;	/* the user-space event */
-	struct list_head        list;	/* entry in inotify_device's list */
-	char			*name;	/* filename, if any */
-};
-
-/*
- * struct inotify_user_watch - our version of an inotify_watch, we add
- * a reference to the associated inotify_device.
- */
-struct inotify_user_watch {
-	struct inotify_device	*dev;	/* associated device */
-	struct inotify_watch	wdata;	/* inotify watch data */
-};
-
-#ifdef CONFIG_SYSCTL
-
-#include <linux/sysctl.h>
-
-static int zero;
-
-ctl_table inotify_table[] = {
-	{
-		.ctl_name	= INOTIFY_MAX_USER_INSTANCES,
-		.procname	= "max_user_instances",
-		.data		= &inotify_max_user_instances,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &zero,
-	},
-	{
-		.ctl_name	= INOTIFY_MAX_USER_WATCHES,
-		.procname	= "max_user_watches",
-		.data		= &inotify_max_user_watches,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &zero,
-	},
-	{
-		.ctl_name	= INOTIFY_MAX_QUEUED_EVENTS,
-		.procname	= "max_queued_events",
-		.data		= &inotify_max_queued_events,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &zero
-	},
-	{ .ctl_name = 0 }
-};
-#endif /* CONFIG_SYSCTL */
-
-static inline void get_inotify_dev(struct inotify_device *dev)
-{
-	atomic_inc(&dev->count);
-}
-
-static inline void put_inotify_dev(struct inotify_device *dev)
-{
-	if (atomic_dec_and_test(&dev->count)) {
-		atomic_dec(&dev->user->inotify_devs);
-		free_uid(dev->user);
-		kfree(dev);
-	}
-}
-
-/*
- * free_inotify_user_watch - cleans up the watch and its references
- */
-static void free_inotify_user_watch(struct inotify_watch *w)
-{
-	struct inotify_user_watch *watch;
-	struct inotify_device *dev;
-
-	watch = container_of(w, struct inotify_user_watch, wdata);
-	dev = watch->dev;
-
-	atomic_dec(&dev->user->inotify_watches);
-	put_inotify_dev(dev);
-	kmem_cache_free(watch_cachep, watch);
-}
-
-/*
- * kernel_event - create a new kernel event with the given parameters
- *
- * This function can sleep.
- */
-static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
-						  const char *name)
-{
-	struct inotify_kernel_event *kevent;
-
-	kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
-	if (unlikely(!kevent))
-		return NULL;
-
-	/* we hand this out to user-space, so zero it just in case */
-	memset(&kevent->event, 0, sizeof(struct inotify_event));
-
-	kevent->event.wd = wd;
-	kevent->event.mask = mask;
-	kevent->event.cookie = cookie;
-
-	INIT_LIST_HEAD(&kevent->list);
-
-	if (name) {
-		size_t len, rem, event_size = sizeof(struct inotify_event);
-
-		/*
-		 * We need to pad the filename so as to properly align an
-		 * array of inotify_event structures.  Because the structure is
-		 * small and the common case is a small filename, we just round
-		 * up to the next multiple of the structure's sizeof.  This is
-		 * simple and safe for all architectures.
-		 */
-		len = strlen(name) + 1;
-		rem = event_size - len;
-		if (len > event_size) {
-			rem = event_size - (len % event_size);
-			if (len % event_size == 0)
-				rem = 0;
-		}
-
-		kevent->name = kmalloc(len + rem, GFP_KERNEL);
-		if (unlikely(!kevent->name)) {
-			kmem_cache_free(event_cachep, kevent);
-			return NULL;
-		}
-		memcpy(kevent->name, name, len);
-		if (rem)
-			memset(kevent->name + len, 0, rem);
-		kevent->event.len = len + rem;
-	} else {
-		kevent->event.len = 0;
-		kevent->name = NULL;
-	}
-
-	return kevent;
-}
-
-/*
- * inotify_dev_get_event - return the next event in the given dev's queue
- *
- * Caller must hold dev->ev_mutex.
- */
-static inline struct inotify_kernel_event *
-inotify_dev_get_event(struct inotify_device *dev)
-{
-	return list_entry(dev->events.next, struct inotify_kernel_event, list);
-}
-
-/*
- * inotify_dev_get_last_event - return the last event in the given dev's queue
- *
- * Caller must hold dev->ev_mutex.
- */
-static inline struct inotify_kernel_event *
-inotify_dev_get_last_event(struct inotify_device *dev)
-{
-	if (list_empty(&dev->events))
-		return NULL;
-	return list_entry(dev->events.prev, struct inotify_kernel_event, list);
-}
-
-/*
- * inotify_dev_queue_event - event handler registered with core inotify, adds
- * a new event to the given device
- *
- * Can sleep (calls kernel_event()).
- */
-static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
-				    u32 cookie, const char *name,
-				    struct inode *ignored)
-{
-	struct inotify_user_watch *watch;
-	struct inotify_device *dev;
-	struct inotify_kernel_event *kevent, *last;
-
-	watch = container_of(w, struct inotify_user_watch, wdata);
-	dev = watch->dev;
-
-	mutex_lock(&dev->ev_mutex);
-
-	/* we can safely put the watch as we don't reference it while
-	 * generating the event
-	 */
-	if (mask & IN_IGNORED || w->mask & IN_ONESHOT)
-		put_inotify_watch(w); /* final put */
-
-	/* coalescing: drop this event if it is a dupe of the previous */
-	last = inotify_dev_get_last_event(dev);
-	if (last && last->event.mask == mask && last->event.wd == wd &&
-			last->event.cookie == cookie) {
-		const char *lastname = last->name;
-
-		if (!name && !lastname)
-			goto out;
-		if (name && lastname && !strcmp(lastname, name))
-			goto out;
-	}
-
-	/* the queue overflowed and we already sent the Q_OVERFLOW event */
-	if (unlikely(dev->event_count > dev->max_events))
-		goto out;
-
-	/* if the queue overflows, we need to notify user space */
-	if (unlikely(dev->event_count == dev->max_events))
-		kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
-	else
-		kevent = kernel_event(wd, mask, cookie, name);
-
-	if (unlikely(!kevent))
-		goto out;
-
-	/* queue the event and wake up anyone waiting */
-	dev->event_count++;
-	dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
-	list_add_tail(&kevent->list, &dev->events);
-	wake_up_interruptible(&dev->wq);
-	kill_fasync(&dev->fa, SIGIO, POLL_IN);
-
-out:
-	mutex_unlock(&dev->ev_mutex);
-}
-
-/*
- * remove_kevent - cleans up the given kevent
- *
- * Caller must hold dev->ev_mutex.
- */
-static void remove_kevent(struct inotify_device *dev,
-			  struct inotify_kernel_event *kevent)
-{
-	list_del(&kevent->list);
-
-	dev->event_count--;
-	dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
-}
-
-/*
- * free_kevent - frees the given kevent.
- */
-static void free_kevent(struct inotify_kernel_event *kevent)
-{
-	kfree(kevent->name);
-	kmem_cache_free(event_cachep, kevent);
-}
-
-/*
- * inotify_dev_event_dequeue - destroy an event on the given device
- *
- * Caller must hold dev->ev_mutex.
- */
-static void inotify_dev_event_dequeue(struct inotify_device *dev)
-{
-	if (!list_empty(&dev->events)) {
-		struct inotify_kernel_event *kevent;
-		kevent = inotify_dev_get_event(dev);
-		remove_kevent(dev, kevent);
-		free_kevent(kevent);
-	}
-}
-
-/*
- * find_inode - resolve a user-given path to a specific inode
- */
-static int find_inode(const char __user *dirname, struct path *path,
-		      unsigned flags)
-{
-	int error;
-
-	error = user_path_at(AT_FDCWD, dirname, flags, path);
-	if (error)
-		return error;
-	/* you can only watch an inode if you have read permissions on it */
-	error = inode_permission(path->dentry->d_inode, MAY_READ);
-	if (error)
-		path_put(path);
-	return error;
-}
-
-/*
- * create_watch - creates a watch on the given device.
- *
- * Callers must hold dev->up_mutex.
- */
-static int create_watch(struct inotify_device *dev, struct inode *inode,
-			u32 mask)
-{
-	struct inotify_user_watch *watch;
-	int ret;
-
-	if (atomic_read(&dev->user->inotify_watches) >=
-			inotify_max_user_watches)
-		return -ENOSPC;
-
-	watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
-	if (unlikely(!watch))
-		return -ENOMEM;
-
-	/* save a reference to device and bump the count to make it official */
-	get_inotify_dev(dev);
-	watch->dev = dev;
-
-	atomic_inc(&dev->user->inotify_watches);
-
-	inotify_init_watch(&watch->wdata);
-	ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
-	if (ret < 0)
-		free_inotify_user_watch(&watch->wdata);
-
-	return ret;
-}
-
-/* Device Interface */
-
-static unsigned int inotify_poll(struct file *file, poll_table *wait)
-{
-	struct inotify_device *dev = file->private_data;
-	int ret = 0;
-
-	poll_wait(file, &dev->wq, wait);
-	mutex_lock(&dev->ev_mutex);
-	if (!list_empty(&dev->events))
-		ret = POLLIN | POLLRDNORM;
-	mutex_unlock(&dev->ev_mutex);
-
-	return ret;
-}
-
-static ssize_t inotify_read(struct file *file, char __user *buf,
-			    size_t count, loff_t *pos)
-{
-	size_t event_size = sizeof (struct inotify_event);
-	struct inotify_device *dev;
-	char __user *start;
-	int ret;
-	DEFINE_WAIT(wait);
-
-	start = buf;
-	dev = file->private_data;
-
-	while (1) {
-
-		prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
-
-		mutex_lock(&dev->ev_mutex);
-		if (!list_empty(&dev->events)) {
-			ret = 0;
-			break;
-		}
-		mutex_unlock(&dev->ev_mutex);
-
-		if (file->f_flags & O_NONBLOCK) {
-			ret = -EAGAIN;
-			break;
-		}
-
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			break;
-		}
-
-		schedule();
-	}
-
-	finish_wait(&dev->wq, &wait);
-	if (ret)
-		return ret;
-
-	while (1) {
-		struct inotify_kernel_event *kevent;
-
-		ret = buf - start;
-		if (list_empty(&dev->events))
-			break;
-
-		kevent = inotify_dev_get_event(dev);
-		if (event_size + kevent->event.len > count) {
-			if (ret == 0 && count > 0) {
-				/*
-				 * could not get a single event because we
-				 * didn't have enough buffer space.
-				 */
-				ret = -EINVAL;
-			}
-			break;
-		}
-		remove_kevent(dev, kevent);
-
-		/*
-		 * Must perform the copy_to_user outside the mutex in order
-		 * to avoid a lock order reversal with mmap_sem.
-		 */
-		mutex_unlock(&dev->ev_mutex);
-
-		if (copy_to_user(buf, &kevent->event, event_size)) {
-			ret = -EFAULT;
-			break;
-		}
-		buf += event_size;
-		count -= event_size;
-
-		if (kevent->name) {
-			if (copy_to_user(buf, kevent->name, kevent->event.len)){
-				ret = -EFAULT;
-				break;
-			}
-			buf += kevent->event.len;
-			count -= kevent->event.len;
-		}
-
-		free_kevent(kevent);
-
-		mutex_lock(&dev->ev_mutex);
-	}
-	mutex_unlock(&dev->ev_mutex);
-
-	return ret;
-}
-
-static int inotify_fasync(int fd, struct file *file, int on)
-{
-	struct inotify_device *dev = file->private_data;
-
-	return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO;
-}
-
-static int inotify_release(struct inode *ignored, struct file *file)
-{
-	struct inotify_device *dev = file->private_data;
-
-	inotify_destroy(dev->ih);
-
-	/* destroy all of the events on this device */
-	mutex_lock(&dev->ev_mutex);
-	while (!list_empty(&dev->events))
-		inotify_dev_event_dequeue(dev);
-	mutex_unlock(&dev->ev_mutex);
-
-	/* free this device: the put matching the get in inotify_init() */
-	put_inotify_dev(dev);
-
-	return 0;
-}
-
-static long inotify_ioctl(struct file *file, unsigned int cmd,
-			  unsigned long arg)
-{
-	struct inotify_device *dev;
-	void __user *p;
-	int ret = -ENOTTY;
-
-	dev = file->private_data;
-	p = (void __user *) arg;
-
-	switch (cmd) {
-	case FIONREAD:
-		ret = put_user(dev->queue_size, (int __user *) p);
-		break;
-	}
-
-	return ret;
-}
-
-static const struct file_operations inotify_fops = {
-	.poll           = inotify_poll,
-	.read           = inotify_read,
-	.fasync         = inotify_fasync,
-	.release        = inotify_release,
-	.unlocked_ioctl = inotify_ioctl,
-	.compat_ioctl	= inotify_ioctl,
-};
-
-static const struct inotify_operations inotify_user_ops = {
-	.handle_event	= inotify_dev_queue_event,
-	.destroy_watch	= free_inotify_user_watch,
-};
-
-asmlinkage long sys_inotify_init1(int flags)
-{
-	struct inotify_device *dev;
-	struct inotify_handle *ih;
-	struct user_struct *user;
-	struct file *filp;
-	int fd, ret;
-
-	/* Check the IN_* constants for consistency.  */
-	BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
-	BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
-
-	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
-		return -EINVAL;
-
-	fd = get_unused_fd_flags(flags & O_CLOEXEC);
-	if (fd < 0)
-		return fd;
-
-	filp = get_empty_filp();
-	if (!filp) {
-		ret = -ENFILE;
-		goto out_put_fd;
-	}
-
-	user = get_current_user();
-	if (unlikely(atomic_read(&user->inotify_devs) >=
-			inotify_max_user_instances)) {
-		ret = -EMFILE;
-		goto out_free_uid;
-	}
-
-	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
-	if (unlikely(!dev)) {
-		ret = -ENOMEM;
-		goto out_free_uid;
-	}
-
-	ih = inotify_init(&inotify_user_ops);
-	if (IS_ERR(ih)) {
-		ret = PTR_ERR(ih);
-		goto out_free_dev;
-	}
-	dev->ih = ih;
-	dev->fa = NULL;
-
-	filp->f_op = &inotify_fops;
-	filp->f_path.mnt = mntget(inotify_mnt);
-	filp->f_path.dentry = dget(inotify_mnt->mnt_root);
-	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
-	filp->f_mode = FMODE_READ;
-	filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
-	filp->private_data = dev;
-
-	INIT_LIST_HEAD(&dev->events);
-	init_waitqueue_head(&dev->wq);
-	mutex_init(&dev->ev_mutex);
-	mutex_init(&dev->up_mutex);
-	dev->event_count = 0;
-	dev->queue_size = 0;
-	dev->max_events = inotify_max_queued_events;
-	dev->user = user;
-	atomic_set(&dev->count, 0);
-
-	get_inotify_dev(dev);
-	atomic_inc(&user->inotify_devs);
-	fd_install(fd, filp);
-
-	return fd;
-out_free_dev:
-	kfree(dev);
-out_free_uid:
-	free_uid(user);
-	put_filp(filp);
-out_put_fd:
-	put_unused_fd(fd);
-	return ret;
-}
-
-asmlinkage long sys_inotify_init(void)
-{
-	return sys_inotify_init1(0);
-}
-
-asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
-{
-	struct inode *inode;
-	struct inotify_device *dev;
-	struct path path;
-	struct file *filp;
-	int ret, fput_needed;
-	unsigned flags = 0;
-
-	filp = fget_light(fd, &fput_needed);
-	if (unlikely(!filp))
-		return -EBADF;
-
-	/* verify that this is indeed an inotify instance */
-	if (unlikely(filp->f_op != &inotify_fops)) {
-		ret = -EINVAL;
-		goto fput_and_out;
-	}
-
-	if (!(mask & IN_DONT_FOLLOW))
-		flags |= LOOKUP_FOLLOW;
-	if (mask & IN_ONLYDIR)
-		flags |= LOOKUP_DIRECTORY;
-
-	ret = find_inode(pathname, &path, flags);
-	if (unlikely(ret))
-		goto fput_and_out;
-
-	/* inode held in place by reference to path; dev by fget on fd */
-	inode = path.dentry->d_inode;
-	dev = filp->private_data;
-
-	mutex_lock(&dev->up_mutex);
-	ret = inotify_find_update_watch(dev->ih, inode, mask);
-	if (ret == -ENOENT)
-		ret = create_watch(dev, inode, mask);
-	mutex_unlock(&dev->up_mutex);
-
-	path_put(&path);
-fput_and_out:
-	fput_light(filp, fput_needed);
-	return ret;
-}
-
-asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
-{
-	struct file *filp;
-	struct inotify_device *dev;
-	int ret, fput_needed;
-
-	filp = fget_light(fd, &fput_needed);
-	if (unlikely(!filp))
-		return -EBADF;
-
-	/* verify that this is indeed an inotify instance */
-	if (unlikely(filp->f_op != &inotify_fops)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	dev = filp->private_data;
-
-	/* we free our watch data when we get IN_IGNORED */
-	ret = inotify_rm_wd(dev->ih, wd);
-
-out:
-	fput_light(filp, fput_needed);
-	return ret;
-}
-
-static int
-inotify_get_sb(struct file_system_type *fs_type, int flags,
-	       const char *dev_name, void *data, struct vfsmount *mnt)
-{
-	return get_sb_pseudo(fs_type, "inotify", NULL,
-			INOTIFYFS_SUPER_MAGIC, mnt);
-}
-
-static struct file_system_type inotify_fs_type = {
-    .name           = "inotifyfs",
-    .get_sb         = inotify_get_sb,
-    .kill_sb        = kill_anon_super,
-};
-
-/*
- * inotify_user_setup - Our initialization function.  Note that we cannnot return
- * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
- * must result in panic().
- */
-static int __init inotify_user_setup(void)
-{
-	int ret;
-
-	ret = register_filesystem(&inotify_fs_type);
-	if (unlikely(ret))
-		panic("inotify: register_filesystem returned %d!\n", ret);
-
-	inotify_mnt = kern_mount(&inotify_fs_type);
-	if (IS_ERR(inotify_mnt))
-		panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
-
-	inotify_max_queued_events = 16384;
-	inotify_max_user_instances = 128;
-	inotify_max_user_watches = 8192;
-
-	watch_cachep = kmem_cache_create("inotify_watch_cache",
-					 sizeof(struct inotify_user_watch),
-					 0, SLAB_PANIC, NULL);
-	event_cachep = kmem_cache_create("inotify_event_cache",
-					 sizeof(struct inotify_kernel_event),
-					 0, SLAB_PANIC, NULL);
-
-	return 0;
-}
-
-module_init(inotify_user_setup);
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
new file mode 100644
index 0000000..50914d7
--- /dev/null
+++ b/fs/notify/Kconfig
@@ -0,0 +1,2 @@
+source "fs/notify/dnotify/Kconfig"
+source "fs/notify/inotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
new file mode 100644
index 0000000..5a95b60
--- /dev/null
+++ b/fs/notify/Makefile
@@ -0,0 +1,2 @@
+obj-y			+= dnotify/
+obj-y			+= inotify/
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig
new file mode 100644
index 0000000..26adf5d
--- /dev/null
+++ b/fs/notify/dnotify/Kconfig
@@ -0,0 +1,10 @@
+config DNOTIFY
+	bool "Dnotify support"
+	default y
+	help
+	  Dnotify is a directory-based per-fd file change notification system
+	  that uses signals to communicate events to user-space.  There exist
+	  superior alternatives, but some applications may still rely on
+	  dnotify.
+
+	  If unsure, say Y.
diff --git a/fs/notify/dnotify/Makefile b/fs/notify/dnotify/Makefile
new file mode 100644
index 0000000..f145251
--- /dev/null
+++ b/fs/notify/dnotify/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DNOTIFY)		+= dnotify.o
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
new file mode 100644
index 0000000..b0aa2cd
--- /dev/null
+++ b/fs/notify/dnotify/dnotify.c
@@ -0,0 +1,191 @@
+/*
+ * Directory notifications for Linux.
+ *
+ * Copyright (C) 2000,2001,2002 Stephen Rothwell
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/dnotify.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/fdtable.h>
+
+int dir_notify_enable __read_mostly = 1;
+
+static struct kmem_cache *dn_cache __read_mostly;
+
+static void redo_inode_mask(struct inode *inode)
+{
+	unsigned long new_mask;
+	struct dnotify_struct *dn;
+
+	new_mask = 0;
+	for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next)
+		new_mask |= dn->dn_mask & ~DN_MULTISHOT;
+	inode->i_dnotify_mask = new_mask;
+}
+
+void dnotify_flush(struct file *filp, fl_owner_t id)
+{
+	struct dnotify_struct *dn;
+	struct dnotify_struct **prev;
+	struct inode *inode;
+
+	inode = filp->f_path.dentry->d_inode;
+	if (!S_ISDIR(inode->i_mode))
+		return;
+	spin_lock(&inode->i_lock);
+	prev = &inode->i_dnotify;
+	while ((dn = *prev) != NULL) {
+		if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
+			*prev = dn->dn_next;
+			redo_inode_mask(inode);
+			kmem_cache_free(dn_cache, dn);
+			break;
+		}
+		prev = &dn->dn_next;
+	}
+	spin_unlock(&inode->i_lock);
+}
+
+int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+{
+	struct dnotify_struct *dn;
+	struct dnotify_struct *odn;
+	struct dnotify_struct **prev;
+	struct inode *inode;
+	fl_owner_t id = current->files;
+	struct file *f;
+	int error = 0;
+
+	if ((arg & ~DN_MULTISHOT) == 0) {
+		dnotify_flush(filp, id);
+		return 0;
+	}
+	if (!dir_notify_enable)
+		return -EINVAL;
+	inode = filp->f_path.dentry->d_inode;
+	if (!S_ISDIR(inode->i_mode))
+		return -ENOTDIR;
+	dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
+	if (dn == NULL)
+		return -ENOMEM;
+	spin_lock(&inode->i_lock);
+	prev = &inode->i_dnotify;
+	while ((odn = *prev) != NULL) {
+		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
+			odn->dn_fd = fd;
+			odn->dn_mask |= arg;
+			inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
+			goto out_free;
+		}
+		prev = &odn->dn_next;
+	}
+
+	rcu_read_lock();
+	f = fcheck(fd);
+	rcu_read_unlock();
+	/* we'd lost the race with close(), sod off silently */
+	/* note that inode->i_lock prevents reordering problems
+	 * between accesses to descriptor table and ->i_dnotify */
+	if (f != filp)
+		goto out_free;
+
+	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+	if (error)
+		goto out_free;
+
+	dn->dn_mask = arg;
+	dn->dn_fd = fd;
+	dn->dn_filp = filp;
+	dn->dn_owner = id;
+	inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
+	dn->dn_next = inode->i_dnotify;
+	inode->i_dnotify = dn;
+	spin_unlock(&inode->i_lock);
+	return 0;
+
+out_free:
+	spin_unlock(&inode->i_lock);
+	kmem_cache_free(dn_cache, dn);
+	return error;
+}
+
+void __inode_dir_notify(struct inode *inode, unsigned long event)
+{
+	struct dnotify_struct *	dn;
+	struct dnotify_struct **prev;
+	struct fown_struct *	fown;
+	int			changed = 0;
+
+	spin_lock(&inode->i_lock);
+	prev = &inode->i_dnotify;
+	while ((dn = *prev) != NULL) {
+		if ((dn->dn_mask & event) == 0) {
+			prev = &dn->dn_next;
+			continue;
+		}
+		fown = &dn->dn_filp->f_owner;
+		send_sigio(fown, dn->dn_fd, POLL_MSG);
+		if (dn->dn_mask & DN_MULTISHOT)
+			prev = &dn->dn_next;
+		else {
+			*prev = dn->dn_next;
+			changed = 1;
+			kmem_cache_free(dn_cache, dn);
+		}
+	}
+	if (changed)
+		redo_inode_mask(inode);
+	spin_unlock(&inode->i_lock);
+}
+
+EXPORT_SYMBOL(__inode_dir_notify);
+
+/*
+ * This is hopelessly wrong, but unfixable without API changes.  At
+ * least it doesn't oops the kernel...
+ *
+ * To safely access ->d_parent we need to keep d_move away from it.  Use the
+ * dentry's d_lock for this.
+ */
+void dnotify_parent(struct dentry *dentry, unsigned long event)
+{
+	struct dentry *parent;
+
+	if (!dir_notify_enable)
+		return;
+
+	spin_lock(&dentry->d_lock);
+	parent = dentry->d_parent;
+	if (parent->d_inode->i_dnotify_mask & event) {
+		dget(parent);
+		spin_unlock(&dentry->d_lock);
+		__inode_dir_notify(parent->d_inode, event);
+		dput(parent);
+	} else {
+		spin_unlock(&dentry->d_lock);
+	}
+}
+EXPORT_SYMBOL_GPL(dnotify_parent);
+
+static int __init dnotify_init(void)
+{
+	dn_cache = kmem_cache_create("dnotify_cache",
+		sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL);
+	return 0;
+}
+
+module_init(dnotify_init)
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
new file mode 100644
index 0000000..4467928
--- /dev/null
+++ b/fs/notify/inotify/Kconfig
@@ -0,0 +1,27 @@
+config INOTIFY
+	bool "Inotify file change notification support"
+	default y
+	---help---
+	  Say Y here to enable inotify support.  Inotify is a file change
+	  notification system and a replacement for dnotify.  Inotify fixes
+	  numerous shortcomings in dnotify and introduces several new features
+	  including multiple file events, one-shot support, and unmount
+	  notification.
+
+	  For more information, see <file:Documentation/filesystems/inotify.txt>
+
+	  If unsure, say Y.
+
+config INOTIFY_USER
+	bool "Inotify support for userspace"
+	depends on INOTIFY
+	default y
+	---help---
+	  Say Y here to enable inotify support for userspace, including the
+	  associated system calls.  Inotify allows monitoring of both files and
+	  directories via a single open fd.  Events are read from the file
+	  descriptor, which is also select()- and poll()-able.
+
+	  For more information, see <file:Documentation/filesystems/inotify.txt>
+
+	  If unsure, say Y.
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
new file mode 100644
index 0000000..e290f3b
--- /dev/null
+++ b/fs/notify/inotify/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_INOTIFY)		+= inotify.o
+obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
new file mode 100644
index 0000000..dae3f28
--- /dev/null
+++ b/fs/notify/inotify/inotify.c
@@ -0,0 +1,913 @@
+/*
+ * fs/inotify.c - inode-based file event notifications
+ *
+ * Authors:
+ *	John McCutchan	<ttb@tentacle.dhs.org>
+ *	Robert Love	<rml@novell.com>
+ *
+ * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
+ *
+ * Copyright (C) 2005 John McCutchan
+ * Copyright 2006 Hewlett-Packard Development Company, L.P.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/writeback.h>
+#include <linux/inotify.h>
+
+static atomic_t inotify_cookie;
+
+/*
+ * Lock ordering:
+ *
+ * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
+ * iprune_mutex (synchronize shrink_icache_memory())
+ * 	inode_lock (protects the super_block->s_inodes list)
+ * 	inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
+ * 		inotify_handle->mutex (protects inotify_handle and watches->h_list)
+ *
+ * The inode->inotify_mutex and inotify_handle->mutex and held during execution
+ * of a caller's event handler.  Thus, the caller must not hold any locks
+ * taken in their event handler while calling any of the published inotify
+ * interfaces.
+ */
+
+/*
+ * Lifetimes of the three main data structures--inotify_handle, inode, and
+ * inotify_watch--are managed by reference count.
+ *
+ * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
+ * Additional references can bump the count via get_inotify_handle() and drop
+ * the count via put_inotify_handle().
+ *
+ * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
+ * to remove_watch_no_event().  Additional references can bump the count via
+ * get_inotify_watch() and drop the count via put_inotify_watch().  The caller
+ * is reponsible for the final put after receiving IN_IGNORED, or when using
+ * IN_ONESHOT after receiving the first event.  Inotify does the final put if
+ * inotify_destroy() is called.
+ *
+ * inode: Pinned so long as the inode is associated with a watch, from
+ * inotify_add_watch() to the final put_inotify_watch().
+ */
+
+/*
+ * struct inotify_handle - represents an inotify instance
+ *
+ * This structure is protected by the mutex 'mutex'.
+ */
+struct inotify_handle {
+	struct idr		idr;		/* idr mapping wd -> watch */
+	struct mutex		mutex;		/* protects this bad boy */
+	struct list_head	watches;	/* list of watches */
+	atomic_t		count;		/* reference count */
+	u32			last_wd;	/* the last wd allocated */
+	const struct inotify_operations *in_ops; /* inotify caller operations */
+};
+
+static inline void get_inotify_handle(struct inotify_handle *ih)
+{
+	atomic_inc(&ih->count);
+}
+
+static inline void put_inotify_handle(struct inotify_handle *ih)
+{
+	if (atomic_dec_and_test(&ih->count)) {
+		idr_destroy(&ih->idr);
+		kfree(ih);
+	}
+}
+
+/**
+ * get_inotify_watch - grab a reference to an inotify_watch
+ * @watch: watch to grab
+ */
+void get_inotify_watch(struct inotify_watch *watch)
+{
+	atomic_inc(&watch->count);
+}
+EXPORT_SYMBOL_GPL(get_inotify_watch);
+
+int pin_inotify_watch(struct inotify_watch *watch)
+{
+	struct super_block *sb = watch->inode->i_sb;
+	spin_lock(&sb_lock);
+	if (sb->s_count >= S_BIAS) {
+		atomic_inc(&sb->s_active);
+		spin_unlock(&sb_lock);
+		atomic_inc(&watch->count);
+		return 1;
+	}
+	spin_unlock(&sb_lock);
+	return 0;
+}
+
+/**
+ * put_inotify_watch - decrements the ref count on a given watch.  cleans up
+ * watch references if the count reaches zero.  inotify_watch is freed by
+ * inotify callers via the destroy_watch() op.
+ * @watch: watch to release
+ */
+void put_inotify_watch(struct inotify_watch *watch)
+{
+	if (atomic_dec_and_test(&watch->count)) {
+		struct inotify_handle *ih = watch->ih;
+
+		iput(watch->inode);
+		ih->in_ops->destroy_watch(watch);
+		put_inotify_handle(ih);
+	}
+}
+EXPORT_SYMBOL_GPL(put_inotify_watch);
+
+void unpin_inotify_watch(struct inotify_watch *watch)
+{
+	struct super_block *sb = watch->inode->i_sb;
+	put_inotify_watch(watch);
+	deactivate_super(sb);
+}
+
+/*
+ * inotify_handle_get_wd - returns the next WD for use by the given handle
+ *
+ * Callers must hold ih->mutex.  This function can sleep.
+ */
+static int inotify_handle_get_wd(struct inotify_handle *ih,
+				 struct inotify_watch *watch)
+{
+	int ret;
+
+	do {
+		if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL)))
+			return -ENOSPC;
+		ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
+	} while (ret == -EAGAIN);
+
+	if (likely(!ret))
+		ih->last_wd = watch->wd;
+
+	return ret;
+}
+
+/*
+ * inotify_inode_watched - returns nonzero if there are watches on this inode
+ * and zero otherwise.  We call this lockless, we do not care if we race.
+ */
+static inline int inotify_inode_watched(struct inode *inode)
+{
+	return !list_empty(&inode->inotify_watches);
+}
+
+/*
+ * Get child dentry flag into synch with parent inode.
+ * Flag should always be clear for negative dentrys.
+ */
+static void set_dentry_child_flags(struct inode *inode, int watched)
+{
+	struct dentry *alias;
+
+	spin_lock(&dcache_lock);
+	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+		struct dentry *child;
+
+		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
+			if (!child->d_inode)
+				continue;
+
+			spin_lock(&child->d_lock);
+			if (watched)
+				child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
+			else
+				child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
+			spin_unlock(&child->d_lock);
+		}
+	}
+	spin_unlock(&dcache_lock);
+}
+
+/*
+ * inotify_find_handle - find the watch associated with the given inode and
+ * handle
+ *
+ * Callers must hold inode->inotify_mutex.
+ */
+static struct inotify_watch *inode_find_handle(struct inode *inode,
+					       struct inotify_handle *ih)
+{
+	struct inotify_watch *watch;
+
+	list_for_each_entry(watch, &inode->inotify_watches, i_list) {
+		if (watch->ih == ih)
+			return watch;
+	}
+
+	return NULL;
+}
+
+/*
+ * remove_watch_no_event - remove watch without the IN_IGNORED event.
+ *
+ * Callers must hold both inode->inotify_mutex and ih->mutex.
+ */
+static void remove_watch_no_event(struct inotify_watch *watch,
+				  struct inotify_handle *ih)
+{
+	list_del(&watch->i_list);
+	list_del(&watch->h_list);
+
+	if (!inotify_inode_watched(watch->inode))
+		set_dentry_child_flags(watch->inode, 0);
+
+	idr_remove(&ih->idr, watch->wd);
+}
+
+/**
+ * inotify_remove_watch_locked - Remove a watch from both the handle and the
+ * inode.  Sends the IN_IGNORED event signifying that the inode is no longer
+ * watched.  May be invoked from a caller's event handler.
+ * @ih: inotify handle associated with watch
+ * @watch: watch to remove
+ *
+ * Callers must hold both inode->inotify_mutex and ih->mutex.
+ */
+void inotify_remove_watch_locked(struct inotify_handle *ih,
+				 struct inotify_watch *watch)
+{
+	remove_watch_no_event(watch, ih);
+	ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
+
+/* Kernel API for producing events */
+
+/*
+ * inotify_d_instantiate - instantiate dcache entry for inode
+ */
+void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
+{
+	struct dentry *parent;
+
+	if (!inode)
+		return;
+
+	spin_lock(&entry->d_lock);
+	parent = entry->d_parent;
+	if (parent->d_inode && inotify_inode_watched(parent->d_inode))
+		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
+	spin_unlock(&entry->d_lock);
+}
+
+/*
+ * inotify_d_move - dcache entry has been moved
+ */
+void inotify_d_move(struct dentry *entry)
+{
+	struct dentry *parent;
+
+	parent = entry->d_parent;
+	if (inotify_inode_watched(parent->d_inode))
+		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
+	else
+		entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
+}
+
+/**
+ * inotify_inode_queue_event - queue an event to all watches on this inode
+ * @inode: inode event is originating from
+ * @mask: event mask describing this event
+ * @cookie: cookie for synchronization, or zero
+ * @name: filename, if any
+ * @n_inode: inode associated with name
+ */
+void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
+			       const char *name, struct inode *n_inode)
+{
+	struct inotify_watch *watch, *next;
+
+	if (!inotify_inode_watched(inode))
+		return;
+
+	mutex_lock(&inode->inotify_mutex);
+	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
+		u32 watch_mask = watch->mask;
+		if (watch_mask & mask) {
+			struct inotify_handle *ih= watch->ih;
+			mutex_lock(&ih->mutex);
+			if (watch_mask & IN_ONESHOT)
+				remove_watch_no_event(watch, ih);
+			ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
+						 name, n_inode);
+			mutex_unlock(&ih->mutex);
+		}
+	}
+	mutex_unlock(&inode->inotify_mutex);
+}
+EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
+
+/**
+ * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
+ * @dentry: the dentry in question, we queue against this dentry's parent
+ * @mask: event mask describing this event
+ * @cookie: cookie for synchronization, or zero
+ * @name: filename, if any
+ */
+void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
+				       u32 cookie, const char *name)
+{
+	struct dentry *parent;
+	struct inode *inode;
+
+	if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
+		return;
+
+	spin_lock(&dentry->d_lock);
+	parent = dentry->d_parent;
+	inode = parent->d_inode;
+
+	if (inotify_inode_watched(inode)) {
+		dget(parent);
+		spin_unlock(&dentry->d_lock);
+		inotify_inode_queue_event(inode, mask, cookie, name,
+					  dentry->d_inode);
+		dput(parent);
+	} else
+		spin_unlock(&dentry->d_lock);
+}
+EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
+
+/**
+ * inotify_get_cookie - return a unique cookie for use in synchronizing events.
+ */
+u32 inotify_get_cookie(void)
+{
+	return atomic_inc_return(&inotify_cookie);
+}
+EXPORT_SYMBOL_GPL(inotify_get_cookie);
+
+/**
+ * inotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
+ * @list: list of inodes being unmounted (sb->s_inodes)
+ *
+ * Called with inode_lock held, protecting the unmounting super block's list
+ * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
+ * We temporarily drop inode_lock, however, and CAN block.
+ */
+void inotify_unmount_inodes(struct list_head *list)
+{
+	struct inode *inode, *next_i, *need_iput = NULL;
+
+	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+		struct inotify_watch *watch, *next_w;
+		struct inode *need_iput_tmp;
+		struct list_head *watches;
+
+		/*
+		 * If i_count is zero, the inode cannot have any watches and
+		 * doing an __iget/iput with MS_ACTIVE clear would actually
+		 * evict all inodes with zero i_count from icache which is
+		 * unnecessarily violent and may in fact be illegal to do.
+		 */
+		if (!atomic_read(&inode->i_count))
+			continue;
+
+		/*
+		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING, or
+		 * I_WILL_FREE which is fine because by that point the inode
+		 * cannot have any associated watches.
+		 */
+		if (inode->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))
+			continue;
+
+		need_iput_tmp = need_iput;
+		need_iput = NULL;
+		/* In case inotify_remove_watch_locked() drops a reference. */
+		if (inode != need_iput_tmp)
+			__iget(inode);
+		else
+			need_iput_tmp = NULL;
+		/* In case the dropping of a reference would nuke next_i. */
+		if ((&next_i->i_sb_list != list) &&
+				atomic_read(&next_i->i_count) &&
+				!(next_i->i_state & (I_CLEAR | I_FREEING |
+					I_WILL_FREE))) {
+			__iget(next_i);
+			need_iput = next_i;
+		}
+
+		/*
+		 * We can safely drop inode_lock here because we hold
+		 * references on both inode and next_i.  Also no new inodes
+		 * will be added since the umount has begun.  Finally,
+		 * iprune_mutex keeps shrink_icache_memory() away.
+		 */
+		spin_unlock(&inode_lock);
+
+		if (need_iput_tmp)
+			iput(need_iput_tmp);
+
+		/* for each watch, send IN_UNMOUNT and then remove it */
+		mutex_lock(&inode->inotify_mutex);
+		watches = &inode->inotify_watches;
+		list_for_each_entry_safe(watch, next_w, watches, i_list) {
+			struct inotify_handle *ih= watch->ih;
+			get_inotify_watch(watch);
+			mutex_lock(&ih->mutex);
+			ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
+						 NULL, NULL);
+			inotify_remove_watch_locked(ih, watch);
+			mutex_unlock(&ih->mutex);
+			put_inotify_watch(watch);
+		}
+		mutex_unlock(&inode->inotify_mutex);
+		iput(inode);		
+
+		spin_lock(&inode_lock);
+	}
+}
+EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
+
+/**
+ * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
+ * @inode: inode that is about to be removed
+ */
+void inotify_inode_is_dead(struct inode *inode)
+{
+	struct inotify_watch *watch, *next;
+
+	mutex_lock(&inode->inotify_mutex);
+	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
+		struct inotify_handle *ih = watch->ih;
+		mutex_lock(&ih->mutex);
+		inotify_remove_watch_locked(ih, watch);
+		mutex_unlock(&ih->mutex);
+	}
+	mutex_unlock(&inode->inotify_mutex);
+}
+EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
+
+/* Kernel Consumer API */
+
+/**
+ * inotify_init - allocate and initialize an inotify instance
+ * @ops: caller's inotify operations
+ */
+struct inotify_handle *inotify_init(const struct inotify_operations *ops)
+{
+	struct inotify_handle *ih;
+
+	ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
+	if (unlikely(!ih))
+		return ERR_PTR(-ENOMEM);
+
+	idr_init(&ih->idr);
+	INIT_LIST_HEAD(&ih->watches);
+	mutex_init(&ih->mutex);
+	ih->last_wd = 0;
+	ih->in_ops = ops;
+	atomic_set(&ih->count, 0);
+	get_inotify_handle(ih);
+
+	return ih;
+}
+EXPORT_SYMBOL_GPL(inotify_init);
+
+/**
+ * inotify_init_watch - initialize an inotify watch
+ * @watch: watch to initialize
+ */
+void inotify_init_watch(struct inotify_watch *watch)
+{
+	INIT_LIST_HEAD(&watch->h_list);
+	INIT_LIST_HEAD(&watch->i_list);
+	atomic_set(&watch->count, 0);
+	get_inotify_watch(watch); /* initial get */
+}
+EXPORT_SYMBOL_GPL(inotify_init_watch);
+
+/*
+ * Watch removals suck violently.  To kick the watch out we need (in this
+ * order) inode->inotify_mutex and ih->mutex.  That's fine if we have
+ * a hold on inode; however, for all other cases we need to make damn sure
+ * we don't race with umount.  We can *NOT* just grab a reference to a
+ * watch - inotify_unmount_inodes() will happily sail past it and we'll end
+ * with reference to inode potentially outliving its superblock.  Ideally
+ * we just want to grab an active reference to superblock if we can; that
+ * will make sure we won't go into inotify_umount_inodes() until we are
+ * done.  Cleanup is just deactivate_super().  However, that leaves a messy
+ * case - what if we *are* racing with umount() and active references to
+ * superblock can't be acquired anymore?  We can bump ->s_count, grab
+ * ->s_umount, which will almost certainly wait until the superblock is shut
+ * down and the watch in question is pining for fjords.  That's fine, but
+ * there is a problem - we might have hit the window between ->s_active
+ * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
+ * is past the point of no return and is heading for shutdown) and the
+ * moment when deactivate_super() acquires ->s_umount.  We could just do
+ * drop_super() yield() and retry, but that's rather antisocial and this
+ * stuff is luser-triggerable.  OTOH, having grabbed ->s_umount and having
+ * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
+ * that we won't race with inotify_umount_inodes().  So we could grab a
+ * reference to watch and do the rest as above, just with drop_super() instead
+ * of deactivate_super(), right?  Wrong.  We had to drop ih->mutex before we
+ * could grab ->s_umount.  So the watch could've been gone already.
+ *
+ * That still can be dealt with - we need to save watch->wd, do idr_find()
+ * and compare its result with our pointer.  If they match, we either have
+ * the damn thing still alive or we'd lost not one but two races at once,
+ * the watch had been killed and a new one got created with the same ->wd
+ * at the same address.  That couldn't have happened in inotify_destroy(),
+ * but inotify_rm_wd() could run into that.  Still, "new one got created"
+ * is not a problem - we have every right to kill it or leave it alone,
+ * whatever's more convenient.
+ *
+ * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
+ * "grab it and kill it" check.  If it's been our original watch, we are
+ * fine, if it's a newcomer - nevermind, just pretend that we'd won the
+ * race and kill the fscker anyway; we are safe since we know that its
+ * superblock won't be going away.
+ *
+ * And yes, this is far beyond mere "not very pretty"; so's the entire
+ * concept of inotify to start with.
+ */
+
+/**
+ * pin_to_kill - pin the watch down for removal
+ * @ih: inotify handle
+ * @watch: watch to kill
+ *
+ * Called with ih->mutex held, drops it.  Possible return values:
+ * 0 - nothing to do, it has died
+ * 1 - remove it, drop the reference and deactivate_super()
+ * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
+ * that variant, since it involved a lot of PITA, but that's the best that
+ * could've been done.
+ */
+static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
+{
+	struct super_block *sb = watch->inode->i_sb;
+	s32 wd = watch->wd;
+
+	spin_lock(&sb_lock);
+	if (sb->s_count >= S_BIAS) {
+		atomic_inc(&sb->s_active);
+		spin_unlock(&sb_lock);
+		get_inotify_watch(watch);
+		mutex_unlock(&ih->mutex);
+		return 1;	/* the best outcome */
+	}
+	sb->s_count++;
+	spin_unlock(&sb_lock);
+	mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
+	down_read(&sb->s_umount);
+	if (likely(!sb->s_root)) {
+		/* fs is already shut down; the watch is dead */
+		drop_super(sb);
+		return 0;
+	}
+	/* raced with the final deactivate_super() */
+	mutex_lock(&ih->mutex);
+	if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
+		/* the watch is dead */
+		mutex_unlock(&ih->mutex);
+		drop_super(sb);
+		return 0;
+	}
+	/* still alive or freed and reused with the same sb and wd; kill */
+	get_inotify_watch(watch);
+	mutex_unlock(&ih->mutex);
+	return 2;
+}
+
+static void unpin_and_kill(struct inotify_watch *watch, int how)
+{
+	struct super_block *sb = watch->inode->i_sb;
+	put_inotify_watch(watch);
+	switch (how) {
+	case 1:
+		deactivate_super(sb);
+		break;
+	case 2:
+		drop_super(sb);
+	}
+}
+
+/**
+ * inotify_destroy - clean up and destroy an inotify instance
+ * @ih: inotify handle
+ */
+void inotify_destroy(struct inotify_handle *ih)
+{
+	/*
+	 * Destroy all of the watches for this handle. Unfortunately, not very
+	 * pretty.  We cannot do a simple iteration over the list, because we
+	 * do not know the inode until we iterate to the watch.  But we need to
+	 * hold inode->inotify_mutex before ih->mutex.  The following works.
+	 *
+	 * AV: it had to become even uglier to start working ;-/
+	 */
+	while (1) {
+		struct inotify_watch *watch;
+		struct list_head *watches;
+		struct super_block *sb;
+		struct inode *inode;
+		int how;
+
+		mutex_lock(&ih->mutex);
+		watches = &ih->watches;
+		if (list_empty(watches)) {
+			mutex_unlock(&ih->mutex);
+			break;
+		}
+		watch = list_first_entry(watches, struct inotify_watch, h_list);
+		sb = watch->inode->i_sb;
+		how = pin_to_kill(ih, watch);
+		if (!how)
+			continue;
+
+		inode = watch->inode;
+		mutex_lock(&inode->inotify_mutex);
+		mutex_lock(&ih->mutex);
+
+		/* make sure we didn't race with another list removal */
+		if (likely(idr_find(&ih->idr, watch->wd))) {
+			remove_watch_no_event(watch, ih);
+			put_inotify_watch(watch);
+		}
+
+		mutex_unlock(&ih->mutex);
+		mutex_unlock(&inode->inotify_mutex);
+		unpin_and_kill(watch, how);
+	}
+
+	/* free this handle: the put matching the get in inotify_init() */
+	put_inotify_handle(ih);
+}
+EXPORT_SYMBOL_GPL(inotify_destroy);
+
+/**
+ * inotify_find_watch - find an existing watch for an (ih,inode) pair
+ * @ih: inotify handle
+ * @inode: inode to watch
+ * @watchp: pointer to existing inotify_watch
+ *
+ * Caller must pin given inode (via nameidata).
+ */
+s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
+		       struct inotify_watch **watchp)
+{
+	struct inotify_watch *old;
+	int ret = -ENOENT;
+
+	mutex_lock(&inode->inotify_mutex);
+	mutex_lock(&ih->mutex);
+
+	old = inode_find_handle(inode, ih);
+	if (unlikely(old)) {
+		get_inotify_watch(old); /* caller must put watch */
+		*watchp = old;
+		ret = old->wd;
+	}
+
+	mutex_unlock(&ih->mutex);
+	mutex_unlock(&inode->inotify_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(inotify_find_watch);
+
+/**
+ * inotify_find_update_watch - find and update the mask of an existing watch
+ * @ih: inotify handle
+ * @inode: inode's watch to update
+ * @mask: mask of events to watch
+ *
+ * Caller must pin given inode (via nameidata).
+ */
+s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
+			      u32 mask)
+{
+	struct inotify_watch *old;
+	int mask_add = 0;
+	int ret;
+
+	if (mask & IN_MASK_ADD)
+		mask_add = 1;
+
+	/* don't allow invalid bits: we don't want flags set */
+	mask &= IN_ALL_EVENTS | IN_ONESHOT;
+	if (unlikely(!mask))
+		return -EINVAL;
+
+	mutex_lock(&inode->inotify_mutex);
+	mutex_lock(&ih->mutex);
+
+	/*
+	 * Handle the case of re-adding a watch on an (inode,ih) pair that we
+	 * are already watching.  We just update the mask and return its wd.
+	 */
+	old = inode_find_handle(inode, ih);
+	if (unlikely(!old)) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	if (mask_add)
+		old->mask |= mask;
+	else
+		old->mask = mask;
+	ret = old->wd;
+out:
+	mutex_unlock(&ih->mutex);
+	mutex_unlock(&inode->inotify_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(inotify_find_update_watch);
+
+/**
+ * inotify_add_watch - add a watch to an inotify instance
+ * @ih: inotify handle
+ * @watch: caller allocated watch structure
+ * @inode: inode to watch
+ * @mask: mask of events to watch
+ *
+ * Caller must pin given inode (via nameidata).
+ * Caller must ensure it only calls inotify_add_watch() once per watch.
+ * Calls inotify_handle_get_wd() so may sleep.
+ */
+s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
+		      struct inode *inode, u32 mask)
+{
+	int ret = 0;
+	int newly_watched;
+
+	/* don't allow invalid bits: we don't want flags set */
+	mask &= IN_ALL_EVENTS | IN_ONESHOT;
+	if (unlikely(!mask))
+		return -EINVAL;
+	watch->mask = mask;
+
+	mutex_lock(&inode->inotify_mutex);
+	mutex_lock(&ih->mutex);
+
+	/* Initialize a new watch */
+	ret = inotify_handle_get_wd(ih, watch);
+	if (unlikely(ret))
+		goto out;
+	ret = watch->wd;
+
+	/* save a reference to handle and bump the count to make it official */
+	get_inotify_handle(ih);
+	watch->ih = ih;
+
+	/*
+	 * Save a reference to the inode and bump the ref count to make it
+	 * official.  We hold a reference to nameidata, which makes this safe.
+	 */
+	watch->inode = igrab(inode);
+
+	/* Add the watch to the handle's and the inode's list */
+	newly_watched = !inotify_inode_watched(inode);
+	list_add(&watch->h_list, &ih->watches);
+	list_add(&watch->i_list, &inode->inotify_watches);
+	/*
+	 * Set child flags _after_ adding the watch, so there is no race
+	 * windows where newly instantiated children could miss their parent's
+	 * watched flag.
+	 */
+	if (newly_watched)
+		set_dentry_child_flags(inode, 1);
+
+out:
+	mutex_unlock(&ih->mutex);
+	mutex_unlock(&inode->inotify_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(inotify_add_watch);
+
+/**
+ * inotify_clone_watch - put the watch next to existing one
+ * @old: already installed watch
+ * @new: new watch
+ *
+ * Caller must hold the inotify_mutex of inode we are dealing with;
+ * it is expected to remove the old watch before unlocking the inode.
+ */
+s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
+{
+	struct inotify_handle *ih = old->ih;
+	int ret = 0;
+
+	new->mask = old->mask;
+	new->ih = ih;
+
+	mutex_lock(&ih->mutex);
+
+	/* Initialize a new watch */
+	ret = inotify_handle_get_wd(ih, new);
+	if (unlikely(ret))
+		goto out;
+	ret = new->wd;
+
+	get_inotify_handle(ih);
+
+	new->inode = igrab(old->inode);
+
+	list_add(&new->h_list, &ih->watches);
+	list_add(&new->i_list, &old->inode->inotify_watches);
+out:
+	mutex_unlock(&ih->mutex);
+	return ret;
+}
+
+void inotify_evict_watch(struct inotify_watch *watch)
+{
+	get_inotify_watch(watch);
+	mutex_lock(&watch->ih->mutex);
+	inotify_remove_watch_locked(watch->ih, watch);
+	mutex_unlock(&watch->ih->mutex);
+}
+
+/**
+ * inotify_rm_wd - remove a watch from an inotify instance
+ * @ih: inotify handle
+ * @wd: watch descriptor to remove
+ *
+ * Can sleep.
+ */
+int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
+{
+	struct inotify_watch *watch;
+	struct super_block *sb;
+	struct inode *inode;
+	int how;
+
+	mutex_lock(&ih->mutex);
+	watch = idr_find(&ih->idr, wd);
+	if (unlikely(!watch)) {
+		mutex_unlock(&ih->mutex);
+		return -EINVAL;
+	}
+	sb = watch->inode->i_sb;
+	how = pin_to_kill(ih, watch);
+	if (!how)
+		return 0;
+
+	inode = watch->inode;
+
+	mutex_lock(&inode->inotify_mutex);
+	mutex_lock(&ih->mutex);
+
+	/* make sure that we did not race */
+	if (likely(idr_find(&ih->idr, wd) == watch))
+		inotify_remove_watch_locked(ih, watch);
+
+	mutex_unlock(&ih->mutex);
+	mutex_unlock(&inode->inotify_mutex);
+	unpin_and_kill(watch, how);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(inotify_rm_wd);
+
+/**
+ * inotify_rm_watch - remove a watch from an inotify instance
+ * @ih: inotify handle
+ * @watch: watch to remove
+ *
+ * Can sleep.
+ */
+int inotify_rm_watch(struct inotify_handle *ih,
+		     struct inotify_watch *watch)
+{
+	return inotify_rm_wd(ih, watch->wd);
+}
+EXPORT_SYMBOL_GPL(inotify_rm_watch);
+
+/*
+ * inotify_setup - core initialization function
+ */
+static int __init inotify_setup(void)
+{
+	atomic_set(&inotify_cookie, 0);
+
+	return 0;
+}
+
+module_init(inotify_setup);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
new file mode 100644
index 0000000..400f806
--- /dev/null
+++ b/fs/notify/inotify/inotify_user.c
@@ -0,0 +1,778 @@
+/*
+ * fs/inotify_user.c - inotify support for userspace
+ *
+ * Authors:
+ *	John McCutchan	<ttb@tentacle.dhs.org>
+ *	Robert Love	<rml@novell.com>
+ *
+ * Copyright (C) 2005 John McCutchan
+ * Copyright 2006 Hewlett-Packard Development Company, L.P.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/inotify.h>
+#include <linux/syscalls.h>
+#include <linux/magic.h>
+
+#include <asm/ioctls.h>
+
+static struct kmem_cache *watch_cachep __read_mostly;
+static struct kmem_cache *event_cachep __read_mostly;
+
+static struct vfsmount *inotify_mnt __read_mostly;
+
+/* these are configurable via /proc/sys/fs/inotify/ */
+static int inotify_max_user_instances __read_mostly;
+static int inotify_max_user_watches __read_mostly;
+static int inotify_max_queued_events __read_mostly;
+
+/*
+ * Lock ordering:
+ *
+ * inotify_dev->up_mutex (ensures we don't re-add the same watch)
+ * 	inode->inotify_mutex (protects inode's watch list)
+ * 		inotify_handle->mutex (protects inotify_handle's watch list)
+ * 			inotify_dev->ev_mutex (protects device's event queue)
+ */
+
+/*
+ * Lifetimes of the main data structures:
+ *
+ * inotify_device: Lifetime is managed by reference count, from
+ * sys_inotify_init() until release.  Additional references can bump the count
+ * via get_inotify_dev() and drop the count via put_inotify_dev().
+ *
+ * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
+ * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
+ * first event, or to inotify_destroy().
+ */
+
+/*
+ * struct inotify_device - represents an inotify instance
+ *
+ * This structure is protected by the mutex 'mutex'.
+ */
+struct inotify_device {
+	wait_queue_head_t 	wq;		/* wait queue for i/o */
+	struct mutex		ev_mutex;	/* protects event queue */
+	struct mutex		up_mutex;	/* synchronizes watch updates */
+	struct list_head 	events;		/* list of queued events */
+	struct user_struct	*user;		/* user who opened this dev */
+	struct inotify_handle	*ih;		/* inotify handle */
+	struct fasync_struct    *fa;            /* async notification */
+	atomic_t		count;		/* reference count */
+	unsigned int		queue_size;	/* size of the queue (bytes) */
+	unsigned int		event_count;	/* number of pending events */
+	unsigned int		max_events;	/* maximum number of events */
+};
+
+/*
+ * struct inotify_kernel_event - An inotify event, originating from a watch and
+ * queued for user-space.  A list of these is attached to each instance of the
+ * device.  In read(), this list is walked and all events that can fit in the
+ * buffer are returned.
+ *
+ * Protected by dev->ev_mutex of the device in which we are queued.
+ */
+struct inotify_kernel_event {
+	struct inotify_event	event;	/* the user-space event */
+	struct list_head        list;	/* entry in inotify_device's list */
+	char			*name;	/* filename, if any */
+};
+
+/*
+ * struct inotify_user_watch - our version of an inotify_watch, we add
+ * a reference to the associated inotify_device.
+ */
+struct inotify_user_watch {
+	struct inotify_device	*dev;	/* associated device */
+	struct inotify_watch	wdata;	/* inotify watch data */
+};
+
+#ifdef CONFIG_SYSCTL
+
+#include <linux/sysctl.h>
+
+static int zero;
+
+ctl_table inotify_table[] = {
+	{
+		.ctl_name	= INOTIFY_MAX_USER_INSTANCES,
+		.procname	= "max_user_instances",
+		.data		= &inotify_max_user_instances,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= INOTIFY_MAX_USER_WATCHES,
+		.procname	= "max_user_watches",
+		.data		= &inotify_max_user_watches,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= INOTIFY_MAX_QUEUED_EVENTS,
+		.procname	= "max_queued_events",
+		.data		= &inotify_max_queued_events,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero
+	},
+	{ .ctl_name = 0 }
+};
+#endif /* CONFIG_SYSCTL */
+
+static inline void get_inotify_dev(struct inotify_device *dev)
+{
+	atomic_inc(&dev->count);
+}
+
+static inline void put_inotify_dev(struct inotify_device *dev)
+{
+	if (atomic_dec_and_test(&dev->count)) {
+		atomic_dec(&dev->user->inotify_devs);
+		free_uid(dev->user);
+		kfree(dev);
+	}
+}
+
+/*
+ * free_inotify_user_watch - cleans up the watch and its references
+ */
+static void free_inotify_user_watch(struct inotify_watch *w)
+{
+	struct inotify_user_watch *watch;
+	struct inotify_device *dev;
+
+	watch = container_of(w, struct inotify_user_watch, wdata);
+	dev = watch->dev;
+
+	atomic_dec(&dev->user->inotify_watches);
+	put_inotify_dev(dev);
+	kmem_cache_free(watch_cachep, watch);
+}
+
+/*
+ * kernel_event - create a new kernel event with the given parameters
+ *
+ * This function can sleep.
+ */
+static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
+						  const char *name)
+{
+	struct inotify_kernel_event *kevent;
+
+	kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
+	if (unlikely(!kevent))
+		return NULL;
+
+	/* we hand this out to user-space, so zero it just in case */
+	memset(&kevent->event, 0, sizeof(struct inotify_event));
+
+	kevent->event.wd = wd;
+	kevent->event.mask = mask;
+	kevent->event.cookie = cookie;
+
+	INIT_LIST_HEAD(&kevent->list);
+
+	if (name) {
+		size_t len, rem, event_size = sizeof(struct inotify_event);
+
+		/*
+		 * We need to pad the filename so as to properly align an
+		 * array of inotify_event structures.  Because the structure is
+		 * small and the common case is a small filename, we just round
+		 * up to the next multiple of the structure's sizeof.  This is
+		 * simple and safe for all architectures.
+		 */
+		len = strlen(name) + 1;
+		rem = event_size - len;
+		if (len > event_size) {
+			rem = event_size - (len % event_size);
+			if (len % event_size == 0)
+				rem = 0;
+		}
+
+		kevent->name = kmalloc(len + rem, GFP_KERNEL);
+		if (unlikely(!kevent->name)) {
+			kmem_cache_free(event_cachep, kevent);
+			return NULL;
+		}
+		memcpy(kevent->name, name, len);
+		if (rem)
+			memset(kevent->name + len, 0, rem);
+		kevent->event.len = len + rem;
+	} else {
+		kevent->event.len = 0;
+		kevent->name = NULL;
+	}
+
+	return kevent;
+}
+
+/*
+ * inotify_dev_get_event - return the next event in the given dev's queue
+ *
+ * Caller must hold dev->ev_mutex.
+ */
+static inline struct inotify_kernel_event *
+inotify_dev_get_event(struct inotify_device *dev)
+{
+	return list_entry(dev->events.next, struct inotify_kernel_event, list);
+}
+
+/*
+ * inotify_dev_get_last_event - return the last event in the given dev's queue
+ *
+ * Caller must hold dev->ev_mutex.
+ */
+static inline struct inotify_kernel_event *
+inotify_dev_get_last_event(struct inotify_device *dev)
+{
+	if (list_empty(&dev->events))
+		return NULL;
+	return list_entry(dev->events.prev, struct inotify_kernel_event, list);
+}
+
+/*
+ * inotify_dev_queue_event - event handler registered with core inotify, adds
+ * a new event to the given device
+ *
+ * Can sleep (calls kernel_event()).
+ */
+static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
+				    u32 cookie, const char *name,
+				    struct inode *ignored)
+{
+	struct inotify_user_watch *watch;
+	struct inotify_device *dev;
+	struct inotify_kernel_event *kevent, *last;
+
+	watch = container_of(w, struct inotify_user_watch, wdata);
+	dev = watch->dev;
+
+	mutex_lock(&dev->ev_mutex);
+
+	/* we can safely put the watch as we don't reference it while
+	 * generating the event
+	 */
+	if (mask & IN_IGNORED || w->mask & IN_ONESHOT)
+		put_inotify_watch(w); /* final put */
+
+	/* coalescing: drop this event if it is a dupe of the previous */
+	last = inotify_dev_get_last_event(dev);
+	if (last && last->event.mask == mask && last->event.wd == wd &&
+			last->event.cookie == cookie) {
+		const char *lastname = last->name;
+
+		if (!name && !lastname)
+			goto out;
+		if (name && lastname && !strcmp(lastname, name))
+			goto out;
+	}
+
+	/* the queue overflowed and we already sent the Q_OVERFLOW event */
+	if (unlikely(dev->event_count > dev->max_events))
+		goto out;
+
+	/* if the queue overflows, we need to notify user space */
+	if (unlikely(dev->event_count == dev->max_events))
+		kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
+	else
+		kevent = kernel_event(wd, mask, cookie, name);
+
+	if (unlikely(!kevent))
+		goto out;
+
+	/* queue the event and wake up anyone waiting */
+	dev->event_count++;
+	dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
+	list_add_tail(&kevent->list, &dev->events);
+	wake_up_interruptible(&dev->wq);
+	kill_fasync(&dev->fa, SIGIO, POLL_IN);
+
+out:
+	mutex_unlock(&dev->ev_mutex);
+}
+
+/*
+ * remove_kevent - cleans up the given kevent
+ *
+ * Caller must hold dev->ev_mutex.
+ */
+static void remove_kevent(struct inotify_device *dev,
+			  struct inotify_kernel_event *kevent)
+{
+	list_del(&kevent->list);
+
+	dev->event_count--;
+	dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
+}
+
+/*
+ * free_kevent - frees the given kevent.
+ */
+static void free_kevent(struct inotify_kernel_event *kevent)
+{
+	kfree(kevent->name);
+	kmem_cache_free(event_cachep, kevent);
+}
+
+/*
+ * inotify_dev_event_dequeue - destroy an event on the given device
+ *
+ * Caller must hold dev->ev_mutex.
+ */
+static void inotify_dev_event_dequeue(struct inotify_device *dev)
+{
+	if (!list_empty(&dev->events)) {
+		struct inotify_kernel_event *kevent;
+		kevent = inotify_dev_get_event(dev);
+		remove_kevent(dev, kevent);
+		free_kevent(kevent);
+	}
+}
+
+/*
+ * find_inode - resolve a user-given path to a specific inode
+ */
+static int find_inode(const char __user *dirname, struct path *path,
+		      unsigned flags)
+{
+	int error;
+
+	error = user_path_at(AT_FDCWD, dirname, flags, path);
+	if (error)
+		return error;
+	/* you can only watch an inode if you have read permissions on it */
+	error = inode_permission(path->dentry->d_inode, MAY_READ);
+	if (error)
+		path_put(path);
+	return error;
+}
+
+/*
+ * create_watch - creates a watch on the given device.
+ *
+ * Callers must hold dev->up_mutex.
+ */
+static int create_watch(struct inotify_device *dev, struct inode *inode,
+			u32 mask)
+{
+	struct inotify_user_watch *watch;
+	int ret;
+
+	if (atomic_read(&dev->user->inotify_watches) >=
+			inotify_max_user_watches)
+		return -ENOSPC;
+
+	watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
+	if (unlikely(!watch))
+		return -ENOMEM;
+
+	/* save a reference to device and bump the count to make it official */
+	get_inotify_dev(dev);
+	watch->dev = dev;
+
+	atomic_inc(&dev->user->inotify_watches);
+
+	inotify_init_watch(&watch->wdata);
+	ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
+	if (ret < 0)
+		free_inotify_user_watch(&watch->wdata);
+
+	return ret;
+}
+
+/* Device Interface */
+
+static unsigned int inotify_poll(struct file *file, poll_table *wait)
+{
+	struct inotify_device *dev = file->private_data;
+	int ret = 0;
+
+	poll_wait(file, &dev->wq, wait);
+	mutex_lock(&dev->ev_mutex);
+	if (!list_empty(&dev->events))
+		ret = POLLIN | POLLRDNORM;
+	mutex_unlock(&dev->ev_mutex);
+
+	return ret;
+}
+
+static ssize_t inotify_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *pos)
+{
+	size_t event_size = sizeof (struct inotify_event);
+	struct inotify_device *dev;
+	char __user *start;
+	int ret;
+	DEFINE_WAIT(wait);
+
+	start = buf;
+	dev = file->private_data;
+
+	while (1) {
+
+		prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
+
+		mutex_lock(&dev->ev_mutex);
+		if (!list_empty(&dev->events)) {
+			ret = 0;
+			break;
+		}
+		mutex_unlock(&dev->ev_mutex);
+
+		if (file->f_flags & O_NONBLOCK) {
+			ret = -EAGAIN;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		schedule();
+	}
+
+	finish_wait(&dev->wq, &wait);
+	if (ret)
+		return ret;
+
+	while (1) {
+		struct inotify_kernel_event *kevent;
+
+		ret = buf - start;
+		if (list_empty(&dev->events))
+			break;
+
+		kevent = inotify_dev_get_event(dev);
+		if (event_size + kevent->event.len > count) {
+			if (ret == 0 && count > 0) {
+				/*
+				 * could not get a single event because we
+				 * didn't have enough buffer space.
+				 */
+				ret = -EINVAL;
+			}
+			break;
+		}
+		remove_kevent(dev, kevent);
+
+		/*
+		 * Must perform the copy_to_user outside the mutex in order
+		 * to avoid a lock order reversal with mmap_sem.
+		 */
+		mutex_unlock(&dev->ev_mutex);
+
+		if (copy_to_user(buf, &kevent->event, event_size)) {
+			ret = -EFAULT;
+			break;
+		}
+		buf += event_size;
+		count -= event_size;
+
+		if (kevent->name) {
+			if (copy_to_user(buf, kevent->name, kevent->event.len)){
+				ret = -EFAULT;
+				break;
+			}
+			buf += kevent->event.len;
+			count -= kevent->event.len;
+		}
+
+		free_kevent(kevent);
+
+		mutex_lock(&dev->ev_mutex);
+	}
+	mutex_unlock(&dev->ev_mutex);
+
+	return ret;
+}
+
+static int inotify_fasync(int fd, struct file *file, int on)
+{
+	struct inotify_device *dev = file->private_data;
+
+	return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO;
+}
+
+static int inotify_release(struct inode *ignored, struct file *file)
+{
+	struct inotify_device *dev = file->private_data;
+
+	inotify_destroy(dev->ih);
+
+	/* destroy all of the events on this device */
+	mutex_lock(&dev->ev_mutex);
+	while (!list_empty(&dev->events))
+		inotify_dev_event_dequeue(dev);
+	mutex_unlock(&dev->ev_mutex);
+
+	/* free this device: the put matching the get in inotify_init() */
+	put_inotify_dev(dev);
+
+	return 0;
+}
+
+static long inotify_ioctl(struct file *file, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct inotify_device *dev;
+	void __user *p;
+	int ret = -ENOTTY;
+
+	dev = file->private_data;
+	p = (void __user *) arg;
+
+	switch (cmd) {
+	case FIONREAD:
+		ret = put_user(dev->queue_size, (int __user *) p);
+		break;
+	}
+
+	return ret;
+}
+
+static const struct file_operations inotify_fops = {
+	.poll           = inotify_poll,
+	.read           = inotify_read,
+	.fasync         = inotify_fasync,
+	.release        = inotify_release,
+	.unlocked_ioctl = inotify_ioctl,
+	.compat_ioctl	= inotify_ioctl,
+};
+
+static const struct inotify_operations inotify_user_ops = {
+	.handle_event	= inotify_dev_queue_event,
+	.destroy_watch	= free_inotify_user_watch,
+};
+
+asmlinkage long sys_inotify_init1(int flags)
+{
+	struct inotify_device *dev;
+	struct inotify_handle *ih;
+	struct user_struct *user;
+	struct file *filp;
+	int fd, ret;
+
+	/* Check the IN_* constants for consistency.  */
+	BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
+
+	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
+		return -EINVAL;
+
+	fd = get_unused_fd_flags(flags & O_CLOEXEC);
+	if (fd < 0)
+		return fd;
+
+	filp = get_empty_filp();
+	if (!filp) {
+		ret = -ENFILE;
+		goto out_put_fd;
+	}
+
+	user = get_current_user();
+	if (unlikely(atomic_read(&user->inotify_devs) >=
+			inotify_max_user_instances)) {
+		ret = -EMFILE;
+		goto out_free_uid;
+	}
+
+	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
+	if (unlikely(!dev)) {
+		ret = -ENOMEM;
+		goto out_free_uid;
+	}
+
+	ih = inotify_init(&inotify_user_ops);
+	if (IS_ERR(ih)) {
+		ret = PTR_ERR(ih);
+		goto out_free_dev;
+	}
+	dev->ih = ih;
+	dev->fa = NULL;
+
+	filp->f_op = &inotify_fops;
+	filp->f_path.mnt = mntget(inotify_mnt);
+	filp->f_path.dentry = dget(inotify_mnt->mnt_root);
+	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
+	filp->f_mode = FMODE_READ;
+	filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
+	filp->private_data = dev;
+
+	INIT_LIST_HEAD(&dev->events);
+	init_waitqueue_head(&dev->wq);
+	mutex_init(&dev->ev_mutex);
+	mutex_init(&dev->up_mutex);
+	dev->event_count = 0;
+	dev->queue_size = 0;
+	dev->max_events = inotify_max_queued_events;
+	dev->user = user;
+	atomic_set(&dev->count, 0);
+
+	get_inotify_dev(dev);
+	atomic_inc(&user->inotify_devs);
+	fd_install(fd, filp);
+
+	return fd;
+out_free_dev:
+	kfree(dev);
+out_free_uid:
+	free_uid(user);
+	put_filp(filp);
+out_put_fd:
+	put_unused_fd(fd);
+	return ret;
+}
+
+asmlinkage long sys_inotify_init(void)
+{
+	return sys_inotify_init1(0);
+}
+
+asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
+{
+	struct inode *inode;
+	struct inotify_device *dev;
+	struct path path;
+	struct file *filp;
+	int ret, fput_needed;
+	unsigned flags = 0;
+
+	filp = fget_light(fd, &fput_needed);
+	if (unlikely(!filp))
+		return -EBADF;
+
+	/* verify that this is indeed an inotify instance */
+	if (unlikely(filp->f_op != &inotify_fops)) {
+		ret = -EINVAL;
+		goto fput_and_out;
+	}
+
+	if (!(mask & IN_DONT_FOLLOW))
+		flags |= LOOKUP_FOLLOW;
+	if (mask & IN_ONLYDIR)
+		flags |= LOOKUP_DIRECTORY;
+
+	ret = find_inode(pathname, &path, flags);
+	if (unlikely(ret))
+		goto fput_and_out;
+
+	/* inode held in place by reference to path; dev by fget on fd */
+	inode = path.dentry->d_inode;
+	dev = filp->private_data;
+
+	mutex_lock(&dev->up_mutex);
+	ret = inotify_find_update_watch(dev->ih, inode, mask);
+	if (ret == -ENOENT)
+		ret = create_watch(dev, inode, mask);
+	mutex_unlock(&dev->up_mutex);
+
+	path_put(&path);
+fput_and_out:
+	fput_light(filp, fput_needed);
+	return ret;
+}
+
+asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
+{
+	struct file *filp;
+	struct inotify_device *dev;
+	int ret, fput_needed;
+
+	filp = fget_light(fd, &fput_needed);
+	if (unlikely(!filp))
+		return -EBADF;
+
+	/* verify that this is indeed an inotify instance */
+	if (unlikely(filp->f_op != &inotify_fops)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	dev = filp->private_data;
+
+	/* we free our watch data when we get IN_IGNORED */
+	ret = inotify_rm_wd(dev->ih, wd);
+
+out:
+	fput_light(filp, fput_needed);
+	return ret;
+}
+
+static int
+inotify_get_sb(struct file_system_type *fs_type, int flags,
+	       const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	return get_sb_pseudo(fs_type, "inotify", NULL,
+			INOTIFYFS_SUPER_MAGIC, mnt);
+}
+
+static struct file_system_type inotify_fs_type = {
+    .name           = "inotifyfs",
+    .get_sb         = inotify_get_sb,
+    .kill_sb        = kill_anon_super,
+};
+
+/*
+ * inotify_user_setup - Our initialization function.  Note that we cannnot return
+ * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
+ * must result in panic().
+ */
+static int __init inotify_user_setup(void)
+{
+	int ret;
+
+	ret = register_filesystem(&inotify_fs_type);
+	if (unlikely(ret))
+		panic("inotify: register_filesystem returned %d!\n", ret);
+
+	inotify_mnt = kern_mount(&inotify_fs_type);
+	if (IS_ERR(inotify_mnt))
+		panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
+
+	inotify_max_queued_events = 16384;
+	inotify_max_user_instances = 128;
+	inotify_max_user_watches = 8192;
+
+	watch_cachep = kmem_cache_create("inotify_watch_cache",
+					 sizeof(struct inotify_user_watch),
+					 0, SLAB_PANIC, NULL);
+	event_cachep = kmem_cache_create("inotify_event_cache",
+					 sizeof(struct inotify_kernel_event),
+					 0, SLAB_PANIC, NULL);
+
+	return 0;
+}
+
+module_init(inotify_user_setup);
-- 
cgit v0.10.2


From 261bca86ed4f7f391d1938167624e78da61dcc6b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 30 Dec 2008 01:48:21 -0500
Subject: nfsd/create race fixes, infrastructure

new helpers - insert_inode_locked() and insert_inode_locked4().
Hash new inode, making sure that there's no such inode in icache
already.  If there is and it does not end up unhashed (as would
happen if we have nfsd trying to resolve a bogus fhandle), fail.
Otherwise insert our inode into hash and succeed.

In either case have i_state set to new+locked; cleanup ends up
being simpler with such calling conventions.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/inode.c b/fs/inode.c
index 098a244..7de1cda 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1032,6 +1032,65 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
 
 EXPORT_SYMBOL(iget_locked);
 
+int insert_inode_locked(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	ino_t ino = inode->i_ino;
+	struct hlist_head *head = inode_hashtable + hash(sb, ino);
+	struct inode *old;
+
+	inode->i_state |= I_LOCK|I_NEW;
+	while (1) {
+		spin_lock(&inode_lock);
+		old = find_inode_fast(sb, head, ino);
+		if (likely(!old)) {
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode_lock);
+			return 0;
+		}
+		__iget(old);
+		spin_unlock(&inode_lock);
+		wait_on_inode(old);
+		if (unlikely(!hlist_unhashed(&old->i_hash))) {
+			iput(old);
+			return -EBUSY;
+		}
+		iput(old);
+	}
+}
+
+EXPORT_SYMBOL(insert_inode_locked);
+
+int insert_inode_locked4(struct inode *inode, unsigned long hashval,
+		int (*test)(struct inode *, void *), void *data)
+{
+	struct super_block *sb = inode->i_sb;
+	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+	struct inode *old;
+
+	inode->i_state |= I_LOCK|I_NEW;
+
+	while (1) {
+		spin_lock(&inode_lock);
+		old = find_inode(sb, head, test, data);
+		if (likely(!old)) {
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode_lock);
+			return 0;
+		}
+		__iget(old);
+		spin_unlock(&inode_lock);
+		wait_on_inode(old);
+		if (unlikely(!hlist_unhashed(&old->i_hash))) {
+			iput(old);
+			return -EBUSY;
+		}
+		iput(old);
+	}
+}
+
+EXPORT_SYMBOL(insert_inode_locked4);
+
 /**
  *	__insert_inode_hash - hash an inode
  *	@inode: unhashed inode
diff --git a/include/linux/fs.h b/include/linux/fs.h
index be16ce0..e2170ee 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1902,6 +1902,8 @@ extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
 
 extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
 extern struct inode * iget_locked(struct super_block *, unsigned long);
+extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
+extern int insert_inode_locked(struct inode *);
 extern void unlock_new_inode(struct inode *);
 
 extern void __iget(struct inode * inode);
-- 
cgit v0.10.2


From 41080b5a240113328c607f22b849f653373db0ce Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 30 Dec 2008 01:52:35 -0500
Subject: nfsd race fixes: ext2

* make ext2_new_inode() put the inode into icache in locked state
* do not unlock until the inode is fully set up; otherwise nfsd
might pick it in half-baked state.
* make sure that ext2_new_inode() does *not* lead to two inodes with the
same inumber hashed at the same time; otherwise a bogus fhandle coming
from nfsd might race with inode creation:

nfsd: iget_locked() creates inode
nfsd: try to read from disk, block on that.
ext2_new_inode(): allocate inode with that inumber
ext2_new_inode(): insert it into icache, set it up and dirty
ext2_write_inode(): get the relevant part of inode table in cache,
set the entry for our inode (and start writing to disk)
nfsd: get CPU again, look into inode table, see nice and sane on-disk
inode, set the in-core inode from it

oops - we have two in-core inodes with the same inumber live in icache,
both used for IO.  Welcome to fs corruption...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 8d0add6..c454d5d 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -585,7 +585,10 @@ got:
 	spin_lock(&sbi->s_next_gen_lock);
 	inode->i_generation = sbi->s_next_generation++;
 	spin_unlock(&sbi->s_next_gen_lock);
-	insert_inode_hash(inode);
+	if (insert_inode_locked(inode) < 0) {
+		err = -EINVAL;
+		goto fail_drop;
+	}
 
 	if (DQUOT_ALLOC_INODE(inode)) {
 		err = -EDQUOT;
@@ -612,6 +615,7 @@ fail_drop:
 	DQUOT_DROP(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
+	unlock_new_inode(inode);
 	iput(inode);
 	return ERR_PTR(err);
 
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2a74725..90ea179 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -41,9 +41,11 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
 	int err = ext2_add_link(dentry, inode);
 	if (!err) {
 		d_instantiate(dentry, inode);
+		unlock_new_inode(inode);
 		return 0;
 	}
 	inode_dec_link_count(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 	return err;
 }
@@ -170,6 +172,7 @@ out:
 
 out_fail:
 	inode_dec_link_count(inode);
+	unlock_new_inode(inode);
 	iput (inode);
 	goto out;
 }
@@ -178,6 +181,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
 	struct dentry *dentry)
 {
 	struct inode *inode = old_dentry->d_inode;
+	int err;
 
 	if (inode->i_nlink >= EXT2_LINK_MAX)
 		return -EMLINK;
@@ -186,7 +190,14 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
 	inode_inc_link_count(inode);
 	atomic_inc(&inode->i_count);
 
-	return ext2_add_nondir(dentry, inode);
+	err = ext2_add_link(dentry, inode);
+	if (!err) {
+		d_instantiate(dentry, inode);
+		return 0;
+	}
+	inode_dec_link_count(inode);
+	iput(inode);
+	return err;
 }
 
 static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
@@ -222,12 +233,14 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 		goto out_fail;
 
 	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
 out:
 	return err;
 
 out_fail:
 	inode_dec_link_count(inode);
 	inode_dec_link_count(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 out_dir:
 	inode_dec_link_count(dir);
-- 
cgit v0.10.2


From c38012daa7ad902a39a4213ba2b3fe50e81157ea Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 30 Dec 2008 02:02:50 -0500
Subject: nfsd race fixes: ext3

ext3 analog of the previous patch

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 490bd0e..5655fbc 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -579,7 +579,10 @@ got:
 	ext3_set_inode_flags(inode);
 	if (IS_DIRSYNC(inode))
 		handle->h_sync = 1;
-	insert_inode_hash(inode);
+	if (insert_inode_locked(inode) < 0) {
+		err = -EINVAL;
+		goto fail_drop;
+	}
 	spin_lock(&sbi->s_next_gen_lock);
 	inode->i_generation = sbi->s_next_generation++;
 	spin_unlock(&sbi->s_next_gen_lock);
@@ -627,6 +630,7 @@ fail_drop:
 	DQUOT_DROP(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
+	unlock_new_inode(inode);
 	iput(inode);
 	brelse(bitmap_bh);
 	return ERR_PTR(err);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 3e5edc9..297ea8d 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1652,9 +1652,11 @@ static int ext3_add_nondir(handle_t *handle,
 	if (!err) {
 		ext3_mark_inode_dirty(handle, inode);
 		d_instantiate(dentry, inode);
+		unlock_new_inode(inode);
 		return 0;
 	}
 	drop_nlink(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 	return err;
 }
@@ -1765,6 +1767,7 @@ retry:
 	dir_block = ext3_bread (handle, inode, 0, 1, &err);
 	if (!dir_block) {
 		drop_nlink(inode); /* is this nlink == 0? */
+		unlock_new_inode(inode);
 		ext3_mark_inode_dirty(handle, inode);
 		iput (inode);
 		goto out_stop;
@@ -1792,6 +1795,7 @@ retry:
 	err = ext3_add_entry (handle, dentry, inode);
 	if (err) {
 		inode->i_nlink = 0;
+		unlock_new_inode(inode);
 		ext3_mark_inode_dirty(handle, inode);
 		iput (inode);
 		goto out_stop;
@@ -1800,6 +1804,7 @@ retry:
 	ext3_update_dx_flag(dir);
 	ext3_mark_inode_dirty(handle, dir);
 	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
 out_stop:
 	ext3_journal_stop(handle);
 	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
@@ -2174,6 +2179,7 @@ retry:
 				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
 		if (err) {
 			drop_nlink(inode);
+			unlock_new_inode(inode);
 			ext3_mark_inode_dirty(handle, inode);
 			iput (inode);
 			goto out_stop;
@@ -2221,7 +2227,14 @@ retry:
 	inc_nlink(inode);
 	atomic_inc(&inode->i_count);
 
-	err = ext3_add_nondir(handle, dentry, inode);
+	err = ext3_add_entry(handle, dentry, inode);
+	if (!err) {
+		ext3_mark_inode_dirty(handle, inode);
+		d_instantiate(dentry, inode);
+	} else {
+		drop_nlink(inode);
+		iput(inode);
+	}
 	ext3_journal_stop(handle);
 	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
-- 
cgit v0.10.2


From 6b38e842bb832a3dbeb17e382404aef3c40ac5f9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 30 Dec 2008 02:03:31 -0500
Subject: nfsd race fixes: ext4

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 08cac9f..6e60528 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -826,7 +826,10 @@ got:
 	ext4_set_inode_flags(inode);
 	if (IS_DIRSYNC(inode))
 		handle->h_sync = 1;
-	insert_inode_hash(inode);
+	if (insert_inode_locked(inode) < 0) {
+		err = -EINVAL;
+		goto fail_drop;
+	}
 	spin_lock(&sbi->s_next_gen_lock);
 	inode->i_generation = sbi->s_next_generation++;
 	spin_unlock(&sbi->s_next_gen_lock);
@@ -881,6 +884,7 @@ fail_drop:
 	DQUOT_DROP(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
+	unlock_new_inode(inode);
 	iput(inode);
 	brelse(bitmap_bh);
 	return ERR_PTR(err);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 63adcb7..da98a90 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1693,9 +1693,11 @@ static int ext4_add_nondir(handle_t *handle,
 	if (!err) {
 		ext4_mark_inode_dirty(handle, inode);
 		d_instantiate(dentry, inode);
+		unlock_new_inode(inode);
 		return 0;
 	}
 	drop_nlink(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 	return err;
 }
@@ -1830,6 +1832,7 @@ retry:
 	if (err) {
 out_clear_inode:
 		clear_nlink(inode);
+		unlock_new_inode(inode);
 		ext4_mark_inode_dirty(handle, inode);
 		iput(inode);
 		goto out_stop;
@@ -1838,6 +1841,7 @@ out_clear_inode:
 	ext4_update_dx_flag(dir);
 	ext4_mark_inode_dirty(handle, dir);
 	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
 out_stop:
 	ext4_journal_stop(handle);
 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
@@ -2212,6 +2216,7 @@ retry:
 				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
 		if (err) {
 			clear_nlink(inode);
+			unlock_new_inode(inode);
 			ext4_mark_inode_dirty(handle, inode);
 			iput(inode);
 			goto out_stop;
@@ -2262,7 +2267,14 @@ retry:
 	ext4_inc_count(handle, inode);
 	atomic_inc(&inode->i_count);
 
-	err = ext4_add_nondir(handle, dentry, inode);
+	err = ext4_add_entry(handle, dentry, inode);
+	if (!err) {
+		ext4_mark_inode_dirty(handle, inode);
+		d_instantiate(dentry, inode);
+	} else {
+		drop_nlink(inode);
+		iput(inode);
+	}
 	ext4_journal_stop(handle);
 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
-- 
cgit v0.10.2


From c1eaa26b671299b3ec01d40c6c71ee19a4f81517 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 30 Dec 2008 02:03:58 -0500
Subject: nfsd race fixes: reiserfs

... and the same for reiserfs.  The difference here is that we need
insert_inode_locked4() to match iget5_locked().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6c4c2c6..145c2d3 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1753,6 +1753,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		       struct inode *inode)
 {
 	struct super_block *sb;
+	struct reiserfs_iget_args args;
 	INITIALIZE_PATH(path_to_key);
 	struct cpu_key key;
 	struct item_head ih;
@@ -1780,6 +1781,14 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		err = -ENOMEM;
 		goto out_bad_inode;
 	}
+	args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
+	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
+	args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
+	if (insert_inode_locked4(inode, args.objectid,
+			     reiserfs_find_actor, &args) < 0) {
+		err = -EINVAL;
+		goto out_bad_inode;
+	}
 	if (old_format_only(sb))
 		/* not a perfect generation count, as object ids can be reused, but 
 		 ** this is as good as reiserfs can do right now.
@@ -1859,13 +1868,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	} else {
 		inode2sd(&sd, inode, inode->i_size);
 	}
-	// these do not go to on-disk stat data
-	inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
-
 	// store in in-core inode the key of stat data and version all
 	// object items will have (directory items will have old offset
 	// format, other new objects will consist of new items)
-	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
 	if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
 		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
 	else
@@ -1929,7 +1934,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		reiserfs_mark_inode_private(inode);
 	}
 
-	insert_inode_hash(inode);
 	reiserfs_update_sd(th, inode);
 	reiserfs_check_path(&path_to_key);
 
@@ -1956,6 +1960,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
       out_inserted_sd:
 	inode->i_nlink = 0;
 	th->t_trans_id = 0;	/* so the caller can't use this handle later */
+	unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
 
 	/* If we were inheriting an ACL, we need to release the lock so that
 	 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 4f322e5..738967f 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -646,6 +646,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
 		err = journal_end(&th, dir->i_sb, jbegin_count);
 		if (err)
 			retval = err;
+		unlock_new_inode(inode);
 		iput(inode);
 		goto out_failed;
 	}
@@ -653,6 +654,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	reiserfs_update_inode_transaction(dir);
 
 	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
 
       out_failed:
@@ -727,11 +729,13 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
 		err = journal_end(&th, dir->i_sb, jbegin_count);
 		if (err)
 			retval = err;
+		unlock_new_inode(inode);
 		iput(inode);
 		goto out_failed;
 	}
 
 	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
 
       out_failed:
@@ -812,6 +816,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		err = journal_end(&th, dir->i_sb, jbegin_count);
 		if (err)
 			retval = err;
+		unlock_new_inode(inode);
 		iput(inode);
 		goto out_failed;
 	}
@@ -819,6 +824,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	reiserfs_update_sd(&th, dir);
 
 	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
       out_failed:
 	if (locked)
@@ -1096,11 +1102,13 @@ static int reiserfs_symlink(struct inode *parent_dir,
 		err = journal_end(&th, parent_dir->i_sb, jbegin_count);
 		if (err)
 			retval = err;
+		unlock_new_inode(inode);
 		iput(inode);
 		goto out_failed;
 	}
 
 	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
 	retval = journal_end(&th, parent_dir->i_sb, jbegin_count);
       out_failed:
 	reiserfs_write_unlock(parent_dir->i_sb);
-- 
cgit v0.10.2


From 1f3403fa640f9f7b135dee79f2d39d01c8ad4a08 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Date: Tue, 30 Dec 2008 22:08:37 -0600
Subject: nfsd race fixes: jfs

jfs version of Al Viro's nfsd race patches

Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 70022fd..d4d142c 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -79,7 +79,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	inode = new_inode(sb);
 	if (!inode) {
 		jfs_warn("ialloc: new_inode returned NULL!");
-		return ERR_PTR(-ENOMEM);
+		rc = -ENOMEM;
+		goto fail;
 	}
 
 	jfs_inode = JFS_IP(inode);
@@ -89,8 +90,12 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 		jfs_warn("ialloc: diAlloc returned %d!", rc);
 		if (rc == -EIO)
 			make_bad_inode(inode);
-		iput(inode);
-		return ERR_PTR(rc);
+		goto fail_put;
+	}
+
+	if (insert_inode_locked(inode) < 0) {
+		rc = -EINVAL;
+		goto fail_unlock;
 	}
 
 	inode->i_uid = current_fsuid();
@@ -112,11 +117,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	 * Allocate inode to quota.
 	 */
 	if (DQUOT_ALLOC_INODE(inode)) {
-		DQUOT_DROP(inode);
-		inode->i_flags |= S_NOQUOTA;
-		inode->i_nlink = 0;
-		iput(inode);
-		return ERR_PTR(-EDQUOT);
+		rc = -EDQUOT;
+		goto fail_drop;
 	}
 
 	inode->i_mode = mode;
@@ -158,4 +160,15 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	jfs_info("ialloc returns inode = 0x%p\n", inode);
 
 	return inode;
+
+fail_drop:
+	DQUOT_DROP(inode);
+	inode->i_flags |= S_NOQUOTA;
+fail_unlock:
+	inode->i_nlink = 0;
+	unlock_new_inode(inode);
+fail_put:
+	iput(inode);
+fail:
+	return ERR_PTR(rc);
 }
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index cc3cedf..b4de56b 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -155,7 +155,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 	ip->i_fop = &jfs_file_operations;
 	ip->i_mapping->a_ops = &jfs_aops;
 
-	insert_inode_hash(ip);
 	mark_inode_dirty(ip);
 
 	dip->i_ctime = dip->i_mtime = CURRENT_TIME;
@@ -171,9 +170,12 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
+		unlock_new_inode(ip);
 		iput(ip);
-	} else
+	} else {
 		d_instantiate(dentry, ip);
+		unlock_new_inode(ip);
+	}
 
       out2:
 	free_UCSname(&dname);
@@ -289,7 +291,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 	ip->i_op = &jfs_dir_inode_operations;
 	ip->i_fop = &jfs_dir_operations;
 
-	insert_inode_hash(ip);
 	mark_inode_dirty(ip);
 
 	/* update parent directory inode */
@@ -306,9 +307,12 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
+		unlock_new_inode(ip);
 		iput(ip);
-	} else
+	} else {
 		d_instantiate(dentry, ip);
+		unlock_new_inode(ip);
+	}
 
       out2:
 	free_UCSname(&dname);
@@ -1019,7 +1023,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 		goto out3;
 	}
 
-	insert_inode_hash(ip);
 	mark_inode_dirty(ip);
 
 	dip->i_ctime = dip->i_mtime = CURRENT_TIME;
@@ -1039,9 +1042,12 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
+		unlock_new_inode(ip);
 		iput(ip);
-	} else
+	} else {
 		d_instantiate(dentry, ip);
+		unlock_new_inode(ip);
+	}
 
       out2:
 	free_UCSname(&dname);
@@ -1399,7 +1405,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 	jfs_ip->dev = new_encode_dev(rdev);
 	init_special_inode(ip, ip->i_mode, rdev);
 
-	insert_inode_hash(ip);
 	mark_inode_dirty(ip);
 
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
@@ -1417,9 +1422,12 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
+		unlock_new_inode(ip);
 		iput(ip);
-	} else
+	} else {
 		d_instantiate(dentry, ip);
+		unlock_new_inode(ip);
+	}
 
       out1:
 	free_UCSname(&dname);
-- 
cgit v0.10.2


From f320786063a9d1f885d2cf34ab44aa69c1d88f43 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:13 +1030
Subject: cpumask: Remove IA64 definition of total_cpus now it's in core code

Impact: fix IA64 compile

Fortunately, they have exactly the same semantics.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 54ae373..0553648 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -202,7 +202,6 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
                             Boot-time Table Parsing
    -------------------------------------------------------------------------- */
 
-static int total_cpus __initdata;
 static int available_cpus __initdata;
 struct acpi_table_madt *acpi_madt __initdata;
 static u8 has_8259;
-- 
cgit v0.10.2


From e9690a6e4b1615cb0102e425e04b7ce29e7858e2 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 31 Dec 2008 16:45:50 +0800
Subject: cpumask: fix bogus kernel-doc

Impact: fix kernel-doc

alloc_bootmem_cpumask_var() returns avoid.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/lib/cpumask.c b/lib/cpumask.c
index a24edf1..8e1496c 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -132,7 +132,7 @@ EXPORT_SYMBOL(alloc_cpumask_var);
  * @mask: pointer to cpumask_var_t where the cpumask is returned
  *
  * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is
- * a nop returning a constant 1 (in <linux/cpumask.h>)
+ * a nop (in <linux/cpumask.h>).
  * Either returns an allocated (zero-filled) cpumask, or causes the
  * system to panic.
  */
-- 
cgit v0.10.2


From 6aaa8ce523c7ce954b81b8c0b3e32c8be599af8d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:14 +1030
Subject: percpu: fix percpu accessors to potentially !cpu_possible() cpus:
 pnpbios

Impact: CPU iterator bugfixes

Percpu areas are only allocated for possible cpus.  In general, you
shouldn't access random cpu's percpu areas.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Adam Belay <ambx1@neo.rr.com>

diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index 7ff8244..7e6b5a3 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -481,7 +481,7 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
 
 	set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
 	_set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
-	for (i = 0; i < NR_CPUS; i++) {
+	for_each_possible_cpu(i) {
 		struct desc_struct *gdt = get_cpu_gdt_table(i);
 		if (!gdt)
 			continue;
-- 
cgit v0.10.2


From 9e2f913df70b378379a358a44e7d286f7b765e8e Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:14 +1030
Subject: percpu: fix percpu accessors to potentially !cpu_possible() cpus:
 m32r

Impact: CPU iterator bugfixes

Percpu areas are only allocated for possible cpus.  In general, you
shouldn't access random cpu's percpu areas.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Hirokazu Takata <takata@linux-m32r.org>

diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index 0f06b37..2547d6c 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -592,7 +592,7 @@ int setup_profiling_timer(unsigned int multiplier)
 	 * accounting. At that time they also adjust their APIC timers
 	 * accordingly.
 	 */
-	for (i = 0; i < NR_CPUS; ++i)
+	for_each_possible_cpu(i)
 		per_cpu(prof_multiplier, i) = multiplier;
 
 	return 0;
-- 
cgit v0.10.2


From 4f4b6c1a94a8735bbdc030a2911cf395495645b6 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:15 +1030
Subject: cpumask: prepare for iterators to only go to
 nr_cpu_ids/nr_cpumask_bits.: core

Impact: cleanup

In future, all cpumask ops will only be valid (in general) for bit
numbers < nr_cpu_ids.  So use that instead of NR_CPUS in iterators
and other comparisons.

This is always safe: no cpu number can be >= nr_cpu_ids, and
nr_cpu_ids is initialized to NR_CPUS at boot.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: James Morris <jmorris@namei.org>
Cc: Eric Biederman <ebiederm@xmission.com>

diff --git a/kernel/kexec.c b/kernel/kexec.c
index ac0fde7..3fb855a 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1116,7 +1116,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu)
 	struct elf_prstatus prstatus;
 	u32 *buf;
 
-	if ((cpu < 0) || (cpu >= NR_CPUS))
+	if ((cpu < 0) || (cpu >= nr_cpu_ids))
 		return;
 
 	/* Using ELF notes here is opportunistic.
diff --git a/kernel/smp.c b/kernel/smp.c
index 172b182..5cfa0e5 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -223,7 +223,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 		local_irq_save(flags);
 		func(info);
 		local_irq_restore(flags);
-	} else if ((unsigned)cpu < NR_CPUS && cpu_online(cpu)) {
+	} else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
 		struct call_single_data *data = NULL;
 
 		if (!wait) {
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index c863036..e552099 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1211,7 +1211,7 @@ static struct avc_cache_stats *sel_avc_get_stat_idx(loff_t *idx)
 {
 	int cpu;
 
-	for (cpu = *idx; cpu < NR_CPUS; ++cpu) {
+	for (cpu = *idx; cpu < nr_cpu_ids; ++cpu) {
 		if (!cpu_possible(cpu))
 			continue;
 		*idx = cpu + 1;
-- 
cgit v0.10.2


From 915441b601e6662e79f6c958e7be307967a96977 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:15 +1030
Subject: cpumask: Use accessors code in core

Impact: use new API

cpu_*_map are going away in favour of cpu_*_mask, but const pointers.
So we have accessors where we really do want to frob them.  Archs
will also need the (trivial) conversion before we can finally remove
cpu_*_map.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/init/main.c b/init/main.c
index 2a7ce0f..84d3732 100644
--- a/init/main.c
+++ b/init/main.c
@@ -527,9 +527,9 @@ static void __init boot_cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	/* Mark the boot cpu "present", "online" etc for SMP and UP case */
-	cpu_set(cpu, cpu_online_map);
-	cpu_set(cpu, cpu_present_map);
-	cpu_set(cpu, cpu_possible_map);
+	set_cpu_online(cpu, true);
+	set_cpu_present(cpu, true);
+	set_cpu_possible(cpu, true);
 }
 
 void __init __weak smp_setup_processor_id(void)
-- 
cgit v0.10.2


From 165ac433fa3f01ba99b29972f3adc283d03b0f17 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:16 +1030
Subject: parisc: remove gratuitous cpu_online_map declaration.

This is defined in linux/cpumask.h (included in this file already),
and this is now defined differently.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: linux-parisc@vger.kernel.org

diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h
index 409e698..6ef4b78 100644
--- a/arch/parisc/include/asm/smp.h
+++ b/arch/parisc/include/asm/smp.h
@@ -16,8 +16,6 @@
 #include <linux/cpumask.h>
 typedef unsigned long address_t;
 
-extern cpumask_t cpu_online_map;
-
 
 /*
  *	Private routines/data
-- 
cgit v0.10.2


From 96b8d4c19d797200b973caab57ca842531184c13 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:16 +1030
Subject: avr32: define __fls

Like fls, but can't be handed 0 and returns the bit number.

(I broke this arch in linux-next by using __fls in generic code).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/avr32/include/asm/bitops.h b/arch/avr32/include/asm/bitops.h
index 1a50b69..f7dd5f7 100644
--- a/arch/avr32/include/asm/bitops.h
+++ b/arch/avr32/include/asm/bitops.h
@@ -263,6 +263,11 @@ static inline int fls(unsigned long word)
 	return 32 - result;
 }
 
+static inline int __fls(unsigned long word)
+{
+	return fls(word) - 1;
+}
+
 unsigned long find_first_zero_bit(const unsigned long *addr,
 				  unsigned long size);
 unsigned long find_next_zero_bit(const unsigned long *addr,
-- 
cgit v0.10.2


From ccec25ff69d5f48c7a088c16fe2dc7e11d9e87fe Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:17 +1030
Subject: blackfin: define __fls

Like fls, but can't be handed 0 and returns the bit number.

(I broke this arch in linux-next by using __fls in generic code).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/bitops.h b/arch/blackfin/include/asm/bitops.h
index b39a175..c428e41 100644
--- a/arch/blackfin/include/asm/bitops.h
+++ b/arch/blackfin/include/asm/bitops.h
@@ -213,6 +213,7 @@ static __inline__ int __test_bit(int nr, const void *addr)
 #endif				/* __KERNEL__ */
 
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #endif				/* _BLACKFIN_BITOPS_H */
-- 
cgit v0.10.2


From 434ae514c23047db87a8bbf39cebc9e1767aea44 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:18 +1030
Subject: m68k: define __fls

Like fls, but can't be handed 0 and returns the bit number.

(I broke this arch in linux-next by using __fls in generic code).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/asm-m68k/bitops.h b/include/asm-m68k/bitops.h
index 3e81064..9bde784 100644
--- a/include/asm-m68k/bitops.h
+++ b/include/asm-m68k/bitops.h
@@ -315,6 +315,11 @@ static inline int fls(int x)
 	return 32 - cnt;
 }
 
+static inline int __fls(int x)
+{
+	return fls(x) - 1;
+}
+
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/hweight.h>
-- 
cgit v0.10.2


From 0db5d3d2f58804edb394e8008c7d9744110338a2 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:18 +1030
Subject: m68knommu: define __fls

Like fls, but can't be handed 0 and returns the bit number.

(I broke this arch in linux-next by using __fls in generic code).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/m68knommu/include/asm/bitops.h b/arch/m68knommu/include/asm/bitops.h
index 6f3685e..9d3cbe5 100644
--- a/arch/m68knommu/include/asm/bitops.h
+++ b/arch/m68knommu/include/asm/bitops.h
@@ -331,6 +331,7 @@ found_middle:
 #endif /* __KERNEL__ */
 
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #endif /* _M68KNOMMU_BITOPS_H */
-- 
cgit v0.10.2


From ab53d472e785e51fdfc08fc1d66252c1153e6c0f Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:19 +1030
Subject: bitmap: find_last_bit()

Impact: New API

As the name suggests.  For the moment everyone uses the generic one.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 024f2b0..6182913 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -134,9 +134,20 @@ extern unsigned long find_first_bit(const unsigned long *addr,
  */
 extern unsigned long find_first_zero_bit(const unsigned long *addr,
 					 unsigned long size);
-
 #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
 
+#ifdef CONFIG_GENERIC_FIND_LAST_BIT
+/**
+ * find_last_bit - find the last set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the first set bit, or size.
+ */
+extern unsigned long find_last_bit(const unsigned long *addr,
+				   unsigned long size);
+#endif /* CONFIG_GENERIC_FIND_LAST_BIT */
+
 #ifdef CONFIG_GENERIC_FIND_NEXT_BIT
 
 /**
diff --git a/lib/Kconfig b/lib/Kconfig
index 2ba43c4..fc5f5ee 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -13,6 +13,10 @@ config GENERIC_FIND_FIRST_BIT
 config GENERIC_FIND_NEXT_BIT
 	bool
 
+config GENERIC_FIND_LAST_BIT
+	bool
+	default y
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 80fe8a3..32b0e64 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -37,6 +37,7 @@ lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
+lib-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
 obj-$(CONFIG_PLIST) += plist.o
diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
new file mode 100644
index 0000000..5d202e3
--- /dev/null
+++ b/lib/find_last_bit.c
@@ -0,0 +1,45 @@
+/* find_last_bit.c: fallback find next bit implementation
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * Written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <asm/types.h>
+#include <asm/byteorder.h>
+
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long words;
+	unsigned long tmp;
+
+	/* Start at final word. */
+	words = size / BITS_PER_LONG;
+
+	/* Partial final word? */
+	if (size & (BITS_PER_LONG-1)) {
+		tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
+					 - (size & (BITS_PER_LONG-1)))));
+		if (tmp)
+			goto found;
+	}
+
+	while (words) {
+		tmp = addr[--words];
+		if (tmp) {
+found:
+			return words * BITS_PER_LONG + __fls(tmp);
+		}
+	}
+
+	/* Not found */
+	return size;
+}
+EXPORT_SYMBOL(find_last_bit);
-- 
cgit v0.10.2


From e0c0ba736547e81c4f986ce192307c549d214167 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:19 +1030
Subject: cpumask: Use find_last_bit()

Impact: cleanup

There's one obvious place to use it: to find the highest possible cpu.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/init/main.c b/init/main.c
index 84d3732..546ebd2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -380,12 +380,7 @@ EXPORT_SYMBOL(nr_cpu_ids);
 /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
 static void __init setup_nr_cpu_ids(void)
 {
-	int cpu, highest_cpu = 0;
-
-	for_each_possible_cpu(cpu)
-		highest_cpu = cpu;
-
-	nr_cpu_ids = highest_cpu + 1;
+	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
 }
 
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
-- 
cgit v0.10.2


From 78fd744f827586615da5b387fa9f0af1888601b6 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:20 +1030
Subject: cpumask: Introduce topology_core_cpumask()/topology_thread_cpumask():
 sparc

Impact: New API

The old topology_core_siblings() and topology_thread_siblings() return
a cpumask_t; these new ones return a (const) struct cpumask *.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index 3270cfb..b8a65b6 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -79,6 +79,8 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
 #define topology_core_id(cpu)			(cpu_data(cpu).core_id)
 #define topology_core_siblings(cpu)		(cpu_core_map[cpu])
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)		(&cpu_core_map[cpu])
+#define topology_thread_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
 #define mc_capable()				(sparc64_multi_core)
 #define smt_capable()				(sparc64_multi_core)
 #endif /* CONFIG_SMP */
-- 
cgit v0.10.2


From 2bb23a63f22f0e2d91fee93ff5ca9c29e180b146 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:20 +1030
Subject: cpumask: Introduce topology_core_cpumask()/topology_thread_cpumask():
 s390

Impact: New API

The old topology_core_siblings() and topology_thread_siblings() return
a cpumask_t; these new ones return a (const) struct cpumask *.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fff4a86..c93eb50 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -11,6 +11,7 @@ const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
 extern cpumask_t cpu_core_map[NR_CPUS];
 
 #define topology_core_siblings(cpu)	(cpu_core_map[cpu])
+#define topology_core_cpumask(cpu)	(&cpu_core_map[cpu])
 
 int topology_set_cpu_management(int fc);
 void topology_schedule_update(void);
-- 
cgit v0.10.2


From 9150641dd17fe9e213ab3391c8ebfc228daa2d9d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:21 +1030
Subject: cpumask: Introduce topology_core_cpumask()/topology_thread_cpumask():
 powerpc

Impact: New API

The old topology_core_siblings() and topology_thread_siblings() return
a cpumask_t; these new ones return a (const) struct cpumask *.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index bcf25c2..236dae1 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -113,6 +113,8 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
 
 #define topology_thread_siblings(cpu)	(per_cpu(cpu_sibling_map, cpu))
 #define topology_core_siblings(cpu)	(per_cpu(cpu_core_map, cpu))
+#define topology_thread_cpumask(cpu)	(&per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)	(&per_cpu(cpu_core_map, cpu))
 #define topology_core_id(cpu)		(cpu_to_core_id(cpu))
 #endif
 #endif
-- 
cgit v0.10.2


From 333af15341b2f6cd813c054e1b441d7b6d8e9318 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:21 +1030
Subject: cpumask: Introduce topology_core_cpumask()/topology_thread_cpumask():
 ia64

Impact: New API

The old topology_core_siblings() and topology_thread_siblings() return
a cpumask_t; these new ones return a (const) struct cpumask *.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 66f0f1e..97ae7f5 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -112,6 +112,8 @@ void build_cpu_to_node_map(void);
 #define topology_core_id(cpu)			(cpu_data(cpu)->core_id)
 #define topology_core_siblings(cpu)		(cpu_core_map[cpu])
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)		(&cpu_core_map[cpu])
+#define topology_thread_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
 #define smt_capable() 				(smp_num_siblings > 1)
 #endif
 
-- 
cgit v0.10.2


From 9e01c1b74c9531e301c900edaa92a99fcb7738f2 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:22 +1030
Subject: cpumask: convert kernel trace functions

Impact: Reduce future memory usage, use new cpumask API.

(Eventually, cpumask_var_t will be allocated based on nr_cpu_ids, not NR_CPUS).

Convert kernel trace functions to use struct cpumask API:
1) Use cpumask_copy/cpumask_test_cpu/for_each_cpu.
2) Use cpumask_var_t and alloc_cpumask_var/free_cpumask_var everywhere.
3) Use on_each_cpu instead of playing with current->cpus_allowed.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1d601a7..a9d9760 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -195,7 +195,7 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
 EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 
 #define for_each_buffer_cpu(buffer, cpu)		\
-	for_each_cpu_mask(cpu, buffer->cpumask)
+	for_each_cpu(cpu, buffer->cpumask)
 
 #define TS_SHIFT	27
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
@@ -267,7 +267,7 @@ struct ring_buffer {
 	unsigned			pages;
 	unsigned			flags;
 	int				cpus;
-	cpumask_t			cpumask;
+	cpumask_var_t			cpumask;
 	atomic_t			record_disabled;
 
 	struct mutex			mutex;
@@ -458,6 +458,9 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	if (!buffer)
 		return NULL;
 
+	if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
+		goto fail_free_buffer;
+
 	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	buffer->flags = flags;
 
@@ -465,14 +468,14 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	if (buffer->pages == 1)
 		buffer->pages++;
 
-	buffer->cpumask = cpu_possible_map;
+	cpumask_copy(buffer->cpumask, cpu_possible_mask);
 	buffer->cpus = nr_cpu_ids;
 
 	bsize = sizeof(void *) * nr_cpu_ids;
 	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
 				  GFP_KERNEL);
 	if (!buffer->buffers)
-		goto fail_free_buffer;
+		goto fail_free_cpumask;
 
 	for_each_buffer_cpu(buffer, cpu) {
 		buffer->buffers[cpu] =
@@ -492,6 +495,9 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	}
 	kfree(buffer->buffers);
 
+ fail_free_cpumask:
+	free_cpumask_var(buffer->cpumask);
+
  fail_free_buffer:
 	kfree(buffer);
 	return NULL;
@@ -510,6 +516,8 @@ ring_buffer_free(struct ring_buffer *buffer)
 	for_each_buffer_cpu(buffer, cpu)
 		rb_free_cpu_buffer(buffer->buffers[cpu]);
 
+	free_cpumask_var(buffer->cpumask);
+
 	kfree(buffer);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_free);
@@ -1283,7 +1291,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 
 	cpu = raw_smp_processor_id();
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1396,7 +1404,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
 	cpu = raw_smp_processor_id();
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1478,7 +1486,7 @@ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1498,7 +1506,7 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1515,7 +1523,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 0;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1532,7 +1540,7 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 0;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1850,7 +1858,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct buffer_page *reader;
 	int nr_loops = 0;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -2025,7 +2033,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct ring_buffer_event *event;
 	unsigned long flags;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -2062,7 +2070,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	struct ring_buffer_iter *iter;
 	unsigned long flags;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
 	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
@@ -2172,7 +2180,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	unsigned long flags;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -2228,7 +2236,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 1;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -2252,8 +2260,8 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 	struct ring_buffer_per_cpu *cpu_buffer_a;
 	struct ring_buffer_per_cpu *cpu_buffer_b;
 
-	if (!cpu_isset(cpu, buffer_a->cpumask) ||
-	    !cpu_isset(cpu, buffer_b->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
+	    !cpumask_test_cpu(cpu, buffer_b->cpumask))
 		return -EINVAL;
 
 	/* At least make sure the two buffers are somewhat the same */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0e91f43..5d04e27 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -89,10 +89,10 @@ static inline void ftrace_enable_cpu(void)
 	preempt_enable();
 }
 
-static cpumask_t __read_mostly		tracing_buffer_mask;
+static cpumask_var_t __read_mostly	tracing_buffer_mask;
 
 #define for_each_tracing_cpu(cpu)	\
-	for_each_cpu_mask(cpu, tracing_buffer_mask)
+	for_each_cpu(cpu, tracing_buffer_mask)
 
 /*
  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
@@ -2646,13 +2646,7 @@ static struct file_operations show_traces_fops = {
 /*
  * Only trace on a CPU if the bitmask is set:
  */
-static cpumask_t tracing_cpumask = CPU_MASK_ALL;
-
-/*
- * When tracing/tracing_cpu_mask is modified then this holds
- * the new bitmask we are about to install:
- */
-static cpumask_t tracing_cpumask_new;
+static cpumask_var_t tracing_cpumask;
 
 /*
  * The tracer itself will not take this lock, but still we want
@@ -2674,7 +2668,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf,
 
 	mutex_lock(&tracing_cpumask_update_lock);
 
-	len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
+	len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
 	if (count - len < 2) {
 		count = -EINVAL;
 		goto out_err;
@@ -2693,9 +2687,13 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 		      size_t count, loff_t *ppos)
 {
 	int err, cpu;
+	cpumask_var_t tracing_cpumask_new;
+
+	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
+		return -ENOMEM;
 
 	mutex_lock(&tracing_cpumask_update_lock);
-	err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
+	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
 	if (err)
 		goto err_unlock;
 
@@ -2706,26 +2704,28 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 		 * Increase/decrease the disabled counter if we are
 		 * about to flip a bit in the cpumask:
 		 */
-		if (cpu_isset(cpu, tracing_cpumask) &&
-				!cpu_isset(cpu, tracing_cpumask_new)) {
+		if (cpumask_test_cpu(cpu, tracing_cpumask) &&
+				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
 			atomic_inc(&global_trace.data[cpu]->disabled);
 		}
-		if (!cpu_isset(cpu, tracing_cpumask) &&
-				cpu_isset(cpu, tracing_cpumask_new)) {
+		if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
+				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
 			atomic_dec(&global_trace.data[cpu]->disabled);
 		}
 	}
 	__raw_spin_unlock(&ftrace_max_lock);
 	local_irq_enable();
 
-	tracing_cpumask = tracing_cpumask_new;
+	cpumask_copy(tracing_cpumask, tracing_cpumask_new);
 
 	mutex_unlock(&tracing_cpumask_update_lock);
+	free_cpumask_var(tracing_cpumask_new);
 
 	return count;
 
 err_unlock:
 	mutex_unlock(&tracing_cpumask_update_lock);
+	free_cpumask_var(tracing_cpumask);
 
 	return err;
 }
@@ -3752,7 +3752,6 @@ void ftrace_dump(void)
 	static DEFINE_SPINLOCK(ftrace_dump_lock);
 	/* use static because iter can be a bit big for the stack */
 	static struct trace_iterator iter;
-	static cpumask_t mask;
 	static int dump_ran;
 	unsigned long flags;
 	int cnt = 0, cpu;
@@ -3786,8 +3785,6 @@ void ftrace_dump(void)
 	 * and then release the locks again.
 	 */
 
-	cpus_clear(mask);
-
 	while (!trace_empty(&iter)) {
 
 		if (!cnt)
@@ -3823,19 +3820,28 @@ __init static int tracer_alloc_buffers(void)
 {
 	struct trace_array_cpu *data;
 	int i;
+	int ret = -ENOMEM;
 
-	/* TODO: make the number of buffers hot pluggable with CPUS */
-	tracing_buffer_mask = cpu_possible_map;
+	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
+		goto out;
+
+	if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
+		goto out_free_buffer_mask;
 
+	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
+	cpumask_copy(tracing_cpumask, cpu_all_mask);
+
+	/* TODO: make the number of buffers hot pluggable with CPUS */
 	global_trace.buffer = ring_buffer_alloc(trace_buf_size,
 						   TRACE_BUFFER_FLAGS);
 	if (!global_trace.buffer) {
 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
 		WARN_ON(1);
-		return 0;
+		goto out_free_cpumask;
 	}
 	global_trace.entries = ring_buffer_size(global_trace.buffer);
 
+
 #ifdef CONFIG_TRACER_MAX_TRACE
 	max_tr.buffer = ring_buffer_alloc(trace_buf_size,
 					     TRACE_BUFFER_FLAGS);
@@ -3843,7 +3849,7 @@ __init static int tracer_alloc_buffers(void)
 		printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
 		WARN_ON(1);
 		ring_buffer_free(global_trace.buffer);
-		return 0;
+		goto out_free_cpumask;
 	}
 	max_tr.entries = ring_buffer_size(max_tr.buffer);
 	WARN_ON(max_tr.entries != global_trace.entries);
@@ -3873,8 +3879,14 @@ __init static int tracer_alloc_buffers(void)
 				       &trace_panic_notifier);
 
 	register_die_notifier(&trace_die_notifier);
+	ret = 0;
 
-	return 0;
+out_free_cpumask:
+	free_cpumask_var(tracing_cpumask);
+out_free_buffer_mask:
+	free_cpumask_var(tracing_buffer_mask);
+out:
+	return ret;
 }
 early_initcall(tracer_alloc_buffers);
 fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index a5779bd..eaca5ad 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -196,9 +196,9 @@ static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
-static void start_stack_timer(int cpu)
+static void start_stack_timer(void *unused)
 {
-	struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
+	struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);
 
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = stack_trace_timer_fn;
@@ -208,14 +208,7 @@ static void start_stack_timer(int cpu)
 
 static void start_stack_timers(void)
 {
-	cpumask_t saved_mask = current->cpus_allowed;
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-		start_stack_timer(cpu);
-	}
-	set_cpus_allowed_ptr(current, &saved_mask);
+	on_each_cpu(start_stack_timer, NULL, 1);
 }
 
 static void stop_stack_timer(int cpu)
-- 
cgit v0.10.2


From 4462344ee9ea9224d026801b877887f2f39774a3 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:23 +1030
Subject: cpumask: convert kernel trace functions further

Impact: Reduce future memory usage, use new cpumask API.

Since the last patch was created and acked, more old cpumask users
slipped into kernel/trace.

Mostly trivial conversions, except struct trace_iterator's "started"
member becomes a cpumask_var_t.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5d04e27..c580233 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1811,10 +1811,10 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
 		return;
 
-	if (cpu_isset(iter->cpu, iter->started))
+	if (cpumask_test_cpu(iter->cpu, iter->started))
 		return;
 
-	cpu_set(iter->cpu, iter->started);
+	cpumask_set_cpu(iter->cpu, iter->started);
 	trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
 }
 
@@ -3114,10 +3114,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	if (!iter)
 		return -ENOMEM;
 
+	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
+		kfree(iter);
+		return -ENOMEM;
+	}
+
 	mutex_lock(&trace_types_lock);
 
 	/* trace pipe does not show start of buffer */
-	cpus_setall(iter->started);
+	cpumask_setall(iter->started);
 
 	iter->tr = &global_trace;
 	iter->trace = current_trace;
@@ -3134,6 +3139,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
 	struct trace_iterator *iter = file->private_data;
 
+	free_cpumask_var(iter->started);
 	kfree(iter);
 	atomic_dec(&tracing_reader);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cc7a4f8..4d3d381 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -368,7 +368,7 @@ struct trace_iterator {
 	loff_t			pos;
 	long			idx;
 
-	cpumask_t		started;
+	cpumask_var_t		started;
 };
 
 int tracing_is_enabled(void);
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 3ccebde..366c8c3 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -42,7 +42,7 @@ static int boot_trace_init(struct trace_array *tr)
 	int cpu;
 	boot_trace = tr;
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		tracing_reset(tr, cpu);
 
 	tracing_sched_switch_assign_trace(tr);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4bf39fc..930c08e 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -79,7 +79,7 @@ print_graph_cpu(struct trace_seq *s, int cpu)
 	int i;
 	int ret;
 	int log10_this = log10_cpu(cpu);
-	int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
+	int log10_all = log10_cpu(cpumask_weight(cpu_online_mask));
 
 
 	/*
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index b6a3e20..649df22 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -46,7 +46,7 @@ static void bts_trace_start(struct trace_array *tr)
 
 	tracing_reset_online_cpus(tr);
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
 }
 
@@ -62,7 +62,7 @@ static void bts_trace_stop(struct trace_array *tr)
 {
 	int cpu;
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
 }
 
@@ -172,7 +172,7 @@ static void trace_bts_prepare(struct trace_iterator *iter)
 {
 	int cpu;
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
 }
 
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index a7172a3..7bda248 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -39,7 +39,7 @@ static int power_trace_init(struct trace_array *tr)
 
 	trace_power_enabled = 1;
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		tracing_reset(tr, cpu);
 	return 0;
 }
-- 
cgit v0.10.2


From f1fc057c79cb2d27602fb3ad08a031f13459ef27 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:23 +1030
Subject: cpumask: remove any_online_cpu() users: kernel/

Impact: Remove obsolete API usage

any_online_cpu() is a good name, but it takes a cpumask_t, not a
pointer.

There are several places where any_online_cpu() doesn't really want a
mask arg at all.  Replace all callers with cpumask_any() and
cpumask_any_and().

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 466e75c..b7568d7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -733,7 +733,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
 			break;
 		/* Unbind so it can run.  Fall thru. */
 		kthread_bind(per_cpu(ksoftirqd, hotcpu),
-			     any_online_cpu(cpu_online_map));
+			     cpumask_any(cpu_online_mask));
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN: {
 		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 1ab790c..492f0c7 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -303,7 +303,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		check_cpu = any_online_cpu(cpu_online_map);
+		check_cpu = cpumask_any(cpu_online_mask);
 		wake_up_process(per_cpu(watchdog_task, hotcpu));
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
@@ -313,7 +313,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 			cpumask_t temp_cpu_online_map = cpu_online_map;
 
 			cpu_clear(hotcpu, temp_cpu_online_map);
-			check_cpu = any_online_cpu(temp_cpu_online_map);
+			check_cpu = cpumask_any(&temp_cpu_online_map);
 		}
 		break;
 
@@ -323,7 +323,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 			break;
 		/* Unbind so it can run.  Fall thru. */
 		kthread_bind(per_cpu(watchdog_task, hotcpu),
-			     any_online_cpu(cpu_online_map));
+			     cpumask_any(cpu_online_mask));
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		p = per_cpu(watchdog_task, hotcpu);
-- 
cgit v0.10.2


From 3e597945384dee1457240158eb81e3afb90b68c2 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:24 +1030
Subject: cpumask: remove any_online_cpu() users: mm/

Impact: Remove obsolete API usage

any_online_cpu() is a good name, but it takes a cpumask_t, not a
pointer.

There are several places where any_online_cpu() doesn't really want a
mask arg at all.  Replace all callers with cpumask_any() and
cpumask_any_and().

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 62e7f62..240f062 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2141,7 +2141,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
 			pg_data_t *pgdat = NODE_DATA(nid);
 			node_to_cpumask_ptr(mask, pgdat->node_id);
 
-			if (any_online_cpu(*mask) < nr_cpu_ids)
+			if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
 				/* One of our CPUs online: restore mask */
 				set_cpus_allowed_ptr(pgdat->kswapd, mask);
 		}
-- 
cgit v0.10.2


From a45185d2d7108b01b90b9e0293377be4d6346dde Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:24 +1030
Subject: cpumask: convert kernel/compat.c

Impact: Reduce stack usage, use new cpumask API.

Straightforward conversion; cpumasks' size is given by cpumask_size() (now
a variable rather than fixed) and on-stack cpu masks use cpumask_var_t.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/kernel/compat.c b/kernel/compat.c
index 8eafe3e..d52e2ec 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -454,16 +454,16 @@ asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
 }
 
 static int compat_get_user_cpu_mask(compat_ulong_t __user *user_mask_ptr,
-				    unsigned len, cpumask_t *new_mask)
+				    unsigned len, struct cpumask *new_mask)
 {
 	unsigned long *k;
 
-	if (len < sizeof(cpumask_t))
-		memset(new_mask, 0, sizeof(cpumask_t));
-	else if (len > sizeof(cpumask_t))
-		len = sizeof(cpumask_t);
+	if (len < cpumask_size())
+		memset(new_mask, 0, cpumask_size());
+	else if (len > cpumask_size())
+		len = cpumask_size();
 
-	k = cpus_addr(*new_mask);
+	k = cpumask_bits(new_mask);
 	return compat_get_bitmap(k, user_mask_ptr, len * 8);
 }
 
@@ -471,40 +471,51 @@ asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid,
 					     unsigned int len,
 					     compat_ulong_t __user *user_mask_ptr)
 {
-	cpumask_t new_mask;
+	cpumask_var_t new_mask;
 	int retval;
 
-	retval = compat_get_user_cpu_mask(user_mask_ptr, len, &new_mask);
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	retval = compat_get_user_cpu_mask(user_mask_ptr, len, new_mask);
 	if (retval)
-		return retval;
+		goto out;
 
-	return sched_setaffinity(pid, &new_mask);
+	retval = sched_setaffinity(pid, new_mask);
+out:
+	free_cpumask_var(new_mask);
+	return retval;
 }
 
 asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len,
 					     compat_ulong_t __user *user_mask_ptr)
 {
 	int ret;
-	cpumask_t mask;
+	cpumask_var_t mask;
 	unsigned long *k;
-	unsigned int min_length = sizeof(cpumask_t);
+	unsigned int min_length = cpumask_size();
 
-	if (NR_CPUS <= BITS_PER_COMPAT_LONG)
+	if (nr_cpu_ids <= BITS_PER_COMPAT_LONG)
 		min_length = sizeof(compat_ulong_t);
 
 	if (len < min_length)
 		return -EINVAL;
 
-	ret = sched_getaffinity(pid, &mask);
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = sched_getaffinity(pid, mask);
 	if (ret < 0)
-		return ret;
+		goto out;
 
-	k = cpus_addr(mask);
+	k = cpumask_bits(mask);
 	ret = compat_put_bitmap(user_mask_ptr, k, min_length * 8);
-	if (ret)
-		return ret;
+	if (ret == 0)
+		ret = min_length;
 
-	return min_length;
+out:
+	free_cpumask_var(mask);
+	return ret;
 }
 
 int get_compat_itimerspec(struct itimerspec *dst,
-- 
cgit v0.10.2


From e7577c50f2fb2d1c167e2c04a4b4c2cc042acb82 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:25 +1030
Subject: cpumask: convert kernel/workqueue.c

Impact: Reduce memory usage, use new cpumask API.

cpu_populated_map becomes a cpumask_var_t, and cpu_singlethread_map is
simply a cpumask pointer: it's simply the cpumask containing the first
possible CPU anyway.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4952322..2f44583 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -73,7 +73,7 @@ static DEFINE_SPINLOCK(workqueue_lock);
 static LIST_HEAD(workqueues);
 
 static int singlethread_cpu __read_mostly;
-static cpumask_t cpu_singlethread_map __read_mostly;
+static const struct cpumask *cpu_singlethread_map __read_mostly;
 /*
  * _cpu_down() first removes CPU from cpu_online_map, then CPU_DEAD
  * flushes cwq->worklist. This means that flush_workqueue/wait_on_work
@@ -81,7 +81,7 @@ static cpumask_t cpu_singlethread_map __read_mostly;
  * use cpu_possible_map, the cpumask below is more a documentation
  * than optimization.
  */
-static cpumask_t cpu_populated_map __read_mostly;
+static cpumask_var_t cpu_populated_map __read_mostly;
 
 /* If it's single threaded, it isn't in the list of workqueues. */
 static inline int is_wq_single_threaded(struct workqueue_struct *wq)
@@ -89,10 +89,10 @@ static inline int is_wq_single_threaded(struct workqueue_struct *wq)
 	return wq->singlethread;
 }
 
-static const cpumask_t *wq_cpu_map(struct workqueue_struct *wq)
+static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq)
 {
 	return is_wq_single_threaded(wq)
-		? &cpu_singlethread_map : &cpu_populated_map;
+		? cpu_singlethread_map : cpu_populated_map;
 }
 
 static
@@ -410,7 +410,7 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
  */
 void flush_workqueue(struct workqueue_struct *wq)
 {
-	const cpumask_t *cpu_map = wq_cpu_map(wq);
+	const struct cpumask *cpu_map = wq_cpu_map(wq);
 	int cpu;
 
 	might_sleep();
@@ -532,7 +532,7 @@ static void wait_on_work(struct work_struct *work)
 {
 	struct cpu_workqueue_struct *cwq;
 	struct workqueue_struct *wq;
-	const cpumask_t *cpu_map;
+	const struct cpumask *cpu_map;
 	int cpu;
 
 	might_sleep();
@@ -903,7 +903,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
  */
 void destroy_workqueue(struct workqueue_struct *wq)
 {
-	const cpumask_t *cpu_map = wq_cpu_map(wq);
+	const struct cpumask *cpu_map = wq_cpu_map(wq);
 	int cpu;
 
 	cpu_maps_update_begin();
@@ -933,7 +933,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-		cpu_set(cpu, cpu_populated_map);
+		cpumask_set_cpu(cpu, cpu_populated_map);
 	}
 undo:
 	list_for_each_entry(wq, &workqueues, list) {
@@ -964,7 +964,7 @@ undo:
 	switch (action) {
 	case CPU_UP_CANCELED:
 	case CPU_POST_DEAD:
-		cpu_clear(cpu, cpu_populated_map);
+		cpumask_clear_cpu(cpu, cpu_populated_map);
 	}
 
 	return ret;
@@ -1017,9 +1017,11 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
 
 void __init init_workqueues(void)
 {
-	cpu_populated_map = cpu_online_map;
-	singlethread_cpu = first_cpu(cpu_possible_map);
-	cpu_singlethread_map = cpumask_of_cpu(singlethread_cpu);
+	alloc_cpumask_var(&cpu_populated_map, GFP_KERNEL);
+
+	cpumask_copy(cpu_populated_map, cpu_online_mask);
+	singlethread_cpu = cpumask_first(cpu_possible_mask);
+	cpu_singlethread_map = cpumask_of(singlethread_cpu);
 	hotcpu_notifier(workqueue_cpu_callback, 0);
 	keventd_wq = create_workqueue("events");
 	BUG_ON(!keventd_wq);
-- 
cgit v0.10.2


From 6b954823c24f04ed026a8517f6bab5abda279db8 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:25 +1030
Subject: cpumask: convert kernel time functions

Impact: Use new APIs

Convert kernel/time functions to use struct cpumask *.

Note the ugly bitmap declarations in tick-broadcast.c.  These should
be cpumask_var_t, but there was no obvious initialization function to
put the alloc_cpumask_var() calls in.  This was safe.

(Eventually 'struct cpumask' will be undefined for CONFIG_CPUMASK_OFFSTACK,
so we use a bitmap here to show we really mean it).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/include/linux/tick.h b/include/linux/tick.h
index b6ec818..469b82d 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -84,10 +84,10 @@ static inline void tick_cancel_sched_timer(int cpu) { }
 
 # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 extern struct tick_device *tick_get_broadcast_device(void);
-extern cpumask_t *tick_get_broadcast_mask(void);
+extern struct cpumask *tick_get_broadcast_mask(void);
 
 #  ifdef CONFIG_TICK_ONESHOT
-extern cpumask_t *tick_get_broadcast_oneshot_mask(void);
+extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 #  endif
 
 # endif /* BROADCAST */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 9ed2eec..32141b1 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -148,7 +148,7 @@ static void clocksource_watchdog(unsigned long data)
 		int next_cpu = next_cpu_nr(raw_smp_processor_id(), cpu_online_map);
 
 		if (next_cpu >= nr_cpu_ids)
-			next_cpu = first_cpu(cpu_online_map);
+			next_cpu = cpumask_first(cpu_online_mask);
 		watchdog_timer.expires += WATCHDOG_INTERVAL;
 		add_timer_on(&watchdog_timer, next_cpu);
 	}
@@ -173,7 +173,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 			watchdog_last = watchdog->read();
 			watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
 			add_timer_on(&watchdog_timer,
-				     first_cpu(cpu_online_map));
+				     cpumask_first(cpu_online_mask));
 		}
 	} else {
 		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
@@ -195,7 +195,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 				watchdog_timer.expires =
 					jiffies + WATCHDOG_INTERVAL;
 				add_timer_on(&watchdog_timer,
-					     first_cpu(cpu_online_map));
+					     cpumask_first(cpu_online_mask));
 			}
 		}
 	}
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 9590af2..356fac5 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -28,7 +28,9 @@
  */
 
 struct tick_device tick_broadcast_device;
-static cpumask_t tick_broadcast_mask;
+/* FIXME: Use cpumask_var_t. */
+static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
+static DECLARE_BITMAP(tmpmask, NR_CPUS);
 static DEFINE_SPINLOCK(tick_broadcast_lock);
 static int tick_broadcast_force;
 
@@ -46,9 +48,9 @@ struct tick_device *tick_get_broadcast_device(void)
 	return &tick_broadcast_device;
 }
 
-cpumask_t *tick_get_broadcast_mask(void)
+struct cpumask *tick_get_broadcast_mask(void)
 {
-	return &tick_broadcast_mask;
+	return to_cpumask(tick_broadcast_mask);
 }
 
 /*
@@ -72,7 +74,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
 
 	clockevents_exchange_device(NULL, dev);
 	tick_broadcast_device.evtdev = dev;
-	if (!cpus_empty(tick_broadcast_mask))
+	if (!cpumask_empty(tick_get_broadcast_mask()))
 		tick_broadcast_start_periodic(dev);
 	return 1;
 }
@@ -104,7 +106,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 	 */
 	if (!tick_device_is_functional(dev)) {
 		dev->event_handler = tick_handle_periodic;
-		cpu_set(cpu, tick_broadcast_mask);
+		cpumask_set_cpu(cpu, tick_get_broadcast_mask());
 		tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
 		ret = 1;
 	} else {
@@ -116,7 +118,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
 			int cpu = smp_processor_id();
 
-			cpu_clear(cpu, tick_broadcast_mask);
+			cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
 			tick_broadcast_clear_oneshot(cpu);
 		}
 	}
@@ -125,9 +127,9 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 }
 
 /*
- * Broadcast the event to the cpus, which are set in the mask
+ * Broadcast the event to the cpus, which are set in the mask (mangled).
  */
-static void tick_do_broadcast(cpumask_t mask)
+static void tick_do_broadcast(struct cpumask *mask)
 {
 	int cpu = smp_processor_id();
 	struct tick_device *td;
@@ -135,22 +137,21 @@ static void tick_do_broadcast(cpumask_t mask)
 	/*
 	 * Check, if the current cpu is in the mask
 	 */
-	if (cpu_isset(cpu, mask)) {
-		cpu_clear(cpu, mask);
+	if (cpumask_test_cpu(cpu, mask)) {
+		cpumask_clear_cpu(cpu, mask);
 		td = &per_cpu(tick_cpu_device, cpu);
 		td->evtdev->event_handler(td->evtdev);
 	}
 
-	if (!cpus_empty(mask)) {
+	if (!cpumask_empty(mask)) {
 		/*
 		 * It might be necessary to actually check whether the devices
 		 * have different broadcast functions. For now, just use the
 		 * one of the first device. This works as long as we have this
 		 * misfeature only on x86 (lapic)
 		 */
-		cpu = first_cpu(mask);
-		td = &per_cpu(tick_cpu_device, cpu);
-		td->evtdev->broadcast(&mask);
+		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
+		td->evtdev->broadcast(mask);
 	}
 }
 
@@ -160,12 +161,11 @@ static void tick_do_broadcast(cpumask_t mask)
  */
 static void tick_do_periodic_broadcast(void)
 {
-	cpumask_t mask;
-
 	spin_lock(&tick_broadcast_lock);
 
-	cpus_and(mask, cpu_online_map, tick_broadcast_mask);
-	tick_do_broadcast(mask);
+	cpumask_and(to_cpumask(tmpmask),
+		    cpu_online_mask, tick_get_broadcast_mask());
+	tick_do_broadcast(to_cpumask(tmpmask));
 
 	spin_unlock(&tick_broadcast_lock);
 }
@@ -228,13 +228,13 @@ static void tick_do_broadcast_on_off(void *why)
 	if (!tick_device_is_functional(dev))
 		goto out;
 
-	bc_stopped = cpus_empty(tick_broadcast_mask);
+	bc_stopped = cpumask_empty(tick_get_broadcast_mask());
 
 	switch (*reason) {
 	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
 	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
-		if (!cpu_isset(cpu, tick_broadcast_mask)) {
-			cpu_set(cpu, tick_broadcast_mask);
+		if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
+			cpumask_set_cpu(cpu, tick_get_broadcast_mask());
 			if (tick_broadcast_device.mode ==
 			    TICKDEV_MODE_PERIODIC)
 				clockevents_shutdown(dev);
@@ -244,8 +244,8 @@ static void tick_do_broadcast_on_off(void *why)
 		break;
 	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
 		if (!tick_broadcast_force &&
-		    cpu_isset(cpu, tick_broadcast_mask)) {
-			cpu_clear(cpu, tick_broadcast_mask);
+		    cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
+			cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
 			if (tick_broadcast_device.mode ==
 			    TICKDEV_MODE_PERIODIC)
 				tick_setup_periodic(dev, 0);
@@ -253,7 +253,7 @@ static void tick_do_broadcast_on_off(void *why)
 		break;
 	}
 
-	if (cpus_empty(tick_broadcast_mask)) {
+	if (cpumask_empty(tick_get_broadcast_mask())) {
 		if (!bc_stopped)
 			clockevents_shutdown(bc);
 	} else if (bc_stopped) {
@@ -272,7 +272,7 @@ out:
  */
 void tick_broadcast_on_off(unsigned long reason, int *oncpu)
 {
-	if (!cpu_isset(*oncpu, cpu_online_map))
+	if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
 		printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
 		       "offline CPU #%d\n", *oncpu);
 	else
@@ -303,10 +303,10 @@ void tick_shutdown_broadcast(unsigned int *cpup)
 	spin_lock_irqsave(&tick_broadcast_lock, flags);
 
 	bc = tick_broadcast_device.evtdev;
-	cpu_clear(cpu, tick_broadcast_mask);
+	cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
 
 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
-		if (bc && cpus_empty(tick_broadcast_mask))
+		if (bc && cpumask_empty(tick_get_broadcast_mask()))
 			clockevents_shutdown(bc);
 	}
 
@@ -342,10 +342,10 @@ int tick_resume_broadcast(void)
 
 		switch (tick_broadcast_device.mode) {
 		case TICKDEV_MODE_PERIODIC:
-			if(!cpus_empty(tick_broadcast_mask))
+			if (!cpumask_empty(tick_get_broadcast_mask()))
 				tick_broadcast_start_periodic(bc);
-			broadcast = cpu_isset(smp_processor_id(),
-					      tick_broadcast_mask);
+			broadcast = cpumask_test_cpu(smp_processor_id(),
+						     tick_get_broadcast_mask());
 			break;
 		case TICKDEV_MODE_ONESHOT:
 			broadcast = tick_resume_broadcast_oneshot(bc);
@@ -360,14 +360,15 @@ int tick_resume_broadcast(void)
 
 #ifdef CONFIG_TICK_ONESHOT
 
-static cpumask_t tick_broadcast_oneshot_mask;
+/* FIXME: use cpumask_var_t. */
+static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS);
 
 /*
- * Debugging: see timer_list.c
+ * Exposed for debugging: see timer_list.c
  */
-cpumask_t *tick_get_broadcast_oneshot_mask(void)
+struct cpumask *tick_get_broadcast_oneshot_mask(void)
 {
-	return &tick_broadcast_oneshot_mask;
+	return to_cpumask(tick_broadcast_oneshot_mask);
 }
 
 static int tick_broadcast_set_event(ktime_t expires, int force)
@@ -389,7 +390,7 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
  */
 void tick_check_oneshot_broadcast(int cpu)
 {
-	if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
+	if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
 		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
 
 		clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
@@ -402,7 +403,6 @@ void tick_check_oneshot_broadcast(int cpu)
 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 {
 	struct tick_device *td;
-	cpumask_t mask;
 	ktime_t now, next_event;
 	int cpu;
 
@@ -410,13 +410,13 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 again:
 	dev->next_event.tv64 = KTIME_MAX;
 	next_event.tv64 = KTIME_MAX;
-	mask = CPU_MASK_NONE;
+	cpumask_clear(to_cpumask(tmpmask));
 	now = ktime_get();
 	/* Find all expired events */
-	for_each_cpu_mask_nr(cpu, tick_broadcast_oneshot_mask) {
+	for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) {
 		td = &per_cpu(tick_cpu_device, cpu);
 		if (td->evtdev->next_event.tv64 <= now.tv64)
-			cpu_set(cpu, mask);
+			cpumask_set_cpu(cpu, to_cpumask(tmpmask));
 		else if (td->evtdev->next_event.tv64 < next_event.tv64)
 			next_event.tv64 = td->evtdev->next_event.tv64;
 	}
@@ -424,7 +424,7 @@ again:
 	/*
 	 * Wakeup the cpus which have an expired event.
 	 */
-	tick_do_broadcast(mask);
+	tick_do_broadcast(to_cpumask(tmpmask));
 
 	/*
 	 * Two reasons for reprogram:
@@ -476,15 +476,16 @@ void tick_broadcast_oneshot_control(unsigned long reason)
 		goto out;
 
 	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
-		if (!cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
-			cpu_set(cpu, tick_broadcast_oneshot_mask);
+		if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
+			cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask());
 			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
 			if (dev->next_event.tv64 < bc->next_event.tv64)
 				tick_broadcast_set_event(dev->next_event, 1);
 		}
 	} else {
-		if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
-			cpu_clear(cpu, tick_broadcast_oneshot_mask);
+		if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
+			cpumask_clear_cpu(cpu,
+					  tick_get_broadcast_oneshot_mask());
 			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
 			if (dev->next_event.tv64 != KTIME_MAX)
 				tick_program_event(dev->next_event, 1);
@@ -502,10 +503,11 @@ out:
  */
 static void tick_broadcast_clear_oneshot(int cpu)
 {
-	cpu_clear(cpu, tick_broadcast_oneshot_mask);
+	cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
 }
 
-static void tick_broadcast_init_next_event(cpumask_t *mask, ktime_t expires)
+static void tick_broadcast_init_next_event(struct cpumask *mask,
+					   ktime_t expires)
 {
 	struct tick_device *td;
 	int cpu;
@@ -526,7 +528,6 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 	if (bc->event_handler != tick_handle_oneshot_broadcast) {
 		int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
 		int cpu = smp_processor_id();
-		cpumask_t mask;
 
 		bc->event_handler = tick_handle_oneshot_broadcast;
 		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
@@ -540,13 +541,15 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 		 * oneshot_mask bits for those and program the
 		 * broadcast device to fire.
 		 */
-		mask = tick_broadcast_mask;
-		cpu_clear(cpu, mask);
-		cpus_or(tick_broadcast_oneshot_mask,
-			tick_broadcast_oneshot_mask, mask);
-
-		if (was_periodic && !cpus_empty(mask)) {
-			tick_broadcast_init_next_event(&mask, tick_next_period);
+		cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask());
+		cpumask_clear_cpu(cpu, to_cpumask(tmpmask));
+		cpumask_or(tick_get_broadcast_oneshot_mask(),
+			   tick_get_broadcast_oneshot_mask(),
+			   to_cpumask(tmpmask));
+
+		if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) {
+			tick_broadcast_init_next_event(to_cpumask(tmpmask),
+						       tick_next_period);
 			tick_broadcast_set_event(tick_next_period, 1);
 		} else
 			bc->next_event.tv64 = KTIME_MAX;
@@ -585,7 +588,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
 	 * Clear the broadcast mask flag for the dead cpu, but do not
 	 * stop the broadcast device!
 	 */
-	cpu_clear(cpu, tick_broadcast_oneshot_mask);
+	cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
 
 	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index f8372be..63e05d4 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -254,7 +254,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 		curdev = NULL;
 	}
 	clockevents_exchange_device(curdev, newdev);
-	tick_setup_device(td, newdev, cpu, &cpumask_of_cpu(cpu));
+	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
 	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
 		tick_oneshot_notify();
 
@@ -299,9 +299,9 @@ static void tick_shutdown(unsigned int *cpup)
 	}
 	/* Transfer the do_timer job away from this cpu */
 	if (*cpup == tick_do_timer_cpu) {
-		int cpu = first_cpu(cpu_online_map);
+		int cpu = cpumask_first(cpu_online_mask);
 
-		tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu :
+		tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
 			TICK_DO_TIMER_NONE;
 	}
 	spin_unlock_irqrestore(&tick_device_lock, flags);
-- 
cgit v0.10.2


From d036e67b40f52bdd95392390108defbac7e53837 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:26 +1030
Subject: cpumask: convert kernel/irq

Impact: Reduce stack usage, use new cpumask API.  ALPHA mod!

Main change is that irq_default_affinity becomes a cpumask_var_t, so
treat it as a pointer (this effects alpha).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index d0f1620..703731a 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -50,7 +50,8 @@ int irq_select_affinity(unsigned int irq)
 	if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq])
 		return 1;
 
-	while (!cpu_possible(cpu) || !cpu_isset(cpu, irq_default_affinity))
+	while (!cpu_possible(cpu) ||
+	       !cpumask_test_cpu(cpu, irq_default_affinity))
 		cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
 	last_cpu = cpu;
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index dfaee6b..91f1ef8 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -109,7 +109,7 @@ extern void enable_irq(unsigned int irq);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
 
-extern cpumask_t irq_default_affinity;
+extern cpumask_var_t irq_default_affinity;
 
 extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 61c4a9b..cd0cd8d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -16,8 +16,15 @@
 #include "internals.h"
 
 #ifdef CONFIG_SMP
+cpumask_var_t irq_default_affinity;
 
-cpumask_t irq_default_affinity = CPU_MASK_ALL;
+static int init_irq_default_affinity(void)
+{
+	alloc_cpumask_var(&irq_default_affinity, GFP_KERNEL);
+	cpumask_setall(irq_default_affinity);
+	return 0;
+}
+core_initcall(init_irq_default_affinity);
 
 /**
  *	synchronize_irq - wait for pending IRQ handlers (on other CPUs)
@@ -127,7 +134,7 @@ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
 			desc->status &= ~IRQ_AFFINITY_SET;
 	}
 
-	cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity);
+	cpumask_and(&desc->affinity, cpu_online_mask, irq_default_affinity);
 set_affinity:
 	desc->chip->set_affinity(irq, &desc->affinity);
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index d2c0e5e..2abd3a7 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -20,7 +20,7 @@ static struct proc_dir_entry *root_irq_dir;
 static int irq_affinity_proc_show(struct seq_file *m, void *v)
 {
 	struct irq_desc *desc = irq_to_desc((long)m->private);
-	cpumask_t *mask = &desc->affinity;
+	const struct cpumask *mask = &desc->affinity;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (desc->status & IRQ_MOVE_PENDING)
@@ -93,7 +93,7 @@ static const struct file_operations irq_affinity_proc_fops = {
 
 static int default_affinity_show(struct seq_file *m, void *v)
 {
-	seq_cpumask(m, &irq_default_affinity);
+	seq_cpumask(m, irq_default_affinity);
 	seq_putc(m, '\n');
 	return 0;
 }
@@ -101,27 +101,37 @@ static int default_affinity_show(struct seq_file *m, void *v)
 static ssize_t default_affinity_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos)
 {
-	cpumask_t new_value;
+	cpumask_var_t new_value;
 	int err;
 
-	err = cpumask_parse_user(buffer, count, &new_value);
+	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = cpumask_parse_user(buffer, count, new_value);
 	if (err)
-		return err;
+		goto out;
 
-	if (!is_affinity_mask_valid(new_value))
-		return -EINVAL;
+	if (!is_affinity_mask_valid(new_value)) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	/*
 	 * Do not allow disabling IRQs completely - it's a too easy
 	 * way to make the system unusable accidentally :-) At least
 	 * one online CPU still has to be targeted.
 	 */
-	if (!cpus_intersects(new_value, cpu_online_map))
-		return -EINVAL;
+	if (!cpumask_intersects(new_value, cpu_online_mask)) {
+		err = -EINVAL;
+		goto out;
+	}
 
-	irq_default_affinity = new_value;
+	cpumask_copy(irq_default_affinity, new_value);
+	err = count;
 
-	return count;
+out:
+	free_cpumask_var(new_value);
+	return err;
 }
 
 static int default_affinity_open(struct inode *inode, struct file *file)
-- 
cgit v0.10.2


From bd232f97b30f6bb630efa136a777647545db3039 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:26 +1030
Subject: cpumask: convert RCU implementations

Impact: use new cpumask API.

rcu_ctrlblk contains a cpumask, and it's highly optimized so I don't want
a cpumask_var_t (ie. a pointer) for the CONFIG_CPUMASK_OFFSTACK case.  It
could use a dangling bitmap, and be allocated in __rcu_init to save memory,
but for the moment we use a bitmap.

(Eventually 'struct cpumask' will be undefined for CONFIG_CPUMASK_OFFSTACK,
so we use a bitmap here to show we really mean it).

We remove on-stack cpumasks, using cpumask_var_t for
rcu_torture_shuffle_tasks() and for_each_cpu_and in force_quiescent_state().

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 301dda8..f3f697d 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -59,8 +59,8 @@ struct rcu_ctrlblk {
 	int	signaled;
 
 	spinlock_t	lock	____cacheline_internodealigned_in_smp;
-	cpumask_t	cpumask; /* CPUs that need to switch in order    */
-				 /* for current batch to proceed.        */
+	DECLARE_BITMAP(cpumask, NR_CPUS); /* CPUs that need to switch for */
+					  /* current batch to proceed.     */
 } ____cacheline_internodealigned_in_smp;
 
 /* Is batch a before batch b ? */
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index e503a00..0ff9b05 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -63,14 +63,14 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
 	.completed = -300,
 	.pending = -300,
 	.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
-	.cpumask = CPU_MASK_NONE,
+	.cpumask = CPU_BITS_NONE,
 };
 static struct rcu_ctrlblk rcu_bh_ctrlblk = {
 	.cur = -300,
 	.completed = -300,
 	.pending = -300,
 	.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
-	.cpumask = CPU_MASK_NONE,
+	.cpumask = CPU_BITS_NONE,
 };
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
@@ -85,7 +85,6 @@ static void force_quiescent_state(struct rcu_data *rdp,
 			struct rcu_ctrlblk *rcp)
 {
 	int cpu;
-	cpumask_t cpumask;
 	unsigned long flags;
 
 	set_need_resched();
@@ -96,10 +95,10 @@ static void force_quiescent_state(struct rcu_data *rdp,
 		 * Don't send IPI to itself. With irqs disabled,
 		 * rdp->cpu is the current cpu.
 		 *
-		 * cpu_online_map is updated by the _cpu_down()
+		 * cpu_online_mask is updated by the _cpu_down()
 		 * using __stop_machine(). Since we're in irqs disabled
 		 * section, __stop_machine() is not exectuting, hence
-		 * the cpu_online_map is stable.
+		 * the cpu_online_mask is stable.
 		 *
 		 * However,  a cpu might have been offlined _just_ before
 		 * we disabled irqs while entering here.
@@ -107,13 +106,14 @@ static void force_quiescent_state(struct rcu_data *rdp,
 		 * notification, leading to the offlined cpu's bit
 		 * being set in the rcp->cpumask.
 		 *
-		 * Hence cpumask = (rcp->cpumask & cpu_online_map) to prevent
+		 * Hence cpumask = (rcp->cpumask & cpu_online_mask) to prevent
 		 * sending smp_reschedule() to an offlined CPU.
 		 */
-		cpus_and(cpumask, rcp->cpumask, cpu_online_map);
-		cpu_clear(rdp->cpu, cpumask);
-		for_each_cpu_mask_nr(cpu, cpumask)
-			smp_send_reschedule(cpu);
+		for_each_cpu_and(cpu,
+				  to_cpumask(rcp->cpumask), cpu_online_mask) {
+			if (cpu != rdp->cpu)
+				smp_send_reschedule(cpu);
+		}
 	}
 	spin_unlock_irqrestore(&rcp->lock, flags);
 }
@@ -193,7 +193,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
 
 	printk(KERN_ERR "INFO: RCU detected CPU stalls:");
 	for_each_possible_cpu(cpu) {
-		if (cpu_isset(cpu, rcp->cpumask))
+		if (cpumask_test_cpu(cpu, to_cpumask(rcp->cpumask)))
 			printk(" %d", cpu);
 	}
 	printk(" (detected by %d, t=%ld jiffies)\n",
@@ -221,7 +221,8 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
 	long delta;
 
 	delta = jiffies - rcp->jiffies_stall;
-	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
+	if (cpumask_test_cpu(smp_processor_id(), to_cpumask(rcp->cpumask)) &&
+		delta >= 0) {
 
 		/* We haven't checked in, so go dump stack. */
 		print_cpu_stall(rcp);
@@ -393,7 +394,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 		 * unnecessarily.
 		 */
 		smp_mb();
-		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+		cpumask_andnot(to_cpumask(rcp->cpumask),
+			       cpu_online_mask, &nohz_cpu_mask);
 
 		rcp->signaled = 0;
 	}
@@ -406,8 +408,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
  */
 static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 {
-	cpu_clear(cpu, rcp->cpumask);
-	if (cpus_empty(rcp->cpumask)) {
+	cpumask_clear_cpu(cpu, to_cpumask(rcp->cpumask));
+	if (cpumask_empty(to_cpumask(rcp->cpumask))) {
 		/* batch completed ! */
 		rcp->completed = rcp->cur;
 		rcu_start_batch(rcp);
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 0498265..f9dc8f3 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -164,7 +164,8 @@ static char *rcu_try_flip_state_names[] =
 	{ "idle", "waitack", "waitzero", "waitmb" };
 #endif /* #ifdef CONFIG_RCU_TRACE */
 
-static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;
+static DECLARE_BITMAP(rcu_cpu_online_map, NR_CPUS) __read_mostly
+	= CPU_BITS_NONE;
 
 /*
  * Enum and per-CPU flag to determine when each CPU has seen
@@ -758,7 +759,7 @@ rcu_try_flip_idle(void)
 
 	/* Now ask each CPU for acknowledgement of the flip. */
 
-	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
+	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) {
 		per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
 		dyntick_save_progress_counter(cpu);
 	}
@@ -776,7 +777,7 @@ rcu_try_flip_waitack(void)
 	int cpu;
 
 	RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
-	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
+	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
 		if (rcu_try_flip_waitack_needed(cpu) &&
 		    per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
 			RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
@@ -808,7 +809,7 @@ rcu_try_flip_waitzero(void)
 	/* Check to see if the sum of the "last" counters is zero. */
 
 	RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
-	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
+	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
 		sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
 	if (sum != 0) {
 		RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
@@ -823,7 +824,7 @@ rcu_try_flip_waitzero(void)
 	smp_mb();  /*  ^^^^^^^^^^^^ */
 
 	/* Call for a memory barrier from each CPU. */
-	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
+	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) {
 		per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
 		dyntick_save_progress_counter(cpu);
 	}
@@ -843,7 +844,7 @@ rcu_try_flip_waitmb(void)
 	int cpu;
 
 	RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
-	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
+	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
 		if (rcu_try_flip_waitmb_needed(cpu) &&
 		    per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
 			RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
@@ -1032,7 +1033,7 @@ void rcu_offline_cpu(int cpu)
 	RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0;
 	RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0;
 
-	cpu_clear(cpu, rcu_cpu_online_map);
+	cpumask_clear_cpu(cpu, to_cpumask(rcu_cpu_online_map));
 
 	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
 
@@ -1072,7 +1073,7 @@ void __cpuinit rcu_online_cpu(int cpu)
 	struct rcu_data *rdp;
 
 	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
-	cpu_set(cpu, rcu_cpu_online_map);
+	cpumask_set_cpu(cpu, to_cpumask(rcu_cpu_online_map));
 	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
 
 	/*
@@ -1430,7 +1431,7 @@ void __init __rcu_init(void)
 	 * We don't need protection against CPU-Hotplug here
 	 * since
 	 * a) If a CPU comes online while we are iterating over the
-	 *    cpu_online_map below, we would only end up making a
+	 *    cpu_online_mask below, we would only end up making a
 	 *    duplicate call to rcu_online_cpu() which sets the corresponding
 	 *    CPU's mask in the rcu_cpu_online_map.
 	 *
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index b310655..3245b40 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -868,49 +868,52 @@ static int rcu_idle_cpu;	/* Force all torture tasks off this CPU */
  */
 static void rcu_torture_shuffle_tasks(void)
 {
-	cpumask_t tmp_mask;
+	cpumask_var_t tmp_mask;
 	int i;
 
-	cpus_setall(tmp_mask);
+	if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL))
+		BUG();
+
+	cpumask_setall(tmp_mask);
 	get_online_cpus();
 
 	/* No point in shuffling if there is only one online CPU (ex: UP) */
-	if (num_online_cpus() == 1) {
-		put_online_cpus();
-		return;
-	}
+	if (num_online_cpus() == 1)
+		goto out;
 
 	if (rcu_idle_cpu != -1)
-		cpu_clear(rcu_idle_cpu, tmp_mask);
+		cpumask_clear_cpu(rcu_idle_cpu, tmp_mask);
 
-	set_cpus_allowed_ptr(current, &tmp_mask);
+	set_cpus_allowed_ptr(current, tmp_mask);
 
 	if (reader_tasks) {
 		for (i = 0; i < nrealreaders; i++)
 			if (reader_tasks[i])
 				set_cpus_allowed_ptr(reader_tasks[i],
-						     &tmp_mask);
+						     tmp_mask);
 	}
 
 	if (fakewriter_tasks) {
 		for (i = 0; i < nfakewriters; i++)
 			if (fakewriter_tasks[i])
 				set_cpus_allowed_ptr(fakewriter_tasks[i],
-						     &tmp_mask);
+						     tmp_mask);
 	}
 
 	if (writer_task)
-		set_cpus_allowed_ptr(writer_task, &tmp_mask);
+		set_cpus_allowed_ptr(writer_task, tmp_mask);
 
 	if (stats_task)
-		set_cpus_allowed_ptr(stats_task, &tmp_mask);
+		set_cpus_allowed_ptr(stats_task, tmp_mask);
 
 	if (rcu_idle_cpu == -1)
 		rcu_idle_cpu = num_online_cpus() - 1;
 	else
 		rcu_idle_cpu--;
 
+out:
 	put_online_cpus();
+	free_cpumask_var(tmp_mask);
 }
 
 /* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
-- 
cgit v0.10.2


From c309b917cab55799ea489d7b5f1b77025d9f8462 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:27 +1030
Subject: cpumask: convert kernel/profile.c

Impact: Reduce kernel memory usage, use new cpumask API.

Avoid a static cpumask_t for prof_cpu_mask, and an on-stack cpumask_t
in prof_cpu_mask_write_proc.  Both become cpumask_var_t.

prof_cpu_mask is only allocated when profiling is on, but the NULL
checks are optimized out by gcc for the !CPUMASK_OFFSTACK case.

Also removed some strange and unnecessary casts.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/kernel/profile.c b/kernel/profile.c
index 4cb7d68..d18e2d2 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -45,7 +45,7 @@ static unsigned long prof_len, prof_shift;
 int prof_on __read_mostly;
 EXPORT_SYMBOL_GPL(prof_on);
 
-static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
+static cpumask_var_t prof_cpu_mask;
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
 static DEFINE_PER_CPU(int, cpu_profile_flip);
@@ -113,9 +113,13 @@ int __ref profile_init(void)
 	buffer_bytes = prof_len*sizeof(atomic_t);
 	if (!slab_is_available()) {
 		prof_buffer = alloc_bootmem(buffer_bytes);
+		alloc_bootmem_cpumask_var(&prof_cpu_mask);
 		return 0;
 	}
 
+	if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
+		return -ENOMEM;
+
 	prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
 	if (prof_buffer)
 		return 0;
@@ -128,6 +132,7 @@ int __ref profile_init(void)
 	if (prof_buffer)
 		return 0;
 
+	free_cpumask_var(prof_cpu_mask);
 	return -ENOMEM;
 }
 
@@ -386,13 +391,15 @@ out_free:
 		return NOTIFY_BAD;
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		cpu_set(cpu, prof_cpu_mask);
+		if (prof_cpu_mask != NULL)
+			cpumask_set_cpu(cpu, prof_cpu_mask);
 		break;
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		cpu_clear(cpu, prof_cpu_mask);
+		if (prof_cpu_mask != NULL)
+			cpumask_clear_cpu(cpu, prof_cpu_mask);
 		if (per_cpu(cpu_profile_hits, cpu)[0]) {
 			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
 			per_cpu(cpu_profile_hits, cpu)[0] = NULL;
@@ -430,7 +437,8 @@ void profile_tick(int type)
 
 	if (type == CPU_PROFILING && timer_hook)
 		timer_hook(regs);
-	if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask))
+	if (!user_mode(regs) && prof_cpu_mask != NULL &&
+	    cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
 		profile_hit(type, (void *)profile_pc(regs));
 }
 
@@ -442,7 +450,7 @@ void profile_tick(int type)
 static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
 			int count, int *eof, void *data)
 {
-	int len = cpumask_scnprintf(page, count, (cpumask_t *)data);
+	int len = cpumask_scnprintf(page, count, data);
 	if (count - len < 2)
 		return -EINVAL;
 	len += sprintf(page + len, "\n");
@@ -452,16 +460,20 @@ static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
 static int prof_cpu_mask_write_proc(struct file *file,
 	const char __user *buffer,  unsigned long count, void *data)
 {
-	cpumask_t *mask = (cpumask_t *)data;
+	struct cpumask *mask = data;
 	unsigned long full_count = count, err;
-	cpumask_t new_value;
+	cpumask_var_t new_value;
 
-	err = cpumask_parse_user(buffer, count, &new_value);
-	if (err)
-		return err;
+	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+		return -ENOMEM;
 
-	*mask = new_value;
-	return full_count;
+	err = cpumask_parse_user(buffer, count, new_value);
+	if (!err) {
+		cpumask_copy(mask, new_value);
+		err = full_count;
+	}
+	free_cpumask_var(new_value);
+	return err;
 }
 
 void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
@@ -472,7 +484,7 @@ void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
 	entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
 	if (!entry)
 		return;
-	entry->data = (void *)&prof_cpu_mask;
+	entry->data = prof_cpu_mask;
 	entry->read_proc = prof_cpu_mask_read_proc;
 	entry->write_proc = prof_cpu_mask_write_proc;
 }
-- 
cgit v0.10.2


From e0b582ec56f1a1d8b30ebf340a7b91fb09f26c8c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:28 +1030
Subject: cpumask: convert kernel/cpu.c

Impact: Reduce kernel stack and memory usage, use new cpumask API.

Use cpumask_var_t for take_cpu_down() stack var, and frozen_cpus.

Note that notify_cpu_starting() can be called before core_initcall
allocates frozen_cpus, but the NULL check is optimized out by gcc for
the CONFIG_CPUMASK_OFFSTACK=n case.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2c9f78f..47fff3b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -194,7 +194,7 @@ static int __ref take_cpu_down(void *_param)
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
 	int err, nr_calls = 0;
-	cpumask_t old_allowed, tmp;
+	cpumask_var_t old_allowed;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 	struct take_cpu_down_param tcd_param = {
@@ -208,6 +208,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	if (!cpu_online(cpu))
 		return -EINVAL;
 
+	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
+		return -ENOMEM;
+
 	cpu_hotplug_begin();
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
 					hcpu, -1, &nr_calls);
@@ -222,13 +225,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	}
 
 	/* Ensure that we are not runnable on dying cpu */
-	old_allowed = current->cpus_allowed;
-	cpus_setall(tmp);
-	cpu_clear(cpu, tmp);
-	set_cpus_allowed_ptr(current, &tmp);
-	tmp = cpumask_of_cpu(cpu);
+	cpumask_copy(old_allowed, &current->cpus_allowed);
+	set_cpus_allowed_ptr(current,
+			     cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
 
-	err = __stop_machine(take_cpu_down, &tcd_param, &tmp);
+	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
@@ -254,7 +255,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	check_for_tasks(cpu);
 
 out_allowed:
-	set_cpus_allowed_ptr(current, &old_allowed);
+	set_cpus_allowed_ptr(current, old_allowed);
 out_release:
 	cpu_hotplug_done();
 	if (!err) {
@@ -262,6 +263,7 @@ out_release:
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 	}
+	free_cpumask_var(old_allowed);
 	return err;
 }
 
@@ -280,7 +282,7 @@ int __ref cpu_down(unsigned int cpu)
 
 	/*
 	 * Make sure the all cpus did the reschedule and are not
-	 * using stale version of the cpu_active_map.
+	 * using stale version of the cpu_active_mask.
 	 * This is not strictly necessary becuase stop_machine()
 	 * that we run down the line already provides the required
 	 * synchronization. But it's really a side effect and we do not
@@ -344,7 +346,7 @@ out_notify:
 int __cpuinit cpu_up(unsigned int cpu)
 {
 	int err = 0;
-	if (!cpu_isset(cpu, cpu_possible_map)) {
+	if (!cpu_possible(cpu)) {
 		printk(KERN_ERR "can't online cpu %d because it is not "
 			"configured as may-hotadd at boot time\n", cpu);
 #if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
@@ -369,25 +371,25 @@ out:
 }
 
 #ifdef CONFIG_PM_SLEEP_SMP
-static cpumask_t frozen_cpus;
+static cpumask_var_t frozen_cpus;
 
 int disable_nonboot_cpus(void)
 {
 	int cpu, first_cpu, error = 0;
 
 	cpu_maps_update_begin();
-	first_cpu = first_cpu(cpu_online_map);
+	first_cpu = cpumask_first(cpu_online_mask);
 	/* We take down all of the non-boot CPUs in one shot to avoid races
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
-	cpus_clear(frozen_cpus);
+	cpumask_clear(frozen_cpus);
 	printk("Disabling non-boot CPUs ...\n");
 	for_each_online_cpu(cpu) {
 		if (cpu == first_cpu)
 			continue;
 		error = _cpu_down(cpu, 1);
 		if (!error) {
-			cpu_set(cpu, frozen_cpus);
+			cpumask_set_cpu(cpu, frozen_cpus);
 			printk("CPU%d is down\n", cpu);
 		} else {
 			printk(KERN_ERR "Error taking CPU%d down: %d\n",
@@ -413,11 +415,11 @@ void __ref enable_nonboot_cpus(void)
 	/* Allow everyone to use the CPU hotplug again */
 	cpu_maps_update_begin();
 	cpu_hotplug_disabled = 0;
-	if (cpus_empty(frozen_cpus))
+	if (cpumask_empty(frozen_cpus))
 		goto out;
 
 	printk("Enabling non-boot CPUs ...\n");
-	for_each_cpu_mask_nr(cpu, frozen_cpus) {
+	for_each_cpu(cpu, frozen_cpus) {
 		error = _cpu_up(cpu, 1);
 		if (!error) {
 			printk("CPU%d is up\n", cpu);
@@ -425,10 +427,18 @@ void __ref enable_nonboot_cpus(void)
 		}
 		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
 	}
-	cpus_clear(frozen_cpus);
+	cpumask_clear(frozen_cpus);
 out:
 	cpu_maps_update_done();
 }
+
+static int alloc_frozen_cpus(void)
+{
+	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
+		return -ENOMEM;
+	return 0;
+}
+core_initcall(alloc_frozen_cpus);
 #endif /* CONFIG_PM_SLEEP_SMP */
 
 /**
@@ -444,7 +454,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu)
 	unsigned long val = CPU_STARTING;
 
 #ifdef CONFIG_PM_SLEEP_SMP
-	if (cpu_isset(cpu, frozen_cpus))
+	if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
 		val = CPU_STARTING_FROZEN;
 #endif /* CONFIG_PM_SLEEP_SMP */
 	raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
@@ -456,7 +466,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu)
  * cpu_bit_bitmap[] is a special, "compressed" data structure that
  * represents all NR_CPUS bits binary values of 1<<nr.
  *
- * It is used by cpumask_of_cpu() to get a constant address to a CPU
+ * It is used by cpumask_of() to get a constant address to a CPU
  * mask value that has a single bit set only.
  */
 
-- 
cgit v0.10.2


From 41c7bb9588904eb060a95bcad47bd3804a1ece25 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:28 +1030
Subject: cpumask: convert rest of files in kernel/

Impact: Reduce stack usage, use new cpumask API.

Mainly changing cpumask_t to 'struct cpumask' and similar simple API
conversion.  Two conversions worth mentioning:

1) we use cpumask_any_but to avoid a temporary in kernel/softlockup.c,
2) Use cpumask_var_t in taskstats_user_cmd().

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index faf1519..74d59a6 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -23,7 +23,7 @@
  *
  * This can be thought of as a very heavy write lock, equivalent to
  * grabbing every spinlock in the kernel. */
-int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus);
+int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 
 /**
  * __stop_machine: freeze the machine on all CPUs and run this function
@@ -34,11 +34,11 @@ int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus);
  * Description: This is a special version of the above, which assumes cpus
  * won't come or go while it's being called.  Used by hotplug cpu.
  */
-int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus);
+int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 #else
 
 static inline int stop_machine(int (*fn)(void *), void *data,
-			       const cpumask_t *cpus)
+			       const struct cpumask *cpus)
 {
 	int ret;
 	local_irq_disable();
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 72016f0..9789083 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -27,7 +27,7 @@ static DECLARE_WORK(poweroff_work, do_poweroff);
 static void handle_poweroff(int key, struct tty_struct *tty)
 {
 	/* run sysrq poweroff on boot cpu */
-	schedule_work_on(first_cpu(cpu_online_map), &poweroff_work);
+	schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work);
 }
 
 static struct sysrq_key_op	sysrq_poweroff_op = {
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 492f0c7..d9188c6 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -310,10 +310,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		if (hotcpu == check_cpu) {
-			cpumask_t temp_cpu_online_map = cpu_online_map;
-
-			cpu_clear(hotcpu, temp_cpu_online_map);
-			check_cpu = cpumask_any(&temp_cpu_online_map);
+			/* Pick any other online cpu. */
+			check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
 		}
 		break;
 
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 24e8cea..286c417 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -69,10 +69,10 @@ static void stop_cpu(struct work_struct *unused)
 	int err;
 
 	if (!active_cpus) {
-		if (cpu == first_cpu(cpu_online_map))
+		if (cpu == cpumask_first(cpu_online_mask))
 			smdata = &active;
 	} else {
-		if (cpu_isset(cpu, *active_cpus))
+		if (cpumask_test_cpu(cpu, active_cpus))
 			smdata = &active;
 	}
 	/* Simple state machine */
@@ -109,7 +109,7 @@ static int chill(void *unused)
 	return 0;
 }
 
-int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
+int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	struct work_struct *sm_work;
 	int i, ret;
@@ -142,7 +142,7 @@ int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
 	return ret;
 }
 
-int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
+int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	int ret;
 
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 6d7dc4e..888adbc 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -290,18 +290,17 @@ ret:
 	return;
 }
 
-static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
+static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
 {
 	struct listener_list *listeners;
 	struct listener *s, *tmp;
 	unsigned int cpu;
-	cpumask_t mask = *maskp;
 
-	if (!cpus_subset(mask, cpu_possible_map))
+	if (!cpumask_subset(mask, cpu_possible_mask))
 		return -EINVAL;
 
 	if (isadd == REGISTER) {
-		for_each_cpu_mask_nr(cpu, mask) {
+		for_each_cpu(cpu, mask) {
 			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
 					 cpu_to_node(cpu));
 			if (!s)
@@ -320,7 +319,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
 
 	/* Deregister or cleanup */
 cleanup:
-	for_each_cpu_mask_nr(cpu, mask) {
+	for_each_cpu(cpu, mask) {
 		listeners = &per_cpu(listener_array, cpu);
 		down_write(&listeners->sem);
 		list_for_each_entry_safe(s, tmp, &listeners->list, list) {
@@ -335,7 +334,7 @@ cleanup:
 	return 0;
 }
 
-static int parse(struct nlattr *na, cpumask_t *mask)
+static int parse(struct nlattr *na, struct cpumask *mask)
 {
 	char *data;
 	int len;
@@ -428,23 +427,33 @@ err:
 
 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
-	int rc = 0;
+	int rc;
 	struct sk_buff *rep_skb;
 	struct taskstats *stats;
 	size_t size;
-	cpumask_t mask;
+	cpumask_var_t mask;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
 
-	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
 	if (rc < 0)
-		return rc;
-	if (rc == 0)
-		return add_del_listener(info->snd_pid, &mask, REGISTER);
+		goto free_return_rc;
+	if (rc == 0) {
+		rc = add_del_listener(info->snd_pid, mask, REGISTER);
+		goto free_return_rc;
+	}
 
-	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
 	if (rc < 0)
+		goto free_return_rc;
+	if (rc == 0) {
+		rc = add_del_listener(info->snd_pid, mask, DEREGISTER);
+free_return_rc:
+		free_cpumask_var(mask);
 		return rc;
-	if (rc == 0)
-		return add_del_listener(info->snd_pid, &mask, DEREGISTER);
+	}
+	free_cpumask_var(mask);
 
 	/*
 	 * Size includes space for nested attributes
-- 
cgit v0.10.2


From 174596a0b9f21e8844d70566a6bb29bf48a87750 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:29 +1030
Subject: cpumask: convert mm/

Impact: Use new API

Convert kernel mm functions to use struct cpumask.

We skip include/linux/percpu.h and mm/allocpercpu.c, which are in flux.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>

diff --git a/mm/pdflush.c b/mm/pdflush.c
index a0a14c4..15de509 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -172,7 +172,16 @@ static int __pdflush(struct pdflush_work *my_work)
 static int pdflush(void *dummy)
 {
 	struct pdflush_work my_work;
-	cpumask_t cpus_allowed;
+	cpumask_var_t cpus_allowed;
+
+	/*
+	 * Since the caller doesn't even check kthread_run() worked, let's not
+	 * freak out too much if this fails.
+	 */
+	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+		printk(KERN_WARNING "pdflush failed to allocate cpumask\n");
+		return 0;
+	}
 
 	/*
 	 * pdflush can spend a lot of time doing encryption via dm-crypt.  We
@@ -187,8 +196,9 @@ static int pdflush(void *dummy)
 	 * This is needed as pdflush's are dynamically created and destroyed.
 	 * The boottime pdflush's are easily placed w/o these 2 lines.
 	 */
-	cpuset_cpus_allowed(current, &cpus_allowed);
-	set_cpus_allowed_ptr(current, &cpus_allowed);
+	cpuset_cpus_allowed(current, cpus_allowed);
+	set_cpus_allowed_ptr(current, cpus_allowed);
+	free_cpumask_var(cpus_allowed);
 
 	return __pdflush(&my_work);
 }
diff --git a/mm/slab.c b/mm/slab.c
index f97e564..ddc41f3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2157,7 +2157,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 
 	/*
 	 * We use cache_chain_mutex to ensure a consistent view of
-	 * cpu_online_map as well.  Please see cpuup_callback
+	 * cpu_online_mask as well.  Please see cpuup_callback
 	 */
 	get_online_cpus();
 	mutex_lock(&cache_chain_mutex);
diff --git a/mm/slub.c b/mm/slub.c
index 0d861c3..f0e2892 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1970,7 +1970,7 @@ static DEFINE_PER_CPU(struct kmem_cache_cpu,
 				kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
 
 static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
-static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE;
+static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
 
 static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
 							int cpu, gfp_t flags)
@@ -2045,13 +2045,13 @@ static void init_alloc_cpu_cpu(int cpu)
 {
 	int i;
 
-	if (cpu_isset(cpu, kmem_cach_cpu_free_init_once))
+	if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
 		return;
 
 	for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
 		free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
 
-	cpu_set(cpu, kmem_cach_cpu_free_init_once);
+	cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
 }
 
 static void __init init_alloc_cpu(void)
@@ -3451,7 +3451,7 @@ struct location {
 	long max_time;
 	long min_pid;
 	long max_pid;
-	cpumask_t cpus;
+	DECLARE_BITMAP(cpus, NR_CPUS);
 	nodemask_t nodes;
 };
 
@@ -3526,7 +3526,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 				if (track->pid > l->max_pid)
 					l->max_pid = track->pid;
 
-				cpu_set(track->cpu, l->cpus);
+				cpumask_set_cpu(track->cpu,
+						to_cpumask(l->cpus));
 			}
 			node_set(page_to_nid(virt_to_page(track)), l->nodes);
 			return 1;
@@ -3556,8 +3557,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 	l->max_time = age;
 	l->min_pid = track->pid;
 	l->max_pid = track->pid;
-	cpus_clear(l->cpus);
-	cpu_set(track->cpu, l->cpus);
+	cpumask_clear(to_cpumask(l->cpus));
+	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
 	nodes_clear(l->nodes);
 	node_set(page_to_nid(virt_to_page(track)), l->nodes);
 	return 1;
@@ -3638,11 +3639,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
 			len += sprintf(buf + len, " pid=%ld",
 				l->min_pid);
 
-		if (num_online_cpus() > 1 && !cpus_empty(l->cpus) &&
+		if (num_online_cpus() > 1 &&
+				!cpumask_empty(to_cpumask(l->cpus)) &&
 				len < PAGE_SIZE - 60) {
 			len += sprintf(buf + len, " cpus=");
 			len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
-					&l->cpus);
+						 to_cpumask(l->cpus));
 		}
 
 		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 240f062..d196f46 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1902,7 +1902,7 @@ static int kswapd(void *p)
 	};
 	node_to_cpumask_ptr(cpumask, pgdat->node_id);
 
-	if (!cpus_empty(*cpumask))
+	if (!cpumask_empty(cpumask))
 		set_cpus_allowed_ptr(tsk, cpumask);
 	current->reclaim_state = &reclaim_state;
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c3ccfda..9114974 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -20,7 +20,7 @@
 DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
 EXPORT_PER_CPU_SYMBOL(vm_event_states);
 
-static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
+static void sum_vm_events(unsigned long *ret, const struct cpumask *cpumask)
 {
 	int cpu;
 	int i;
@@ -43,7 +43,7 @@ static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
 void all_vm_events(unsigned long *ret)
 {
 	get_online_cpus();
-	sum_vm_events(ret, &cpu_online_map);
+	sum_vm_events(ret, cpu_online_mask);
 	put_online_cpus();
 }
 EXPORT_SYMBOL_GPL(all_vm_events);
-- 
cgit v0.10.2


From 5db0e1e9e0f30f160b832a0b5cd1131954bf4f6e Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:29 +1030
Subject: cpumask: replace for_each_cpu_mask_nr with for_each_cpu in
 kernel/time/

Impact: cleanup

Simple replacement, now the _nr is redundant.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 32141b1..ca89e15 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -145,7 +145,8 @@ static void clocksource_watchdog(unsigned long data)
 		 * Cycle through CPUs to check if the CPUs stay
 		 * synchronized to each other.
 		 */
-		int next_cpu = next_cpu_nr(raw_smp_processor_id(), cpu_online_map);
+		int next_cpu = cpumask_next(raw_smp_processor_id(),
+					    cpu_online_mask);
 
 		if (next_cpu >= nr_cpu_ids)
 			next_cpu = cpumask_first(cpu_online_mask);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 356fac5..118a3b3 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -512,7 +512,7 @@ static void tick_broadcast_init_next_event(struct cpumask *mask,
 	struct tick_device *td;
 	int cpu;
 
-	for_each_cpu_mask_nr(cpu, *mask) {
+	for_each_cpu(cpu, mask) {
 		td = &per_cpu(tick_cpu_device, cpu);
 		if (td->evtdev)
 			td->evtdev->next_event = expires;
-- 
cgit v0.10.2


From 2a53008033189ed09bfe241c6b33811ba4ce980d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:30 +1030
Subject: cpumask: zero extra bits in alloc_cpumask_var_node

Impact: extra safety checks during transition

When CONFIG_CPUMASKS_OFFSTACK is set, the new cpumask_ operators only
use bits up to nr_cpu_ids, not NR_CPUS.  Using the old cpus_ operators
on these masks can mean accessing undefined bits.

After some discussion, Mike and I decided to err on the side of caution;
we zero the "undefined" bits in alloc_cpumask_var_node() until all the
old cpumask functions are removed.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/lib/cpumask.c b/lib/cpumask.c
index 8e1496c..3389e24 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -107,6 +107,14 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 		dump_stack();
 	}
 #endif
+	/* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */
+	if (*mask) {
+		unsigned int tail;
+		tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long);
+		memset(cpumask_bits(*mask) + cpumask_size() - tail,
+		       0, tail);
+	}
+
 	return *mask != NULL;
 }
 EXPORT_SYMBOL(alloc_cpumask_var_node);
-- 
cgit v0.10.2


From 8c384cdee3e04d6194a2c2b192b624754f990835 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:30 +1030
Subject: cpumask: CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS

Impact: new debug CONFIG options

This helps find unconverted code.  It currently breaks compile horribly,
but we never wanted a flag day so that's expected.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 7c178a6..9f31538 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -144,6 +144,7 @@
 typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
 extern cpumask_t _unused_cpumask_arg_;
 
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
 static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
 {
@@ -267,6 +268,7 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 {
 	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
 }
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 
 /**
  * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
@@ -304,6 +306,7 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
 	return to_cpumask(p);
 }
 
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 /*
  * In cases where we take the address of the cpumask immediately,
  * gcc optimizes it out (it's a constant) and there's no huge stack
@@ -389,19 +392,22 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
 {
 	bitmap_fold(dstp->bits, origp->bits, sz, nbits);
 }
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 
 #if NR_CPUS == 1
 
 #define nr_cpu_ids		1
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 #define first_cpu(src)		({ (void)(src); 0; })
 #define next_cpu(n, src)	({ (void)(src); 1; })
 #define any_online_cpu(mask)	0
 #define for_each_cpu_mask(cpu, mask)	\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
-
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 #else /* NR_CPUS > 1 */
 
 extern int nr_cpu_ids;
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 int __first_cpu(const cpumask_t *srcp);
 int __next_cpu(int n, const cpumask_t *srcp);
 int __any_online_cpu(const cpumask_t *mask);
@@ -413,8 +419,10 @@ int __any_online_cpu(const cpumask_t *mask);
 	for ((cpu) = -1;				\
 		(cpu) = next_cpu((cpu), (mask)),	\
 		(cpu) < NR_CPUS; )
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 #endif
 
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 #if NR_CPUS <= 64
 
 #define next_cpu_nr(n, src)		next_cpu(n, src)
@@ -432,6 +440,7 @@ int __next_cpu_nr(int n, const cpumask_t *srcp);
 		(cpu) < nr_cpu_ids; )
 
 #endif /* NR_CPUS > 64 */
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 
 /*
  * The following particular system cpumasks and operations manage
diff --git a/lib/Kconfig b/lib/Kconfig
index fc5f5ee..03c2c24 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -170,4 +170,8 @@ config CPUMASK_OFFSTACK
 	  them on the stack.  This is a bit more expensive, but avoids
 	  stack overflow.
 
+config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
+       bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
+       depends on EXPERIMENTAL && BROKEN
+
 endmenu
-- 
cgit v0.10.2


From 42a6e66f1e40a930d093c33ba0bb9d8d8e4555ed Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Mon, 29 Dec 2008 11:23:02 +0100
Subject: ALSA: sound/usb: use USB API functions rather than constants

This set of patches introduces calls to the following set of functions:

usb_endpoint_dir_in(epd)
usb_endpoint_dir_out(epd)
usb_endpoint_is_bulk_in(epd)
usb_endpoint_is_bulk_out(epd)
usb_endpoint_is_int_in(epd)
usb_endpoint_is_int_out(epd)
usb_endpoint_num(epd)
usb_endpoint_type(epd)
usb_endpoint_xfer_bulk(epd)
usb_endpoint_xfer_control(epd)
usb_endpoint_xfer_int(epd)
usb_endpoint_xfer_isoc(epd)

In some cases, introducing one of these functions is not possible, and it
just replaces an explicit integer value by one of the following constants:

USB_ENDPOINT_XFER_BULK
USB_ENDPOINT_XFER_CONTROL
USB_ENDPOINT_XFER_INT
USB_ENDPOINT_XFER_ISOC

An extract of the semantic patch that makes these changes is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@r1@ struct usb_endpoint_descriptor *epd; @@

- ((epd->bmAttributes & \(USB_ENDPOINT_XFERTYPE_MASK\|3\)) ==
- \(USB_ENDPOINT_XFER_CONTROL\|0\))
+ usb_endpoint_xfer_control(epd)

@r5@ struct usb_endpoint_descriptor *epd; @@

- ((epd->bEndpointAddress & \(USB_ENDPOINT_DIR_MASK\|0x80\)) ==
-  \(USB_DIR_IN\|0x80\))
+ usb_endpoint_dir_in(epd)

@inc@
@@

#include <linux/usb.h>

@depends on !inc && (r1||r5)@
@@

+ #include <linux/usb.h>
  #include <linux/usb/...>
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c
index 6d9f9b1..3a9a9fe 100644
--- a/sound/usb/usbmidi.c
+++ b/sound/usb/usbmidi.c
@@ -1392,8 +1392,8 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi* umidi,
 	for (i = 0; i < intfd->bNumEndpoints; ++i) {
 		hostep = &hostif->endpoint[i];
 		ep = get_ep_desc(hostep);
-		if ((ep->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_BULK &&
-		    (ep->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_INT)
+		if (usb_endpoint_type(ep) != USB_ENDPOINT_XFER_BULK &&
+		    usb_endpoint_type(ep) != USB_ENDPOINT_XFER_INT)
 			continue;
 		ms_ep = (struct usb_ms_endpoint_descriptor*)hostep->extra;
 		if (hostep->extralen < 4 ||
@@ -1401,15 +1401,15 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi* umidi,
 		    ms_ep->bDescriptorType != USB_DT_CS_ENDPOINT ||
 		    ms_ep->bDescriptorSubtype != MS_GENERAL)
 			continue;
-		if ((ep->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT) {
+		if (usb_endpoint_dir_out(ep)) {
 			if (endpoints[epidx].out_ep) {
 				if (++epidx >= MIDI_MAX_ENDPOINTS) {
 					snd_printk(KERN_WARNING "too many endpoints\n");
 					break;
 				}
 			}
-			endpoints[epidx].out_ep = ep->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
-			if ((ep->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT)
+			endpoints[epidx].out_ep = usb_endpoint_num(ep);
+			if (usb_endpoint_xfer_int(ep))
 				endpoints[epidx].out_interval = ep->bInterval;
 			else if (snd_usb_get_speed(umidi->chip->dev) == USB_SPEED_LOW)
 				/*
@@ -1428,8 +1428,8 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi* umidi,
 					break;
 				}
 			}
-			endpoints[epidx].in_ep = ep->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
-			if ((ep->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT)
+			endpoints[epidx].in_ep = usb_endpoint_num(ep);
+			if (usb_endpoint_xfer_int(ep))
 				endpoints[epidx].in_interval = ep->bInterval;
 			else if (snd_usb_get_speed(umidi->chip->dev) == USB_SPEED_LOW)
 				endpoints[epidx].in_interval = 1;
@@ -1495,20 +1495,20 @@ static int snd_usbmidi_detect_endpoints(struct snd_usb_midi* umidi,
 
 	for (i = 0; i < intfd->bNumEndpoints; ++i) {
 		epd = get_endpoint(hostif, i);
-		if ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_BULK &&
-		    (epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_INT)
+		if (usb_endpoint_type(epd) != USB_ENDPOINT_XFER_BULK &&
+		    usb_endpoint_type(epd) != USB_ENDPOINT_XFER_INT)
 			continue;
 		if (out_eps < max_endpoints &&
-		    (epd->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT) {
-			endpoint[out_eps].out_ep = epd->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
-			if ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT)
+		    usb_endpoint_dir_out(epd)) {
+			endpoint[out_eps].out_ep = usb_endpoint_num(epd);
+			if (usb_endpoint_xfer_int(epd))
 				endpoint[out_eps].out_interval = epd->bInterval;
 			++out_eps;
 		}
 		if (in_eps < max_endpoints &&
-		    (epd->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == USB_DIR_IN) {
-			endpoint[in_eps].in_ep = epd->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
-			if ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT)
+		    usb_endpoint_dir_in(epd)) {
+			endpoint[in_eps].in_ep = usb_endpoint_num(epd);
+			if (usb_endpoint_xfer_int(epd))
 				endpoint[in_eps].in_interval = epd->bInterval;
 			++in_eps;
 		}
@@ -1607,21 +1607,21 @@ static int snd_usbmidi_create_endpoints_midiman(struct snd_usb_midi* umidi,
 	}
 
 	epd = get_endpoint(hostif, 0);
-	if ((epd->bEndpointAddress & USB_ENDPOINT_DIR_MASK) != USB_DIR_IN ||
-	    (epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_INT) {
+	if (usb_endpoint_dir_out(epd) ||
+	    usb_endpoint_type(epd) != USB_ENDPOINT_XFER_INT) {
 		snd_printdd(KERN_ERR "endpoint[0] isn't interrupt\n");
 		return -ENXIO;
 	}
 	epd = get_endpoint(hostif, 2);
-	if ((epd->bEndpointAddress & USB_ENDPOINT_DIR_MASK) != USB_DIR_OUT ||
-	    (epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_BULK) {
+	if (usb_endpoint_dir_in(epd) ||
+	    usb_endpoint_type(epd) != USB_ENDPOINT_XFER_BULK) {
 		snd_printdd(KERN_ERR "endpoint[2] isn't bulk output\n");
 		return -ENXIO;
 	}
 	if (endpoint->out_cables > 0x0001) {
 		epd = get_endpoint(hostif, 4);
-		if ((epd->bEndpointAddress & USB_ENDPOINT_DIR_MASK) != USB_DIR_OUT ||
-		    (epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_BULK) {
+		if (usb_endpoint_dir_in(epd) ||
+		    usb_endpoint_type(epd) != USB_ENDPOINT_XFER_BULK) {
 			snd_printdd(KERN_ERR "endpoint[4] isn't bulk output\n");
 			return -ENXIO;
 		}
diff --git a/sound/usb/usbmixer.c b/sound/usb/usbmixer.c
index a492461..9ce626f 100644
--- a/sound/usb/usbmixer.c
+++ b/sound/usb/usbmixer.c
@@ -1755,11 +1755,11 @@ static int snd_usb_mixer_status_create(struct usb_mixer_interface *mixer)
 	if (get_iface_desc(hostif)->bNumEndpoints < 1)
 		return 0;
 	ep = get_endpoint(hostif, 0);
-	if ((ep->bEndpointAddress & USB_ENDPOINT_DIR_MASK) != USB_DIR_IN ||
-	    (ep->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_INT)
+	if (usb_endpoint_dir_out(ep) ||
+	    usb_endpoint_type(ep) != USB_ENDPOINT_XFER_INT)
 		return 0;
 
-	epnum = ep->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
+	epnum = usb_endpoint_num(ep);
 	buffer_length = le16_to_cpu(ep->wMaxPacketSize);
 	transfer_buffer = kmalloc(buffer_length, GFP_KERNEL);
 	if (!transfer_buffer)
-- 
cgit v0.10.2


From 3fea2cb0451b9009af32d1418ea77cc674fe7e02 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 26 Dec 2008 12:20:43 +0800
Subject: ALSA: hda - fix name for ALC1200

Move the more specific preset for ALC1200 above the general one for
ALC888, so that it will have the chance to get matched and selected.

Reported-by: Thomas Schneider <nailstudio@gmx.net>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 0bd4e6b..69a251b 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -16638,9 +16638,9 @@ static struct hda_codec_preset snd_hda_preset_realtek[] = {
 	  .patch = patch_alc882 }, /* should be patch_alc883() in future */
 	{ .id = 0x10ec0885, .name = "ALC885", .patch = patch_alc882 },
 	{ .id = 0x10ec0887, .name = "ALC887", .patch = patch_alc883 },
-	{ .id = 0x10ec0888, .name = "ALC888", .patch = patch_alc883 },
 	{ .id = 0x10ec0888, .rev = 0x100101, .name = "ALC1200",
 	  .patch = patch_alc883 },
+	{ .id = 0x10ec0888, .name = "ALC888", .patch = patch_alc883 },
 	{ .id = 0x10ec0889, .name = "ALC889", .patch = patch_alc883 },
 	{} /* terminator */
 };
-- 
cgit v0.10.2


From 06bf3e15f64aacfb068fed5002b6544f870cc638 Mon Sep 17 00:00:00 2001
From: Chris Bagwell <chris@cnpbagwell.com>
Date: Thu, 1 Jan 2009 10:32:08 +0100
Subject: LSA: hda - Add HP Acacia detection

Add automatic mapping of HP Acacia motherboards to 3stack-hp.  Allows
for greater then 2 channel audio by enabling Channel Mode option in mixer.

Motherboard specs:
http://h10025.www1.hp.com/ewfrf/wc/document?docname=c01321559&lc=en&dlc=en&cc=us&product=3829353&os=2093&lang=en#

Signed-off-by: Chris Bagwell <chris at cnpbagwell dot com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 69a251b..9065ebf 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8467,6 +8467,7 @@ static struct snd_pci_quirk alc883_cfg_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x2a4f, "HP Samba", ALC888_3ST_HP),
 	SND_PCI_QUIRK(0x103c, 0x2a60, "HP Lucknow", ALC888_3ST_HP),
 	SND_PCI_QUIRK(0x103c, 0x2a61, "HP Nettle", ALC883_6ST_DIG),
+	SND_PCI_QUIRK(0x103c, 0x2a66, "HP Acacia", ALC888_3ST_HP),
 	SND_PCI_QUIRK(0x1043, 0x1873, "Asus M90V", ALC888_ASUS_M90V),
 	SND_PCI_QUIRK(0x1043, 0x8249, "Asus M2A-VM HDMI", ALC883_3ST_6ch_DIG),
 	SND_PCI_QUIRK(0x1043, 0x82fe, "Asus P5Q-EM HDMI", ALC1200_ASUS_P5Q),
-- 
cgit v0.10.2


From 9bef6489d72abd8f598aede92be3854a69324c50 Mon Sep 17 00:00:00 2001
From: Stephen Ware <stephen.ware@eqware.net>
Date: Wed, 31 Dec 2008 14:39:23 -0800
Subject: ASoC: Fix pxa2xx-pcm checks for invalid DMA channels

Set the invalid dma channel to -1 (and check properly for it) in
pxa2xx_pcm_hw_free().  Was assuming 0 is an invalid channel number but 0
is a valid pxa dma channel num.

Signed-off-by: stephen <stephen.ware@eqware.net>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/pxa/pxa2xx-pcm.c b/sound/soc/pxa/pxa2xx-pcm.c
index c670d08..53b9fb1 100644
--- a/sound/soc/pxa/pxa2xx-pcm.c
+++ b/sound/soc/pxa/pxa2xx-pcm.c
@@ -61,9 +61,9 @@ static int pxa2xx_pcm_hw_free(struct snd_pcm_substream *substream)
 
 	__pxa2xx_pcm_hw_free(substream);
 
-	if (prtd->dma_ch) {
+	if (prtd->dma_ch >= 0) {
 		pxa_free_dma(prtd->dma_ch);
-		prtd->dma_ch = 0;
+		prtd->dma_ch = -1;
 	}
 
 	return 0;
-- 
cgit v0.10.2


From ac11a2b35cc25c77d28218aaf60e7f7c6c7ee5d3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 1 Jan 2009 12:18:17 +0000
Subject: ASoC: Clean up kerneldoc warnings

Almost all parameters that have been misnamed in the comments.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index b098c0b..f73c134 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1300,6 +1300,8 @@ EXPORT_SYMBOL_GPL(snd_soc_test_bits);
 /**
  * snd_soc_new_pcms - create new sound card and pcms
  * @socdev: the SoC audio device
+ * @idx: ALSA card index
+ * @xid: card identification
  *
  * Create a new sound card based upon the codec and interface pcms.
  *
@@ -1472,7 +1474,7 @@ EXPORT_SYMBOL_GPL(snd_soc_set_runtime_hwparams);
  * snd_soc_cnew - create new control
  * @_template: control template
  * @data: control private data
- * @lnng_name: control long name
+ * @long_name: control long name
  *
  * Create a new mixer control from a template control.
  *
@@ -1522,7 +1524,7 @@ EXPORT_SYMBOL_GPL(snd_soc_info_enum_double);
 /**
  * snd_soc_get_enum_double - enumerated double mixer get callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to get the value of a double enumerated mixer.
  *
@@ -1551,7 +1553,7 @@ EXPORT_SYMBOL_GPL(snd_soc_get_enum_double);
 /**
  * snd_soc_put_enum_double - enumerated double mixer put callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to set the value of a double enumerated mixer.
  *
@@ -1668,7 +1670,7 @@ EXPORT_SYMBOL_GPL(snd_soc_info_volsw);
 /**
  * snd_soc_get_volsw - single mixer get callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to get the value of a single mixer control.
  *
@@ -1707,7 +1709,7 @@ EXPORT_SYMBOL_GPL(snd_soc_get_volsw);
 /**
  * snd_soc_put_volsw - single mixer put callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to set the value of a single mixer control.
  *
@@ -1775,7 +1777,7 @@ EXPORT_SYMBOL_GPL(snd_soc_info_volsw_2r);
 /**
  * snd_soc_get_volsw_2r - double mixer get callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to get the value of a double mixer control that spans 2 registers.
  *
@@ -1812,7 +1814,7 @@ EXPORT_SYMBOL_GPL(snd_soc_get_volsw_2r);
 /**
  * snd_soc_put_volsw_2r - double mixer set callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to set the value of a double mixer control that spans 2 registers.
  *
@@ -1882,7 +1884,7 @@ EXPORT_SYMBOL_GPL(snd_soc_info_volsw_s8);
 /**
  * snd_soc_get_volsw_s8 - signed mixer get callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to get the value of a signed mixer control.
  *
@@ -1909,7 +1911,7 @@ EXPORT_SYMBOL_GPL(snd_soc_get_volsw_s8);
 /**
  * snd_soc_put_volsw_sgn - signed mixer put callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to set the value of a signed mixer control.
  *
@@ -1954,7 +1956,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_sysclk);
 /**
  * snd_soc_dai_set_clkdiv - configure DAI clock dividers.
  * @dai: DAI
- * @clk_id: DAI specific clock divider ID
+ * @div_id: DAI specific clock divider ID
  * @div: new clock divisor.
  *
  * Configures the clock dividers. This is used to derive the best DAI bit and
@@ -2060,7 +2062,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_digital_mute);
 /**
  * snd_soc_register_card - Register a card with the ASoC core
  *
- * @param card Card to register
+ * @card: Card to register
  *
  * Note that currently this is an internal only function: it will be
  * exposed to machine drivers after further backporting of ASoC v2
@@ -2087,7 +2089,7 @@ static int snd_soc_register_card(struct snd_soc_card *card)
 /**
  * snd_soc_unregister_card - Unregister a card with the ASoC core
  *
- * @param card Card to unregister
+ * @card: Card to unregister
  *
  * Note that currently this is an internal only function: it will be
  * exposed to machine drivers after further backporting of ASoC v2
@@ -2107,7 +2109,7 @@ static int snd_soc_unregister_card(struct snd_soc_card *card)
 /**
  * snd_soc_register_dai - Register a DAI with the ASoC core
  *
- * @param dai DAI to register
+ * @dai: DAI to register
  */
 int snd_soc_register_dai(struct snd_soc_dai *dai)
 {
@@ -2134,7 +2136,7 @@ EXPORT_SYMBOL_GPL(snd_soc_register_dai);
 /**
  * snd_soc_unregister_dai - Unregister a DAI from the ASoC core
  *
- * @param dai DAI to unregister
+ * @dai: DAI to unregister
  */
 void snd_soc_unregister_dai(struct snd_soc_dai *dai)
 {
@@ -2149,8 +2151,8 @@ EXPORT_SYMBOL_GPL(snd_soc_unregister_dai);
 /**
  * snd_soc_register_dais - Register multiple DAIs with the ASoC core
  *
- * @param dai Array of DAIs to register
- * @param count Number of DAIs
+ * @dai: Array of DAIs to register
+ * @count: Number of DAIs
  */
 int snd_soc_register_dais(struct snd_soc_dai *dai, size_t count)
 {
@@ -2175,8 +2177,8 @@ EXPORT_SYMBOL_GPL(snd_soc_register_dais);
 /**
  * snd_soc_unregister_dais - Unregister multiple DAIs from the ASoC core
  *
- * @param dai Array of DAIs to unregister
- * @param count Number of DAIs
+ * @dai: Array of DAIs to unregister
+ * @count: Number of DAIs
  */
 void snd_soc_unregister_dais(struct snd_soc_dai *dai, size_t count)
 {
@@ -2190,7 +2192,7 @@ EXPORT_SYMBOL_GPL(snd_soc_unregister_dais);
 /**
  * snd_soc_register_platform - Register a platform with the ASoC core
  *
- * @param platform platform to register
+ * @platform: platform to register
  */
 int snd_soc_register_platform(struct snd_soc_platform *platform)
 {
@@ -2213,7 +2215,7 @@ EXPORT_SYMBOL_GPL(snd_soc_register_platform);
 /**
  * snd_soc_unregister_platform - Unregister a platform from the ASoC core
  *
- * @param platform platform to unregister
+ * @platform: platform to unregister
  */
 void snd_soc_unregister_platform(struct snd_soc_platform *platform)
 {
@@ -2228,7 +2230,7 @@ EXPORT_SYMBOL_GPL(snd_soc_unregister_platform);
 /**
  * snd_soc_register_codec - Register a codec with the ASoC core
  *
- * @param codec codec to register
+ * @codec: codec to register
  */
 int snd_soc_register_codec(struct snd_soc_codec *codec)
 {
@@ -2255,7 +2257,7 @@ EXPORT_SYMBOL_GPL(snd_soc_register_codec);
 /**
  * snd_soc_unregister_codec - Unregister a codec from the ASoC core
  *
- * @param codec codec to unregister
+ * @codec: codec to unregister
  */
 void snd_soc_unregister_codec(struct snd_soc_codec *codec)
 {
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 8863edd..6c79ca6 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -1077,7 +1077,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_new_widgets);
 /**
  * snd_soc_dapm_get_volsw - dapm mixer get callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to get the value of a dapm mixer control.
  *
@@ -1122,7 +1122,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_get_volsw);
 /**
  * snd_soc_dapm_put_volsw - dapm mixer set callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to set the value of a dapm mixer control.
  *
@@ -1193,7 +1193,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_put_volsw);
 /**
  * snd_soc_dapm_get_enum_double - dapm enumerated double mixer get callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to get the value of a dapm enumerated double mixer control.
  *
@@ -1221,7 +1221,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_get_enum_double);
 /**
  * snd_soc_dapm_put_enum_double - dapm enumerated double mixer set callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to set the value of a dapm enumerated double mixer control.
  *
@@ -1419,7 +1419,7 @@ int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
 
 /**
  * snd_soc_dapm_enable_pin - enable pin.
- * @snd_soc_codec: SoC codec
+ * @codec: SoC codec
  * @pin: pin name
  *
  * Enables input/output pin and it's parents or children widgets iff there is
-- 
cgit v0.10.2


From c64d8996bd758cedc2ddc04b86ca66fa1d8599cf Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Fri, 2 Jan 2009 11:27:18 +0300
Subject: x86: early_printk - use sizeof instead of hardcoded number

Impact: cleanup

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 23b138e..504ad19 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...)
 	va_list ap;
 
 	va_start(ap, fmt);
-	n = vscnprintf(buf, 512, fmt, ap);
+	n = vscnprintf(buf, sizeof(buf), fmt, ap);
 	early_console->write(early_console, buf, n);
 	va_end(ap);
 }
-- 
cgit v0.10.2


From f4e9749f451747f7cdd334eae951357f839c57f2 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Thu, 1 Jan 2009 18:14:35 +0100
Subject: ALSA: Use usb_set/get_intfdata

Use the USB functions usb_get_intfdata and usb_set_intfdata instead of
dev_get_drvdata and dev_set_drvdata, respectively.

The semantic patch that makes this change for the usb_get_intfdata case is
as follows: (http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@header@
@@

#include <linux/usb.h>

@same depends on header@
position p;
@@

usb_get_intfdata@p(...) { ... }

@depends on header@
position _p!=same.p;
identifier _f;
struct usb_interface*intf;
@@

_f@_p(...) { <+...
- dev_get_drvdata(&intf->dev)
+ usb_get_intfdata(intf)
...+> }
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/caiaq/caiaq-device.c b/sound/usb/caiaq/caiaq-device.c
index b143ef7..a62500e 100644
--- a/sound/usb/caiaq/caiaq-device.c
+++ b/sound/usb/caiaq/caiaq-device.c
@@ -446,7 +446,7 @@ static int __devinit snd_probe(struct usb_interface *intf,
 	if (!card)
 		return -ENOMEM;
 			
-	dev_set_drvdata(&intf->dev, card);
+	usb_set_intfdata(intf, card);
 	ret = init_card(caiaqdev(card));
 	if (ret < 0) {
 		log("unable to init card! (ret=%d)\n", ret);
@@ -460,7 +460,7 @@ static int __devinit snd_probe(struct usb_interface *intf,
 static void snd_disconnect(struct usb_interface *intf)
 {
 	struct snd_usb_caiaqdev *dev;
-	struct snd_card *card = dev_get_drvdata(&intf->dev);
+	struct snd_card *card = usb_get_intfdata(intf);
 
 	debug("%s(%p)\n", __func__, intf);
 
diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index bbd70d5..c709b95 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -3709,7 +3709,7 @@ static int usb_audio_probe(struct usb_interface *intf,
 	void *chip;
 	chip = snd_usb_audio_probe(interface_to_usbdev(intf), intf, id);
 	if (chip) {
-		dev_set_drvdata(&intf->dev, chip);
+		usb_set_intfdata(intf, chip);
 		return 0;
 	} else
 		return -EIO;
@@ -3718,13 +3718,13 @@ static int usb_audio_probe(struct usb_interface *intf,
 static void usb_audio_disconnect(struct usb_interface *intf)
 {
 	snd_usb_audio_disconnect(interface_to_usbdev(intf),
-				 dev_get_drvdata(&intf->dev));
+				 usb_get_intfdata(intf));
 }
 
 #ifdef CONFIG_PM
 static int usb_audio_suspend(struct usb_interface *intf, pm_message_t message)
 {
-	struct snd_usb_audio *chip = dev_get_drvdata(&intf->dev);
+	struct snd_usb_audio *chip = usb_get_intfdata(intf);
 	struct list_head *p;
 	struct snd_usb_stream *as;
 
@@ -3744,7 +3744,7 @@ static int usb_audio_suspend(struct usb_interface *intf, pm_message_t message)
 
 static int usb_audio_resume(struct usb_interface *intf)
 {
-	struct snd_usb_audio *chip = dev_get_drvdata(&intf->dev);
+	struct snd_usb_audio *chip = usb_get_intfdata(intf);
 
 	if (chip == (void *)-1L)
 		return 0;
diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c
index c2515b6..73e59f4 100644
--- a/sound/usb/usx2y/us122l.c
+++ b/sound/usb/usx2y/us122l.c
@@ -589,7 +589,7 @@ static int snd_us122l_suspend(struct usb_interface *intf, pm_message_t message)
 	struct us122l *us122l;
 	struct list_head *p;
 
-	card = dev_get_drvdata(&intf->dev);
+	card = usb_get_intfdata(intf);
 	if (!card)
 		return 0;
 	snd_power_change_state(card, SNDRV_CTL_POWER_D3hot);
@@ -615,7 +615,7 @@ static int snd_us122l_resume(struct usb_interface *intf)
 	struct list_head *p;
 	int err;
 
-	card = dev_get_drvdata(&intf->dev);
+	card = usb_get_intfdata(intf);
 	if (!card)
 		return 0;
 
diff --git a/sound/usb/usx2y/usbusx2y.c b/sound/usb/usx2y/usbusx2y.c
index e5981a6..ca26c53 100644
--- a/sound/usb/usx2y/usbusx2y.c
+++ b/sound/usb/usx2y/usbusx2y.c
@@ -392,7 +392,7 @@ static int snd_usX2Y_probe(struct usb_interface *intf, const struct usb_device_i
 	void *chip;
 	chip = usX2Y_usb_probe(interface_to_usbdev(intf), intf, id);
 	if (chip) {
-		dev_set_drvdata(&intf->dev, chip);
+		usb_set_intfdata(intf, chip);
 		return 0;
 	} else
 		return -EIO;
@@ -401,7 +401,7 @@ static int snd_usX2Y_probe(struct usb_interface *intf, const struct usb_device_i
 static void snd_usX2Y_disconnect(struct usb_interface *intf)
 {
 	usX2Y_usb_disconnect(interface_to_usbdev(intf),
-				 dev_get_drvdata(&intf->dev));
+				 usb_get_intfdata(intf));
 }
 
 MODULE_DEVICE_TABLE(usb, snd_usX2Y_usb_id_table);
-- 
cgit v0.10.2


From a9067d537615d534dcef06c0d819472e43a0d152 Mon Sep 17 00:00:00 2001
From: Ingo Brueckl <ib@wupperonline.de>
Date: Fri, 2 Jan 2009 14:42:00 +0100
Subject: x86: convert permanent_kmaps_init() from macro to inline

Impact: cleanup

This compiler warning:

  arch/x86/mm/init_32.c:515: warning: unused variable 'pgd_base'

triggers because permanent_kmaps_init() is a CPP macro in the
!CONFIG_HIGHMEM case, that does not tell the compiler that the
'pgd_base' parameter is used.

Convert permanent_kmaps_init() (and set_highmem_pages_init()) to
C inline functions - which gives the parameter a proper type and
which gets rid of the compiler warning as well.

Signed-off-by: Ingo Brueckl <ib@wupperonline.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 800e1d9..ad98b18 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -434,8 +434,12 @@ static void __init set_highmem_pages_init(void)
 #endif /* !CONFIG_NUMA */
 
 #else
-# define permanent_kmaps_init(pgd_base)		do { } while (0)
-# define set_highmem_pages_init()	do { } while (0)
+static inline void permanent_kmaps_init(pgd_t *pgd_base)
+{
+}
+static inline void set_highmem_pages_init(void)
+{
+}
 #endif /* CONFIG_HIGHMEM */
 
 void __init native_pagetable_setup_start(pgd_t *base)
-- 
cgit v0.10.2


From 3ee86dcdd273aa91cb9b4fe1e3d4f69035750a12 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:46 +0100
Subject: tx493x: fix indentation

Trivial CodingStyle fixup for tx4938ide and tx4939ide drivers.

Acked-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Acked-by: Sergei Shtyltov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
index 13b63e7..0bb8b0c 100644
--- a/drivers/ide/tx4938ide.c
+++ b/drivers/ide/tx4938ide.c
@@ -216,16 +216,16 @@ static const struct ide_tp_ops tx4938ide_tp_ops = {
 #endif	/* __BIG_ENDIAN */
 
 static const struct ide_port_ops tx4938ide_port_ops = {
-	.set_pio_mode = tx4938ide_set_pio_mode,
+	.set_pio_mode		= tx4938ide_set_pio_mode,
 };
 
 static const struct ide_port_info tx4938ide_port_info __initdata = {
-	.port_ops = &tx4938ide_port_ops,
+	.port_ops		= &tx4938ide_port_ops,
 #ifdef __BIG_ENDIAN
-	.tp_ops = &tx4938ide_tp_ops,
+	.tp_ops			= &tx4938ide_tp_ops,
 #endif
-	.host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
-	.pio_mask = ATA_PIO5,
+	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
+	.pio_mask		= ATA_PIO5,
 };
 
 static int __init tx4938ide_probe(struct platform_device *pdev)
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 97cd9e0..65cd097 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -623,33 +623,33 @@ static const struct ide_tp_ops tx4939ide_tp_ops = {
 #endif	/* __LITTLE_ENDIAN */
 
 static const struct ide_port_ops tx4939ide_port_ops = {
-	.set_pio_mode = tx4939ide_set_pio_mode,
-	.set_dma_mode = tx4939ide_set_dma_mode,
-	.clear_irq = tx4939ide_clear_irq,
-	.cable_detect = tx4939ide_cable_detect,
+	.set_pio_mode		= tx4939ide_set_pio_mode,
+	.set_dma_mode		= tx4939ide_set_dma_mode,
+	.clear_irq		= tx4939ide_clear_irq,
+	.cable_detect		= tx4939ide_cable_detect,
 };
 
 static const struct ide_dma_ops tx4939ide_dma_ops = {
-	.dma_host_set = tx4939ide_dma_host_set,
-	.dma_setup = tx4939ide_dma_setup,
-	.dma_exec_cmd = ide_dma_exec_cmd,
-	.dma_start = ide_dma_start,
-	.dma_end = tx4939ide_dma_end,
-	.dma_test_irq = tx4939ide_dma_test_irq,
-	.dma_lost_irq = ide_dma_lost_irq,
-	.dma_timeout = ide_dma_timeout,
+	.dma_host_set		= tx4939ide_dma_host_set,
+	.dma_setup		= tx4939ide_dma_setup,
+	.dma_exec_cmd		= ide_dma_exec_cmd,
+	.dma_start		= ide_dma_start,
+	.dma_end		= tx4939ide_dma_end,
+	.dma_test_irq		= tx4939ide_dma_test_irq,
+	.dma_lost_irq		= ide_dma_lost_irq,
+	.dma_timeout		= ide_dma_timeout,
 };
 
 static const struct ide_port_info tx4939ide_port_info __initdata = {
-	.init_hwif = tx4939ide_init_hwif,
-	.init_dma = tx4939ide_init_dma,
-	.port_ops = &tx4939ide_port_ops,
-	.dma_ops = &tx4939ide_dma_ops,
-	.tp_ops = &tx4939ide_tp_ops,
-	.host_flags = IDE_HFLAG_MMIO,
-	.pio_mask = ATA_PIO4,
-	.mwdma_mask = ATA_MWDMA2,
-	.udma_mask = ATA_UDMA5,
+	.init_hwif		= tx4939ide_init_hwif,
+	.init_dma		= tx4939ide_init_dma,
+	.port_ops		= &tx4939ide_port_ops,
+	.dma_ops		= &tx4939ide_dma_ops,
+	.tp_ops			= &tx4939ide_tp_ops,
+	.host_flags		= IDE_HFLAG_MMIO,
+	.pio_mask		= ATA_PIO4,
+	.mwdma_mask		= ATA_MWDMA2,
+	.udma_mask		= ATA_UDMA5,
 };
 
 static int __init tx4939ide_probe(struct platform_device *pdev)
-- 
cgit v0.10.2


From b1d249e845efb07975183c62b4f75576c4a8d467 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:47 +0100
Subject: ide: remove chipset type fixup from ide_host_register()

* Set chipset type explicitly in tx4938ide and tx4939ide host drivers
  (all other host drivers were updated already).

* Remove no longer used chipset type fixup from ide_host_register().

Acked-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index a64ec25..291dace 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1655,9 +1655,6 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 		if (hwif == NULL)
 			continue;
 
-		if (hwif->chipset == ide_unknown)
-			hwif->chipset = ide_generic;
-
 		if (hwif->present)
 			hwif_register_devices(hwif);
 	}
diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
index 0bb8b0c..b4ef218 100644
--- a/drivers/ide/tx4938ide.c
+++ b/drivers/ide/tx4938ide.c
@@ -226,6 +226,7 @@ static const struct ide_port_info tx4938ide_port_info __initdata = {
 #endif
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.pio_mask		= ATA_PIO5,
+	.chipset		= ide_generic,
 };
 
 static int __init tx4938ide_probe(struct platform_device *pdev)
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 65cd097..4a8c5a2 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -650,6 +650,7 @@ static const struct ide_port_info tx4939ide_port_info __initdata = {
 	.pio_mask		= ATA_PIO4,
 	.mwdma_mask		= ATA_MWDMA2,
 	.udma_mask		= ATA_UDMA5,
+	.chipset		= ide_generic,
 };
 
 static int __init tx4939ide_probe(struct platform_device *pdev)
-- 
cgit v0.10.2


From 96d40941236722777c259775640b8880b7dc6f33 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:47 +0100
Subject: ide: small ide_register_port() cleanup

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 291dace..7576a90 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -641,14 +641,9 @@ static int ide_register_port(ide_hwif_t *hwif)
 	/* register with global device tree */
 	dev_set_name(&hwif->gendev, hwif->name);
 	hwif->gendev.driver_data = hwif;
-	if (hwif->gendev.parent == NULL) {
-		if (hwif->dev)
-			hwif->gendev.parent = hwif->dev;
-		else
-			/* Would like to do = &device_legacy */
-			hwif->gendev.parent = NULL;
-	}
+	hwif->gendev.parent = hwif->dev;
 	hwif->gendev.release = hwif_release_dev;
+
 	ret = device_register(&hwif->gendev);
 	if (ret < 0) {
 		printk(KERN_WARNING "IDE: %s: device_register error: %d\n",
-- 
cgit v0.10.2


From 24630dc68a499baec367d24285bc6b92207cc100 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:47 +0100
Subject: ide: factor out device type classifying from do_identify()

Factor out device type classifying from do_identify()
to ide_classify_ata_dev() and ide_classify_atapi_dev().

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 7576a90..91f5fae 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -101,6 +101,82 @@ static void ide_disk_init_mult_count(ide_drive_t *drive)
 	}
 }
 
+static void ide_classify_ata_dev(ide_drive_t *drive)
+{
+	u16 *id = drive->id;
+	char *m = (char *)&id[ATA_ID_PROD];
+	int is_cfa = ata_id_is_cfa(id);
+
+	/* CF devices are *not* removable in Linux definition of the term */
+	if (is_cfa == 0 && (id[ATA_ID_CONFIG] & (1 << 7)))
+		drive->dev_flags |= IDE_DFLAG_REMOVABLE;
+
+	drive->media = ide_disk;
+
+	if (!ata_id_has_unload(drive->id))
+		drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
+
+	printk(KERN_INFO "%s: %s, %s DISK drive\n", drive->name, m,
+		is_cfa ? "CFA" : "ATA");
+}
+
+static void ide_classify_atapi_dev(ide_drive_t *drive)
+{
+	u16 *id = drive->id;
+	char *m = (char *)&id[ATA_ID_PROD];
+	u8 type = (id[ATA_ID_CONFIG] >> 8) & 0x1f;
+
+	printk(KERN_INFO "%s: %s, ATAPI ", drive->name, m);
+	switch (type) {
+	case ide_floppy:
+		if (!strstr(m, "CD-ROM")) {
+			if (!strstr(m, "oppy") &&
+			    !strstr(m, "poyp") &&
+			    !strstr(m, "ZIP"))
+				printk(KERN_CONT "cdrom or floppy?, assuming ");
+			if (drive->media != ide_cdrom) {
+				printk(KERN_CONT "FLOPPY");
+				drive->dev_flags |= IDE_DFLAG_REMOVABLE;
+				break;
+			}
+		}
+		/* Early cdrom models used zero */
+		type = ide_cdrom;
+	case ide_cdrom:
+		drive->dev_flags |= IDE_DFLAG_REMOVABLE;
+#ifdef CONFIG_PPC
+		/* kludge for Apple PowerBook internal zip */
+		if (!strstr(m, "CD-ROM") && strstr(m, "ZIP")) {
+			printk(KERN_CONT "FLOPPY");
+			type = ide_floppy;
+			break;
+		}
+#endif
+		printk(KERN_CONT "CD/DVD-ROM");
+		break;
+	case ide_tape:
+		printk(KERN_CONT "TAPE");
+		break;
+	case ide_optical:
+		printk(KERN_CONT "OPTICAL");
+		drive->dev_flags |= IDE_DFLAG_REMOVABLE;
+		break;
+	default:
+		printk(KERN_CONT "UNKNOWN (type %d)", type);
+		break;
+	}
+
+	printk(KERN_CONT " drive\n");
+	drive->media = type;
+	/* an ATAPI device ignores DRDY */
+	drive->ready_stat = 0;
+	if (ata_id_cdb_intr(id))
+		drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT;
+	drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
+	/* we don't do head unloading on ATAPI devices */
+	drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
+}
+
 /**
  *	do_identify	-	identify a drive
  *	@drive: drive to identify 
@@ -117,7 +193,7 @@ static void do_identify(ide_drive_t *drive, u8 cmd)
 	u16 *id = drive->id;
 	char *m = (char *)&id[ATA_ID_PROD];
 	unsigned long flags;
-	int bswap = 1, is_cfa;
+	int bswap = 1;
 
 	/* local CPU only; some systems need this */
 	local_irq_save(flags);
@@ -154,91 +230,23 @@ static void do_identify(ide_drive_t *drive, u8 cmd)
 	if (strstr(m, "E X A B Y T E N E S T"))
 		goto err_misc;
 
-	printk(KERN_INFO "%s: %s, ", drive->name, m);
-
 	drive->dev_flags |= IDE_DFLAG_PRESENT;
 	drive->dev_flags &= ~IDE_DFLAG_DEAD;
 
 	/*
 	 * Check for an ATAPI device
 	 */
-	if (cmd == ATA_CMD_ID_ATAPI) {
-		u8 type = (id[ATA_ID_CONFIG] >> 8) & 0x1f;
-
-		printk(KERN_CONT "ATAPI ");
-		switch (type) {
-			case ide_floppy:
-				if (!strstr(m, "CD-ROM")) {
-					if (!strstr(m, "oppy") &&
-					    !strstr(m, "poyp") &&
-					    !strstr(m, "ZIP"))
-						printk(KERN_CONT "cdrom or floppy?, assuming ");
-					if (drive->media != ide_cdrom) {
-						printk(KERN_CONT "FLOPPY");
-						drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-						break;
-					}
-				}
-				/* Early cdrom models used zero */
-				type = ide_cdrom;
-			case ide_cdrom:
-				drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-#ifdef CONFIG_PPC
-				/* kludge for Apple PowerBook internal zip */
-				if (!strstr(m, "CD-ROM") && strstr(m, "ZIP")) {
-					printk(KERN_CONT "FLOPPY");
-					type = ide_floppy;
-					break;
-				}
-#endif
-				printk(KERN_CONT "CD/DVD-ROM");
-				break;
-			case ide_tape:
-				printk(KERN_CONT "TAPE");
-				break;
-			case ide_optical:
-				printk(KERN_CONT "OPTICAL");
-				drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-				break;
-			default:
-				printk(KERN_CONT "UNKNOWN (type %d)", type);
-				break;
-		}
-		printk(KERN_CONT " drive\n");
-		drive->media = type;
-		/* an ATAPI device ignores DRDY */
-		drive->ready_stat = 0;
-		if (ata_id_cdb_intr(id))
-			drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT;
-		drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
-		/* we don't do head unloading on ATAPI devices */
-		drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
-		return;
-	}
-
+	if (cmd == ATA_CMD_ID_ATAPI)
+		ide_classify_atapi_dev(drive);
+	else
 	/*
 	 * Not an ATAPI device: looks like a "regular" hard disk
 	 */
-
-	is_cfa = ata_id_is_cfa(id);
-
-	/* CF devices are *not* removable in Linux definition of the term */
-	if (is_cfa == 0 && (id[ATA_ID_CONFIG] & (1 << 7)))
-		drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-
-	drive->media = ide_disk;
-
-	if (!ata_id_has_unload(drive->id))
-		drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
-
-	printk(KERN_CONT "%s DISK drive\n", is_cfa ? "CFA" : "ATA");
-
+		ide_classify_ata_dev(drive);
 	return;
-
 err_misc:
 	kfree(id);
 	drive->dev_flags &= ~IDE_DFLAG_PRESENT;
-	return;
 }
 
 /**
-- 
cgit v0.10.2


From ebdab07dad3d3a008e519b0a028e1e1ad5ecaef0 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:48 +0100
Subject: ide: move sysfs support to ide-sysfs.c

While at it:
- media_string() -> ide_media_string()

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 177e3f8..4107289 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -5,7 +5,7 @@
 EXTRA_CFLAGS				+= -Idrivers/ide
 
 ide-core-y += ide.o ide-ioctls.o ide-io.o ide-iops.o ide-lib.o ide-probe.o \
-	      ide-taskfile.o ide-pm.o ide-park.o ide-pio-blacklist.o
+	      ide-taskfile.o ide-pm.o ide-park.o ide-pio-blacklist.o ide-sysfs.o
 
 # core IDE code
 ide-core-$(CONFIG_IDE_TIMINGS)		+= ide-timings.o
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 91f5fae..f9efd06 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1420,58 +1420,6 @@ static void ide_port_cable_detect(ide_hwif_t *hwif)
 	}
 }
 
-static ssize_t store_delete_devices(struct device *portdev,
-				    struct device_attribute *attr,
-				    const char *buf, size_t n)
-{
-	ide_hwif_t *hwif = dev_get_drvdata(portdev);
-
-	if (strncmp(buf, "1", n))
-		return -EINVAL;
-
-	ide_port_unregister_devices(hwif);
-
-	return n;
-};
-
-static DEVICE_ATTR(delete_devices, S_IWUSR, NULL, store_delete_devices);
-
-static ssize_t store_scan(struct device *portdev,
-			  struct device_attribute *attr,
-			  const char *buf, size_t n)
-{
-	ide_hwif_t *hwif = dev_get_drvdata(portdev);
-
-	if (strncmp(buf, "1", n))
-		return -EINVAL;
-
-	ide_port_unregister_devices(hwif);
-	ide_port_scan(hwif);
-
-	return n;
-};
-
-static DEVICE_ATTR(scan, S_IWUSR, NULL, store_scan);
-
-static struct device_attribute *ide_port_attrs[] = {
-	&dev_attr_delete_devices,
-	&dev_attr_scan,
-	NULL
-};
-
-static int ide_sysfs_register_port(ide_hwif_t *hwif)
-{
-	int i, uninitialized_var(rc);
-
-	for (i = 0; ide_port_attrs[i]; i++) {
-		rc = device_create_file(hwif->portdev, ide_port_attrs[i]);
-		if (rc)
-			break;
-	}
-
-	return rc;
-}
-
 static unsigned int ide_indexes;
 
 /**
diff --git a/drivers/ide/ide-sysfs.c b/drivers/ide/ide-sysfs.c
new file mode 100644
index 0000000..883ffac
--- /dev/null
+++ b/drivers/ide/ide-sysfs.c
@@ -0,0 +1,125 @@
+#include <linux/kernel.h>
+#include <linux/ide.h>
+
+char *ide_media_string(ide_drive_t *drive)
+{
+	switch (drive->media) {
+	case ide_disk:
+		return "disk";
+	case ide_cdrom:
+		return "cdrom";
+	case ide_tape:
+		return "tape";
+	case ide_floppy:
+		return "floppy";
+	case ide_optical:
+		return "optical";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+static ssize_t media_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", ide_media_string(drive));
+}
+
+static ssize_t drivename_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", drive->name);
+}
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "ide:m-%s\n", ide_media_string(drive));
+}
+
+static ssize_t model_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]);
+}
+
+static ssize_t firmware_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]);
+}
+
+static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]);
+}
+
+struct device_attribute ide_dev_attrs[] = {
+	__ATTR_RO(media),
+	__ATTR_RO(drivename),
+	__ATTR_RO(modalias),
+	__ATTR_RO(model),
+	__ATTR_RO(firmware),
+	__ATTR(serial, 0400, serial_show, NULL),
+	__ATTR(unload_heads, 0644, ide_park_show, ide_park_store),
+	__ATTR_NULL
+};
+
+static ssize_t store_delete_devices(struct device *portdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t n)
+{
+	ide_hwif_t *hwif = dev_get_drvdata(portdev);
+
+	if (strncmp(buf, "1", n))
+		return -EINVAL;
+
+	ide_port_unregister_devices(hwif);
+
+	return n;
+};
+
+static DEVICE_ATTR(delete_devices, S_IWUSR, NULL, store_delete_devices);
+
+static ssize_t store_scan(struct device *portdev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t n)
+{
+	ide_hwif_t *hwif = dev_get_drvdata(portdev);
+
+	if (strncmp(buf, "1", n))
+		return -EINVAL;
+
+	ide_port_unregister_devices(hwif);
+	ide_port_scan(hwif);
+
+	return n;
+};
+
+static DEVICE_ATTR(scan, S_IWUSR, NULL, store_scan);
+
+static struct device_attribute *ide_port_attrs[] = {
+	&dev_attr_delete_devices,
+	&dev_attr_scan,
+	NULL
+};
+
+int ide_sysfs_register_port(ide_hwif_t *hwif)
+{
+	int i, uninitialized_var(rc);
+
+	for (i = 0; ide_port_attrs[i]; i++) {
+		rc = device_create_file(hwif->portdev, ide_port_attrs[i]);
+		if (rc)
+			break;
+	}
+
+	return rc;
+}
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index f0f09f7..46a2d4c 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -440,81 +440,13 @@ static int ide_bus_match(struct device *dev, struct device_driver *drv)
 	return 1;
 }
 
-static char *media_string(ide_drive_t *drive)
-{
-	switch (drive->media) {
-	case ide_disk:
-		return "disk";
-	case ide_cdrom:
-		return "cdrom";
-	case ide_tape:
-		return "tape";
-	case ide_floppy:
-		return "floppy";
-	case ide_optical:
-		return "optical";
-	default:
-		return "UNKNOWN";
-	}
-}
-
-static ssize_t media_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", media_string(drive));
-}
-
-static ssize_t drivename_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", drive->name);
-}
-
-static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "ide:m-%s\n", media_string(drive));
-}
-
-static ssize_t model_show(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]);
-}
-
-static ssize_t firmware_show(struct device *dev, struct device_attribute *attr,
-			     char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]);
-}
-
-static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
-			   char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]);
-}
-
-static struct device_attribute ide_dev_attrs[] = {
-	__ATTR_RO(media),
-	__ATTR_RO(drivename),
-	__ATTR_RO(modalias),
-	__ATTR_RO(model),
-	__ATTR_RO(firmware),
-	__ATTR(serial, 0400, serial_show, NULL),
-	__ATTR(unload_heads, 0644, ide_park_show, ide_park_store),
-	__ATTR_NULL
-};
-
 static int ide_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	ide_drive_t *drive = to_ide_device(dev);
 
-	add_uevent_var(env, "MEDIA=%s", media_string(drive));
+	add_uevent_var(env, "MEDIA=%s", ide_media_string(drive));
 	add_uevent_var(env, "DRIVENAME=%s", drive->name);
-	add_uevent_var(env, "MODALIAS=ide:m-%s", media_string(drive));
+	add_uevent_var(env, "MODALIAS=ide:m-%s", ide_media_string(drive));
 	return 0;
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index e99c56d..62fccae 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1533,6 +1533,7 @@ void ide_unregister_region(struct gendisk *);
 void ide_undecoded_slave(ide_drive_t *);
 
 void ide_port_apply_params(ide_hwif_t *);
+int ide_sysfs_register_port(ide_hwif_t *);
 
 struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **);
 void ide_host_free(struct ide_host *);
@@ -1627,6 +1628,9 @@ extern struct mutex ide_cfg_mtx;
 
 #define local_irq_set(flags)	do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0)
 
+char *ide_media_string(ide_drive_t *);
+
+extern struct device_attribute ide_dev_attrs[];
 extern struct bus_type ide_bus_type;
 extern struct class *ide_port_class;
 
-- 
cgit v0.10.2


From 295f00042aaf6b553b5f37348f89bab463d4a469 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:48 +0100
Subject: ide: don't execute the next queued command from the hard-IRQ context
 (v2)

* Tell the block layer that we are not done handling requests by using
  blk_plug_device() in ide_do_request() (request handling function)
  and ide_timer_expiry() (timeout handler) if the queue is not empty.

* Remove optimization which directly calls ide_do_request() for the next
  queued command from the ide_intr() (IRQ handler) and ide_timer_expiry().

* Remove no longer needed IRQ masking from ide_do_request() - in case of
  IDE ports needing serialization disable_irq_nosync()/enable_irq() was
  used for the (possibly shared) IRQ of the other IDE port.

* Put the misplaced comment in the right place in ide_do_request().

* Drop no longer needed 'int masked_irq' argument from ide_do_request().

* Merge ide_do_request() into do_ide_request().

* Remove no longer needed IDE_NO_IRQ define.

While at it:

* Don't use HWGROUP() macro in do_ide_request().

* Use __func__ in ide_intr().

This patch reduces IRQ hadling latency for IDE and improves the system-wide
handling of shared IRQs (which should result in more timeout resistant and
stable IDE systems).  It also makes it possible to do some further changes
later (i.e. replace some busy-waiting delays with sleeping equivalents).

v2:
Changes per review from Elias Oltmanns:
- fix wrong goto statement in 'if (startstop == ide_stopped)' block
- use spin_unlock_irq()
- don't use obsolete HWIF() macro

Cc: Elias Oltmanns <eo@nebensachen.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ecacc00..23754bc 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -778,8 +778,10 @@ repeat:
  * the driver.  This makes the driver much more friendlier to shared IRQs
  * than previous designs, while remaining 100% (?) SMP safe and capable.
  */
-static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
+void do_ide_request(struct request_queue *q)
 {
+	ide_drive_t	*orig_drive = q->queuedata;
+	ide_hwgroup_t	*hwgroup = orig_drive->hwif->hwgroup;
 	ide_drive_t	*drive;
 	ide_hwif_t	*hwif;
 	struct request	*rq;
@@ -837,10 +839,14 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 			}
 
 			/* no more work for this hwgroup (for now) */
-			return;
+			goto plug_device;
 		}
-	again:
-		hwif = HWIF(drive);
+
+		if (drive != orig_drive)
+			goto plug_device;
+again:
+		hwif = drive->hwif;
+
 		if (hwif != hwgroup->hwif) {
 			/*
 			 * set nIEN for previous hwif, drives in the
@@ -888,41 +894,26 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 				goto again;
 			/* We clear busy, there should be no pending ATA command at this point. */
 			hwgroup->busy = 0;
-			break;
+			goto plug_device;
 		}
 
 		hwgroup->rq = rq;
 
-		/*
-		 * Some systems have trouble with IDE IRQs arriving while
-		 * the driver is still setting things up.  So, here we disable
-		 * the IRQ used by this interface while the request is being started.
-		 * This may look bad at first, but pretty much the same thing
-		 * happens anyway when any interrupt comes in, IDE or otherwise
-		 *  -- the kernel masks the IRQ while it is being handled.
-		 */
-		if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
-			disable_irq_nosync(hwif->irq);
-		spin_unlock(&hwgroup->lock);
-		local_irq_enable_in_hardirq();
-			/* allow other IRQs while we start this request */
+		spin_unlock_irq(&hwgroup->lock);
 		startstop = start_request(drive, rq);
 		spin_lock_irq(&hwgroup->lock);
-		if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
-			enable_irq(hwif->irq);
-		if (startstop == ide_stopped)
+
+		if (startstop == ide_stopped) {
 			hwgroup->busy = 0;
+			if (!elv_queue_empty(orig_drive->queue))
+				blk_plug_device(orig_drive->queue);
+		}
 	}
-}
+	return;
 
-/*
- * Passes the stuff to ide_do_request
- */
-void do_ide_request(struct request_queue *q)
-{
-	ide_drive_t *drive = q->queuedata;
-
-	ide_do_request(HWGROUP(drive), IDE_NO_IRQ);
+plug_device:
+	if (!elv_queue_empty(orig_drive->queue))
+		blk_plug_device(orig_drive->queue);
 }
 
 /*
@@ -1074,11 +1065,13 @@ void ide_timer_expiry (unsigned long data)
 			drive->service_time = jiffies - drive->service_start;
 			spin_lock_irq(&hwgroup->lock);
 			enable_irq(hwif->irq);
-			if (startstop == ide_stopped)
+			if (startstop == ide_stopped) {
 				hwgroup->busy = 0;
+				if (!elv_queue_empty(drive->queue))
+					blk_plug_device(drive->queue);
+			}
 		}
 	}
-	ide_do_request(hwgroup, IDE_NO_IRQ);
 	spin_unlock_irqrestore(&hwgroup->lock, flags);
 }
 
@@ -1271,11 +1264,11 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	if (startstop == ide_stopped) {
 		if (hwgroup->handler == NULL) {	/* paranoia */
 			hwgroup->busy = 0;
-			ide_do_request(hwgroup, hwif->irq);
-		} else {
-			printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler "
-				"on exit\n", drive->name);
-		}
+			if (!elv_queue_empty(drive->queue))
+				blk_plug_device(drive->queue);
+		} else
+			printk(KERN_ERR "%s: %s: huh? expected NULL handler "
+					"on exit\n", __func__, drive->name);
 	}
 out_handled:
 	irq_ret = IRQ_HANDLED;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 62fccae..968ca8f 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -32,13 +32,6 @@
 # define SUPPORT_VLB_SYNC 1
 #endif
 
-/*
- * Used to indicate "no IRQ", should be a value that cannot be an IRQ
- * number.
- */
- 
-#define IDE_NO_IRQ		(-1)
-
 typedef unsigned char	byte;	/* used everywhere */
 
 /*
-- 
cgit v0.10.2


From 2fb211502e2c0513e12d677ed4d7891f3c5e1413 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:49 +0100
Subject: ide: remove IDE PM hack from do_ide_request()

We now tell block layer that there is still work to do using
blk_plug_device() so hack for IDE Power Management can be removed
(it was buggy for hwgroups having more than 4 devices anyway).

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 23754bc..40327d1 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -786,7 +786,6 @@ void do_ide_request(struct request_queue *q)
 	ide_hwif_t	*hwif;
 	struct request	*rq;
 	ide_startstop_t	startstop;
-	int             loops = 0;
 
 	/* caller must own hwgroup->lock */
 	BUG_ON(!irqs_disabled());
@@ -844,7 +843,7 @@ void do_ide_request(struct request_queue *q)
 
 		if (drive != orig_drive)
 			goto plug_device;
-again:
+
 		hwif = drive->hwif;
 
 		if (hwif != hwgroup->hwif) {
@@ -882,16 +881,10 @@ again:
 		 * though. I hope that doesn't happen too much, hopefully not
 		 * unless the subdriver triggers such a thing in its own PM
 		 * state machine.
-		 *
-		 * We count how many times we loop here to make sure we service
-		 * all drives in the hwgroup without looping for ever
 		 */
 		if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
 		    blk_pm_request(rq) == 0 &&
 		    (rq->cmd_flags & REQ_PREEMPT) == 0) {
-			drive = drive->next ? drive->next : hwgroup->drive;
-			if (loops++ < 4 && !blk_queue_plugged(drive->queue))
-				goto again;
 			/* We clear busy, there should be no pending ATA command at this point. */
 			hwgroup->busy = 0;
 			goto plug_device;
-- 
cgit v0.10.2


From b2cfb05a701809abee591265a198afa029d68bff Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:49 +0100
Subject: ide: remove "paranoia" checks for hwgroup->busy

Remove "paranoia" checks for hwgroup->busy from ide_timer_expiry()
and ide_intr().  This is a preparation for future changes.

Cc: Michael Schmitz <schmitz@biophys.uni-duesseldorf.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Elias Oltmanns <eo@nebensachen.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 40327d1..c605121 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1011,10 +1011,7 @@ void ide_timer_expiry (unsigned long data)
 		} else {
 			ide_hwif_t *hwif;
 			ide_startstop_t startstop = ide_stopped;
-			if (!hwgroup->busy) {
-				hwgroup->busy = 1;	/* paranoia */
-				printk(KERN_ERR "%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name);
-			}
+
 			if ((expiry = hwgroup->expiry) != NULL) {
 				/* continue */
 				if ((wait = expiry(drive)) > 0) {
@@ -1227,10 +1224,6 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 		 */
 		goto out;
 
-	if (!hwgroup->busy) {
-		hwgroup->busy = 1;	/* paranoia */
-		printk(KERN_ERR "%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name);
-	}
 	hwgroup->handler = NULL;
 	hwgroup->req_gen++;
 	del_timer(&hwgroup->timer);
-- 
cgit v0.10.2


From 631de3708d595d153e8a510a3608689290f4c0ed Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:50 +0100
Subject: ide: add ide_[un]lock_hwgroup() helpers

Add ide_[un]lock_hwgroup() inline helpers for obtaining exclusive
access to the given hwgroup and update the core code accordingly.

[ This change besides making code saner results in more efficient
  use of ide_{get,release}_lock(). ]

Cc: Michael Schmitz <schmitz@biophys.uni-duesseldorf.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Elias Oltmanns <eo@nebensachen.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index c605121..ab48004 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -790,10 +790,7 @@ void do_ide_request(struct request_queue *q)
 	/* caller must own hwgroup->lock */
 	BUG_ON(!irqs_disabled());
 
-	while (!hwgroup->busy) {
-		hwgroup->busy = 1;
-		/* for atari only */
-		ide_get_lock(ide_intr, hwgroup);
+	while (!ide_lock_hwgroup(hwgroup)) {
 		drive = choose_drive(hwgroup);
 		if (drive == NULL) {
 			int sleeping = 0;
@@ -825,17 +822,10 @@ void do_ide_request(struct request_queue *q)
 				hwgroup->sleeping = 1;
 				hwgroup->req_gen_timer = hwgroup->req_gen;
 				mod_timer(&hwgroup->timer, sleep);
-				/* we purposely leave hwgroup->busy==1
+				/* we purposely leave hwgroup locked
 				 * while sleeping */
-			} else {
-				/* Ugly, but how can we sleep for the lock
-				 * otherwise? perhaps from tq_disk?
-				 */
-
-				/* for atari only */
-				ide_release_lock();
-				hwgroup->busy = 0;
-			}
+			} else
+				ide_unlock_hwgroup(hwgroup);
 
 			/* no more work for this hwgroup (for now) */
 			goto plug_device;
@@ -865,7 +855,7 @@ void do_ide_request(struct request_queue *q)
 		 */
 		rq = elv_next_request(drive->queue);
 		if (!rq) {
-			hwgroup->busy = 0;
+			ide_unlock_hwgroup(hwgroup);
 			break;
 		}
 
@@ -885,8 +875,8 @@ void do_ide_request(struct request_queue *q)
 		if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
 		    blk_pm_request(rq) == 0 &&
 		    (rq->cmd_flags & REQ_PREEMPT) == 0) {
-			/* We clear busy, there should be no pending ATA command at this point. */
-			hwgroup->busy = 0;
+			/* there should be no pending command at this point */
+			ide_unlock_hwgroup(hwgroup);
 			goto plug_device;
 		}
 
@@ -897,7 +887,7 @@ void do_ide_request(struct request_queue *q)
 		spin_lock_irq(&hwgroup->lock);
 
 		if (startstop == ide_stopped) {
-			hwgroup->busy = 0;
+			ide_unlock_hwgroup(hwgroup);
 			if (!elv_queue_empty(orig_drive->queue))
 				blk_plug_device(orig_drive->queue);
 		}
@@ -1001,7 +991,7 @@ void ide_timer_expiry (unsigned long data)
 		 */
 		if (hwgroup->sleeping) {
 			hwgroup->sleeping = 0;
-			hwgroup->busy = 0;
+			ide_unlock_hwgroup(hwgroup);
 		}
 	} else {
 		ide_drive_t *drive = hwgroup->drive;
@@ -1056,7 +1046,7 @@ void ide_timer_expiry (unsigned long data)
 			spin_lock_irq(&hwgroup->lock);
 			enable_irq(hwif->irq);
 			if (startstop == ide_stopped) {
-				hwgroup->busy = 0;
+				ide_unlock_hwgroup(hwgroup);
 				if (!elv_queue_empty(drive->queue))
 					blk_plug_device(drive->queue);
 			}
@@ -1249,7 +1239,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	drive->service_time = jiffies - drive->service_start;
 	if (startstop == ide_stopped) {
 		if (hwgroup->handler == NULL) {	/* paranoia */
-			hwgroup->busy = 0;
+			ide_unlock_hwgroup(hwgroup);
 			if (!elv_queue_empty(drive->queue))
 				blk_plug_device(drive->queue);
 		} else
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 63d01c5..44c6787 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -22,7 +22,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 		if (reset_timer && hwgroup->sleeping &&
 		    del_timer(&hwgroup->timer)) {
 			hwgroup->sleeping = 0;
-			hwgroup->busy = 0;
+			ide_unlock_hwgroup(hwgroup);
 			blk_start_queueing(q);
 		}
 		spin_unlock_irq(&hwgroup->lock);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 968ca8f..f408d61 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1280,6 +1280,26 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
 
 extern void ide_timer_expiry(unsigned long);
 extern irqreturn_t ide_intr(int irq, void *dev_id);
+
+static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup)
+{
+	if (hwgroup->busy)
+		return 1;
+
+	hwgroup->busy = 1;
+	/* for atari only */
+	ide_get_lock(ide_intr, hwgroup);
+
+	return 0;
+}
+
+static inline void ide_unlock_hwgroup(ide_hwgroup_t *hwgroup)
+{
+	/* for atari only */
+	ide_release_lock();
+	hwgroup->busy = 0;
+}
+
 extern void do_ide_request(struct request_queue *);
 
 void ide_init_disk(struct gendisk *, ide_drive_t *);
-- 
cgit v0.10.2


From 201bffa46466b4afdf7d29db8eca3fa5decb39c8 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:50 +0100
Subject: ide: use per-device request queue locks (v2)

* Move hack for flush requests from choose_drive() to do_ide_request().

* Add ide_plug_device() helper and convert core IDE code from using
  per-hwgroup lock as a request lock to use the ->queue_lock instead.

* Remove no longer needed:
  - choose_drive() function
  - WAKEUP() macro
  - 'sleeping' flag from ide_hwif_t
  - 'service_{start,time}' fields from ide_drive_t

This patch results in much simpler and more maintainable code
(besides being a scalability improvement).

v2:
* Fixes/improvements based on review from Elias:
  - take as many requests off the queue as possible
  - remove now redundant BUG_ON()

Cc: Elias Oltmanns <eo@nebensachen.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ab48004..bb3248a 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -667,85 +667,10 @@ void ide_stall_queue (ide_drive_t *drive, unsigned long timeout)
 	drive->sleep = timeout + jiffies;
 	drive->dev_flags |= IDE_DFLAG_SLEEPING;
 }
-
 EXPORT_SYMBOL(ide_stall_queue);
 
-#define WAKEUP(drive)	((drive)->service_start + 2 * (drive)->service_time)
-
-/**
- *	choose_drive		-	select a drive to service
- *	@hwgroup: hardware group to select on
- *
- *	choose_drive() selects the next drive which will be serviced.
- *	This is necessary because the IDE layer can't issue commands
- *	to both drives on the same cable, unlike SCSI.
- */
- 
-static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup)
-{
-	ide_drive_t *drive, *best;
-
-repeat:	
-	best = NULL;
-	drive = hwgroup->drive;
-
-	/*
-	 * drive is doing pre-flush, ordered write, post-flush sequence. even
-	 * though that is 3 requests, it must be seen as a single transaction.
-	 * we must not preempt this drive until that is complete
-	 */
-	if (blk_queue_flushing(drive->queue)) {
-		/*
-		 * small race where queue could get replugged during
-		 * the 3-request flush cycle, just yank the plug since
-		 * we want it to finish asap
-		 */
-		blk_remove_plug(drive->queue);
-		return drive;
-	}
-
-	do {
-		u8 dev_s = !!(drive->dev_flags & IDE_DFLAG_SLEEPING);
-		u8 best_s = (best && !!(best->dev_flags & IDE_DFLAG_SLEEPING));
-
-		if ((dev_s == 0 || time_after_eq(jiffies, drive->sleep)) &&
-		    !elv_queue_empty(drive->queue)) {
-			if (best == NULL ||
-			    (dev_s && (best_s == 0 || time_before(drive->sleep, best->sleep))) ||
-			    (best_s == 0 && time_before(WAKEUP(drive), WAKEUP(best)))) {
-				if (!blk_queue_plugged(drive->queue))
-					best = drive;
-			}
-		}
-	} while ((drive = drive->next) != hwgroup->drive);
-
-	if (best && (best->dev_flags & IDE_DFLAG_NICE1) &&
-	    (best->dev_flags & IDE_DFLAG_SLEEPING) == 0 &&
-	    best != hwgroup->drive && best->service_time > WAIT_MIN_SLEEP) {
-		long t = (signed long)(WAKEUP(best) - jiffies);
-		if (t >= WAIT_MIN_SLEEP) {
-		/*
-		 * We *may* have some time to spare, but first let's see if
-		 * someone can potentially benefit from our nice mood today..
-		 */
-			drive = best->next;
-			do {
-				if ((drive->dev_flags & IDE_DFLAG_SLEEPING) == 0
-				 && time_before(jiffies - best->service_time, WAKEUP(drive))
-				 && time_before(WAKEUP(drive), jiffies + t))
-				{
-					ide_stall_queue(best, min_t(long, t, 10 * WAIT_MIN_SLEEP));
-					goto repeat;
-				}
-			} while ((drive = drive->next) != best);
-		}
-	}
-	return best;
-}
-
 /*
  * Issue a new request to a drive from hwgroup
- * Caller must have already done spin_lock_irqsave(&hwgroup->lock, ..);
  *
  * A hwgroup is a serialized group of IDE interfaces.  Usually there is
  * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640)
@@ -757,8 +682,7 @@ repeat:
  * possibly along with many other devices.  This is especially common in
  * PCI-based systems with off-board IDE controller cards.
  *
- * The IDE driver uses a per-hwgroup spinlock to protect
- * access to the request queues, and to protect the hwgroup->busy flag.
+ * The IDE driver uses a per-hwgroup lock to protect the hwgroup->busy flag.
  *
  * The first thread into the driver for a particular hwgroup sets the
  * hwgroup->busy flag to indicate that this hwgroup is now active,
@@ -780,61 +704,38 @@ repeat:
  */
 void do_ide_request(struct request_queue *q)
 {
-	ide_drive_t	*orig_drive = q->queuedata;
-	ide_hwgroup_t	*hwgroup = orig_drive->hwif->hwgroup;
-	ide_drive_t	*drive;
-	ide_hwif_t	*hwif;
+	ide_drive_t	*drive = q->queuedata;
+	ide_hwif_t	*hwif = drive->hwif;
+	ide_hwgroup_t	*hwgroup = hwif->hwgroup;
 	struct request	*rq;
 	ide_startstop_t	startstop;
 
-	/* caller must own hwgroup->lock */
-	BUG_ON(!irqs_disabled());
-
-	while (!ide_lock_hwgroup(hwgroup)) {
-		drive = choose_drive(hwgroup);
-		if (drive == NULL) {
-			int sleeping = 0;
-			unsigned long sleep = 0; /* shut up, gcc */
-			hwgroup->rq = NULL;
-			drive = hwgroup->drive;
-			do {
-				if ((drive->dev_flags & IDE_DFLAG_SLEEPING) &&
-				    (sleeping == 0 ||
-				     time_before(drive->sleep, sleep))) {
-					sleeping = 1;
-					sleep = drive->sleep;
-				}
-			} while ((drive = drive->next) != hwgroup->drive);
-			if (sleeping) {
+	/*
+	 * drive is doing pre-flush, ordered write, post-flush sequence. even
+	 * though that is 3 requests, it must be seen as a single transaction.
+	 * we must not preempt this drive until that is complete
+	 */
+	if (blk_queue_flushing(q))
 		/*
-		 * Take a short snooze, and then wake up this hwgroup again.
-		 * This gives other hwgroups on the same a chance to
-		 * play fairly with us, just in case there are big differences
-		 * in relative throughputs.. don't want to hog the cpu too much.
+		 * small race where queue could get replugged during
+		 * the 3-request flush cycle, just yank the plug since
+		 * we want it to finish asap
 		 */
-				if (time_before(sleep, jiffies + WAIT_MIN_SLEEP))
-					sleep = jiffies + WAIT_MIN_SLEEP;
-#if 1
-				if (timer_pending(&hwgroup->timer))
-					printk(KERN_CRIT "ide_set_handler: timer already active\n");
-#endif
-				/* so that ide_timer_expiry knows what to do */
-				hwgroup->sleeping = 1;
-				hwgroup->req_gen_timer = hwgroup->req_gen;
-				mod_timer(&hwgroup->timer, sleep);
-				/* we purposely leave hwgroup locked
-				 * while sleeping */
-			} else
-				ide_unlock_hwgroup(hwgroup);
+		blk_remove_plug(q);
 
-			/* no more work for this hwgroup (for now) */
-			goto plug_device;
-		}
+	spin_unlock_irq(q->queue_lock);
+	spin_lock_irq(&hwgroup->lock);
 
-		if (drive != orig_drive)
-			goto plug_device;
+	if (!ide_lock_hwgroup(hwgroup)) {
+repeat:
+		hwgroup->rq = NULL;
 
-		hwif = drive->hwif;
+		if (drive->dev_flags & IDE_DFLAG_SLEEPING) {
+			if (time_before(drive->sleep, jiffies)) {
+				ide_unlock_hwgroup(hwgroup);
+				goto plug_device;
+			}
+		}
 
 		if (hwif != hwgroup->hwif) {
 			/*
@@ -847,16 +748,20 @@ void do_ide_request(struct request_queue *q)
 		hwgroup->hwif = hwif;
 		hwgroup->drive = drive;
 		drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
-		drive->service_start = jiffies;
 
+		spin_unlock_irq(&hwgroup->lock);
+		spin_lock_irq(q->queue_lock);
 		/*
 		 * we know that the queue isn't empty, but this can happen
 		 * if the q->prep_rq_fn() decides to kill a request
 		 */
 		rq = elv_next_request(drive->queue);
+		spin_unlock_irq(q->queue_lock);
+		spin_lock_irq(&hwgroup->lock);
+
 		if (!rq) {
 			ide_unlock_hwgroup(hwgroup);
-			break;
+			goto out;
 		}
 
 		/*
@@ -886,17 +791,21 @@ void do_ide_request(struct request_queue *q)
 		startstop = start_request(drive, rq);
 		spin_lock_irq(&hwgroup->lock);
 
-		if (startstop == ide_stopped) {
-			ide_unlock_hwgroup(hwgroup);
-			if (!elv_queue_empty(orig_drive->queue))
-				blk_plug_device(orig_drive->queue);
-		}
-	}
+		if (startstop == ide_stopped)
+			goto repeat;
+	} else
+		goto plug_device;
+out:
+	spin_unlock_irq(&hwgroup->lock);
+	spin_lock_irq(q->queue_lock);
 	return;
 
 plug_device:
-	if (!elv_queue_empty(orig_drive->queue))
-		blk_plug_device(orig_drive->queue);
+	spin_unlock_irq(&hwgroup->lock);
+	spin_lock_irq(q->queue_lock);
+
+	if (!elv_queue_empty(q))
+		blk_plug_device(q);
 }
 
 /*
@@ -957,6 +866,17 @@ out:
 	return ret;
 }
 
+static void ide_plug_device(ide_drive_t *drive)
+{
+	struct request_queue *q = drive->queue;
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!elv_queue_empty(q))
+		blk_plug_device(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
 /**
  *	ide_timer_expiry	-	handle lack of an IDE interrupt
  *	@data: timer callback magic (hwgroup)
@@ -974,10 +894,12 @@ out:
 void ide_timer_expiry (unsigned long data)
 {
 	ide_hwgroup_t	*hwgroup = (ide_hwgroup_t *) data;
+	ide_drive_t	*uninitialized_var(drive);
 	ide_handler_t	*handler;
 	ide_expiry_t	*expiry;
 	unsigned long	flags;
 	unsigned long	wait = -1;
+	int		plug_device = 0;
 
 	spin_lock_irqsave(&hwgroup->lock, flags);
 
@@ -989,12 +911,8 @@ void ide_timer_expiry (unsigned long data)
 		 * or we were "sleeping" to give other devices a chance.
 		 * Either way, we don't really want to complain about anything.
 		 */
-		if (hwgroup->sleeping) {
-			hwgroup->sleeping = 0;
-			ide_unlock_hwgroup(hwgroup);
-		}
 	} else {
-		ide_drive_t *drive = hwgroup->drive;
+		drive = hwgroup->drive;
 		if (!drive) {
 			printk(KERN_ERR "ide_timer_expiry: hwgroup->drive was NULL\n");
 			hwgroup->handler = NULL;
@@ -1042,17 +960,18 @@ void ide_timer_expiry (unsigned long data)
 					ide_error(drive, "irq timeout",
 						  hwif->tp_ops->read_status(hwif));
 			}
-			drive->service_time = jiffies - drive->service_start;
 			spin_lock_irq(&hwgroup->lock);
 			enable_irq(hwif->irq);
 			if (startstop == ide_stopped) {
 				ide_unlock_hwgroup(hwgroup);
-				if (!elv_queue_empty(drive->queue))
-					blk_plug_device(drive->queue);
+				plug_device = 1;
 			}
 		}
 	}
 	spin_unlock_irqrestore(&hwgroup->lock, flags);
+
+	if (plug_device)
+		ide_plug_device(drive);
 }
 
 /**
@@ -1146,10 +1065,11 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	unsigned long flags;
 	ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
 	ide_hwif_t *hwif = hwgroup->hwif;
-	ide_drive_t *drive;
+	ide_drive_t *uninitialized_var(drive);
 	ide_handler_t *handler;
 	ide_startstop_t startstop;
 	irqreturn_t irq_ret = IRQ_NONE;
+	int plug_device = 0;
 
 	spin_lock_irqsave(&hwgroup->lock, flags);
 
@@ -1236,12 +1156,10 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	 * same irq as is currently being serviced here, and Linux
 	 * won't allow another of the same (on any CPU) until we return.
 	 */
-	drive->service_time = jiffies - drive->service_start;
 	if (startstop == ide_stopped) {
 		if (hwgroup->handler == NULL) {	/* paranoia */
 			ide_unlock_hwgroup(hwgroup);
-			if (!elv_queue_empty(drive->queue))
-				blk_plug_device(drive->queue);
+			plug_device = 1;
 		} else
 			printk(KERN_ERR "%s: %s: huh? expected NULL handler "
 					"on exit\n", __func__, drive->name);
@@ -1250,6 +1168,10 @@ out_handled:
 	irq_ret = IRQ_HANDLED;
 out:
 	spin_unlock_irqrestore(&hwgroup->lock, flags);
+
+	if (plug_device)
+		ide_plug_device(drive);
+
 	return irq_ret;
 }
 
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 44c6787..678454a 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -16,16 +16,19 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	spin_lock_irq(&hwgroup->lock);
 	if (drive->dev_flags & IDE_DFLAG_PARKED) {
 		int reset_timer = time_before(timeout, drive->sleep);
+		int start_queue = 0;
 
 		drive->sleep = timeout;
 		wake_up_all(&ide_park_wq);
-		if (reset_timer && hwgroup->sleeping &&
-		    del_timer(&hwgroup->timer)) {
-			hwgroup->sleeping = 0;
-			ide_unlock_hwgroup(hwgroup);
+		if (reset_timer && del_timer(&hwgroup->timer))
+			start_queue = 1;
+		spin_unlock_irq(&hwgroup->lock);
+
+		if (start_queue) {
+			spin_lock_irq(q->queue_lock);
 			blk_start_queueing(q);
+			spin_unlock_irq(q->queue_lock);
 		}
-		spin_unlock_irq(&hwgroup->lock);
 		return;
 	}
 	spin_unlock_irq(&hwgroup->lock);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index f9efd06..966b74c 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -881,8 +881,7 @@ static int ide_init_queue(ide_drive_t *drive)
 	 *	do not.
 	 */
 
-	q = blk_init_queue_node(do_ide_request, &hwif->hwgroup->lock,
-				hwif_to_node(hwif));
+	q = blk_init_queue_node(do_ide_request, NULL, hwif_to_node(hwif));
 	if (!q)
 		return 1;
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index f408d61..5f86ad4 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -603,8 +603,6 @@ struct ide_drive_s {
 	unsigned long dev_flags;
 
 	unsigned long sleep;		/* sleep until this time */
-	unsigned long service_start;	/* time we started last request */
-	unsigned long service_time;	/* service time of last request */
 	unsigned long timeout;		/* max time to wait for irq */
 
 	special_t	special;	/* special action flags */
@@ -872,8 +870,6 @@ typedef struct hwgroup_s {
 
 		/* BOOL: protects all fields below */
 	volatile int busy;
-		/* BOOL: wake us up on timer expiry */
-	unsigned int sleeping	: 1;
 		/* BOOL: polling active & poll_timeout field valid */
 	unsigned int polling	: 1;
 
-- 
cgit v0.10.2


From 0f38aaa4980fdf5de215e0a8bf6d6032164a6c4b Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:50 +0100
Subject: ide-cd: move debug defines into header

While at it:
- disable compiling-in debug support by default

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
[bart: fixup patch description]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 5daa4dd..65e5513 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -53,14 +53,6 @@
 
 #include "ide-cd.h"
 
-#define IDECD_DEBUG_LOG		1
-
-#if IDECD_DEBUG_LOG
-#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
-#else
-#define ide_debug_log(lvl, fmt, args...) do {} while (0)
-#endif
-
 static DEFINE_MUTEX(idecd_ref_mutex);
 
 static void ide_cd_release(struct kref *);
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index d5ce336..389faa4 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -8,6 +8,14 @@
 #include <linux/cdrom.h>
 #include <asm/byteorder.h>
 
+#define IDECD_DEBUG_LOG		0
+
+#if IDECD_DEBUG_LOG
+#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
+#else
+#define ide_debug_log(lvl, fmt, args...) do {} while (0)
+#endif
+
 /*
  * typical timeout for packet command
  */
-- 
cgit v0.10.2


From bf64741fe89280bd81a9e3a1beadec1570861848 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:50 +0100
Subject: ide: make IDE_AFLAG_.. numbering continuous again

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 5f86ad4..eb4c01f 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -473,53 +473,53 @@ enum {
 
 	/* ide-cd */
 	/* Drive cannot eject the disc. */
-	IDE_AFLAG_NO_EJECT		= (1 << 3),
+	IDE_AFLAG_NO_EJECT		= (1 << 1),
 	/* Drive is a pre ATAPI 1.2 drive. */
-	IDE_AFLAG_PRE_ATAPI12		= (1 << 4),
+	IDE_AFLAG_PRE_ATAPI12		= (1 << 2),
 	/* TOC addresses are in BCD. */
-	IDE_AFLAG_TOCADDR_AS_BCD	= (1 << 5),
+	IDE_AFLAG_TOCADDR_AS_BCD	= (1 << 3),
 	/* TOC track numbers are in BCD. */
-	IDE_AFLAG_TOCTRACKS_AS_BCD	= (1 << 6),
+	IDE_AFLAG_TOCTRACKS_AS_BCD	= (1 << 4),
 	/*
 	 * Drive does not provide data in multiples of SECTOR_SIZE
 	 * when more than one interrupt is needed.
 	 */
-	IDE_AFLAG_LIMIT_NFRAMES		= (1 << 7),
+	IDE_AFLAG_LIMIT_NFRAMES		= (1 << 5),
 	/* Saved TOC information is current. */
-	IDE_AFLAG_TOC_VALID		= (1 << 9),
+	IDE_AFLAG_TOC_VALID		= (1 << 6),
 	/* We think that the drive door is locked. */
-	IDE_AFLAG_DOOR_LOCKED		= (1 << 10),
+	IDE_AFLAG_DOOR_LOCKED		= (1 << 7),
 	/* SET_CD_SPEED command is unsupported. */
-	IDE_AFLAG_NO_SPEED_SELECT	= (1 << 11),
-	IDE_AFLAG_VERTOS_300_SSD	= (1 << 12),
-	IDE_AFLAG_VERTOS_600_ESD	= (1 << 13),
-	IDE_AFLAG_SANYO_3CD		= (1 << 14),
-	IDE_AFLAG_FULL_CAPS_PAGE	= (1 << 15),
-	IDE_AFLAG_PLAY_AUDIO_OK		= (1 << 16),
-	IDE_AFLAG_LE_SPEED_FIELDS	= (1 << 17),
+	IDE_AFLAG_NO_SPEED_SELECT	= (1 << 8),
+	IDE_AFLAG_VERTOS_300_SSD	= (1 << 9),
+	IDE_AFLAG_VERTOS_600_ESD	= (1 << 10),
+	IDE_AFLAG_SANYO_3CD		= (1 << 11),
+	IDE_AFLAG_FULL_CAPS_PAGE	= (1 << 12),
+	IDE_AFLAG_PLAY_AUDIO_OK		= (1 << 13),
+	IDE_AFLAG_LE_SPEED_FIELDS	= (1 << 14),
 
 	/* ide-floppy */
 	/* Avoid commands not supported in Clik drive */
-	IDE_AFLAG_CLIK_DRIVE		= (1 << 19),
+	IDE_AFLAG_CLIK_DRIVE		= (1 << 15),
 	/* Requires BH algorithm for packets */
-	IDE_AFLAG_ZIP_DRIVE		= (1 << 20),
+	IDE_AFLAG_ZIP_DRIVE		= (1 << 16),
 	/* Supports format progress report */
-	IDE_AFLAG_SRFP			= (1 << 22),
+	IDE_AFLAG_SRFP			= (1 << 17),
 
 	/* ide-tape */
-	IDE_AFLAG_IGNORE_DSC		= (1 << 23),
+	IDE_AFLAG_IGNORE_DSC		= (1 << 18),
 	/* 0 When the tape position is unknown */
-	IDE_AFLAG_ADDRESS_VALID		= (1 <<	24),
+	IDE_AFLAG_ADDRESS_VALID		= (1 <<	19),
 	/* Device already opened */
-	IDE_AFLAG_BUSY			= (1 << 25),
+	IDE_AFLAG_BUSY			= (1 << 20),
 	/* Attempt to auto-detect the current user block size */
-	IDE_AFLAG_DETECT_BS		= (1 << 26),
+	IDE_AFLAG_DETECT_BS		= (1 << 21),
 	/* Currently on a filemark */
-	IDE_AFLAG_FILEMARK		= (1 << 27),
+	IDE_AFLAG_FILEMARK		= (1 << 22),
 	/* 0 = no tape is loaded, so we don't rewind after ejecting */
-	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 28),
+	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 23),
 
-	IDE_AFLAG_NO_AUTOCLOSE		= (1 << 29),
+	IDE_AFLAG_NO_AUTOCLOSE		= (1 << 24),
 };
 
 /* device flags */
-- 
cgit v0.10.2


From 07bd3f4731f9c7ebcbab90905ca4ad6fc6825f96 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 2 Jan 2009 16:12:51 +0100
Subject: ide-floppy: allocate only toplevel packet commands

This makes the top-level function just allocate a single pc entry, and then
pass it down as a pointer to all the helper functions that also need one
of those "struct ide_atapi_pc" things. As far as I can tell, the use of
these things never overlaps each other, BUT I DID NOT CHECK VERY CLOSELY!

So I'm not guaranteeing this is correct, and I don't have the hardware. It
would be good for somebody who knows the code more, and has the hardware,
could please test this?

With this, ide-floppy still has fairly big stack usage, but instead of

	idefloppy_ioctl [vmlinux]:              1208
	ide_floppy_get_capacity [vmlinux]:      872
	idefloppy_release [vmlinux]:            408
	idefloppy_open [vmlinux]:               408

where those two first ones are at the very top of the list of stack users
for me, it's now

	ide_floppy_get_capacity [vmlinux]:           404
	ide_floppy_ioctl [vmlinux]:                  364

ie they are still high, but they are no longer at the top.

Borislav: Since ide_floppy_get_capacity is passed as a function pointer to other
parts of the kernel (e.g., block layer) we need that ide_atapi_pc to be created
on stack. Also, redid stack users numbers above. The two functions missing from
Linus' original 'make stackusage' output are due to ide being
rewritten/reorganized atm.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index aeb1ad7..1f07f38 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -342,38 +342,38 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
  * Look at the flexible disk page parameters. We ignore the CHS capacity
  * parameters and use the LBA parameters instead.
  */
-static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
+static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive,
+					     struct ide_atapi_pc *pc)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
-	struct ide_atapi_pc pc;
 	u8 *page;
 	int capacity, lba_capacity;
 	u16 transfer_rate, sector_size, cyls, rpm;
 	u8 heads, sectors;
 
-	ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE);
+	ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE);
 
-	if (ide_queue_pc_tail(drive, disk, &pc)) {
+	if (ide_queue_pc_tail(drive, disk, pc)) {
 		printk(KERN_ERR PFX "Can't get flexible disk page params\n");
 		return 1;
 	}
 
-	if (pc.buf[3] & 0x80)
+	if (pc->buf[3] & 0x80)
 		drive->dev_flags |= IDE_DFLAG_WP;
 	else
 		drive->dev_flags &= ~IDE_DFLAG_WP;
 
 	set_disk_ro(disk, !!(drive->dev_flags & IDE_DFLAG_WP));
 
-	page = &pc.buf[8];
+	page = &pc->buf[8];
 
-	transfer_rate = be16_to_cpup((__be16 *)&pc.buf[8 + 2]);
-	sector_size   = be16_to_cpup((__be16 *)&pc.buf[8 + 6]);
-	cyls          = be16_to_cpup((__be16 *)&pc.buf[8 + 8]);
-	rpm           = be16_to_cpup((__be16 *)&pc.buf[8 + 28]);
-	heads         = pc.buf[8 + 4];
-	sectors       = pc.buf[8 + 5];
+	transfer_rate = be16_to_cpup((__be16 *)&pc->buf[8 + 2]);
+	sector_size   = be16_to_cpup((__be16 *)&pc->buf[8 + 6]);
+	cyls          = be16_to_cpup((__be16 *)&pc->buf[8 + 8]);
+	rpm           = be16_to_cpup((__be16 *)&pc->buf[8 + 28]);
+	heads         = pc->buf[8 + 4];
+	sectors       = pc->buf[8 + 5];
 
 	capacity = cyls * heads * sectors * sector_size;
 
@@ -499,7 +499,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 
 	/* Clik! disk does not support get_flexible_disk_page */
 	if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE))
-		(void) ide_floppy_get_flexible_disk_page(drive);
+		(void) ide_floppy_get_flexible_disk_page(drive, &pc);
 
 	return rc;
 }
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index 2bc51ff..8f8be85 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -31,10 +31,11 @@
  * On exit we set nformats to the number of records we've actually initialized.
  */
 
-static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
+static int ide_floppy_get_format_capacities(ide_drive_t *drive,
+					    struct ide_atapi_pc *pc,
+					    int __user *arg)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
-	struct ide_atapi_pc pc;
 	u8 header_len, desc_cnt;
 	int i, blocks, length, u_array_size, u_index;
 	int __user *argp;
@@ -45,13 +46,13 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
 	if (u_array_size <= 0)
 		return -EINVAL;
 
-	ide_floppy_create_read_capacity_cmd(&pc);
-	if (ide_queue_pc_tail(drive, floppy->disk, &pc)) {
+	ide_floppy_create_read_capacity_cmd(pc);
+	if (ide_queue_pc_tail(drive, floppy->disk, pc)) {
 		printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n");
 		return -EIO;
 	}
 
-	header_len = pc.buf[3];
+	header_len = pc->buf[3];
 	desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */
 
 	u_index = 0;
@@ -68,8 +69,8 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
 		if (u_index >= u_array_size)
 			break;	/* User-supplied buffer too small */
 
-		blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]);
-		length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]);
+		blocks = be32_to_cpup((__be32 *)&pc->buf[desc_start]);
+		length = be16_to_cpup((__be16 *)&pc->buf[desc_start + 6]);
 
 		if (put_user(blocks, argp))
 			return -EFAULT;
@@ -111,29 +112,28 @@ static void ide_floppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b,
 	pc->flags |= PC_FLAG_WRITING;
 }
 
-static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
+static int ide_floppy_get_sfrp_bit(ide_drive_t *drive, struct ide_atapi_pc *pc)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
-	struct ide_atapi_pc pc;
 
 	drive->atapi_flags &= ~IDE_AFLAG_SRFP;
 
-	ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_CAPABILITIES_PAGE);
-	pc.flags |= PC_FLAG_SUPPRESS_ERROR;
+	ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_CAPABILITIES_PAGE);
+	pc->flags |= PC_FLAG_SUPPRESS_ERROR;
 
-	if (ide_queue_pc_tail(drive, floppy->disk, &pc))
+	if (ide_queue_pc_tail(drive, floppy->disk, pc))
 		return 1;
 
-	if (pc.buf[8 + 2] & 0x40)
+	if (pc->buf[8 + 2] & 0x40)
 		drive->atapi_flags |= IDE_AFLAG_SRFP;
 
 	return 0;
 }
 
-static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
+static int ide_floppy_format_unit(ide_drive_t *drive, struct ide_atapi_pc *pc,
+				  int __user *arg)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
-	struct ide_atapi_pc pc;
 	int blocks, length, flags, err = 0;
 
 	if (floppy->openers > 1) {
@@ -166,10 +166,10 @@ static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
 		goto out;
 	}
 
-	(void)ide_floppy_get_sfrp_bit(drive);
-	ide_floppy_create_format_unit_cmd(&pc, blocks, length, flags);
+	ide_floppy_get_sfrp_bit(drive, pc);
+	ide_floppy_create_format_unit_cmd(pc, blocks, length, flags);
 
-	if (ide_queue_pc_tail(drive, floppy->disk, &pc))
+	if (ide_queue_pc_tail(drive, floppy->disk, pc))
 		err = -EIO;
 
 out:
@@ -188,15 +188,16 @@ out:
  * the dsc bit, and return either 0 or 65536.
  */
 
-static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
+static int ide_floppy_get_format_progress(ide_drive_t *drive,
+					  struct ide_atapi_pc *pc,
+					  int __user *arg)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
-	struct ide_atapi_pc pc;
 	int progress_indication = 0x10000;
 
 	if (drive->atapi_flags & IDE_AFLAG_SRFP) {
-		ide_create_request_sense_cmd(drive, &pc);
-		if (ide_queue_pc_tail(drive, floppy->disk, &pc))
+		ide_create_request_sense_cmd(drive, pc);
+		if (ide_queue_pc_tail(drive, floppy->disk, pc))
 			return -EIO;
 
 		if (floppy->sense_key == 2 &&
@@ -241,20 +242,21 @@ static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc,
 	return 0;
 }
 
-static int ide_floppy_format_ioctl(ide_drive_t *drive, fmode_t mode,
-				   unsigned int cmd, void __user *argp)
+static int ide_floppy_format_ioctl(ide_drive_t *drive, struct ide_atapi_pc *pc,
+				   fmode_t mode, unsigned int cmd,
+				   void __user *argp)
 {
 	switch (cmd) {
 	case IDEFLOPPY_IOCTL_FORMAT_SUPPORTED:
 		return 0;
 	case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY:
-		return ide_floppy_get_format_capacities(drive, argp);
+		return ide_floppy_get_format_capacities(drive, pc, argp);
 	case IDEFLOPPY_IOCTL_FORMAT_START:
 		if (!(mode & FMODE_WRITE))
 			return -EPERM;
-		return ide_floppy_format_unit(drive, (int __user *)argp);
+		return ide_floppy_format_unit(drive, pc, (int __user *)argp);
 	case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS:
-		return ide_floppy_get_format_progress(drive, argp);
+		return ide_floppy_get_format_progress(drive, pc, argp);
 	default:
 		return -ENOTTY;
 	}
@@ -270,7 +272,7 @@ int ide_floppy_ioctl(ide_drive_t *drive, struct block_device *bdev,
 	if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR)
 		return ide_floppy_lockdoor(drive, &pc, arg, cmd);
 
-	err = ide_floppy_format_ioctl(drive, mode, cmd, argp);
+	err = ide_floppy_format_ioctl(drive, &pc, mode, cmd, argp);
 	if (err != -ENOTTY)
 		return err;
 
-- 
cgit v0.10.2


From 93c164af19f608c5f737eb9bed8cb4de3a872329 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 2 Jan 2009 16:12:51 +0100
Subject: remove ide-scsi

As planed, this removes ide-scsi.

The 2.6 kernel supports direct writing to ide CD drives, which
eliminates the need for ide-scsi. ide-scsi has been unmaintained and
marked as deprecated.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: James.Bottomley@HansenPartnership.com
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index dc7c681..df18d87 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -310,15 +310,6 @@ Who:  Krzysztof Piotr Oledzki <ole@ans.pl>
 
 ---------------------------
 
-What: ide-scsi (BLK_DEV_IDESCSI)
-When: 2.6.29
-Why:  The 2.6 kernel supports direct writing to ide CD drives, which
-      eliminates the need for ide-scsi. The new method is more
-      efficient in every way.
-Who:  FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
-
----------------------------
-
 What:	i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client()
 When:	2.6.29 (ideally) or 2.6.30 (more likely)
 Why:	Deprecated by the new (standard) device driver binding model. Use
diff --git a/MAINTAINERS b/MAINTAINERS
index ceb32ee..144766c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2146,11 +2146,6 @@ M:	Gadi Oxman <gadio@netvision.net.il>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 
-IDE-SCSI DRIVER
-L:	linux-ide@vger.kernel.org
-L:	linux-scsi@vger.kernel.org
-S:	Orphan
-
 IDLE-I7300
 P:	Andy Henroid
 M:	andrew.d.henroid@intel.com
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index c9f21e3..937945e 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -185,23 +185,6 @@ config BLK_DEV_IDETAPE
 	  To compile this driver as a module, choose M here: the
 	  module will be called ide-tape.
 
-config BLK_DEV_IDESCSI
-	tristate "SCSI emulation support (DEPRECATED)"
-	depends on SCSI
-	select IDE_ATAPI
-	---help---
-	  WARNING: ide-scsi is no longer needed for cd writing applications!
-	  The 2.6 kernel supports direct writing to ide-cd, which eliminates
-	  the need for ide-scsi + the entire scsi stack just for writing a
-	  cd. The new method is more efficient in every way.
-
-	  This will provide SCSI host adapter emulation for IDE ATAPI devices,
-	  and will allow you to use a SCSI device driver instead of a native
-	  ATAPI driver.
-
-	  If both this SCSI emulation and native ATAPI support are compiled
-	  into the kernel, the native support will be used.
-
 config BLK_DEV_IDEACPI
 	bool "IDE ACPI support"
 	depends on ACPI
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 152d4aa..b732297 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -21,7 +21,7 @@ config SCSI
 	  You also need to say Y here if you have a device which speaks
 	  the SCSI protocol.  Examples of this include the parallel port
 	  version of the IOMEGA ZIP drive, USB storage devices, Fibre
-	  Channel, FireWire storage and the IDE-SCSI emulation driver.
+	  Channel, and FireWire storage.
 
 	  To compile this driver as a module, choose M here and read
 	  <file:Documentation/scsi/scsi.txt>.
@@ -101,9 +101,9 @@ config CHR_DEV_OSST
 	---help---
 	  The OnStream SC-x0 SCSI tape drives cannot be driven by the
 	  standard st driver, but instead need this special osst driver and
-	  use the  /dev/osstX char device nodes (major 206).  Via usb-storage
-	  and ide-scsi, you may be able to drive the USB-x0 and DI-x0 drives
-	  as well.  Note that there is also a second generation of OnStream
+	  use the  /dev/osstX char device nodes (major 206).  Via usb-storage,
+	  you may be able to drive the USB-x0 and DI-x0 drives as well.
+	  Note that there is also a second generation of OnStream
 	  tape drives (ADR-x0) that supports the standard SCSI-2 commands for
 	  tapes (QIC-157) and can be driven by the standard driver st.
 	  For more information, you may have a look at the SCSI-HOWTO
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 1410697..7461eb0 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -105,7 +105,6 @@ obj-$(CONFIG_SCSI_GDTH)		+= gdth.o
 obj-$(CONFIG_SCSI_INITIO)	+= initio.o
 obj-$(CONFIG_SCSI_INIA100)	+= a100u2w.o
 obj-$(CONFIG_SCSI_QLOGICPTI)	+= qlogicpti.o
-obj-$(CONFIG_BLK_DEV_IDESCSI)	+= ide-scsi.o
 obj-$(CONFIG_SCSI_MESH)		+= mesh.o
 obj-$(CONFIG_SCSI_MAC53C94)	+= mac53c94.o
 obj-$(CONFIG_BLK_DEV_3W_XXXX_RAID) += 3w-xxxx.o
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
deleted file mode 100644
index c24140a..0000000
--- a/drivers/scsi/ide-scsi.c
+++ /dev/null
@@ -1,840 +0,0 @@
-/*
- * Copyright (C) 1996-1999  Gadi Oxman <gadio@netvision.net.il>
- * Copyright (C) 2004-2005  Bartlomiej Zolnierkiewicz
- */
-
-/*
- * Emulation of a SCSI host adapter for IDE ATAPI devices.
- *
- * With this driver, one can use the Linux SCSI drivers instead of the
- * native IDE ATAPI drivers.
- *
- * Ver 0.1   Dec  3 96   Initial version.
- * Ver 0.2   Jan 26 97   Fixed bug in cleanup_module() and added emulation
- *                        of MODE_SENSE_6/MODE_SELECT_6 for cdroms. Thanks
- *                        to Janos Farkas for pointing this out.
- *                       Avoid using bitfields in structures for m68k.
- *                       Added Scatter/Gather and DMA support.
- * Ver 0.4   Dec  7 97   Add support for ATAPI PD/CD drives.
- *                       Use variable timeout for each command.
- * Ver 0.5   Jan  2 98   Fix previous PD/CD support.
- *                       Allow disabling of SCSI-6 to SCSI-10 transformation.
- * Ver 0.6   Jan 27 98   Allow disabling of SCSI command translation layer
- *                        for access through /dev/sg.
- *                       Fix MODE_SENSE_6/MODE_SELECT_6/INQUIRY translation.
- * Ver 0.7   Dec 04 98   Ignore commands where lun != 0 to avoid multiple
- *                        detection of devices with CONFIG_SCSI_MULTI_LUN
- * Ver 0.8   Feb 05 99   Optical media need translation too. Reverse 0.7.
- * Ver 0.9   Jul 04 99   Fix a bug in SG_SET_TRANSFORM.
- * Ver 0.91  Jun 10 02   Fix "off by one" error in transforms
- * Ver 0.92  Dec 31 02   Implement new SCSI mid level API
- */
-
-#define IDESCSI_VERSION "0.92"
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/ioport.h>
-#include <linux/blkdev.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/ide.h>
-#include <linux/scatterlist.h>
-#include <linux/delay.h>
-#include <linux/mutex.h>
-#include <linux/bitops.h>
-
-#include <asm/io.h>
-#include <asm/uaccess.h>
-
-#include <scsi/scsi.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_device.h>
-#include <scsi/scsi_host.h>
-#include <scsi/scsi_tcq.h>
-#include <scsi/sg.h>
-
-#define IDESCSI_DEBUG_LOG		0
-
-#if IDESCSI_DEBUG_LOG
-#define debug_log(fmt, args...) \
-	printk(KERN_INFO "ide-scsi: " fmt, ## args)
-#else
-#define debug_log(fmt, args...) do {} while (0)
-#endif
-
-/*
- *	SCSI command transformation layer
- */
-#define IDESCSI_SG_TRANSFORM		1	/* /dev/sg transformation */
-
-/*
- *	Log flags
- */
-#define IDESCSI_LOG_CMD			0	/* Log SCSI commands */
-
-typedef struct ide_scsi_obj {
-	ide_drive_t		*drive;
-	ide_driver_t		*driver;
-	struct gendisk		*disk;
-	struct Scsi_Host	*host;
-
-	unsigned long transform;		/* SCSI cmd translation layer */
-	unsigned long log;			/* log flags */
-} idescsi_scsi_t;
-
-static DEFINE_MUTEX(idescsi_ref_mutex);
-/* Set by module param to skip cd */
-static int idescsi_nocd;
-
-#define ide_scsi_g(disk) \
-	container_of((disk)->private_data, struct ide_scsi_obj, driver)
-
-static struct ide_scsi_obj *ide_scsi_get(struct gendisk *disk)
-{
-	struct ide_scsi_obj *scsi = NULL;
-
-	mutex_lock(&idescsi_ref_mutex);
-	scsi = ide_scsi_g(disk);
-	if (scsi) {
-		if (ide_device_get(scsi->drive))
-			scsi = NULL;
-		else
-			scsi_host_get(scsi->host);
-	}
-	mutex_unlock(&idescsi_ref_mutex);
-	return scsi;
-}
-
-static void ide_scsi_put(struct ide_scsi_obj *scsi)
-{
-	ide_drive_t *drive = scsi->drive;
-
-	mutex_lock(&idescsi_ref_mutex);
-	scsi_host_put(scsi->host);
-	ide_device_put(drive);
-	mutex_unlock(&idescsi_ref_mutex);
-}
-
-static inline idescsi_scsi_t *scsihost_to_idescsi(struct Scsi_Host *host)
-{
-	return (idescsi_scsi_t*) (&host[1]);
-}
-
-static inline idescsi_scsi_t *drive_to_idescsi(ide_drive_t *ide_drive)
-{
-	return scsihost_to_idescsi(ide_drive->driver_data);
-}
-
-static void ide_scsi_hex_dump(u8 *data, int len)
-{
-	print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0);
-}
-
-static int idescsi_end_request(ide_drive_t *, int, int);
-
-static void ide_scsi_callback(ide_drive_t *drive, int dsc)
-{
-	idescsi_scsi_t *scsi = drive_to_idescsi(drive);
-	struct ide_atapi_pc *pc = drive->pc;
-
-	if (pc->flags & PC_FLAG_TIMEDOUT)
-		debug_log("%s: got timed out packet %lu at %lu\n", __func__,
-			  pc->scsi_cmd->serial_number, jiffies);
-		/* end this request now - scsi should retry it*/
-	else if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
-		printk(KERN_INFO "Packet command completed, %d bytes"
-				 " transferred\n", pc->xferred);
-
-	idescsi_end_request(drive, 1, 0);
-}
-
-static int idescsi_check_condition(ide_drive_t *drive,
-		struct request *failed_cmd)
-{
-	idescsi_scsi_t *scsi = drive_to_idescsi(drive);
-	struct ide_atapi_pc   *pc;
-	struct request *rq;
-	u8             *buf;
-
-	/* stuff a sense request in front of our current request */
-	pc = kzalloc(sizeof(struct ide_atapi_pc), GFP_ATOMIC);
-	rq = blk_get_request(drive->queue, READ, GFP_ATOMIC);
-	buf = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_ATOMIC);
-	if (!pc || !rq || !buf) {
-		kfree(buf);
-		if (rq)
-			blk_put_request(rq);
-		kfree(pc);
-		return -ENOMEM;
-	}
-	rq->special = (char *) pc;
-	pc->rq = rq;
-	pc->buf = buf;
-	pc->c[0] = REQUEST_SENSE;
-	pc->c[4] = pc->req_xfer = pc->buf_size = SCSI_SENSE_BUFFERSIZE;
-	rq->cmd_type = REQ_TYPE_SENSE;
-	rq->cmd_flags |= REQ_PREEMPT;
-	pc->timeout = jiffies + WAIT_READY;
-	/* NOTE! Save the failed packet command in "rq->buffer" */
-	rq->buffer = (void *) failed_cmd->special;
-	pc->scsi_cmd = ((struct ide_atapi_pc *) failed_cmd->special)->scsi_cmd;
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) {
-		printk ("ide-scsi: %s: queue cmd = ", drive->name);
-		ide_scsi_hex_dump(pc->c, 6);
-	}
-	rq->rq_disk = scsi->disk;
-	rq->ref_count++;
-	memcpy(rq->cmd, pc->c, 12);
-	ide_do_drive_cmd(drive, rq);
-	return 0;
-}
-
-static ide_startstop_t
-idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ))
-		/* force an abort */
-		hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE);
-
-	rq->errors++;
-
-	idescsi_end_request(drive, 0, 0);
-
-	return ide_stopped;
-}
-
-static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
-{
-	idescsi_scsi_t *scsi = drive_to_idescsi(drive);
-	struct request *rq = HWGROUP(drive)->rq;
-	struct ide_atapi_pc *pc = (struct ide_atapi_pc *) rq->special;
-	int log = test_bit(IDESCSI_LOG_CMD, &scsi->log);
-	struct Scsi_Host *host;
-	int errors = rq->errors;
-	unsigned long flags;
-
-	if (!blk_special_request(rq) && !blk_sense_request(rq)) {
-		ide_end_request(drive, uptodate, nrsecs);
-		return 0;
-	}
-	ide_end_drive_cmd (drive, 0, 0);
-	if (blk_sense_request(rq)) {
-		struct ide_atapi_pc *opc = (struct ide_atapi_pc *) rq->buffer;
-		if (log) {
-			printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number);
-			ide_scsi_hex_dump(pc->buf, 16);
-		}
-		memcpy((void *) opc->scsi_cmd->sense_buffer, pc->buf,
-			SCSI_SENSE_BUFFERSIZE);
-		kfree(pc->buf);
-		kfree(pc);
-		blk_put_request(rq);
-		pc = opc;
-		rq = pc->rq;
-		pc->scsi_cmd->result = (CHECK_CONDITION << 1) |
-				(((pc->flags & PC_FLAG_TIMEDOUT) ?
-				  DID_TIME_OUT :
-				  DID_OK) << 16);
-	} else if (pc->flags & PC_FLAG_TIMEDOUT) {
-		if (log)
-			printk (KERN_WARNING "ide-scsi: %s: timed out for %lu\n",
-					drive->name, pc->scsi_cmd->serial_number);
-		pc->scsi_cmd->result = DID_TIME_OUT << 16;
-	} else if (errors >= ERROR_MAX) {
-		pc->scsi_cmd->result = DID_ERROR << 16;
-		if (log)
-			printk ("ide-scsi: %s: I/O error for %lu\n", drive->name, pc->scsi_cmd->serial_number);
-	} else if (errors) {
-		if (log)
-			printk ("ide-scsi: %s: check condition for %lu\n", drive->name, pc->scsi_cmd->serial_number);
-		if (!idescsi_check_condition(drive, rq))
-			/* we started a request sense, so we'll be back, exit for now */
-			return 0;
-		pc->scsi_cmd->result = (CHECK_CONDITION << 1) | (DID_OK << 16);
-	} else {
-		pc->scsi_cmd->result = DID_OK << 16;
-	}
-	host = pc->scsi_cmd->device->host;
-	spin_lock_irqsave(host->host_lock, flags);
-	pc->done(pc->scsi_cmd);
-	spin_unlock_irqrestore(host->host_lock, flags);
-	kfree(pc);
-	blk_put_request(rq);
-	drive->pc = NULL;
-	return 0;
-}
-
-static inline int idescsi_set_direction(struct ide_atapi_pc *pc)
-{
-	switch (pc->c[0]) {
-		case READ_6: case READ_10: case READ_12:
-			pc->flags &= ~PC_FLAG_WRITING;
-			return 0;
-		case WRITE_6: case WRITE_10: case WRITE_12:
-			pc->flags |= PC_FLAG_WRITING;
-			return 0;
-		default:
-			return 1;
-	}
-}
-
-static int idescsi_map_sg(ide_drive_t *drive, struct ide_atapi_pc *pc)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct scatterlist *sg, *scsi_sg;
-	int segments;
-
-	if (!pc->req_xfer || pc->req_xfer % 1024)
-		return 1;
-
-	if (idescsi_set_direction(pc))
-		return 1;
-
-	sg = hwif->sg_table;
-	scsi_sg = scsi_sglist(pc->scsi_cmd);
-	segments = scsi_sg_count(pc->scsi_cmd);
-
-	if (segments > hwif->sg_max_nents)
-		return 1;
-
-	hwif->sg_nents = segments;
-	memcpy(sg, scsi_sg, sizeof(*sg) * segments);
-
-	return 0;
-}
-
-static ide_startstop_t idescsi_issue_pc(ide_drive_t *drive,
-		struct ide_atapi_pc *pc)
-{
-	/* Set the current packet command */
-	drive->pc = pc;
-
-	return ide_issue_pc(drive, ide_scsi_get_timeout(pc), ide_scsi_expiry);
-}
-
-/*
- *	idescsi_do_request is our request handling function.
- */
-static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block)
-{
-	debug_log("dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,
-		  rq->cmd[0], rq->errors);
-	debug_log("sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",
-		  rq->sector, rq->nr_sectors, rq->current_nr_sectors);
-
-	if (blk_sense_request(rq) || blk_special_request(rq)) {
-		struct ide_atapi_pc *pc = (struct ide_atapi_pc *)rq->special;
-
-		if ((drive->dev_flags & IDE_DFLAG_USING_DMA) &&
-		    idescsi_map_sg(drive, pc) == 0)
-			pc->flags |= PC_FLAG_DMA_OK;
-
-		return idescsi_issue_pc(drive, pc);
-	}
-	blk_dump_rq_flags(rq, "ide-scsi: unsup command");
-	idescsi_end_request (drive, 0, 0);
-	return ide_stopped;
-}
-
-#ifdef CONFIG_IDE_PROC_FS
-static ide_proc_entry_t idescsi_proc[] = {
-	{ "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
-	{ NULL, 0, NULL, NULL }
-};
-
-#define ide_scsi_devset_get(name, field) \
-static int get_##name(ide_drive_t *drive) \
-{ \
-	idescsi_scsi_t *scsi = drive_to_idescsi(drive); \
-	return scsi->field; \
-}
-
-#define ide_scsi_devset_set(name, field) \
-static int set_##name(ide_drive_t *drive, int arg) \
-{ \
-	idescsi_scsi_t *scsi = drive_to_idescsi(drive); \
-	scsi->field = arg; \
-	return 0; \
-}
-
-#define ide_scsi_devset_rw_field(_name, _field) \
-ide_scsi_devset_get(_name, _field); \
-ide_scsi_devset_set(_name, _field); \
-IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name);
-
-ide_devset_rw_field(bios_cyl, bios_cyl);
-ide_devset_rw_field(bios_head, bios_head);
-ide_devset_rw_field(bios_sect, bios_sect);
-
-ide_scsi_devset_rw_field(transform, transform);
-ide_scsi_devset_rw_field(log, log);
-
-static const struct ide_proc_devset idescsi_settings[] = {
-	IDE_PROC_DEVSET(bios_cyl,  0, 1023),
-	IDE_PROC_DEVSET(bios_head, 0,  255),
-	IDE_PROC_DEVSET(bios_sect, 0,	63),
-	IDE_PROC_DEVSET(log,	   0,	 1),
-	IDE_PROC_DEVSET(transform, 0,	 3),
-	{ 0 },
-};
-
-static ide_proc_entry_t *ide_scsi_proc_entries(ide_drive_t *drive)
-{
-	return idescsi_proc;
-}
-
-static const struct ide_proc_devset *ide_scsi_proc_devsets(ide_drive_t *drive)
-{
-	return idescsi_settings;
-}
-#endif
-
-/*
- *	Driver initialization.
- */
-static void idescsi_setup (ide_drive_t *drive, idescsi_scsi_t *scsi)
-{
-	clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform);
-#if IDESCSI_DEBUG_LOG
-	set_bit(IDESCSI_LOG_CMD, &scsi->log);
-#endif /* IDESCSI_DEBUG_LOG */
-
-	drive->pc_callback	 = ide_scsi_callback;
-	drive->pc_update_buffers = NULL;
-	drive->pc_io_buffers	 = ide_io_buffers;
-
-	ide_proc_register_driver(drive, scsi->driver);
-}
-
-static void ide_scsi_remove(ide_drive_t *drive)
-{
-	struct Scsi_Host *scsihost = drive->driver_data;
-	struct ide_scsi_obj *scsi = scsihost_to_idescsi(scsihost);
-	struct gendisk *g = scsi->disk;
-
-	scsi_remove_host(scsihost);
-	ide_proc_unregister_driver(drive, scsi->driver);
-
-	ide_unregister_region(g);
-
-	drive->driver_data = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-
-	ide_scsi_put(scsi);
-
-	drive->dev_flags &= ~IDE_DFLAG_SCSI;
-}
-
-static int ide_scsi_probe(ide_drive_t *);
-
-static ide_driver_t idescsi_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-scsi",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_scsi_probe,
-	.remove			= ide_scsi_remove,
-	.version		= IDESCSI_VERSION,
-	.do_request		= idescsi_do_request,
-	.end_request		= idescsi_end_request,
-	.error                  = idescsi_atapi_error,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc_entries		= ide_scsi_proc_entries,
-	.proc_devsets		= ide_scsi_proc_devsets,
-#endif
-};
-
-static int idescsi_ide_open(struct block_device *bdev, fmode_t mode)
-{
-	struct ide_scsi_obj *scsi = ide_scsi_get(bdev->bd_disk);
-
-	if (!scsi)
-		return -ENXIO;
-
-	return 0;
-}
-
-static int idescsi_ide_release(struct gendisk *disk, fmode_t mode)
-{
-	ide_scsi_put(ide_scsi_g(disk));
-	return 0;
-}
-
-static int idescsi_ide_ioctl(struct block_device *bdev, fmode_t mode,
-			unsigned int cmd, unsigned long arg)
-{
-	struct ide_scsi_obj *scsi = ide_scsi_g(bdev->bd_disk);
-	return generic_ide_ioctl(scsi->drive, bdev, cmd, arg);
-}
-
-static struct block_device_operations idescsi_ops = {
-	.owner		= THIS_MODULE,
-	.open		= idescsi_ide_open,
-	.release	= idescsi_ide_release,
-	.locked_ioctl	= idescsi_ide_ioctl,
-};
-
-static int idescsi_slave_configure(struct scsi_device * sdp)
-{
-	/* Configure detected device */
-	sdp->use_10_for_rw = 1;
-	sdp->use_10_for_ms = 1;
-	scsi_adjust_queue_depth(sdp, MSG_SIMPLE_TAG, sdp->host->cmd_per_lun);
-	return 0;
-}
-
-static const char *idescsi_info (struct Scsi_Host *host)
-{
-	return "SCSI host adapter emulation for IDE ATAPI devices";
-}
-
-static int idescsi_ioctl (struct scsi_device *dev, int cmd, void __user *arg)
-{
-	idescsi_scsi_t *scsi = scsihost_to_idescsi(dev->host);
-
-	if (cmd == SG_SET_TRANSFORM) {
-		if (arg)
-			set_bit(IDESCSI_SG_TRANSFORM, &scsi->transform);
-		else
-			clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform);
-		return 0;
-	} else if (cmd == SG_GET_TRANSFORM)
-		return put_user(test_bit(IDESCSI_SG_TRANSFORM, &scsi->transform), (int __user *) arg);
-	return -EINVAL;
-}
-
-static int idescsi_queue (struct scsi_cmnd *cmd,
-		void (*done)(struct scsi_cmnd *))
-{
-	struct Scsi_Host *host = cmd->device->host;
-	idescsi_scsi_t *scsi = scsihost_to_idescsi(host);
-	ide_drive_t *drive = scsi->drive;
-	struct request *rq = NULL;
-	struct ide_atapi_pc *pc = NULL;
-	int write = cmd->sc_data_direction == DMA_TO_DEVICE;
-
-	if (!drive) {
-		scmd_printk (KERN_ERR, cmd, "drive not present\n");
-		goto abort;
-	}
-	scsi = drive_to_idescsi(drive);
-	pc = kmalloc(sizeof(struct ide_atapi_pc), GFP_ATOMIC);
-	rq = blk_get_request(drive->queue, write, GFP_ATOMIC);
-	if (rq == NULL || pc == NULL) {
-		printk (KERN_ERR "ide-scsi: %s: out of memory\n", drive->name);
-		goto abort;
-	}
-
-	memset (pc->c, 0, 12);
-	pc->flags = 0;
-	if (cmd->sc_data_direction == DMA_TO_DEVICE)
-		pc->flags |= PC_FLAG_WRITING;
-	pc->rq = rq;
-	memcpy (pc->c, cmd->cmnd, cmd->cmd_len);
-	pc->buf = NULL;
-	pc->sg = scsi_sglist(cmd);
-	pc->sg_cnt = scsi_sg_count(cmd);
-	pc->b_count = 0;
-	pc->req_xfer = pc->buf_size = scsi_bufflen(cmd);
-	pc->scsi_cmd = cmd;
-	pc->done = done;
-	pc->timeout = jiffies + cmd->request->timeout;
-
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) {
-		printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number);
-		ide_scsi_hex_dump(cmd->cmnd, cmd->cmd_len);
-		if (memcmp(pc->c, cmd->cmnd, cmd->cmd_len)) {
-			printk ("ide-scsi: %s: que %lu, tsl = ", drive->name, cmd->serial_number);
-			ide_scsi_hex_dump(pc->c, 12);
-		}
-	}
-
-	rq->special = (char *) pc;
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-	spin_unlock_irq(host->host_lock);
-	rq->ref_count++;
-	memcpy(rq->cmd, pc->c, 12);
-	blk_execute_rq_nowait(drive->queue, scsi->disk, rq, 0, NULL);
-	spin_lock_irq(host->host_lock);
-	return 0;
-abort:
-	kfree (pc);
-	if (rq)
-		blk_put_request(rq);
-	cmd->result = DID_ERROR << 16;
-	done(cmd);
-	return 0;
-}
-
-static int idescsi_eh_abort (struct scsi_cmnd *cmd)
-{
-	idescsi_scsi_t *scsi  = scsihost_to_idescsi(cmd->device->host);
-	ide_drive_t    *drive = scsi->drive;
-	ide_hwif_t     *hwif;
-	ide_hwgroup_t  *hwgroup;
-	int		busy;
-	int             ret   = FAILED;
-
-	struct ide_atapi_pc *pc;
-
-	/* In idescsi_eh_abort we try to gently pry our command from the ide subsystem */
-
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
-		printk (KERN_WARNING "ide-scsi: abort called for %lu\n", cmd->serial_number);
-
-	if (!drive) {
-		printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_abort\n");
-		WARN_ON(1);
-		goto no_drive;
-	}
-
-	hwif = drive->hwif;
-	hwgroup = hwif->hwgroup;
-
-	/* First give it some more time, how much is "right" is hard to say :-(
-	   FIXME - uses mdelay which causes latency? */
-	busy = ide_wait_not_busy(hwif, 100);
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
-		printk (KERN_WARNING "ide-scsi: drive did%s become ready\n", busy?" not":"");
-
-	spin_lock_irq(&hwgroup->lock);
-
-	/* If there is no pc running we're done (our interrupt took care of it) */
-	pc = drive->pc;
-	if (pc == NULL) {
-		ret = SUCCESS;
-		goto ide_unlock;
-	}
-
-	/* It's somewhere in flight. Does ide subsystem agree? */
-	if (pc->scsi_cmd->serial_number == cmd->serial_number && !busy &&
-	    elv_queue_empty(drive->queue) && HWGROUP(drive)->rq != pc->rq) {
-		/*
-		 * FIXME - not sure this condition can ever occur
-		 */
-		printk (KERN_ERR "ide-scsi: cmd aborted!\n");
-
-		if (blk_sense_request(pc->rq))
-			kfree(pc->buf);
-		/* we need to call blk_put_request twice. */
-		blk_put_request(pc->rq);
-		blk_put_request(pc->rq);
-		kfree(pc);
-		drive->pc = NULL;
-
-		ret = SUCCESS;
-	}
-
-ide_unlock:
-	spin_unlock_irq(&hwgroup->lock);
-no_drive:
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
-		printk (KERN_WARNING "ide-scsi: abort returns %s\n", ret == SUCCESS?"success":"failed");
-
-	return ret;
-}
-
-static int idescsi_eh_reset (struct scsi_cmnd *cmd)
-{
-	struct request *req;
-	idescsi_scsi_t *scsi  = scsihost_to_idescsi(cmd->device->host);
-	ide_drive_t    *drive = scsi->drive;
-	ide_hwgroup_t  *hwgroup;
-	int             ready = 0;
-	int             ret   = SUCCESS;
-
-	struct ide_atapi_pc *pc;
-
-	/* In idescsi_eh_reset we forcefully remove the command from the ide subsystem and reset the device. */
-
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
-		printk (KERN_WARNING "ide-scsi: reset called for %lu\n", cmd->serial_number);
-
-	if (!drive) {
-		printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_reset\n");
-		WARN_ON(1);
-		return FAILED;
-	}
-
-	hwgroup = drive->hwif->hwgroup;
-
-	spin_lock_irq(cmd->device->host->host_lock);
-	spin_lock(&hwgroup->lock);
-
-	pc = drive->pc;
-	if (pc)
-		req = pc->rq;
-
-	if (pc == NULL || req != hwgroup->rq || hwgroup->handler == NULL) {
-		printk (KERN_WARNING "ide-scsi: No active request in idescsi_eh_reset\n");
-		spin_unlock(&hwgroup->lock);
-		spin_unlock_irq(cmd->device->host->host_lock);
-		return FAILED;
-	}
-
-	/* kill current request */
-	if (__blk_end_request(req, -EIO, 0))
-		BUG();
-	if (blk_sense_request(req))
-		kfree(pc->buf);
-	kfree(pc);
-	drive->pc = NULL;
-	blk_put_request(req);
-
-	/* now nuke the drive queue */
-	while ((req = elv_next_request(drive->queue))) {
-		if (__blk_end_request(req, -EIO, 0))
-			BUG();
-	}
-
-	hwgroup->rq = NULL;
-	hwgroup->handler = NULL;
-	hwgroup->busy = 1; /* will set this to zero when ide reset finished */
-	spin_unlock(&hwgroup->lock);
-
-	ide_do_reset(drive);
-
-	/* ide_do_reset starts a polling handler which restarts itself every 50ms until the reset finishes */
-
-	do {
-		spin_unlock_irq(cmd->device->host->host_lock);
-		msleep(50);
-		spin_lock_irq(cmd->device->host->host_lock);
-	} while ( HWGROUP(drive)->handler );
-
-	ready = drive_is_ready(drive);
-	HWGROUP(drive)->busy--;
-	if (!ready) {
-		printk (KERN_ERR "ide-scsi: reset failed!\n");
-		ret = FAILED;
-	}
-
-	spin_unlock_irq(cmd->device->host->host_lock);
-	return ret;
-}
-
-static int idescsi_bios(struct scsi_device *sdev, struct block_device *bdev,
-		sector_t capacity, int *parm)
-{
-	idescsi_scsi_t *idescsi = scsihost_to_idescsi(sdev->host);
-	ide_drive_t *drive = idescsi->drive;
-
-	if (drive->bios_cyl && drive->bios_head && drive->bios_sect) {
-		parm[0] = drive->bios_head;
-		parm[1] = drive->bios_sect;
-		parm[2] = drive->bios_cyl;
-	}
-	return 0;
-}
-
-static struct scsi_host_template idescsi_template = {
-	.module			= THIS_MODULE,
-	.name			= "idescsi",
-	.info			= idescsi_info,
-	.slave_configure        = idescsi_slave_configure,
-	.ioctl			= idescsi_ioctl,
-	.queuecommand		= idescsi_queue,
-	.eh_abort_handler	= idescsi_eh_abort,
-	.eh_host_reset_handler  = idescsi_eh_reset,
-	.bios_param		= idescsi_bios,
-	.can_queue		= 40,
-	.this_id		= -1,
-	.sg_tablesize		= 256,
-	.cmd_per_lun		= 5,
-	.max_sectors		= 128,
-	.use_clustering		= DISABLE_CLUSTERING,
-	.emulated		= 1,
-	.proc_name		= "ide-scsi",
-};
-
-static int ide_scsi_probe(ide_drive_t *drive)
-{
-	idescsi_scsi_t *idescsi;
-	struct Scsi_Host *host;
-	struct gendisk *g;
-	static int warned;
-	int err = -ENOMEM;
-	u16 last_lun;
-
-	if (!warned && drive->media == ide_cdrom) {
-		printk(KERN_WARNING "ide-scsi is deprecated for cd burning! Use ide-cd and give dev=/dev/hdX as device\n");
-		warned = 1;
-	}
-
-	if (idescsi_nocd && drive->media == ide_cdrom)
-		return -ENODEV;
-
-	if (!strstr("ide-scsi", drive->driver_req) ||
-	    drive->media == ide_disk ||
-	    !(host = scsi_host_alloc(&idescsi_template,sizeof(idescsi_scsi_t))))
-		return -ENODEV;
-
-	drive->dev_flags |= IDE_DFLAG_SCSI;
-
-	g = alloc_disk(1 << PARTN_BITS);
-	if (!g)
-		goto out_host_put;
-
-	ide_init_disk(g, drive);
-
-	host->max_id = 1;
-
-	last_lun = drive->id[ATA_ID_LAST_LUN];
-	if (last_lun)
-		debug_log("%s: last_lun=%u\n", drive->name, last_lun);
-
-	if ((last_lun & 7) != 7)
-		host->max_lun = (last_lun & 7) + 1;
-	else
-		host->max_lun = 1;
-
-	drive->driver_data = host;
-	idescsi = scsihost_to_idescsi(host);
-	idescsi->drive = drive;
-	idescsi->driver = &idescsi_driver;
-	idescsi->host = host;
-	idescsi->disk = g;
-	g->private_data = &idescsi->driver;
-	err = 0;
-	idescsi_setup(drive, idescsi);
-	g->fops = &idescsi_ops;
-	ide_register_region(g);
-	err = scsi_add_host(host, &drive->gendev);
-	if (!err) {
-		scsi_scan_host(host);
-		return 0;
-	}
-	/* fall through on error */
-	ide_unregister_region(g);
-	ide_proc_unregister_driver(drive, &idescsi_driver);
-
-	put_disk(g);
-out_host_put:
-	drive->dev_flags &= ~IDE_DFLAG_SCSI;
-	scsi_host_put(host);
-	return err;
-}
-
-static int __init init_idescsi_module(void)
-{
-	return driver_register(&idescsi_driver.gen_driver);
-}
-
-static void __exit exit_idescsi_module(void)
-{
-	driver_unregister(&idescsi_driver.gen_driver);
-}
-
-module_param(idescsi_nocd, int, 0600);
-MODULE_PARM_DESC(idescsi_nocd, "Disable handling of CD-ROMs so they may be driven by ide-cd");
-module_init(init_idescsi_module);
-module_exit(exit_idescsi_module);
-MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 991cb26a6ad287c3bc6555c41e830590a23910c4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:52 +0100
Subject: ide-atapi: add a dev_is_idecd-inline

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 4e58b9e..33a1534 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -14,6 +14,12 @@
 #define debug_log(fmt, args...) do {} while (0)
 #endif
 
+static inline int dev_is_idecd(ide_drive_t *drive)
+{
+	return (drive->media == ide_cdrom || drive->media == ide_optical) &&
+		!(drive->dev_flags & IDE_DFLAG_SCSI);
+}
+
 /*
  * Check whether we can support a device,
  * based on the ATAPI IDENTIFY command results.
@@ -577,7 +583,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 
 	if (scsi)
 		tf_flags = 0;
-	else if (drive->media == ide_cdrom || drive->media == ide_optical)
+	else if (dev_is_idecd(drive))
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
 	else
 		tf_flags = IDE_TFLAG_OUT_DEVICE;
-- 
cgit v0.10.2


From ed48554fad7091b9613b967462f082bf1a9cb035 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:52 +0100
Subject: ide-atapi: combine drive-specific assignments

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 33a1534..b6e0aac 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -551,18 +551,24 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 	struct ide_atapi_pc *pc = drive->pc;
 	ide_hwif_t *hwif = drive->hwif;
 	u32 tf_flags;
-	u16 bcount;
+	u16 bcount = 0;
 	u8 scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI);
 
 	/* We haven't transferred any data yet */
 	pc->xferred = 0;
 	pc->cur_pos = pc->buf;
 
-	/* Request to transfer the entire buffer at once */
-	if (drive->media == ide_tape && scsi == 0)
-		bcount = pc->req_xfer;
-	else
+	if (dev_is_idecd(drive)) {
+		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
+	} else if (scsi) {
+		tf_flags = 0;
 		bcount = min(pc->req_xfer, 63 * 1024);
+	} else {
+		tf_flags = IDE_TFLAG_OUT_DEVICE;
+		bcount = ((drive->media == ide_tape) ?
+				pc->req_xfer :
+				min(pc->req_xfer, 63 * 1024));
+	}
 
 	if (pc->flags & PC_FLAG_DMA_ERROR) {
 		pc->flags &= ~PC_FLAG_DMA_ERROR;
@@ -581,13 +587,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 	if (!drive->dma)
 		pc->flags &= ~PC_FLAG_DMA_OK;
 
-	if (scsi)
-		tf_flags = 0;
-	else if (dev_is_idecd(drive))
-		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
-	else
-		tf_flags = IDE_TFLAG_OUT_DEVICE;
-
 	ide_pktcmd_tf_load(drive, tf_flags, bcount, drive->dma);
 
 	/* Issue the packet command */
-- 
cgit v0.10.2


From 4f02ff06b4d33aba50ce5157c23e99cd21d447ee Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:52 +0100
Subject: ide-atapi: setup dma for ide-cd

There should be no functional change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index b6e0aac..74273fd 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -575,8 +575,9 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 		ide_dma_off(drive);
 	}
 
-	if ((pc->flags & PC_FLAG_DMA_OK) &&
-	    (drive->dev_flags & IDE_DFLAG_USING_DMA)) {
+	if (((pc->flags & PC_FLAG_DMA_OK) &&
+		(drive->dev_flags & IDE_DFLAG_USING_DMA)) ||
+	    drive->dma) {
 		if (scsi)
 			hwif->sg_mapped = 1;
 		drive->dma = !hwif->dma_ops->dma_setup(drive);
-- 
cgit v0.10.2


From 392de1d53dd40e2eebee3a0a26aa647a3865ca78 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:52 +0100
Subject: ide-atapi: accomodate transfer length calculation for ide-cd

... by factoring it out of ide_cd_do_request() into a helper, as suggested by
Bart.

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
[bart: BLK_DEV_IDECD needs to select IDE_ATAPI now]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 937945e..4ee85fc 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -137,6 +137,7 @@ config BLK_DEV_DELKIN
 
 config BLK_DEV_IDECD
 	tristate "Include IDE/ATAPI CDROM support"
+	select IDE_ATAPI
 	---help---
 	  If you have a CD-ROM drive using the ATAPI protocol, say Y. ATAPI is
 	  a newer protocol used by IDE CD-ROM and TAPE drives, similar to the
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 74273fd..8884877 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -252,6 +252,18 @@ int ide_scsi_expiry(ide_drive_t *drive)
 }
 EXPORT_SYMBOL_GPL(ide_scsi_expiry);
 
+int ide_cd_get_xferlen(struct request *rq)
+{
+	if (blk_fs_request(rq))
+		return 32768;
+	else if (blk_sense_request(rq) || blk_pc_request(rq) ||
+			 rq->cmd_type == REQ_TYPE_ATA_PC)
+		return rq->data_len;
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(ide_cd_get_xferlen);
+
 /*
  * This is the usual interrupt handler which will be called during a packet
  * command.  We will transfer some of the data (as requested by the drive)
@@ -551,7 +563,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 	struct ide_atapi_pc *pc = drive->pc;
 	ide_hwif_t *hwif = drive->hwif;
 	u32 tf_flags;
-	u16 bcount = 0;
+	u16 bcount;
 	u8 scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI);
 
 	/* We haven't transferred any data yet */
@@ -560,6 +572,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 
 	if (dev_is_idecd(drive)) {
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
+		bcount = ide_cd_get_xferlen(hwif->hwgroup->rq);
 	} else if (scsi) {
 		tf_flags = 0;
 		bcount = min(pc->req_xfer, 63 * 1024);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 65e5513..8d3c771 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1214,8 +1214,9 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 		      __func__, rq->cmd[0], rq->cmd_type,
 		      (unsigned long long)block);
 
+	xferlen = ide_cd_get_xferlen(rq);
+
 	if (blk_fs_request(rq)) {
-		xferlen = 32768;
 		fn = cdrom_start_rw_cont;
 
 		if (cdrom_start_rw(drive, rq) == ide_stopped)
@@ -1225,7 +1226,6 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 			return ide_stopped;
 	} else if (blk_sense_request(rq) || blk_pc_request(rq) ||
 		   rq->cmd_type == REQ_TYPE_ATA_PC) {
-		xferlen = rq->data_len;
 		fn = cdrom_do_newpc_cont;
 
 		if (!rq->timeout)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index eb4c01f..e35ff68 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1254,6 +1254,8 @@ static inline unsigned long ide_scsi_get_timeout(struct ide_atapi_pc *pc)
 
 int ide_scsi_expiry(ide_drive_t *);
 
+int ide_cd_get_xferlen(struct request *);
+
 ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int, ide_expiry_t *);
 
 ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *);
-- 
cgit v0.10.2


From 5f25843fa79b7c35097b0ffe8b2c5cc2428d6495 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:53 +0100
Subject: ide-atapi: teach ide atapi about drive->waiting_for_dma

In addition, we wait for DRQ to be asserted by repeatedly polling
device status no matter what DRQ type each device implements.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 8884877..8c5cf68 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -511,6 +511,11 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 		return startstop;
 	}
 
+	if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
+		if (drive->dma)
+			drive->waiting_for_dma = 1;
+	}
+
 	ireason = ide_read_ireason(drive);
 	if (drive->media == ide_tape &&
 	    (drive->dev_flags & IDE_DFLAG_SCSI) == 0)
@@ -605,6 +610,8 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 
 	/* Issue the packet command */
 	if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
+		if (drive->dma)
+			drive->waiting_for_dma = 0;
 		ide_execute_command(drive, ATA_CMD_PACKET, ide_transfer_pc,
 				    timeout, NULL);
 		return ide_started;
-- 
cgit v0.10.2


From 4cad085efbce8dcc5006b0d1034089758b4fc7ba Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Fri, 2 Jan 2009 16:12:53 +0100
Subject: ide-cd: move cdrom_timer_expiry to ide-atapi.c

- cdrom_timer_expiry -> ide_cd_expiry
- remove expiry-arg to ide_issue_pc as it is redundant now
- ide_debug_log -> debug_log

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 8c5cf68..c110329 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -3,6 +3,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/cdrom.h>
 #include <linux/delay.h>
 #include <linux/ide.h>
 #include <scsi/scsi.h>
@@ -252,6 +253,38 @@ int ide_scsi_expiry(ide_drive_t *drive)
 }
 EXPORT_SYMBOL_GPL(ide_scsi_expiry);
 
+int ide_cd_expiry(ide_drive_t *drive)
+{
+	struct request *rq = HWGROUP(drive)->rq;
+	unsigned long wait = 0;
+
+	debug_log("%s: rq->cmd[0]: 0x%x\n", __func__, rq->cmd[0]);
+
+	/*
+	 * Some commands are *slow* and normally take a long time to complete.
+	 * Usually we can use the ATAPI "disconnect" to bypass this, but not all
+	 * commands/drives support that. Let ide_timer_expiry keep polling us
+	 * for these.
+	 */
+	switch (rq->cmd[0]) {
+	case GPCMD_BLANK:
+	case GPCMD_FORMAT_UNIT:
+	case GPCMD_RESERVE_RZONE_TRACK:
+	case GPCMD_CLOSE_TRACK:
+	case GPCMD_FLUSH_CACHE:
+		wait = ATAPI_WAIT_PC;
+		break;
+	default:
+		if (!(rq->cmd_flags & REQ_QUIET))
+			printk(KERN_INFO "cmd 0x%x timed out\n",
+					 rq->cmd[0]);
+		wait = 0;
+		break;
+	}
+	return wait;
+}
+EXPORT_SYMBOL_GPL(ide_cd_expiry);
+
 int ide_cd_get_xferlen(struct request *rq)
 {
 	if (blk_fs_request(rq))
@@ -562,11 +595,11 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 	return ide_started;
 }
 
-ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
-			     ide_expiry_t *expiry)
+ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
 {
 	struct ide_atapi_pc *pc = drive->pc;
 	ide_hwif_t *hwif = drive->hwif;
+	ide_expiry_t *expiry = NULL;
 	u32 tf_flags;
 	u16 bcount;
 	u8 scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI);
@@ -578,9 +611,11 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 	if (dev_is_idecd(drive)) {
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
 		bcount = ide_cd_get_xferlen(hwif->hwgroup->rq);
+		expiry = ide_cd_expiry;
 	} else if (scsi) {
 		tf_flags = 0;
 		bcount = min(pc->req_xfer, 63 * 1024);
+		expiry = ide_scsi_expiry;
 	} else {
 		tf_flags = IDE_TFLAG_OUT_DEVICE;
 		bcount = ((drive->media == ide_tape) ?
@@ -613,7 +648,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 		if (drive->dma)
 			drive->waiting_for_dma = 0;
 		ide_execute_command(drive, ATA_CMD_PACKET, ide_transfer_pc,
-				    timeout, NULL);
+				    timeout, expiry);
 		return ide_started;
 	} else {
 		ide_execute_pkt_cmd(drive);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 8d3c771..105e4d8 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -511,38 +511,6 @@ end_request:
 	return 1;
 }
 
-static int cdrom_timer_expiry(ide_drive_t *drive)
-{
-	struct request *rq = HWGROUP(drive)->rq;
-	unsigned long wait = 0;
-
-	ide_debug_log(IDE_DBG_RQ, "Call %s: rq->cmd[0]: 0x%x\n", __func__,
-		      rq->cmd[0]);
-
-	/*
-	 * Some commands are *slow* and normally take a long time to complete.
-	 * Usually we can use the ATAPI "disconnect" to bypass this, but not all
-	 * commands/drives support that. Let ide_timer_expiry keep polling us
-	 * for these.
-	 */
-	switch (rq->cmd[0]) {
-	case GPCMD_BLANK:
-	case GPCMD_FORMAT_UNIT:
-	case GPCMD_RESERVE_RZONE_TRACK:
-	case GPCMD_CLOSE_TRACK:
-	case GPCMD_FLUSH_CACHE:
-		wait = ATAPI_WAIT_PC;
-		break;
-	default:
-		if (!(rq->cmd_flags & REQ_QUIET))
-			printk(KERN_INFO PFX "cmd 0x%x timed out\n",
-					 rq->cmd[0]);
-		wait = 0;
-		break;
-	}
-	return wait;
-}
-
 /*
  * Set up the device registers for transferring a packet command on DEV,
  * expecting to later transfer XFERLEN bytes.  HANDLER is the routine
@@ -574,7 +542,7 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive,
 
 		/* packet command */
 		ide_execute_command(drive, ATA_CMD_PACKET, handler,
-				    ATAPI_WAIT_PC, cdrom_timer_expiry);
+				    ATAPI_WAIT_PC, ide_cd_expiry);
 		return ide_started;
 	} else {
 		ide_execute_pkt_cmd(drive);
@@ -621,7 +589,7 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive,
 	}
 
 	/* arm the interrupt handler */
-	ide_set_handler(drive, handler, rq->timeout, cdrom_timer_expiry);
+	ide_set_handler(drive, handler, rq->timeout, ide_cd_expiry);
 
 	/* ATAPI commands get padded out to 12 bytes minimum */
 	cmd_len = COMMAND_SIZE(rq->cmd[0]);
@@ -1088,7 +1056,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 	} else {
 		timeout = ATAPI_WAIT_PC;
 		if (!blk_fs_request(rq))
-			expiry = cdrom_timer_expiry;
+			expiry = ide_cd_expiry;
 	}
 
 	ide_set_handler(drive, cdrom_newpc_intr, timeout, expiry);
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 389faa4..bf676b2 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -16,10 +16,6 @@
 #define ide_debug_log(lvl, fmt, args...) do {} while (0)
 #endif
 
-/*
- * typical timeout for packet command
- */
-#define ATAPI_WAIT_PC		(60 * HZ)
 #define ATAPI_WAIT_WRITE_BUSY	(10 * HZ)
 
 /************************************************************************/
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 1f07f38..fdec729 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -197,7 +197,7 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
 
 	pc->retries++;
 
-	return ide_issue_pc(drive, WAIT_FLOPPY_CMD, NULL);
+	return ide_issue_pc(drive, WAIT_FLOPPY_CMD);
 }
 
 void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *pc)
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a2d470e..ac9e29a 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -694,7 +694,7 @@ static ide_startstop_t idetape_issue_pc(ide_drive_t *drive,
 
 	pc->retries++;
 
-	return ide_issue_pc(drive, WAIT_TAPE_CMD, NULL);
+	return ide_issue_pc(drive, WAIT_TAPE_CMD);
 }
 
 /* A mode sense command is used to "sense" tape parameters. */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index e35ff68..e20e0b5 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -396,6 +396,7 @@ enum {
  * This is used for several packet commands (not for READ/WRITE commands).
  */
 #define IDE_PC_BUFFER_SIZE	256
+#define ATAPI_WAIT_PC		(60 * HZ)
 
 struct ide_atapi_pc {
 	/* actual packet bytes */
@@ -1253,10 +1254,11 @@ static inline unsigned long ide_scsi_get_timeout(struct ide_atapi_pc *pc)
 }
 
 int ide_scsi_expiry(ide_drive_t *);
+int ide_cd_expiry(ide_drive_t *);
 
 int ide_cd_get_xferlen(struct request *);
 
-ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int, ide_expiry_t *);
+ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int);
 
 ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *);
 
-- 
cgit v0.10.2


From 152fe1cc38ebebb81724663e3b1e1e10272a729e Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:53 +0100
Subject: ide-atapi: remove ide-scsi remnants from ide_issue_pc

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index c110329..ff6b567 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -602,7 +602,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
 	ide_expiry_t *expiry = NULL;
 	u32 tf_flags;
 	u16 bcount;
-	u8 scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI);
 
 	/* We haven't transferred any data yet */
 	pc->xferred = 0;
@@ -612,10 +611,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
 		bcount = ide_cd_get_xferlen(hwif->hwgroup->rq);
 		expiry = ide_cd_expiry;
-	} else if (scsi) {
-		tf_flags = 0;
-		bcount = min(pc->req_xfer, 63 * 1024);
-		expiry = ide_scsi_expiry;
 	} else {
 		tf_flags = IDE_TFLAG_OUT_DEVICE;
 		bcount = ((drive->media == ide_tape) ?
@@ -630,13 +625,8 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
 
 	if (((pc->flags & PC_FLAG_DMA_OK) &&
 		(drive->dev_flags & IDE_DFLAG_USING_DMA)) ||
-	    drive->dma) {
-		if (scsi)
-			hwif->sg_mapped = 1;
+	    drive->dma)
 		drive->dma = !hwif->dma_ops->dma_setup(drive);
-		if (scsi)
-			hwif->sg_mapped = 0;
-	}
 
 	if (!drive->dma)
 		pc->flags &= ~PC_FLAG_DMA_OK;
-- 
cgit v0.10.2


From 5fe3110431ccf437607bdc11ac3677bf3eeee6e3 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:53 +0100
Subject: ide-atapi: remove ide-scsi remnants from ide_transfer_pc()

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index ff6b567..f5bf405 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -550,8 +550,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 	}
 
 	ireason = ide_read_ireason(drive);
-	if (drive->media == ide_tape &&
-	    (drive->dev_flags & IDE_DFLAG_SCSI) == 0)
+	if (drive->media == ide_tape)
 		ireason = ide_wait_ireason(drive, ireason);
 
 	if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) {
@@ -569,14 +568,9 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 		timeout = drive->pc_delay;
 		expiry = &ide_delayed_transfer_pc;
 	} else {
-		if (drive->dev_flags & IDE_DFLAG_SCSI) {
-			timeout = ide_scsi_get_timeout(pc);
-			expiry = ide_scsi_expiry;
-		} else {
-			timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
-							       : WAIT_TAPE_CMD;
-			expiry = NULL;
-		}
+		timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
+						       : WAIT_TAPE_CMD;
+		expiry = NULL;
 	}
 
 	/* Set the interrupt routine */
-- 
cgit v0.10.2


From 5d655a03b847fbe5353a8a74bbeb75e18708dca3 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:54 +0100
Subject: ide-atapi: remove ide-scsi remnants from ide_pc_intr()

As a result, remove now unused ide_scsi_get_timeout and ide_scsi_expiry.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index f5bf405..7a04509 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -240,19 +240,6 @@ void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk)
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
-int ide_scsi_expiry(ide_drive_t *drive)
-{
-	struct ide_atapi_pc *pc = drive->pc;
-
-	debug_log("%s called for %lu at %lu\n", __func__,
-		  pc->scsi_cmd->serial_number, jiffies);
-
-	pc->flags |= PC_FLAG_TIMEDOUT;
-
-	return 0; /* we do not want the IDE subsystem to retry */
-}
-EXPORT_SYMBOL_GPL(ide_scsi_expiry);
-
 int ide_cd_expiry(ide_drive_t *drive)
 {
 	struct request *rq = HWGROUP(drive)->rq;
@@ -309,21 +296,14 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 	struct request *rq = hwif->hwgroup->rq;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	xfer_func_t *xferfunc;
-	ide_expiry_t *expiry;
 	unsigned int timeout, temp;
 	u16 bcount;
-	u8 stat, ireason, scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI), dsc = 0;
+	u8 stat, ireason, dsc = 0;
 
 	debug_log("Enter %s - interrupt handler\n", __func__);
 
-	if (scsi) {
-		timeout = ide_scsi_get_timeout(pc);
-		expiry = ide_scsi_expiry;
-	} else {
-		timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
-						       : WAIT_TAPE_CMD;
-		expiry = NULL;
-	}
+	timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
+					       : WAIT_TAPE_CMD;
 
 	if (pc->flags & PC_FLAG_TIMEDOUT) {
 		drive->pc_callback(drive, 0);
@@ -335,8 +315,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 	if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
 		if (hwif->dma_ops->dma_end(drive) ||
-		    (drive->media == ide_tape && !scsi && (stat & ATA_ERR))) {
-			if (drive->media == ide_floppy && !scsi)
+		    (drive->media == ide_tape && (stat & ATA_ERR))) {
+			if (drive->media == ide_floppy)
 				printk(KERN_ERR "%s: DMA %s error\n",
 					drive->name, rq_data_dir(pc->rq)
 						     ? "write" : "read");
@@ -358,7 +338,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 		local_irq_enable_in_hardirq();
 
-		if (drive->media == ide_tape && !scsi &&
+		if (drive->media == ide_tape &&
 		    (stat & ATA_ERR) && rq->cmd[0] == REQUEST_SENSE)
 			stat &= ~ATA_ERR;
 
@@ -366,11 +346,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			/* Error detected */
 			debug_log("%s: I/O error\n", drive->name);
 
-			if (drive->media != ide_tape || scsi) {
+			if (drive->media != ide_tape)
 				pc->rq->errors++;
-				if (scsi)
-					goto cmd_finished;
-			}
 
 			if (rq->cmd[0] == REQUEST_SENSE) {
 				printk(KERN_ERR "%s: I/O error in request sense"
@@ -386,7 +363,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			/* queued, but not started */
 			return ide_stopped;
 		}
-cmd_finished:
 		pc->error = 0;
 
 		if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
@@ -433,25 +409,8 @@ cmd_finished:
 						"us more data than expected - "
 						"discarding data\n",
 						drive->name);
-				if (scsi)
-					temp = pc->buf_size - pc->xferred;
-				else
-					temp = 0;
-				if (temp) {
-					if (pc->sg)
-						drive->pc_io_buffers(drive, pc,
-								     temp, 0);
-					else
-						tp_ops->input_data(drive, NULL,
-							pc->cur_pos, temp);
-					printk(KERN_ERR "%s: transferred %d of "
-							"%d bytes\n",
-							drive->name,
-							temp, bcount);
-				}
-				pc->xferred += temp;
-				pc->cur_pos += temp;
-				ide_pad_transfer(drive, 0, bcount - temp);
+
+				ide_pad_transfer(drive, 0, bcount);
 				goto next_irq;
 			}
 			debug_log("The device wants to send us more data than "
@@ -461,14 +420,13 @@ cmd_finished:
 	} else
 		xferfunc = tp_ops->output_data;
 
-	if ((drive->media == ide_floppy && !scsi && !pc->buf) ||
-	    (drive->media == ide_tape && !scsi && pc->bh) ||
-	    (scsi && pc->sg)) {
+	if ((drive->media == ide_floppy && !pc->buf) ||
+	    (drive->media == ide_tape && pc->bh)) {
 		int done = drive->pc_io_buffers(drive, pc, bcount,
 				  !!(pc->flags & PC_FLAG_WRITING));
 
 		/* FIXME: don't do partial completions */
-		if (drive->media == ide_floppy && !scsi)
+		if (drive->media == ide_floppy)
 			ide_end_request(drive, 1, done >> 9);
 	} else
 		xferfunc(drive, NULL, pc->cur_pos, bcount);
@@ -481,7 +439,7 @@ cmd_finished:
 		  rq->cmd[0], bcount);
 next_irq:
 	/* And set the interrupt handler again */
-	ide_set_handler(drive, ide_pc_intr, timeout, expiry);
+	ide_set_handler(drive, ide_pc_intr, timeout, NULL);
 	return ide_started;
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index e20e0b5..257524e 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1248,12 +1248,6 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
 void ide_retry_pc(ide_drive_t *, struct gendisk *);
 
-static inline unsigned long ide_scsi_get_timeout(struct ide_atapi_pc *pc)
-{
-	return max_t(unsigned long, WAIT_CMD, pc->timeout - jiffies);
-}
-
-int ide_scsi_expiry(ide_drive_t *);
 int ide_cd_expiry(ide_drive_t *);
 
 int ide_cd_get_xferlen(struct request *);
-- 
cgit v0.10.2


From 5317464dccd0c03026d60f1e9968de4f9cd23f69 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:54 +0100
Subject: ide: remove the last ide-scsi remnants

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 7a04509..d412bd2 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -17,8 +17,7 @@
 
 static inline int dev_is_idecd(ide_drive_t *drive)
 {
-	return (drive->media == ide_cdrom || drive->media == ide_optical) &&
-		!(drive->dev_flags & IDE_DFLAG_SCSI);
+	return drive->media == ide_cdrom || drive->media == ide_optical;
 }
 
 /*
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index bb3248a..1c36a8e 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -426,9 +426,6 @@ void ide_map_sg(ide_drive_t *drive, struct request *rq)
 	ide_hwif_t *hwif = drive->hwif;
 	struct scatterlist *sg = hwif->sg_table;
 
-	if (hwif->sg_mapped)	/* needed by ide-scsi */
-		return;
-
 	if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
 		hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 	} else {
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 28232c6..1be263e 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -95,8 +95,7 @@ static int ide_set_nice_ioctl(ide_drive_t *drive, unsigned long arg)
 		return -EPERM;
 
 	if (((arg >> IDE_NICE_DSC_OVERLAP) & 1) &&
-	    (drive->media != ide_tape ||
-	     (drive->dev_flags & IDE_DFLAG_SCSI)))
+	    (drive->media != ide_tape))
 		return -EPERM;
 
 	if ((arg >> IDE_NICE_DSC_OVERLAP) & 1)
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 966b74c..c5adb7b 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1141,8 +1141,6 @@ static struct kobject *ata_probe(dev_t dev, int *part, void *data)
 
 	if (drive->media == ide_disk)
 		request_module("ide-disk");
-	if (drive->dev_flags & IDE_DFLAG_SCSI)
-		request_module("ide-scsi");
 	if (drive->media == ide_cdrom || drive->media == ide_optical)
 		request_module("ide-cd");
 	if (drive->media == ide_tape)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 257524e..ad57a44 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -559,28 +559,26 @@ enum {
 	IDE_DFLAG_NODMA			= (1 << 16),
 	/* powermanagment told us not to do anything, so sleep nicely */
 	IDE_DFLAG_BLOCKED		= (1 << 17),
-	/* ide-scsi emulation */
-	IDE_DFLAG_SCSI			= (1 << 18),
 	/* sleeping & sleep field valid */
-	IDE_DFLAG_SLEEPING		= (1 << 19),
-	IDE_DFLAG_POST_RESET		= (1 << 20),
-	IDE_DFLAG_UDMA33_WARNED		= (1 << 21),
-	IDE_DFLAG_LBA48			= (1 << 22),
+	IDE_DFLAG_SLEEPING		= (1 << 18),
+	IDE_DFLAG_POST_RESET		= (1 << 19),
+	IDE_DFLAG_UDMA33_WARNED		= (1 << 20),
+	IDE_DFLAG_LBA48			= (1 << 21),
 	/* status of write cache */
-	IDE_DFLAG_WCACHE		= (1 << 23),
+	IDE_DFLAG_WCACHE		= (1 << 22),
 	/* used for ignoring ATA_DF */
-	IDE_DFLAG_NOWERR		= (1 << 24),
+	IDE_DFLAG_NOWERR		= (1 << 23),
 	/* retrying in PIO */
-	IDE_DFLAG_DMA_PIO_RETRY		= (1 << 25),
-	IDE_DFLAG_LBA			= (1 << 26),
+	IDE_DFLAG_DMA_PIO_RETRY		= (1 << 24),
+	IDE_DFLAG_LBA			= (1 << 25),
 	/* don't unload heads */
-	IDE_DFLAG_NO_UNLOAD		= (1 << 27),
+	IDE_DFLAG_NO_UNLOAD		= (1 << 26),
 	/* heads unloaded, please don't reset port */
-	IDE_DFLAG_PARKED		= (1 << 28),
-	IDE_DFLAG_MEDIA_CHANGED		= (1 << 29),
+	IDE_DFLAG_PARKED		= (1 << 27),
+	IDE_DFLAG_MEDIA_CHANGED		= (1 << 28),
 	/* write protect */
-	IDE_DFLAG_WP			= (1 << 30),
-	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 31),
+	IDE_DFLAG_WP			= (1 << 29),
+	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 30),
 };
 
 struct ide_drive_s {
-- 
cgit v0.10.2


From 8c662852d1aa35ed370942ef2740759cd334d2d5 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:54 +0100
Subject: ide-atapi: compute cmd_len based on device type in ide_transfer_pc

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
[bart: move cmd_len check closer to ->output_data() call]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index d412bd2..5fdcb95 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -15,6 +15,8 @@
 #define debug_log(fmt, args...) do {} while (0)
 #endif
 
+#define ATAPI_MIN_CDB_BYTES	12
+
 static inline int dev_is_idecd(ide_drive_t *drive)
 {
 	return drive->media == ide_cdrom || drive->media == ide_optical;
@@ -492,6 +494,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 	struct request *rq = hwif->hwgroup->rq;
 	ide_expiry_t *expiry;
 	unsigned int timeout;
+	int cmd_len;
 	ide_startstop_t startstop;
 	u8 ireason;
 
@@ -513,9 +516,18 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 	if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) {
 		printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing "
 				"a packet command\n", drive->name);
+
 		return ide_do_reset(drive);
 	}
 
+	if (dev_is_idecd(drive)) {
+		/* ATAPI commands get padded out to 12 bytes minimum */
+		cmd_len = COMMAND_SIZE(rq->cmd[0]);
+		if (cmd_len < ATAPI_MIN_CDB_BYTES)
+			cmd_len = ATAPI_MIN_CDB_BYTES;
+	} else
+		cmd_len = ATAPI_MIN_CDB_BYTES;
+
 	/*
 	 * If necessary schedule the packet transfer to occur 'timeout'
 	 * miliseconds later in ide_delayed_transfer_pc() after the device
@@ -541,7 +553,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 
 	/* Send the actual packet */
 	if ((drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) == 0)
-		hwif->tp_ops->output_data(drive, NULL, rq->cmd, 12);
+		hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len);
 
 	return ide_started;
 }
-- 
cgit v0.10.2


From def860d061d0fcab7fbbe193c0b8b8f0b9b4c828 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:55 +0100
Subject: ide-atapi: assign expiry and timeout based on device type

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 5fdcb95..cf2b99c 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -525,21 +525,25 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 		cmd_len = COMMAND_SIZE(rq->cmd[0]);
 		if (cmd_len < ATAPI_MIN_CDB_BYTES)
 			cmd_len = ATAPI_MIN_CDB_BYTES;
-	} else
-		cmd_len = ATAPI_MIN_CDB_BYTES;
 
-	/*
-	 * If necessary schedule the packet transfer to occur 'timeout'
-	 * miliseconds later in ide_delayed_transfer_pc() after the device
-	 * says it's ready for a packet.
-	 */
-	if (drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) {
-		timeout = drive->pc_delay;
-		expiry = &ide_delayed_transfer_pc;
+		timeout = rq->timeout;
+		expiry  = ide_cd_expiry;
 	} else {
-		timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
-						       : WAIT_TAPE_CMD;
-		expiry = NULL;
+		cmd_len = ATAPI_MIN_CDB_BYTES;
+
+		/*
+		 * If necessary schedule the packet transfer to occur 'timeout'
+		 * miliseconds later in ide_delayed_transfer_pc() after the
+		 * device says it's ready for a packet.
+		 */
+		if (drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) {
+			timeout = drive->pc_delay;
+			expiry = &ide_delayed_transfer_pc;
+		} else {
+			timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
+							       : WAIT_TAPE_CMD;
+			expiry = NULL;
+		}
 	}
 
 	/* Set the interrupt routine */
-- 
cgit v0.10.2


From d77612ab0ad7515623b084b952dfefd547073ada Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:55 +0100
Subject: ide-atapi: split drive-specific functionality in ide_issue_pc

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index cf2b99c..fa7a70a 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -564,39 +564,43 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 
 ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
 {
-	struct ide_atapi_pc *pc = drive->pc;
+	struct ide_atapi_pc *pc;
 	ide_hwif_t *hwif = drive->hwif;
 	ide_expiry_t *expiry = NULL;
 	u32 tf_flags;
 	u16 bcount;
 
-	/* We haven't transferred any data yet */
-	pc->xferred = 0;
-	pc->cur_pos = pc->buf;
-
 	if (dev_is_idecd(drive)) {
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
 		bcount = ide_cd_get_xferlen(hwif->hwgroup->rq);
 		expiry = ide_cd_expiry;
+
+		if (drive->dma)
+			drive->dma = !hwif->dma_ops->dma_setup(drive);
 	} else {
+		pc = drive->pc;
+
+		/* We haven't transferred any data yet */
+		pc->xferred = 0;
+		pc->cur_pos = pc->buf;
+
 		tf_flags = IDE_TFLAG_OUT_DEVICE;
 		bcount = ((drive->media == ide_tape) ?
 				pc->req_xfer :
 				min(pc->req_xfer, 63 * 1024));
-	}
 
-	if (pc->flags & PC_FLAG_DMA_ERROR) {
-		pc->flags &= ~PC_FLAG_DMA_ERROR;
-		ide_dma_off(drive);
-	}
+		if (pc->flags & PC_FLAG_DMA_ERROR) {
+			pc->flags &= ~PC_FLAG_DMA_ERROR;
+			ide_dma_off(drive);
+		}
 
-	if (((pc->flags & PC_FLAG_DMA_OK) &&
-		(drive->dev_flags & IDE_DFLAG_USING_DMA)) ||
-	    drive->dma)
-		drive->dma = !hwif->dma_ops->dma_setup(drive);
+		if ((pc->flags & PC_FLAG_DMA_OK) &&
+		     (drive->dev_flags & IDE_DFLAG_USING_DMA))
+			drive->dma = !hwif->dma_ops->dma_setup(drive);
 
-	if (!drive->dma)
-		pc->flags &= ~PC_FLAG_DMA_OK;
+		if (!drive->dma)
+			pc->flags &= ~PC_FLAG_DMA_OK;
+	}
 
 	ide_pktcmd_tf_load(drive, tf_flags, bcount, drive->dma);
 
-- 
cgit v0.10.2


From 563d993153ab16d829ba373c5c070a118eb1eba4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:55 +0100
Subject: ide-cd: remove xferlen arg to cdrom_start_packet_command

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 105e4d8..34981f5 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -520,10 +520,13 @@ end_request:
  * will be called immediately after the drive is prepared for the transfer.
  */
 static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive,
-						  int xferlen,
 						  ide_handler_t *handler)
 {
 	ide_hwif_t *hwif = drive->hwif;
+	struct request *rq = hwif->hwgroup->rq;
+	int xferlen;
+
+	xferlen = ide_cd_get_xferlen(rq);
 
 	ide_debug_log(IDE_DBG_PC, "Call %s, xferlen: %d\n", __func__, xferlen);
 
@@ -1175,15 +1178,12 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 					sector_t block)
 {
 	ide_handler_t *fn;
-	int xferlen;
 
 	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, "
 		      "rq->cmd_type: 0x%x, block: %llu\n",
 		      __func__, rq->cmd[0], rq->cmd_type,
 		      (unsigned long long)block);
 
-	xferlen = ide_cd_get_xferlen(rq);
-
 	if (blk_fs_request(rq)) {
 		fn = cdrom_start_rw_cont;
 
@@ -1210,7 +1210,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 		return ide_stopped;
 	}
 
-	return cdrom_start_packet_command(drive, xferlen, fn);
+	return cdrom_start_packet_command(drive, fn);
 }
 
 /*
-- 
cgit v0.10.2


From 65a3309e552585c4908e50e3c9736afb764c97c0 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:55 +0100
Subject: ide-cd: remove handler wrappers

Remove cdrom_do_newpc_cont and cdrom_start_rw_cont wrappers and pass
cdrom_transfer_packet_command to ide_execute_command directly.

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
[bart: don't move cdrom_start_packet_command() around, remove newlines]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 34981f5..1a7410f 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -511,6 +511,9 @@ end_request:
 	return 1;
 }
 
+static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *);
+static ide_startstop_t cdrom_newpc_intr(ide_drive_t *);
+
 /*
  * Set up the device registers for transferring a packet command on DEV,
  * expecting to later transfer XFERLEN bytes.  HANDLER is the routine
@@ -519,8 +522,7 @@ end_request:
  * called when the interrupt from the drive arrives.  Otherwise, HANDLER
  * will be called immediately after the drive is prepared for the transfer.
  */
-static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive,
-						  ide_handler_t *handler)
+static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq = hwif->hwgroup->rq;
@@ -544,13 +546,14 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive,
 			drive->waiting_for_dma = 0;
 
 		/* packet command */
-		ide_execute_command(drive, ATA_CMD_PACKET, handler,
+		ide_execute_command(drive, ATA_CMD_PACKET,
+				    cdrom_transfer_packet_command,
 				    ATAPI_WAIT_PC, ide_cd_expiry);
 		return ide_started;
 	} else {
 		ide_execute_pkt_cmd(drive);
 
-		return (*handler) (drive);
+		return cdrom_transfer_packet_command(drive);
 	}
 }
 
@@ -561,11 +564,10 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive,
  * there's data ready.
  */
 #define ATAPI_MIN_CDB_BYTES 12
-static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive,
-					  struct request *rq,
-					  ide_handler_t *handler)
+static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
+	struct request *rq = hwif->hwgroup->rq;
 	int cmd_len;
 	ide_startstop_t startstop;
 
@@ -592,7 +594,7 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive,
 	}
 
 	/* arm the interrupt handler */
-	ide_set_handler(drive, handler, rq->timeout, ide_cd_expiry);
+	ide_set_handler(drive, cdrom_newpc_intr, rq->timeout, ide_cd_expiry);
 
 	/* ATAPI commands get padded out to 12 bytes minimum */
 	cmd_len = COMMAND_SIZE(rq->cmd[0]);
@@ -680,8 +682,6 @@ static int ide_cd_check_transfer_size(ide_drive_t *drive, int len)
 	return 1;
 }
 
-static ide_startstop_t cdrom_newpc_intr(ide_drive_t *);
-
 static ide_startstop_t ide_cd_prepare_rw_request(ide_drive_t *drive,
 						 struct request *rq)
 {
@@ -724,20 +724,6 @@ static ide_startstop_t ide_cd_prepare_rw_request(ide_drive_t *drive,
 }
 
 /*
- * Routine to send a read/write packet command to the drive. This is usually
- * called directly from cdrom_start_{read,write}(). However, for drq_interrupt
- * devices, it is called from an interrupt when the drive is ready to accept
- * the command.
- */
-static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive)
-{
-	struct request *rq = drive->hwif->hwgroup->rq;
-
-	/* send the command to the drive and return */
-	return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr);
-}
-
-/*
  * Fix up a possibly partially-processed request so that we can start it over
  * entirely, or even put it back on the request queue.
  */
@@ -1126,13 +1112,6 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
 	return ide_started;
 }
 
-static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive)
-{
-	struct request *rq = HWGROUP(drive)->rq;
-
-	return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr);
-}
-
 static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 {
 
@@ -1177,16 +1156,12 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 					sector_t block)
 {
-	ide_handler_t *fn;
-
 	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, "
 		      "rq->cmd_type: 0x%x, block: %llu\n",
 		      __func__, rq->cmd[0], rq->cmd_type,
 		      (unsigned long long)block);
 
 	if (blk_fs_request(rq)) {
-		fn = cdrom_start_rw_cont;
-
 		if (cdrom_start_rw(drive, rq) == ide_stopped)
 			return ide_stopped;
 
@@ -1194,8 +1169,6 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 			return ide_stopped;
 	} else if (blk_sense_request(rq) || blk_pc_request(rq) ||
 		   rq->cmd_type == REQ_TYPE_ATA_PC) {
-		fn = cdrom_do_newpc_cont;
-
 		if (!rq->timeout)
 			rq->timeout = ATAPI_WAIT_PC;
 
@@ -1210,7 +1183,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 		return ide_stopped;
 	}
 
-	return cdrom_start_packet_command(drive, fn);
+	return cdrom_start_packet_command(drive);
 }
 
 /*
-- 
cgit v0.10.2


From 28ad91db77755f1c49d79652de11b28ee2cfbf03 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:56 +0100
Subject: ide-atapi: remove timeout arg to ide_issue_pc

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index fa7a70a..c470dbb 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -562,11 +562,12 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 	return ide_started;
 }
 
-ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
+ide_startstop_t ide_issue_pc(ide_drive_t *drive)
 {
 	struct ide_atapi_pc *pc;
 	ide_hwif_t *hwif = drive->hwif;
 	ide_expiry_t *expiry = NULL;
+	unsigned int timeout;
 	u32 tf_flags;
 	u16 bcount;
 
@@ -574,6 +575,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
 		bcount = ide_cd_get_xferlen(hwif->hwgroup->rq);
 		expiry = ide_cd_expiry;
+		timeout = ATAPI_WAIT_PC;
 
 		if (drive->dma)
 			drive->dma = !hwif->dma_ops->dma_setup(drive);
@@ -600,6 +602,9 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
 
 		if (!drive->dma)
 			pc->flags &= ~PC_FLAG_DMA_OK;
+
+		timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
+						       : WAIT_TAPE_CMD;
 	}
 
 	ide_pktcmd_tf_load(drive, tf_flags, bcount, drive->dma);
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index fdec729..0a48e2d 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -197,7 +197,7 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
 
 	pc->retries++;
 
-	return ide_issue_pc(drive, WAIT_FLOPPY_CMD);
+	return ide_issue_pc(drive);
 }
 
 void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *pc)
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index ac9e29a..5d2aa22 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -694,7 +694,7 @@ static ide_startstop_t idetape_issue_pc(ide_drive_t *drive,
 
 	pc->retries++;
 
-	return ide_issue_pc(drive, WAIT_TAPE_CMD);
+	return ide_issue_pc(drive);
 }
 
 /* A mode sense command is used to "sense" tape parameters. */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index ad57a44..db5ef8a 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1250,7 +1250,7 @@ int ide_cd_expiry(ide_drive_t *);
 
 int ide_cd_get_xferlen(struct request *);
 
-ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int);
+ide_startstop_t ide_issue_pc(ide_drive_t *);
 
 ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *);
 
-- 
cgit v0.10.2


From 06cc2778a1744b79edcfa394ce2d41f09134b2b1 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:56 +0100
Subject: ide-atapi: put the rest of non-ide-cd code into the else-clause of
 ide_transfer_pc

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index c470dbb..c9beda5 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -509,17 +509,6 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 			drive->waiting_for_dma = 1;
 	}
 
-	ireason = ide_read_ireason(drive);
-	if (drive->media == ide_tape)
-		ireason = ide_wait_ireason(drive, ireason);
-
-	if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) {
-		printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing "
-				"a packet command\n", drive->name);
-
-		return ide_do_reset(drive);
-	}
-
 	if (dev_is_idecd(drive)) {
 		/* ATAPI commands get padded out to 12 bytes minimum */
 		cmd_len = COMMAND_SIZE(rq->cmd[0]);
@@ -544,6 +533,17 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 							       : WAIT_TAPE_CMD;
 			expiry = NULL;
 		}
+
+		ireason = ide_read_ireason(drive);
+		if (drive->media == ide_tape)
+			ireason = ide_wait_ireason(drive, ireason);
+
+		if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) {
+			printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing "
+					"a packet command\n", drive->name);
+
+			return ide_do_reset(drive);
+		}
 	}
 
 	/* Set the interrupt routine */
-- 
cgit v0.10.2


From b16aabc9374217fa2d28e72fd9a6e6d60905e1b9 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:56 +0100
Subject: ide-atapi: start dma in a drive-specific way

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index c9beda5..e8688c0 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -489,7 +489,7 @@ static int ide_delayed_transfer_pc(ide_drive_t *drive)
 
 static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 {
-	struct ide_atapi_pc *pc = drive->pc;
+	struct ide_atapi_pc *uninitialized_var(pc);
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq = hwif->hwgroup->rq;
 	ide_expiry_t *expiry;
@@ -518,6 +518,8 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 		timeout = rq->timeout;
 		expiry  = ide_cd_expiry;
 	} else {
+		pc = drive->pc;
+
 		cmd_len = ATAPI_MIN_CDB_BYTES;
 
 		/*
@@ -550,9 +552,14 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 	ide_set_handler(drive, ide_pc_intr, timeout, expiry);
 
 	/* Begin DMA, if necessary */
-	if (pc->flags & PC_FLAG_DMA_OK) {
-		pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
-		hwif->dma_ops->dma_start(drive);
+	if (dev_is_idecd(drive)) {
+		if (drive->dma)
+			hwif->dma_ops->dma_start(drive);
+	} else {
+		if (pc->flags & PC_FLAG_DMA_OK) {
+			pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
+			hwif->dma_ops->dma_start(drive);
+		}
 	}
 
 	/* Send the actual packet */
-- 
cgit v0.10.2


From 26799a63110dcbe81291ea53178f6b4810d07424 Mon Sep 17 00:00:00 2001
From: Ravikiran G Thirumalai <kiran@scalex86.org>
Date: Wed, 31 Dec 2008 13:44:46 -0800
Subject: x86: fix incorrect __read_mostly on _boot_cpu_pda

The pda rework (commit 3461b0af025251bbc6b3d56c821c6ac2de6f7209)
to remove static boot cpu pdas introduced a performance bug.

_boot_cpu_pda is the actual pda used by the boot cpu and is definitely
not "__read_mostly" and ended up polluting the read mostly section with
writes.  This bug caused regression of about 8-10% on certain syscall
intensive workloads.

Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Acked-by: Mike Travis <travis@sgi.com>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 388e05a..b9a4d8c 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,7 +27,7 @@
 #include <asm/trampoline.h>
 
 /* boot cpu pda */
-static struct x8664_pda _boot_cpu_pda __read_mostly;
+static struct x8664_pda _boot_cpu_pda;
 
 #ifdef CONFIG_SMP
 /*
-- 
cgit v0.10.2


From 46814dded1b972a07b1609d81632eef3009fbb10 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Wed, 31 Dec 2008 13:20:50 -0600
Subject: x86, UV: remove erroneous BAU initialization

Impact: fix crash on x86/UV

UV is the SGI "UltraViolet" machine, which is x86_64 based.
BAU is the "Broadcast Assist Unit", used for TLB shootdown in UV.

This patch removes the allocation and initialization of an unused table.

This table is left over from a development test mode.  It is unused in
the present code.

And it was incorrectly initialized: 8 entries allocated but 17 initialized,
causing slab corruption.

This patch should go into 2.6.27 and 2.6.28 as well as the current tree.

Diffed against 2.6.28 (linux-next, 12/30/08)

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 6a00e5f..f885023 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -582,7 +582,6 @@ static int __init uv_ptc_init(void)
 static struct bau_control * __init uv_table_bases_init(int blade, int node)
 {
 	int i;
-	int *ip;
 	struct bau_msg_status *msp;
 	struct bau_control *bau_tabp;
 
@@ -599,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
 		bau_cpubits_clear(&msp->seen_by, (int)
 				  uv_blade_nr_possible_cpus(blade));
 
-	bau_tabp->watching =
-	    kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node);
-	BUG_ON(!bau_tabp->watching);
-
-	for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++)
-		*ip = 0;
-
 	uv_bau_table_bases[blade] = bau_tabp;
 
 	return bau_tabp;
@@ -628,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu,
 		bcp->bau_msg_head	= bau_tablesp->va_queue_first;
 		bcp->va_queue_first	= bau_tablesp->va_queue_first;
 		bcp->va_queue_last	= bau_tablesp->va_queue_last;
-		bcp->watching		= bau_tablesp->watching;
 		bcp->msg_statuses	= bau_tablesp->msg_statuses;
 		bcp->descriptor_base	= adp;
 	}
-- 
cgit v0.10.2


From f634fa941188a91dbf1dab961fe7a4509852fd6e Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 31 Dec 2008 16:29:48 +0530
Subject: x86: cpuid.c fix style problems

Impact: cleanup

Fixes style problems:

 WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>
 ERROR: "foo * bar" should be "foo *bar"
 ERROR: trailing whitespace
 WARNING: usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc

total: 2 errors, 2 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 72cefd1..85d28d5 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -39,10 +39,10 @@
 #include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/uaccess.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
-#include <asm/uaccess.h>
 #include <asm/system.h>
 
 static struct class *cpuid_class;
@@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
 }
 
 static ssize_t cpuid_read(struct file *file, char __user *buf,
-			  size_t count, loff_t * ppos)
+			  size_t count, loff_t *ppos)
 {
 	char __user *tmp = buf;
 	struct cpuid_regs cmd;
@@ -117,7 +117,7 @@ static int cpuid_open(struct inode *inode, struct file *file)
 	unsigned int cpu;
 	struct cpuinfo_x86 *c;
 	int ret = 0;
-	
+
 	lock_kernel();
 
 	cpu = iminor(file->f_path.dentry->d_inode);
-- 
cgit v0.10.2


From 423a54058f746579aff1430877dbc82f17442b34 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 31 Dec 2008 16:42:20 +0530
Subject: x86: ldt.c fix style problems

Impact: cleanup

Fixes style problems:

 WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>
 ERROR: space required before the open parenthesis '('

total: 1 errors, 1 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index eee32b4..71f1d99 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,8 +12,8 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/vmalloc.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
@@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
 	if (err < 0)
 		return err;
 
-	for(i = 0; i < old->size; i++)
+	for (i = 0; i < old->size; i++)
 		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
 	return 0;
 }
-- 
cgit v0.10.2


From f153b82121b0366fe0e5f9553545cce237335175 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 2 Jan 2009 09:23:03 -0800
Subject: Sanitize gcc version header includes

 - include the gcc version-dependent header files from the generic gcc
   header file, rather than the other way around (iow: don't make the
   non-gcc header file have to know about gcc versions)

 - don't include compiler-gcc4.h for gcc 5 (for whenever it gets
   released).  That's just confusing and made us do odd things in the
   gcc4 header file (testing that we really had version 4!)

 - generate the name from the __GNUC__ version directly, rather than
   having a mess of #if conditionals.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 5c8351b..af40f8e 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -61,3 +61,8 @@
 #define  noinline			__attribute__((noinline))
 #define __attribute_const__		__attribute__((__const__))
 #define __maybe_unused			__attribute__((unused))
+
+#define __gcc_header(x) #x
+#define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h)
+#define gcc_header(x) _gcc_header(x)
+#include gcc_header(__GNUC__)
diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h
index e5eb795..2befe65 100644
--- a/include/linux/compiler-gcc3.h
+++ b/include/linux/compiler-gcc3.h
@@ -2,9 +2,6 @@
 #error "Please don't include <linux/compiler-gcc3.h> directly, include <linux/compiler.h> instead."
 #endif
 
-/* These definitions are for GCC v3.x.  */
-#include <linux/compiler-gcc.h>
-
 #if __GNUC_MINOR__ >= 3
 # define __used			__attribute__((__used__))
 #else
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 974f5b7..aa42621 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -2,9 +2,6 @@
 #error "Please don't include <linux/compiler-gcc4.h> directly, include <linux/compiler.h> instead."
 #endif
 
-/* These definitions are for GCC v4.x.  */
-#include <linux/compiler-gcc.h>
-
 #define __used			__attribute__((__used__))
 #define __must_check 		__attribute__((warn_unused_result))
 #define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
@@ -16,7 +13,7 @@
  */
 #define uninitialized_var(x) x = x
 
-#if !(__GNUC__ == 4 && __GNUC_MINOR__ < 3)
+#if __GNUC_MINOR__ >= 3
 /* Mark functions as cold. gcc will assume any path leading to a call
    to them will be unlikely.  This means a lot of manual unlikely()s
    are unnecessary now for any paths leading to the usual suspects
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ea7c6be..d95da10 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -36,12 +36,8 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 
 #ifdef __KERNEL__
 
-#if __GNUC__ >= 4
-# include <linux/compiler-gcc4.h>
-#elif __GNUC__ == 3 && __GNUC_MINOR__ >= 2
-# include <linux/compiler-gcc3.h>
-#else
-# error Sorry, your compiler is too old/not recognized.
+#ifdef __GNUC__
+#include <linux/compiler-gcc.h>
 #endif
 
 #define notrace __attribute__((no_instrument_function))
-- 
cgit v0.10.2


From f9d14250071eda9972e4c9cea745a11185952114 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 2 Jan 2009 09:29:43 -0800
Subject: Disallow gcc versions 4.1.{0,1}

These compiler versions are known to miscompile __weak functions and
thus generate kernels that don't necessarily work correctly.  If a weak
function is int he same compilation unit as a caller, gcc may end up
inlining it, and thus binding the weak function too early.

See

    http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27781

for details.

Cc: Adrian Bunk <bunk@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index aa42621..0999271 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -2,6 +2,11 @@
 #error "Please don't include <linux/compiler-gcc4.h> directly, include <linux/compiler.h> instead."
 #endif
 
+/* GCC 4.1.[01] miscompiles __weak */
+#if __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 1
+# error Your version of gcc miscompiles the __weak directive
+#endif
+
 #define __used			__attribute__((__used__))
 #define __must_check 		__attribute__((warn_unused_result))
 #define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
-- 
cgit v0.10.2


From dceb4521c8ed24b9fe4230e0d385cf4770260383 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 31 Dec 2008 17:35:02 +0530
Subject: x86: nmi.c fix style problems

Impact: cleanup, fix style problems

Fixes style problems:

 WARNING: Use #include <linux/smp.h> instead of <asm/smp.h>
 WARNING: Use #include <linux/nmi.h> instead of <asm/nmi.h>

total: 0 errors, 2 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8bd1bf9..45a09cc 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -26,11 +26,10 @@
 #include <linux/kernel_stat.h>
 #include <linux/kdebug.h>
 #include <linux/smp.h>
+#include <linux/nmi.h>
 
 #include <asm/i8259.h>
 #include <asm/io_apic.h>
-#include <asm/smp.h>
-#include <asm/nmi.h>
 #include <asm/proto.h>
 #include <asm/timer.h>
 
-- 
cgit v0.10.2


From 609e5b71d0eca163df017ecfcf917b149875e744 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 2 Jan 2009 16:16:16 +0100
Subject: kbuild: Remove gcc 4.1.0 quirk from init/main.c

Impact: cleanup

We now have a cleaner check for gcc 4.1.0/4.1.1 trouble in
include/linux/compiler-gcc4.h, so remove the 4.1.0 quirk from
init/main.c.

Reported-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/init/main.c b/init/main.c
index f5e64f2..ad8f9f5 100644
--- a/init/main.c
+++ b/init/main.c
@@ -75,15 +75,6 @@
 #include <asm/smp.h>
 #endif
 
-/*
- * This is one of the first .c files built. Error out early if we have compiler
- * trouble.
- */
-
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0
-#warning gcc-4.1.0 is known to miscompile the kernel.  A different compiler version is recommended.
-#endif
-
 static int kernel_init(void *);
 
 extern void init_IRQ(void);
-- 
cgit v0.10.2


From 52e15f0eae193a8e4ca31c1520179b8d65c79811 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Fri, 2 Jan 2009 13:40:14 +0000
Subject: Blackfin Serial Driver: updates kgdb over Blackfin serial driver with
 kgdb framework

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index 569f0e2..d63fad7 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -22,7 +22,8 @@
 #include <linux/tty_flip.h>
 #include <linux/serial_core.h>
 
-#ifdef CONFIG_KGDB_UART
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
 #include <linux/kgdb.h>
 #include <asm/irq_regs.h>
 #endif
@@ -45,6 +46,16 @@
 static struct bfin_serial_port bfin_serial_ports[BFIN_UART_NR_PORTS];
 static int nr_active_ports = ARRAY_SIZE(bfin_serial_resource);
 
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+
+# ifndef CONFIG_SERIAL_BFIN_PIO
+#  error KGDB only support UART in PIO mode.
+# endif
+
+static int kgdboc_port_line;
+static int kgdboc_break_enabled;
+#endif
 /*
  * Setup for console. Argument comes from the menuconfig
  */
@@ -110,9 +121,7 @@ static void bfin_serial_start_tx(struct uart_port *port)
 static void bfin_serial_stop_rx(struct uart_port *port)
 {
 	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
-#ifdef CONFIG_KGDB_UART
-	if (uart->port.line != CONFIG_KGDB_UART_PORT)
-#endif
+
 	UART_CLEAR_IER(uart, ERBFI);
 }
 
@@ -123,49 +132,6 @@ static void bfin_serial_enable_ms(struct uart_port *port)
 {
 }
 
-#ifdef CONFIG_KGDB_UART
-static int kgdb_entry_state;
-
-void kgdb_put_debug_char(int chr)
-{
-	struct bfin_serial_port *uart;
-
-	if (CONFIG_KGDB_UART_PORT < 0
-		|| CONFIG_KGDB_UART_PORT >= BFIN_UART_NR_PORTS)
-		uart = &bfin_serial_ports[0];
-	else
-		uart = &bfin_serial_ports[CONFIG_KGDB_UART_PORT];
-
-	while (!(UART_GET_LSR(uart) & THRE)) {
-		SSYNC();
-	}
-
-	UART_CLEAR_DLAB(uart);
-	UART_PUT_CHAR(uart, (unsigned char)chr);
-	SSYNC();
-}
-
-int kgdb_get_debug_char(void)
-{
-	struct bfin_serial_port *uart;
-	unsigned char chr;
-
-	if (CONFIG_KGDB_UART_PORT < 0
-		|| CONFIG_KGDB_UART_PORT >= BFIN_UART_NR_PORTS)
-		uart = &bfin_serial_ports[0];
-	else
-		uart = &bfin_serial_ports[CONFIG_KGDB_UART_PORT];
-
-	while(!(UART_GET_LSR(uart) & DR)) {
-		SSYNC();
-	}
-	UART_CLEAR_DLAB(uart);
-	chr = UART_GET_CHAR(uart);
-	SSYNC();
-
-	return chr;
-}
-#endif
 
 #if ANOMALY_05000363 && defined(CONFIG_SERIAL_BFIN_PIO)
 # define UART_GET_ANOMALY_THRESHOLD(uart)    ((uart)->anomaly_threshold)
@@ -178,7 +144,7 @@ int kgdb_get_debug_char(void)
 #ifdef CONFIG_SERIAL_BFIN_PIO
 static void bfin_serial_rx_chars(struct bfin_serial_port *uart)
 {
-	struct tty_struct *tty = uart->port.info->port.tty;
+	struct tty_struct *tty = NULL;
 	unsigned int status, ch, flg;
 	static struct timeval anomaly_start = { .tv_sec = 0 };
 
@@ -188,27 +154,18 @@ static void bfin_serial_rx_chars(struct bfin_serial_port *uart)
  	ch = UART_GET_CHAR(uart);
  	uart->port.icount.rx++;
 
-#ifdef CONFIG_KGDB_UART
-	if (uart->port.line == CONFIG_KGDB_UART_PORT) {
-		struct pt_regs *regs = get_irq_regs();
-		if (uart->port.cons->index == CONFIG_KGDB_UART_PORT && ch == 0x1) { /* Ctrl + A */
-			kgdb_breakkey_pressed(regs);
-			return;
-		} else if (kgdb_entry_state == 0 && ch == '$') {/* connection from KGDB */
-			kgdb_entry_state = 1;
-		} else if (kgdb_entry_state == 1 && ch == 'q') {
-			kgdb_entry_state = 0;
-			kgdb_breakkey_pressed(regs);
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+	if (kgdb_connected && kgdboc_port_line == uart->port.line)
+		if (ch == 0x3) {/* Ctrl + C */
+			kgdb_breakpoint();
 			return;
-		} else if (ch == 0x3) {/* Ctrl + C */
-			kgdb_entry_state = 0;
-			kgdb_breakkey_pressed(regs);
-			return;
-		} else {
-			kgdb_entry_state = 0;
 		}
-	}
+
+	if (!uart->port.info || !uart->port.info->tty)
+		return;
 #endif
+	tty = uart->port.info->tty;
 
 	if (ANOMALY_05000363) {
 		/* The BF533 (and BF561) family of processors have a nice anomaly
@@ -630,16 +587,16 @@ static int bfin_serial_startup(struct uart_port *port)
 	uart->rx_dma_timer.expires = jiffies + DMA_RX_FLUSH_JIFFIES;
 	add_timer(&(uart->rx_dma_timer));
 #else
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+	if (kgdboc_port_line == uart->port.line && kgdboc_break_enabled)
+		kgdboc_break_enabled = 0;
+	else {
+# endif
 	if (request_irq(uart->port.irq, bfin_serial_rx_int, IRQF_DISABLED,
 	     "BFIN_UART_RX", uart)) {
-# ifdef	CONFIG_KGDB_UART
-		if (uart->port.line != CONFIG_KGDB_UART_PORT) {
-# endif
 		printk(KERN_NOTICE "Unable to attach BlackFin UART RX interrupt\n");
 		return -EBUSY;
-# ifdef	CONFIG_KGDB_UART
-		}
-# endif
 	}
 
 	if (request_irq
@@ -685,6 +642,10 @@ static int bfin_serial_startup(struct uart_port *port)
 		}
 	}
 # endif
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+	}
+# endif
 #endif
 	UART_SET_IER(uart, ERBFI);
 	return 0;
@@ -716,9 +677,6 @@ static void bfin_serial_shutdown(struct uart_port *port)
 		break;
 	};
 #endif
-#ifdef	CONFIG_KGDB_UART
-	if (uart->port.line != CONFIG_KGDB_UART_PORT)
-#endif
 	free_irq(uart->port.irq, uart);
 	free_irq(uart->port.irq+1, uart);
 #endif
@@ -887,6 +845,51 @@ static void bfin_serial_set_ldisc(struct uart_port *port)
 	}
 }
 
+#ifdef CONFIG_CONSOLE_POLL
+static void bfin_serial_poll_put_char(struct uart_port *port, unsigned char chr)
+{
+	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
+
+	while (!(UART_GET_LSR(uart) & THRE))
+		cpu_relax();
+
+	UART_CLEAR_DLAB(uart);
+	UART_PUT_CHAR(uart, (unsigned char)chr);
+}
+
+static int bfin_serial_poll_get_char(struct uart_port *port)
+{
+	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
+	unsigned char chr;
+
+	while (!(UART_GET_LSR(uart) & DR))
+		cpu_relax();
+
+	UART_CLEAR_DLAB(uart);
+	chr = UART_GET_CHAR(uart);
+
+	return chr;
+}
+#endif
+
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+static void bfin_kgdboc_port_shutdown(struct uart_port *port)
+{
+	if (kgdboc_break_enabled) {
+		kgdboc_break_enabled = 0;
+		bfin_serial_shutdown(port);
+	}
+}
+
+static int bfin_kgdboc_port_startup(struct uart_port *port)
+{
+	kgdboc_port_line = port->line;
+	kgdboc_break_enabled = !bfin_serial_startup(port);
+	return 0;
+}
+#endif
+
 static struct uart_ops bfin_serial_pops = {
 	.tx_empty	= bfin_serial_tx_empty,
 	.set_mctrl	= bfin_serial_set_mctrl,
@@ -905,6 +908,15 @@ static struct uart_ops bfin_serial_pops = {
 	.request_port	= bfin_serial_request_port,
 	.config_port	= bfin_serial_config_port,
 	.verify_port	= bfin_serial_verify_port,
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+	.kgdboc_port_startup	= bfin_kgdboc_port_startup,
+	.kgdboc_port_shutdown	= bfin_kgdboc_port_shutdown,
+#endif
+#ifdef CONFIG_CONSOLE_POLL
+	.poll_put_char	= bfin_serial_poll_put_char,
+	.poll_get_char	= bfin_serial_poll_get_char,
+#endif
 };
 
 static void __init bfin_serial_init_ports(void)
@@ -1076,10 +1088,7 @@ static int __init bfin_serial_rs_console_init(void)
 {
 	bfin_serial_init_ports();
 	register_console(&bfin_serial_console);
-#ifdef CONFIG_KGDB_UART
-	kgdb_entry_state = 0;
-	init_kgdb_uart();
-#endif
+
 	return 0;
 }
 console_initcall(bfin_serial_rs_console_init);
@@ -1235,10 +1244,6 @@ static struct platform_driver bfin_serial_driver = {
 static int __init bfin_serial_init(void)
 {
 	int ret;
-#ifdef CONFIG_KGDB_UART
-	struct bfin_serial_port *uart = &bfin_serial_ports[CONFIG_KGDB_UART_PORT];
-	struct ktermios t;
-#endif
 
 	pr_info("Serial: Blackfin serial driver\n");
 
@@ -1252,21 +1257,6 @@ static int __init bfin_serial_init(void)
 			uart_unregister_driver(&bfin_serial_reg);
 		}
 	}
-#ifdef CONFIG_KGDB_UART
-	if (uart->port.cons->index != CONFIG_KGDB_UART_PORT) {
-		request_irq(uart->port.irq, bfin_serial_rx_int,
-			IRQF_DISABLED, "BFIN_UART_RX", uart);
-		pr_info("Request irq for kgdb uart port\n");
-		UART_SET_IER(uart, ERBFI);
-		SSYNC();
-		t.c_cflag = CS8|B57600;
-		t.c_iflag = 0;
-		t.c_oflag = 0;
-		t.c_lflag = ICANON;
-		t.c_line = CONFIG_KGDB_UART_PORT;
-		bfin_serial_set_termios(&uart->port, &t, &t);
-	}
-#endif
 	return ret;
 }
 
@@ -1276,6 +1266,7 @@ static void __exit bfin_serial_exit(void)
 	uart_unregister_driver(&bfin_serial_reg);
 }
 
+
 module_init(bfin_serial_init);
 module_exit(bfin_serial_exit);
 
-- 
cgit v0.10.2


From 80d5c474b87da88eca8e1ab034e26daa9f688130 Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Fri, 2 Jan 2009 13:40:22 +0000
Subject: Blackfin Serial Driver: fix bug - SIR driver stop receiving randomly

Bug description:
The IRDA receiver may can't receiving any more after processed some signals.

To duplicate this issue is put three IRDA devices together, one blackfin,
two none blackfin, they will detect each other. Let one none blackfin devices
irdaping the blackfin devices, when it stopped print out ping information,
it is the time that blackfin stoped receiving, the time is random.

The related register bit is OK, the other devices is sending data continuously.
But no interrupt come.

Fixing:
I tried Michael's suggestion that request the UARTx error interrupt, and reset
the IRDA when found FE error. This method helps much, but it can't completely
avoid stop.

Reset the IRDA before every time sending the data is more safe.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index d63fad7..59a221f 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -73,6 +73,8 @@ static void bfin_serial_tx_chars(struct bfin_serial_port *uart);
 
 static void bfin_serial_mctrl_check(struct bfin_serial_port *uart);
 
+static void bfin_serial_reset_irda(struct uart_port *port);
+
 /*
  * interrupts are disabled on entry
  */
@@ -105,6 +107,14 @@ static void bfin_serial_stop_tx(struct uart_port *port)
 static void bfin_serial_start_tx(struct uart_port *port)
 {
 	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
+	struct tty_struct *tty = uart->port.info->port.tty;
+
+	/*
+	 * To avoid losting RX interrupt, we reset IR function
+	 * before sending data.
+	 */
+	if (tty->termios->c_line == N_IRDA)
+		bfin_serial_reset_irda(port);
 
 #ifdef CONFIG_SERIAL_BFIN_DMA
 	if (uart->tx_done)
@@ -890,6 +900,20 @@ static int bfin_kgdboc_port_startup(struct uart_port *port)
 }
 #endif
 
+static void bfin_serial_reset_irda(struct uart_port *port)
+{
+	int line = port->line;
+	unsigned short val;
+
+	val = UART_GET_GCTL(&bfin_serial_ports[line]);
+	val &= ~(IREN | RPOLC);
+	UART_PUT_GCTL(&bfin_serial_ports[line], val);
+	SSYNC();
+	val |= (IREN | RPOLC);
+	UART_PUT_GCTL(&bfin_serial_ports[line], val);
+	SSYNC();
+}
+
 static struct uart_ops bfin_serial_pops = {
 	.tx_empty	= bfin_serial_tx_empty,
 	.set_mctrl	= bfin_serial_set_mctrl,
-- 
cgit v0.10.2


From b6efa1eabbe8d23fd7dcad1eed8ce945f4adea83 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Fri, 2 Jan 2009 13:40:31 +0000
Subject: Blackfin Serial Driver: Clean serial console and early prink code.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index 59a221f..88449d3 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -986,7 +986,7 @@ static void __init bfin_serial_init_ports(void)
 
 }
 
-#ifdef CONFIG_SERIAL_BFIN_CONSOLE
+#if defined(CONFIG_SERIAL_BFIN_CONSOLE) || defined(CONFIG_EARLY_PRINTK)
 /*
  * If the port was already initialised (eg, by a boot loader),
  * try to determine the current setup.
@@ -1030,24 +1030,20 @@ bfin_serial_console_get_options(struct bfin_serial_port *uart, int *baud,
 	}
 	pr_debug("%s:baud = %d, parity = %c, bits= %d\n", __func__, *baud, *parity, *bits);
 }
-#endif
 
-#if defined(CONFIG_SERIAL_BFIN_CONSOLE) || defined(CONFIG_EARLY_PRINTK)
 static struct uart_driver bfin_serial_reg;
 
 static int __init
 bfin_serial_console_setup(struct console *co, char *options)
 {
 	struct bfin_serial_port *uart;
-# ifdef CONFIG_SERIAL_BFIN_CONSOLE
 	int baud = 57600;
 	int bits = 8;
 	int parity = 'n';
-#  ifdef CONFIG_SERIAL_BFIN_CTSRTS
+# ifdef CONFIG_SERIAL_BFIN_CTSRTS
 	int flow = 'r';
-#  else
+# else
 	int flow = 'n';
-#  endif
 # endif
 
 	/*
@@ -1059,16 +1055,12 @@ bfin_serial_console_setup(struct console *co, char *options)
 		co->index = 0;
 	uart = &bfin_serial_ports[co->index];
 
-# ifdef CONFIG_SERIAL_BFIN_CONSOLE
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
 	else
 		bfin_serial_console_get_options(uart, &baud, &parity, &bits);
 
 	return uart_set_options(&uart->port, co, baud, parity, bits, flow);
-# else
-	return 0;
-# endif
 }
 #endif /* defined (CONFIG_SERIAL_BFIN_CONSOLE) ||
 				 defined (CONFIG_EARLY_PRINTK) */
@@ -1177,7 +1169,7 @@ struct console __init *bfin_earlyserial_init(unsigned int port,
 	return &bfin_early_serial_console;
 }
 
-#endif /* CONFIG_SERIAL_BFIN_CONSOLE */
+#endif /* CONFIG_EARLY_PRINTK */
 
 static struct uart_driver bfin_serial_reg = {
 	.owner			= THIS_MODULE,
-- 
cgit v0.10.2


From 68a784cb1add52543644a879ef601f3b52d18623 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Fri, 2 Jan 2009 13:40:38 +0000
Subject: Blackfin Serial Driver: Fix bug - BF527-EZKIT unable to receive large
 files over UART in DMA mode

Add spin_lock_irqsave() when receive and transfer data.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index 88449d3..1c85039 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -81,7 +81,9 @@ static void bfin_serial_reset_irda(struct uart_port *port);
 static void bfin_serial_stop_tx(struct uart_port *port)
 {
 	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
+#ifdef CONFIG_SERIAL_BFIN_DMA
 	struct circ_buf *xmit = &uart->port.info->xmit;
+#endif
 
 	while (!(UART_GET_LSR(uart) & TEMT))
 		cpu_relax();
@@ -412,7 +414,9 @@ static void bfin_serial_dma_rx_chars(struct bfin_serial_port *uart)
 
 void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
 {
-	int x_pos, pos;
+	int x_pos, pos, flags;
+
+	spin_lock_irqsave(&uart->port.lock, flags);
 
 	uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
 	x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
@@ -430,6 +434,8 @@ void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
 		uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
 	}
 
+	spin_unlock_irqrestore(&uart->port.lock, flags);
+
 	mod_timer(&(uart->rx_dma_timer), jiffies + DMA_RX_FLUSH_JIFFIES);
 }
 
@@ -464,10 +470,9 @@ static irqreturn_t bfin_serial_dma_rx_int(int irq, void *dev_id)
 	spin_lock(&uart->port.lock);
 	irqstat = get_dma_curr_irqstat(uart->rx_dma_channel);
 	clear_dma_irqstat(uart->rx_dma_channel);
+	bfin_serial_dma_rx_chars(uart);
 	spin_unlock(&uart->port.lock);
 
-	mod_timer(&(uart->rx_dma_timer), jiffies);
-
 	return IRQ_HANDLED;
 }
 #endif
-- 
cgit v0.10.2


From e482a2378f3d1aef7fa8942f8f163078f01bb456 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Fri, 2 Jan 2009 13:40:45 +0000
Subject: Blackfin Serial Driver: Remove BI status for known_good_char

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index 1c85039..318d69d 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -219,6 +219,7 @@ static void bfin_serial_rx_chars(struct bfin_serial_port *uart)
 			return;
 
  known_good_char:
+			status &= ~BI;
 			anomaly_start.tv_sec = 0;
 		}
 	}
-- 
cgit v0.10.2


From a88a69c91256418c5907c2f1f8a0ec0a36f9e6cc Mon Sep 17 00:00:00 2001
From: Joe Peterson <joe@skyrush.com>
Date: Fri, 2 Jan 2009 13:40:53 +0000
Subject: n_tty: Fix loss of echoed characters and remove bkl from n_tty

Fixes the loss of echoed (and other ldisc-generated characters) when
the tty is stopped or when the driver output buffer is full (happens
frequently for input during continuous program output, such as ^C)
and removes the Big Kernel Lock from the N_TTY line discipline.

Adds an "echo buffer" to the N_TTY line discipline that handles all
ldisc-generated output (including echoed characters).  Along with the
loss of characters, this also fixes the associated loss of sync between
tty output and the ldisc state when characters cannot be immediately
written to the tty driver.

The echo buffer stores (in addition to characters) state operations that need
to be done at the time of character output (like management of the column
position).  This allows echo to cooperate correctly with program output,
since the ldisc state remains consistent with actual characters written.

Since the echo buffer code now isolates the tty column state code
to the process_out* and process_echoes functions, we can remove the
Big Kernel Lock (BKL) and replace it with mutex locks.

Highlights are:

* Handles echo (and other ldisc output) when tty driver buffer is full
  - continuous program output can block echo
* Saves echo when tty is in stopped state (e.g. ^S)
  - (e.g.: ^Q will correctly cause held characters to be released for output)
* Control character pairs (e.g. "^C") are treated atomically and not
  split up by interleaved program output
* Line discipline state is kept consistent with characters sent to
  the tty driver
* Remove the big kernel lock (BKL) from N_TTY line discipline

Signed-off-by: Joe Peterson <joe@skyrush.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index efbfe961..a9bc576 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -62,6 +62,17 @@
 #define TTY_THRESHOLD_THROTTLE		128 /* now based on remaining room */
 #define TTY_THRESHOLD_UNTHROTTLE 	128
 
+/*
+ * Special byte codes used in the echo buffer to represent operations
+ * or special handling of characters.  Bytes in the echo buffer that
+ * are not part of such special blocks are treated as normal character
+ * codes.
+ */
+#define ECHO_OP_START 0xff
+#define ECHO_OP_MOVE_BACK_COL 0x80
+#define ECHO_OP_SET_CANON_COL 0x81
+#define ECHO_OP_ERASE_TAB 0x82
+
 static inline unsigned char *alloc_buf(void)
 {
 	gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
@@ -169,6 +180,7 @@ static void check_unthrottle(struct tty_struct *tty)
  *
  *	Locking: tty_read_lock for read fields.
  */
+
 static void reset_buffer_flags(struct tty_struct *tty)
 {
 	unsigned long flags;
@@ -176,6 +188,11 @@ static void reset_buffer_flags(struct tty_struct *tty)
 	spin_lock_irqsave(&tty->read_lock, flags);
 	tty->read_head = tty->read_tail = tty->read_cnt = 0;
 	spin_unlock_irqrestore(&tty->read_lock, flags);
+
+	mutex_lock(&tty->echo_lock);
+	tty->echo_pos = tty->echo_cnt = tty->echo_overrun = 0;
+	mutex_unlock(&tty->echo_lock);
+
 	tty->canon_head = tty->canon_data = tty->erasing = 0;
 	memset(&tty->read_flags, 0, sizeof tty->read_flags);
 	n_tty_set_room(tty);
@@ -266,89 +283,116 @@ static inline int is_continuation(unsigned char c, struct tty_struct *tty)
 }
 
 /**
- *	opost			-	output post processor
+ *	do_output_char			-	output one character
  *	@c: character (or partial unicode symbol)
  *	@tty: terminal device
+ *	@space: space available in tty driver write buffer
  *
- *	Perform OPOST processing.  Returns -1 when the output device is
- *	full and the character must be retried. Note that Linux currently
- *	ignores TABDLY, CRDLY, VTDLY, FFDLY and NLDLY. They simply aren't
- *	relevant in the world today. If you ever need them, add them here.
+ *	This is a helper function that handles one output character
+ *	(including special characters like TAB, CR, LF, etc.),
+ *	putting the results in the tty driver's write buffer.
+ *
+ *	Note that Linux currently ignores TABDLY, CRDLY, VTDLY, FFDLY
+ *	and NLDLY.  They simply aren't relevant in the world today.
+ *	If you ever need them, add them here.
  *
- *	Called from both the receive and transmit sides and can be called
- *	re-entrantly. Relies on lock_kernel() for tty->column state.
+ *	Returns the number of bytes of buffer space used or -1 if
+ *	no space left.
+ *
+ *	Locking: should be called under the output_lock to protect
+ *		 the column state and space left in the buffer
  */
 
-static int opost(unsigned char c, struct tty_struct *tty)
+static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
 {
-	int	space, spaces;
+	int	spaces;
 
-	space = tty_write_room(tty);
 	if (!space)
 		return -1;
-
-	lock_kernel();
-	if (O_OPOST(tty)) {
-		switch (c) {
-		case '\n':
-			if (O_ONLRET(tty))
-				tty->column = 0;
-			if (O_ONLCR(tty)) {
-				if (space < 2) {
-					unlock_kernel();
-					return -1;
-				}
-				tty_put_char(tty, '\r');
-				tty->column = 0;
-			}
-			tty->canon_column = tty->column;
-			break;
-		case '\r':
-			if (O_ONOCR(tty) && tty->column == 0) {
-				unlock_kernel();
-				return 0;
-			}
-			if (O_OCRNL(tty)) {
-				c = '\n';
-				if (O_ONLRET(tty))
-					tty->canon_column = tty->column = 0;
-				break;
-			}
+	
+	switch (c) {
+	case '\n':
+		if (O_ONLRET(tty))
+			tty->column = 0;
+		if (O_ONLCR(tty)) {
+			if (space < 2)
+				return -1;
 			tty->canon_column = tty->column = 0;
+			tty_put_char(tty, '\r');
+			tty_put_char(tty, c);
+			return 2;
+		}
+		tty->canon_column = tty->column;
+		break;
+	case '\r':
+		if (O_ONOCR(tty) && tty->column == 0)
+			return 0;
+		if (O_OCRNL(tty)) {
+			c = '\n';
+			if (O_ONLRET(tty))
+				tty->canon_column = tty->column = 0;
 			break;
-		case '\t':
-			spaces = 8 - (tty->column & 7);
-			if (O_TABDLY(tty) == XTABS) {
-				if (space < spaces) {
-					unlock_kernel();
-					return -1;
-				}
-				tty->column += spaces;
-				tty->ops->write(tty, "        ", spaces);
-				unlock_kernel();
-				return 0;
-			}
+		}
+		tty->canon_column = tty->column = 0;
+		break;
+	case '\t':
+		spaces = 8 - (tty->column & 7);
+		if (O_TABDLY(tty) == XTABS) {
+			if (space < spaces)
+				return -1;
 			tty->column += spaces;
-			break;
-		case '\b':
-			if (tty->column > 0)
-				tty->column--;
-			break;
-		default:
-			if (O_OLCUC(tty))
-				c = toupper(c);
-			if (!iscntrl(c) && !is_continuation(c, tty))
-				tty->column++;
-			break;
+			tty->ops->write(tty, "        ", spaces);
+			return spaces;
 		}
+		tty->column += spaces;
+		break;
+	case '\b':
+		if (tty->column > 0)
+			tty->column--;
+		break;
+	default:
+		if (O_OLCUC(tty))
+			c = toupper(c);
+		if (!iscntrl(c) && !is_continuation(c, tty))
+			tty->column++;
+		break;
 	}
+
 	tty_put_char(tty, c);
-	unlock_kernel();
-	return 0;
+	return 1;
+}
+
+/**
+ *	process_output			-	output post processor
+ *	@c: character (or partial unicode symbol)
+ *	@tty: terminal device
+ *
+ *	Perform OPOST processing.  Returns -1 when the output device is
+ *	full and the character must be retried.
+ *
+ *	Locking: output_lock to protect column state and space left
+ *		 (also, this is called from n_tty_write under the
+ *		  tty layer write lock)
+ */
+
+static int process_output(unsigned char c, struct tty_struct *tty)
+{
+	int	space, retval;
+
+	mutex_lock(&tty->output_lock);
+
+	space = tty_write_room(tty);
+	retval = do_output_char(c, tty, space);
+
+	mutex_unlock(&tty->output_lock);
+	if (retval < 0)
+		return -1;
+	else
+		return 0;
 }
 
 /**
- *	opost_block		-	block postprocess
+ *	process_output_block		-	block post processor
  *	@tty: terminal device
  *	@inbuf: user buffer
  *	@nr: number of bytes
@@ -358,24 +402,29 @@ static int opost(unsigned char c, struct tty_struct *tty)
  *	the simple cases normally found and helps to generate blocks of
  *	symbols for the console driver and thus improve performance.
  *
- *	Called from n_tty_write under the tty layer write lock. Relies
- *	on lock_kernel for the tty->column state.
+ *	Locking: output_lock to protect column state and space left
+ *		 (also, this is called from n_tty_write under the
+ *		  tty layer write lock)
  */
 
-static ssize_t opost_block(struct tty_struct *tty,
-		       const unsigned char *buf, unsigned int nr)
+static ssize_t process_output_block(struct tty_struct *tty,
+				    const unsigned char *buf, unsigned int nr)
 {
 	int	space;
 	int 	i;
 	const unsigned char *cp;
 
+	mutex_lock(&tty->output_lock);
+
 	space = tty_write_room(tty);
 	if (!space)
+	{
+		mutex_unlock(&tty->output_lock);
 		return 0;
+	}
 	if (nr > space)
 		nr = space;
 
-	lock_kernel();
 	for (i = 0, cp = buf; i < nr; i++, cp++) {
 		switch (*cp) {
 		case '\n':
@@ -407,46 +456,393 @@ static ssize_t opost_block(struct tty_struct *tty,
 		}
 	}
 break_out:
-	if (tty->ops->flush_chars)
-		tty->ops->flush_chars(tty);
 	i = tty->ops->write(tty, buf, i);
-	unlock_kernel();
+
+	mutex_unlock(&tty->output_lock);
 	return i;
 }
 
+/**
+ *	process_echoes	-	write pending echo characters
+ *	@tty: terminal device
+ *
+ *	Write previously buffered echo (and other ldisc-generated)
+ *	characters to the tty.
+ *
+ *	Characters generated by the ldisc (including echoes) need to
+ *	be buffered because the driver's write buffer can fill during
+ *	heavy program output.  Echoing straight to the driver will
+ *	often fail under these conditions, causing lost characters and
+ *	resulting mismatches of ldisc state information.
+ *
+ *	Since the ldisc state must represent the characters actually sent
+ *	to the driver at the time of the write, operations like certain
+ *	changes in column state are also saved in the buffer and executed
+ *	here.
+ *
+ *	A circular fifo buffer is used so that the most recent characters
+ *	are prioritized.  Also, when control characters are echoed with a
+ *	prefixed "^", the pair is treated atomically and thus not separated.
+ *
+ *	Locking: output_lock to protect column state and space left,
+ *		 echo_lock to protect the echo buffer
+ */
+
+static void process_echoes(struct tty_struct *tty)
+{
+	int	space, nr;
+	unsigned char c;
+	unsigned char *cp, *buf_end;
+
+	if (!tty->echo_cnt)
+		return;
+
+	mutex_lock(&tty->output_lock);
+	mutex_lock(&tty->echo_lock);
+
+	space = tty_write_room(tty);
+
+	buf_end = tty->echo_buf + N_TTY_BUF_SIZE;
+	cp = tty->echo_buf + tty->echo_pos;
+	nr = tty->echo_cnt;
+	while (nr > 0) {
+		c = *cp;
+		if (c == ECHO_OP_START) {
+			unsigned char op;
+			unsigned char *opp;
+			int no_space_left = 0;
+
+			/*
+			 * If the buffer byte is the start of a multi-byte
+			 * operation, get the next byte, which is either the
+			 * op code or a control character value.
+			 */
+			opp = cp + 1;
+			if (opp == buf_end)
+				opp -= N_TTY_BUF_SIZE;
+			op = *opp;
+			
+			switch (op) {
+				unsigned int num_chars, num_bs;
+
+			case ECHO_OP_ERASE_TAB:
+				if (++opp == buf_end)
+					opp -= N_TTY_BUF_SIZE;
+				num_chars = *opp;
+
+				/*
+				 * Determine how many columns to go back
+				 * in order to erase the tab.
+				 * This depends on the number of columns
+				 * used by other characters within the tab
+				 * area.  If this (modulo 8) count is from
+				 * the start of input rather than from a
+				 * previous tab, we offset by canon column.
+				 * Otherwise, tab spacing is normal.
+				 */
+				if (!(num_chars & 0x80))
+					num_chars += tty->canon_column;
+				num_bs = 8 - (num_chars & 7);
+
+				if (num_bs > space) {
+					no_space_left = 1;
+					break;
+				}
+				space -= num_bs;
+				while (num_bs--) {
+					tty_put_char(tty, '\b');
+					if (tty->column > 0)
+						tty->column--;
+				}
+				cp += 3;
+				nr -= 3;
+				break;
+
+			case ECHO_OP_SET_CANON_COL:
+				tty->canon_column = tty->column;
+				cp += 2;
+				nr -= 2;
+				break;
+
+			case ECHO_OP_MOVE_BACK_COL:
+				if (tty->column > 0)
+					tty->column--;
+				cp += 2;
+				nr -= 2;
+				break;
+
+			case ECHO_OP_START:
+				/* This is an escaped echo op start code */
+				if (!space) {
+					no_space_left = 1;
+					break;
+				}
+				tty_put_char(tty, ECHO_OP_START);
+				tty->column++;
+				space--;
+				cp += 2;
+				nr -= 2;
+				break;
+
+			default:
+				if (iscntrl(op)) {
+					if (L_ECHOCTL(tty)) {
+						/*
+						 * Ensure there is enough space
+						 * for the whole ctrl pair.
+						 */
+						if (space < 2) {
+							no_space_left = 1;
+							break;
+						}
+						tty_put_char(tty, '^');
+						tty_put_char(tty, op ^ 0100);
+						tty->column += 2;
+						space -= 2;
+					} else {
+						if (!space) {
+							no_space_left = 1;
+							break;
+						}
+						tty_put_char(tty, op);
+						space--;
+					}
+				}
+				/*
+				 * If above falls through, this was an
+				 * undefined op.
+				 */
+				cp += 2;
+				nr -= 2;
+			}
+
+			if (no_space_left)
+				break;
+		} else {
+			int retval;
+
+			if ((retval = do_output_char(c, tty, space)) < 0)
+				break;
+			space -= retval;
+			cp += 1;
+			nr -= 1;
+		}
+
+		/* When end of circular buffer reached, wrap around */
+		if (cp >= buf_end)
+			cp -= N_TTY_BUF_SIZE;
+	}
+
+	if (nr == 0) {
+		tty->echo_pos = 0;
+		tty->echo_cnt = 0;
+		tty->echo_overrun = 0;
+	} else {
+		int num_processed = tty->echo_cnt - nr;
+		tty->echo_pos += num_processed;
+		tty->echo_pos &= N_TTY_BUF_SIZE - 1;
+		tty->echo_cnt = nr;
+		if (num_processed > 0)
+			tty->echo_overrun = 0;
+	}
+
+	mutex_unlock(&tty->echo_lock);
+	mutex_unlock(&tty->output_lock);
+
+	if (tty->ops->flush_chars)
+		tty->ops->flush_chars(tty);
+}
+
+/**
+ *	add_echo_byte	-	add a byte to the echo buffer
+ *	@c: unicode byte to echo
+ *	@tty: terminal device
+ *
+ *	Add a character or operation byte to the echo buffer.
+ *
+ *	Should be called under the echo lock to protect the echo buffer.
+ */
+
+static void add_echo_byte(unsigned char c, struct tty_struct *tty)
+{
+	int	new_byte_pos;
+
+	if (tty->echo_cnt == N_TTY_BUF_SIZE) {
+		/* Circular buffer is already at capacity */
+		new_byte_pos = tty->echo_pos;
+
+		/*
+		 * Since the buffer start position needs to be advanced,
+		 * be sure to step by a whole operation byte group.
+		 */
+		if (tty->echo_buf[tty->echo_pos] == ECHO_OP_START)
+		{
+			if (tty->echo_buf[(tty->echo_pos + 1) &
+					  (N_TTY_BUF_SIZE - 1)] ==
+						ECHO_OP_ERASE_TAB) {
+				tty->echo_pos += 3;
+				tty->echo_cnt -= 2;
+			} else {
+				tty->echo_pos += 2;
+				tty->echo_cnt -= 1;
+			}
+		} else {
+			tty->echo_pos++;
+		}
+		tty->echo_pos &= N_TTY_BUF_SIZE - 1;
+
+		tty->echo_overrun = 1;
+	} else {
+		new_byte_pos = tty->echo_pos + tty->echo_cnt;
+		new_byte_pos &= N_TTY_BUF_SIZE - 1;
+		tty->echo_cnt++;
+	}
+
+	tty->echo_buf[new_byte_pos] = c;
+}
+
+/**
+ *	echo_move_back_col	-	add operation to move back a column
+ *	@tty: terminal device
+ *
+ *	Add an operation to the echo buffer to move back one column.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_move_back_col(struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	add_echo_byte(ECHO_OP_START, tty);
+	add_echo_byte(ECHO_OP_MOVE_BACK_COL, tty);
+
+	mutex_unlock(&tty->echo_lock);
+}
+
+/**
+ *	echo_set_canon_col	-	add operation to set the canon column
+ *	@tty: terminal device
+ *
+ *	Add an operation to the echo buffer to set the canon column
+ *	to the current column.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_set_canon_col(struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	add_echo_byte(ECHO_OP_START, tty);
+	add_echo_byte(ECHO_OP_SET_CANON_COL, tty);
+
+	mutex_unlock(&tty->echo_lock);
+}
+
+/**
+ *	echo_erase_tab	-	add operation to erase a tab
+ *	@num_chars: number of character columns already used
+ *	@after_tab: true if num_chars starts after a previous tab
+ *	@tty: terminal device
+ *
+ *	Add an operation to the echo buffer to erase a tab.
+ *
+ *	Called by the eraser function, which knows how many character
+ *	columns have been used since either a previous tab or the start
+ *	of input.  This information will be used later, along with
+ *	canon column (if applicable), to go back the correct number
+ *	of columns.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_erase_tab(unsigned int num_chars, int after_tab,
+			   struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	add_echo_byte(ECHO_OP_START, tty);
+	add_echo_byte(ECHO_OP_ERASE_TAB, tty);
+
+	/* We only need to know this modulo 8 (tab spacing) */
+	num_chars &= 7;
+
+	/* Set the high bit as a flag if num_chars is after a previous tab */
+	if (after_tab)
+		num_chars |= 0x80;
+	
+	add_echo_byte(num_chars, tty);
+
+	mutex_unlock(&tty->echo_lock);
+}
+
+/**
+ *	echo_char_raw	-	echo a character raw
+ *	@c: unicode byte to echo
+ *	@tty: terminal device
+ *
+ *	Echo user input back onto the screen. This must be called only when
+ *	L_ECHO(tty) is true. Called from the driver receive_buf path.
+ *
+ *	This variant does not treat control characters specially.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_char_raw(unsigned char c, struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	if (c == ECHO_OP_START) {
+		add_echo_byte(ECHO_OP_START, tty);
+		add_echo_byte(ECHO_OP_START, tty);
+	} else {
+		add_echo_byte(c, tty);
+	}
+
+	mutex_unlock(&tty->echo_lock);
+}
 
 /**
- *	echo_char	-	echo characters
+ *	echo_char	-	echo a character
  *	@c: unicode byte to echo
  *	@tty: terminal device
  *
  *	Echo user input back onto the screen. This must be called only when
  *	L_ECHO(tty) is true. Called from the driver receive_buf path.
  *
- *	Relies on BKL for tty column locking
+ *	This variant tags control characters to be possibly echoed as
+ *	as "^X" (where X is the letter representing the control char).
+ *
+ *	Locking: echo_lock to protect the echo buffer
  */
 
 static void echo_char(unsigned char c, struct tty_struct *tty)
 {
-	if (L_ECHOCTL(tty) && iscntrl(c) && c != '\t') {
-		tty_put_char(tty, '^');
-		tty_put_char(tty, c ^ 0100);
-		tty->column += 2;
-	} else
-		opost(c, tty);
+	mutex_lock(&tty->echo_lock);
+
+	if (c == ECHO_OP_START) {
+		add_echo_byte(ECHO_OP_START, tty);
+		add_echo_byte(ECHO_OP_START, tty);
+	} else {
+		if (iscntrl(c) && c != '\t')
+			add_echo_byte(ECHO_OP_START, tty);
+		add_echo_byte(c, tty);
+	}
+
+	mutex_unlock(&tty->echo_lock);
 }
 
 /**
- *	finsh_erasing		-	complete erase
+ *	finish_erasing		-	complete erase
  *	@tty: tty doing the erase
- *
- *	Relies on BKL for tty column locking
  */
+
 static inline void finish_erasing(struct tty_struct *tty)
 {
 	if (tty->erasing) {
-		tty_put_char(tty, '/');
-		tty->column++;
+		echo_char_raw('/', tty);
 		tty->erasing = 0;
 	}
 }
@@ -460,7 +856,7 @@ static inline void finish_erasing(struct tty_struct *tty)
  *	present in the stream from the driver layer. Handles the complexities
  *	of UTF-8 multibyte symbols.
  *
- *	Locking: read_lock for tty buffers, BKL for column/erasing state
+ *	Locking: read_lock for tty buffers
  */
 
 static void eraser(unsigned char c, struct tty_struct *tty)
@@ -471,7 +867,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 
 	/* FIXME: locking needed ? */
 	if (tty->read_head == tty->canon_head) {
-		/* opost('\a', tty); */		/* what do you think? */
+		/* echo_char_raw('\a', tty); */ /* what do you think? */
 		return;
 	}
 	if (c == ERASE_CHAR(tty))
@@ -497,7 +893,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 			echo_char(KILL_CHAR(tty), tty);
 			/* Add a newline if ECHOK is on and ECHOKE is off. */
 			if (L_ECHOK(tty))
-				opost('\n', tty);
+				echo_char_raw('\n', tty);
 			return;
 		}
 		kill_type = KILL;
@@ -533,67 +929,62 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 		if (L_ECHO(tty)) {
 			if (L_ECHOPRT(tty)) {
 				if (!tty->erasing) {
-					tty_put_char(tty, '\\');
-					tty->column++;
+					echo_char_raw('\\', tty);
 					tty->erasing = 1;
 				}
 				/* if cnt > 1, output a multi-byte character */
 				echo_char(c, tty);
 				while (--cnt > 0) {
 					head = (head+1) & (N_TTY_BUF_SIZE-1);
-					tty_put_char(tty, tty->read_buf[head]);
+					echo_char_raw(tty->read_buf[head], tty);
+					echo_move_back_col(tty);
 				}
 			} else if (kill_type == ERASE && !L_ECHOE(tty)) {
 				echo_char(ERASE_CHAR(tty), tty);
 			} else if (c == '\t') {
-				unsigned int col = tty->canon_column;
-				unsigned long tail = tty->canon_head;
-
-				/* Find the column of the last char. */
-				while (tail != tty->read_head) {
+				unsigned int num_chars = 0;
+				int after_tab = 0;
+				unsigned long tail = tty->read_head;
+
+				/*
+				 * Count the columns used for characters
+				 * since the start of input or after a
+				 * previous tab.
+				 * This info is used to go back the correct
+				 * number of columns.
+				 */
+				while (tail != tty->canon_head) {
+					tail = (tail-1) & (N_TTY_BUF_SIZE-1);
 					c = tty->read_buf[tail];
-					if (c == '\t')
-						col = (col | 7) + 1;
+					if (c == '\t') {
+						after_tab = 1;
+						break;
+					}
 					else if (iscntrl(c)) {
 						if (L_ECHOCTL(tty))
-							col += 2;
-					} else if (!is_continuation(c, tty))
-						col++;
-					tail = (tail+1) & (N_TTY_BUF_SIZE-1);
-				}
-
-				/* should never happen */
-				if (tty->column > 0x80000000)
-					tty->column = 0;
-
-				/* Now backup to that column. */
-				while (tty->column > col) {
-					/* Can't use opost here. */
-					tty_put_char(tty, '\b');
-					if (tty->column > 0)
-						tty->column--;
+							num_chars += 2;
+					} else if (!is_continuation(c, tty)) {
+						num_chars++;
+					}
 				}
+				echo_erase_tab(num_chars, after_tab, tty);
 			} else {
 				if (iscntrl(c) && L_ECHOCTL(tty)) {
-					tty_put_char(tty, '\b');
-					tty_put_char(tty, ' ');
-					tty_put_char(tty, '\b');
-					if (tty->column > 0)
-						tty->column--;
+					echo_char_raw('\b', tty);
+					echo_char_raw(' ', tty);
+					echo_char_raw('\b', tty);
 				}
 				if (!iscntrl(c) || L_ECHOCTL(tty)) {
-					tty_put_char(tty, '\b');
-					tty_put_char(tty, ' ');
-					tty_put_char(tty, '\b');
-					if (tty->column > 0)
-						tty->column--;
+					echo_char_raw('\b', tty);
+					echo_char_raw(' ', tty);
+					echo_char_raw('\b', tty);
 				}
 			}
 		}
 		if (kill_type == ERASE)
 			break;
 	}
-	if (tty->read_head == tty->canon_head)
+	if (tty->read_head == tty->canon_head && L_ECHO(tty))
 		finish_erasing(tty);
 }
 
@@ -724,14 +1115,18 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 		c=tolower(c);
 
 	if (tty->stopped && !tty->flow_stopped && I_IXON(tty) &&
-	    ((I_IXANY(tty) && c != START_CHAR(tty) && c != STOP_CHAR(tty)) ||
-	     c == INTR_CHAR(tty) || c == QUIT_CHAR(tty) || c == SUSP_CHAR(tty)))
+	    I_IXANY(tty) && c != START_CHAR(tty) && c != STOP_CHAR(tty) &&
+	    c != INTR_CHAR(tty) && c != QUIT_CHAR(tty) && c != SUSP_CHAR(tty)) {
 		start_tty(tty);
+		process_echoes(tty);
+	}
 
 	if (tty->closing) {
 		if (I_IXON(tty)) {
-			if (c == START_CHAR(tty))
+			if (c == START_CHAR(tty)) {
 				start_tty(tty);
+				process_echoes(tty);
+			}
 			else if (c == STOP_CHAR(tty))
 				stop_tty(tty);
 		}
@@ -745,17 +1140,20 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 	 * up.
 	 */
 	if (!test_bit(c, tty->process_char_map) || tty->lnext) {
-		finish_erasing(tty);
 		tty->lnext = 0;
 		if (L_ECHO(tty)) {
+			finish_erasing(tty);
 			if (tty->read_cnt >= N_TTY_BUF_SIZE-1) {
-				tty_put_char(tty, '\a'); /* beep if no space */
+				/* beep if no space */
+				echo_char_raw('\a', tty);
+				process_echoes(tty);
 				return;
 			}
 			/* Record the column of first canon char. */
 			if (tty->canon_head == tty->read_head)
-				tty->canon_column = tty->column;
+				echo_set_canon_col(tty);
 			echo_char(c, tty);
+			process_echoes(tty);
 		}
 		if (I_PARMRK(tty) && c == (unsigned char) '\377')
 			put_tty_queue(c, tty);
@@ -766,6 +1164,7 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 	if (I_IXON(tty)) {
 		if (c == START_CHAR(tty)) {
 			start_tty(tty);
+			process_echoes(tty);
 			return;
 		}
 		if (c == STOP_CHAR(tty)) {
@@ -786,7 +1185,6 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 		if (c == SUSP_CHAR(tty)) {
 send_signal:
 			/*
-			 * Echo character, and then send the signal.
 			 * Note that we do not use isig() here because we want
 			 * the order to be:
 			 * 1) flush, 2) echo, 3) signal
@@ -795,8 +1193,12 @@ send_signal:
 				n_tty_flush_buffer(tty);
 				tty_driver_flush_buffer(tty);
 			}
-			if (L_ECHO(tty))
+			if (I_IXON(tty))
+				start_tty(tty);
+			if (L_ECHO(tty)) {
 				echo_char(c, tty);
+				process_echoes(tty);
+			}
 			if (tty->pgrp)
 				kill_pgrp(tty->pgrp, signal, 1);
 			return;
@@ -815,6 +1217,7 @@ send_signal:
 		if (c == ERASE_CHAR(tty) || c == KILL_CHAR(tty) ||
 		    (c == WERASE_CHAR(tty) && L_IEXTEN(tty))) {
 			eraser(c, tty);
+			process_echoes(tty);
 			return;
 		}
 		if (c == LNEXT_CHAR(tty) && L_IEXTEN(tty)) {
@@ -822,8 +1225,9 @@ send_signal:
 			if (L_ECHO(tty)) {
 				finish_erasing(tty);
 				if (L_ECHOCTL(tty)) {
-					tty_put_char(tty, '^');
-					tty_put_char(tty, '\b');
+					echo_char_raw('^', tty);
+					echo_char_raw('\b', tty);
+					process_echoes(tty);
 				}
 			}
 			return;
@@ -834,18 +1238,20 @@ send_signal:
 
 			finish_erasing(tty);
 			echo_char(c, tty);
-			opost('\n', tty);
+			echo_char_raw('\n', tty);
 			while (tail != tty->read_head) {
 				echo_char(tty->read_buf[tail], tty);
 				tail = (tail+1) & (N_TTY_BUF_SIZE-1);
 			}
+			process_echoes(tty);
 			return;
 		}
 		if (c == '\n') {
 			if (L_ECHO(tty) || L_ECHONL(tty)) {
 				if (tty->read_cnt >= N_TTY_BUF_SIZE-1)
-					tty_put_char(tty, '\a');
-				opost('\n', tty);
+					echo_char_raw('\a', tty);
+				echo_char_raw('\n', tty);
+				process_echoes(tty);
 			}
 			goto handle_newline;
 		}
@@ -862,11 +1268,12 @@ send_signal:
 			 */
 			if (L_ECHO(tty)) {
 				if (tty->read_cnt >= N_TTY_BUF_SIZE-1)
-					tty_put_char(tty, '\a');
+					echo_char_raw('\a', tty);
 				/* Record the column of first canon char. */
 				if (tty->canon_head == tty->read_head)
-					tty->canon_column = tty->column;
+					echo_set_canon_col(tty);
 				echo_char(c, tty);
+				process_echoes(tty);
 			}
 			/*
 			 * XXX does PARMRK doubling happen for
@@ -889,20 +1296,23 @@ handle_newline:
 		}
 	}
 
-	finish_erasing(tty);
 	if (L_ECHO(tty)) {
+		finish_erasing(tty);
 		if (tty->read_cnt >= N_TTY_BUF_SIZE-1) {
-			tty_put_char(tty, '\a'); /* beep if no space */
+			/* beep if no space */
+			echo_char_raw('\a', tty);
+			process_echoes(tty);
 			return;
 		}
 		if (c == '\n')
-			opost('\n', tty);
+			echo_char_raw('\n', tty);
 		else {
 			/* Record the column of first canon char. */
 			if (tty->canon_head == tty->read_head)
-				tty->canon_column = tty->column;
+				echo_set_canon_col(tty);
 			echo_char(c, tty);
 		}
+		process_echoes(tty);
 	}
 
 	if (I_PARMRK(tty) && c == (unsigned char) '\377')
@@ -923,6 +1333,9 @@ handle_newline:
 
 static void n_tty_write_wakeup(struct tty_struct *tty)
 {
+	/* Write out any echoed characters that are still pending */
+	process_echoes(tty);
+	
 	if (tty->fasync) {
 		set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 		kill_fasync(&tty->fasync, SIGIO, POLL_OUT);
@@ -1134,6 +1547,10 @@ static void n_tty_close(struct tty_struct *tty)
 		free_buf(tty->read_buf);
 		tty->read_buf = NULL;
 	}
+	if (tty->echo_buf) {
+		free_buf(tty->echo_buf);
+		tty->echo_buf = NULL;
+	}
 }
 
 /**
@@ -1151,13 +1568,19 @@ static int n_tty_open(struct tty_struct *tty)
 	if (!tty)
 		return -EINVAL;
 
-	/* This one is ugly. Currently a malloc failure here can panic */
+	/* These are ugly. Currently a malloc failure here can panic */
 	if (!tty->read_buf) {
 		tty->read_buf = alloc_buf();
 		if (!tty->read_buf)
 			return -ENOMEM;
 	}
+	if (!tty->echo_buf) {
+		tty->echo_buf = alloc_buf();
+		if (!tty->echo_buf)
+			return -ENOMEM;
+	}
 	memset(tty->read_buf, 0, N_TTY_BUF_SIZE);
+	memset(tty->echo_buf, 0, N_TTY_BUF_SIZE);
 	reset_buffer_flags(tty);
 	tty->column = 0;
 	n_tty_set_termios(tty, NULL);
@@ -1487,16 +1910,23 @@ do_it_again:
  *	@buf: userspace buffer pointer
  *	@nr: size of I/O
  *
- *	Write function of the terminal device. This is serialized with
+ *	Write function of the terminal device.  This is serialized with
  *	respect to other write callers but not to termios changes, reads
- *	and other such events. We must be careful with N_TTY as the receive
- *	code will echo characters, thus calling driver write methods.
+ *	and other such events.  Since the receive code will echo characters,
+ *	thus calling driver write methods, the output_lock is used in
+ *	the output processing functions called here as well as in the
+ *	echo processing function to protect the column state and space
+ *	left in the buffer.
  *
  *	This code must be sure never to sleep through a hangup.
+ *
+ *	Locking: output_lock to protect column state and space left
+ *		 (note that the process_output*() functions take this
+ *		  lock themselves)
  */
 
 static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
-			  const unsigned char *buf, size_t nr)
+			   const unsigned char *buf, size_t nr)
 {
 	const unsigned char *b = buf;
 	DECLARE_WAITQUEUE(wait, current);
@@ -1510,6 +1940,9 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
 			return retval;
 	}
 
+	/* Write out any echoed characters that are still pending */
+	process_echoes(tty);
+	
 	add_wait_queue(&tty->write_wait, &wait);
 	while (1) {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -1523,7 +1956,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
 		}
 		if (O_OPOST(tty) && !(test_bit(TTY_HW_COOK_OUT, &tty->flags))) {
 			while (nr > 0) {
-				ssize_t num = opost_block(tty, b, nr);
+				ssize_t num = process_output_block(tty, b, nr);
 				if (num < 0) {
 					if (num == -EAGAIN)
 						break;
@@ -1535,7 +1968,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
 				if (nr == 0)
 					break;
 				c = *b;
-				if (opost(c, tty) < 0)
+				if (process_output(c, tty) < 0)
 					break;
 				b++; nr--;
 			}
@@ -1663,4 +2096,3 @@ struct tty_ldisc_ops tty_ldisc_N_TTY = {
 	.receive_buf     = n_tty_receive_buf,
 	.write_wakeup    = n_tty_write_wakeup
 };
-
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index db15f9b..d8d240c 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1111,9 +1111,7 @@ void tty_write_message(struct tty_struct *tty, char *msg)
  *		Locks the line discipline as required
  *		Writes to the tty driver are serialized by the atomic_write_lock
  *	and are then processed in chunks to the device. The line discipline
- *	write method will not be involked in parallel for each device
- *		The line discipline write method is called under the big
- *	kernel lock for historical reasons. New code should not rely on this.
+ *	write method will not be invoked in parallel for each device.
  */
 
 static ssize_t tty_write(struct file *file, const char __user *buf,
@@ -2785,6 +2783,8 @@ void initialize_tty_struct(struct tty_struct *tty,
 	INIT_WORK(&tty->hangup_work, do_tty_hangup);
 	mutex_init(&tty->atomic_read_lock);
 	mutex_init(&tty->atomic_write_lock);
+	mutex_init(&tty->output_lock);
+	mutex_init(&tty->echo_lock);
 	spin_lock_init(&tty->read_lock);
 	spin_lock_init(&tty->ctrl_lock);
 	INIT_LIST_HEAD(&tty->tty_files);
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 008176e..639e126 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -2679,7 +2679,7 @@ static int con_write_room(struct tty_struct *tty)
 {
 	if (tty->stopped)
 		return 0;
-	return 4096;		/* No limit, really; we're not buffering */
+	return 32768;		/* No limit, really; we're not buffering */
 }
 
 static int con_chars_in_buffer(struct tty_struct *tty)
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 3f4954c..dfc77de 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -253,6 +253,7 @@ struct tty_struct {
 	unsigned int column;
 	unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1;
 	unsigned char closing:1;
+	unsigned char echo_overrun:1;
 	unsigned short minimum_to_wake;
 	unsigned long overrun_time;
 	int num_overrun;
@@ -262,11 +263,16 @@ struct tty_struct {
 	int read_tail;
 	int read_cnt;
 	unsigned long read_flags[N_TTY_BUF_SIZE/(8*sizeof(unsigned long))];
+	unsigned char *echo_buf;
+	unsigned int echo_pos;
+	unsigned int echo_cnt;
 	int canon_data;
 	unsigned long canon_head;
 	unsigned int canon_column;
 	struct mutex atomic_read_lock;
 	struct mutex atomic_write_lock;
+	struct mutex output_lock;
+	struct mutex echo_lock;
 	unsigned char *write_buf;
 	int write_cnt;
 	spinlock_t read_lock;
-- 
cgit v0.10.2


From 300a6204b4d45dc70359b24384ad04ae899179c3 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:41:04 +0000
Subject: n_tty: clean up coding style

Now the main work is done its polishing time

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index a9bc576..a223823 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -47,8 +47,8 @@
 #include <linux/bitops.h>
 #include <linux/audit.h>
 #include <linux/file.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/system.h>
 
 /* number of characters left in xmit buffer before select has we have room */
@@ -309,7 +309,7 @@ static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
 
 	if (!space)
 		return -1;
-	
+
 	switch (c) {
 	case '\n':
 		if (O_ONLRET(tty))
@@ -417,8 +417,7 @@ static ssize_t process_output_block(struct tty_struct *tty,
 	mutex_lock(&tty->output_lock);
 
 	space = tty_write_room(tty);
-	if (!space)
-	{
+	if (!space) {
 		mutex_unlock(&tty->output_lock);
 		return 0;
 	}
@@ -521,7 +520,7 @@ static void process_echoes(struct tty_struct *tty)
 			if (opp == buf_end)
 				opp -= N_TTY_BUF_SIZE;
 			op = *opp;
-			
+
 			switch (op) {
 				unsigned int num_chars, num_bs;
 
@@ -621,7 +620,8 @@ static void process_echoes(struct tty_struct *tty)
 		} else {
 			int retval;
 
-			if ((retval = do_output_char(c, tty, space)) < 0)
+			retval = do_output_char(c, tty, space);
+			if (retval < 0)
 				break;
 			space -= retval;
 			cp += 1;
@@ -675,8 +675,7 @@ static void add_echo_byte(unsigned char c, struct tty_struct *tty)
 		 * Since the buffer start position needs to be advanced,
 		 * be sure to step by a whole operation byte group.
 		 */
-		if (tty->echo_buf[tty->echo_pos] == ECHO_OP_START)
-		{
+		if (tty->echo_buf[tty->echo_pos] == ECHO_OP_START) {
 			if (tty->echo_buf[(tty->echo_pos + 1) &
 					  (N_TTY_BUF_SIZE - 1)] ==
 						ECHO_OP_ERASE_TAB) {
@@ -771,7 +770,7 @@ static void echo_erase_tab(unsigned int num_chars, int after_tab,
 	/* Set the high bit as a flag if num_chars is after a previous tab */
 	if (after_tab)
 		num_chars |= 0x80;
-	
+
 	add_echo_byte(num_chars, tty);
 
 	mutex_unlock(&tty->echo_lock);
@@ -959,8 +958,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 					if (c == '\t') {
 						after_tab = 1;
 						break;
-					}
-					else if (iscntrl(c)) {
+					} else if (iscntrl(c)) {
 						if (L_ECHOCTL(tty))
 							num_chars += 2;
 					} else if (!is_continuation(c, tty)) {
@@ -1112,7 +1110,7 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 	if (I_ISTRIP(tty))
 		c &= 0x7f;
 	if (I_IUCLC(tty) && L_IEXTEN(tty))
-		c=tolower(c);
+		c = tolower(c);
 
 	if (tty->stopped && !tty->flow_stopped && I_IXON(tty) &&
 	    I_IXANY(tty) && c != START_CHAR(tty) && c != STOP_CHAR(tty) &&
@@ -1126,8 +1124,7 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 			if (c == START_CHAR(tty)) {
 				start_tty(tty);
 				process_echoes(tty);
-			}
-			else if (c == STOP_CHAR(tty))
+			} else if (c == STOP_CHAR(tty))
 				stop_tty(tty);
 		}
 		return;
@@ -1335,7 +1332,7 @@ static void n_tty_write_wakeup(struct tty_struct *tty)
 {
 	/* Write out any echoed characters that are still pending */
 	process_echoes(tty);
-	
+
 	if (tty->fasync) {
 		set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 		kill_fasync(&tty->fasync, SIGIO, POLL_OUT);
@@ -1942,7 +1939,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
 
 	/* Write out any echoed characters that are still pending */
 	process_echoes(tty);
-	
+
 	add_wait_queue(&tty->write_wait, &wait);
 	while (1) {
 		set_current_state(TASK_INTERRUPTIBLE);
-- 
cgit v0.10.2


From 59e55e6cf86eb472e8373831c4234252916c53ef Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 2 Jan 2009 13:41:11 +0000
Subject: Remove devpts_root global

Remove the 'devpts_root' global variable and find the root dentry using
the super_block. The super-block can be found from the device inode, using
the new wrapper, pts_sb_from_inode().

Changelog: This patch is based on an earlier patchset from Serge Hallyn
	   and Matt Helsley.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 5d61b7c..f96e10a 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -34,7 +34,6 @@ static DEFINE_IDA(allocated_ptys);
 static DEFINE_MUTEX(allocated_ptys_lock);
 
 static struct vfsmount *devpts_mnt;
-static struct dentry *devpts_root;
 
 static struct {
 	int setuid;
@@ -56,6 +55,14 @@ static const match_table_t tokens = {
 	{Opt_err, NULL}
 };
 
+static inline struct super_block *pts_sb_from_inode(struct inode *inode)
+{
+	if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
+		return inode->i_sb;
+
+	return devpts_mnt->mnt_sb;
+}
+
 static int devpts_remount(struct super_block *sb, int *flags, char *data)
 {
 	char *p;
@@ -142,7 +149,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
 	inode->i_fop = &simple_dir_operations;
 	inode->i_nlink = 2;
 
-	devpts_root = s->s_root = d_alloc_root(inode);
+	s->s_root = d_alloc_root(inode);
 	if (s->s_root)
 		return 0;
 	
@@ -211,7 +218,9 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 	struct tty_driver *driver = tty->driver;
 	dev_t device = MKDEV(driver->major, driver->minor_start+number);
 	struct dentry *dentry;
-	struct inode *inode = new_inode(devpts_mnt->mnt_sb);
+	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+	struct inode *inode = new_inode(sb);
+	struct dentry *root = sb->s_root;
 	char s[12];
 
 	/* We're supposed to be given the slave end of a pty */
@@ -231,15 +240,15 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 
 	sprintf(s, "%d", number);
 
-	mutex_lock(&devpts_root->d_inode->i_mutex);
+	mutex_lock(&root->d_inode->i_mutex);
 
-	dentry = d_alloc_name(devpts_root, s);
+	dentry = d_alloc_name(root, s);
 	if (!IS_ERR(dentry)) {
 		d_add(dentry, inode);
-		fsnotify_create(devpts_root->d_inode, dentry);
+		fsnotify_create(root->d_inode, dentry);
 	}
 
-	mutex_unlock(&devpts_root->d_inode->i_mutex);
+	mutex_unlock(&root->d_inode->i_mutex);
 
 	return 0;
 }
@@ -256,11 +265,13 @@ struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
 void devpts_pty_kill(struct tty_struct *tty)
 {
 	struct inode *inode = tty->driver_data;
+	struct super_block *sb = pts_sb_from_inode(inode);
+	struct dentry *root = sb->s_root;
 	struct dentry *dentry;
 
 	BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
 
-	mutex_lock(&devpts_root->d_inode->i_mutex);
+	mutex_lock(&root->d_inode->i_mutex);
 
 	dentry = d_find_alias(inode);
 	if (dentry && !IS_ERR(dentry)) {
@@ -269,7 +280,7 @@ void devpts_pty_kill(struct tty_struct *tty)
 		dput(dentry);
 	}
 
-	mutex_unlock(&devpts_root->d_inode->i_mutex);
+	mutex_unlock(&root->d_inode->i_mutex);
 }
 
 static int __init init_devpts_fs(void)
-- 
cgit v0.10.2


From e76b7c01e598d2d14ddfdb6ae5c6afe45245d0de Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 2 Jan 2009 13:41:21 +0000
Subject: Per-mount allocated_ptys

To enable multiple mounts of devpts, 'allocated_ptys' must be a per-mount
variable rather than a global variable.  Move 'allocated_ptys' into the
super_block's s_fs_info.

Changelog[v2]:
	Define and use DEVPTS_SB() wrapper.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f96e10a..49d879d 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -30,7 +30,6 @@
 #define PTMX_MINOR	2
 
 extern int pty_limit;			/* Config limit on Unix98 ptys */
-static DEFINE_IDA(allocated_ptys);
 static DEFINE_MUTEX(allocated_ptys_lock);
 
 static struct vfsmount *devpts_mnt;
@@ -55,6 +54,15 @@ static const match_table_t tokens = {
 	{Opt_err, NULL}
 };
 
+struct pts_fs_info {
+	struct ida allocated_ptys;
+};
+
+static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
 static inline struct super_block *pts_sb_from_inode(struct inode *inode)
 {
 	if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
@@ -126,6 +134,19 @@ static const struct super_operations devpts_sops = {
 	.show_options	= devpts_show_options,
 };
 
+static void *new_pts_fs_info(void)
+{
+	struct pts_fs_info *fsi;
+
+	fsi = kzalloc(sizeof(struct pts_fs_info), GFP_KERNEL);
+	if (!fsi)
+		return NULL;
+
+	ida_init(&fsi->allocated_ptys);
+
+	return fsi;
+}
+
 static int
 devpts_fill_super(struct super_block *s, void *data, int silent)
 {
@@ -137,9 +158,13 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
 	s->s_op = &devpts_sops;
 	s->s_time_gran = 1;
 
+	s->s_fs_info = new_pts_fs_info();
+	if (!s->s_fs_info)
+		goto fail;
+
 	inode = new_inode(s);
 	if (!inode)
-		goto fail;
+		goto free_fsi;
 	inode->i_ino = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_blocks = 0;
@@ -155,6 +180,9 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
 	
 	printk("devpts: get root dentry failed\n");
 	iput(inode);
+
+free_fsi:
+	kfree(s->s_fs_info);
 fail:
 	return -ENOMEM;
 }
@@ -165,11 +193,19 @@ static int devpts_get_sb(struct file_system_type *fs_type,
 	return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
 }
 
+static void devpts_kill_sb(struct super_block *sb)
+{
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
+
+	kfree(fsi);
+	kill_anon_super(sb);
+}
+
 static struct file_system_type devpts_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "devpts",
 	.get_sb		= devpts_get_sb,
-	.kill_sb	= kill_anon_super,
+	.kill_sb	= devpts_kill_sb,
 };
 
 /*
@@ -179,16 +215,18 @@ static struct file_system_type devpts_fs_type = {
 
 int devpts_new_index(struct inode *ptmx_inode)
 {
+	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
 	int index;
 	int ida_ret;
 
 retry:
-	if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) {
+	if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL)) {
 		return -ENOMEM;
 	}
 
 	mutex_lock(&allocated_ptys_lock);
-	ida_ret = ida_get_new(&allocated_ptys, &index);
+	ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
 	if (ida_ret < 0) {
 		mutex_unlock(&allocated_ptys_lock);
 		if (ida_ret == -EAGAIN)
@@ -197,7 +235,7 @@ retry:
 	}
 
 	if (index >= pty_limit) {
-		ida_remove(&allocated_ptys, index);
+		ida_remove(&fsi->allocated_ptys, index);
 		mutex_unlock(&allocated_ptys_lock);
 		return -EIO;
 	}
@@ -207,8 +245,11 @@ retry:
 
 void devpts_kill_index(struct inode *ptmx_inode, int idx)
 {
+	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
+
 	mutex_lock(&allocated_ptys_lock);
-	ida_remove(&allocated_ptys, idx);
+	ida_remove(&fsi->allocated_ptys, idx);
 	mutex_unlock(&allocated_ptys_lock);
 }
 
-- 
cgit v0.10.2


From 31af0abbdafb66ad8e27e3df878faec2ebe1132e Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 2 Jan 2009 13:41:33 +0000
Subject: Per-mount 'config' object

With support for multiple mounts of devpts, the 'config' structure really
represents per-mount options rather than config parameters. Rename 'config'
structure to 'pts_mount_opts' and store it in the super-block.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 49d879d..b793e6e 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -34,13 +34,13 @@ static DEFINE_MUTEX(allocated_ptys_lock);
 
 static struct vfsmount *devpts_mnt;
 
-static struct {
+struct pts_mount_opts {
 	int setuid;
 	int setgid;
 	uid_t   uid;
 	gid_t   gid;
 	umode_t mode;
-} config = {.mode = DEVPTS_DEFAULT_MODE};
+};
 
 enum {
 	Opt_uid, Opt_gid, Opt_mode,
@@ -56,6 +56,7 @@ static const match_table_t tokens = {
 
 struct pts_fs_info {
 	struct ida allocated_ptys;
+	struct pts_mount_opts mount_opts;
 };
 
 static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
@@ -74,12 +75,14 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode)
 static int devpts_remount(struct super_block *sb, int *flags, char *data)
 {
 	char *p;
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
+	struct pts_mount_opts *opts = &fsi->mount_opts;
 
-	config.setuid  = 0;
-	config.setgid  = 0;
-	config.uid     = 0;
-	config.gid     = 0;
-	config.mode    = DEVPTS_DEFAULT_MODE;
+	opts->setuid  = 0;
+	opts->setgid  = 0;
+	opts->uid     = 0;
+	opts->gid     = 0;
+	opts->mode    = DEVPTS_DEFAULT_MODE;
 
 	while ((p = strsep(&data, ",")) != NULL) {
 		substring_t args[MAX_OPT_ARGS];
@@ -94,19 +97,19 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
 		case Opt_uid:
 			if (match_int(&args[0], &option))
 				return -EINVAL;
-			config.uid = option;
-			config.setuid = 1;
+			opts->uid = option;
+			opts->setuid = 1;
 			break;
 		case Opt_gid:
 			if (match_int(&args[0], &option))
 				return -EINVAL;
-			config.gid = option;
-			config.setgid = 1;
+			opts->gid = option;
+			opts->setgid = 1;
 			break;
 		case Opt_mode:
 			if (match_octal(&args[0], &option))
 				return -EINVAL;
-			config.mode = option & S_IALLUGO;
+			opts->mode = option & S_IALLUGO;
 			break;
 		default:
 			printk(KERN_ERR "devpts: called with bogus options\n");
@@ -119,11 +122,14 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
 
 static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
-	if (config.setuid)
-		seq_printf(seq, ",uid=%u", config.uid);
-	if (config.setgid)
-		seq_printf(seq, ",gid=%u", config.gid);
-	seq_printf(seq, ",mode=%03o", config.mode);
+	struct pts_fs_info *fsi = DEVPTS_SB(vfs->mnt_sb);
+	struct pts_mount_opts *opts = &fsi->mount_opts;
+
+	if (opts->setuid)
+		seq_printf(seq, ",uid=%u", opts->uid);
+	if (opts->setgid)
+		seq_printf(seq, ",gid=%u", opts->gid);
+	seq_printf(seq, ",mode=%03o", opts->mode);
 
 	return 0;
 }
@@ -143,6 +149,7 @@ static void *new_pts_fs_info(void)
 		return NULL;
 
 	ida_init(&fsi->allocated_ptys);
+	fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
 
 	return fsi;
 }
@@ -262,6 +269,8 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
 	struct inode *inode = new_inode(sb);
 	struct dentry *root = sb->s_root;
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
+	struct pts_mount_opts *opts = &fsi->mount_opts;
 	char s[12];
 
 	/* We're supposed to be given the slave end of a pty */
@@ -275,7 +284,7 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 	inode->i_uid = config.setuid ? config.uid : current_fsuid();
 	inode->i_gid = config.setgid ? config.gid : current_fsgid();
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-	init_special_inode(inode, S_IFCHR|config.mode, device);
+	init_special_inode(inode, S_IFCHR|opts->mode, device);
 	inode->i_private = tty;
 	tty->driver_data = inode;
 
-- 
cgit v0.10.2


From 53af8ee4094d80ddaac7efefb572b1c22ae49367 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 2 Jan 2009 13:41:47 +0000
Subject: Extract option parsing to new function

Move code to parse mount options into a separate function so it can
(later) be shared between mount and remount operations.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index b793e6e..00530e8 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -72,11 +72,9 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode)
 	return devpts_mnt->mnt_sb;
 }
 
-static int devpts_remount(struct super_block *sb, int *flags, char *data)
+static int parse_mount_options(char *data, struct pts_mount_opts *opts)
 {
 	char *p;
-	struct pts_fs_info *fsi = DEVPTS_SB(sb);
-	struct pts_mount_opts *opts = &fsi->mount_opts;
 
 	opts->setuid  = 0;
 	opts->setgid  = 0;
@@ -120,6 +118,14 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
+static int devpts_remount(struct super_block *sb, int *flags, char *data)
+{
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
+	struct pts_mount_opts *opts = &fsi->mount_opts;
+
+	return parse_mount_options(data, opts);
+}
+
 static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct pts_fs_info *fsi = DEVPTS_SB(vfs->mnt_sb);
-- 
cgit v0.10.2


From e4adca27bcbb8a73c4cf1dfa71392654cfa33345 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 2 Jan 2009 13:41:54 +0000
Subject: Add DEVPTS_MULTIPLE_INSTANCES config token

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index c602b54..c52a167 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -443,6 +443,17 @@ config UNIX98_PTYS
 	  All modern Linux systems use the Unix98 ptys.  Say Y unless
 	  you're on an embedded system and want to conserve memory.
 
+config DEVPTS_MULTIPLE_INSTANCES
+	bool "Support multiple instances of devpts"
+	depends on UNIX98_PTYS
+	default n
+	---help---
+	  Enable support for multiple instances of devpts filesystem.
+	  If you want to have isolated PTY namespaces (eg: in containers),
+	  say Y here.  Otherwise, say N. If enabled, each mount of devpts
+	  filesystem with the '-o newinstance' option will create an
+	  independent PTY namespace.
+
 config LEGACY_PTYS
 	bool "Legacy (BSD) PTY support"
 	default y
-- 
cgit v0.10.2


From 1f8f1e296583f9f832c2fe7b5a219675b74bf43e Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 2 Jan 2009 13:42:02 +0000
Subject: Define mknod_ptmx()

/dev/ptmx is closely tied to the devpts filesystem. An open of /dev/ptmx,
allocates the next pty index and the associated device shows up in the
devpts fs as /dev/pts/n.

Wih multiple instancs of devpts filesystem, during an open of /dev/ptmx
we would be unable to determine which instance of the devpts is being
accessed.

So we move the 'ptmx' node into /dev/pts and use the inode of the 'ptmx'
node to identify the superblock and hence the devpts instance.  This patch
adds ability for the kernel to internally create the [ptmx, c, 5:2] device
when mounting devpts filesystem.  Since the ptmx node in devpts is new and
may surprise some userspace scripts, the default permissions for the new
node is 0000.  These permissions can be changed either using chmod or by
remounting with the new '-o ptmxmode=0666' mount option.

Changelog[v5]:
	- [Serge Hallyn bugfix]: Letting new_inode() assign inode number to
	  ptmx can collide with hand-assigning inode numbers to ptys. So,
	  hand-assign specific inode number to ptmx node also.
	- [Serge Hallyn]: Maybe safer to grab root dentry mutex while creating
	  ptmx node
	- [Bugfix with Serge Hallyn] Replace lookup_one_len() in mknod_ptmx()
	  wih d_alloc_name() (lookup during ->get_sb() locks up system). To
	  simplify patchset, fold the ptmx_dentry patch into this.

Changelog[v4]:
	- Change default permissions of pts/ptmx node to 0000.
	- Move code for ptmxmode under #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES.

Changelog[v3]:
	- Rename ptmx_mode to ptmxmode (for consistency with 'newinstance')

Changelog[v2]:
	- [H. Peter Anvin] Remove mknod() system call support and create the
	  ptmx node internally.

Changelog[v1]:
	- Earlier version of this patch enabled creating /dev/pts/tty as
	  well. As pointed out by Al Viro and H. Peter Anvin, that is not
	  really necessary.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 00530e8..8ee9dc2 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -27,6 +27,13 @@
 #define DEVPTS_SUPER_MAGIC 0x1cd1
 
 #define DEVPTS_DEFAULT_MODE 0600
+/*
+ * ptmx is a new node in /dev/pts and will be unused in legacy (single-
+ * instance) mode. To prevent surprises in user space, set permissions of
+ * ptmx to 0. Use 'chmod' or remount with '-o ptmxmode' to set meaningful
+ * permissions.
+ */
+#define DEVPTS_DEFAULT_PTMX_MODE 0000
 #define PTMX_MINOR	2
 
 extern int pty_limit;			/* Config limit on Unix98 ptys */
@@ -40,10 +47,11 @@ struct pts_mount_opts {
 	uid_t   uid;
 	gid_t   gid;
 	umode_t mode;
+	umode_t ptmxmode;
 };
 
 enum {
-	Opt_uid, Opt_gid, Opt_mode,
+	Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode,
 	Opt_err
 };
 
@@ -51,12 +59,16 @@ static const match_table_t tokens = {
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
 	{Opt_mode, "mode=%o"},
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
+	{Opt_ptmxmode, "ptmxmode=%o"},
+#endif
 	{Opt_err, NULL}
 };
 
 struct pts_fs_info {
 	struct ida allocated_ptys;
 	struct pts_mount_opts mount_opts;
+	struct dentry *ptmx_dentry;
 };
 
 static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
@@ -81,6 +93,7 @@ static int parse_mount_options(char *data, struct pts_mount_opts *opts)
 	opts->uid     = 0;
 	opts->gid     = 0;
 	opts->mode    = DEVPTS_DEFAULT_MODE;
+	opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
 
 	while ((p = strsep(&data, ",")) != NULL) {
 		substring_t args[MAX_OPT_ARGS];
@@ -109,6 +122,13 @@ static int parse_mount_options(char *data, struct pts_mount_opts *opts)
 				return -EINVAL;
 			opts->mode = option & S_IALLUGO;
 			break;
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
+		case Opt_ptmxmode:
+			if (match_octal(&args[0], &option))
+				return -EINVAL;
+			opts->ptmxmode = option & S_IALLUGO;
+			break;
+#endif
 		default:
 			printk(KERN_ERR "devpts: called with bogus options\n");
 			return -EINVAL;
@@ -118,12 +138,93 @@ static int parse_mount_options(char *data, struct pts_mount_opts *opts)
 	return 0;
 }
 
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
+static int mknod_ptmx(struct super_block *sb)
+{
+	int mode;
+	int rc = -ENOMEM;
+	struct dentry *dentry;
+	struct inode *inode;
+	struct dentry *root = sb->s_root;
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
+	struct pts_mount_opts *opts = &fsi->mount_opts;
+
+	mutex_lock(&root->d_inode->i_mutex);
+
+	/* If we have already created ptmx node, return */
+	if (fsi->ptmx_dentry) {
+		rc = 0;
+		goto out;
+	}
+
+	dentry = d_alloc_name(root, "ptmx");
+	if (!dentry) {
+		printk(KERN_NOTICE "Unable to alloc dentry for ptmx node\n");
+		goto out;
+	}
+
+	/*
+	 * Create a new 'ptmx' node in this mount of devpts.
+	 */
+	inode = new_inode(sb);
+	if (!inode) {
+		printk(KERN_ERR "Unable to alloc inode for ptmx node\n");
+		dput(dentry);
+		goto out;
+	}
+
+	inode->i_ino = 2;
+	inode->i_uid = inode->i_gid = 0;
+	inode->i_blocks = 0;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+	mode = S_IFCHR|opts->ptmxmode;
+	init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
+
+	d_add(dentry, inode);
+
+	fsi->ptmx_dentry = dentry;
+	rc = 0;
+
+	printk(KERN_DEBUG "Created ptmx node in devpts ino %lu\n",
+			inode->i_ino);
+out:
+	mutex_unlock(&root->d_inode->i_mutex);
+	return rc;
+}
+
+static void update_ptmx_mode(struct pts_fs_info *fsi)
+{
+	struct inode *inode;
+	if (fsi->ptmx_dentry) {
+		inode = fsi->ptmx_dentry->d_inode;
+		inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
+	}
+}
+#else
+static inline void update_ptmx_mode(struct pts_fs_info *fsi)
+{
+       return;
+}
+#endif
+
 static int devpts_remount(struct super_block *sb, int *flags, char *data)
 {
+	int err;
 	struct pts_fs_info *fsi = DEVPTS_SB(sb);
 	struct pts_mount_opts *opts = &fsi->mount_opts;
 
-	return parse_mount_options(data, opts);
+	err = parse_mount_options(data, opts);
+
+	/*
+	 * parse_mount_options() restores options to default values
+	 * before parsing and may have changed ptmxmode. So, update the
+	 * mode in the inode too. Bogus options don't fail the remount,
+	 * so do this even on error return.
+	 */
+	update_ptmx_mode(fsi);
+
+	return err;
 }
 
 static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs)
@@ -136,6 +237,9 @@ static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	if (opts->setgid)
 		seq_printf(seq, ",gid=%u", opts->gid);
 	seq_printf(seq, ",mode=%03o", opts->mode);
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
+	seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
+#endif
 
 	return 0;
 }
@@ -156,6 +260,7 @@ static void *new_pts_fs_info(void)
 
 	ida_init(&fsi->allocated_ptys);
 	fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
+	fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
 
 	return fsi;
 }
@@ -163,7 +268,7 @@ static void *new_pts_fs_info(void)
 static int
 devpts_fill_super(struct super_block *s, void *data, int silent)
 {
-	struct inode * inode;
+	struct inode *inode;
 
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
@@ -190,7 +295,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
 	s->s_root = d_alloc_root(inode);
 	if (s->s_root)
 		return 0;
-	
+
 	printk("devpts: get root dentry failed\n");
 	iput(inode);
 
@@ -211,7 +316,7 @@ static void devpts_kill_sb(struct super_block *sb)
 	struct pts_fs_info *fsi = DEVPTS_SB(sb);
 
 	kfree(fsi);
-	kill_anon_super(sb);
+	kill_litter_super(sb);
 }
 
 static struct file_system_type devpts_fs_type = {
-- 
cgit v0.10.2


From d4076ac55bf8755ce6c5706478631c1726cf0179 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 2 Jan 2009 13:42:19 +0000
Subject: Define get_init_pts_sb()

See comments in the function header for details. The new interface will
be used in a follow-on patch.

Changelog [v2]:
	[Dave Hansen] Replace get_sb_ref() in fs/super.c with get_init_pts_sb()
	and make the new interface private to devpts

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 8ee9dc2..2d0eb2c 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -305,10 +305,63 @@ fail:
 	return -ENOMEM;
 }
 
+static int compare_init_pts_sb(struct super_block *s, void *p)
+{
+	if (devpts_mnt)
+		return devpts_mnt->mnt_sb == s;
+
+	return 0;
+}
+
+/*
+ * get_init_pts_sb()
+ *
+ *     This interface is needed to support multiple namespace semantics in
+ *     devpts while preserving backward compatibility of the current 'single-
+ *     namespace' semantics. i.e all mounts of devpts without the 'newinstance'
+ *     mount option should bind to the initial kernel mount, like
+ *     get_sb_single().
+ *
+ *     Mounts with 'newinstance' option create a new private namespace.
+ *
+ *     But for single-mount semantics, devpts cannot use get_sb_single(),
+ *     because get_sb_single()/sget() find and use the super-block from
+ *     the most recent mount of devpts. But that recent mount may be a
+ *     'newinstance' mount and get_sb_single() would pick the newinstance
+ *     super-block instead of the initial super-block.
+ *
+ *     This interface is identical to get_sb_single() except that it
+ *     consistently selects the 'single-namespace' superblock even in the
+ *     presence of the private namespace (i.e 'newinstance') super-blocks.
+ */
+static int get_init_pts_sb(struct file_system_type *fs_type, int flags,
+		void *data, struct vfsmount *mnt)
+{
+        struct super_block *s;
+        int error;
+
+        s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
+        if (IS_ERR(s))
+                return PTR_ERR(s);
+
+        if (!s->s_root) {
+                s->s_flags = flags;
+                error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
+                if (error) {
+                        up_write(&s->s_umount);
+                        deactivate_super(s);
+                        return error;
+                }
+                s->s_flags |= MS_ACTIVE;
+        }
+        do_remount_sb(s, flags, data, 0);
+        return simple_set_mnt(mnt, s);
+}
+
 static int devpts_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
+	return get_init_pts_sb(fs_type, flags, data, mnt);
 }
 
 static void devpts_kill_sb(struct super_block *sb)
-- 
cgit v0.10.2


From 2a1b2dc0c83bbfc24d72cafd5e69810a149b44e4 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 2 Jan 2009 13:42:27 +0000
Subject: Enable multiple instances of devpts

To support containers, allow multiple instances of devpts filesystem, such
that indices of ptys allocated in one instance are independent of ptys
allocated in other instances of devpts.

But to preserve backward compatibility, enable this support for multiple
instances only if:

	- CONFIG_DEVPTS_MULTIPLE_INSTANCES is set to Y, and
	- '-o newinstance' mount option is specified while mounting devpts

To use multi-instance mount, a container startup script could:

	$ ns_exec -cm /bin/bash
	$ umount /dev/pts
	$ mount -t devpts -o newinstance lxcpts /dev/pts
	$ mount -o bind /dev/pts/ptmx /dev/ptmx
	$ /usr/sbin/sshd -p 1234

where 'ns_exec -cm /bin/bash' is calls clone() with CLONE_NEWNS flag and execs
/bin/bash in the child process. A pty created by the sshd is not visible in
the original mount of /dev/pts.

USER-SPACE-IMPACT:
	- See Documentation/fs/devpts.txt (included in next patch) for user-
	  space impact in multi-instance and mixed-mode operation.
TODO:
	- Update mount(8), pts(4) man pages. Highlight impact of not
	  redirecting /dev/ptmx to /dev/pts/ptmx after a multi-instance mount.

Changelog[v6]:
	- [Dave Hansen] Use new get_init_pts_sb() interface
	- [Serge Hallyn] Don't bother displaying 'newinstance' in show_options
	- [Serge Hallyn] Use macros (PARSE_REMOUNT/PARSE_MOUNT) instead of 0/1.
	- [Serge Hallyn] Check error return from get_sb_single() (now
	  get_init_pts_sb())
	- devpts_pty_kill(): don't dput error dentries

Changelog[v5]:
	- Move get_sb_ref() definition to earlier patch
	- Move usage info to Documentation/filesystems/devpts.txt (next patch)
	- Make ptmx node even in init_pts_ns, now that default mode is 0000
	  (defined in earlier patch, enabled here).
	- Cache ptmx dentry and use to update mode during remount
	  (defined in earlier patch, enabled here).
	- Bugfix: explicitly ignore newinstance on remount (if newinstance was
	  specified on remount of initial mount, it would be ignored but
	  /proc/mounts would imply that the option was set)

Changelog[v4]:

	- Update patch description to address H. Peter Anvin's comments
	- Consolidate multi-instance mode code under new config token,
	  CONFIG_DEVPTS_MULTIPLE_INSTANCE.
	- Move usage-details from patch description to
	  Documentation/fs/devpts.txt

Changelog[v3]:
	- Rename new mount option to 'newinstance'
	- Create ptmx nodes only in 'newinstance' mounts
	- Bugfix: parse_mount_options() modifies @data but since we need to
	  parse the @data twice (once in devpts_get_sb() and once during
	  do_remount_sb()), parse a local copy of @data in devpts_get_sb().
	  (restructured code in devpts_get_sb() to fix this)

Changelog[v2]:
	- Support both single-mount and multiple-mount semantics and
	  provide '-onewmnt' option to select the semantics.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 2d0eb2c..b4a89fa 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -48,10 +48,11 @@ struct pts_mount_opts {
 	gid_t   gid;
 	umode_t mode;
 	umode_t ptmxmode;
+	int newinstance;
 };
 
 enum {
-	Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode,
+	Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance,
 	Opt_err
 };
 
@@ -61,6 +62,7 @@ static const match_table_t tokens = {
 	{Opt_mode, "mode=%o"},
 #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
 	{Opt_ptmxmode, "ptmxmode=%o"},
+	{Opt_newinstance, "newinstance"},
 #endif
 	{Opt_err, NULL}
 };
@@ -78,13 +80,17 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
 
 static inline struct super_block *pts_sb_from_inode(struct inode *inode)
 {
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
 	if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
 		return inode->i_sb;
-
+#endif
 	return devpts_mnt->mnt_sb;
 }
 
-static int parse_mount_options(char *data, struct pts_mount_opts *opts)
+#define PARSE_MOUNT	0
+#define PARSE_REMOUNT	1
+
+static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
 {
 	char *p;
 
@@ -95,6 +101,10 @@ static int parse_mount_options(char *data, struct pts_mount_opts *opts)
 	opts->mode    = DEVPTS_DEFAULT_MODE;
 	opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
 
+	/* newinstance makes sense only on initial mount */
+	if (op == PARSE_MOUNT)
+		opts->newinstance = 0;
+
 	while ((p = strsep(&data, ",")) != NULL) {
 		substring_t args[MAX_OPT_ARGS];
 		int token;
@@ -128,6 +138,11 @@ static int parse_mount_options(char *data, struct pts_mount_opts *opts)
 				return -EINVAL;
 			opts->ptmxmode = option & S_IALLUGO;
 			break;
+		case Opt_newinstance:
+			/* newinstance makes sense only on initial mount */
+			if (op == PARSE_MOUNT)
+				opts->newinstance = 1;
+			break;
 #endif
 		default:
 			printk(KERN_ERR "devpts: called with bogus options\n");
@@ -214,7 +229,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
 	struct pts_fs_info *fsi = DEVPTS_SB(sb);
 	struct pts_mount_opts *opts = &fsi->mount_opts;
 
-	err = parse_mount_options(data, opts);
+	err = parse_mount_options(data, PARSE_REMOUNT, opts);
 
 	/*
 	 * parse_mount_options() restores options to default values
@@ -309,8 +324,100 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
 {
 	if (devpts_mnt)
 		return devpts_mnt->mnt_sb == s;
+	return 0;
+}
+
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
+/*
+ * Safely parse the mount options in @data and update @opts.
+ *
+ * devpts ends up parsing options two times during mount, due to the
+ * two modes of operation it supports. The first parse occurs in
+ * devpts_get_sb() when determining the mode (single-instance or
+ * multi-instance mode). The second parse happens in devpts_remount()
+ * or new_pts_mount() depending on the mode.
+ *
+ * Parsing of options modifies the @data making subsequent parsing
+ * incorrect. So make a local copy of @data and parse it.
+ *
+ * Return: 0 On success, -errno on error
+ */
+static int safe_parse_mount_options(void *data, struct pts_mount_opts *opts)
+{
+	int rc;
+	void *datacp;
+
+	if (!data)
+		return 0;
+
+	/* Use kstrdup() ?  */
+	datacp = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!datacp)
+		return -ENOMEM;
+
+	memcpy(datacp, data, PAGE_SIZE);
+	rc = parse_mount_options((char *)datacp, PARSE_MOUNT, opts);
+	kfree(datacp);
+
+	return rc;
+}
+
+/*
+ * Mount a new (private) instance of devpts.  PTYs created in this
+ * instance are independent of the PTYs in other devpts instances.
+ */
+static int new_pts_mount(struct file_system_type *fs_type, int flags,
+		void *data, struct vfsmount *mnt)
+{
+	int err;
+	struct pts_fs_info *fsi;
+	struct pts_mount_opts *opts;
+
+	printk(KERN_NOTICE "devpts: newinstance mount\n");
+
+	err = get_sb_nodev(fs_type, flags, data, devpts_fill_super, mnt);
+	if (err)
+		return err;
+
+	fsi = DEVPTS_SB(mnt->mnt_sb);
+	opts = &fsi->mount_opts;
+
+	err = parse_mount_options(data, PARSE_MOUNT, opts);
+	if (err)
+		goto fail;
+
+	err = mknod_ptmx(mnt->mnt_sb);
+	if (err)
+		goto fail;
 
 	return 0;
+
+fail:
+	dput(mnt->mnt_sb->s_root);
+	deactivate_super(mnt->mnt_sb);
+	return err;
+}
+
+/*
+ * Check if 'newinstance' mount option was specified in @data.
+ *
+ * Return: -errno  	on error (eg: invalid mount options specified)
+ * 	 : 1 		if 'newinstance' mount option was specified
+ * 	 : 0 		if 'newinstance' mount option was NOT specified
+ */
+static int is_new_instance_mount(void *data)
+{
+	int rc;
+	struct pts_mount_opts opts;
+
+	if (!data)
+		return 0;
+
+	rc = safe_parse_mount_options(data, &opts);
+	if (!rc)
+		rc = opts.newinstance;
+
+	return rc;
 }
 
 /*
@@ -358,11 +465,54 @@ static int get_init_pts_sb(struct file_system_type *fs_type, int flags,
         return simple_set_mnt(mnt, s);
 }
 
+/*
+ * Mount or remount the initial kernel mount of devpts. This type of
+ * mount maintains the legacy, single-instance semantics, while the
+ * kernel still allows multiple-instances.
+ */
+static int init_pts_mount(struct file_system_type *fs_type, int flags,
+		void *data, struct vfsmount *mnt)
+{
+	int err;
+
+	err = get_init_pts_sb(fs_type, flags, data, mnt);
+	if (err)
+		 return err;
+
+	err = mknod_ptmx(mnt->mnt_sb);
+	if (err) {
+		dput(mnt->mnt_sb->s_root);
+		deactivate_super(mnt->mnt_sb);
+	}
+
+	return err;
+}
+
 static int devpts_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_init_pts_sb(fs_type, flags, data, mnt);
+	int new;
+
+	new = is_new_instance_mount(data);
+	if (new < 0)
+		return new;
+
+	if (new)
+		return new_pts_mount(fs_type, flags, data, mnt);
+
+	return init_pts_mount(fs_type, flags, data, mnt);
 }
+#else
+/*
+ * This supports only the legacy single-instance semantics (no
+ * multiple-instance semantics)
+ */
+static int devpts_get_sb(struct file_system_type *fs_type, int flags,
+		const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
+}
+#endif
 
 static void devpts_kill_sb(struct super_block *sb)
 {
@@ -488,12 +638,18 @@ void devpts_pty_kill(struct tty_struct *tty)
 	mutex_lock(&root->d_inode->i_mutex);
 
 	dentry = d_find_alias(inode);
-	if (dentry && !IS_ERR(dentry)) {
+	if (IS_ERR(dentry))
+		goto out;
+
+	if (dentry) {
 		inode->i_nlink--;
 		d_delete(dentry);
-		dput(dentry);
+		dput(dentry);	// d_alloc_name() in devpts_pty_new()
 	}
 
+	dput(dentry);		// d_find_alias above
+
+out:
 	mutex_unlock(&root->d_inode->i_mutex);
 }
 
-- 
cgit v0.10.2


From 784c4d8b1b1e66f8c45e8b889613f4982f525b2b Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 2 Jan 2009 13:42:34 +0000
Subject: Document usage of multiple-instances of devpts

Changelog [v2]:
	- Add note indicating strict isolation is not possible unless all
	  mounts of devpts use the 'newinstance' mount option.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt
new file mode 100644
index 0000000..68dffd8
--- /dev/null
+++ b/Documentation/filesystems/devpts.txt
@@ -0,0 +1,132 @@
+
+To support containers, we now allow multiple instances of devpts filesystem,
+such that indices of ptys allocated in one instance are independent of indices
+allocated in other instances of devpts.
+
+To preserve backward compatibility, this support for multiple instances is
+enabled only if:
+
+	- CONFIG_DEVPTS_MULTIPLE_INSTANCES=y, and
+	- '-o newinstance' mount option is specified while mounting devpts
+
+IOW, devpts now supports both single-instance and multi-instance semantics.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=n, there is no change in behavior and
+this referred to as the "legacy" mode. In this mode, the new mount options
+(-o newinstance and -o ptmxmode) will be ignored with a 'bogus option' message
+on console.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and devpts is mounted without the
+'newinstance' option (as in current start-up scripts) the new mount binds
+to the initial kernel mount of devpts. This mode is referred to as the
+'single-instance' mode and the current, single-instance semantics are
+preserved, i.e PTYs are common across the system.
+
+The only difference between this single-instance mode and the legacy mode
+is the presence of new, '/dev/pts/ptmx' node with permissions 0000, which
+can safely be ignored.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and 'newinstance' option is specified,
+the mount is considered to be in the multi-instance mode and a new instance
+of the devpts fs is created. Any ptys created in this instance are independent
+of ptys in other instances of devpts. Like in the single-instance mode, the
+/dev/pts/ptmx node is present. To effectively use the multi-instance mode,
+open of /dev/ptmx must be a redirected to '/dev/pts/ptmx' using a symlink or
+bind-mount.
+
+Eg: A container startup script could do the following:
+
+	$ chmod 0666 /dev/pts/ptmx
+	$ rm /dev/ptmx
+	$ ln -s pts/ptmx /dev/ptmx
+	$ ns_exec -cm /bin/bash
+
+	# We are now in new container
+
+	$ umount /dev/pts
+	$ mount -t devpts -o newinstance lxcpts /dev/pts
+	$ sshd -p 1234
+
+where 'ns_exec -cm /bin/bash' calls clone() with CLONE_NEWNS flag and execs
+/bin/bash in the child process.  A pty created by the sshd is not visible in
+the original mount of /dev/pts.
+
+User-space changes
+------------------
+
+In multi-instance mode (i.e '-o newinstance' mount option is specified at least
+once), following user-space issues should be noted.
+
+1. If -o newinstance mount option is never used, /dev/pts/ptmx can be ignored
+   and no change is needed to system-startup scripts.
+
+2. To effectively use multi-instance mode (i.e -o newinstance is specified)
+   administrators or startup scripts should "redirect" open of /dev/ptmx to
+   /dev/pts/ptmx using either a bind mount or symlink.
+
+	$ mount -t devpts -o newinstance devpts /dev/pts
+
+   followed by either
+
+	$ rm /dev/ptmx
+	$ ln -s pts/ptmx /dev/ptmx
+	$ chmod 666 /dev/pts/ptmx
+   or
+	$ mount -o bind /dev/pts/ptmx /dev/ptmx
+
+3. The '/dev/ptmx -> pts/ptmx' symlink is the preferred method since it
+   enables better error-reporting and treats both single-instance and
+   multi-instance mounts similarly.
+
+   But this method requires that system-startup scripts set the mode of
+   /dev/pts/ptmx correctly (default mode is 0000). The scripts can set the
+   mode by, either
+
+   	- adding ptmxmode mount option to devpts entry in /etc/fstab, or
+	- using 'chmod 0666 /dev/pts/ptmx'
+
+4. If multi-instance mode mount is needed for containers, but the system
+   startup scripts have not yet been updated, container-startup scripts
+   should bind mount /dev/ptmx to /dev/pts/ptmx to avoid breaking single-
+   instance mounts.
+
+   Or, in general, container-startup scripts should use:
+
+	mount -t devpts -o newinstance -o ptmxmode=0666 devpts /dev/pts
+	if [ ! -L /dev/ptmx ]; then
+		mount -o bind /dev/pts/ptmx /dev/ptmx
+	fi
+
+   When all devpts mounts are multi-instance, /dev/ptmx can permanently be
+   a symlink to pts/ptmx and the bind mount can be ignored.
+
+5. A multi-instance mount that is not accompanied by the /dev/ptmx to
+   /dev/pts/ptmx redirection would result in an unusable/unreachable pty.
+
+	mount -t devpts -o newinstance lxcpts /dev/pts
+
+   immediately followed by:
+
+	open("/dev/ptmx")
+
+    would create a pty, say /dev/pts/7, in the initial kernel mount.
+    But /dev/pts/7 would be invisible in the new mount.
+
+6. The permissions for /dev/pts/ptmx node should be specified when mounting
+   /dev/pts, using the '-o ptmxmode=%o' mount option (default is 0000).
+
+	mount -t devpts -o newinstance -o ptmxmode=0644 devpts /dev/pts
+
+   The permissions can be later be changed as usual with 'chmod'.
+
+	chmod 666 /dev/pts/ptmx
+
+7. A mount of devpts without the 'newinstance' option results in binding to
+   initial kernel mount.  This behavior while preserving legacy semantics,
+   does not provide strict isolation in a container environment. i.e by
+   mounting devpts without the 'newinstance' option, a container could
+   get visibility into the 'host' or root container's devpts.
+   
+   To workaround this and have strict isolation, all mounts of devpts,
+   including the mount in the root container, should use the newinstance
+   option.
-- 
cgit v0.10.2


From 835aa440f1c3fe16a622015bc1b52dffedf6d90e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:42:48 +0000
Subject: devpts: Coding style clean up

Just nail the oddments now while this code is being touched

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index b4a89fa..b02c243 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -311,7 +311,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
 	if (s->s_root)
 		return 0;
 
-	printk("devpts: get root dentry failed\n");
+	printk(KERN_ERR "devpts: get root dentry failed\n");
 	iput(inode);
 
 free_fsi:
@@ -444,25 +444,25 @@ static int is_new_instance_mount(void *data)
 static int get_init_pts_sb(struct file_system_type *fs_type, int flags,
 		void *data, struct vfsmount *mnt)
 {
-        struct super_block *s;
-        int error;
-
-        s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
-        if (IS_ERR(s))
-                return PTR_ERR(s);
-
-        if (!s->s_root) {
-                s->s_flags = flags;
-                error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
-                if (error) {
-                        up_write(&s->s_umount);
-                        deactivate_super(s);
-                        return error;
-                }
-                s->s_flags |= MS_ACTIVE;
-        }
-        do_remount_sb(s, flags, data, 0);
-        return simple_set_mnt(mnt, s);
+	struct super_block *s;
+	int error;
+
+	s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	if (!s->s_root) {
+		s->s_flags = flags;
+		error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
+		if (error) {
+			up_write(&s->s_umount);
+			deactivate_super(s);
+			return error;
+		}
+		s->s_flags |= MS_ACTIVE;
+	}
+	do_remount_sb(s, flags, data, 0);
+	return simple_set_mnt(mnt, s);
 }
 
 /*
@@ -477,7 +477,7 @@ static int init_pts_mount(struct file_system_type *fs_type, int flags,
 
 	err = get_init_pts_sb(fs_type, flags, data, mnt);
 	if (err)
-		 return err;
+		return err;
 
 	err = mknod_ptmx(mnt->mnt_sb);
 	if (err) {
@@ -542,9 +542,8 @@ int devpts_new_index(struct inode *ptmx_inode)
 	int ida_ret;
 
 retry:
-	if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL)) {
+	if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
 		return -ENOMEM;
-	}
 
 	mutex_lock(&allocated_ptys_lock);
 	ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
@@ -576,7 +575,8 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
 
 int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 {
-	int number = tty->index; /* tty layer puts index from devpts_new_index() in here */
+	/* tty layer puts index from devpts_new_index() in here */
+	int number = tty->index;
 	struct tty_driver *driver = tty->driver;
 	dev_t device = MKDEV(driver->major, driver->minor_start+number);
 	struct dentry *dentry;
@@ -644,11 +644,10 @@ void devpts_pty_kill(struct tty_struct *tty)
 	if (dentry) {
 		inode->i_nlink--;
 		d_delete(dentry);
-		dput(dentry);	// d_alloc_name() in devpts_pty_new()
+		dput(dentry);	/* d_alloc_name() in devpts_pty_new() */
 	}
 
-	dput(dentry);		// d_find_alias above
-
+	dput(dentry);		/* d_find_alias above */
 out:
 	mutex_unlock(&root->d_inode->i_mutex);
 }
-- 
cgit v0.10.2


From d95186d1f455b4b901121ba69d0680800fb4b57b Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:42:56 +0000
Subject: sierra: Fix formatting

Andrew Morton wrote:

in drivers/usb/serial/sierra.c:

        } else {
                if (urb->actual_length) {
+               tty = tty_port_tty_get(&port->port);
                        tty_buffer_request_room(tty, urb->actual_length);

it's missing a tab.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 0f2b672..d9bf9a5 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -442,7 +442,7 @@ static void sierra_indat_callback(struct urb *urb)
 		    " endpoint %02x.", __func__, status, endpoint);
 	} else {
 		if (urb->actual_length) {
-		tty = tty_port_tty_get(&port->port);
+			tty = tty_port_tty_get(&port->port);
 			tty_buffer_request_room(tty, urb->actual_length);
 			tty_insert_flip_string(tty, data, urb->actual_length);
 			tty_flip_buffer_push(tty);
-- 
cgit v0.10.2


From a47d545f5782cbde871b50bdf4a83379ed2da222 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Fri, 2 Jan 2009 13:43:04 +0000
Subject: tty: Fix sparse static warning for tty_driver_lookup_tty

Fixed sparse warning:
drivers/char/tty_io.c:1216:19: warning: symbol 'tty_driver_lookup_tty' was not declared. Should it be static?

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index d8d240c..2a15af6 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1211,7 +1211,7 @@ static void tty_line_name(struct tty_driver *driver, int index, char *p)
  *	be held until the 'fast-open' is also done. Will change once we
  *	have refcounting in the driver and per driver locking
  */
-struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver,
+static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver,
 		struct inode *inode, int idx)
 {
 	struct tty_struct *tty;
-- 
cgit v0.10.2


From fc6f6238226e6d1248e1967eae2bf556eaf3ac17 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:43:17 +0000
Subject: pty: simplify resize

We have special case logic for resizing pty/tty pairs. We also have a per
driver resize method so for the pty case we should use it.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index 0587b66..5a8a4c2 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -529,7 +529,7 @@ static void hvc_set_winsz(struct work_struct *work)
 	tty = tty_kref_get(hp->tty);
 	spin_unlock_irqrestore(&hp->lock, hvc_flags);
 
-	tty_do_resize(tty, tty, &ws);
+	tty_do_resize(tty, &ws);
 	tty_kref_put(tty);
 }
 
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 6d45827..b5daaaa 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -230,6 +230,55 @@ static void pty_set_termios(struct tty_struct *tty,
 	tty->termios->c_cflag |= (CS8 | CREAD);
 }
 
+/**
+ *	pty_do_resize		-	resize event
+ *	@tty: tty being resized
+ *	@real_tty: real tty (not the same as tty if using a pty/tty pair)
+ *	@rows: rows (character)
+ *	@cols: cols (character)
+ *
+ *	Update the termios variables and send the neccessary signals to
+ *	peform a terminal resize correctly
+ */
+
+int pty_resize(struct tty_struct *tty,  struct winsize *ws)
+{
+	struct pid *pgrp, *rpgrp;
+	unsigned long flags;
+	struct tty_struct *pty = tty->link;
+
+	/* For a PTY we need to lock the tty side */
+	mutex_lock(&tty->termios_mutex);
+	if (!memcmp(ws, &tty->winsize, sizeof(*ws)))
+		goto done;
+
+	/* Get the PID values and reference them so we can
+	   avoid holding the tty ctrl lock while sending signals.
+	   We need to lock these individually however. */
+
+	spin_lock_irqsave(&tty->ctrl_lock, flags);
+	pgrp = get_pid(tty->pgrp);
+	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+
+	spin_lock_irqsave(&pty->ctrl_lock, flags);
+	rpgrp = get_pid(pty->pgrp);
+	spin_unlock_irqrestore(&pty->ctrl_lock, flags);
+
+	if (pgrp)
+		kill_pgrp(pgrp, SIGWINCH, 1);
+	if (rpgrp != pgrp && rpgrp)
+		kill_pgrp(rpgrp, SIGWINCH, 1);
+
+	put_pid(pgrp);
+	put_pid(rpgrp);
+
+	tty->winsize = *ws;
+	pty->winsize = *ws;	/* Never used so will go away soon */
+done:
+	mutex_unlock(&tty->termios_mutex);
+	return 0;
+}
+
 static int pty_install(struct tty_driver *driver, struct tty_struct *tty)
 {
 	struct tty_struct *o_tty;
@@ -290,6 +339,7 @@ static const struct tty_operations pty_ops = {
 	.chars_in_buffer = pty_chars_in_buffer,
 	.unthrottle = pty_unthrottle,
 	.set_termios = pty_set_termios,
+	.resize = pty_resize
 };
 
 /* Traditional BSD devices */
@@ -319,6 +369,7 @@ static const struct tty_operations pty_ops_bsd = {
 	.unthrottle = pty_unthrottle,
 	.set_termios = pty_set_termios,
 	.ioctl = pty_bsd_ioctl,
+	.resize = pty_resize
 };
 
 static void __init legacy_pty_init(void)
@@ -561,7 +612,8 @@ static const struct tty_operations ptm_unix98_ops = {
 	.unthrottle = pty_unthrottle,
 	.set_termios = pty_set_termios,
 	.ioctl = pty_unix98_ioctl,
-	.shutdown = pty_unix98_shutdown
+	.shutdown = pty_unix98_shutdown,
+	.resize = pty_resize
 };
 
 static const struct tty_operations pty_unix98_ops = {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 2a15af6..d33e5ab 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2048,7 +2048,6 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg)
 /**
  *	tty_do_resize		-	resize event
  *	@tty: tty being resized
- *	@real_tty: real tty (not the same as tty if using a pty/tty pair)
  *	@rows: rows (character)
  *	@cols: cols (character)
  *
@@ -2056,41 +2055,34 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg)
  *	peform a terminal resize correctly
  */
 
-int tty_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-					struct winsize *ws)
+int tty_do_resize(struct tty_struct *tty, struct winsize *ws)
 {
-	struct pid *pgrp, *rpgrp;
+	struct pid *pgrp;
 	unsigned long flags;
 
-	/* For a PTY we need to lock the tty side */
-	mutex_lock(&real_tty->termios_mutex);
-	if (!memcmp(ws, &real_tty->winsize, sizeof(*ws)))
+	/* Lock the tty */
+	mutex_lock(&tty->termios_mutex);
+	if (!memcmp(ws, &tty->winsize, sizeof(*ws)))
 		goto done;
 	/* Get the PID values and reference them so we can
 	   avoid holding the tty ctrl lock while sending signals */
 	spin_lock_irqsave(&tty->ctrl_lock, flags);
 	pgrp = get_pid(tty->pgrp);
-	rpgrp = get_pid(real_tty->pgrp);
 	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
 
 	if (pgrp)
 		kill_pgrp(pgrp, SIGWINCH, 1);
-	if (rpgrp != pgrp && rpgrp)
-		kill_pgrp(rpgrp, SIGWINCH, 1);
-
 	put_pid(pgrp);
-	put_pid(rpgrp);
 
 	tty->winsize = *ws;
-	real_tty->winsize = *ws;
 done:
-	mutex_unlock(&real_tty->termios_mutex);
+	mutex_unlock(&tty->termios_mutex);
 	return 0;
 }
 
 /**
  *	tiocswinsz		-	implement window size set ioctl
- *	@tty; tty
+ *	@tty; tty side of tty
  *	@arg: user buffer for result
  *
  *	Copies the user idea of the window size to the kernel. Traditionally
@@ -2103,17 +2095,16 @@ done:
  *	then calls into the default method.
  */
 
-static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
-	struct winsize __user *arg)
+static int tiocswinsz(struct tty_struct *tty, struct winsize __user *arg)
 {
 	struct winsize tmp_ws;
 	if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
 		return -EFAULT;
 
 	if (tty->ops->resize)
-		return tty->ops->resize(tty, real_tty, &tmp_ws);
+		return tty->ops->resize(tty, &tmp_ws);
 	else
-		return tty_do_resize(tty, real_tty, &tmp_ws);
+		return tty_do_resize(tty, &tmp_ws);
 }
 
 /**
@@ -2538,7 +2529,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case TIOCGWINSZ:
 		return tiocgwinsz(real_tty, p);
 	case TIOCSWINSZ:
-		return tiocswinsz(tty, real_tty, p);
+		return tiocswinsz(real_tty, p);
 	case TIOCCONS:
 		return real_tty != tty ? -EINVAL : tioccons(file);
 	case FIONBIO:
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 639e126..8001421 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -819,8 +819,8 @@ static inline int resize_screen(struct vc_data *vc, int width, int height,
  *	ctrl_lock of the tty IFF a tty is passed.
  */
 
-static int vc_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-		struct vc_data *vc, unsigned int cols, unsigned int lines)
+static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
+				unsigned int cols, unsigned int lines)
 {
 	unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0;
 	unsigned int old_cols, old_rows, old_row_size, old_screen_size;
@@ -932,7 +932,7 @@ static int vc_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
 		ws.ws_row = vc->vc_rows;
 		ws.ws_col = vc->vc_cols;
 		ws.ws_ypixel = vc->vc_scan_lines;
-		tty_do_resize(tty, real_tty, &ws);
+		tty_do_resize(tty, &ws);
 	}
 
 	if (CON_IS_VISIBLE(vc))
@@ -954,13 +954,12 @@ static int vc_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
 
 int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows)
 {
-	return vc_do_resize(vc->vc_tty, vc->vc_tty, vc, cols, rows);
+	return vc_do_resize(vc->vc_tty, vc, cols, rows);
 }
 
 /**
  *	vt_resize		-	resize a VT
  *	@tty: tty to resize
- *	@real_tty: tty if a pty/tty pair
  *	@ws: winsize attributes
  *
  *	Resize a virtual terminal. This is called by the tty layer as we
@@ -971,14 +970,13 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows)
  *	termios_mutex and the tty ctrl_lock in that order.
  */
 
-int vt_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-	struct winsize *ws)
+int vt_resize(struct tty_struct *tty, struct winsize *ws)
 {
 	struct vc_data *vc = tty->driver_data;
 	int ret;
 
 	acquire_console_sem();
-	ret = vc_do_resize(tty, real_tty, vc, ws->ws_col, ws->ws_row);
+	ret = vc_do_resize(tty, vc, ws->ws_col, ws->ws_row);
 	release_console_sem();
 	return ret;
 }
diff --git a/include/linux/tty.h b/include/linux/tty.h
index dfc77de..f881697 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -360,8 +360,7 @@ extern int tty_write_room(struct tty_struct *tty);
 extern void tty_driver_flush_buffer(struct tty_struct *tty);
 extern void tty_throttle(struct tty_struct *tty);
 extern void tty_unthrottle(struct tty_struct *tty);
-extern int tty_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-						struct winsize *ws);
+extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws);
 extern void tty_shutdown(struct tty_struct *tty);
 extern void tty_free_termios(struct tty_struct *tty);
 extern int is_current_pgrp_orphaned(void);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 78416b9..08e0883 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -196,8 +196,7 @@
  *	Optional: If not provided then the write method is called under
  *	the atomic write lock to keep it serialized with the ldisc.
  *
- * int (*resize)(struct tty_struct *tty, struct tty_struct *real_tty,
- *				unsigned int rows, unsigned int cols);
+ * int (*resize)(struct tty_struct *tty, struct winsize *ws)
  *
  *	Called when a termios request is issued which changes the
  *	requested terminal geometry.
@@ -258,8 +257,7 @@ struct tty_operations {
 	int (*tiocmget)(struct tty_struct *tty, struct file *file);
 	int (*tiocmset)(struct tty_struct *tty, struct file *file,
 			unsigned int set, unsigned int clear);
-	int (*resize)(struct tty_struct *tty, struct tty_struct *real_tty,
-				struct winsize *ws);
+	int (*resize)(struct tty_struct *tty, struct winsize *ws);
 	int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew);
 #ifdef CONFIG_CONSOLE_POLL
 	int (*poll_init)(struct tty_driver *driver, int line, char *options);
-- 
cgit v0.10.2


From a59c0d6f14315a3f300f6f3786137213727e4c47 Mon Sep 17 00:00:00 2001
From: Joe Peterson <joe@skyrush.com>
Date: Fri, 2 Jan 2009 13:43:25 +0000
Subject: n_tty: Fix handling of control characters and continuations

Fix process_output_block to detect continuation characters correctly
and to handle control characters even when O_OLCUC is enabled.  Make
similar change to do_output_char().

Signed-off-by: Joe Peterson <joe@skyrush.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index a223823..30b0426 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -351,10 +351,12 @@ static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
 			tty->column--;
 		break;
 	default:
-		if (O_OLCUC(tty))
-			c = toupper(c);
-		if (!iscntrl(c) && !is_continuation(c, tty))
-			tty->column++;
+		if (!iscntrl(c)) {
+			if (O_OLCUC(tty))
+				c = toupper(c);
+			if (!is_continuation(c, tty))
+				tty->column++;
+		}
 		break;
 	}
 
@@ -425,7 +427,9 @@ static ssize_t process_output_block(struct tty_struct *tty,
 		nr = space;
 
 	for (i = 0, cp = buf; i < nr; i++, cp++) {
-		switch (*cp) {
+		unsigned char c = *cp;
+
+		switch (c) {
 		case '\n':
 			if (O_ONLRET(tty))
 				tty->column = 0;
@@ -447,10 +451,12 @@ static ssize_t process_output_block(struct tty_struct *tty,
 				tty->column--;
 			break;
 		default:
-			if (O_OLCUC(tty))
-				goto break_out;
-			if (!iscntrl(*cp))
-				tty->column++;
+			if (!iscntrl(c)) {
+				if (O_OLCUC(tty))
+					goto break_out;
+				if (!is_continuation(c, tty))
+					tty->column++;
+			}
 			break;
 		}
 	}
-- 
cgit v0.10.2


From acc71bbad33478973dbed68ebbc2d76dac9a51bd Mon Sep 17 00:00:00 2001
From: Joe Peterson <joe@skyrush.com>
Date: Fri, 2 Jan 2009 13:43:32 +0000
Subject: n_tty: Fix hanfling of buffer full corner cases

Fix the handling of input characters when the tty buffer is full or nearly
full.  This includes tests that are done in n_tty_receive_char() and handling
of PARMRK.

Problems with the buffer-full tests done in receive_char() caused characters to
be lost at times when the buffer(s) filled.  Also, these full conditions
would often only be detected with echo on, and PARMRK was not accounted for
properly in all cases.  One symptom of these problems, in addition to lost
characters, was early termination from unix commands like tr and cat when
^Q was used to break from a stopped tty with full buffers (note that breaking
out was often previously not possible, due to the pty getting in "gridlock",
which will be addressed in another patch).  Note space is always reserved
at the end of the buffer for a newline (or EOF/EOL) in canonical mode.

Signed-off-by: Joe Peterson <joe@skyrush.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index 30b0426..4b1e96b 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -1107,6 +1107,7 @@ static inline void n_tty_receive_parity_error(struct tty_struct *tty,
 static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 {
 	unsigned long flags;
+	int parmrk;
 
 	if (tty->raw) {
 		put_tty_queue(c, tty);
@@ -1144,21 +1145,24 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 	 */
 	if (!test_bit(c, tty->process_char_map) || tty->lnext) {
 		tty->lnext = 0;
-		if (L_ECHO(tty)) {
-			finish_erasing(tty);
-			if (tty->read_cnt >= N_TTY_BUF_SIZE-1) {
-				/* beep if no space */
+		parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
+		if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
+			/* beep if no space */
+			if (L_ECHO(tty)) {
 				echo_char_raw('\a', tty);
 				process_echoes(tty);
-				return;
 			}
+			return;
+		}
+		if (L_ECHO(tty)) {
+			finish_erasing(tty);
 			/* Record the column of first canon char. */
 			if (tty->canon_head == tty->read_head)
 				echo_set_canon_col(tty);
 			echo_char(c, tty);
 			process_echoes(tty);
 		}
-		if (I_PARMRK(tty) && c == (unsigned char) '\377')
+		if (parmrk)
 			put_tty_queue(c, tty);
 		put_tty_queue(c, tty);
 		return;
@@ -1250,15 +1254,22 @@ send_signal:
 			return;
 		}
 		if (c == '\n') {
-			if (L_ECHO(tty) || L_ECHONL(tty)) {
-				if (tty->read_cnt >= N_TTY_BUF_SIZE-1)
+			if (tty->read_cnt >= N_TTY_BUF_SIZE) {
+				if (L_ECHO(tty)) {
 					echo_char_raw('\a', tty);
+					process_echoes(tty);
+				}
+				return;
+			}
+			if (L_ECHO(tty) || L_ECHONL(tty)) {
 				echo_char_raw('\n', tty);
 				process_echoes(tty);
 			}
 			goto handle_newline;
 		}
 		if (c == EOF_CHAR(tty)) {
+			if (tty->read_cnt >= N_TTY_BUF_SIZE)
+				return;
 			if (tty->canon_head != tty->read_head)
 				set_bit(TTY_PUSH, &tty->flags);
 			c = __DISABLED_CHAR;
@@ -1266,12 +1277,19 @@ send_signal:
 		}
 		if ((c == EOL_CHAR(tty)) ||
 		    (c == EOL2_CHAR(tty) && L_IEXTEN(tty))) {
+			parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty))
+				 ? 1 : 0;
+			if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk)) {
+				if (L_ECHO(tty)) {
+					echo_char_raw('\a', tty);
+					process_echoes(tty);
+				}
+				return;
+			}
 			/*
 			 * XXX are EOL_CHAR and EOL2_CHAR echoed?!?
 			 */
 			if (L_ECHO(tty)) {
-				if (tty->read_cnt >= N_TTY_BUF_SIZE-1)
-					echo_char_raw('\a', tty);
 				/* Record the column of first canon char. */
 				if (tty->canon_head == tty->read_head)
 					echo_set_canon_col(tty);
@@ -1282,7 +1300,7 @@ send_signal:
 			 * XXX does PARMRK doubling happen for
 			 * EOL_CHAR and EOL2_CHAR?
 			 */
-			if (I_PARMRK(tty) && c == (unsigned char) '\377')
+			if (parmrk)
 				put_tty_queue(c, tty);
 
 handle_newline:
@@ -1299,14 +1317,17 @@ handle_newline:
 		}
 	}
 
-	if (L_ECHO(tty)) {
-		finish_erasing(tty);
-		if (tty->read_cnt >= N_TTY_BUF_SIZE-1) {
-			/* beep if no space */
+	parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
+	if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
+		/* beep if no space */
+		if (L_ECHO(tty)) {
 			echo_char_raw('\a', tty);
 			process_echoes(tty);
-			return;
 		}
+		return;
+	}
+	if (L_ECHO(tty)) {
+		finish_erasing(tty);
 		if (c == '\n')
 			echo_char_raw('\n', tty);
 		else {
@@ -1318,7 +1339,7 @@ handle_newline:
 		process_echoes(tty);
 	}
 
-	if (I_PARMRK(tty) && c == (unsigned char) '\377')
+	if (parmrk)
 		put_tty_queue(c, tty);
 
 	put_tty_queue(c, tty);
-- 
cgit v0.10.2


From 7e94b1d9bffc18dca3b45554d9d118a3ffcc4d1b Mon Sep 17 00:00:00 2001
From: Joe Peterson <joe@skyrush.com>
Date: Fri, 2 Jan 2009 13:43:40 +0000
Subject: n_tty: Output bells immediately on a full buffer

This patch causes "bell" (^G) characters (invoked when the input buffer
is full) to be immediately output rather than filling the echo buffer.

This is especially a problem when the tty is stopped and buffers fill, since
the bells do not serve their purpose of immediate notification that the
buffer cannot take further input, and they will flush all at once when the
tty is restarted.

Signed-off-by: Joe Peterson <joe@skyrush.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index 4b1e96b..3922a08 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -872,7 +872,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 
 	/* FIXME: locking needed ? */
 	if (tty->read_head == tty->canon_head) {
-		/* echo_char_raw('\a', tty); */ /* what do you think? */
+		/* process_output('\a', tty); */ /* what do you think? */
 		return;
 	}
 	if (c == ERASE_CHAR(tty))
@@ -1148,10 +1148,8 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 		parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
 		if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
 			/* beep if no space */
-			if (L_ECHO(tty)) {
-				echo_char_raw('\a', tty);
-				process_echoes(tty);
-			}
+			if (L_ECHO(tty))
+				process_output('\a', tty);
 			return;
 		}
 		if (L_ECHO(tty)) {
@@ -1255,10 +1253,8 @@ send_signal:
 		}
 		if (c == '\n') {
 			if (tty->read_cnt >= N_TTY_BUF_SIZE) {
-				if (L_ECHO(tty)) {
-					echo_char_raw('\a', tty);
-					process_echoes(tty);
-				}
+				if (L_ECHO(tty))
+					process_output('\a', tty);
 				return;
 			}
 			if (L_ECHO(tty) || L_ECHONL(tty)) {
@@ -1280,10 +1276,8 @@ send_signal:
 			parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty))
 				 ? 1 : 0;
 			if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk)) {
-				if (L_ECHO(tty)) {
-					echo_char_raw('\a', tty);
-					process_echoes(tty);
-				}
+				if (L_ECHO(tty))
+					process_output('\a', tty);
 				return;
 			}
 			/*
@@ -1320,10 +1314,8 @@ handle_newline:
 	parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
 	if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
 		/* beep if no space */
-		if (L_ECHO(tty)) {
-			echo_char_raw('\a', tty);
-			process_echoes(tty);
-		}
+		if (L_ECHO(tty))
+			process_output('\a', tty);
 		return;
 	}
 	if (L_ECHO(tty)) {
-- 
cgit v0.10.2


From 4bd43f2c31848d751f63e8753cd2788d48fb5f30 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:44:04 +0000
Subject: tty: Fix close races in USB serial

USB serial has always had races where the tty port usage count can hit zero
during a receive event. The internal locking is a mutex so we can't use
that in the IRQ handlers.

With krefs we can tackle this differently but we still need to be careful.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 794b5ff..aafa684 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -269,15 +269,19 @@ static void serial_close(struct tty_struct *tty, struct file *filp)
 		return;
 	}
 
-	--port->port.count;
-	if (port->port.count == 0)
+	if (port->port.count == 1)
 		/* only call the device specific close if this
-		 * port is being closed by the last owner */
+		 * port is being closed by the last owner. Ensure we do
+		 * this before we drop the port count. The call is protected
+		 * by the port mutex
+		 */
 		port->serial->type->close(tty, port, filp);
 
-	if (port->port.count == (port->console? 1 : 0)) {
+	if (port->port.count == (port->console ? 2 : 1)) {
 		struct tty_struct *tty = tty_port_tty_get(&port->port);
 		if (tty) {
+			/* We must do this before we drop the port count to
+			   zero. */
 			if (tty->driver_data)
 				tty->driver_data = NULL;
 			tty_port_tty_set(&port->port, NULL);
@@ -285,13 +289,14 @@ static void serial_close(struct tty_struct *tty, struct file *filp)
 		}
 	}
 
-	if (port->port.count == 0) {
+	if (port->port.count == 1) {
 		mutex_lock(&port->serial->disc_mutex);
 		if (!port->serial->disconnected)
 			usb_autopm_put_interface(port->serial->interface);
 		mutex_unlock(&port->serial->disc_mutex);
 		module_put(port->serial->type->driver.owner);
 	}
+	--port->port.count;
 
 	mutex_unlock(&port->mutex);
 	usb_serial_put(port->serial);
-- 
cgit v0.10.2


From 8c056e5b148498192832678cf2957760945e8c71 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 2 Jan 2009 13:44:12 +0000
Subject: devpts: fix unused function warning

fs/devpts/inode.c:324: warning: 'compare_init_pts_sb' defined but not used

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index b02c243..3f309f1 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -320,6 +320,7 @@ fail:
 	return -ENOMEM;
 }
 
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
 static int compare_init_pts_sb(struct super_block *s, void *p)
 {
 	if (devpts_mnt)
@@ -327,7 +328,6 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
 	return 0;
 }
 
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
 /*
  * Safely parse the mount options in @data and update @opts.
  *
-- 
cgit v0.10.2


From 9f2a036aaac8f29bb7c68303b52a9263238b63d2 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+lkml@arm.linux.org.uk>
Date: Fri, 2 Jan 2009 13:44:20 +0000
Subject: Convert the oxsemi tornado special cases to use the quirk interface
 and not

scribble on its own reference structures.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 5450a0e..057b532c 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -737,6 +737,38 @@ static void __devexit pci_ite887x_exit(struct pci_dev *dev)
 	release_region(ioport, ITE_887x_IOSIZE);
 }
 
+/*
+ * Oxford Semiconductor Inc.
+ * Check that device is part of the Tornado range of devices, then determine
+ * the number of ports available on the device.
+ */
+static int pci_oxsemi_tornado_init(struct pci_dev *dev)
+{
+	u8 __iomem *p;
+	unsigned long deviceID;
+	unsigned int  number_uarts = 0;
+
+	/* OxSemi Tornado devices are all 0xCxxx */
+	if (dev->vendor == PCI_VENDOR_ID_OXSEMI &&
+	    (dev->device & 0xF000) != 0xC000)
+		return 0;
+
+	p = pci_iomap(dev, 0, 5);
+	if (p == NULL)
+		return -ENOMEM;
+
+	deviceID = ioread32(p);
+	/* Tornado device */
+	if (deviceID == 0x07000200) {
+		number_uarts = ioread8(p + 4);
+		printk(KERN_DEBUG
+			"%d ports detected on Oxford PCI Express device\n",
+								number_uarts);
+	}
+	pci_iounmap(dev, p);
+	return number_uarts;
+}
+
 static int
 pci_default_setup(struct serial_private *priv, struct pciserial_board *board,
 		  struct uart_port *port, int idx)
@@ -1018,6 +1050,25 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
 		.setup		= pci_default_setup,
 	},
 	/*
+	 * For Oxford Semiconductor and Mainpine
+	 */
+	{
+		.vendor		= PCI_VENDOR_ID_OXSEMI,
+		.device		= PCI_ANY_ID,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.init		= pci_oxsemi_tornado_init,
+		.setup		= pci_default_setup,
+	},
+	{
+		.vendor		= PCI_VENDOR_ID_MAINPINE,
+		.device		= PCI_ANY_ID,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.init		= pci_oxsemi_tornado_init,
+		.setup		= pci_default_setup,
+	},
+	/*
 	 * Default "match everything" terminator entry
 	 */
 	{
@@ -1854,39 +1905,6 @@ serial_pci_matches(struct pciserial_board *board,
 	    board->first_offset == guessed->first_offset;
 }
 
-/*
- * Oxford Semiconductor Inc.
- * Check that device is part of the Tornado range of devices, then determine
- * the number of ports available on the device.
- */
-static int pci_oxsemi_tornado_init(struct pci_dev *dev, struct pciserial_board *board)
-{
-	u8 __iomem *p;
-	unsigned long deviceID;
-	unsigned int  number_uarts;
-
-	/* OxSemi Tornado devices are all 0xCxxx */
-	if (dev->vendor == PCI_VENDOR_ID_OXSEMI &&
-	    (dev->device & 0xF000) != 0xC000)
-		return 0;
-
-	p = pci_iomap(dev, 0, 5);
-	if (p == NULL)
-		return -ENOMEM;
-
-	deviceID = ioread32(p);
-	/* Tornado device */
-	if (deviceID == 0x07000200) {
-		number_uarts = ioread8(p + 4);
-		board->num_ports = number_uarts;
-		printk(KERN_DEBUG
-			"%d ports detected on Oxford PCI Express device\n",
-								number_uarts);
-	}
-	pci_iounmap(dev, p);
-	return 0;
-}
-
 struct serial_private *
 pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board)
 {
@@ -1895,13 +1913,6 @@ pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board)
 	struct pci_serial_quirk *quirk;
 	int rc, nr_ports, i;
 
-	/*
-	 * Find number of ports on board
-	 */
-	if (dev->vendor == PCI_VENDOR_ID_OXSEMI ||
-	    dev->vendor == PCI_VENDOR_ID_MAINPINE)
-		pci_oxsemi_tornado_init(dev, board);
-
 	nr_ports = board->num_ports;
 
 	/*
-- 
cgit v0.10.2


From 975a1a7d887048d4afc9201383e11b7af991866b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+lkml@arm.linux.org.uk>
Date: Fri, 2 Jan 2009 13:44:27 +0000
Subject: And here's a patch (to be applied on top of the last) which prevents

this happening again by making use of 'const'.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 057b532c..0b79413 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -42,7 +42,8 @@ struct pci_serial_quirk {
 	u32	subvendor;
 	u32	subdevice;
 	int	(*init)(struct pci_dev *dev);
-	int	(*setup)(struct serial_private *, struct pciserial_board *,
+	int	(*setup)(struct serial_private *,
+			 const struct pciserial_board *,
 			 struct uart_port *, int);
 	void	(*exit)(struct pci_dev *dev);
 };
@@ -107,7 +108,7 @@ setup_port(struct serial_private *priv, struct uart_port *port,
  * ADDI-DATA GmbH communication cards <info@addi-data.com>
  */
 static int addidata_apci7800_setup(struct serial_private *priv,
-				struct pciserial_board *board,
+				const struct pciserial_board *board,
 				struct uart_port *port, int idx)
 {
 	unsigned int bar = 0, offset = board->first_offset;
@@ -134,7 +135,7 @@ static int addidata_apci7800_setup(struct serial_private *priv,
  * Not that ugly ;) -- HW
  */
 static int
-afavlab_setup(struct serial_private *priv, struct pciserial_board *board,
+afavlab_setup(struct serial_private *priv, const struct pciserial_board *board,
 	      struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset;
@@ -188,8 +189,9 @@ static int pci_hp_diva_init(struct pci_dev *dev)
  * some serial ports are supposed to be hidden on certain models.
  */
 static int
-pci_hp_diva_setup(struct serial_private *priv, struct pciserial_board *board,
-	      struct uart_port *port, int idx)
+pci_hp_diva_setup(struct serial_private *priv,
+		const struct pciserial_board *board,
+		struct uart_port *port, int idx)
 {
 	unsigned int offset = board->first_offset;
 	unsigned int bar = FL_GET_BASE(board->flags);
@@ -306,7 +308,7 @@ static void __devexit pci_plx9050_exit(struct pci_dev *dev)
 
 /* SBS Technologies Inc. PMC-OCTPRO and P-OCTAL cards */
 static int
-sbs_setup(struct serial_private *priv, struct pciserial_board *board,
+sbs_setup(struct serial_private *priv, const struct pciserial_board *board,
 		struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset;
@@ -463,7 +465,7 @@ static int pci_siig_init(struct pci_dev *dev)
 }
 
 static int pci_siig_setup(struct serial_private *priv,
-			  struct pciserial_board *board,
+			  const struct pciserial_board *board,
 			  struct uart_port *port, int idx)
 {
 	unsigned int bar = FL_GET_BASE(board->flags) + idx, offset = 0;
@@ -534,7 +536,8 @@ static int pci_timedia_init(struct pci_dev *dev)
  * Ugh, this is ugly as all hell --- TYT
  */
 static int
-pci_timedia_setup(struct serial_private *priv, struct pciserial_board *board,
+pci_timedia_setup(struct serial_private *priv,
+		  const struct pciserial_board *board,
 		  struct uart_port *port, int idx)
 {
 	unsigned int bar = 0, offset = board->first_offset;
@@ -568,7 +571,7 @@ pci_timedia_setup(struct serial_private *priv, struct pciserial_board *board,
  */
 static int
 titan_400l_800l_setup(struct serial_private *priv,
-		      struct pciserial_board *board,
+		      const struct pciserial_board *board,
 		      struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset;
@@ -770,7 +773,8 @@ static int pci_oxsemi_tornado_init(struct pci_dev *dev)
 }
 
 static int
-pci_default_setup(struct serial_private *priv, struct pciserial_board *board,
+pci_default_setup(struct serial_private *priv,
+		  const struct pciserial_board *board,
 		  struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset, maxnr;
@@ -1099,7 +1103,7 @@ static struct pci_serial_quirk *find_quirk(struct pci_dev *dev)
 }
 
 static inline int get_pci_irq(struct pci_dev *dev,
-				struct pciserial_board *board)
+				const struct pciserial_board *board)
 {
 	if (board->flags & FL_NOIRQ)
 		return 0;
@@ -1894,8 +1898,8 @@ serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board)
 }
 
 static inline int
-serial_pci_matches(struct pciserial_board *board,
-		   struct pciserial_board *guessed)
+serial_pci_matches(const struct pciserial_board *board,
+		   const struct pciserial_board *guessed)
 {
 	return
 	    board->num_ports == guessed->num_ports &&
@@ -1906,7 +1910,7 @@ serial_pci_matches(struct pciserial_board *board,
 }
 
 struct serial_private *
-pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board)
+pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board)
 {
 	struct uart_port serial_port;
 	struct serial_private *priv;
@@ -2039,7 +2043,8 @@ static int __devinit
 pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
 {
 	struct serial_private *priv;
-	struct pciserial_board *board, tmp;
+	const struct pciserial_board *board;
+	struct pciserial_board tmp;
 	int rc;
 
 	if (ent->driver_data >= ARRAY_SIZE(pci_boards)) {
@@ -2066,7 +2071,7 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
 		 * We matched one of our class entries.  Try to
 		 * determine the parameters of this board.
 		 */
-		rc = serial_pci_guess_board(dev, board);
+		rc = serial_pci_guess_board(dev, &tmp);
 		if (rc)
 			goto disable;
 	} else {
diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h
index 3209dd4..b24ff08 100644
--- a/include/linux/8250_pci.h
+++ b/include/linux/8250_pci.h
@@ -31,7 +31,7 @@ struct pciserial_board {
 struct serial_private;
 
 struct serial_private *
-pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board);
+pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board);
 void pciserial_remove_ports(struct serial_private *priv);
 void pciserial_suspend_ports(struct serial_private *priv);
 void pciserial_resume_ports(struct serial_private *priv);
-- 
cgit v0.10.2


From 39efd191d01b5f1efc3d604baf74233dc525e6a8 Mon Sep 17 00:00:00 2001
From: Kevin Hao <kexin.hao@windriver.com>
Date: Fri, 2 Jan 2009 13:44:34 +0000
Subject: Add device function for USB serial console

Add device funtion for usb serial console, so we can open /dev/console
when we use a usb serial device as console.

(Typecast removed as noted by Sergei Shtylyov)

Signed-off-by: Kevin Hao <kexin.hao@windriver.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index 5b95009..19e2404 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -241,12 +241,25 @@ static void usb_console_write(struct console *co,
 	}
 }
 
+static struct tty_driver *usb_console_device(struct console *co, int *index)
+{
+	struct tty_driver **p = (struct tty_driver **)co->data;
+
+	if (!*p)
+		return NULL;
+
+	*index = co->index;
+	return *p;
+}
+
 static struct console usbcons = {
 	.name =		"ttyUSB",
 	.write =	usb_console_write,
+	.device =	usb_console_device,
 	.setup =	usb_console_setup,
 	.flags =	CON_PRINTBUFFER,
 	.index =	-1,
+	.data = 	&usb_serial_tty_driver,
 };
 
 void usb_serial_console_disconnect(struct usb_serial *serial)
-- 
cgit v0.10.2


From d0eafc7db8f170d534a16b5f04617e98ae2025de Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 2 Jan 2009 13:44:49 +0000
Subject: CRED: Wrap task credential accesses in the devpts filesystem

Wrap access to task credentials so that they can be separated more easily from
the task_struct during the introduction of COW creds.

Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id().

Change some task->e?[ug]id to task_e?[ug]id().  In some places it makes more
sense to use RCU directly rather than a convenient wrapper; these will be
addressed by later patches.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 3f309f1..fff96e1 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -594,9 +594,9 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 	if (!inode)
 		return -ENOMEM;
 
-	inode->i_ino = number+2;
-	inode->i_uid = config.setuid ? config.uid : current_fsuid();
-	inode->i_gid = config.setgid ? config.gid : current_fsgid();
+	inode->i_ino = number + 3;
+	inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
+	inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	init_special_inode(inode, S_IFCHR|opts->mode, device);
 	inode->i_private = tty;
-- 
cgit v0.10.2


From c9b3976e3fec266be25c5001a70aa0a890b6c476 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:44:56 +0000
Subject: tty: Fix PPP hang under load

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index f307f13..7a84b40 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -316,8 +316,7 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
 {
 	/* wait_event is a macro */
 	wait_event(tty_ldisc_wait, tty_ldisc_try(tty));
-	if (tty->ldisc.refcount == 0)
-		printk(KERN_ERR "tty_ldisc_ref_wait\n");
+	WARN_ON(tty->ldisc.refcount == 0);
 	return &tty->ldisc;
 }
 
@@ -376,15 +375,17 @@ EXPORT_SYMBOL_GPL(tty_ldisc_deref);
  *	@tty: terminal to activate ldisc on
  *
  *	Set the TTY_LDISC flag when the line discipline can be called
- *	again. Do necessary wakeups for existing sleepers.
+ *	again. Do necessary wakeups for existing sleepers. Clear the LDISC
+ *	changing flag to indicate any ldisc change is now over.
  *
- *	Note: nobody should set this bit except via this function. Clearing
- *	directly is allowed.
+ *	Note: nobody should set the TTY_LDISC bit except via this function.
+ *	Clearing directly is allowed.
  */
 
 void tty_ldisc_enable(struct tty_struct *tty)
 {
 	set_bit(TTY_LDISC, &tty->flags);
+	clear_bit(TTY_LDISC_CHANGING, &tty->flags);
 	wake_up(&tty_ldisc_wait);
 }
 
@@ -496,7 +497,14 @@ restart:
 	 *	reference to the line discipline. The TTY_LDISC bit
 	 *	prevents anyone taking a reference once it is clear.
 	 *	We need the lock to avoid racing reference takers.
+	 *
+	 *	We must clear the TTY_LDISC bit here to avoid a livelock
+	 *	with a userspace app continually trying to use the tty in
+	 *	parallel to the change and re-referencing the tty.
 	 */
+	clear_bit(TTY_LDISC, &tty->flags);
+	if (o_tty)
+		clear_bit(TTY_LDISC, &o_tty->flags);
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
 	if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) {
@@ -528,7 +536,7 @@ restart:
 	 *	If the TTY_LDISC bit is set, then we are racing against
 	 *	another ldisc change
 	 */
-	if (!test_bit(TTY_LDISC, &tty->flags)) {
+	if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
 		struct tty_ldisc *ld;
 		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 		tty_ldisc_put(new_ldisc.ops);
@@ -536,10 +544,14 @@ restart:
 		tty_ldisc_deref(ld);
 		goto restart;
 	}
-
-	clear_bit(TTY_LDISC, &tty->flags);
+	/*
+	 *	This flag is used to avoid two parallel ldisc changes. Once
+	 *	open and close are fine grained locked this may work better
+	 *	as a mutex shared with the open/close/hup paths
+	 */
+	set_bit(TTY_LDISC_CHANGING, &tty->flags);
 	if (o_tty)
-		clear_bit(TTY_LDISC, &o_tty->flags);
+		set_bit(TTY_LDISC_CHANGING, &o_tty->flags);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 	
 	/*
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f881697..bbbeaef9 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -301,6 +301,7 @@ struct tty_struct {
 #define TTY_PUSH 		6	/* n_tty private */
 #define TTY_CLOSING 		7	/* ->close() in progress */
 #define TTY_LDISC 		9	/* Line discipline attached */
+#define TTY_LDISC_CHANGING 	10	/* Line discipline changing */
 #define TTY_HW_COOK_OUT 	14	/* Hardware can do output cooking */
 #define TTY_HW_COOK_IN 		15	/* Hardware can do input cooking */
 #define TTY_PTY_LOCK 		16	/* pty private */
-- 
cgit v0.10.2


From 31f35939d1d9bcfb3099b32c67b896d2792603f9 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:45:05 +0000
Subject: tty_port: Add a port level carrier detect operation

This is the first step to generalising the various pieces of waiting logic
duplicated in all sorts of serial drivers.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index 7f077c0..45ec263 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -2054,6 +2054,15 @@ static void esp_hangup(struct tty_struct *tty)
 	wake_up_interruptible(&info->port.open_wait);
 }
 
+static int esp_carrier_raised(struct tty_port *port)
+{
+	struct esp_struct *info = container_of(port, struct esp_struct, port);
+	serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT);
+	if (serial_in(info, UART_ESI_STAT2) & UART_MSR_DCD)
+		return 1;
+	return 0;
+}
+
 /*
  * ------------------------------------------------------------
  * esp_open() and friends
@@ -2066,17 +2075,19 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	int		retval;
 	int		do_clocal = 0;
 	unsigned long	flags;
+	int		cd;
+	struct tty_port *port = &info->port;
 
 	/*
 	 * If the device is in the middle of being closed, then block
 	 * until it's done, and then try again.
 	 */
 	if (tty_hung_up_p(filp) ||
-	    (info->port.flags & ASYNC_CLOSING)) {
-		if (info->port.flags & ASYNC_CLOSING)
-			interruptible_sleep_on(&info->port.close_wait);
+	    (port->flags & ASYNC_CLOSING)) {
+		if (port->flags & ASYNC_CLOSING)
+			interruptible_sleep_on(&port->close_wait);
 #ifdef SERIAL_DO_RESTART
-		if (info->port.flags & ASYNC_HUP_NOTIFY)
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
 			return -ERESTARTSYS;
@@ -2091,7 +2102,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	 */
 	if ((filp->f_flags & O_NONBLOCK) ||
 	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -2101,20 +2112,20 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	/*
 	 * Block waiting for the carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * rs_close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 #ifdef SERIAL_DEBUG_OPEN
 	printk(KERN_DEBUG "block_til_ready before block: ttys%d, count = %d\n",
-	       info->line, info->port.count);
+	       info->line, port->count);
 #endif
 	spin_lock_irqsave(&info->lock, flags);
 	if (!tty_hung_up_p(filp))
-		info->port.count--;
-	info->port.blocked_open++;
+		port->count--;
+	port->blocked_open++;
 	while (1) {
 		if ((tty->termios->c_cflag & CBAUD)) {
 			unsigned int scratch;
@@ -2129,9 +2140,9 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		}
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) ||
-		    !(info->port.flags & ASYNC_INITIALIZED)) {
+		    !(port->flags & ASYNC_INITIALIZED)) {
 #ifdef SERIAL_DO_RESTART
-			if (info->port.flags & ASYNC_HUP_NOTIFY)
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
@@ -2141,11 +2152,9 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 			break;
 		}
 
-		serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT);
-		if (serial_in(info, UART_ESI_STAT2) & UART_MSR_DCD)
-			do_clocal = 1;
+		cd = tty_port_carrier_raised(port);
 
-		if (!(info->port.flags & ASYNC_CLOSING) &&
+		if (!(port->flags & ASYNC_CLOSING) &&
 		    (do_clocal))
 			break;
 		if (signal_pending(current)) {
@@ -2154,25 +2163,25 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		}
 #ifdef SERIAL_DEBUG_OPEN
 		printk(KERN_DEBUG "block_til_ready blocking: ttys%d, count = %d\n",
-		       info->line, info->port.count);
+		       info->line, port->count);
 #endif
 		spin_unlock_irqrestore(&info->lock, flags);
 		schedule();
 		spin_lock_irqsave(&info->lock, flags);
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	if (!tty_hung_up_p(filp))
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	spin_unlock_irqrestore(&info->lock, flags);
 #ifdef SERIAL_DEBUG_OPEN
 	printk(KERN_DEBUG "block_til_ready after blocking: ttys%d, count = %d\n",
-	       info->line, info->port.count);
+	       info->line, port->count);
 #endif
 	if (retval)
 		return retval;
-	info->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
@@ -2329,6 +2338,10 @@ static const struct tty_operations esp_ops = {
 	.tiocmset = esp_tiocmset,
 };
 
+static const struct tty_port_operations esp_port_ops = {
+	.esp_carrier_raised,
+};
+
 /*
  * The serial driver boot-time initialization code!
  */
@@ -2415,6 +2428,8 @@ static int __init espserial_init(void)
 	offset = 0;
 
 	do {
+		tty_port_init(&info->port);
+		info->port.ops = &esp_port_ops;
 		info->io_port = esp[i] + offset;
 		info->irq = irq[i];
 		info->line = (i * 8) + (offset / 8);
@@ -2437,8 +2452,6 @@ static int __init espserial_init(void)
 		info->config.flow_off = flow_off;
 		info->config.pio_threshold = pio_threshold;
 		info->next_port = ports;
-		init_waitqueue_head(&info->port.open_wait);
-		init_waitqueue_head(&info->port.close_wait);
 		init_waitqueue_head(&info->delta_msr_wait);
 		init_waitqueue_head(&info->break_wait);
 		ports = info;
diff --git a/drivers/char/generic_serial.c b/drivers/char/generic_serial.c
index c6090f8..2356994 100644
--- a/drivers/char/generic_serial.c
+++ b/drivers/char/generic_serial.c
@@ -397,7 +397,8 @@ void gs_hangup(struct tty_struct *tty)
 
 int gs_block_til_ready(void *port_, struct file * filp)
 {
-	struct gs_port *port = port_;
+	struct gs_port *gp = port_;
+	struct tty_port *port = &gp->port;
 	DECLARE_WAITQUEUE(wait, current);
 	int    retval;
 	int    do_clocal = 0;
@@ -409,16 +410,16 @@ int gs_block_til_ready(void *port_, struct file * filp)
 
 	if (!port) return 0;
 
-	tty = port->port.tty;
+	tty = port->tty;
 
 	gs_dprintk (GS_DEBUG_BTR, "Entering gs_block_till_ready.\n"); 
 	/*
 	 * If the device is in the middle of being closed, then block
 	 * until it's done, and then try again.
 	 */
-	if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->port.close_wait);
-		if (port->port.flags & ASYNC_HUP_NOTIFY)
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
+		interruptible_sleep_on(&port->close_wait);
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
 			return -ERESTARTSYS;
@@ -432,7 +433,7 @@ int gs_block_til_ready(void *port_, struct file * filp)
 	 */
 	if ((filp->f_flags & O_NONBLOCK) ||
 	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -444,34 +445,34 @@ int gs_block_til_ready(void *port_, struct file * filp)
 	/*
 	 * Block waiting for the carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, port->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * rs_close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 	retval = 0;
 
-	add_wait_queue(&port->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	gs_dprintk (GS_DEBUG_BTR, "after add waitq.\n"); 
-	spin_lock_irqsave(&port->driver_lock, flags);
+	spin_lock_irqsave(&gp->driver_lock, flags);
 	if (!tty_hung_up_p(filp)) {
-		port->port.count--;
+		port->count--;
 	}
-	spin_unlock_irqrestore(&port->driver_lock, flags);
-	port->port.blocked_open++;
+	spin_unlock_irqrestore(&gp->driver_lock, flags);
+	port->blocked_open++;
 	while (1) {
-		CD = port->rd->get_CD (port);
+		CD = tty_port_carrier_raised(port);
 		gs_dprintk (GS_DEBUG_BTR, "CD is now %d.\n", CD);
 		set_current_state (TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) ||
-		    !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
+		    !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
+		if (!(port->flags & ASYNC_CLOSING) &&
 		    (do_clocal || CD))
 			break;
 		gs_dprintk (GS_DEBUG_BTR, "signal_pending is now: %d (%lx)\n", 
@@ -483,17 +484,17 @@ int gs_block_til_ready(void *port_, struct file * filp)
 		schedule();
 	}
 	gs_dprintk (GS_DEBUG_BTR, "Got out of the loop. (%d)\n",
-		    port->port.blocked_open);
+		    port->blocked_open);
 	set_current_state (TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	if (!tty_hung_up_p(filp)) {
-		port->port.count++;
+		port->count++;
 	}
-	port->port.blocked_open--;
+	port->blocked_open--;
 	if (retval)
 		return retval;
 
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	func_exit ();
 	return 0;
 }			 
diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index 04e4549..b3da485 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -830,20 +830,28 @@ static int isicom_setup_port(struct tty_struct *tty)
 	return 0;
 }
 
+static int isicom_carrier_raised(struct tty_port *port)
+{
+	struct isi_port *ip = container_of(port, struct isi_port, port);
+	return (ip->status & ISI_DCD)?1 : 0;
+}
+
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
-	struct isi_port *port)
+	struct isi_port *ip)
 {
-	struct isi_board *card = port->card;
+	struct isi_board *card = ip->card;
+	struct tty_port *port = &ip->port;
 	int do_clocal = 0, retval;
 	unsigned long flags;
 	DECLARE_WAITQUEUE(wait, current);
+	int cd;
 
 	/* block if port is in the process of being closed */
 
-	if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) {
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
 		pr_dbg("block_til_ready: close in progress.\n");
-		interruptible_sleep_on(&port->port.close_wait);
-		if (port->port.flags & ASYNC_HUP_NOTIFY)
+		interruptible_sleep_on(&port->close_wait);
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
 			return -ERESTARTSYS;
@@ -854,7 +862,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	if ((filp->f_flags & O_NONBLOCK) ||
 			(tty->flags & (1 << TTY_IO_ERROR))) {
 		pr_dbg("block_til_ready: non-block mode.\n");
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -864,29 +872,29 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	/* block waiting for DCD to be asserted, and while
 						callout dev is busy */
 	retval = 0;
-	add_wait_queue(&port->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	spin_lock_irqsave(&card->card_lock, flags);
 	if (!tty_hung_up_p(filp))
-		port->port.count--;
-	port->port.blocked_open++;
+		port->count--;
+	port->blocked_open++;
 	spin_unlock_irqrestore(&card->card_lock, flags);
 
 	while (1) {
-		raise_dtr_rts(port);
+		raise_dtr_rts(ip);
 
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
-				(do_clocal || (port->status & ISI_DCD))) {
+		cd = tty_port_carrier_raised(port);
+		if (!(port->flags & ASYNC_CLOSING) &&
+				(do_clocal || cd))
 			break;
-		}
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
 			break;
@@ -894,15 +902,15 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		schedule();
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	spin_lock_irqsave(&card->card_lock, flags);
 	if (!tty_hung_up_p(filp))
-		port->port.count++;
-	port->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	spin_unlock_irqrestore(&card->card_lock, flags);
 	if (retval)
 		return retval;
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
@@ -1452,6 +1460,10 @@ static const struct tty_operations isicom_ops = {
 	.break_ctl		= isicom_send_break,
 };
 
+static const struct tty_port_operations isicom_port_ops = {
+	.carrier_raised		= isicom_carrier_raised,
+};
+
 static int __devinit reset_card(struct pci_dev *pdev,
 	const unsigned int card, unsigned int *signature)
 {
@@ -1794,6 +1806,7 @@ static int __init isicom_init(void)
 		spin_lock_init(&isi_card[idx].card_lock);
 		for (channel = 0; channel < 16; channel++, port++) {
 			tty_port_init(&port->port);
+			port->port.ops = &isicom_port_ops;
 			port->magic = ISICOM_MAGIC;
 			port->card = &isi_card[idx];
 			port->channel = channel;
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index 4b10770..c4682f9 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -151,7 +151,7 @@ static char	*stli_drvversion = "5.6.0";
 static char	*stli_serialname = "ttyE";
 
 static struct tty_driver	*stli_serial;
-
+static const struct tty_port_operations stli_port_ops;
 
 #define	STLI_TXBUFSIZE		4096
 
@@ -1183,6 +1183,12 @@ static int stli_setport(struct tty_struct *tty)
 
 /*****************************************************************************/
 
+static int stli_carrier_raised(struct tty_port *port)
+{
+	struct stliport *portp = container_of(port, struct stliport, port);
+	return (portp->sigs & TIOCM_CD) ? 1 : 0;
+}
+
 /*
  *	Possibly need to wait for carrier (DCD signal) to come high. Say
  *	maybe because if we are clocal then we don't need to wait...
@@ -1193,6 +1199,7 @@ static int stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
 {
 	unsigned long flags;
 	int rc, doclocal;
+	struct tty_port *port = &portp->port;
 
 	rc = 0;
 	doclocal = 0;
@@ -1203,7 +1210,7 @@ static int stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
 	spin_lock_irqsave(&stli_lock, flags);
 	portp->openwaitcnt++;
 	if (! tty_hung_up_p(filp))
-		portp->port.count--;
+		port->count--;
 	spin_unlock_irqrestore(&stli_lock, flags);
 
 	for (;;) {
@@ -1212,27 +1219,27 @@ static int stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
 		    &portp->asig, sizeof(asysigs_t), 0)) < 0)
 			break;
 		if (tty_hung_up_p(filp) ||
-		    ((portp->port.flags & ASYNC_INITIALIZED) == 0)) {
-			if (portp->port.flags & ASYNC_HUP_NOTIFY)
+		    ((port->flags & ASYNC_INITIALIZED) == 0)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				rc = -EBUSY;
 			else
 				rc = -ERESTARTSYS;
 			break;
 		}
-		if (((portp->port.flags & ASYNC_CLOSING) == 0) &&
-		    (doclocal || (portp->sigs & TIOCM_CD))) {
+		if (((port->flags & ASYNC_CLOSING) == 0) &&
+		    (doclocal || tty_port_carrier_raised(port))) {
 			break;
 		}
 		if (signal_pending(current)) {
 			rc = -ERESTARTSYS;
 			break;
 		}
-		interruptible_sleep_on(&portp->port.open_wait);
+		interruptible_sleep_on(&port->open_wait);
 	}
 
 	spin_lock_irqsave(&stli_lock, flags);
 	if (! tty_hung_up_p(filp))
-		portp->port.count++;
+		port->count++;
 	portp->openwaitcnt--;
 	spin_unlock_irqrestore(&stli_lock, flags);
 
@@ -2696,6 +2703,7 @@ static int stli_initports(struct stlibrd *brdp)
 			continue;
 		}
 		tty_port_init(&portp->port);
+		portp->port.ops = &stli_port_ops;
 		portp->magic = STLI_PORTMAGIC;
 		portp->portnr = i;
 		portp->brdnr = brdp->brdnr;
@@ -4518,6 +4526,10 @@ static const struct tty_operations stli_ops = {
 	.tiocmset = stli_tiocmset,
 };
 
+static const struct tty_port_operations stli_port_ops = {
+	.carrier_raised = stli_carrier_raised,
+};
+
 /*****************************************************************************/
 /*
  *	Loadable module initialization stuff.
diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index 12d327a..8b0da97 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -206,6 +206,7 @@ static void moxa_poll(unsigned long);
 static void moxa_set_tty_param(struct tty_struct *, struct ktermios *);
 static void moxa_setup_empty_event(struct tty_struct *);
 static void moxa_shut_down(struct tty_struct *);
+static int moxa_carrier_raised(struct tty_port *);
 /*
  * moxa board interface functions:
  */
@@ -405,6 +406,10 @@ static const struct tty_operations moxa_ops = {
 	.tiocmset = moxa_tiocmset,
 };
 
+static const struct tty_port_operations moxa_port_ops = {
+	.carrier_raised = moxa_carrier_raised,
+};
+
 static struct tty_driver *moxaDriver;
 static DEFINE_TIMER(moxaTimer, moxa_poll, 0, 0);
 static DEFINE_SPINLOCK(moxa_lock);
@@ -826,6 +831,7 @@ static int moxa_init_board(struct moxa_board_conf *brd, struct device *dev)
 
 	for (i = 0, p = brd->ports; i < MAX_PORTS_PER_BOARD; i++, p++) {
 		tty_port_init(&p->port);
+		p->port.ops = &moxa_port_ops;
 		p->type = PORT_16550A;
 		p->cflag = B9600 | CS8 | CREAD | CLOCAL | HUPCL;
 	}
@@ -1115,15 +1121,27 @@ static void moxa_close_port(struct tty_struct *tty)
 	tty_port_tty_set(&ch->port, NULL);
 }
 
+static int moxa_carrier_raised(struct tty_port *port)
+{
+	struct moxa_port *ch = container_of(port, struct moxa_port, port);
+	int dcd;
+
+	spin_lock_bh(&moxa_lock);
+	dcd = ch->DCDState;
+	spin_unlock_bh(&moxa_lock);
+	return dcd;
+}
+
 static int moxa_block_till_ready(struct tty_struct *tty, struct file *filp,
 			    struct moxa_port *ch)
 {
+	struct tty_port *port = &ch->port;
 	DEFINE_WAIT(wait);
 	int retval = 0;
 	u8 dcd;
 
 	while (1) {
-		prepare_to_wait(&ch->port.open_wait, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp)) {
 #ifdef SERIAL_DO_RESTART
 			retval = -ERESTARTSYS;
@@ -1132,9 +1150,7 @@ static int moxa_block_till_ready(struct tty_struct *tty, struct file *filp,
 #endif
 			break;
 		}
-		spin_lock_bh(&moxa_lock);
-		dcd = ch->DCDState;
-		spin_unlock_bh(&moxa_lock);
+		dcd = tty_port_carrier_raised(port);
 		if (dcd)
 			break;
 
@@ -1144,7 +1160,7 @@ static int moxa_block_till_ready(struct tty_struct *tty, struct file *filp,
 		}
 		schedule();
 	}
-	finish_wait(&ch->port.open_wait, &wait);
+	finish_wait(&port->open_wait, &wait);
 
 	return retval;
 }
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 0477669..eafbbcf 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -541,13 +541,21 @@ static unsigned char mxser_get_msr(int baseaddr, int mode, int port)
 	return status;
 }
 
+static int mxser_carrier_raised(struct tty_port *port)
+{
+	struct mxser_port *mp = container_of(port, struct mxser_port, port);
+	return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0;
+}
+
 static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
-		struct mxser_port *port)
+		struct mxser_port *mp)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	int retval;
 	int do_clocal = 0;
 	unsigned long flags;
+	int cd;
+	struct tty_port *port = &mp->port;
 
 	/*
 	 * If non-blocking mode is set, or the port is not enabled,
@@ -555,7 +563,7 @@ static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 	 */
 	if ((filp->f_flags & O_NONBLOCK) ||
 			test_bit(TTY_IO_ERROR, &tty->flags)) {
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -565,34 +573,33 @@ static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 	/*
 	 * Block waiting for the carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, port->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * mxser_close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 	retval = 0;
-	add_wait_queue(&port->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
-	spin_lock_irqsave(&port->slock, flags);
+	spin_lock_irqsave(&mp->slock, flags);
 	if (!tty_hung_up_p(filp))
-		port->port.count--;
-	spin_unlock_irqrestore(&port->slock, flags);
-	port->port.blocked_open++;
+		port->count--;
+	spin_unlock_irqrestore(&mp->slock, flags);
+	port->blocked_open++;
 	while (1) {
-		spin_lock_irqsave(&port->slock, flags);
-		outb(inb(port->ioaddr + UART_MCR) |
-			UART_MCR_DTR | UART_MCR_RTS, port->ioaddr + UART_MCR);
-		spin_unlock_irqrestore(&port->slock, flags);
+		spin_lock_irqsave(&mp->slock, flags);
+		outb(inb(mp->ioaddr + UART_MCR) |
+			UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+		spin_unlock_irqrestore(&mp->slock, flags);
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
-				(do_clocal ||
-				(inb(port->ioaddr + UART_MSR) & UART_MSR_DCD)))
+		cd = tty_port_carrier_raised(port);
+		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd))
 			break;
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -601,13 +608,13 @@ static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 		schedule();
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	if (!tty_hung_up_p(filp))
-		port->port.count++;
-	port->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	if (retval)
 		return retval;
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
@@ -2449,6 +2456,10 @@ static const struct tty_operations mxser_ops = {
 	.tiocmset = mxser_tiocmset,
 };
 
+struct tty_port_operations mxser_port_ops = {
+	.carrier_raised = mxser_carrier_raised,
+};
+
 /*
  * The MOXA Smartio/Industio serial driver boot-time initialization code!
  */
@@ -2482,6 +2493,7 @@ static int __devinit mxser_initbrd(struct mxser_board *brd,
 	for (i = 0; i < brd->info->nports; i++) {
 		info = &brd->ports[i];
 		tty_port_init(&info->port);
+		info->port.ops = &mxser_port_ops;
 		info->board = brd;
 		info->stop_rx = 0;
 		info->ldisc_stop_rx = 0;
diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index a8f68a3..ec2afd1 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -173,7 +173,7 @@ static void rio_disable_tx_interrupts(void *ptr);
 static void rio_enable_tx_interrupts(void *ptr);
 static void rio_disable_rx_interrupts(void *ptr);
 static void rio_enable_rx_interrupts(void *ptr);
-static int rio_get_CD(void *ptr);
+static int rio_carrier_raised(struct tty_port *port);
 static void rio_shutdown_port(void *ptr);
 static int rio_set_real_termios(void *ptr);
 static void rio_hungup(void *ptr);
@@ -224,7 +224,6 @@ static struct real_driver rio_real_driver = {
 	rio_enable_tx_interrupts,
 	rio_disable_rx_interrupts,
 	rio_enable_rx_interrupts,
-	rio_get_CD,
 	rio_shutdown_port,
 	rio_set_real_termios,
 	rio_chars_in_buffer,
@@ -476,9 +475,9 @@ static void rio_enable_rx_interrupts(void *ptr)
 
 
 /* Jeez. Isn't this simple?  */
-static int rio_get_CD(void *ptr)
+static int rio_carrier_raised(struct tty_port *port)
 {
-	struct Port *PortP = ptr;
+	struct Port *PortP = container_of(port, struct Port, gs.port);
 	int rv;
 
 	func_enter();
@@ -806,7 +805,9 @@ static void *ckmalloc(int size)
 	return p;
 }
 
-
+static const struct tty_port_operations rio_port_ops = {
+	.carrier_raised = rio_carrier_raised,
+};
 
 static int rio_init_datastructures(void)
 {
@@ -842,17 +843,14 @@ static int rio_init_datastructures(void)
 			goto free6;
 		}
 		rio_dprintk(RIO_DEBUG_INIT, "initing port %d (%d)\n", i, port->Mapped);
+		tty_port_init(&port->gs.port);
+		port->gs.port.ops = &rio_port_ops;
 		port->PortNum = i;
 		port->gs.magic = RIO_MAGIC;
 		port->gs.close_delay = HZ / 2;
 		port->gs.closing_wait = 30 * HZ;
 		port->gs.rd = &rio_real_driver;
 		spin_lock_init(&port->portSem);
-		/*
-		 * Initializing wait queue
-		 */
-		init_waitqueue_head(&port->gs.port.open_wait);
-		init_waitqueue_head(&port->gs.port.close_wait);
 	}
 #else
 	/* We could postpone initializing them to when they are configured. */
diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c
index 2c6c8f3..6ad1c2a 100644
--- a/drivers/char/riscom8.c
+++ b/drivers/char/riscom8.c
@@ -857,23 +857,40 @@ static void rc_shutdown_port(struct tty_struct *tty,
 		rc_shutdown_board(bp);
 }
 
+static int carrier_raised(struct tty_port *port)
+{
+	struct riscom_port *p = container_of(port, struct riscom_port, port);
+	struct riscom_board *bp = port_Board(p);
+	unsigned long flags;
+	int CD;
+	
+	spin_lock_irqsave(&riscom_lock, flags);
+	rc_out(bp, CD180_CAR, port_No(p));
+	CD = rc_in(bp, CD180_MSVR) & MSVR_CD;
+	rc_out(bp, CD180_MSVR, MSVR_RTS);
+	bp->DTR &= ~(1u << port_No(p));
+	rc_out(bp, RC_DTR, bp->DTR);
+	spin_unlock_irqrestore(&riscom_lock, flags);
+	return CD;
+}
+
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
-			   struct riscom_port *port)
+			   struct riscom_port *rp)
 {
 	DECLARE_WAITQUEUE(wait, current);
-	struct riscom_board *bp = port_Board(port);
 	int    retval;
 	int    do_clocal = 0;
 	int    CD;
 	unsigned long flags;
+	struct tty_port *port = &rp->port;
 
 	/*
 	 * If the device is in the middle of being closed, then block
 	 * until it's done, and then try again.
 	 */
-	if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->port.close_wait);
-		if (port->port.flags & ASYNC_HUP_NOTIFY)
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
+		interruptible_sleep_on(&port->close_wait);
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
 			return -ERESTARTSYS;
@@ -885,7 +902,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	 */
 	if ((filp->f_flags & O_NONBLOCK) ||
 	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -900,37 +917,29 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	 * exit, either normal or abnormal.
 	 */
 	retval = 0;
-	add_wait_queue(&port->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	spin_lock_irqsave(&riscom_lock, flags);
 
 	if (!tty_hung_up_p(filp))
-		port->port.count--;
+		port->count--;
 
 	spin_unlock_irqrestore(&riscom_lock, flags);
 
-	port->port.blocked_open++;
+	port->blocked_open++;
 	while (1) {
-		spin_lock_irqsave(&riscom_lock, flags);
-
-		rc_out(bp, CD180_CAR, port_No(port));
-		CD = rc_in(bp, CD180_MSVR) & MSVR_CD;
-		rc_out(bp, CD180_MSVR, MSVR_RTS);
-		bp->DTR &= ~(1u << port_No(port));
-		rc_out(bp, RC_DTR, bp->DTR);
-
-		spin_unlock_irqrestore(&riscom_lock, flags);
 
+		CD = tty_port_carrier_raised(port);
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) ||
-		    !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
+		    !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
+		if (!(port->flags & ASYNC_CLOSING) &&
 		    (do_clocal || CD))
 			break;
 		if (signal_pending(current)) {
@@ -940,14 +949,14 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		schedule();
 	}
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	if (!tty_hung_up_p(filp))
-		port->port.count++;
-	port->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	if (retval)
 		return retval;
 
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
@@ -1510,6 +1519,11 @@ static const struct tty_operations riscom_ops = {
 	.break_ctl = rc_send_break,
 };
 
+static const struct tty_port_operations riscom_port_ops = {
+	.carrier_raised = carrier_raised,
+};
+
+
 static int __init rc_init_drivers(void)
 {
 	int error;
@@ -1541,6 +1555,7 @@ static int __init rc_init_drivers(void)
 	memset(rc_port, 0, sizeof(rc_port));
 	for (i = 0; i < RC_NPORT * RC_NBOARD; i++)  {
 		tty_port_init(&rc_port[i].port);
+		rc_port[i].port.ops = &riscom_port_ops;
 		rc_port[i].magic = RISCOM8_MAGIC;
 	}
 	return 0;
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index 584d791..4a4110e 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -135,6 +135,7 @@ static int rcktpt_type[NUM_BOARDS];
 static int is_PCI[NUM_BOARDS];
 static rocketModel_t rocketModel[NUM_BOARDS];
 static int max_board;
+static const struct tty_port_operations rocket_port_ops;
 
 /*
  * The following arrays define the interrupt bits corresponding to each AIOP.
@@ -649,9 +650,8 @@ static void init_r_port(int board, int aiop, int chan, struct pci_dev *pci_dev)
 	info->board = board;
 	info->aiop = aiop;
 	info->chan = chan;
-	info->port.closing_wait = 3000;
-	info->port.close_delay = 50;
-	init_waitqueue_head(&info->port.open_wait);
+	tty_port_init(&info->port);
+	info->port.ops = &rocket_port_ops;
 	init_completion(&info->close_wait);
 	info->flags &= ~ROCKET_MODE_MASK;
 	switch (pc104[board][line]) {
@@ -864,11 +864,18 @@ static void configure_r_port(struct r_port *info,
 	}
 }
 
+static int carrier_raised(struct tty_port *port)
+{
+	struct r_port *info = container_of(port, struct r_port, port);
+	return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0;
+}
+
 /*  info->port.count is considered critical, protected by spinlocks.  */
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
 			   struct r_port *info)
 {
 	DECLARE_WAITQUEUE(wait, current);
+	struct tty_port *port = &info->port;
 	int retval;
 	int do_clocal = 0, extra_count = 0;
 	unsigned long flags;
@@ -898,13 +905,13 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 	/*
 	 * Block waiting for the carrier detect and the line to become free.  While we are in
-	 * this loop, info->port.count is dropped by one, so that rp_close() knows when to free things.
+	 * this loop, port->count is dropped by one, so that rp_close() knows when to free things.
          * We restore it upon exit, either normal or abnormal.
 	 */
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 #ifdef ROCKET_DEBUG_OPEN
-	printk(KERN_INFO "block_til_ready before block: ttyR%d, count = %d\n", info->line, info->port.count);
+	printk(KERN_INFO "block_til_ready before block: ttyR%d, count = %d\n", info->line, port->count);
 #endif
 	spin_lock_irqsave(&info->slock, flags);
 
@@ -913,10 +920,10 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 #else
 	if (!tty_hung_up_p(filp)) {
 		extra_count = 1;
-		info->port.count--;
+		port->count--;
 	}
 #endif
-	info->port.blocked_open++;
+	port->blocked_open++;
 
 	spin_unlock_irqrestore(&info->slock, flags);
 
@@ -933,7 +940,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(info->flags & ROCKET_CLOSING) && (do_clocal || (sGetChanStatusLo(&info->channel) & CD_ACT)))
+		if (!(info->flags & ROCKET_CLOSING) &&
+			(do_clocal || tty_port_carrier_raised(port)))
 			break;
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -941,24 +949,24 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		}
 #ifdef ROCKET_DEBUG_OPEN
 		printk(KERN_INFO "block_til_ready blocking: ttyR%d, count = %d, flags=0x%0x\n",
-		     info->line, info->port.count, info->flags);
+		     info->line, port->count, info->flags);
 #endif
 		schedule();	/*  Don't hold spinlock here, will hang PC */
 	}
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 
 	spin_lock_irqsave(&info->slock, flags);
 
 	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 
 	spin_unlock_irqrestore(&info->slock, flags);
 
 #ifdef ROCKET_DEBUG_OPEN
 	printk(KERN_INFO "block_til_ready after blocking: ttyR%d, count = %d\n",
-	       info->line, info->port.count);
+	       info->line, port->count);
 #endif
 	if (retval)
 		return retval;
@@ -2371,6 +2379,10 @@ static const struct tty_operations rocket_ops = {
 	.tiocmset = rp_tiocmset,
 };
 
+static const struct tty_port_operations rocket_port_ops = {
+	.carrier_raised = carrier_raised,
+};
+
 /*
  * The module "startup" routine; it's run when the module is loaded.
  */
diff --git a/drivers/char/ser_a2232.c b/drivers/char/ser_a2232.c
index 7b0c352..0c97f34 100644
--- a/drivers/char/ser_a2232.c
+++ b/drivers/char/ser_a2232.c
@@ -122,7 +122,7 @@ static void a2232_disable_tx_interrupts(void *ptr);
 static void a2232_enable_tx_interrupts(void *ptr);
 static void a2232_disable_rx_interrupts(void *ptr);
 static void a2232_enable_rx_interrupts(void *ptr);
-static int  a2232_get_CD(void *ptr);
+static int  a2232_carrier_raised(struct tty_port *port);
 static void a2232_shutdown_port(void *ptr);
 static int  a2232_set_real_termios(void *ptr);
 static int  a2232_chars_in_buffer(void *ptr);
@@ -148,7 +148,6 @@ static struct real_driver a2232_real_driver = {
         a2232_enable_tx_interrupts,
         a2232_disable_rx_interrupts,
         a2232_enable_rx_interrupts,
-        a2232_get_CD,
         a2232_shutdown_port,
         a2232_set_real_termios,
         a2232_chars_in_buffer,
@@ -260,9 +259,10 @@ static void a2232_enable_rx_interrupts(void *ptr)
 	port->disable_rx = 0;
 }
 
-static int  a2232_get_CD(void *ptr)
+static int  a2232_carrier_raised(struct tty_port *port)
 {
-	return ((struct a2232_port *) ptr)->cd_status;
+	struct a2232_port *ap = container_of(port, struct a2232_port, gs.port);
+	return ap->cd_status;
 }
 
 static void a2232_shutdown_port(void *ptr)
@@ -638,6 +638,10 @@ int ch, err, n, p;
 	return IRQ_HANDLED;
 }
 
+static const struct tty_port_operations a2232_port_ops = {
+	.carrier_raised = a2232_carrier_raised,
+};
+
 static void a2232_init_portstructs(void)
 {
 	struct a2232_port *port;
@@ -645,6 +649,8 @@ static void a2232_init_portstructs(void)
 
 	for (i = 0; i < MAX_A2232_BOARDS*NUMLINES; i++) {
 		port = a2232_ports + i;
+		tty_port_init(&port->gs.port);
+		port->gs.port.ops = &a2232_port_ops;
 		port->which_a2232 = i/NUMLINES;
 		port->which_port_on_a2232 = i%NUMLINES;
 		port->disable_rx = port->throttle_input = port->cd_status = 0;
@@ -652,11 +658,6 @@ static void a2232_init_portstructs(void)
 		port->gs.close_delay = HZ/2;
 		port->gs.closing_wait = 30 * HZ;
 		port->gs.rd = &a2232_real_driver;
-#ifdef NEW_WRITE_LOCKING
-		mutex_init(&(port->gs.port_write_mutex));
-#endif
-		init_waitqueue_head(&port->gs.port.open_wait);
-		init_waitqueue_head(&port->gs.port.close_wait);
 	}
 }
 
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 963b03f..12aecdaf 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -130,6 +130,8 @@ static char		stl_unwanted[SC26198_RXFIFOSIZE];
 static DEFINE_MUTEX(stl_brdslock);
 static struct stlbrd		*stl_brds[STL_MAXBRDS];
 
+static const struct tty_port_operations stl_port_ops;
+
 /*
  *	Per board state flags. Used with the state field of the board struct.
  *	Not really much here!
@@ -786,6 +788,12 @@ static int stl_open(struct tty_struct *tty, struct file *filp)
 
 /*****************************************************************************/
 
+static int stl_carrier_raised(struct tty_port *port)
+{
+	struct stlport *portp = container_of(port, struct stlport, port);
+	return (portp->sigs & TIOCM_CD) ? 1 : 0;
+}
+
 /*
  *	Possibly need to wait for carrier (DCD signal) to come high. Say
  *	maybe because if we are clocal then we don't need to wait...
@@ -796,6 +804,7 @@ static int stl_waitcarrier(struct tty_struct *tty, struct stlport *portp,
 {
 	unsigned long	flags;
 	int		rc, doclocal;
+	struct tty_port *port = &portp->port;
 
 	pr_debug("stl_waitcarrier(portp=%p,filp=%p)\n", portp, filp);
 
@@ -809,32 +818,32 @@ static int stl_waitcarrier(struct tty_struct *tty, struct stlport *portp,
 
 	portp->openwaitcnt++;
 	if (! tty_hung_up_p(filp))
-		portp->port.count--;
+		port->count--;
 
 	for (;;) {
 		/* Takes brd_lock internally */
 		stl_setsignals(portp, 1, 1);
 		if (tty_hung_up_p(filp) ||
-		    ((portp->port.flags & ASYNC_INITIALIZED) == 0)) {
-			if (portp->port.flags & ASYNC_HUP_NOTIFY)
+		    ((port->flags & ASYNC_INITIALIZED) == 0)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				rc = -EBUSY;
 			else
 				rc = -ERESTARTSYS;
 			break;
 		}
-		if (((portp->port.flags & ASYNC_CLOSING) == 0) &&
-		    (doclocal || (portp->sigs & TIOCM_CD)))
+		if (((port->flags & ASYNC_CLOSING) == 0) &&
+		    (doclocal || tty_port_carrier_raised(port)))
 			break;
 		if (signal_pending(current)) {
 			rc = -ERESTARTSYS;
 			break;
 		}
 		/* FIXME */
-		interruptible_sleep_on(&portp->port.open_wait);
+		interruptible_sleep_on(&port->open_wait);
 	}
 
 	if (! tty_hung_up_p(filp))
-		portp->port.count++;
+		port->count++;
 	portp->openwaitcnt--;
 	spin_unlock_irqrestore(&stallion_lock, flags);
 
@@ -1776,6 +1785,7 @@ static int __devinit stl_initports(struct stlbrd *brdp, struct stlpanel *panelp)
 			break;
 		}
 		tty_port_init(&portp->port);
+		portp->port.ops = &stl_port_ops;
 		portp->magic = STL_PORTMAGIC;
 		portp->portnr = i;
 		portp->brdnr = panelp->brdnr;
@@ -2659,6 +2669,10 @@ static const struct tty_operations stl_ops = {
 	.tiocmset = stl_tiocmset,
 };
 
+static const struct tty_port_operations stl_port_ops = {
+	.carrier_raised = stl_carrier_raised,
+};
+
 /*****************************************************************************/
 /*                       CD1400 HARDWARE FUNCTIONS                           */
 /*****************************************************************************/
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index ba4e862..a71bc58 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -279,7 +279,7 @@ static void sx_disable_tx_interrupts(void *ptr);
 static void sx_enable_tx_interrupts(void *ptr);
 static void sx_disable_rx_interrupts(void *ptr);
 static void sx_enable_rx_interrupts(void *ptr);
-static int sx_get_CD(void *ptr);
+static int sx_carrier_raised(struct tty_port *port);
 static void sx_shutdown_port(void *ptr);
 static int sx_set_real_termios(void *ptr);
 static void sx_close(void *ptr);
@@ -360,7 +360,6 @@ static struct real_driver sx_real_driver = {
 	sx_enable_tx_interrupts,
 	sx_disable_rx_interrupts,
 	sx_enable_rx_interrupts,
-	sx_get_CD,
 	sx_shutdown_port,
 	sx_set_real_termios,
 	sx_chars_in_buffer,
@@ -791,7 +790,7 @@ static int sx_getsignals(struct sx_port *port)
 	sx_dprintk(SX_DEBUG_MODEMSIGNALS, "getsignals: %d/%d  (%d/%d) "
 			"%02x/%02x\n",
 			(o_stat & OP_DTR) != 0, (o_stat & OP_RTS) != 0,
-			port->c_dcd, sx_get_CD(port),
+			port->c_dcd, tty_port_carrier_raised(&port->gs.port),
 			sx_read_channel_byte(port, hi_ip),
 			sx_read_channel_byte(port, hi_state));
 
@@ -1190,7 +1189,7 @@ static inline void sx_check_modem_signals(struct sx_port *port)
 
 	hi_state = sx_read_channel_byte(port, hi_state);
 	sx_dprintk(SX_DEBUG_MODEMSIGNALS, "Checking modem signals (%d/%d)\n",
-			port->c_dcd, sx_get_CD(port));
+			port->c_dcd, tty_port_carrier_raised(&port->gs.port));
 
 	if (hi_state & ST_BREAK) {
 		hi_state &= ~ST_BREAK;
@@ -1202,11 +1201,11 @@ static inline void sx_check_modem_signals(struct sx_port *port)
 		hi_state &= ~ST_DCD;
 		sx_dprintk(SX_DEBUG_MODEMSIGNALS, "got a DCD change.\n");
 		sx_write_channel_byte(port, hi_state, hi_state);
-		c_dcd = sx_get_CD(port);
+		c_dcd = tty_port_carrier_raised(&port->gs.port);
 		sx_dprintk(SX_DEBUG_MODEMSIGNALS, "DCD is now %d\n", c_dcd);
 		if (c_dcd != port->c_dcd) {
 			port->c_dcd = c_dcd;
-			if (sx_get_CD(port)) {
+			if (tty_port_carrier_raised(&port->gs.port)) {
 				/* DCD went UP */
 				if ((sx_read_channel_byte(port, hi_hstat) !=
 						HS_IDLE_CLOSED) &&
@@ -1415,13 +1414,10 @@ static void sx_enable_rx_interrupts(void *ptr)
 }
 
 /* Jeez. Isn't this simple? */
-static int sx_get_CD(void *ptr)
+static int sx_carrier_raised(struct tty_port *port)
 {
-	struct sx_port *port = ptr;
-	func_enter2();
-
-	func_exit();
-	return ((sx_read_channel_byte(port, hi_ip) & IP_DCD) != 0);
+	struct sx_port *sp = container_of(port, struct sx_port, gs.port);
+	return ((sx_read_channel_byte(sp, hi_ip) & IP_DCD) != 0);
 }
 
 /* Jeez. Isn't this simple? */
@@ -1536,7 +1532,7 @@ static int sx_open(struct tty_struct *tty, struct file *filp)
 	}
 	/* tty->low_latency = 1; */
 
-	port->c_dcd = sx_get_CD(port);
+	port->c_dcd = sx_carrier_raised(&port->gs.port);
 	sx_dprintk(SX_DEBUG_OPEN, "at open: cd=%d\n", port->c_dcd);
 
 	func_exit();
@@ -2354,6 +2350,10 @@ static const struct tty_operations sx_ops = {
 	.tiocmset = sx_tiocmset,
 };
 
+static const struct tty_port_operations sx_port_ops = {
+	.carrier_raised = sx_carrier_raised,
+};
+
 static int sx_init_drivers(void)
 {
 	int error;
@@ -2410,6 +2410,7 @@ static int sx_init_portstructs(int nboards, int nports)
 		for (j = 0; j < boards[i].nports; j++) {
 			sx_dprintk(SX_DEBUG_INIT, "initing port %d\n", j);
 			tty_port_init(&port->gs.port);
+			port->gs.port.ops = &sx_port_ops;
 			port->gs.magic = SX_MAGIC;
 			port->gs.close_delay = HZ / 2;
 			port->gs.closing_wait = 30 * HZ;
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index 500f517..fb2e6b5 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3281,6 +3281,23 @@ static void mgsl_hangup(struct tty_struct *tty)
 	
 }	/* end of mgsl_hangup() */
 
+/*
+ * carrier_raised()
+ *
+ *	Return true if carrier is raised
+ */
+
+static int carrier_raised(struct tty_port *port)
+{
+	unsigned long flags;
+	struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
+	
+	spin_lock_irqsave(&info->irq_spinlock, flags);
+ 	usc_get_serial_signals(info);
+	spin_unlock_irqrestore(&info->irq_spinlock, flags);
+	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
+}
+
 /* block_til_ready()
  * 
  * 	Block the current process until the specified port
@@ -3302,6 +3319,8 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 	bool		do_clocal = false;
 	bool		extra_count = false;
 	unsigned long	flags;
+	int		dcd;
+	struct tty_port *port = &info->port;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):block_til_ready on %s\n",
@@ -3309,7 +3328,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 
 	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
 		/* nonblock mode is set or port is not enabled */
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -3318,25 +3337,25 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 
 	/* Wait for carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * mgsl_close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 	 
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):block_til_ready before block on %s count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 
 	spin_lock_irqsave(&info->irq_spinlock, flags);
 	if (!tty_hung_up_p(filp)) {
 		extra_count = true;
-		info->port.count--;
+		port->count--;
 	}
 	spin_unlock_irqrestore(&info->irq_spinlock, flags);
-	info->port.blocked_open++;
+	port->blocked_open++;
 	
 	while (1) {
 		if (tty->termios->c_cflag & CBAUD) {
@@ -3348,20 +3367,16 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 		
 		set_current_state(TASK_INTERRUPTIBLE);
 		
-		if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){
-			retval = (info->port.flags & ASYNC_HUP_NOTIFY) ?
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){
+			retval = (port->flags & ASYNC_HUP_NOTIFY) ?
 					-EAGAIN : -ERESTARTSYS;
 			break;
 		}
 		
-		spin_lock_irqsave(&info->irq_spinlock,flags);
-	 	usc_get_serial_signals(info);
-		spin_unlock_irqrestore(&info->irq_spinlock,flags);
+		dcd = tty_port_carrier_raised(&info->port);
 		
- 		if (!(info->port.flags & ASYNC_CLOSING) &&
- 		    (do_clocal || (info->serial_signals & SerialSignal_DCD)) ) {
+ 		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || dcd))
  			break;
-		}
 			
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -3370,24 +3385,24 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 		
 		if (debug_level >= DEBUG_LEVEL_INFO)
 			printk("%s(%d):block_til_ready blocking on %s count=%d\n",
-				 __FILE__,__LINE__, tty->driver->name, info->port.count );
+				 __FILE__,__LINE__, tty->driver->name, port->count );
 				 
 		schedule();
 	}
 	
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	
 	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):block_til_ready after blocking on %s count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 			 
 	if (!retval)
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		
 	return retval;
 	
@@ -4304,6 +4319,11 @@ static void mgsl_add_device( struct mgsl_struct *info )
 
 }	/* end of mgsl_add_device() */
 
+static const struct tty_port_operations mgsl_port_ops = {
+	.carrier_raised = carrier_raised,
+};
+
+
 /* mgsl_allocate_device()
  * 
  * 	Allocate and initialize a device instance structure
@@ -4322,6 +4342,7 @@ static struct mgsl_struct* mgsl_allocate_device(void)
 		printk("Error can't allocate device instance data\n");
 	} else {
 		tty_port_init(&info->port);
+		info->port.ops = &mgsl_port_ops;
 		info->magic = MGSL_MAGIC;
 		INIT_WORK(&info->task, mgsl_bh_handler);
 		info->max_frame_size = 4096;
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 08911ed..39ccaba 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -3132,6 +3132,17 @@ static int tiocmset(struct tty_struct *tty, struct file *file,
 	return 0;
 }
 
+static int carrier_raised(struct tty_port *port)
+{
+	unsigned long flags;
+	struct slgt_info *info = container_of(port, struct slgt_info, port);
+
+	spin_lock_irqsave(&info->lock,flags);
+ 	get_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+	return (info->signals & SerialSignal_DCD) ? 1 : 0;
+}
+
 /*
  *  block current process until the device is ready to open
  */
@@ -3143,12 +3154,14 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	bool		do_clocal = false;
 	bool		extra_count = false;
 	unsigned long	flags;
+	int		cd;
+	struct tty_port *port = &info->port;
 
 	DBGINFO(("%s block_til_ready\n", tty->driver->name));
 
 	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
 		/* nonblock mode is set or port is not enabled */
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -3157,21 +3170,21 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 	/* Wait for carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	spin_lock_irqsave(&info->lock, flags);
 	if (!tty_hung_up_p(filp)) {
 		extra_count = true;
-		info->port.count--;
+		port->count--;
 	}
 	spin_unlock_irqrestore(&info->lock, flags);
-	info->port.blocked_open++;
+	port->blocked_open++;
 
 	while (1) {
 		if ((tty->termios->c_cflag & CBAUD)) {
@@ -3183,20 +3196,16 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){
-			retval = (info->port.flags & ASYNC_HUP_NOTIFY) ?
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){
+			retval = (port->flags & ASYNC_HUP_NOTIFY) ?
 					-EAGAIN : -ERESTARTSYS;
 			break;
 		}
 
-		spin_lock_irqsave(&info->lock,flags);
-	 	get_signals(info);
-		spin_unlock_irqrestore(&info->lock,flags);
+		cd = tty_port_carrier_raised(port);
 
- 		if (!(info->port.flags & ASYNC_CLOSING) &&
- 		    (do_clocal || (info->signals & SerialSignal_DCD)) ) {
+ 		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd ))
  			break;
-		}
 
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -3208,14 +3217,14 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	}
 
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 
 	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 
 	if (!retval)
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 
 	DBGINFO(("%s block_til_ready ready, rc=%d\n", tty->driver->name, retval));
 	return retval;
@@ -3444,6 +3453,10 @@ static void add_device(struct slgt_info *info)
 #endif
 }
 
+static const struct tty_port_operations slgt_port_ops = {
+	.carrier_raised = carrier_raised,
+};
+
 /*
  *  allocate device instance structure, return NULL on failure
  */
@@ -3458,6 +3471,7 @@ static struct slgt_info *alloc_dev(int adapter_num, int port_num, struct pci_dev
 			driver_name, adapter_num, port_num));
 	} else {
 		tty_port_init(&info->port);
+		info->port.ops = &slgt_port_ops;
 		info->magic = MGSL_MAGIC;
 		INIT_WORK(&info->task, bh_handler);
 		info->max_frame_size = 4096;
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 6bdb44f..fcf1ec7 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -558,6 +558,7 @@ static void release_resources(SLMP_INFO *info);
 
 static int  startup(SLMP_INFO *info);
 static int  block_til_ready(struct tty_struct *tty, struct file * filp,SLMP_INFO *info);
+static int carrier_raised(struct tty_port *port);
 static void shutdown(SLMP_INFO *info);
 static void program_hw(SLMP_INFO *info);
 static void change_params(SLMP_INFO *info);
@@ -3318,7 +3319,17 @@ static int tiocmset(struct tty_struct *tty, struct file *file,
 	return 0;
 }
 
+static int carrier_raised(struct tty_port *port)
+{
+	SLMP_INFO *info = container_of(port, SLMP_INFO, port);
+	unsigned long flags;
 
+	spin_lock_irqsave(&info->lock,flags);
+ 	get_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+
+	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
+}
 
 /* Block the current process until the specified port is ready to open.
  */
@@ -3330,6 +3341,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	bool		do_clocal = false;
 	bool		extra_count = false;
 	unsigned long	flags;
+	int		cd;
+	struct tty_port *port = &info->port;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s block_til_ready()\n",
@@ -3338,7 +3351,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
 		/* nonblock mode is set or port is not enabled */
 		/* just verify that callout device is not active */
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -3347,25 +3360,25 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 	/* Wait for carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s block_til_ready() before block, count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 
 	spin_lock_irqsave(&info->lock, flags);
 	if (!tty_hung_up_p(filp)) {
 		extra_count = true;
-		info->port.count--;
+		port->count--;
 	}
 	spin_unlock_irqrestore(&info->lock, flags);
-	info->port.blocked_open++;
+	port->blocked_open++;
 
 	while (1) {
 		if ((tty->termios->c_cflag & CBAUD)) {
@@ -3377,20 +3390,16 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){
-			retval = (info->port.flags & ASYNC_HUP_NOTIFY) ?
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){
+			retval = (port->flags & ASYNC_HUP_NOTIFY) ?
 					-EAGAIN : -ERESTARTSYS;
 			break;
 		}
 
-		spin_lock_irqsave(&info->lock,flags);
-	 	get_signals(info);
-		spin_unlock_irqrestore(&info->lock,flags);
+		cd = tty_port_carrier_raised(port);
 
- 		if (!(info->port.flags & ASYNC_CLOSING) &&
- 		    (do_clocal || (info->serial_signals & SerialSignal_DCD)) ) {
+ 		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd))
  			break;
-		}
 
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -3399,24 +3408,24 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 		if (debug_level >= DEBUG_LEVEL_INFO)
 			printk("%s(%d):%s block_til_ready() count=%d\n",
-				 __FILE__,__LINE__, tty->driver->name, info->port.count );
+				 __FILE__,__LINE__, tty->driver->name, port->count );
 
 		schedule();
 	}
 
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 
 	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s block_til_ready() after, count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 
 	if (!retval)
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 
 	return retval;
 }
@@ -3782,6 +3791,10 @@ static void add_device(SLMP_INFO *info)
 #endif
 }
 
+static const struct tty_port_operations port_ops = {
+	.carrier_raised = carrier_raised,
+};
+
 /* Allocate and initialize a device instance structure
  *
  * Return Value:	pointer to SLMP_INFO if success, otherwise NULL
@@ -3798,6 +3811,7 @@ static SLMP_INFO *alloc_dev(int adapter_num, int port_num, struct pci_dev *pdev)
 			__FILE__,__LINE__, adapter_num, port_num);
 	} else {
 		tty_port_init(&info->port);
+		info->port.ops = &port_ops;
 		info->magic = MGSL_MAGIC;
 		INIT_WORK(&info->task, bh_handler);
 		info->max_frame_size = 4096;
@@ -3940,6 +3954,7 @@ static const struct tty_operations ops = {
 	.tiocmset = tiocmset,
 };
 
+
 static void synclinkmp_cleanup(void)
 {
 	int rc;
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index c8f8024..f54e40c 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -94,3 +94,20 @@ void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty)
 	spin_unlock_irqrestore(&port->lock, flags);
 }
 EXPORT_SYMBOL(tty_port_tty_set);
+
+/**
+ *	tty_port_carrier_raised	-	carrier raised check
+ *	@port: tty port
+ *
+ *	Wrapper for the carrier detect logic. For the moment this is used
+ *	to hide some internal details. This will eventually become entirely
+ *	internal to the tty port.
+ */
+
+int tty_port_carrier_raised(struct tty_port *port)
+{
+	if (port->ops->carrier_raised == NULL)
+		return 1;
+	return port->ops->carrier_raised(port);
+}
+EXPORT_SYMBOL(tty_port_carrier_raised);
diff --git a/drivers/char/vme_scc.c b/drivers/char/vme_scc.c
index 1718b3c..d4e1534 100644
--- a/drivers/char/vme_scc.c
+++ b/drivers/char/vme_scc.c
@@ -69,7 +69,7 @@ static void scc_disable_tx_interrupts(void * ptr);
 static void scc_enable_tx_interrupts(void * ptr);
 static void scc_disable_rx_interrupts(void * ptr);
 static void scc_enable_rx_interrupts(void * ptr);
-static int  scc_get_CD(void * ptr);
+static int  scc_carrier_raised(struct tty_port *port);
 static void scc_shutdown_port(void * ptr);
 static int scc_set_real_termios(void  *ptr);
 static void scc_hungup(void  *ptr);
@@ -100,7 +100,6 @@ static struct real_driver scc_real_driver = {
         scc_enable_tx_interrupts,
         scc_disable_rx_interrupts,
         scc_enable_rx_interrupts,
-        scc_get_CD,
         scc_shutdown_port,
         scc_set_real_termios,
         scc_chars_in_buffer,
@@ -129,6 +128,10 @@ static const struct tty_operations scc_ops = {
 	.break_ctl = scc_break_ctl,
 };
 
+static const struct tty_port_operations scc_port_ops = {
+	.carrier_raised = scc_carrier_raised,
+};
+
 /*----------------------------------------------------------------------------
  * vme_scc_init() and support functions
  *---------------------------------------------------------------------------*/
@@ -176,6 +179,8 @@ static void scc_init_portstructs(void)
 
 	for (i = 0; i < 2; i++) {
 		port = scc_ports + i;
+		tty_port_init(&port->gs.port);
+		port->gs.port.ops = &scc_port_ops;
 		port->gs.magic = SCC_MAGIC;
 		port->gs.close_delay = HZ/2;
 		port->gs.closing_wait = 30 * HZ;
@@ -624,9 +629,9 @@ static void scc_enable_rx_interrupts(void *ptr)
 }
 
 
-static int scc_get_CD(void *ptr)
+static int scc_carrier_raised(struct tty_port *port)
 {
-	struct scc_port *port = ptr;
+	struct scc_port *scc = container_of(port, struct scc_port, gs.port);
 	unsigned channel = port->channel;
 
 	return !!(scc_last_status_reg[channel] & SR_DCD);
@@ -896,7 +901,7 @@ static int scc_open (struct tty_struct * tty, struct file * filp)
 		return retval;
 	}
 
-	port->c_dcd = scc_get_CD (port);
+	port->c_dcd = tty_port_carrier_raised(&port->gs.port);
 
 	scc_enable_rx_interrupts(port);
 
diff --git a/include/linux/generic_serial.h b/include/linux/generic_serial.h
index 4cc9139..fadff28 100644
--- a/include/linux/generic_serial.h
+++ b/include/linux/generic_serial.h
@@ -21,7 +21,6 @@ struct real_driver {
   void                    (*enable_tx_interrupts) (void *);
   void                    (*disable_rx_interrupts) (void *);
   void                    (*enable_rx_interrupts) (void *);
-  int                     (*get_CD) (void *);
   void                    (*shutdown_port) (void*);
   int                     (*set_real_termios) (void*);
   int                     (*chars_in_buffer) (void*);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index bbbeaef9..bc7bae7 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -180,8 +180,16 @@ struct signal_struct;
  * until a hangup so don't use the wrong path.
  */
 
+struct tty_port;
+
+struct tty_port_operations {
+	/* Return 1 if the carrier is raised */
+	int (*carrier_raised)(struct tty_port *port);
+};
+	
 struct tty_port {
 	struct tty_struct	*tty;		/* Back pointer */
+	const struct tty_port_operations *ops;	/* Port operations */
 	spinlock_t		lock;		/* Lock protecting tty field */
 	int			blocked_open;	/* Waiting to open */
 	int			count;		/* Usage count */
@@ -427,6 +435,7 @@ extern int tty_port_alloc_xmit_buf(struct tty_port *port);
 extern void tty_port_free_xmit_buf(struct tty_port *port);
 extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
+extern int tty_port_carrier_raised(struct tty_port *port);
 
 extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
-- 
cgit v0.10.2


From d0c9873addc1f18e7becb50094dad07df8cc4694 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:45:12 +0000
Subject: rio: Kill off ckmalloc

This was an alloc/clear wrapper but makes even less sense now it uses
kzalloc. Kill it off.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index ec2afd1..2e8a6ee 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -796,15 +796,6 @@ static int rio_init_drivers(void)
 	return 1;
 }
 
-
-static void *ckmalloc(int size)
-{
-	void *p;
-
-	p = kzalloc(size, GFP_KERNEL);
-	return p;
-}
-
 static const struct tty_port_operations rio_port_ops = {
 	.carrier_raised = rio_carrier_raised,
 };
@@ -827,18 +818,18 @@ static int rio_init_datastructures(void)
 #define TMIO_SZ sizeof(struct termios *)
 	rio_dprintk(RIO_DEBUG_INIT, "getting : %Zd %Zd %Zd %Zd %Zd bytes\n", RI_SZ, RIO_HOSTS * HOST_SZ, RIO_PORTS * PORT_SZ, RIO_PORTS * TMIO_SZ, RIO_PORTS * TMIO_SZ);
 
-	if (!(p = ckmalloc(RI_SZ)))
+	if (!(p = kzalloc(RI_SZ, GFP_KERNEL)))
 		goto free0;
-	if (!(p->RIOHosts = ckmalloc(RIO_HOSTS * HOST_SZ)))
+	if (!(p->RIOHosts = kzalloc(RIO_HOSTS * HOST_SZ, GFP_KERNEL)))
 		goto free1;
-	if (!(p->RIOPortp = ckmalloc(RIO_PORTS * PORT_SZ)))
+	if (!(p->RIOPortp = kzalloc(RIO_PORTS * PORT_SZ, GFP_KERNEL)))
 		goto free2;
 	p->RIOConf = RIOConf;
 	rio_dprintk(RIO_DEBUG_INIT, "Got : %p %p %p\n", p, p->RIOHosts, p->RIOPortp);
 
 #if 1
 	for (i = 0; i < RIO_PORTS; i++) {
-		port = p->RIOPortp[i] = ckmalloc(sizeof(struct Port));
+		port = p->RIOPortp[i] = kzalloc(sizeof(struct Port), GFP_KERNEL);
 		if (!port) {
 			goto free6;
 		}
-- 
cgit v0.10.2


From 5d951fb458f847e5485b5251597fbf326000bb3b Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:45:19 +0000
Subject: tty: Pull the dtr raise into tty port

This moves another per device special out of what should be shared open
wait paths into private methods

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index b3da485..a449449 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -328,11 +328,13 @@ static inline void drop_rts(struct isi_port *port)
 }
 
 /* card->lock MUST NOT be held */
-static inline void raise_dtr_rts(struct isi_port *port)
+
+static void isicom_raise_dtr_rts(struct tty_port *port)
 {
-	struct isi_board *card = port->card;
+	struct isi_port *ip = container_of(port, struct isi_port, port);
+	struct isi_board *card = ip->card;
 	unsigned long base = card->base;
-	u16 channel = port->channel;
+	u16 channel = ip->channel;
 
 	if (!lock_card(card))
 		return;
@@ -340,7 +342,7 @@ static inline void raise_dtr_rts(struct isi_port *port)
 	outw(0x8000 | (channel << card->shift_count) | 0x02, base);
 	outw(0x0f04, base);
 	InterruptTheCard(base);
-	port->status |= (ISI_DTR | ISI_RTS);
+	ip->status |= (ISI_DTR | ISI_RTS);
 	unlock_card(card);
 }
 
@@ -881,7 +883,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	spin_unlock_irqrestore(&card->card_lock, flags);
 
 	while (1) {
-		raise_dtr_rts(ip);
+		tty_port_raise_dtr_rts(port);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
@@ -1462,6 +1464,7 @@ static const struct tty_operations isicom_ops = {
 
 static const struct tty_port_operations isicom_port_ops = {
 	.carrier_raised		= isicom_carrier_raised,
+	.raise_dtr_rts		= isicom_raise_dtr_rts,
 };
 
 static int __devinit reset_card(struct pci_dev *pdev,
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index eafbbcf..ff5ff61 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -547,6 +547,17 @@ static int mxser_carrier_raised(struct tty_port *port)
 	return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0;
 }
 
+static void mxser_raise_dtr_rts(struct tty_port *port)
+{
+	struct mxser_port *mp = container_of(port, struct mxser_port, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mp->slock, flags);
+	outb(inb(mp->ioaddr + UART_MCR) |
+		UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+	spin_unlock_irqrestore(&mp->slock, flags);
+}
+
 static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 		struct mxser_port *mp)
 {
@@ -586,10 +597,7 @@ static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 	spin_unlock_irqrestore(&mp->slock, flags);
 	port->blocked_open++;
 	while (1) {
-		spin_lock_irqsave(&mp->slock, flags);
-		outb(inb(mp->ioaddr + UART_MCR) |
-			UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
-		spin_unlock_irqrestore(&mp->slock, flags);
+		tty_port_raise_dtr_rts(port);
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
 			if (port->flags & ASYNC_HUP_NOTIFY)
@@ -2458,6 +2466,7 @@ static const struct tty_operations mxser_ops = {
 
 struct tty_port_operations mxser_port_ops = {
 	.carrier_raised = mxser_carrier_raised,
+	.raise_dtr_rts = mxser_raise_dtr_rts,
 };
 
 /*
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index 4a4110e..f4d9c39 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -870,6 +870,13 @@ static int carrier_raised(struct tty_port *port)
 	return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0;
 }
 
+static void raise_dtr_rts(struct tty_port *port)
+{
+	struct r_port *info = container_of(port, struct r_port, port);
+	sSetDTR(&info->channel);
+	sSetRTS(&info->channel);
+}
+
 /*  info->port.count is considered critical, protected by spinlocks.  */
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
 			   struct r_port *info)
@@ -928,10 +935,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	spin_unlock_irqrestore(&info->slock, flags);
 
 	while (1) {
-		if (tty->termios->c_cflag & CBAUD) {
-			sSetDTR(&info->channel);
-			sSetRTS(&info->channel);
-		}
+		if (tty->termios->c_cflag & CBAUD)
+			tty_port_raise_dtr_rts(port);
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) || !(info->flags & ROCKET_INITIALIZED)) {
 			if (info->flags & ROCKET_HUP_NOTIFY)
@@ -2381,6 +2386,7 @@ static const struct tty_operations rocket_ops = {
 
 static const struct tty_port_operations rocket_port_ops = {
 	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
 };
 
 /*
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index fb2e6b5..ac9f21e 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3298,6 +3298,18 @@ static int carrier_raised(struct tty_port *port)
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
+static void raise_dtr_rts(struct tty_port *port)
+{
+	struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&info->irq_spinlock,flags);
+	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+ 	usc_set_serial_signals(info);
+	spin_unlock_irqrestore(&info->irq_spinlock,flags);
+}
+
+
 /* block_til_ready()
  * 
  * 	Block the current process until the specified port
@@ -3358,12 +3370,8 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 	port->blocked_open++;
 	
 	while (1) {
-		if (tty->termios->c_cflag & CBAUD) {
-			spin_lock_irqsave(&info->irq_spinlock,flags);
-			info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
-		 	usc_set_serial_signals(info);
-			spin_unlock_irqrestore(&info->irq_spinlock,flags);
-		}
+		if (tty->termios->c_cflag & CBAUD)
+			tty_port_raise_dtr_rts(port);
 		
 		set_current_state(TASK_INTERRUPTIBLE);
 		
@@ -4321,6 +4329,7 @@ static void mgsl_add_device( struct mgsl_struct *info )
 
 static const struct tty_port_operations mgsl_port_ops = {
 	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
 };
 
 
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 39ccaba..625c9bd 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -3143,6 +3143,18 @@ static int carrier_raised(struct tty_port *port)
 	return (info->signals & SerialSignal_DCD) ? 1 : 0;
 }
 
+static void raise_dtr_rts(struct tty_port *port)
+{
+	unsigned long flags;
+	struct slgt_info *info = container_of(port, struct slgt_info, port);
+
+	spin_lock_irqsave(&info->lock,flags);
+	info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+ 	set_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+}
+
+
 /*
  *  block current process until the device is ready to open
  */
@@ -3187,12 +3199,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	port->blocked_open++;
 
 	while (1) {
-		if ((tty->termios->c_cflag & CBAUD)) {
-			spin_lock_irqsave(&info->lock,flags);
-			info->signals |= SerialSignal_RTS + SerialSignal_DTR;
-		 	set_signals(info);
-			spin_unlock_irqrestore(&info->lock,flags);
-		}
+		if ((tty->termios->c_cflag & CBAUD))
+			tty_port_raise_dtr_rts(port);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -3455,6 +3463,7 @@ static void add_device(struct slgt_info *info)
 
 static const struct tty_port_operations slgt_port_ops = {
 	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
 };
 
 /*
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index f54e40c..0557b63 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -111,3 +111,19 @@ int tty_port_carrier_raised(struct tty_port *port)
 	return port->ops->carrier_raised(port);
 }
 EXPORT_SYMBOL(tty_port_carrier_raised);
+
+/**
+ *	tty_port_raise_dtr_rts	-	Riase DTR/RTS
+ *	@port: tty port
+ *
+ *	Wrapper for the DTR/RTS raise logic. For the moment this is used
+ *	to hide some internal details. This will eventually become entirely
+ *	internal to the tty port.
+ */
+
+void tty_port_raise_dtr_rts(struct tty_port *port)
+{
+	if (port->ops->raise_dtr_rts)
+		port->ops->raise_dtr_rts(port);
+}
+EXPORT_SYMBOL(tty_port_raise_dtr_rts);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index bc7bae7..5001bbc 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -185,6 +185,7 @@ struct tty_port;
 struct tty_port_operations {
 	/* Return 1 if the carrier is raised */
 	int (*carrier_raised)(struct tty_port *port);
+	void (*raise_dtr_rts)(struct tty_port *port);
 };
 	
 struct tty_port {
@@ -436,6 +437,7 @@ extern void tty_port_free_xmit_buf(struct tty_port *port);
 extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
+extern void tty_port_raise_dtr_rts(struct tty_port *port);
 
 extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
-- 
cgit v0.10.2


From 3e61696bdc2103107674b06d0daf30b76193e922 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:45:26 +0000
Subject: isicom: redo locking to use tty port locks

This helps set the basis for moving block_til_ready into common code. We also
introduce a tty_port_hangup helper as this will also be generally needed.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index a449449..db53db9 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -841,7 +841,6 @@ static int isicom_carrier_raised(struct tty_port *port)
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	struct isi_port *ip)
 {
-	struct isi_board *card = ip->card;
 	struct tty_port *port = &ip->port;
 	int do_clocal = 0, retval;
 	unsigned long flags;
@@ -876,11 +875,11 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	retval = 0;
 	add_wait_queue(&port->open_wait, &wait);
 
-	spin_lock_irqsave(&card->card_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp))
 		port->count--;
 	port->blocked_open++;
-	spin_unlock_irqrestore(&card->card_lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	while (1) {
 		tty_port_raise_dtr_rts(port);
@@ -905,14 +904,13 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&port->open_wait, &wait);
-	spin_lock_irqsave(&card->card_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp))
 		port->count++;
 	port->blocked_open--;
-	spin_unlock_irqrestore(&card->card_lock, flags);
-	if (retval)
-		return retval;
-	port->flags |= ASYNC_NORMAL_ACTIVE;
+	if (retval == 0)
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+	spin_unlock_irqrestore(&port->lock, flags);
 	return 0;
 }
 
@@ -1034,9 +1032,9 @@ static void isicom_close(struct tty_struct *tty, struct file *filp)
 
 	pr_dbg("Close start!!!.\n");
 
-	spin_lock_irqsave(&card->card_lock, flags);
+	spin_lock_irqsave(&port->port.lock, flags);
 	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&card->card_lock, flags);
+		spin_unlock_irqrestore(&port->port.lock, flags);
 		return;
 	}
 
@@ -1054,12 +1052,12 @@ static void isicom_close(struct tty_struct *tty, struct file *filp)
 	}
 
 	if (port->port.count) {
-		spin_unlock_irqrestore(&card->card_lock, flags);
+		spin_unlock_irqrestore(&port->port.lock, flags);
 		return;
 	}
 	port->port.flags |= ASYNC_CLOSING;
 	tty->closing = 1;
-	spin_unlock_irqrestore(&card->card_lock, flags);
+	spin_unlock_irqrestore(&port->port.lock, flags);
 
 	if (port->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, port->port.closing_wait);
@@ -1076,22 +1074,22 @@ static void isicom_close(struct tty_struct *tty, struct file *filp)
 	isicom_flush_buffer(tty);
 	tty_ldisc_flush(tty);
 
-	spin_lock_irqsave(&card->card_lock, flags);
+	spin_lock_irqsave(&port->port.lock, flags);
 	tty->closing = 0;
 
 	if (port->port.blocked_open) {
-		spin_unlock_irqrestore(&card->card_lock, flags);
+		spin_unlock_irqrestore(&port->port.lock, flags);
 		if (port->port.close_delay) {
 			pr_dbg("scheduling until time out.\n");
 			msleep_interruptible(
 				jiffies_to_msecs(port->port.close_delay));
 		}
-		spin_lock_irqsave(&card->card_lock, flags);
+		spin_lock_irqsave(&port->port.lock, flags);
 		wake_up_interruptible(&port->port.open_wait);
 	}
 	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
 	wake_up_interruptible(&port->port.close_wait);
-	spin_unlock_irqrestore(&card->card_lock, flags);
+	spin_unlock_irqrestore(&port->port.lock, flags);
 }
 
 /* write et all */
@@ -1430,10 +1428,7 @@ static void isicom_hangup(struct tty_struct *tty)
 	isicom_shutdown_port(port);
 	spin_unlock_irqrestore(&port->card->card_lock, flags);
 
-	port->port.count = 0;
-	port->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	tty_port_tty_set(&port->port, NULL);
-	wake_up_interruptible(&port->port.open_wait);
+	tty_port_hangup(&port->port);
 }
 
 
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index fcf1ec7..1f5c21e 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -3331,6 +3331,17 @@ static int carrier_raised(struct tty_port *port)
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
+static void raise_dtr_rts(struct tty_port *port)
+{
+	SLMP_INFO *info = container_of(port, SLMP_INFO, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&info->lock,flags);
+	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+ 	set_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+}
+
 /* Block the current process until the specified port is ready to open.
  */
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
@@ -3381,12 +3392,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	port->blocked_open++;
 
 	while (1) {
-		if ((tty->termios->c_cflag & CBAUD)) {
-			spin_lock_irqsave(&info->lock,flags);
-			info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
-		 	set_signals(info);
-			spin_unlock_irqrestore(&info->lock,flags);
-		}
+		if (tty->termios->c_cflag & CBAUD)
+			tty_port_raise_dtr_rts(port);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -3793,6 +3800,7 @@ static void add_device(SLMP_INFO *info)
 
 static const struct tty_port_operations port_ops = {
 	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
 };
 
 /* Allocate and initialize a device instance structure
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 0557b63..9f418bc 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -7,6 +7,7 @@
 #include <linux/tty.h>
 #include <linux/tty_driver.h>
 #include <linux/tty_flip.h>
+#include <linux/serial.h>
 #include <linux/timer.h>
 #include <linux/string.h>
 #include <linux/slab.h>
@@ -96,6 +97,29 @@ void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty)
 EXPORT_SYMBOL(tty_port_tty_set);
 
 /**
+ *	tty_port_hangup		-	hangup helper
+ *	@port: tty port
+ *
+ *	Perform port level tty hangup flag and count changes. Drop the tty
+ *	reference.
+ */
+
+void tty_port_hangup(struct tty_port *port)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	port->count = 0;
+	port->flags &= ~ASYNC_NORMAL_ACTIVE;
+	if (port->tty)
+		tty_kref_put(port->tty);
+	port->tty = NULL;
+	spin_unlock_irqrestore(&port->lock, flags);
+	wake_up_interruptible(&port->open_wait);
+}
+EXPORT_SYMBOL(tty_port_hangup);
+
+/**
  *	tty_port_carrier_raised	-	carrier raised check
  *	@port: tty port
  *
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 5001bbc..a1a9314 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -438,6 +438,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
 extern void tty_port_raise_dtr_rts(struct tty_port *port);
+extern void tty_port_hangup(struct tty_port *port);
 
 extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
-- 
cgit v0.10.2


From 510a3049573868d3d77414bfa55d293f44d0dbbe Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:45:36 +0000
Subject: tty: relock generic_serial

Switch generic_serial to do port count locking via the tty_port structure
ready for moving to a common port wait routine. Keep the old driver lock for
internal calling so we don't risk messing up the drivers below until we
are ready.

Still needs kref conversions

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/generic_serial.c b/drivers/char/generic_serial.c
index 2356994..2f040d1 100644
--- a/drivers/char/generic_serial.c
+++ b/drivers/char/generic_serial.c
@@ -376,7 +376,8 @@ static void gs_shutdown_port (struct gs_port *port)
 
 void gs_hangup(struct tty_struct *tty)
 {
-	struct gs_port   *port;
+	struct gs_port *port;
+	unsigned long flags;
 
 	func_enter ();
 
@@ -386,9 +387,11 @@ void gs_hangup(struct tty_struct *tty)
 		return;
 
 	gs_shutdown_port (port);
+	spin_lock_irqsave(&port->port.lock, flags);
 	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE|GS_ACTIVE);
 	port->port.tty = NULL;
 	port->port.count = 0;
+	spin_unlock_irqrestore(&port->port.lock, flags);
 
 	wake_up_interruptible(&port->port.open_wait);
 	func_exit ();
@@ -454,12 +457,12 @@ int gs_block_til_ready(void *port_, struct file * filp)
 	add_wait_queue(&port->open_wait, &wait);
 
 	gs_dprintk (GS_DEBUG_BTR, "after add waitq.\n"); 
-	spin_lock_irqsave(&gp->driver_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp)) {
 		port->count--;
 	}
-	spin_unlock_irqrestore(&gp->driver_lock, flags);
 	port->blocked_open++;
+	spin_unlock_irqrestore(&port->lock, flags);
 	while (1) {
 		CD = tty_port_carrier_raised(port);
 		gs_dprintk (GS_DEBUG_BTR, "CD is now %d.\n", CD);
@@ -487,16 +490,17 @@ int gs_block_til_ready(void *port_, struct file * filp)
 		    port->blocked_open);
 	set_current_state (TASK_RUNNING);
 	remove_wait_queue(&port->open_wait, &wait);
+	
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp)) {
 		port->count++;
 	}
 	port->blocked_open--;
-	if (retval)
-		return retval;
-
-	port->flags |= ASYNC_NORMAL_ACTIVE;
+	if (retval == 0)
+        	port->flags |= ASYNC_NORMAL_ACTIVE;
+	spin_unlock_irqrestore(&port->lock, flags);
 	func_exit ();
-	return 0;
+	return retval;
 }			 
 
 
@@ -517,10 +521,10 @@ void gs_close(struct tty_struct * tty, struct file * filp)
 		port->port.tty = tty;
 	}
 
-	spin_lock_irqsave(&port->driver_lock, flags);
+	spin_lock_irqsave(&port->port.lock, flags);
 
 	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&port->driver_lock, flags);
+		spin_unlock_irqrestore(&port->port.lock, flags);
 		if (port->rd->hungup)
 			port->rd->hungup (port);
 		func_exit ();
@@ -539,7 +543,7 @@ void gs_close(struct tty_struct * tty, struct file * filp)
 
 	if (port->port.count) {
 		gs_dprintk(GS_DEBUG_CLOSE, "gs_close port %p: count: %d\n", port, port->port.count);
-		spin_unlock_irqrestore(&port->driver_lock, flags);
+		spin_unlock_irqrestore(&port->port.lock, flags);
 		func_exit ();
 		return;
 	}
@@ -560,8 +564,10 @@ void gs_close(struct tty_struct * tty, struct file * filp)
 	 * line status register.
 	 */
 
+	spin_lock_irqsave(&port->driver_lock, flags);
 	port->rd->disable_rx_interrupts (port);
 	spin_unlock_irqrestore(&port->driver_lock, flags);
+	spin_unlock_irqrestore(&port->port.lock, flags);
 
 	/* close has no way of returning "EINTR", so discard return value */
 	if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
@@ -574,20 +580,25 @@ void gs_close(struct tty_struct * tty, struct file * filp)
 	tty_ldisc_flush(tty);
 	tty->closing = 0;
 
+	spin_lock_irqsave(&port->driver_lock, flags);
 	port->event = 0;
 	port->rd->close (port);
 	port->rd->shutdown_port (port);
+	spin_unlock_irqrestore(&port->driver_lock, flags);
+
+	spin_lock_irqsave(&port->port.lock, flags);
 	port->port.tty = NULL;
 
 	if (port->port.blocked_open) {
 		if (port->close_delay) {
-			spin_unlock_irqrestore(&port->driver_lock, flags);
+			spin_unlock_irqrestore(&port->port.lock, flags);
 			msleep_interruptible(jiffies_to_msecs(port->close_delay));
-			spin_lock_irqsave(&port->driver_lock, flags);
+			spin_lock_irqsave(&port->port.lock, flags);
 		}
 		wake_up_interruptible(&port->port.open_wait);
 	}
 	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING | ASYNC_INITIALIZED);
+	spin_unlock_irqrestore(&port->port.lock, flags);
 	wake_up_interruptible(&port->port.close_wait);
 
 	func_exit ();
-- 
cgit v0.10.2


From a129909ca910d086b8536c790338504878489a95 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:45:44 +0000
Subject: tty: rocketport uses different port flags to everyone else

Normalise them so we can use the common helpers later on

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index f4d9c39..9d81980 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -499,7 +499,7 @@ static void rp_handle_port(struct r_port *info)
 	if (!info)
 		return;
 
-	if ((info->flags & ROCKET_INITIALIZED) == 0) {
+	if ((info->flags & ASYNC_INITIALIZED) == 0) {
 		printk(KERN_WARNING "rp: WARNING: rp_handle_port called with "
 				"info->flags & NOT_INIT\n");
 		return;
@@ -892,11 +892,11 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	 * until it's done, and then try again.
 	 */
 	if (tty_hung_up_p(filp))
-		return ((info->flags & ROCKET_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
-	if (info->flags & ROCKET_CLOSING) {
+		return ((info->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
+	if (info->flags & ASYNC_CLOSING) {
 		if (wait_for_completion_interruptible(&info->close_wait))
 			return -ERESTARTSYS;
-		return ((info->flags & ROCKET_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
+		return ((info->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
 	}
 
 	/*
@@ -904,7 +904,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	 * then make the check up front and then exit.
 	 */
 	if ((filp->f_flags & O_NONBLOCK) || (tty->flags & (1 << TTY_IO_ERROR))) {
-		info->flags |= ROCKET_NORMAL_ACTIVE;
+		info->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 	if (tty->termios->c_cflag & CLOCAL)
@@ -923,7 +923,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	spin_lock_irqsave(&info->slock, flags);
 
 #ifdef ROCKET_DISABLE_SIMUSAGE
-	info->flags |= ROCKET_NORMAL_ACTIVE;
+	info->flags |= ASYNC_NORMAL_ACTIVE;
 #else
 	if (!tty_hung_up_p(filp)) {
 		extra_count = 1;
@@ -938,14 +938,14 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		if (tty->termios->c_cflag & CBAUD)
 			tty_port_raise_dtr_rts(port);
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(info->flags & ROCKET_INITIALIZED)) {
-			if (info->flags & ROCKET_HUP_NOTIFY)
+		if (tty_hung_up_p(filp) || !(info->flags & ASYNC_INITIALIZED)) {
+			if (info->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(info->flags & ROCKET_CLOSING) &&
+		if (!(info->flags & ASYNC_CLOSING) &&
 			(do_clocal || tty_port_carrier_raised(port)))
 			break;
 		if (signal_pending(current)) {
@@ -975,7 +975,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 #endif
 	if (retval)
 		return retval;
-	info->flags |= ROCKET_NORMAL_ACTIVE;
+	info->flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
@@ -998,12 +998,12 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 	if (!page)
 		return -ENOMEM;
 
-	if (info->flags & ROCKET_CLOSING) {
+	if (info->flags & ASYNC_CLOSING) {
 		retval = wait_for_completion_interruptible(&info->close_wait);
 		free_page(page);
 		if (retval)
 			return retval;
-		return ((info->flags & ROCKET_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
+		return ((info->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
 	}
 
 	/*
@@ -1032,7 +1032,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 	/*
 	 * Info->count is now 1; so it's safe to sleep now.
 	 */
-	if ((info->flags & ROCKET_INITIALIZED) == 0) {
+	if ((info->flags & ASYNC_INITIALIZED) == 0) {
 		cp = &info->channel;
 		sSetRxTrigger(cp, TRIG_1);
 		if (sGetChanStatus(cp) & CD_ACT)
@@ -1056,7 +1056,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 		sEnRxFIFO(cp);
 		sEnTransmit(cp);
 
-		info->flags |= ROCKET_INITIALIZED;
+		info->flags |= ASYNC_INITIALIZED;
 
 		/*
 		 * Set up the tty->alt_speed kludge
@@ -1131,7 +1131,7 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 		spin_unlock_irqrestore(&info->slock, flags);
 		return;
 	}
-	info->flags |= ROCKET_CLOSING;
+	info->flags |= ASYNC_CLOSING;
 	spin_unlock_irqrestore(&info->slock, flags);
 
 	cp = &info->channel;
@@ -1151,7 +1151,7 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 	/*
 	 * Wait for the transmit buffer to clear
 	 */
-	if (info->port.closing_wait != ROCKET_CLOSING_WAIT_NONE)
+	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, info->port.closing_wait);
 	/*
 	 * Before we drop DTR, make sure the UART transmitter
@@ -1192,7 +1192,7 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 			info->xmit_buf = NULL;
 		}
 	}
-	info->flags &= ~(ROCKET_INITIALIZED | ROCKET_CLOSING | ROCKET_NORMAL_ACTIVE);
+	info->flags &= ~(ASYNC_INITIALIZED | ASYNC_CLOSING | ASYNC_NORMAL_ACTIVE);
 	tty->closing = 0;
 	complete_all(&info->close_wait);
 	atomic_dec(&rp_num_ports_open);
@@ -1649,14 +1649,14 @@ static void rp_hangup(struct tty_struct *tty)
 	printk(KERN_INFO "rp_hangup of ttyR%d...\n", info->line);
 #endif
 	rp_flush_buffer(tty);
-	if (info->flags & ROCKET_CLOSING)
+	if (info->flags & ASYNC_CLOSING)
 		return;
 	if (info->port.count)
 		atomic_dec(&rp_num_ports_open);
 	clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]);
 
 	info->port.count = 0;
-	info->flags &= ~ROCKET_NORMAL_ACTIVE;
+	info->flags &= ~ASYNC_NORMAL_ACTIVE;
 	info->port.tty = NULL;
 
 	cp = &info->channel;
@@ -1666,7 +1666,7 @@ static void rp_hangup(struct tty_struct *tty)
 	sDisCTSFlowCtl(cp);
 	sDisTxSoftFlowCtl(cp);
 	sClrTxXOFF(cp);
-	info->flags &= ~ROCKET_INITIALIZED;
+	info->flags &= ~ASYNC_INITIALIZED;
 
 	wake_up_interruptible(&info->port.open_wait);
 }
diff --git a/drivers/char/rocket.h b/drivers/char/rocket.h
index a8b0919..ec863f3 100644
--- a/drivers/char/rocket.h
+++ b/drivers/char/rocket.h
@@ -39,7 +39,7 @@ struct rocket_version {
 /*
  * Rocketport flags
  */
-#define ROCKET_CALLOUT_NOHUP    0x00000001
+/*#define ROCKET_CALLOUT_NOHUP    0x00000001 */
 #define ROCKET_FORCE_CD		0x00000002
 #define ROCKET_HUP_NOTIFY	0x00000004
 #define ROCKET_SPLIT_TERMIOS	0x00000008
diff --git a/drivers/char/rocket_int.h b/drivers/char/rocket_int.h
index 21f3ff5..67e0f1e 100644
--- a/drivers/char/rocket_int.h
+++ b/drivers/char/rocket_int.h
@@ -1162,11 +1162,6 @@ struct r_port {
 /* number of characters left in xmit buffer before we ask for more */
 #define WAKEUP_CHARS 256
 
-/* Internal flags used only by the rocketport driver */
-#define ROCKET_INITIALIZED	0x80000000	/* Port is active */
-#define ROCKET_CLOSING		0x40000000	/* Serial port is closing */
-#define ROCKET_NORMAL_ACTIVE	0x20000000	/* Normal port is active */
-
 /*
  * Assigned major numbers for the Comtrol Rocketport
  */
-- 
cgit v0.10.2


From c2ba38cd76df770a253f0cab4b6abe514c265a85 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:45:50 +0000
Subject: tty: relock riscom8 using port locks

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c
index 6ad1c2a..14662d7 100644
--- a/drivers/char/riscom8.c
+++ b/drivers/char/riscom8.c
@@ -919,14 +919,12 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	retval = 0;
 	add_wait_queue(&port->open_wait, &wait);
 
-	spin_lock_irqsave(&riscom_lock, flags);
-
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp))
 		port->count--;
-
-	spin_unlock_irqrestore(&riscom_lock, flags);
-
 	port->blocked_open++;
+	spin_unlock_irqrestore(&port->lock, flags);
+
 	while (1) {
 
 		CD = tty_port_carrier_raised(port);
@@ -950,13 +948,13 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	}
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&port->open_wait, &wait);
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp))
 		port->count++;
 	port->blocked_open--;
-	if (retval)
-		return retval;
-
-	port->flags |= ASYNC_NORMAL_ACTIVE;
+	if (retval == 0)
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+	spin_unlock_irqrestore(&port->lock, flags);
 	return 0;
 }
 
@@ -1015,7 +1013,7 @@ static void rc_close(struct tty_struct *tty, struct file *filp)
 	if (!port || rc_paranoia_check(port, tty->name, "close"))
 		return;
 
-	spin_lock_irqsave(&riscom_lock, flags);
+	spin_lock_irqsave(&port->port.lock, flags);
 
 	if (tty_hung_up_p(filp))
 		goto out;
@@ -1041,6 +1039,7 @@ static void rc_close(struct tty_struct *tty, struct file *filp)
 	 * the line discipline to only process XON/XOFF characters.
 	 */
 	tty->closing = 1;
+	spin_unlock_irqrestore(&port->port.lock, flags);
 	if (port->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, port->port.closing_wait);
 	/*
@@ -1049,6 +1048,8 @@ static void rc_close(struct tty_struct *tty, struct file *filp)
 	 * interrupt driver to stop checking the data ready bit in the
 	 * line status register.
 	 */
+
+	spin_lock_irqsave(&riscom_lock, flags);
 	port->IER &= ~IER_RXD;
 	if (port->port.flags & ASYNC_INITIALIZED) {
 		port->IER &= ~IER_TXRDY;
@@ -1062,21 +1063,27 @@ static void rc_close(struct tty_struct *tty, struct file *filp)
 		 */
 		timeout = jiffies + HZ;
 		while (port->IER & IER_TXEMPTY) {
+			spin_unlock_irqrestore(&riscom_lock, flags);
 			msleep_interruptible(jiffies_to_msecs(port->timeout));
+			spin_lock_irqsave(&riscom_lock, flags);
 			if (time_after(jiffies, timeout))
 				break;
 		}
 	}
 	rc_shutdown_port(tty, bp, port);
 	rc_flush_buffer(tty);
+	spin_unlock_irqrestore(&riscom_lock, flags);
 	tty_ldisc_flush(tty);
 
+	spin_lock_irqsave(&port->port.lock, flags);
 	tty->closing = 0;
 	port->port.tty = NULL;
 	if (port->port.blocked_open) {
+		spin_unlock_irqrestore(&port->port.lock, flags);
 		if (port->port.close_delay)
 			msleep_interruptible(jiffies_to_msecs(port->port.close_delay));
 		wake_up_interruptible(&port->port.open_wait);
+		spin_lock_irqsave(&port->port.lock, flags);
 	}
 	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
 	wake_up_interruptible(&port->port.close_wait);
@@ -1465,6 +1472,7 @@ static void rc_hangup(struct tty_struct *tty)
 {
 	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
 	struct riscom_board *bp;
+	unsigned long flags;
 
 	if (rc_paranoia_check(port, tty->name, "rc_hangup"))
 		return;
@@ -1472,10 +1480,12 @@ static void rc_hangup(struct tty_struct *tty)
 	bp = port_Board(port);
 
 	rc_shutdown_port(tty, bp, port);
+	spin_lock_irqsave(&port->port.lock, flags);
 	port->port.count = 0;
 	port->port.flags &= ~ASYNC_NORMAL_ACTIVE;
 	port->port.tty = NULL;
 	wake_up_interruptible(&port->port.open_wait);
+	spin_unlock_irqrestore(&port->port.lock, flags);
 }
 
 static void rc_set_termios(struct tty_struct *tty,
-- 
cgit v0.10.2


From 3b6826b250633361f08a6427a4ac0035e5d88c72 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:45:58 +0000
Subject: tty: relock the mxser driver

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index ff5ff61..e2471cf 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -591,11 +591,11 @@ static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 	retval = 0;
 	add_wait_queue(&port->open_wait, &wait);
 
-	spin_lock_irqsave(&mp->slock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp))
 		port->count--;
-	spin_unlock_irqrestore(&mp->slock, flags);
 	port->blocked_open++;
+	spin_unlock_irqrestore(&port->lock, flags);
 	while (1) {
 		tty_port_raise_dtr_rts(port);
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -617,12 +617,13 @@ static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&port->open_wait, &wait);
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp))
 		port->count++;
 	port->blocked_open--;
-	if (retval)
-		return retval;
-	port->flags |= ASYNC_NORMAL_ACTIVE;
+	if (retval == 0)
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+	spin_unlock_irqrestore(&port->lock, flags);
 	return 0;
 }
 
@@ -1102,9 +1103,9 @@ static int mxser_open(struct tty_struct *tty, struct file *filp)
 	/*
 	 * Start up serial port
 	 */
-	spin_lock_irqsave(&info->slock, flags);
+	spin_lock_irqsave(&info->port.lock, flags);
 	info->port.count++;
-	spin_unlock_irqrestore(&info->slock, flags);
+	spin_unlock_irqrestore(&info->port.lock, flags);
 	retval = mxser_startup(tty);
 	if (retval)
 		return retval;
@@ -1157,10 +1158,10 @@ static void mxser_close(struct tty_struct *tty, struct file *filp)
 	if (!info)
 		return;
 
-	spin_lock_irqsave(&info->slock, flags);
+	spin_lock_irqsave(&info->port.lock, flags);
 
 	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&info->slock, flags);
+		spin_unlock_irqrestore(&info->port.lock, flags);
 		return;
 	}
 	if ((tty->count == 1) && (info->port.count != 1)) {
@@ -1181,11 +1182,11 @@ static void mxser_close(struct tty_struct *tty, struct file *filp)
 		info->port.count = 0;
 	}
 	if (info->port.count) {
-		spin_unlock_irqrestore(&info->slock, flags);
+		spin_unlock_irqrestore(&info->port.lock, flags);
 		return;
 	}
 	info->port.flags |= ASYNC_CLOSING;
-	spin_unlock_irqrestore(&info->slock, flags);
+	spin_unlock_irqrestore(&info->port.lock, flags);
 	/*
 	 * Save the termios structure, since this port may have
 	 * separate termios for callout and dialin.
@@ -2161,10 +2162,7 @@ static void mxser_hangup(struct tty_struct *tty)
 
 	mxser_flush_buffer(tty);
 	mxser_shutdown(tty);
-	info->port.count = 0;
-	info->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	tty_port_tty_set(&info->port, NULL);
-	wake_up_interruptible(&info->port.open_wait);
+	tty_port_hangup(&info->port);
 }
 
 /*
-- 
cgit v0.10.2


From 36c621d82b956ff6ff72273f848af53e6c581aba Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:46:10 +0000
Subject: tty: Introduce a tty_port generic block_til_ready

Start sucking more commonality out of the drivers into a single piece of
core code.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index db53db9..bac55cf 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -838,82 +838,6 @@ static int isicom_carrier_raised(struct tty_port *port)
 	return (ip->status & ISI_DCD)?1 : 0;
 }
 
-static int block_til_ready(struct tty_struct *tty, struct file *filp,
-	struct isi_port *ip)
-{
-	struct tty_port *port = &ip->port;
-	int do_clocal = 0, retval;
-	unsigned long flags;
-	DECLARE_WAITQUEUE(wait, current);
-	int cd;
-
-	/* block if port is in the process of being closed */
-
-	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
-		pr_dbg("block_til_ready: close in progress.\n");
-		interruptible_sleep_on(&port->close_wait);
-		if (port->flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		else
-			return -ERESTARTSYS;
-	}
-
-	/* if non-blocking mode is set ... */
-
-	if ((filp->f_flags & O_NONBLOCK) ||
-			(tty->flags & (1 << TTY_IO_ERROR))) {
-		pr_dbg("block_til_ready: non-block mode.\n");
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (C_CLOCAL(tty))
-		do_clocal = 1;
-
-	/* block waiting for DCD to be asserted, and while
-						callout dev is busy */
-	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
-
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count--;
-	port->blocked_open++;
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	while (1) {
-		tty_port_raise_dtr_rts(port);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
-			if (port->flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		cd = tty_port_carrier_raised(port);
-		if (!(port->flags & ASYNC_CLOSING) &&
-				(do_clocal || cd))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		schedule();
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count++;
-	port->blocked_open--;
-	if (retval == 0)
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-	spin_unlock_irqrestore(&port->lock, flags);
-	return 0;
-}
-
 static int isicom_open(struct tty_struct *tty, struct file *filp)
 {
 	struct isi_port *port;
@@ -940,12 +864,13 @@ static int isicom_open(struct tty_struct *tty, struct file *filp)
 
 	isicom_setup_board(card);
 
+	/* FIXME: locking on port.count etc */
 	port->port.count++;
 	tty->driver_data = port;
 	tty_port_tty_set(&port->port, tty);
 	error = isicom_setup_port(tty);
 	if (error == 0)
-		error = block_til_ready(tty, filp, port);
+		error = tty_port_block_til_ready(&port->port, tty, filp);
 	return error;
 }
 
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index e2471cf..08ba6eb 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -558,75 +558,6 @@ static void mxser_raise_dtr_rts(struct tty_port *port)
 	spin_unlock_irqrestore(&mp->slock, flags);
 }
 
-static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
-		struct mxser_port *mp)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int retval;
-	int do_clocal = 0;
-	unsigned long flags;
-	int cd;
-	struct tty_port *port = &mp->port;
-
-	/*
-	 * If non-blocking mode is set, or the port is not enabled,
-	 * then make the check up front and then exit.
-	 */
-	if ((filp->f_flags & O_NONBLOCK) ||
-			test_bit(TTY_IO_ERROR, &tty->flags)) {
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (tty->termios->c_cflag & CLOCAL)
-		do_clocal = 1;
-
-	/*
-	 * Block waiting for the carrier detect and the line to become
-	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, port->count is dropped by one, so that
-	 * mxser_close() knows when to free things.  We restore it upon
-	 * exit, either normal or abnormal.
-	 */
-	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
-
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count--;
-	port->blocked_open++;
-	spin_unlock_irqrestore(&port->lock, flags);
-	while (1) {
-		tty_port_raise_dtr_rts(port);
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
-			if (port->flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		cd = tty_port_carrier_raised(port);
-		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		schedule();
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count++;
-	port->blocked_open--;
-	if (retval == 0)
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-	spin_unlock_irqrestore(&port->lock, flags);
-	return 0;
-}
-
 static int mxser_set_baud(struct tty_struct *tty, long newspd)
 {
 	struct mxser_port *info = tty->driver_data;
@@ -1110,7 +1041,7 @@ static int mxser_open(struct tty_struct *tty, struct file *filp)
 	if (retval)
 		return retval;
 
-	retval = mxser_block_til_ready(tty, filp, info);
+	retval = tty_port_block_til_ready(&info->port, tty, filp);
 	if (retval)
 		return retval;
 
diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c
index 14662d7..af34c20 100644
--- a/drivers/char/riscom8.c
+++ b/drivers/char/riscom8.c
@@ -874,90 +874,6 @@ static int carrier_raised(struct tty_port *port)
 	return CD;
 }
 
-static int block_til_ready(struct tty_struct *tty, struct file *filp,
-			   struct riscom_port *rp)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int    retval;
-	int    do_clocal = 0;
-	int    CD;
-	unsigned long flags;
-	struct tty_port *port = &rp->port;
-
-	/*
-	 * If the device is in the middle of being closed, then block
-	 * until it's done, and then try again.
-	 */
-	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->close_wait);
-		if (port->flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		else
-			return -ERESTARTSYS;
-	}
-
-	/*
-	 * If non-blocking mode is set, or the port is not enabled,
-	 * then make the check up front and then exit.
-	 */
-	if ((filp->f_flags & O_NONBLOCK) ||
-	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (C_CLOCAL(tty))
-		do_clocal = 1;
-
-	/*
-	 * Block waiting for the carrier detect and the line to become
-	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->count is dropped by one, so that
-	 * rs_close() knows when to free things.  We restore it upon
-	 * exit, either normal or abnormal.
-	 */
-	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
-
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count--;
-	port->blocked_open++;
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	while (1) {
-
-		CD = tty_port_carrier_raised(port);
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) ||
-		    !(port->flags & ASYNC_INITIALIZED)) {
-			if (port->flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		if (!(port->flags & ASYNC_CLOSING) &&
-		    (do_clocal || CD))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count++;
-	port->blocked_open--;
-	if (retval == 0)
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-	spin_unlock_irqrestore(&port->lock, flags);
-	return 0;
-}
-
 static int rc_open(struct tty_struct *tty, struct file *filp)
 {
 	int board;
@@ -984,7 +900,7 @@ static int rc_open(struct tty_struct *tty, struct file *filp)
 
 	error = rc_setup_port(bp, port);
 	if (error == 0)
-		error = block_til_ready(tty, filp, port);
+		error = tty_port_block_til_ready(&port->port, tty, filp);
 	return error;
 }
 
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index ac9f21e..0ded4ed 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3401,6 +3401,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&port->open_wait, &wait);
 	
+	/* FIXME: Racy on hangup during close wait */
 	if (extra_count)
 		port->count++;
 	port->blocked_open--;
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 9f418bc..ff94182 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -151,3 +151,108 @@ void tty_port_raise_dtr_rts(struct tty_port *port)
 		port->ops->raise_dtr_rts(port);
 }
 EXPORT_SYMBOL(tty_port_raise_dtr_rts);
+
+/**
+ *	tty_port_block_til_ready	-	Waiting logic for tty open
+ *	@port: the tty port being opened
+ *	@tty: the tty device being bound
+ *	@filp: the file pointer of the opener
+ *
+ *	Implement the core POSIX/SuS tty behaviour when opening a tty device.
+ *	Handles:
+ *		- hangup (both before and during)
+ *		- non blocking open
+ *		- rts/dtr/dcd
+ *		- signals
+ *		- port flags and counts
+ *
+ *	The passed tty_port must implement the carrier_raised method if it can
+ *	do carrier detect and the raise_dtr_rts method if it supports software
+ *	management of these lines. Note that the dtr/rts raise is done each
+ *	iteration as a hangup may have previously dropped them while we wait.
+ */
+ 
+int tty_port_block_til_ready(struct tty_port *port,
+				struct tty_struct *tty, struct file *filp)
+{
+	int do_clocal = 0, retval;
+	unsigned long flags;
+	DECLARE_WAITQUEUE(wait, current);
+	int cd;
+
+	/* block if port is in the process of being closed */
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
+		interruptible_sleep_on(&port->close_wait);
+		if (port->flags & ASYNC_HUP_NOTIFY)
+			return -EAGAIN;
+		else
+			return -ERESTARTSYS;
+	}
+
+	/* if non-blocking mode is set we can pass directly to open unless
+	   the port has just hung up or is in another error state */
+	if ((filp->f_flags & O_NONBLOCK) ||
+			(tty->flags & (1 << TTY_IO_ERROR))) {
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+		return 0;
+	}
+
+	if (C_CLOCAL(tty))
+		do_clocal = 1;
+
+	/* Block waiting until we can proceed. We may need to wait for the
+	   carrier, but we must also wait for any close that is in progress
+	   before the next open may complete */
+
+	retval = 0;
+	add_wait_queue(&port->open_wait, &wait);
+
+	/* The port lock protects the port counts */
+	spin_lock_irqsave(&port->lock, flags);
+	if (!tty_hung_up_p(filp))
+		port->count--;
+	port->blocked_open++;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	while (1) {
+		/* Indicate we are open */
+		tty_port_raise_dtr_rts(port);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		/* Check for a hangup or uninitialised port. Return accordingly */
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
+				retval = -EAGAIN;
+			else
+				retval = -ERESTARTSYS;
+			break;
+		}
+		/* Probe the carrier. For devices with no carrier detect this
+		   will always return true */
+		cd = tty_port_carrier_raised(port);
+		if (!(port->flags & ASYNC_CLOSING) &&
+				(do_clocal || cd))
+			break;
+		if (signal_pending(current)) {
+			retval = -ERESTARTSYS;
+			break;
+		}
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&port->open_wait, &wait);
+
+	/* Update counts. A parallel hangup will have set count to zero and
+	   we must not mess that up further */
+	spin_lock_irqsave(&port->lock, flags);
+	if (!tty_hung_up_p(filp))
+		port->count++;
+	port->blocked_open--;
+	if (retval == 0)
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+	spin_unlock_irqrestore(&port->lock, flags);
+	return 0;
+	
+}
+EXPORT_SYMBOL(tty_port_block_til_ready);
+
diff --git a/drivers/char/vme_scc.c b/drivers/char/vme_scc.c
index d4e1534..2d9242a 100644
--- a/drivers/char/vme_scc.c
+++ b/drivers/char/vme_scc.c
@@ -631,8 +631,8 @@ static void scc_enable_rx_interrupts(void *ptr)
 
 static int scc_carrier_raised(struct tty_port *port)
 {
-	struct scc_port *scc = container_of(port, struct scc_port, gs.port);
-	unsigned channel = port->channel;
+	struct scc_port *sc = container_of(port, struct scc_port, gs.port);
+	unsigned channel = sc->channel;
 
 	return !!(scc_last_status_reg[channel] & SR_DCD);
 }
@@ -643,7 +643,7 @@ static void scc_shutdown_port(void *ptr)
 	struct scc_port *port = ptr;
 
 	port->gs.port.flags &= ~ GS_ACTIVE;
-	if (port->gs.port.tty && port->gs.port.tty->termios->c_cflag & HUPCL) {
+	if (port->gs.port.tty && (port->gs.port.tty->termios->c_cflag & HUPCL)) {
 		scc_setsignals (port, 0, 0);
 	}
 }
diff --git a/include/linux/tty.h b/include/linux/tty.h
index a1a9314..61a0ab3 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -439,6 +439,8 @@ extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
 extern void tty_port_raise_dtr_rts(struct tty_port *port);
 extern void tty_port_hangup(struct tty_port *port);
+extern int tty_port_block_til_ready(struct tty_port *port,
+				struct tty_struct *tty, struct file *filp);
 
 extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
-- 
cgit v0.10.2


From 2a6eadbd5a2ae8f458e421f3614f1ad13c0f9a1c Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:46:18 +0000
Subject: tty: Rework istallion to use the tty port changes

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index c4682f9..4c69ab9 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -626,8 +626,6 @@ static int	stli_hostcmd(struct stlibrd *brdp, struct stliport *portp);
 static int	stli_initopen(struct tty_struct *tty, struct stlibrd *brdp, struct stliport *portp);
 static int	stli_rawopen(struct stlibrd *brdp, struct stliport *portp, unsigned long arg, int wait);
 static int	stli_rawclose(struct stlibrd *brdp, struct stliport *portp, unsigned long arg, int wait);
-static int	stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
-				struct stliport *portp, struct file *filp);
 static int	stli_setport(struct tty_struct *tty);
 static int	stli_cmdwait(struct stlibrd *brdp, struct stliport *portp, unsigned long cmd, void *arg, int size, int copyback);
 static void	stli_sendcmd(struct stlibrd *brdp, struct stliport *portp, unsigned long cmd, void *arg, int size, int copyback);
@@ -787,6 +785,7 @@ static int stli_open(struct tty_struct *tty, struct file *filp)
 {
 	struct stlibrd *brdp;
 	struct stliport *portp;
+	struct tty_port *port;
 	unsigned int minordev, brdnr, portnr;
 	int rc;
 
@@ -808,30 +807,19 @@ static int stli_open(struct tty_struct *tty, struct file *filp)
 		return -ENODEV;
 	if (portp->devnr < 1)
 		return -ENODEV;
-
-
-/*
- *	Check if this port is in the middle of closing. If so then wait
- *	until it is closed then return error status based on flag settings.
- *	The sleep here does not need interrupt protection since the wakeup
- *	for it is done with the same context.
- */
-	if (portp->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&portp->port.close_wait);
-		if (portp->port.flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		return -ERESTARTSYS;
-	}
+	port = &portp->port;
 
 /*
  *	On the first open of the device setup the port hardware, and
  *	initialize the per port data structure. Since initializing the port
  *	requires several commands to the board we will need to wait for any
  *	other open that is already initializing the port.
+ *
+ *	Review - locking
  */
-	tty_port_tty_set(&portp->port, tty);
+	tty_port_tty_set(port, tty);
 	tty->driver_data = portp;
-	portp->port.count++;
+	port->count++;
 
 	wait_event_interruptible(portp->raw_wait,
 			!test_bit(ST_INITIALIZING, &portp->state));
@@ -841,7 +829,8 @@ static int stli_open(struct tty_struct *tty, struct file *filp)
 	if ((portp->port.flags & ASYNC_INITIALIZED) == 0) {
 		set_bit(ST_INITIALIZING, &portp->state);
 		if ((rc = stli_initopen(tty, brdp, portp)) >= 0) {
-			portp->port.flags |= ASYNC_INITIALIZED;
+			/* Locking */
+			port->flags |= ASYNC_INITIALIZED;
 			clear_bit(TTY_IO_ERROR, &tty->flags);
 		}
 		clear_bit(ST_INITIALIZING, &portp->state);
@@ -849,31 +838,7 @@ static int stli_open(struct tty_struct *tty, struct file *filp)
 		if (rc < 0)
 			return rc;
 	}
-
-/*
- *	Check if this port is in the middle of closing. If so then wait
- *	until it is closed then return error status, based on flag settings.
- *	The sleep here does not need interrupt protection since the wakeup
- *	for it is done with the same context.
- */
-	if (portp->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&portp->port.close_wait);
-		if (portp->port.flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		return -ERESTARTSYS;
-	}
-
-/*
- *	Based on type of open being done check if it can overlap with any
- *	previous opens still in effect. If we are a normal serial device
- *	then also we might have to wait for carrier.
- */
-	if (!(filp->f_flags & O_NONBLOCK)) {
-		if ((rc = stli_waitcarrier(tty, brdp, portp, filp)) != 0)
-			return rc;
-	}
-	portp->port.flags |= ASYNC_NORMAL_ACTIVE;
-	return 0;
+	return tty_port_block_til_ready(&portp->port, tty, filp);
 }
 
 /*****************************************************************************/
@@ -882,25 +847,29 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
 {
 	struct stlibrd *brdp;
 	struct stliport *portp;
+	struct tty_port *port;
 	unsigned long flags;
 
 	portp = tty->driver_data;
 	if (portp == NULL)
 		return;
+	port = &portp->port;
 
-	spin_lock_irqsave(&stli_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&stli_lock, flags);
+		spin_unlock_irqrestore(&port->lock, flags);
 		return;
 	}
-	if ((tty->count == 1) && (portp->port.count != 1))
-		portp->port.count = 1;
-	if (portp->port.count-- > 1) {
-		spin_unlock_irqrestore(&stli_lock, flags);
+	if (tty->count == 1 && port->count != 1)
+		port->count = 1;
+	if (port->count-- > 1) {
+		spin_unlock_irqrestore(&port->lock, flags);
 		return;
 	}
 
-	portp->port.flags |= ASYNC_CLOSING;
+	port->flags |= ASYNC_CLOSING;
+	tty->closing = 1;
+	spin_unlock_irqrestore(&port->lock, flags);
 
 /*
  *	May want to wait for data to drain before closing. The BUSY flag
@@ -908,15 +877,17 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
  *	updated by messages from the slave - indicating when all chars
  *	really have drained.
  */
+ 	spin_lock_irqsave(&stli_lock, flags);
 	if (tty == stli_txcooktty)
 		stli_flushchars(tty);
-	tty->closing = 1;
 	spin_unlock_irqrestore(&stli_lock, flags);
 
 	if (portp->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, portp->closing_wait);
 
-	portp->port.flags &= ~ASYNC_INITIALIZED;
+	/* FIXME: port locking here needs attending to */
+	port->flags &= ~ASYNC_INITIALIZED;
+
 	brdp = stli_brds[portp->brdnr];
 	stli_rawclose(brdp, portp, 0, 0);
 	if (tty->termios->c_cflag & HUPCL) {
@@ -937,14 +908,14 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
 	tty->closing = 0;
 	tty_port_tty_set(&portp->port, NULL);
 
-	if (portp->openwaitcnt) {
+	if (port->blocked_open) {
 		if (portp->close_delay)
 			msleep_interruptible(jiffies_to_msecs(portp->close_delay));
-		wake_up_interruptible(&portp->port.open_wait);
+		wake_up_interruptible(&port->open_wait);
 	}
 
-	portp->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&portp->port.close_wait);
+	port->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
+	wake_up_interruptible(&port->close_wait);
 }
 
 /*****************************************************************************/
@@ -1189,63 +1160,17 @@ static int stli_carrier_raised(struct tty_port *port)
 	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-/*
- *	Possibly need to wait for carrier (DCD signal) to come high. Say
- *	maybe because if we are clocal then we don't need to wait...
- */
-
-static int stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
-				struct stliport *portp, struct file *filp)
+static void stli_raise_dtr_rts(struct tty_port *port)
 {
-	unsigned long flags;
-	int rc, doclocal;
-	struct tty_port *port = &portp->port;
-
-	rc = 0;
-	doclocal = 0;
-
-	if (tty->termios->c_cflag & CLOCAL)
-		doclocal++;
-
-	spin_lock_irqsave(&stli_lock, flags);
-	portp->openwaitcnt++;
-	if (! tty_hung_up_p(filp))
-		port->count--;
-	spin_unlock_irqrestore(&stli_lock, flags);
-
-	for (;;) {
-		stli_mkasysigs(&portp->asig, 1, 1);
-		if ((rc = stli_cmdwait(brdp, portp, A_SETSIGNALS,
-		    &portp->asig, sizeof(asysigs_t), 0)) < 0)
-			break;
-		if (tty_hung_up_p(filp) ||
-		    ((port->flags & ASYNC_INITIALIZED) == 0)) {
-			if (port->flags & ASYNC_HUP_NOTIFY)
-				rc = -EBUSY;
-			else
-				rc = -ERESTARTSYS;
-			break;
-		}
-		if (((port->flags & ASYNC_CLOSING) == 0) &&
-		    (doclocal || tty_port_carrier_raised(port))) {
-			break;
-		}
-		if (signal_pending(current)) {
-			rc = -ERESTARTSYS;
-			break;
-		}
-		interruptible_sleep_on(&port->open_wait);
-	}
-
-	spin_lock_irqsave(&stli_lock, flags);
-	if (! tty_hung_up_p(filp))
-		port->count++;
-	portp->openwaitcnt--;
-	spin_unlock_irqrestore(&stli_lock, flags);
-
-	return rc;
+	struct stliport *portp = container_of(port, struct stliport, port);
+	struct stlibrd *brdp = stli_brds[portp->brdnr];
+	stli_mkasysigs(&portp->asig, 1, 1);
+	if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig,
+		sizeof(asysigs_t), 0) < 0)
+			printk(KERN_WARNING "istallion: dtr raise failed.\n");
 }
 
+
 /*****************************************************************************/
 
 /*
@@ -1828,6 +1753,7 @@ static void stli_hangup(struct tty_struct *tty)
 {
 	struct stliport *portp;
 	struct stlibrd *brdp;
+	struct tty_port *port;
 	unsigned long flags;
 
 	portp = tty->driver_data;
@@ -1838,8 +1764,11 @@ static void stli_hangup(struct tty_struct *tty)
 	brdp = stli_brds[portp->brdnr];
 	if (brdp == NULL)
 		return;
+	port = &portp->port;
 
-	portp->port.flags &= ~ASYNC_INITIALIZED;
+	spin_lock_irqsave(&port->lock, flags);
+	port->flags &= ~ASYNC_INITIALIZED;
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	if (!test_bit(ST_CLOSING, &portp->state))
 		stli_rawclose(brdp, portp, 0, 0);
@@ -1860,12 +1789,9 @@ static void stli_hangup(struct tty_struct *tty)
 	clear_bit(ST_TXBUSY, &portp->state);
 	clear_bit(ST_RXSTOP, &portp->state);
 	set_bit(TTY_IO_ERROR, &tty->flags);
-	tty_port_tty_set(&portp->port, NULL);
-	portp->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	portp->port.count = 0;
 	spin_unlock_irqrestore(&stli_lock, flags);
 
-	wake_up_interruptible(&portp->port.open_wait);
+	tty_port_hangup(port);
 }
 
 /*****************************************************************************/
@@ -4528,6 +4454,7 @@ static const struct tty_operations stli_ops = {
 
 static const struct tty_port_operations stli_port_ops = {
 	.carrier_raised = stli_carrier_raised,
+	.raise_dtr_rts = stli_raise_dtr_rts,
 };
 
 /*****************************************************************************/
diff --git a/include/linux/istallion.h b/include/linux/istallion.h
index 0d18407..053d5ae 100644
--- a/include/linux/istallion.h
+++ b/include/linux/istallion.h
@@ -61,7 +61,6 @@ struct stliport {
 	int			custom_divisor;
 	int			close_delay;
 	int			closing_wait;
-	int			openwaitcnt;
 	int			rc;
 	int			argsize;
 	void			*argp;
-- 
cgit v0.10.2


From 4350f3ffec7a7e70770a7369186b3db7d97acfdf Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:46:24 +0000
Subject: tty: rework stallion to use the tty_port bits

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 12aecdaf..77eef61 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -409,7 +409,6 @@ static int	stl_memioctl(struct inode *ip, struct file *fp, unsigned int cmd, uns
 static int	stl_brdinit(struct stlbrd *brdp);
 static int	stl_getportstats(struct tty_struct *tty, struct stlport *portp, comstats_t __user *cp);
 static int	stl_clrportstats(struct stlport *portp, comstats_t __user *cp);
-static int	stl_waitcarrier(struct tty_struct *tty, struct stlport *portp, struct file *filp);
 
 /*
  *	CD1400 uart specific handling functions.
@@ -705,8 +704,9 @@ static int stl_open(struct tty_struct *tty, struct file *filp)
 {
 	struct stlport	*portp;
 	struct stlbrd	*brdp;
+	struct tty_port *port;
 	unsigned int	minordev, brdnr, panelnr;
-	int		portnr, rc;
+	int		portnr;
 
 	pr_debug("stl_open(tty=%p,filp=%p): device=%s\n", tty, filp, tty->name);
 
@@ -717,6 +717,7 @@ static int stl_open(struct tty_struct *tty, struct file *filp)
 	brdp = stl_brds[brdnr];
 	if (brdp == NULL)
 		return -ENODEV;
+
 	minordev = MINOR2PORT(minordev);
 	for (portnr = -1, panelnr = 0; panelnr < STL_MAXPANELS; panelnr++) {
 		if (brdp->panels[panelnr] == NULL)
@@ -733,16 +734,17 @@ static int stl_open(struct tty_struct *tty, struct file *filp)
 	portp = brdp->panels[panelnr]->ports[portnr];
 	if (portp == NULL)
 		return -ENODEV;
+	port = &portp->port;
 
 /*
  *	On the first open of the device setup the port hardware, and
  *	initialize the per port data structure.
  */
-	tty_port_tty_set(&portp->port, tty);
+	tty_port_tty_set(port, tty);
 	tty->driver_data = portp;
-	portp->port.count++;
+	port->count++;
 
-	if ((portp->port.flags & ASYNC_INITIALIZED) == 0) {
+	if ((port->flags & ASYNC_INITIALIZED) == 0) {
 		if (!portp->tx.buf) {
 			portp->tx.buf = kmalloc(STL_TXBUFSIZE, GFP_KERNEL);
 			if (!portp->tx.buf)
@@ -756,34 +758,9 @@ static int stl_open(struct tty_struct *tty, struct file *filp)
 		stl_enablerxtx(portp, 1, 1);
 		stl_startrxtx(portp, 1, 0);
 		clear_bit(TTY_IO_ERROR, &tty->flags);
-		portp->port.flags |= ASYNC_INITIALIZED;
-	}
-
-/*
- *	Check if this port is in the middle of closing. If so then wait
- *	until it is closed then return error status, based on flag settings.
- *	The sleep here does not need interrupt protection since the wakeup
- *	for it is done with the same context.
- */
-	if (portp->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&portp->port.close_wait);
-		if (portp->port.flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		return -ERESTARTSYS;
+		port->flags |= ASYNC_INITIALIZED;
 	}
-
-/*
- *	Based on type of open being done check if it can overlap with any
- *	previous opens still in effect. If we are a normal serial device
- *	then also we might have to wait for carrier.
- */
-	if (!(filp->f_flags & O_NONBLOCK))
-		if ((rc = stl_waitcarrier(tty, portp, filp)) != 0)
-			return rc;
-
-	portp->port.flags |= ASYNC_NORMAL_ACTIVE;
-
-	return 0;
+	return tty_port_block_til_ready(port, tty, filp);
 }
 
 /*****************************************************************************/
@@ -794,60 +771,11 @@ static int stl_carrier_raised(struct tty_port *port)
 	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-/*
- *	Possibly need to wait for carrier (DCD signal) to come high. Say
- *	maybe because if we are clocal then we don't need to wait...
- */
-
-static int stl_waitcarrier(struct tty_struct *tty, struct stlport *portp,
-							struct file *filp)
+static void stl_raise_dtr_rts(struct tty_port *port)
 {
-	unsigned long	flags;
-	int		rc, doclocal;
-	struct tty_port *port = &portp->port;
-
-	pr_debug("stl_waitcarrier(portp=%p,filp=%p)\n", portp, filp);
-
-	rc = 0;
-	doclocal = 0;
-
-	spin_lock_irqsave(&stallion_lock, flags);
-
-	if (tty->termios->c_cflag & CLOCAL)
-		doclocal++;
-
-	portp->openwaitcnt++;
-	if (! tty_hung_up_p(filp))
-		port->count--;
-
-	for (;;) {
-		/* Takes brd_lock internally */
-		stl_setsignals(portp, 1, 1);
-		if (tty_hung_up_p(filp) ||
-		    ((port->flags & ASYNC_INITIALIZED) == 0)) {
-			if (port->flags & ASYNC_HUP_NOTIFY)
-				rc = -EBUSY;
-			else
-				rc = -ERESTARTSYS;
-			break;
-		}
-		if (((port->flags & ASYNC_CLOSING) == 0) &&
-		    (doclocal || tty_port_carrier_raised(port)))
-			break;
-		if (signal_pending(current)) {
-			rc = -ERESTARTSYS;
-			break;
-		}
-		/* FIXME */
-		interruptible_sleep_on(&port->open_wait);
-	}
-
-	if (! tty_hung_up_p(filp))
-		port->count++;
-	portp->openwaitcnt--;
-	spin_unlock_irqrestore(&stallion_lock, flags);
-
-	return rc;
+	struct stlport *portp = container_of(port, struct stlport, port);
+	/* Takes brd_lock internally */
+	stl_setsignals(portp, 1, 1);
 }
 
 /*****************************************************************************/
@@ -899,6 +827,7 @@ static void stl_waituntilsent(struct tty_struct *tty, int timeout)
 static void stl_close(struct tty_struct *tty, struct file *filp)
 {
 	struct stlport	*portp;
+	struct tty_port *port;
 	unsigned long	flags;
 
 	pr_debug("stl_close(tty=%p,filp=%p)\n", tty, filp);
@@ -906,21 +835,22 @@ static void stl_close(struct tty_struct *tty, struct file *filp)
 	portp = tty->driver_data;
 	if (portp == NULL)
 		return;
+	port = &portp->port;
 
-	spin_lock_irqsave(&stallion_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&stallion_lock, flags);
+		spin_unlock_irqrestore(&port->lock, flags);
 		return;
 	}
-	if ((tty->count == 1) && (portp->port.count != 1))
-		portp->port.count = 1;
-	if (portp->port.count-- > 1) {
-		spin_unlock_irqrestore(&stallion_lock, flags);
+	if (tty->count == 1 && port->count != 1)
+		port->count = 1;
+	if (port->count-- > 1) {
+		spin_unlock_irqrestore(&port->lock, flags);
 		return;
 	}
 
-	portp->port.count = 0;
-	portp->port.flags |= ASYNC_CLOSING;
+	port->count = 0;
+	port->flags |= ASYNC_CLOSING;
 
 /*
  *	May want to wait for any data to drain before closing. The BUSY
@@ -930,16 +860,16 @@ static void stl_close(struct tty_struct *tty, struct file *filp)
  */
 	tty->closing = 1;
 
-	spin_unlock_irqrestore(&stallion_lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	if (portp->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, portp->closing_wait);
 	stl_waituntilsent(tty, (HZ / 2));
 
 
-	spin_lock_irqsave(&stallion_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	portp->port.flags &= ~ASYNC_INITIALIZED;
-	spin_unlock_irqrestore(&stallion_lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	stl_disableintrs(portp);
 	if (tty->termios->c_cflag & HUPCL)
@@ -957,16 +887,16 @@ static void stl_close(struct tty_struct *tty, struct file *filp)
 	tty_ldisc_flush(tty);
 
 	tty->closing = 0;
-	tty_port_tty_set(&portp->port, NULL);
+	tty_port_tty_set(port, NULL);
 
-	if (portp->openwaitcnt) {
+	if (port->blocked_open) {
 		if (portp->close_delay)
 			msleep_interruptible(jiffies_to_msecs(portp->close_delay));
 		wake_up_interruptible(&portp->port.open_wait);
 	}
 
 	portp->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&portp->port.close_wait);
+	wake_up_interruptible(&port->close_wait);
 }
 
 /*****************************************************************************/
@@ -1414,14 +1344,20 @@ static void stl_stop(struct tty_struct *tty)
 static void stl_hangup(struct tty_struct *tty)
 {
 	struct stlport	*portp;
+	struct tty_port *port;
+	unsigned long flags;
 
 	pr_debug("stl_hangup(tty=%p)\n", tty);
 
 	portp = tty->driver_data;
 	if (portp == NULL)
 		return;
+	port = &portp->port;
+
+	spin_lock_irqsave(&port->lock, flags);
+	port->flags &= ~ASYNC_INITIALIZED;
+	spin_unlock_irqrestore(&port->lock, flags);
 
-	portp->port.flags &= ~ASYNC_INITIALIZED;
 	stl_disableintrs(portp);
 	if (tty->termios->c_cflag & HUPCL)
 		stl_setsignals(portp, 0, 0);
@@ -1435,10 +1371,7 @@ static void stl_hangup(struct tty_struct *tty)
 		portp->tx.head = NULL;
 		portp->tx.tail = NULL;
 	}
-	tty_port_tty_set(&portp->port, NULL);
-	portp->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	portp->port.count = 0;
-	wake_up_interruptible(&portp->port.open_wait);
+	tty_port_hangup(port);
 }
 
 /*****************************************************************************/
@@ -2671,6 +2604,7 @@ static const struct tty_operations stl_ops = {
 
 static const struct tty_port_operations stl_port_ops = {
 	.carrier_raised = stl_carrier_raised,
+	.raise_dtr_rts = stl_raise_dtr_rts,
 };
 
 /*****************************************************************************/
-- 
cgit v0.10.2


From 0fdeceb88df7f4d9e4734859bcd650e8584cc0e4 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:46:34 +0000
Subject: tty: ESP has been broken for locking etc forver

Mark it broken

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index c52a167..1697043 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -190,7 +190,7 @@ config DIGIEPCA
 
 config ESPSERIAL
 	tristate "Hayes ESP serial port support"
-	depends on SERIAL_NONSTANDARD && ISA && ISA_DMA_API
+	depends on SERIAL_NONSTANDARD && ISA && ISA_DMA_API && BROKEN
 	help
 	  This is a driver which supports Hayes ESP serial ports.  Both single
 	  port cards and multiport cards are supported.  Make sure to read
-- 
cgit v0.10.2


From 7834909f1eb96ba7c49ca2b9e3a69b500a2cff76 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:46:43 +0000
Subject: tty: tty port zero baud open

If we have no speed set at some point then we should not raise DTR/RTS at
that point when opening as the tty is not ready

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index ff94182..0723664 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -216,7 +216,8 @@ int tty_port_block_til_ready(struct tty_port *port,
 
 	while (1) {
 		/* Indicate we are open */
-		tty_port_raise_dtr_rts(port);
+		if (tty->termios->c_cflag & CBAUD)
+			tty_port_raise_dtr_rts(port);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		/* Check for a hangup or uninitialised port. Return accordingly */
-- 
cgit v0.10.2


From a6614999e800cf3a134ce93ea46ef837e3c0e76e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:46:50 +0000
Subject: tty: Introduce some close helpers for ports

Again this is a lot of common code we can unify

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index bac55cf..24aa6e8 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -945,76 +945,30 @@ static void isicom_flush_buffer(struct tty_struct *tty)
 
 static void isicom_close(struct tty_struct *tty, struct file *filp)
 {
-	struct isi_port *port = tty->driver_data;
+	struct isi_port *ip = tty->driver_data;
+	struct tty_port *port = &ip->port;
 	struct isi_board *card;
 	unsigned long flags;
 
-	if (!port)
-		return;
-	card = port->card;
-	if (isicom_paranoia_check(port, tty->name, "isicom_close"))
-		return;
-
-	pr_dbg("Close start!!!.\n");
-
-	spin_lock_irqsave(&port->port.lock, flags);
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&port->port.lock, flags);
-		return;
-	}
-
-	if (tty->count == 1 && port->port.count != 1) {
-		printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port "
-			"count tty->count = 1 port count = %d.\n",
-			card->base, port->port.count);
-		port->port.count = 1;
-	}
-	if (--port->port.count < 0) {
-		printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port "
-			"count for channel%d = %d", card->base, port->channel,
-			port->port.count);
-		port->port.count = 0;
-	}
+	BUG_ON(!ip);
 
-	if (port->port.count) {
-		spin_unlock_irqrestore(&port->port.lock, flags);
+	card = ip->card;
+	if (isicom_paranoia_check(ip, tty->name, "isicom_close"))
 		return;
-	}
-	port->port.flags |= ASYNC_CLOSING;
-	tty->closing = 1;
-	spin_unlock_irqrestore(&port->port.lock, flags);
 
-	if (port->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, port->port.closing_wait);
 	/* indicate to the card that no more data can be received
 	   on this port */
 	spin_lock_irqsave(&card->card_lock, flags);
-	if (port->port.flags & ASYNC_INITIALIZED) {
-		card->port_status &= ~(1 << port->channel);
+	if (port->flags & ASYNC_INITIALIZED) {
+		card->port_status &= ~(1 << ip->channel);
 		outw(card->port_status, card->base + 0x02);
 	}
-	isicom_shutdown_port(port);
+	isicom_shutdown_port(ip);
 	spin_unlock_irqrestore(&card->card_lock, flags);
 
 	isicom_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
-	spin_lock_irqsave(&port->port.lock, flags);
-	tty->closing = 0;
-
-	if (port->port.blocked_open) {
-		spin_unlock_irqrestore(&port->port.lock, flags);
-		if (port->port.close_delay) {
-			pr_dbg("scheduling until time out.\n");
-			msleep_interruptible(
-				jiffies_to_msecs(port->port.close_delay));
-		}
-		spin_lock_irqsave(&port->port.lock, flags);
-		wake_up_interruptible(&port->port.open_wait);
-	}
-	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
-	wake_up_interruptible(&port->port.close_wait);
-	spin_unlock_irqrestore(&port->port.lock, flags);
+	
+	tty_port_close_end(port, tty);
 }
 
 /* write et all */
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index 4c69ab9..5c3dc6b 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -767,7 +767,7 @@ static int stli_parsebrd(struct stlconf *confp, char **argp)
 			break;
 	}
 	if (i == ARRAY_SIZE(stli_brdstr)) {
-		printk("STALLION: unknown board name, %s?\n", argp[0]);
+		printk(KERN_WARNING "istallion: unknown board name, %s?\n", argp[0]);
 		return 0;
 	}
 
@@ -855,21 +855,8 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
 		return;
 	port = &portp->port;
 
-	spin_lock_irqsave(&port->lock, flags);
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&port->lock, flags);
-		return;
-	}
-	if (tty->count == 1 && port->count != 1)
-		port->count = 1;
-	if (port->count-- > 1) {
-		spin_unlock_irqrestore(&port->lock, flags);
+	if (tty_port_close_start(port, tty, filp) == 0)
 		return;
-	}
-
-	port->flags |= ASYNC_CLOSING;
-	tty->closing = 1;
-	spin_unlock_irqrestore(&port->lock, flags);
 
 /*
  *	May want to wait for data to drain before closing. The BUSY flag
@@ -882,6 +869,8 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
 		stli_flushchars(tty);
 	spin_unlock_irqrestore(&stli_lock, flags);
 
+	/* We end up doing this twice for the moment. This needs looking at
+	   eventually. Note we still use portp->closing_wait as a result */
 	if (portp->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, portp->closing_wait);
 
@@ -905,17 +894,8 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
 	set_bit(ST_DOFLUSHRX, &portp->state);
 	stli_flushbuffer(tty);
 
-	tty->closing = 0;
-	tty_port_tty_set(&portp->port, NULL);
-
-	if (port->blocked_open) {
-		if (portp->close_delay)
-			msleep_interruptible(jiffies_to_msecs(portp->close_delay));
-		wake_up_interruptible(&port->open_wait);
-	}
-
-	port->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&port->close_wait);
+	tty_port_close_end(port, tty);
+	tty_port_tty_set(port, NULL);
 }
 
 /*****************************************************************************/
@@ -1482,7 +1462,7 @@ static int stli_getserial(struct stliport *portp, struct serial_struct __user *s
 	sio.irq = 0;
 	sio.flags = portp->port.flags;
 	sio.baud_base = portp->baud_base;
-	sio.close_delay = portp->close_delay;
+	sio.close_delay = portp->port.close_delay;
 	sio.closing_wait = portp->closing_wait;
 	sio.custom_divisor = portp->custom_divisor;
 	sio.xmit_fifo_size = 0;
@@ -1514,7 +1494,7 @@ static int stli_setserial(struct tty_struct *tty, struct serial_struct __user *s
 		return -EFAULT;
 	if (!capable(CAP_SYS_ADMIN)) {
 		if ((sio.baud_base != portp->baud_base) ||
-		    (sio.close_delay != portp->close_delay) ||
+		    (sio.close_delay != portp->port.close_delay) ||
 		    ((sio.flags & ~ASYNC_USR_MASK) !=
 		    (portp->port.flags & ~ASYNC_USR_MASK)))
 			return -EPERM;
@@ -1523,7 +1503,7 @@ static int stli_setserial(struct tty_struct *tty, struct serial_struct __user *s
 	portp->port.flags = (portp->port.flags & ~ASYNC_USR_MASK) |
 		(sio.flags & ASYNC_USR_MASK);
 	portp->baud_base = sio.baud_base;
-	portp->close_delay = sio.close_delay;
+	portp->port.close_delay = sio.close_delay;
 	portp->closing_wait = sio.closing_wait;
 	portp->custom_divisor = sio.custom_divisor;
 
@@ -2065,7 +2045,7 @@ static void __stli_sendcmd(struct stlibrd *brdp, struct stliport *portp, unsigne
 	unsigned char __iomem *bits;
 
 	if (test_bit(ST_CMDING, &portp->state)) {
-		printk(KERN_ERR "STALLION: command already busy, cmd=%x!\n",
+		printk(KERN_ERR "istallion: command already busy, cmd=%x!\n",
 				(int) cmd);
 		return;
 	}
@@ -2625,7 +2605,7 @@ static int stli_initports(struct stlibrd *brdp)
 	for (i = 0, panelnr = 0, panelport = 0; (i < brdp->nrports); i++) {
 		portp = kzalloc(sizeof(struct stliport), GFP_KERNEL);
 		if (!portp) {
-			printk("STALLION: failed to allocate port structure\n");
+			printk(KERN_WARNING "istallion: failed to allocate port structure\n");
 			continue;
 		}
 		tty_port_init(&portp->port);
@@ -2635,7 +2615,7 @@ static int stli_initports(struct stlibrd *brdp)
 		portp->brdnr = brdp->brdnr;
 		portp->panelnr = panelnr;
 		portp->baud_base = STL_BAUDBASE;
-		portp->close_delay = STL_CLOSEDELAY;
+		portp->port.close_delay = STL_CLOSEDELAY;
 		portp->closing_wait = 30 * HZ;
 		init_waitqueue_head(&portp->port.open_wait);
 		init_waitqueue_head(&portp->port.close_wait);
@@ -2692,7 +2672,7 @@ static void __iomem *stli_ecpgetmemptr(struct stlibrd *brdp, unsigned long offse
 	unsigned char val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2766,7 +2746,7 @@ static void __iomem *stli_ecpeigetmemptr(struct stlibrd *brdp, unsigned long off
 	unsigned char	val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2818,7 +2798,7 @@ static void __iomem *stli_ecpmcgetmemptr(struct stlibrd *brdp, unsigned long off
 	unsigned char val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2863,7 +2843,7 @@ static void __iomem *stli_ecppcigetmemptr(struct stlibrd *brdp, unsigned long of
 	unsigned char	val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), board=%d\n",
 				(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2928,7 +2908,7 @@ static void __iomem *stli_onbgetmemptr(struct stlibrd *brdp, unsigned long offse
 	void __iomem *ptr;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 				(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2994,7 +2974,7 @@ static void __iomem *stli_onbegetmemptr(struct stlibrd *brdp, unsigned long offs
 	unsigned char val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -3433,7 +3413,7 @@ static int stli_startbrd(struct stlibrd *brdp)
 #endif
 
 	if (nrdevs < (brdp->nrports + 1)) {
-		printk(KERN_ERR "STALLION: slave failed to allocate memory for "
+		printk(KERN_ERR "istallion: slave failed to allocate memory for "
 				"all devices, devices=%d\n", nrdevs);
 		brdp->nrports = nrdevs - 1;
 	}
@@ -3443,13 +3423,13 @@ static int stli_startbrd(struct stlibrd *brdp)
 	brdp->bitsize = (nrdevs + 7) / 8;
 	memoff = readl(&hdrp->memp);
 	if (memoff > brdp->memsize) {
-		printk(KERN_ERR "STALLION: corrupted shared memory region?\n");
+		printk(KERN_ERR "istallion: corrupted shared memory region?\n");
 		rc = -EIO;
 		goto stli_donestartup;
 	}
 	memp = (cdkmem_t __iomem *) EBRDGETMEMPTR(brdp, memoff);
 	if (readw(&memp->dtype) != TYP_ASYNCTRL) {
-		printk(KERN_ERR "STALLION: no slave control device found\n");
+		printk(KERN_ERR "istallion: no slave control device found\n");
 		goto stli_donestartup;
 	}
 	memp++;
@@ -3534,7 +3514,7 @@ static int __devinit stli_brdinit(struct stlibrd *brdp)
 		retval = stli_initonb(brdp);
 		break;
 	default:
-		printk(KERN_ERR "STALLION: board=%d is unknown board "
+		printk(KERN_ERR "istallion: board=%d is unknown board "
 				"type=%d\n", brdp->brdnr, brdp->brdtype);
 		retval = -ENODEV;
 	}
@@ -3543,7 +3523,7 @@ static int __devinit stli_brdinit(struct stlibrd *brdp)
 		return retval;
 
 	stli_initports(brdp);
-	printk(KERN_INFO "STALLION: %s found, board=%d io=%x mem=%x "
+	printk(KERN_INFO "istallion: %s found, board=%d io=%x mem=%x "
 		"nrpanels=%d nrports=%d\n", stli_brdnames[brdp->brdtype],
 		brdp->brdnr, brdp->iobase, (int) brdp->memaddr,
 		brdp->nrpanels, brdp->nrports);
@@ -3637,7 +3617,7 @@ static int stli_eisamemprobe(struct stlibrd *brdp)
 	if (! foundit) {
 		brdp->memaddr = 0;
 		brdp->membase = NULL;
-		printk(KERN_ERR "STALLION: failed to probe shared memory "
+		printk(KERN_ERR "istallion: failed to probe shared memory "
 				"region for %s in EISA slot=%d\n",
 			stli_brdnames[brdp->brdtype], (brdp->iobase >> 12));
 		return -ENODEV;
@@ -3782,7 +3762,7 @@ static int __devinit stli_pciprobe(struct pci_dev *pdev,
 	mutex_lock(&stli_brdslock);
 	brdnr = stli_getbrdnr();
 	if (brdnr < 0) {
-		printk(KERN_INFO "STALLION: too many boards found, "
+		printk(KERN_INFO "istallion: too many boards found, "
 			"maximum supported %d\n", STL_MAXBRDS);
 		mutex_unlock(&stli_brdslock);
 		retval = -EIO;
@@ -3854,7 +3834,7 @@ static struct stlibrd *stli_allocbrd(void)
 
 	brdp = kzalloc(sizeof(struct stlibrd), GFP_KERNEL);
 	if (!brdp) {
-		printk(KERN_ERR "STALLION: failed to allocate memory "
+		printk(KERN_ERR "istallion: failed to allocate memory "
 				"(size=%Zd)\n", sizeof(struct stlibrd));
 		return NULL;
 	}
@@ -4493,7 +4473,7 @@ static int __init istallion_module_init(void)
 
 	stli_txcookbuf = kmalloc(STLI_TXBUFSIZE, GFP_KERNEL);
 	if (!stli_txcookbuf) {
-		printk(KERN_ERR "STALLION: failed to allocate memory "
+		printk(KERN_ERR "istallion: failed to allocate memory "
 				"(size=%d)\n", STLI_TXBUFSIZE);
 		retval = -ENOMEM;
 		goto err;
@@ -4518,7 +4498,7 @@ static int __init istallion_module_init(void)
 
 	retval = tty_register_driver(stli_serial);
 	if (retval) {
-		printk(KERN_ERR "STALLION: failed to register serial driver\n");
+		printk(KERN_ERR "istallion: failed to register serial driver\n");
 		goto err_ttyput;
 	}
 
@@ -4532,7 +4512,7 @@ static int __init istallion_module_init(void)
  */
 	retval = register_chrdev(STL_SIOMEMMAJOR, "staliomem", &stli_fsiomem);
 	if (retval) {
-		printk(KERN_ERR "STALLION: failed to register serial memory "
+		printk(KERN_ERR "istallion: failed to register serial memory "
 				"device\n");
 		goto err_deinit;
 	}
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 08ba6eb..402c9f2 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -1080,58 +1080,27 @@ static void mxser_flush_buffer(struct tty_struct *tty)
 static void mxser_close(struct tty_struct *tty, struct file *filp)
 {
 	struct mxser_port *info = tty->driver_data;
+	struct tty_port *port = &info->port;
 
 	unsigned long timeout;
-	unsigned long flags;
 
 	if (tty->index == MXSER_PORTS)
 		return;
 	if (!info)
 		return;
 
-	spin_lock_irqsave(&info->port.lock, flags);
-
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&info->port.lock, flags);
-		return;
-	}
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * Uh, oh.  tty->count is 1, which means that the tty
-		 * structure will be freed.  Info->port.count should always
-		 * be one in these conditions.  If it's greater than
-		 * one, we've got real problems, since it means the
-		 * serial port won't be shutdown.
-		 */
-		printk(KERN_ERR "mxser_close: bad serial port count; "
-			"tty->count is 1, info->port.count is %d\n", info->port.count);
-		info->port.count = 1;
-	}
-	if (--info->port.count < 0) {
-		printk(KERN_ERR "mxser_close: bad serial port count for "
-			"ttys%d: %d\n", tty->index, info->port.count);
-		info->port.count = 0;
-	}
-	if (info->port.count) {
-		spin_unlock_irqrestore(&info->port.lock, flags);
+	if (tty_port_close_start(port, tty, filp) == 0)
 		return;
-	}
-	info->port.flags |= ASYNC_CLOSING;
-	spin_unlock_irqrestore(&info->port.lock, flags);
+
 	/*
 	 * Save the termios structure, since this port may have
 	 * separate termios for callout and dialin.
+	 *
+	 * FIXME: Can this go ?
 	 */
 	if (info->port.flags & ASYNC_NORMAL_ACTIVE)
 		info->normal_termios = *tty->termios;
 	/*
-	 * Now we wait for the transmit buffer to clear; and we notify
-	 * the line discipline to only process XON/XOFF characters.
-	 */
-	tty->closing = 1;
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, info->port.closing_wait);
-	/*
 	 * At this point we stop accepting input.  To do this, we
 	 * disable the receive line status interrupts, and tell the
 	 * interrupt driver to stop checking the data ready bit in the
@@ -1156,19 +1125,12 @@ static void mxser_close(struct tty_struct *tty, struct file *filp)
 		}
 	}
 	mxser_shutdown(tty);
-
 	mxser_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
-	tty->closing = 0;
-	tty_port_tty_set(&info->port, NULL);
-	if (info->port.blocked_open) {
-		if (info->port.close_delay)
-			schedule_timeout_interruptible(info->port.close_delay);
-		wake_up_interruptible(&info->port.open_wait);
-	}
 
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
+	/* Right now the tty_port set is done outside of the close_end helper
+	   as we don't yet have everyone using refcounts */	
+	tty_port_close_end(port, tty);
+	tty_port_tty_set(port, NULL);
 }
 
 static int mxser_write(struct tty_struct *tty, const unsigned char *buf, int count)
diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c
index af34c20..9ac5feb 100644
--- a/drivers/char/riscom8.c
+++ b/drivers/char/riscom8.c
@@ -929,35 +929,11 @@ static void rc_close(struct tty_struct *tty, struct file *filp)
 	if (!port || rc_paranoia_check(port, tty->name, "close"))
 		return;
 
-	spin_lock_irqsave(&port->port.lock, flags);
-
-	if (tty_hung_up_p(filp))
-		goto out;
-
 	bp = port_Board(port);
-	if ((tty->count == 1) && (port->port.count != 1))  {
-		printk(KERN_INFO "rc%d: rc_close: bad port count;"
-		       " tty->count is 1, port count is %d\n",
-		       board_No(bp), port->port.count);
-		port->port.count = 1;
-	}
-	if (--port->port.count < 0)  {
-		printk(KERN_INFO "rc%d: rc_close: bad port count "
-				 "for tty%d: %d\n",
-		       board_No(bp), port_No(port), port->port.count);
-		port->port.count = 0;
-	}
-	if (port->port.count)
-		goto out;
-	port->port.flags |= ASYNC_CLOSING;
-	/*
-	 * Now we wait for the transmit buffer to clear; and we notify
-	 * the line discipline to only process XON/XOFF characters.
-	 */
-	tty->closing = 1;
-	spin_unlock_irqrestore(&port->port.lock, flags);
-	if (port->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, port->port.closing_wait);
+	
+	if (tty_port_close_start(&port->port, tty, filp) == 0)
+		return;
+	
 	/*
 	 * At this point we stop accepting input.  To do this, we
 	 * disable the receive line status interrupts, and tell the
@@ -989,23 +965,8 @@ static void rc_close(struct tty_struct *tty, struct file *filp)
 	rc_shutdown_port(tty, bp, port);
 	rc_flush_buffer(tty);
 	spin_unlock_irqrestore(&riscom_lock, flags);
-	tty_ldisc_flush(tty);
 
-	spin_lock_irqsave(&port->port.lock, flags);
-	tty->closing = 0;
-	port->port.tty = NULL;
-	if (port->port.blocked_open) {
-		spin_unlock_irqrestore(&port->port.lock, flags);
-		if (port->port.close_delay)
-			msleep_interruptible(jiffies_to_msecs(port->port.close_delay));
-		wake_up_interruptible(&port->port.open_wait);
-		spin_lock_irqsave(&port->port.lock, flags);
-	}
-	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&port->port.close_wait);
-
-out:
-	spin_unlock_irqrestore(&riscom_lock, flags);
+	tty_port_close_end(&port->port, tty);
 }
 
 static int rc_write(struct tty_struct *tty,
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 77eef61..e1e0dd8 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -833,40 +833,20 @@ static void stl_close(struct tty_struct *tty, struct file *filp)
 	pr_debug("stl_close(tty=%p,filp=%p)\n", tty, filp);
 
 	portp = tty->driver_data;
-	if (portp == NULL)
-		return;
+	BUG_ON(portp == NULL);
+
 	port = &portp->port;
 
-	spin_lock_irqsave(&port->lock, flags);
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&port->lock, flags);
+	if (tty_port_close_start(port, tty, filp) == 0)
 		return;
-	}
-	if (tty->count == 1 && port->count != 1)
-		port->count = 1;
-	if (port->count-- > 1) {
-		spin_unlock_irqrestore(&port->lock, flags);
-		return;
-	}
-
-	port->count = 0;
-	port->flags |= ASYNC_CLOSING;
-
 /*
  *	May want to wait for any data to drain before closing. The BUSY
  *	flag keeps track of whether we are still sending or not - it is
  *	very accurate for the cd1400, not quite so for the sc26198.
  *	(The sc26198 has no "end-of-data" interrupt only empty FIFO)
  */
-	tty->closing = 1;
-
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	if (portp->closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, portp->closing_wait);
 	stl_waituntilsent(tty, (HZ / 2));
 
-
 	spin_lock_irqsave(&port->lock, flags);
 	portp->port.flags &= ~ASYNC_INITIALIZED;
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -883,20 +863,9 @@ static void stl_close(struct tty_struct *tty, struct file *filp)
 		portp->tx.head = NULL;
 		portp->tx.tail = NULL;
 	}
-	set_bit(TTY_IO_ERROR, &tty->flags);
-	tty_ldisc_flush(tty);
 
-	tty->closing = 0;
+	tty_port_close_end(port, tty);
 	tty_port_tty_set(port, NULL);
-
-	if (port->blocked_open) {
-		if (portp->close_delay)
-			msleep_interruptible(jiffies_to_msecs(portp->close_delay));
-		wake_up_interruptible(&portp->port.open_wait);
-	}
-
-	portp->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&port->close_wait);
 }
 
 /*****************************************************************************/
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index 0ded4ed..fbd5a5c 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3104,70 +3104,18 @@ static void mgsl_close(struct tty_struct *tty, struct file * filp)
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgsl_close(%s) entry, count=%d\n",
 			 __FILE__,__LINE__, info->device_name, info->port.count);
-			 
-	if (!info->port.count)
-		return;
 
-	if (tty_hung_up_p(filp))
+	if (tty_port_close_start(&info->port, tty, filp) == 0)			 
 		goto cleanup;
 			
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * tty->count is 1 and the tty structure will be freed.
-		 * info->port.count should be one in this case.
-		 * if it's not, correct it so that the port is shutdown.
-		 */
-		printk("mgsl_close: bad refcount; tty->count is 1, "
-		       "info->port.count is %d\n", info->port.count);
-		info->port.count = 1;
-	}
-	
-	info->port.count--;
-	
-	/* if at least one open remaining, leave hardware active */
-	if (info->port.count)
-		goto cleanup;
-	
-	info->port.flags |= ASYNC_CLOSING;
-	
-	/* set tty->closing to notify line discipline to 
-	 * only process XON/XOFF characters. Only the N_TTY
-	 * discipline appears to use this (ppp does not).
-	 */
-	tty->closing = 1;
-	
-	/* wait for transmit data to clear all layers */
-	
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) {
-		if (debug_level >= DEBUG_LEVEL_INFO)
-			printk("%s(%d):mgsl_close(%s) calling tty_wait_until_sent\n",
-				 __FILE__,__LINE__, info->device_name );
-		tty_wait_until_sent(tty, info->port.closing_wait);
-	}
-		
  	if (info->port.flags & ASYNC_INITIALIZED)
  		mgsl_wait_until_sent(tty, info->timeout);
-
 	mgsl_flush_buffer(tty);
-
 	tty_ldisc_flush(tty);
-		
 	shutdown(info);
-	
-	tty->closing = 0;
+
+	tty_port_close_end(&info->port, tty);	
 	info->port.tty = NULL;
-	
-	if (info->port.blocked_open) {
-		if (info->port.close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->port.close_delay));
-		}
-		wake_up_interruptible(&info->port.open_wait);
-	}
-	
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-			 
-	wake_up_interruptible(&info->port.close_wait);
-	
 cleanup:			
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgsl_close(%s) exit, count=%d\n", __FILE__,__LINE__,
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 625c9bd..53544e2 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -720,44 +720,9 @@ static void close(struct tty_struct *tty, struct file *filp)
 		return;
 	DBGINFO(("%s close entry, count=%d\n", info->device_name, info->port.count));
 
-	if (!info->port.count)
-		return;
-
-	if (tty_hung_up_p(filp))
+	if (tty_port_close_start(&info->port, tty, filp) == 0)
 		goto cleanup;
 
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * tty->count is 1 and the tty structure will be freed.
-		 * info->port.count should be one in this case.
-		 * if it's not, correct it so that the port is shutdown.
-		 */
-		DBGERR(("%s close: bad refcount; tty->count=1, "
-		       "info->port.count=%d\n", info->device_name, info->port.count));
-		info->port.count = 1;
-	}
-
-	info->port.count--;
-
-	/* if at least one open remaining, leave hardware active */
-	if (info->port.count)
-		goto cleanup;
-
-	info->port.flags |= ASYNC_CLOSING;
-
-	/* set tty->closing to notify line discipline to
-	 * only process XON/XOFF characters. Only the N_TTY
-	 * discipline appears to use this (ppp does not).
-	 */
-	tty->closing = 1;
-
-	/* wait for transmit data to clear all layers */
-
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) {
-		DBGINFO(("%s call tty_wait_until_sent\n", info->device_name));
-		tty_wait_until_sent(tty, info->port.closing_wait);
-	}
-
  	if (info->port.flags & ASYNC_INITIALIZED)
  		wait_until_sent(tty, info->timeout);
 	flush_buffer(tty);
@@ -765,20 +730,8 @@ static void close(struct tty_struct *tty, struct file *filp)
 
 	shutdown(info);
 
-	tty->closing = 0;
+	tty_port_close_end(&info->port, tty);
 	info->port.tty = NULL;
-
-	if (info->port.blocked_open) {
-		if (info->port.close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->port.close_delay));
-		}
-		wake_up_interruptible(&info->port.open_wait);
-	}
-
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-
-	wake_up_interruptible(&info->port.close_wait);
-
 cleanup:
 	DBGINFO(("%s close exit, count=%d\n", tty->driver->name, info->port.count));
 }
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 1f5c21e..2aac55b 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -810,70 +810,18 @@ static void close(struct tty_struct *tty, struct file *filp)
 		printk("%s(%d):%s close() entry, count=%d\n",
 			 __FILE__,__LINE__, info->device_name, info->port.count);
 
-	if (!info->port.count)
-		return;
-
-	if (tty_hung_up_p(filp))
-		goto cleanup;
-
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * tty->count is 1 and the tty structure will be freed.
-		 * info->port.count should be one in this case.
-		 * if it's not, correct it so that the port is shutdown.
-		 */
-		printk("%s(%d):%s close: bad refcount; tty->count is 1, "
-		       "info->port.count is %d\n",
-			 __FILE__,__LINE__, info->device_name, info->port.count);
-		info->port.count = 1;
-	}
-
-	info->port.count--;
-
-	/* if at least one open remaining, leave hardware active */
-	if (info->port.count)
+	if (tty_port_close_start(&info->port, tty, filp) == 0)
 		goto cleanup;
-
-	info->port.flags |= ASYNC_CLOSING;
-
-	/* set tty->closing to notify line discipline to
-	 * only process XON/XOFF characters. Only the N_TTY
-	 * discipline appears to use this (ppp does not).
-	 */
-	tty->closing = 1;
-
-	/* wait for transmit data to clear all layers */
-
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) {
-		if (debug_level >= DEBUG_LEVEL_INFO)
-			printk("%s(%d):%s close() calling tty_wait_until_sent\n",
-				 __FILE__,__LINE__, info->device_name );
-		tty_wait_until_sent(tty, info->port.closing_wait);
-	}
-
+		
  	if (info->port.flags & ASYNC_INITIALIZED)
  		wait_until_sent(tty, info->timeout);
 
 	flush_buffer(tty);
-
 	tty_ldisc_flush(tty);
-
 	shutdown(info);
 
-	tty->closing = 0;
+	tty_port_close_end(&info->port, tty);
 	info->port.tty = NULL;
-
-	if (info->port.blocked_open) {
-		if (info->port.close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->port.close_delay));
-		}
-		wake_up_interruptible(&info->port.open_wait);
-	}
-
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-
-	wake_up_interruptible(&info->port.close_wait);
-
 cleanup:
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s close() exit, count=%d\n", __FILE__,__LINE__,
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 0723664..b3175f5 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -257,3 +257,61 @@ int tty_port_block_til_ready(struct tty_port *port,
 }
 EXPORT_SYMBOL(tty_port_block_til_ready);
 
+int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	if (tty_hung_up_p(filp)) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		return 0;
+	}
+
+	if( tty->count == 1 && port->count != 1) {
+		printk(KERN_WARNING
+		    "tty_port_close_start: tty->count = 1 port count = %d.\n",
+								port->count);
+		port->count = 1;
+	}
+	if (--port->count < 0) {
+		printk(KERN_WARNING "tty_port_close_start: count = %d\n",
+								port->count);
+		port->count = 0;
+	}
+
+	if (port->count) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		return 0;
+	}
+	port->flags |= ASYNC_CLOSING;
+	tty->closing = 1;
+	spin_unlock_irqrestore(&port->lock, flags);
+	if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
+		tty_wait_until_sent(tty, port->closing_wait);
+	return 1;
+}
+EXPORT_SYMBOL(tty_port_close_start);
+
+void tty_port_close_end(struct tty_port *port, struct tty_struct *tty)
+{
+	unsigned long flags;
+
+	tty_ldisc_flush(tty);
+
+	spin_lock_irqsave(&port->lock, flags);
+	tty->closing = 0;
+
+	if (port->blocked_open) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		if (port->close_delay) {
+			msleep_interruptible(
+				jiffies_to_msecs(port->close_delay));
+		}
+		spin_lock_irqsave(&port->lock, flags);
+		wake_up_interruptible(&port->open_wait);
+	}
+	port->flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
+	wake_up_interruptible(&port->close_wait);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+EXPORT_SYMBOL(tty_port_close_end);
diff --git a/include/linux/istallion.h b/include/linux/istallion.h
index 053d5ae..7faca98 100644
--- a/include/linux/istallion.h
+++ b/include/linux/istallion.h
@@ -59,7 +59,6 @@ struct stliport {
 	unsigned int		devnr;
 	int			baud_base;
 	int			custom_divisor;
-	int			close_delay;
 	int			closing_wait;
 	int			rc;
 	int			argsize;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 61a0ab3..fc39db9 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -441,6 +441,9 @@ extern void tty_port_raise_dtr_rts(struct tty_port *port);
 extern void tty_port_hangup(struct tty_port *port);
 extern int tty_port_block_til_ready(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
+extern int tty_port_close_start(struct tty_port *port,
+				struct tty_struct *tty, struct file *filp);
+extern void tty_port_close_end(struct tty_port *port, struct tty_struct *tty);
 
 extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
-- 
cgit v0.10.2


From 39aced68d664291db3324d0fcf0985ab5626aac2 Mon Sep 17 00:00:00 2001
From: Niels de Vos <niels.devos@wincor-nixdorf.com>
Date: Fri, 2 Jan 2009 13:46:58 +0000
Subject: serial: set correct baud_base for Oxford Semiconductor Ltd EXSYS
 EX-41092 Dual 16950 Serial adapter

The PCI-card identified as "Oxford Semiconductor Ltd EXSYS EX-41092 Dual
16950 Serial adapter" is only usable with other devices (i.e. not the same
card) after doing a "setserial /dev/ttyS<n> baud_base 115200".  This
baud_base should be default for this card.

Signed-off-by: Niels de Vos <niels.devos@wincor-nixdorf.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 0b79413..2a2e1c7 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -2388,6 +2388,9 @@ static struct pci_device_id serial_pci_tbl[] = {
 		 * For now just used the hex ID 0x950a.
 		 */
 	{	PCI_VENDOR_ID_OXSEMI, 0x950a,
+		PCI_SUBVENDOR_ID_SIIG, PCI_SUBDEVICE_ID_SIIG_DUAL_SERIAL, 0, 0,
+		pbn_b0_2_115200 },
+	{	PCI_VENDOR_ID_OXSEMI, 0x950a,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_2_1130000 },
 	{	PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b6e6944..fa83dfe 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1766,6 +1766,7 @@
 #define PCI_DEVICE_ID_SIIG_8S_20x_650	0x2081
 #define PCI_DEVICE_ID_SIIG_8S_20x_850	0x2082
 #define PCI_SUBDEVICE_ID_SIIG_QUARTET_SERIAL	0x2050
+#define PCI_SUBDEVICE_ID_SIIG_DUAL_SERIAL	0x2530
 
 #define PCI_VENDOR_ID_RADISYS		0x1331
 
-- 
cgit v0.10.2


From eff6937a46e096eb35c16a391617b7a5e098a30c Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:47:06 +0000
Subject: tty: USB tty devices can block in tcdrain when unplugged

The underlying problem is that the device methods don't all correctly
handle disconnected status and some keep reporting bytes pending which
causes tcdrain to stall.

When the cable is unplugged they are definitely gone, and as this is true
for all USB cables we can fix it in the core usb serial code.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index aafa684..8d51890 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -339,6 +339,10 @@ static int serial_chars_in_buffer(struct tty_struct *tty)
 	dbg("%s = port %d", __func__, port->number);
 
 	WARN_ON(!port->port.count);
+	/* if the device was unplugged then any remaining characters
+	   fell out of the connector ;) */
+	if (port->serial->disconnected)
+		return 0;
 	/* pass on to the driver specific version of this function */
 	return port->serial->type->chars_in_buffer(tty);
 }
-- 
cgit v0.10.2


From ff8cb0fd6f195389aefe55d5dac9927d09a9de54 Mon Sep 17 00:00:00 2001
From: Thomas Pfaff <tpfaff@pcs.com>
Date: Fri, 2 Jan 2009 13:47:13 +0000
Subject: tty: N_TTY SIGIO only works for read

The N_TTY ldisc layer does not send SIGIO POLL_OUTs correctly when output is
possible due to flawed handling of the TTY_DO_WRITE_WAKEUP bit. It will
either send no SIGIOs at all or on every tty wakeup.

The fix is to set the bit when the tty driver write would block and test
and clear it on write wakeup.

[Merged with existing N_TTY patches and a small buglet fixed -- Alan]

Signed-off-by: Thomas Pfaff <tpfaff@pcs.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index 3922a08..f6f0e4e 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -1352,10 +1352,8 @@ static void n_tty_write_wakeup(struct tty_struct *tty)
 	/* Write out any echoed characters that are still pending */
 	process_echoes(tty);
 
-	if (tty->fasync) {
-		set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+	if (tty->fasync && test_and_clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags))
 		kill_fasync(&tty->fasync, SIGIO, POLL_OUT);
-	}
 }
 
 /**
@@ -2014,6 +2012,8 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
 break_out:
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&tty->write_wait, &wait);
+	if (b - buf != nr && tty->fasync)
+		set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 	return (b - buf) ? b - buf : retval;
 }
 
-- 
cgit v0.10.2


From 0ac6053c4db9369d7b0f9b39c30f4fb04405666b Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:47:20 +0000
Subject: tty: PTYs set TTY_DO_WRITE_WAKEUP when they don't need to

The write wakeup is done anyway for the poll while DO_WRITE_WAKUP is
cleared, set and managed by the ldisc layer and is no business of the pty
code.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index b5daaaa..112a6ba 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -5,8 +5,6 @@
  *
  *  Added support for a Unix98-style ptmx device.
  *    -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998
- *  Added TTY_DO_WRITE_WAKEUP to enable n_tty to send POLL_OUT to
- *      waiting writers -- Sapan Bhatia <sapan@corewars.org>
  *
  *  When reading this code see also fs/devpts. In particular note that the
  *  driver_data field is used by the devpts side as a binding to the devpts
@@ -217,7 +215,6 @@ static int pty_open(struct tty_struct *tty, struct file *filp)
 
 	clear_bit(TTY_OTHER_CLOSED, &tty->link->flags);
 	set_bit(TTY_THROTTLED, &tty->flags);
-	set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 	retval = 0;
 out:
 	return retval;
-- 
cgit v0.10.2


From c9f19e96a2f33cd56c2bd19f87a0c4982d011c2b Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:47:26 +0000
Subject: tty: Remove some pointless casts

disc_data and driver_data are void *

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c
index b97aebd..4e0cfde 100644
--- a/drivers/char/amiserial.c
+++ b/drivers/char/amiserial.c
@@ -170,7 +170,7 @@ static __inline__ void rtsdtr_ctrl(int bits)
  */
 static void rs_stop(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_stop"))
@@ -190,7 +190,7 @@ static void rs_stop(struct tty_struct *tty)
 
 static void rs_start(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_start"))
@@ -861,7 +861,7 @@ static int rs_put_char(struct tty_struct *tty, unsigned char ch)
 
 static void rs_flush_chars(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_flush_chars"))
@@ -934,7 +934,7 @@ static int rs_write(struct tty_struct * tty, const unsigned char *buf, int count
 
 static int rs_write_room(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 
 	if (serial_paranoia_check(info, tty->name, "rs_write_room"))
 		return 0;
@@ -943,7 +943,7 @@ static int rs_write_room(struct tty_struct *tty)
 
 static int rs_chars_in_buffer(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 
 	if (serial_paranoia_check(info, tty->name, "rs_chars_in_buffer"))
 		return 0;
@@ -952,7 +952,7 @@ static int rs_chars_in_buffer(struct tty_struct *tty)
 
 static void rs_flush_buffer(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_flush_buffer"))
@@ -969,7 +969,7 @@ static void rs_flush_buffer(struct tty_struct *tty)
  */
 static void rs_send_xchar(struct tty_struct *tty, char ch)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
         unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_send_char"))
@@ -1004,7 +1004,7 @@ static void rs_send_xchar(struct tty_struct *tty, char ch)
  */
 static void rs_throttle(struct tty_struct * tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 #ifdef SERIAL_DEBUG_THROTTLE
 	char	buf[64];
@@ -1029,7 +1029,7 @@ static void rs_throttle(struct tty_struct * tty)
 
 static void rs_unthrottle(struct tty_struct * tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 #ifdef SERIAL_DEBUG_THROTTLE
 	char	buf[64];
@@ -1194,7 +1194,7 @@ static int get_lsr_info(struct async_struct * info, unsigned int __user *value)
 
 static int rs_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	unsigned char control, status;
 	unsigned long flags;
 
@@ -1217,7 +1217,7 @@ static int rs_tiocmget(struct tty_struct *tty, struct file *file)
 static int rs_tiocmset(struct tty_struct *tty, struct file *file,
 		       unsigned int set, unsigned int clear)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_ioctl"))
@@ -1244,7 +1244,7 @@ static int rs_tiocmset(struct tty_struct *tty, struct file *file,
  */
 static int rs_break(struct tty_struct *tty, int break_state)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_break"))
@@ -1264,7 +1264,7 @@ static int rs_break(struct tty_struct *tty, int break_state)
 static int rs_ioctl(struct tty_struct *tty, struct file * file,
 		    unsigned int cmd, unsigned long arg)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	struct async_icount cprev, cnow;	/* kernel counter temps */
 	struct serial_icounter_struct icount;
 	void __user *argp = (void __user *)arg;
@@ -1368,7 +1368,7 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file,
 
 static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 	unsigned int cflag = tty->termios->c_cflag;
 
@@ -1428,7 +1428,7 @@ static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
  */
 static void rs_close(struct tty_struct *tty, struct file * filp)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	struct serial_state *state;
 	unsigned long flags;
 
@@ -1523,7 +1523,7 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
  */
 static void rs_wait_until_sent(struct tty_struct *tty, int timeout)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	unsigned long orig_jiffies, char_time;
 	int lsr;
 
@@ -1587,7 +1587,7 @@ static void rs_wait_until_sent(struct tty_struct *tty, int timeout)
  */
 static void rs_hangup(struct tty_struct *tty)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	struct serial_state *state = info->state;
 
 	if (serial_paranoia_check(info, tty->name, "rs_hangup"))
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index cf2461d..da2d2cf 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -392,7 +392,7 @@ static struct channel *verifyChannel(struct tty_struct *tty)
 	 * through tty->driver_data this should catch it.
 	 */
 	if (tty) {
-		struct channel *ch = (struct channel *)tty->driver_data;
+		struct channel *ch = tty->driver_data;
 		if (ch >= &digi_channels[0] && ch < &digi_channels[nbdevs]) {
 			if (ch->magic == EPCA_MAGIC)
 				return ch;
@@ -2097,7 +2097,7 @@ static int info_ioctl(struct tty_struct *tty, struct file *file,
 
 static int pc_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct channel *ch = (struct channel *) tty->driver_data;
+	struct channel *ch = tty->driver_data;
 	struct board_chan __iomem *bc;
 	unsigned int mstat, mflag = 0;
 	unsigned long flags;
@@ -2131,7 +2131,7 @@ static int pc_tiocmget(struct tty_struct *tty, struct file *file)
 static int pc_tiocmset(struct tty_struct *tty, struct file *file,
 		       unsigned int set, unsigned int clear)
 {
-	struct channel *ch = (struct channel *) tty->driver_data;
+	struct channel *ch = tty->driver_data;
 	unsigned long flags;
 
 	if (!ch)
@@ -2178,7 +2178,7 @@ static int pc_ioctl(struct tty_struct *tty, struct file *file,
 	unsigned int mflag, mstat;
 	unsigned char startc, stopc;
 	struct board_chan __iomem *bc;
-	struct channel *ch = (struct channel *) tty->driver_data;
+	struct channel *ch = tty->driver_data;
 	void __user *argp = (void __user *)arg;
 
 	if (ch)
@@ -2473,7 +2473,7 @@ static void pc_unthrottle(struct tty_struct *tty)
 
 static int pc_send_break(struct tty_struct *tty, int msec)
 {
-	struct channel *ch = (struct channel *) tty->driver_data;
+	struct channel *ch = tty->driver_data;
 	unsigned long flags;
 
 	if (msec == -1)
diff --git a/drivers/char/generic_serial.c b/drivers/char/generic_serial.c
index 2f040d1..9e4e569 100644
--- a/drivers/char/generic_serial.c
+++ b/drivers/char/generic_serial.c
@@ -511,7 +511,7 @@ void gs_close(struct tty_struct * tty, struct file * filp)
 	
 	func_enter ();
 
-	port = (struct gs_port *) tty->driver_data;
+	port = tty->driver_data;
 
 	if (!port) return;
 
diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c
index af05528..406f874 100644
--- a/drivers/char/hvsi.c
+++ b/drivers/char/hvsi.c
@@ -997,14 +997,14 @@ out:
 
 static int hvsi_write_room(struct tty_struct *tty)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 
 	return N_OUTBUF - hp->n_outbuf;
 }
 
 static int hvsi_chars_in_buffer(struct tty_struct *tty)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 
 	return hp->n_outbuf;
 }
@@ -1070,7 +1070,7 @@ out:
  */
 static void hvsi_throttle(struct tty_struct *tty)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 
 	pr_debug("%s\n", __func__);
 
@@ -1079,7 +1079,7 @@ static void hvsi_throttle(struct tty_struct *tty)
 
 static void hvsi_unthrottle(struct tty_struct *tty)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 	unsigned long flags;
 	int shouldflip = 0;
 
@@ -1100,7 +1100,7 @@ static void hvsi_unthrottle(struct tty_struct *tty)
 
 static int hvsi_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 
 	hvsi_get_mctrl(hp);
 	return hp->mctrl;
@@ -1109,7 +1109,7 @@ static int hvsi_tiocmget(struct tty_struct *tty, struct file *file)
 static int hvsi_tiocmset(struct tty_struct *tty, struct file *file,
 		unsigned int set, unsigned int clear)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 	unsigned long flags;
 	uint16_t new_mctrl;
 
diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c
index 4a8215a..d2e93e3 100644
--- a/drivers/char/n_r3964.c
+++ b/drivers/char/n_r3964.c
@@ -1003,7 +1003,7 @@ static int r3964_open(struct tty_struct *tty)
 
 static void r3964_close(struct tty_struct *tty)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	struct r3964_client_info *pClient, *pNext;
 	struct r3964_message *pMsg;
 	struct r3964_block_header *pHeader, *pNextHeader;
@@ -1058,7 +1058,7 @@ static void r3964_close(struct tty_struct *tty)
 static ssize_t r3964_read(struct tty_struct *tty, struct file *file,
 			  unsigned char __user * buf, size_t nr)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	struct r3964_client_info *pClient;
 	struct r3964_message *pMsg;
 	struct r3964_client_message theMsg;
@@ -1113,7 +1113,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file,
 static ssize_t r3964_write(struct tty_struct *tty, struct file *file,
 			   const unsigned char *data, size_t count)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	struct r3964_block_header *pHeader;
 	struct r3964_client_info *pClient;
 	unsigned char *new_data;
@@ -1182,7 +1182,7 @@ static ssize_t r3964_write(struct tty_struct *tty, struct file *file,
 static int r3964_ioctl(struct tty_struct *tty, struct file *file,
 		unsigned int cmd, unsigned long arg)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	if (pInfo == NULL)
 		return -EINVAL;
 	switch (cmd) {
@@ -1216,7 +1216,7 @@ static void r3964_set_termios(struct tty_struct *tty, struct ktermios *old)
 static unsigned int r3964_poll(struct tty_struct *tty, struct file *file,
 			struct poll_table_struct *wait)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	struct r3964_client_info *pClient;
 	struct r3964_message *pMsg = NULL;
 	unsigned long flags;
@@ -1241,7 +1241,7 @@ static unsigned int r3964_poll(struct tty_struct *tty, struct file *file,
 static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp,
 			char *fp, int count)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	const unsigned char *p;
 	char *f, flags = 0;
 	int i;
diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c
index 9ac5feb..9af8d74 100644
--- a/drivers/char/riscom8.c
+++ b/drivers/char/riscom8.c
@@ -906,7 +906,7 @@ static int rc_open(struct tty_struct *tty, struct file *filp)
 
 static void rc_flush_buffer(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	unsigned long flags;
 
 	if (rc_paranoia_check(port, tty->name, "rc_flush_buffer"))
@@ -921,7 +921,7 @@ static void rc_flush_buffer(struct tty_struct *tty)
 
 static void rc_close(struct tty_struct *tty, struct file *filp)
 {
-	struct riscom_port *port = (struct riscom_port *) tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned long flags;
 	unsigned long timeout;
@@ -972,7 +972,7 @@ static void rc_close(struct tty_struct *tty, struct file *filp)
 static int rc_write(struct tty_struct *tty,
 		    const unsigned char *buf, int count)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	int c, total = 0;
 	unsigned long flags;
@@ -1015,7 +1015,7 @@ static int rc_write(struct tty_struct *tty,
 
 static int rc_put_char(struct tty_struct *tty, unsigned char ch)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	unsigned long flags;
 	int ret = 0;
 
@@ -1039,7 +1039,7 @@ out:
 
 static void rc_flush_chars(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	unsigned long flags;
 
 	if (rc_paranoia_check(port, tty->name, "rc_flush_chars"))
@@ -1059,7 +1059,7 @@ static void rc_flush_chars(struct tty_struct *tty)
 
 static int rc_write_room(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	int	ret;
 
 	if (rc_paranoia_check(port, tty->name, "rc_write_room"))
@@ -1073,7 +1073,7 @@ static int rc_write_room(struct tty_struct *tty)
 
 static int rc_chars_in_buffer(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 
 	if (rc_paranoia_check(port, tty->name, "rc_chars_in_buffer"))
 		return 0;
@@ -1083,7 +1083,7 @@ static int rc_chars_in_buffer(struct tty_struct *tty)
 
 static int rc_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned char status;
 	unsigned int result;
@@ -1113,7 +1113,7 @@ static int rc_tiocmget(struct tty_struct *tty, struct file *file)
 static int rc_tiocmset(struct tty_struct *tty, struct file *file,
 		       unsigned int set, unsigned int clear)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	unsigned long flags;
 	struct riscom_board *bp;
 
@@ -1145,7 +1145,7 @@ static int rc_tiocmset(struct tty_struct *tty, struct file *file,
 
 static int rc_send_break(struct tty_struct *tty, int length)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp = port_Board(port);
 	unsigned long flags;
 
@@ -1238,7 +1238,7 @@ static int rc_get_serial_info(struct riscom_port *port,
 static int rc_ioctl(struct tty_struct *tty, struct file *filp,
 		    unsigned int cmd, unsigned long arg)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	void __user *argp = (void __user *)arg;
 	int retval;
 
@@ -1264,7 +1264,7 @@ static int rc_ioctl(struct tty_struct *tty, struct file *filp,
 
 static void rc_throttle(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned long flags;
 
@@ -1286,7 +1286,7 @@ static void rc_throttle(struct tty_struct *tty)
 
 static void rc_unthrottle(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned long flags;
 
@@ -1308,7 +1308,7 @@ static void rc_unthrottle(struct tty_struct *tty)
 
 static void rc_stop(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned long flags;
 
@@ -1326,7 +1326,7 @@ static void rc_stop(struct tty_struct *tty)
 
 static void rc_start(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned long flags;
 
@@ -1347,7 +1347,7 @@ static void rc_start(struct tty_struct *tty)
 
 static void rc_hangup(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned long flags;
 
@@ -1368,7 +1368,7 @@ static void rc_hangup(struct tty_struct *tty)
 static void rc_set_termios(struct tty_struct *tty,
 					struct ktermios *old_termios)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	unsigned long flags;
 
 	if (rc_paranoia_check(port, tty->name, "rc_set_termios"))
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index 9d81980..1e68cc2 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -1094,7 +1094,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
  */
 static void rp_close(struct tty_struct *tty, struct file *filp)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	unsigned long flags;
 	int timeout;
 	CHANNEL_t *cp;
@@ -1208,7 +1208,7 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 static void rp_set_termios(struct tty_struct *tty,
 			   struct ktermios *old_termios)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 	unsigned cflag;
 
@@ -1251,7 +1251,7 @@ static void rp_set_termios(struct tty_struct *tty,
 
 static int rp_break(struct tty_struct *tty, int break_state)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	unsigned long flags;
 
 	if (rocket_paranoia_check(info, "rp_break"))
@@ -1297,7 +1297,7 @@ static int sGetChanRI(CHANNEL_T * ChP)
  */
 static int rp_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct r_port *info = (struct r_port *)tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	unsigned int control, result, ChanStatus;
 
 	ChanStatus = sGetChanStatusLo(&info->channel);
@@ -1318,7 +1318,7 @@ static int rp_tiocmget(struct tty_struct *tty, struct file *file)
 static int rp_tiocmset(struct tty_struct *tty, struct file *file,
 		    unsigned int set, unsigned int clear)
 {
-	struct r_port *info = (struct r_port *)tty->driver_data;
+	struct r_port *info = tty->driver_data;
 
 	if (set & TIOCM_RTS)
 		info->channel.TxControl[3] |= SET_RTS;
@@ -1447,7 +1447,7 @@ static int get_version(struct r_port *info, struct rocket_version __user *retver
 static int rp_ioctl(struct tty_struct *tty, struct file *file,
 		    unsigned int cmd, unsigned long arg)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	void __user *argp = (void __user *)arg;
 	int ret = 0;
 
@@ -1485,7 +1485,7 @@ static int rp_ioctl(struct tty_struct *tty, struct file *file,
 
 static void rp_send_xchar(struct tty_struct *tty, char ch)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 
 	if (rocket_paranoia_check(info, "rp_send_xchar"))
@@ -1500,7 +1500,7 @@ static void rp_send_xchar(struct tty_struct *tty, char ch)
 
 static void rp_throttle(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 
 #ifdef ROCKET_DEBUG_THROTTLE
@@ -1520,7 +1520,7 @@ static void rp_throttle(struct tty_struct *tty)
 
 static void rp_unthrottle(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 #ifdef ROCKET_DEBUG_THROTTLE
 	printk(KERN_INFO "unthrottle %s: %d....\n", tty->name,
@@ -1547,7 +1547,7 @@ static void rp_unthrottle(struct tty_struct *tty)
  */
 static void rp_stop(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 
 #ifdef ROCKET_DEBUG_FLOW
 	printk(KERN_INFO "stop %s: %d %d....\n", tty->name,
@@ -1563,7 +1563,7 @@ static void rp_stop(struct tty_struct *tty)
 
 static void rp_start(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 
 #ifdef ROCKET_DEBUG_FLOW
 	printk(KERN_INFO "start %s: %d %d....\n", tty->name,
@@ -1583,7 +1583,7 @@ static void rp_start(struct tty_struct *tty)
  */
 static void rp_wait_until_sent(struct tty_struct *tty, int timeout)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 	unsigned long orig_jiffies;
 	int check_time, exit_time;
@@ -1640,7 +1640,7 @@ static void rp_wait_until_sent(struct tty_struct *tty, int timeout)
 static void rp_hangup(struct tty_struct *tty)
 {
 	CHANNEL_t *cp;
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 
 	if (rocket_paranoia_check(info, "rp_hangup"))
 		return;
@@ -1680,7 +1680,7 @@ static void rp_hangup(struct tty_struct *tty)
  */
 static int rp_put_char(struct tty_struct *tty, unsigned char ch)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 	unsigned long flags;
 
@@ -1727,7 +1727,7 @@ static int rp_put_char(struct tty_struct *tty, unsigned char ch)
 static int rp_write(struct tty_struct *tty,
 		    const unsigned char *buf, int count)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 	const unsigned char *b;
 	int c, retval = 0;
@@ -1819,7 +1819,7 @@ end:
  */
 static int rp_write_room(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	int ret;
 
 	if (rocket_paranoia_check(info, "rp_write_room"))
@@ -1840,7 +1840,7 @@ static int rp_write_room(struct tty_struct *tty)
  */
 static int rp_chars_in_buffer(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 
 	if (rocket_paranoia_check(info, "rp_chars_in_buffer"))
@@ -1861,7 +1861,7 @@ static int rp_chars_in_buffer(struct tty_struct *tty)
  */
 static void rp_flush_buffer(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 	unsigned long flags;
 
diff --git a/drivers/char/selection.c b/drivers/char/selection.c
index 2978a49..f29fbe9 100644
--- a/drivers/char/selection.c
+++ b/drivers/char/selection.c
@@ -306,7 +306,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t
  */
 int paste_selection(struct tty_struct *tty)
 {
-	struct vc_data *vc = (struct vc_data *)tty->driver_data;
+	struct vc_data *vc = tty->driver_data;
 	int	pasted = 0;
 	unsigned int count;
 	struct  tty_ldisc *ld;
diff --git a/drivers/char/ser_a2232.c b/drivers/char/ser_a2232.c
index 0c97f34..33872a2 100644
--- a/drivers/char/ser_a2232.c
+++ b/drivers/char/ser_a2232.c
@@ -460,14 +460,14 @@ static void a2232_throttle(struct tty_struct *tty)
    if switched on. So the only thing we can do at this
    layer here is not taking any characters out of the
    A2232 buffer any more. */
-	struct a2232_port *port = (struct a2232_port *) tty->driver_data;
+	struct a2232_port *port = tty->driver_data;
 	port->throttle_input = -1;
 }
 
 static void a2232_unthrottle(struct tty_struct *tty)
 {
 /* Unthrottle: dual to "throttle()" above. */
-	struct a2232_port *port = (struct a2232_port *) tty->driver_data;
+	struct a2232_port *port = tty->driver_data;
 	port->throttle_input = 0;
 }
 
diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c
index a8f15e6..f1f24f0 100644
--- a/drivers/char/serial167.c
+++ b/drivers/char/serial167.c
@@ -315,7 +315,7 @@ u_short write_cy_cmd(volatile u_char * base_addr, u_char cmd)
 
 static void cy_stop(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	volatile unsigned char *base_addr = (unsigned char *)BASE_ADDR;
 	int channel;
 	unsigned long flags;
@@ -337,7 +337,7 @@ static void cy_stop(struct tty_struct *tty)
 
 static void cy_start(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	volatile unsigned char *base_addr = (unsigned char *)BASE_ADDR;
 	int channel;
 	unsigned long flags;
@@ -1062,7 +1062,7 @@ static void config_setup(struct cyclades_port *info)
 
 static int cy_put_char(struct tty_struct *tty, unsigned char ch)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 
 #ifdef SERIAL_DEBUG_IO
@@ -1090,7 +1090,7 @@ static int cy_put_char(struct tty_struct *tty, unsigned char ch)
 
 static void cy_flush_chars(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 	volatile unsigned char *base_addr = (u_char *) BASE_ADDR;
 	int channel;
@@ -1122,7 +1122,7 @@ static void cy_flush_chars(struct tty_struct *tty)
  */
 static int cy_write(struct tty_struct *tty, const unsigned char *buf, int count)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 	int c, total = 0;
 
@@ -1166,7 +1166,7 @@ static int cy_write(struct tty_struct *tty, const unsigned char *buf, int count)
 
 static int cy_write_room(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	int ret;
 
 #ifdef SERIAL_DEBUG_IO
@@ -1183,7 +1183,7 @@ static int cy_write_room(struct tty_struct *tty)
 
 static int cy_chars_in_buffer(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 
 #ifdef SERIAL_DEBUG_IO
 	printk("cy_chars_in_buffer %s %d\n", tty->name, info->xmit_cnt);	/* */
@@ -1197,7 +1197,7 @@ static int cy_chars_in_buffer(struct tty_struct *tty)
 
 static void cy_flush_buffer(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 
 #ifdef SERIAL_DEBUG_IO
@@ -1218,7 +1218,7 @@ static void cy_flush_buffer(struct tty_struct *tty)
  */
 static void cy_throttle(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 	volatile unsigned char *base_addr = (u_char *) BASE_ADDR;
 	int channel;
@@ -1250,7 +1250,7 @@ static void cy_throttle(struct tty_struct *tty)
 
 static void cy_unthrottle(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 	volatile unsigned char *base_addr = (u_char *) BASE_ADDR;
 	int channel;
@@ -1345,7 +1345,7 @@ check_and_exit:
 
 static int cy_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	int channel;
 	volatile unsigned char *base_addr = (u_char *) BASE_ADDR;
 	unsigned long flags;
@@ -1369,7 +1369,7 @@ static int
 cy_tiocmset(struct tty_struct *tty, struct file *file,
 	    unsigned int set, unsigned int clear)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	int channel;
 	volatile unsigned char *base_addr = (u_char *) BASE_ADDR;
 	unsigned long flags;
@@ -1532,7 +1532,7 @@ cy_ioctl(struct tty_struct *tty, struct file *file,
 	 unsigned int cmd, unsigned long arg)
 {
 	unsigned long val;
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	int ret_val = 0;
 	void __user *argp = (void __user *)arg;
 
@@ -1607,7 +1607,7 @@ cy_ioctl(struct tty_struct *tty, struct file *file,
 
 static void cy_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 
 #ifdef SERIAL_DEBUG_OTHER
 	printk("cy_set_termios %s\n", tty->name);
@@ -1631,7 +1631,7 @@ static void cy_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
 
 static void cy_close(struct tty_struct *tty, struct file *filp)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 
 /* CP('C'); */
 #ifdef SERIAL_DEBUG_OTHER
@@ -1698,7 +1698,7 @@ static void cy_close(struct tty_struct *tty, struct file *filp)
  */
 void cy_hangup(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 
 #ifdef SERIAL_DEBUG_OTHER
 	printk("cy_hangup %s\n", tty->name);	/* */
diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c
index a16b94f..3c67c3d 100644
--- a/drivers/char/specialix.c
+++ b/drivers/char/specialix.c
@@ -1450,7 +1450,7 @@ static int sx_open(struct tty_struct *tty, struct file *filp)
 
 static void sx_flush_buffer(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	unsigned long flags;
 	struct specialix_board  *bp;
 
@@ -1472,7 +1472,7 @@ static void sx_flush_buffer(struct tty_struct *tty)
 
 static void sx_close(struct tty_struct *tty, struct file *filp)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 	unsigned long timeout;
@@ -1585,7 +1585,7 @@ static void sx_close(struct tty_struct *tty, struct file *filp)
 static int sx_write(struct tty_struct *tty,
 					const unsigned char *buf, int count)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	int c, total = 0;
 	unsigned long flags;
@@ -1637,7 +1637,7 @@ static int sx_write(struct tty_struct *tty,
 
 static int sx_put_char(struct tty_struct *tty, unsigned char ch)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	unsigned long flags;
 	struct specialix_board  *bp;
 
@@ -1676,7 +1676,7 @@ static int sx_put_char(struct tty_struct *tty, unsigned char ch)
 
 static void sx_flush_chars(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	unsigned long flags;
 	struct specialix_board  *bp = port_Board(port);
 
@@ -1703,7 +1703,7 @@ static void sx_flush_chars(struct tty_struct *tty)
 
 static int sx_write_room(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	int	ret;
 
 	func_enter();
@@ -1724,7 +1724,7 @@ static int sx_write_room(struct tty_struct *tty)
 
 static int sx_chars_in_buffer(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 
 	func_enter();
 
@@ -1738,7 +1738,7 @@ static int sx_chars_in_buffer(struct tty_struct *tty)
 
 static int sx_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned char status;
 	unsigned int result;
@@ -1780,7 +1780,7 @@ static int sx_tiocmget(struct tty_struct *tty, struct file *file)
 static int sx_tiocmset(struct tty_struct *tty, struct file *file,
 		       unsigned int set, unsigned int clear)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	unsigned long flags;
 	struct specialix_board *bp;
 
@@ -1820,7 +1820,7 @@ static int sx_tiocmset(struct tty_struct *tty, struct file *file,
 
 static int sx_send_break(struct tty_struct *tty, int length)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp = port_Board(port);
 	unsigned long flags;
 
@@ -1931,7 +1931,7 @@ static int sx_get_serial_info(struct specialix_port *port,
 static int sx_ioctl(struct tty_struct *tty, struct file *filp,
 				unsigned int cmd, unsigned long arg)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	void __user *argp = (void __user *)arg;
 
 	func_enter();
@@ -1959,7 +1959,7 @@ static int sx_ioctl(struct tty_struct *tty, struct file *filp,
 
 static void sx_throttle(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 
@@ -2004,7 +2004,7 @@ static void sx_throttle(struct tty_struct *tty)
 
 static void sx_unthrottle(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 
@@ -2045,7 +2045,7 @@ static void sx_unthrottle(struct tty_struct *tty)
 
 static void sx_stop(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 
@@ -2072,7 +2072,7 @@ static void sx_stop(struct tty_struct *tty)
 
 static void sx_start(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 
@@ -2100,7 +2100,7 @@ static void sx_start(struct tty_struct *tty)
 
 static void sx_hangup(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 
@@ -2135,7 +2135,7 @@ static void sx_hangup(struct tty_struct *tty)
 static void sx_set_termios(struct tty_struct *tty,
 					struct ktermios *old_termios)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	unsigned long flags;
 	struct specialix_board  *bp;
 
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index a71bc58..b60be7b 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -1941,7 +1941,7 @@ static int sx_ioctl(struct tty_struct *tty, struct file *filp,
 
 static void sx_throttle(struct tty_struct *tty)
 {
-	struct sx_port *port = (struct sx_port *)tty->driver_data;
+	struct sx_port *port = tty->driver_data;
 
 	func_enter2();
 	/* If the port is using any type of input flow
@@ -1955,7 +1955,7 @@ static void sx_throttle(struct tty_struct *tty)
 
 static void sx_unthrottle(struct tty_struct *tty)
 {
-	struct sx_port *port = (struct sx_port *)tty->driver_data;
+	struct sx_port *port = tty->driver_data;
 
 	func_enter2();
 	/* Always unthrottle even if flow control is not enabled on
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index fbd5a5c..b8063d4 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -977,7 +977,7 @@ static void ldisc_receive_buf(struct tty_struct *tty,
  */
 static void mgsl_stop(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (mgsl_paranoia_check(info, tty->name, "mgsl_stop"))
@@ -1000,7 +1000,7 @@ static void mgsl_stop(struct tty_struct *tty)
  */
 static void mgsl_start(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (mgsl_paranoia_check(info, tty->name, "mgsl_start"))
@@ -2057,7 +2057,7 @@ static int mgsl_put_char(struct tty_struct *tty, unsigned char ch)
  */
 static void mgsl_flush_chars(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 				
 	if ( debug_level >= DEBUG_LEVEL_INFO )
@@ -2109,7 +2109,7 @@ static int mgsl_write(struct tty_struct * tty,
 		    const unsigned char *buf, int count)
 {
 	int	c, ret = 0;
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if ( debug_level >= DEBUG_LEVEL_INFO )
@@ -2232,7 +2232,7 @@ cleanup:
  */
 static int mgsl_write_room(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	int	ret;
 				
 	if (mgsl_paranoia_check(info, tty->name, "mgsl_write_room"))
@@ -2267,7 +2267,7 @@ static int mgsl_write_room(struct tty_struct *tty)
  */
 static int mgsl_chars_in_buffer(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 			 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgsl_chars_in_buffer(%s)\n",
@@ -2301,7 +2301,7 @@ static int mgsl_chars_in_buffer(struct tty_struct *tty)
  */
 static void mgsl_flush_buffer(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2329,7 +2329,7 @@ static void mgsl_flush_buffer(struct tty_struct *tty)
  */
 static void mgsl_send_xchar(struct tty_struct *tty, char ch)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2358,7 +2358,7 @@ static void mgsl_send_xchar(struct tty_struct *tty, char ch)
  */
 static void mgsl_throttle(struct tty_struct * tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2388,7 +2388,7 @@ static void mgsl_throttle(struct tty_struct * tty)
  */
 static void mgsl_unthrottle(struct tty_struct * tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2841,7 +2841,7 @@ static int modem_input_wait(struct mgsl_struct *info,int arg)
  */
 static int tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned int result;
  	unsigned long flags;
 
@@ -2867,7 +2867,7 @@ static int tiocmget(struct tty_struct *tty, struct file *file)
 static int tiocmset(struct tty_struct *tty, struct file *file,
 		    unsigned int set, unsigned int clear)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
  	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2898,7 +2898,7 @@ static int tiocmset(struct tty_struct *tty, struct file *file,
  */
 static int mgsl_break(struct tty_struct *tty, int break_state)
 {
-	struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct * info = tty->driver_data;
 	unsigned long flags;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2932,7 +2932,7 @@ static int mgsl_break(struct tty_struct *tty, int break_state)
 static int mgsl_ioctl(struct tty_struct *tty, struct file * file,
 		    unsigned int cmd, unsigned long arg)
 {
-	struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct * info = tty->driver_data;
 	int ret;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -3042,7 +3042,7 @@ static int mgsl_ioctl_common(struct mgsl_struct *info, unsigned int cmd, unsigne
  */
 static void mgsl_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -3096,7 +3096,7 @@ static void mgsl_set_termios(struct tty_struct *tty, struct ktermios *old_termio
  */
 static void mgsl_close(struct tty_struct *tty, struct file * filp)
 {
-	struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct * info = tty->driver_data;
 
 	if (mgsl_paranoia_check(info, tty->name, "mgsl_close"))
 		return;
@@ -3136,7 +3136,7 @@ cleanup:
  */
 static void mgsl_wait_until_sent(struct tty_struct *tty, int timeout)
 {
-	struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct * info = tty->driver_data;
 	unsigned long orig_jiffies, char_time;
 
 	if (!info )
@@ -3209,7 +3209,7 @@ exit:
  */
 static void mgsl_hangup(struct tty_struct *tty)
 {
-	struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct * info = tty->driver_data;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgsl_hangup(%s)\n",
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 2aac55b..7b0c5b2 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -801,7 +801,7 @@ cleanup:
  */
 static void close(struct tty_struct *tty, struct file *filp)
 {
-	SLMP_INFO * info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO * info = tty->driver_data;
 
 	if (sanity_check(info, tty->name, "close"))
 		return;
@@ -833,7 +833,7 @@ cleanup:
  */
 static void hangup(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s hangup()\n",
@@ -856,7 +856,7 @@ static void hangup(struct tty_struct *tty)
  */
 static void set_termios(struct tty_struct *tty, struct ktermios *old_termios)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -909,7 +909,7 @@ static int write(struct tty_struct *tty,
 		 const unsigned char *buf, int count)
 {
 	int	c, ret = 0;
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -987,7 +987,7 @@ cleanup:
  */
 static int put_char(struct tty_struct *tty, unsigned char ch)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 	int ret = 0;
 
@@ -1024,7 +1024,7 @@ static int put_char(struct tty_struct *tty, unsigned char ch)
  */
 static void send_xchar(struct tty_struct *tty, char ch)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -1048,7 +1048,7 @@ static void send_xchar(struct tty_struct *tty, char ch)
  */
 static void wait_until_sent(struct tty_struct *tty, int timeout)
 {
-	SLMP_INFO * info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO * info = tty->driver_data;
 	unsigned long orig_jiffies, char_time;
 
 	if (!info )
@@ -1115,7 +1115,7 @@ exit:
  */
 static int write_room(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	int ret;
 
 	if (sanity_check(info, tty->name, "write_room"))
@@ -1142,7 +1142,7 @@ static int write_room(struct tty_struct *tty)
  */
 static void flush_chars(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if ( debug_level >= DEBUG_LEVEL_INFO )
@@ -1181,7 +1181,7 @@ static void flush_chars(struct tty_struct *tty)
  */
 static void flush_buffer(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -1203,7 +1203,7 @@ static void flush_buffer(struct tty_struct *tty)
  */
 static void tx_hold(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (sanity_check(info, tty->name, "tx_hold"))
@@ -1223,7 +1223,7 @@ static void tx_hold(struct tty_struct *tty)
  */
 static void tx_release(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (sanity_check(info, tty->name, "tx_release"))
@@ -1253,7 +1253,7 @@ static void tx_release(struct tty_struct *tty)
 static int do_ioctl(struct tty_struct *tty, struct file *file,
 		 unsigned int cmd, unsigned long arg)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	int error;
 	struct mgsl_icount cnow;	/* kernel counter temps */
 	struct serial_icounter_struct __user *p_cuser;	/* user space */
@@ -1464,7 +1464,7 @@ done:
  */
 static int chars_in_buffer(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 
 	if (sanity_check(info, tty->name, "chars_in_buffer"))
 		return 0;
@@ -1480,7 +1480,7 @@ static int chars_in_buffer(struct tty_struct *tty)
  */
 static void throttle(struct tty_struct * tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -1505,7 +1505,7 @@ static void throttle(struct tty_struct * tty)
  */
 static void unthrottle(struct tty_struct * tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -1536,7 +1536,7 @@ static void unthrottle(struct tty_struct * tty)
 static int set_break(struct tty_struct *tty, int break_state)
 {
 	unsigned char RegValue;
-	SLMP_INFO * info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO * info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -3218,7 +3218,7 @@ static int modem_input_wait(SLMP_INFO *info,int arg)
  */
 static int tiocmget(struct tty_struct *tty, struct file *file)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned int result;
  	unsigned long flags;
 
@@ -3244,7 +3244,7 @@ static int tiocmget(struct tty_struct *tty, struct file *file)
 static int tiocmset(struct tty_struct *tty, struct file *file,
 		    unsigned int set, unsigned int clear)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
  	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
diff --git a/drivers/char/vme_scc.c b/drivers/char/vme_scc.c
index 2d9242a..0e8234b 100644
--- a/drivers/char/vme_scc.c
+++ b/drivers/char/vme_scc.c
@@ -784,7 +784,7 @@ static void scc_setsignals(struct scc_port *port, int dtr, int rts)
 
 static void scc_send_xchar(struct tty_struct *tty, char ch)
 {
-	struct scc_port *port = (struct scc_port *)tty->driver_data;
+	struct scc_port *port = tty->driver_data;
 
 	port->x_char = ch;
 	if (ch)
@@ -911,7 +911,7 @@ static int scc_open (struct tty_struct * tty, struct file * filp)
 
 static void scc_throttle (struct tty_struct * tty)
 {
-	struct scc_port *port = (struct scc_port *)tty->driver_data;
+	struct scc_port *port = tty->driver_data;
 	unsigned long	flags;
 	SCC_ACCESS_INIT(port);
 
@@ -927,7 +927,7 @@ static void scc_throttle (struct tty_struct * tty)
 
 static void scc_unthrottle (struct tty_struct * tty)
 {
-	struct scc_port *port = (struct scc_port *)tty->driver_data;
+	struct scc_port *port = tty->driver_data;
 	unsigned long	flags;
 	SCC_ACCESS_INIT(port);
 
@@ -950,7 +950,7 @@ static int scc_ioctl(struct tty_struct *tty, struct file *file,
 
 static int scc_break_ctl(struct tty_struct *tty, int break_state)
 {
-	struct scc_port *port = (struct scc_port *)tty->driver_data;
+	struct scc_port *port = tty->driver_data;
 	unsigned long	flags;
 	SCC_ACCESS_INIT(port);
 
diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index 8944ce5..a2dee0e 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -366,7 +366,7 @@ do_unimap_ioctl(int cmd, struct unimapdesc __user *user_ud, int perm, struct vc_
 int vt_ioctl(struct tty_struct *tty, struct file * file,
 	     unsigned int cmd, unsigned long arg)
 {
-	struct vc_data *vc = (struct vc_data *)tty->driver_data;
+	struct vc_data *vc = tty->driver_data;
 	struct console_font_op op;	/* used in multiple places here */
 	struct kbd_struct * kbd;
 	unsigned int console;
-- 
cgit v0.10.2


From 33dd474ae712dc435eb586b44cb771cc8d24e2bd Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:47:32 +0000
Subject: tty: kref nozomi

Update the nozomi driver to use krefs

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index 9a34a19..d6102b6 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -353,6 +353,7 @@ struct ctrl_ul {
 
 /* This holds all information that is needed regarding a port */
 struct port {
+	struct tty_port port;
 	u8 update_flow_control;
 	struct ctrl_ul ctrl_ul;
 	struct ctrl_dl ctrl_dl;
@@ -365,8 +366,6 @@ struct port {
 	u8 toggle_ul;
 	u16 token_dl;
 
-	struct tty_struct *tty;
-	int tty_open_count;
 	/* mutex to ensure one access patch to this port */
 	struct mutex tty_sem;
 	wait_queue_head_t tty_wait;
@@ -788,14 +787,14 @@ static void disable_transmit_dl(enum port_type port, struct nozomi *dc)
  * Return 1 - send buffer to card and ack.
  * Return 0 - don't ack, don't send buffer to card.
  */
-static int send_data(enum port_type index, const struct nozomi *dc)
+static int send_data(enum port_type index, struct nozomi *dc)
 {
 	u32 size = 0;
-	const struct port *port = &dc->port[index];
+	struct port *port = &dc->port[index];
 	const u8 toggle = port->toggle_ul;
 	void __iomem *addr = port->ul_addr[toggle];
 	const u32 ul_size = port->ul_size[toggle];
-	struct tty_struct *tty = port->tty;
+	struct tty_struct *tty = tty_port_tty_get(&port->port);
 
 	/* Get data from tty and place in buf for now */
 	size = __kfifo_get(port->fifo_ul, dc->send_buf,
@@ -803,6 +802,7 @@ static int send_data(enum port_type index, const struct nozomi *dc)
 
 	if (size == 0) {
 		DBG4("No more data to send, disable link:");
+		tty_kref_put(tty);
 		return 0;
 	}
 
@@ -815,6 +815,7 @@ static int send_data(enum port_type index, const struct nozomi *dc)
 	if (tty)
 		tty_wakeup(tty);
 
+	tty_kref_put(tty);
 	return 1;
 }
 
@@ -826,7 +827,7 @@ static int receive_data(enum port_type index, struct nozomi *dc)
 	u32 offset = 4;
 	struct port *port = &dc->port[index];
 	void __iomem *addr = port->dl_addr[port->toggle_dl];
-	struct tty_struct *tty = port->tty;
+	struct tty_struct *tty = tty_port_tty_get(&port->port);
 	int i;
 
 	if (unlikely(!tty)) {
@@ -870,7 +871,7 @@ static int receive_data(enum port_type index, struct nozomi *dc)
 	}
 
 	set_bit(index, &dc->flip);
-
+	tty_kref_put(tty);
 	return 1;
 }
 
@@ -1276,9 +1277,15 @@ static irqreturn_t interrupt_handler(int irq, void *dev_id)
 
 exit_handler:
 	spin_unlock(&dc->spin_mutex);
-	for (a = 0; a < NOZOMI_MAX_PORTS; a++)
-		if (test_and_clear_bit(a, &dc->flip))
-			tty_flip_buffer_push(dc->port[a].tty);
+	for (a = 0; a < NOZOMI_MAX_PORTS; a++) {
+		struct tty_struct *tty;
+		if (test_and_clear_bit(a, &dc->flip)) {
+			tty = tty_port_tty_get(&dc->port[a].port);
+			if (tty)
+				tty_flip_buffer_push(tty);
+			tty_kref_put(tty);
+		}
+	}
 	return IRQ_HANDLED;
 none:
 	spin_unlock(&dc->spin_mutex);
@@ -1453,12 +1460,10 @@ static int __devinit nozomi_card_init(struct pci_dev *pdev,
 
 	for (i = 0; i < MAX_PORT; i++) {
 		mutex_init(&dc->port[i].tty_sem);
-		dc->port[i].tty_open_count = 0;
-		dc->port[i].tty = NULL;
+		tty_port_init(&dc->port[i].port);
 		tty_register_device(ntty_driver, dc->index_start + i,
 							&pdev->dev);
 	}
-
 	return 0;
 
 err_free_sbuf:
@@ -1482,14 +1487,16 @@ static void __devexit tty_exit(struct nozomi *dc)
 
 	flush_scheduled_work();
 
-	for (i = 0; i < MAX_PORT; ++i)
-		if (dc->port[i].tty && \
-				list_empty(&dc->port[i].tty->hangup_work.entry))
-			tty_hangup(dc->port[i].tty);
-
+	for (i = 0; i < MAX_PORT; ++i) {
+		struct tty_struct *tty = tty_port_tty_get(&dc->port[i].port);
+		if (tty && list_empty(&tty->hangup_work.entry))
+			tty_hangup(tty);
+		tty_kref_put(tty);
+	}
+	/* Racy below - surely should wait for scheduled work to be done or
+	   complete off a hangup method ? */
 	while (dc->open_ttys)
 		msleep(1);
-
 	for (i = dc->index_start; i < dc->index_start + MAX_PORT; ++i)
 		tty_unregister_device(ntty_driver, i);
 }
@@ -1579,23 +1586,22 @@ static int ntty_open(struct tty_struct *tty, struct file *file)
 	if (mutex_lock_interruptible(&port->tty_sem))
 		return -ERESTARTSYS;
 
-	port->tty_open_count++;
+	port->port.count++;
 	dc->open_ttys++;
 
 	/* Enable interrupt downlink for channel */
-	if (port->tty_open_count == 1) {
+	if (port->port.count == 1) {
+		/* FIXME: is this needed now ? */
 		tty->low_latency = 1;
 		tty->driver_data = port;
-		port->tty = tty;
+		tty_port_tty_set(&port->port, tty);
 		DBG1("open: %d", port->token_dl);
 		spin_lock_irqsave(&dc->spin_mutex, flags);
 		dc->last_ier = dc->last_ier | port->token_dl;
 		writew(dc->last_ier, dc->reg_ier);
 		spin_unlock_irqrestore(&dc->spin_mutex, flags);
 	}
-
 	mutex_unlock(&port->tty_sem);
-
 	return 0;
 }
 
@@ -1606,31 +1612,30 @@ static int ntty_open(struct tty_struct *tty, struct file *file)
 static void ntty_close(struct tty_struct *tty, struct file *file)
 {
 	struct nozomi *dc = get_dc_by_tty(tty);
-	struct port *port = tty->driver_data;
+	struct port *nport = tty->driver_data;
+	struct tty_port *port = &nport->port;
 	unsigned long flags;
 
-	if (!dc || !port)
+	if (!dc || !nport)
 		return;
 
-	if (mutex_lock_interruptible(&port->tty_sem))
-		return;
+	/* Users cannot interrupt a close */
+	mutex_lock(&nport->tty_sem);
 
-	if (!port->tty_open_count)
-		goto exit;
+	WARN_ON(!port->count);
 
 	dc->open_ttys--;
-	port->tty_open_count--;
+	port->count--;
+	tty_port_tty_set(port, NULL);
 
-	if (port->tty_open_count == 0) {
-		DBG1("close: %d", port->token_dl);
+	if (port->count == 0) {
+		DBG1("close: %d", nport->token_dl);
 		spin_lock_irqsave(&dc->spin_mutex, flags);
-		dc->last_ier &= ~(port->token_dl);
+		dc->last_ier &= ~(nport->token_dl);
 		writew(dc->last_ier, dc->reg_ier);
 		spin_unlock_irqrestore(&dc->spin_mutex, flags);
 	}
-
-exit:
-	mutex_unlock(&port->tty_sem);
+	mutex_unlock(&nport->tty_sem);
 }
 
 /*
@@ -1660,7 +1665,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer,
 		return -EAGAIN;
 	}
 
-	if (unlikely(!port->tty_open_count)) {
+	if (unlikely(!port->port.count)) {
 		DBG1(" ");
 		goto exit;
 	}
@@ -1710,7 +1715,7 @@ static int ntty_write_room(struct tty_struct *tty)
 	if (!mutex_trylock(&port->tty_sem))
 		return 0;
 
-	if (!port->tty_open_count)
+	if (!port->port.count)
 		goto exit;
 
 	room = port->fifo_ul->size - __kfifo_len(port->fifo_ul);
@@ -1866,7 +1871,7 @@ static s32 ntty_chars_in_buffer(struct tty_struct *tty)
 		goto exit_in_buffer;
 	}
 
-	if (unlikely(!port->tty_open_count)) {
+	if (unlikely(!port->port.count)) {
 		dev_err(&dc->pdev->dev, "No tty open?\n");
 		rval = -ENODEV;
 		goto exit_in_buffer;
-- 
cgit v0.10.2


From e136e3036bf27569dbfeae245cc09c7167cdc749 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:47:39 +0000
Subject: hso: net driver using tty without locking

Checking tty == NULL doesn't help us unless we have a clear semantic for
the locking of the tty object in the driver. Use the tty kref objects so that
we can take references to the tty in the USB event handling paths.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 9f7896a..d345a6e 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -1015,7 +1015,7 @@ static void _hso_serial_set_termios(struct tty_struct *tty,
 	struct hso_serial *serial = get_serial_by_tty(tty);
 	struct ktermios *termios;
 
-	if ((!tty) || (!tty->termios) || (!serial)) {
+	if (!serial) {
 		printk(KERN_ERR "%s: no tty structures", __func__);
 		return;
 	}
@@ -1057,14 +1057,14 @@ static void _hso_serial_set_termios(struct tty_struct *tty,
 	termios->c_cflag |= CS8;	/* character size 8 bits */
 
 	/* baud rate 115200 */
-	tty_encode_baud_rate(serial->tty, 115200, 115200);
+	tty_encode_baud_rate(tty, 115200, 115200);
 
 	/*
 	 * Force low_latency on; otherwise the pushes are scheduled;
 	 * this is bad as it opens up the possibility of dropping bytes
 	 * on the floor.  We don't want to drop bytes on the floor. :)
 	 */
-	serial->tty->low_latency = 1;
+	tty->low_latency = 1;
 	return;
 }
 
@@ -1228,6 +1228,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp)
 
 	/* sanity check */
 	if (serial == NULL || serial->magic != HSO_SERIAL_MAGIC) {
+		WARN_ON(1);
 		tty->driver_data = NULL;
 		D1("Failed to open port");
 		return -ENODEV;
@@ -1242,8 +1243,10 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp)
 	kref_get(&serial->parent->ref);
 
 	/* setup */
+	spin_lock_irq(&serial->serial_lock);
 	tty->driver_data = serial;
-	serial->tty = tty;
+	serial->tty = tty_kref_get(tty);
+	spin_unlock_irq(&serial->serial_lock);
 
 	/* check for port already opened, if not set the termios */
 	serial->open_count++;
@@ -1285,6 +1288,10 @@ static void hso_serial_close(struct tty_struct *tty, struct file *filp)
 
 	D1("Closing serial port");
 
+	/* Open failed, no close cleanup required */
+	if (serial == NULL)
+		return;
+
 	mutex_lock(&serial->parent->mutex);
 	usb_gone = serial->parent->usb_gone;
 
@@ -1297,10 +1304,13 @@ static void hso_serial_close(struct tty_struct *tty, struct file *filp)
 	kref_put(&serial->parent->ref, hso_serial_ref_free);
 	if (serial->open_count <= 0) {
 		serial->open_count = 0;
-		if (serial->tty) {
+		spin_lock_irq(&serial->serial_lock);
+		if (serial->tty == tty) {
 			serial->tty->driver_data = NULL;
 			serial->tty = NULL;
+			tty_kref_put(tty);
 		}
+		spin_unlock_irq(&serial->serial_lock);
 		if (!usb_gone)
 			hso_stop_serial_device(serial->parent);
 		tasklet_kill(&serial->unthrottle_tasklet);
@@ -1653,6 +1663,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb)
 {
 	struct hso_serial *serial = urb->context;
 	int status = urb->status;
+	struct tty_struct *tty;
 
 	/* sanity check */
 	if (!serial) {
@@ -1662,14 +1673,18 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb)
 
 	spin_lock(&serial->serial_lock);
 	serial->tx_urb_used = 0;
+	tty = tty_kref_get(serial->tty);
 	spin_unlock(&serial->serial_lock);
 	if (status) {
 		log_usb_status(status, __func__);
+		tty_kref_put(tty);
 		return;
 	}
 	hso_put_activity(serial->parent);
-	if (serial->tty)
-		tty_wakeup(serial->tty);
+	if (tty) {
+		tty_wakeup(tty);
+		tty_kref_put(tty);
+	}
 	hso_kick_transmit(serial);
 
 	D1(" ");
@@ -1706,6 +1721,7 @@ static void ctrl_callback(struct urb *urb)
 	struct hso_serial *serial = urb->context;
 	struct usb_ctrlrequest *req;
 	int status = urb->status;
+	struct tty_struct *tty;
 
 	/* sanity check */
 	if (!serial)
@@ -1713,9 +1729,11 @@ static void ctrl_callback(struct urb *urb)
 
 	spin_lock(&serial->serial_lock);
 	serial->tx_urb_used = 0;
+	tty = tty_kref_get(serial->tty);
 	spin_unlock(&serial->serial_lock);
 	if (status) {
 		log_usb_status(status, __func__);
+		tty_kref_put(tty);
 		return;
 	}
 
@@ -1734,25 +1752,31 @@ static void ctrl_callback(struct urb *urb)
 		spin_unlock(&serial->serial_lock);
 	} else {
 		hso_put_activity(serial->parent);
-		if (serial->tty)
-			tty_wakeup(serial->tty);
+		if (tty)
+			tty_wakeup(tty);
 		/* response to a write command */
 		hso_kick_transmit(serial);
 	}
+	tty_kref_put(tty);
 }
 
 /* handle RX data for serial port */
 static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial)
 {
-	struct tty_struct *tty = serial->tty;
+	struct tty_struct *tty;
 	int write_length_remaining = 0;
 	int curr_write_len;
+
 	/* Sanity check */
 	if (urb == NULL || serial == NULL) {
 		D1("serial = NULL");
 		return -2;
 	}
 
+	spin_lock(&serial->serial_lock);
+	tty = tty_kref_get(serial->tty);
+	spin_unlock(&serial->serial_lock);
+
 	/* Push data to tty */
 	if (tty) {
 		write_length_remaining = urb->actual_length -
@@ -1774,6 +1798,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial)
 		serial->curr_rx_urb_offset = 0;
 		serial->rx_urb_filled[hso_urb_to_index(serial, urb)] = 0;
 	}
+	tty_kref_put(tty);
 	return write_length_remaining;
 }
 
@@ -2786,15 +2811,20 @@ static void hso_serial_ref_free(struct kref *ref)
 static void hso_free_interface(struct usb_interface *interface)
 {
 	struct hso_serial *hso_dev;
+	struct tty_struct *tty;
 	int i;
 
 	for (i = 0; i < HSO_SERIAL_TTY_MINORS; i++) {
 		if (serial_table[i]
 		    && (serial_table[i]->interface == interface)) {
 			hso_dev = dev2ser(serial_table[i]);
-			if (hso_dev->tty)
-				tty_hangup(hso_dev->tty);
+			spin_lock_irq(&hso_dev->serial_lock);
+			tty = tty_kref_get(hso_dev->tty);
+			spin_unlock_irq(&hso_dev->serial_lock);
+			if (tty)
+				tty_hangup(tty);
 			mutex_lock(&hso_dev->parent->mutex);
+			tty_kref_put(tty);
 			hso_dev->parent->usb_gone = 1;
 			mutex_unlock(&hso_dev->parent->mutex);
 			kref_put(&serial_table[i]->ref, hso_serial_ref_free);
-- 
cgit v0.10.2


From ac9720c37e8795317e8be3adad63cb0d5522a640 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:47:45 +0000
Subject: tty: Fix the HSO termios handling a bit

Init the tty structure once
Don't set ->low_latency twice in a row
Don't force bits we should be leaving to the user
Don't allocate termios arrays as these are in fact allocated by the tty layer
for you and just overwrite the ones allocated in the driver

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index d345a6e..7373fb6 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -362,8 +362,6 @@ static struct tty_driver *tty_drv;
 static struct hso_device *serial_table[HSO_SERIAL_TTY_MINORS];
 static struct hso_device *network_table[HSO_MAX_NET_DEVICES];
 static spinlock_t serial_table_lock;
-static struct ktermios *hso_serial_termios[HSO_SERIAL_TTY_MINORS];
-static struct ktermios *hso_serial_termios_locked[HSO_SERIAL_TTY_MINORS];
 
 static const s32 default_port_spec[] = {
 	HSO_INTF_MUX | HSO_PORT_NETWORK,
@@ -1009,23 +1007,11 @@ static void read_bulk_callback(struct urb *urb)
 
 /* Serial driver functions */
 
-static void _hso_serial_set_termios(struct tty_struct *tty,
-				    struct ktermios *old)
+static void hso_init_termios(struct ktermios *termios)
 {
-	struct hso_serial *serial = get_serial_by_tty(tty);
-	struct ktermios *termios;
-
-	if (!serial) {
-		printk(KERN_ERR "%s: no tty structures", __func__);
-		return;
-	}
-
-	D4("port %d", serial->minor);
-
 	/*
 	 * The default requirements for this device are:
 	 */
-	termios = tty->termios;
 	termios->c_iflag &=
 		~(IGNBRK	/* disable ignore break */
 		| BRKINT	/* disable break causes interrupt */
@@ -1057,15 +1043,38 @@ static void _hso_serial_set_termios(struct tty_struct *tty,
 	termios->c_cflag |= CS8;	/* character size 8 bits */
 
 	/* baud rate 115200 */
-	tty_encode_baud_rate(tty, 115200, 115200);
+	tty_termios_encode_baud_rate(termios, 115200, 115200);
+}
+
+static void _hso_serial_set_termios(struct tty_struct *tty,
+				    struct ktermios *old)
+{
+	struct hso_serial *serial = get_serial_by_tty(tty);
+	struct ktermios *termios;
+
+	if (!serial) {
+		printk(KERN_ERR "%s: no tty structures", __func__);
+		return;
+	}
+
+	D4("port %d", serial->minor);
 
 	/*
-	 * Force low_latency on; otherwise the pushes are scheduled;
-	 * this is bad as it opens up the possibility of dropping bytes
-	 * on the floor.  We don't want to drop bytes on the floor. :)
+	 *	Fix up unsupported bits
 	 */
-	tty->low_latency = 1;
-	return;
+	termios = tty->termios;
+	termios->c_iflag &= ~IXON; /* disable enable XON/XOFF flow control */
+
+	termios->c_cflag &=
+		~(CSIZE		/* no size */
+		| PARENB	/* disable parity bit */
+		| CBAUD		/* clear current baud rate */
+		| CBAUDEX);	/* clear current buad rate */
+
+	termios->c_cflag |= CS8;	/* character size 8 bits */
+
+	/* baud rate 115200 */
+	tty_encode_baud_rate(tty, 115200, 115200);
 }
 
 static void hso_resubmit_rx_bulk_urb(struct hso_serial *serial, struct urb *urb)
@@ -2969,9 +2978,7 @@ static int __init hso_init(void)
 	tty_drv->subtype = SERIAL_TYPE_NORMAL;
 	tty_drv->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
 	tty_drv->init_termios = tty_std_termios;
-	tty_drv->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
-	tty_drv->termios = hso_serial_termios;
-	tty_drv->termios_locked = hso_serial_termios_locked;
+	hso_init_termios(&tty_drv->init_termios);
 	tty_set_operations(tty_drv, &hso_serial_ops);
 
 	/* register the tty driver */
-- 
cgit v0.10.2


From 542f54823614915780c3459b0e6062f06c0c0f99 Mon Sep 17 00:00:00 2001
From: Denis Joseph Barrow <D.Barow@option.com>
Date: Fri, 2 Jan 2009 13:47:52 +0000
Subject: tty: Modem functions for the HSO driver

Makes TIOCM ioctls for Data Carrier Detect & related functions
work like /drivers/serial/serial-core.c potentially needed
for pppd & similar user programs.

Signed-off-by: Denis Joseph Barrow <D.Barow@option.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 7373fb6..d974d97 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -39,8 +39,11 @@
  *		port is opened, as this have a huge impact on the network port
  *		throughput.
  *
- * Interface 2:	Standard modem interface - circuit switched interface, should
- *		not be used.
+ * Interface 2:	Standard modem interface - circuit switched interface, this
+ *		can be used to make a standard ppp connection however it
+ *              should not be used in conjunction with the IP network interface
+ *              enabled for USB performance reasons i.e. if using this set
+ *              ideally disable_net=1.
  *
  *****************************************************************************/
 
@@ -63,6 +66,8 @@
 #include <linux/usb/cdc.h>
 #include <net/arp.h>
 #include <asm/byteorder.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
 
 
 #define DRIVER_VERSION			"1.2"
@@ -182,6 +187,41 @@ enum rx_ctrl_state{
 	RX_PENDING
 };
 
+#define BM_REQUEST_TYPE (0xa1)
+#define B_NOTIFICATION  (0x20)
+#define W_VALUE         (0x0)
+#define W_INDEX         (0x2)
+#define W_LENGTH        (0x2)
+
+#define B_OVERRUN       (0x1<<6)
+#define B_PARITY        (0x1<<5)
+#define B_FRAMING       (0x1<<4)
+#define B_RING_SIGNAL   (0x1<<3)
+#define B_BREAK         (0x1<<2)
+#define B_TX_CARRIER    (0x1<<1)
+#define B_RX_CARRIER    (0x1<<0)
+
+struct hso_serial_state_notification {
+	u8 bmRequestType;
+	u8 bNotification;
+	u16 wValue;
+	u16 wIndex;
+	u16 wLength;
+	u16 UART_state_bitmap;
+} __attribute__((packed));
+
+struct hso_tiocmget {
+	struct mutex mutex;
+	wait_queue_head_t waitq;
+	int    intr_completed;
+	struct usb_endpoint_descriptor *endp;
+	struct urb *urb;
+	struct hso_serial_state_notification serial_state_notification;
+	u16    prev_UART_state_bitmap;
+	struct uart_icount icount;
+};
+
+
 struct hso_serial {
 	struct hso_device *parent;
 	int magic;
@@ -219,6 +259,7 @@ struct hso_serial {
 	spinlock_t serial_lock;
 
 	int (*write_data) (struct hso_serial *serial);
+	struct hso_tiocmget  *tiocmget;
 	/* Hacks required to get flow control
 	 * working on the serial receive buffers
 	 * so as not to drop characters on the floor.
@@ -305,7 +346,7 @@ static void async_get_intf(struct work_struct *data);
 static void async_put_intf(struct work_struct *data);
 static int hso_put_activity(struct hso_device *hso_dev);
 static int hso_get_activity(struct hso_device *hso_dev);
-
+static void tiocmget_intr_callback(struct urb *urb);
 /*****************************************************************************/
 /* Helping functions                                                         */
 /*****************************************************************************/
@@ -1419,25 +1460,217 @@ static int hso_serial_chars_in_buffer(struct tty_struct *tty)
 
 	return chars;
 }
+int tiocmget_submit_urb(struct hso_serial *serial,
+			struct hso_tiocmget  *tiocmget,
+			struct usb_device *usb)
+{
+	int result;
+
+	if (serial->parent->usb_gone)
+		return -ENODEV;
+	usb_fill_int_urb(tiocmget->urb, usb,
+			 usb_rcvintpipe(usb,
+					tiocmget->endp->
+					bEndpointAddress & 0x7F),
+			 &tiocmget->serial_state_notification,
+			 sizeof(struct hso_serial_state_notification),
+			 tiocmget_intr_callback, serial,
+			 tiocmget->endp->bInterval);
+	result = usb_submit_urb(tiocmget->urb, GFP_ATOMIC);
+	if (result) {
+		dev_warn(&usb->dev, "%s usb_submit_urb failed %d\n", __func__,
+			 result);
+	}
+	return result;
+
+}
+
+static void tiocmget_intr_callback(struct urb *urb)
+{
+	struct hso_serial *serial = urb->context;
+	struct hso_tiocmget *tiocmget;
+	int status = urb->status;
+	u16 UART_state_bitmap, prev_UART_state_bitmap;
+	struct uart_icount *icount;
+	struct hso_serial_state_notification *serial_state_notification;
+	struct usb_device *usb;
+
+	/* Sanity checks */
+	if (!serial)
+		return;
+	if (status) {
+		log_usb_status(status, __func__);
+		return;
+	}
+	tiocmget = serial->tiocmget;
+	if (!tiocmget)
+		return;
+	usb = serial->parent->usb;
+	serial_state_notification = &tiocmget->serial_state_notification;
+	if (serial_state_notification->bmRequestType != BM_REQUEST_TYPE ||
+	    serial_state_notification->bNotification != B_NOTIFICATION ||
+	    le16_to_cpu(serial_state_notification->wValue) != W_VALUE ||
+	    le16_to_cpu(serial_state_notification->wIndex) != W_INDEX ||
+	    le16_to_cpu(serial_state_notification->wLength) != W_LENGTH) {
+		dev_warn(&usb->dev,
+			 "hso received invalid serial state notification\n");
+		DUMP(serial_state_notification,
+		     sizeof(hso_serial_state_notifation))
+	} else {
+
+		UART_state_bitmap = le16_to_cpu(serial_state_notification->
+						UART_state_bitmap);
+		prev_UART_state_bitmap = tiocmget->prev_UART_state_bitmap;
+		icount = &tiocmget->icount;
+		spin_lock(&serial->serial_lock);
+		if ((UART_state_bitmap & B_OVERRUN) !=
+		   (prev_UART_state_bitmap & B_OVERRUN))
+			icount->parity++;
+		if ((UART_state_bitmap & B_PARITY) !=
+		   (prev_UART_state_bitmap & B_PARITY))
+			icount->parity++;
+		if ((UART_state_bitmap & B_FRAMING) !=
+		   (prev_UART_state_bitmap & B_FRAMING))
+			icount->frame++;
+		if ((UART_state_bitmap & B_RING_SIGNAL) &&
+		   !(prev_UART_state_bitmap & B_RING_SIGNAL))
+			icount->rng++;
+		if ((UART_state_bitmap & B_BREAK) !=
+		   (prev_UART_state_bitmap & B_BREAK))
+			icount->brk++;
+		if ((UART_state_bitmap & B_TX_CARRIER) !=
+		   (prev_UART_state_bitmap & B_TX_CARRIER))
+			icount->dsr++;
+		if ((UART_state_bitmap & B_RX_CARRIER) !=
+		   (prev_UART_state_bitmap & B_RX_CARRIER))
+			icount->dcd++;
+		tiocmget->prev_UART_state_bitmap = UART_state_bitmap;
+		spin_unlock(&serial->serial_lock);
+		tiocmget->intr_completed = 1;
+		wake_up_interruptible(&tiocmget->waitq);
+	}
+	memset(serial_state_notification, 0,
+	       sizeof(struct hso_serial_state_notification));
+	tiocmget_submit_urb(serial,
+			    tiocmget,
+			    serial->parent->usb);
+}
+
+/*
+ * next few functions largely stolen from drivers/serial/serial_core.c
+ */
+/* Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change
+ * - mask passed in arg for lines of interest
+ *   (use |'ed TIOCM_RNG/DSR/CD/CTS for masking)
+ * Caller should use TIOCGICOUNT to see which one it was
+ */
+static int
+hso_wait_modem_status(struct hso_serial *serial, unsigned long arg)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct uart_icount cprev, cnow;
+	struct hso_tiocmget  *tiocmget;
+	int ret;
+
+	tiocmget = serial->tiocmget;
+	if (!tiocmget)
+		return -ENOENT;
+	/*
+	 * note the counters on entry
+	 */
+	spin_lock_irq(&serial->serial_lock);
+	memcpy(&cprev, &tiocmget->icount, sizeof(struct uart_icount));
+	spin_unlock_irq(&serial->serial_lock);
+	add_wait_queue(&tiocmget->waitq, &wait);
+	for (;;) {
+		spin_lock_irq(&serial->serial_lock);
+		memcpy(&cnow, &tiocmget->icount, sizeof(struct uart_icount));
+		spin_unlock_irq(&serial->serial_lock);
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (((arg & TIOCM_RNG) && (cnow.rng != cprev.rng)) ||
+		    ((arg & TIOCM_DSR) && (cnow.dsr != cprev.dsr)) ||
+		    ((arg & TIOCM_CD)  && (cnow.dcd != cprev.dcd))) {
+			ret = 0;
+			break;
+		}
+		schedule();
+		/* see if a signal did it */
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+		cprev = cnow;
+	}
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&tiocmget->waitq, &wait);
+
+	return ret;
+}
+
+/*
+ * Get counter of input serial line interrupts (DCD,RI,DSR,CTS)
+ * Return: write counters to the user passed counter struct
+ * NB: both 1->0 and 0->1 transitions are counted except for
+ *     RI where only 0->1 is counted.
+ */
+static int hso_get_count(struct hso_serial *serial,
+			  struct serial_icounter_struct __user *icnt)
+{
+	struct serial_icounter_struct icount;
+	struct uart_icount cnow;
+	struct hso_tiocmget  *tiocmget = serial->tiocmget;
+
+	if (!tiocmget)
+		 return -ENOENT;
+	spin_lock_irq(&serial->serial_lock);
+	memcpy(&cnow, &tiocmget->icount, sizeof(struct uart_icount));
+	spin_unlock_irq(&serial->serial_lock);
+
+	icount.cts         = cnow.cts;
+	icount.dsr         = cnow.dsr;
+	icount.rng         = cnow.rng;
+	icount.dcd         = cnow.dcd;
+	icount.rx          = cnow.rx;
+	icount.tx          = cnow.tx;
+	icount.frame       = cnow.frame;
+	icount.overrun     = cnow.overrun;
+	icount.parity      = cnow.parity;
+	icount.brk         = cnow.brk;
+	icount.buf_overrun = cnow.buf_overrun;
+
+	return copy_to_user(icnt, &icount, sizeof(icount)) ? -EFAULT : 0;
+}
+
 
 static int hso_serial_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	unsigned int value;
+	int retval;
 	struct hso_serial *serial = get_serial_by_tty(tty);
-	unsigned long flags;
+	struct hso_tiocmget  *tiocmget;
+	u16 UART_state_bitmap;
 
 	/* sanity check */
 	if (!serial) {
 		D1("no tty structures");
 		return -EINVAL;
 	}
-
-	spin_lock_irqsave(&serial->serial_lock, flags);
-	value = ((serial->rts_state) ? TIOCM_RTS : 0) |
+	spin_lock_irq(&serial->serial_lock);
+	retval = ((serial->rts_state) ? TIOCM_RTS : 0) |
 	    ((serial->dtr_state) ? TIOCM_DTR : 0);
-	spin_unlock_irqrestore(&serial->serial_lock, flags);
-
-	return value;
+	tiocmget = serial->tiocmget;
+	if (tiocmget) {
+
+		UART_state_bitmap = le16_to_cpu(
+			tiocmget->prev_UART_state_bitmap);
+		if (UART_state_bitmap & B_RING_SIGNAL)
+			retval |=  TIOCM_RNG;
+		if (UART_state_bitmap & B_RX_CARRIER)
+			retval |=  TIOCM_CD;
+		if (UART_state_bitmap & B_TX_CARRIER)
+			retval |=  TIOCM_DSR;
+	}
+	spin_unlock_irq(&serial->serial_lock);
+	return retval;
 }
 
 static int hso_serial_tiocmset(struct tty_struct *tty, struct file *file,
@@ -1479,6 +1712,32 @@ static int hso_serial_tiocmset(struct tty_struct *tty, struct file *file,
 			       USB_CTRL_SET_TIMEOUT);
 }
 
+static int hso_serial_ioctl(struct tty_struct *tty, struct file *file,
+			    unsigned int cmd, unsigned long arg)
+{
+	struct hso_serial *serial =  get_serial_by_tty(tty);
+	void __user *uarg = (void __user *)arg;
+	int ret = 0;
+	D4("IOCTL cmd: %d, arg: %ld", cmd, arg);
+
+	if (!serial)
+		return -ENODEV;
+	switch (cmd) {
+	case TIOCMIWAIT:
+		ret = hso_wait_modem_status(serial, arg);
+		break;
+
+	case TIOCGICOUNT:
+		ret = hso_get_count(serial, uarg);
+		break;
+	default:
+		ret = -ENOIOCTLCMD;
+		break;
+	}
+	return ret;
+}
+
+
 /* starts a transmit */
 static void hso_kick_transmit(struct hso_serial *serial)
 {
@@ -1956,7 +2215,10 @@ static int hso_start_serial_device(struct hso_device *hso_dev, gfp_t flags)
 		serial->shared_int->use_count++;
 		mutex_unlock(&serial->shared_int->shared_int_lock);
 	}
-
+	if (serial->tiocmget)
+		tiocmget_submit_urb(serial,
+				    serial->tiocmget,
+				    serial->parent->usb);
 	return result;
 }
 
@@ -1964,6 +2226,7 @@ static int hso_stop_serial_device(struct hso_device *hso_dev)
 {
 	int i;
 	struct hso_serial *serial = dev2ser(hso_dev);
+	struct hso_tiocmget  *tiocmget;
 
 	if (!serial)
 		return -ENODEV;
@@ -1992,6 +2255,11 @@ static int hso_stop_serial_device(struct hso_device *hso_dev)
 		}
 		mutex_unlock(&serial->shared_int->shared_int_lock);
 	}
+	tiocmget = serial->tiocmget;
+	if (tiocmget) {
+		wake_up_interruptible(&tiocmget->waitq);
+		usb_kill_urb(tiocmget->urb);
+	}
 
 	return 0;
 }
@@ -2338,6 +2606,20 @@ exit:
 	return NULL;
 }
 
+static void hso_free_tiomget(struct hso_serial *serial)
+{
+	struct hso_tiocmget *tiocmget = serial->tiocmget;
+	if (tiocmget) {
+		kfree(tiocmget);
+		if (tiocmget->urb) {
+			usb_free_urb(tiocmget->urb);
+			tiocmget->urb = NULL;
+		}
+		serial->tiocmget = NULL;
+
+	}
+}
+
 /* Frees an AT channel ( goes for both mux and non-mux ) */
 static void hso_free_serial_device(struct hso_device *hso_dev)
 {
@@ -2356,6 +2638,7 @@ static void hso_free_serial_device(struct hso_device *hso_dev)
 		else
 			mutex_unlock(&serial->shared_int->shared_int_lock);
 	}
+	hso_free_tiomget(serial);
 	kfree(serial);
 	hso_free_device(hso_dev);
 }
@@ -2367,6 +2650,7 @@ static struct hso_device *hso_create_bulk_serial_device(
 	struct hso_device *hso_dev;
 	struct hso_serial *serial;
 	int num_urbs;
+	struct hso_tiocmget *tiocmget;
 
 	hso_dev = hso_create_device(interface, port);
 	if (!hso_dev)
@@ -2379,8 +2663,27 @@ static struct hso_device *hso_create_bulk_serial_device(
 	serial->parent = hso_dev;
 	hso_dev->port_data.dev_serial = serial;
 
-	if (port & HSO_PORT_MODEM)
+	if (port & HSO_PORT_MODEM) {
 		num_urbs = 2;
+		serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget),
+					   GFP_KERNEL);
+		/* it isn't going to break our heart if serial->tiocmget
+		 *  allocation fails don't bother checking this.
+		 */
+		if (serial->tiocmget) {
+			tiocmget = serial->tiocmget;
+			tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL);
+			if (tiocmget->urb) {
+				mutex_init(&tiocmget->mutex);
+				init_waitqueue_head(&tiocmget->waitq);
+				tiocmget->endp = hso_get_ep(
+					interface,
+					USB_ENDPOINT_XFER_INT,
+					USB_DIR_IN);
+			} else
+				hso_free_tiomget(serial);
+		}
+	}
 	else
 		num_urbs = 1;
 
@@ -2416,6 +2719,7 @@ static struct hso_device *hso_create_bulk_serial_device(
 exit2:
 	hso_serial_common_free(serial);
 exit:
+	hso_free_tiomget(serial);
 	kfree(serial);
 	hso_free_device(hso_dev);
 	return NULL;
@@ -2926,6 +3230,7 @@ static const struct tty_operations hso_serial_ops = {
 	.close = hso_serial_close,
 	.write = hso_serial_write,
 	.write_room = hso_serial_write_room,
+	.ioctl = hso_serial_ioctl,
 	.set_termios = hso_serial_set_termios,
 	.chars_in_buffer = hso_serial_chars_in_buffer,
 	.tiocmget = hso_serial_tiocmget,
-- 
cgit v0.10.2


From d1c815e549ff40f9e9db65654855118e6bdff6a4 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:47:58 +0000
Subject: tty: relock epca

Bring epca into line with the port locking.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index da2d2cf..e07d792 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -69,7 +69,9 @@ static int invalid_lilo_config;
 
 /*
  * The ISA boards do window flipping into the same spaces so its only sane with
- * a single lock. It's still pretty efficient.
+ * a single lock. It's still pretty efficient. This lock guards the hardware
+ * and the tty_port lock guards the kernel side stuff like use counts. Take
+ * this lock inside the port lock if you must take both.
  */
 static DEFINE_SPINLOCK(epca_lock);
 
@@ -156,7 +158,7 @@ static struct channel *verifyChannel(struct tty_struct *);
 static void pc_sched_event(struct channel *, int);
 static void epca_error(int, char *);
 static void pc_close(struct tty_struct *, struct file *);
-static void shutdown(struct channel *);
+static void shutdown(struct channel *, struct tty_struct *tty);
 static void pc_hangup(struct tty_struct *);
 static int pc_write_room(struct tty_struct *);
 static int pc_chars_in_buffer(struct tty_struct *);
@@ -419,76 +421,78 @@ static void epca_error(int line, char *msg)
 static void pc_close(struct tty_struct *tty, struct file *filp)
 {
 	struct channel *ch;
+	struct tty_port *port;
 	unsigned long flags;
 	/*
 	 * verifyChannel returns the channel from the tty struct if it is
 	 * valid. This serves as a sanity check.
 	 */
 	ch = verifyChannel(tty);
-	if (ch != NULL) {
-		spin_lock_irqsave(&epca_lock, flags);
-		if (tty_hung_up_p(filp)) {
-			spin_unlock_irqrestore(&epca_lock, flags);
-			return;
-		}
-		if (ch->port.count-- > 1)  {
-			/* Begin channel is open more than once */
-			/*
-			 * Return without doing anything. Someone might still
-			 * be using the channel.
-			 */
-			spin_unlock_irqrestore(&epca_lock, flags);
-			return;
-		}
-		/* Port open only once go ahead with shutdown & reset */
-		BUG_ON(ch->port.count < 0);
+	if (ch == NULL)
+		return;
+	port = &ch->port;
 
+	spin_lock_irqsave(&port->lock, flags);
+	if (tty_hung_up_p(filp)) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		return;
+	}
+	if (port->count-- > 1)  {
+		/* Begin channel is open more than once */
 		/*
-		 * Let the rest of the driver know the channel is being closed.
-		 * This becomes important if an open is attempted before close
-		 * is finished.
+		 * Return without doing anything. Someone might still
+		 * be using the channel.
 		 */
-		ch->port.flags |= ASYNC_CLOSING;
-		tty->closing = 1;
-
-		spin_unlock_irqrestore(&epca_lock, flags);
-
-		if (ch->port.flags & ASYNC_INITIALIZED)  {
-			/* Setup an event to indicate when the
-			   transmit buffer empties */
-			setup_empty_event(tty, ch);
-			/* 30 seconds timeout */
-			tty_wait_until_sent(tty, 3000);
-		}
-		pc_flush_buffer(tty);
+		spin_unlock_irqrestore(&port->lock, flags);
+		return;
+	}
+	/* Port open only once go ahead with shutdown & reset */
+	WARN_ON(port->count < 0);
 
-		tty_ldisc_flush(tty);
-		shutdown(ch);
+	/*
+	 * Let the rest of the driver know the channel is being closed.
+	 * This becomes important if an open is attempted before close
+	 * is finished.
+	 */
+	port->flags |= ASYNC_CLOSING;
+	tty->closing = 1;
 
-		spin_lock_irqsave(&epca_lock, flags);
-		tty->closing = 0;
-		ch->event = 0;
-		ch->port.tty = NULL;
-		spin_unlock_irqrestore(&epca_lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
 
-		if (ch->port.blocked_open) {
-			if (ch->close_delay)
-				msleep_interruptible(jiffies_to_msecs(ch->close_delay));
-			wake_up_interruptible(&ch->port.open_wait);
-		}
-		ch->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_INITIALIZED |
-					ASYNC_CLOSING);
-		wake_up_interruptible(&ch->port.close_wait);
+	if (port->flags & ASYNC_INITIALIZED)  {
+		/* Setup an event to indicate when the
+		   transmit buffer empties */
+		setup_empty_event(tty, ch);
+		/* 30 seconds timeout */
+		tty_wait_until_sent(tty, 3000);
+	}
+	pc_flush_buffer(tty);
+	tty_ldisc_flush(tty);
+	shutdown(ch, tty);
+
+	spin_lock_irqsave(&port->lock, flags);
+	tty->closing = 0;
+	ch->event = 0;
+	port->tty = NULL;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (port->blocked_open) {
+		if (ch->close_delay)
+			msleep_interruptible(jiffies_to_msecs(ch->close_delay));
+		wake_up_interruptible(&port->open_wait);
 	}
+	port->flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_INITIALIZED |
+							ASYNC_CLOSING);
+	wake_up_interruptible(&port->close_wait);
 }
 
-static void shutdown(struct channel *ch)
+static void shutdown(struct channel *ch, struct tty_struct *tty)
 {
 	unsigned long flags;
-	struct tty_struct *tty;
 	struct board_chan __iomem *bc;
+	struct tty_port *port = &ch->port;
 
-	if (!(ch->port.flags & ASYNC_INITIALIZED))
+	if (!(port->flags & ASYNC_INITIALIZED))
 		return;
 
 	spin_lock_irqsave(&epca_lock, flags);
@@ -503,7 +507,6 @@ static void shutdown(struct channel *ch)
 	 */
 	if (bc)
 		writeb(0, &bc->idata);
-	tty = ch->port.tty;
 
 	/* If we're a modem control device and HUPCL is on, drop RTS & DTR. */
 	if (tty->termios->c_cflag & HUPCL)  {
@@ -517,13 +520,15 @@ static void shutdown(struct channel *ch)
 	 * will have to reinitialized. Set a flag to indicate this.
 	 */
 	/* Prevent future Digi programmed interrupts from coming active */
-	ch->port.flags &= ~ASYNC_INITIALIZED;
+	port->flags &= ~ASYNC_INITIALIZED;
 	spin_unlock_irqrestore(&epca_lock, flags);
 }
 
 static void pc_hangup(struct tty_struct *tty)
 {
 	struct channel *ch;
+	struct tty_port *port;
+
 	/*
 	 * verifyChannel returns the channel from the tty struct if it is
 	 * valid. This serves as a sanity check.
@@ -531,18 +536,19 @@ static void pc_hangup(struct tty_struct *tty)
 	ch = verifyChannel(tty);
 	if (ch != NULL) {
 		unsigned long flags;
+		port = &ch->port;
 
 		pc_flush_buffer(tty);
 		tty_ldisc_flush(tty);
-		shutdown(ch);
-
-		spin_lock_irqsave(&epca_lock, flags);
-		ch->port.tty   = NULL;
-		ch->event = 0;
-		ch->port.count = 0;
-		ch->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_INITIALIZED);
-		spin_unlock_irqrestore(&epca_lock, flags);
-		wake_up_interruptible(&ch->port.open_wait);
+		shutdown(ch, tty);
+
+		spin_lock_irqsave(&port->lock, flags);
+		port->tty = NULL;
+		ch->event = 0;	/* FIXME: review locking of ch->event */
+		port->count = 0;
+		port->flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_INITIALIZED);
+		spin_unlock_irqrestore(&port->lock, flags);
+		wake_up_interruptible(&port->open_wait);
 	}
 }
 
@@ -792,9 +798,10 @@ static int block_til_ready(struct tty_struct *tty,
 	DECLARE_WAITQUEUE(wait, current);
 	int retval, do_clocal = 0;
 	unsigned long flags;
+	struct tty_port *port = &ch->port;
 
 	if (tty_hung_up_p(filp)) {
-		if (ch->port.flags & ASYNC_HUP_NOTIFY)
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			retval = -EAGAIN;
 		else
 			retval = -ERESTARTSYS;
@@ -805,10 +812,10 @@ static int block_til_ready(struct tty_struct *tty,
 	 * If the device is in the middle of being closed, then block until
 	 * it's done, and then try again.
 	 */
-	if (ch->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&ch->port.close_wait);
+	if (port->flags & ASYNC_CLOSING) {
+		interruptible_sleep_on(&port->close_wait);
 
-		if (ch->port.flags & ASYNC_HUP_NOTIFY)
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
 			return -ERESTARTSYS;
@@ -819,7 +826,7 @@ static int block_til_ready(struct tty_struct *tty,
 		 * If non-blocking mode is set, then make the check up front
 		 * and then exit.
 		 */
-		ch->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 	if (tty->termios->c_cflag & CLOCAL)
@@ -827,31 +834,31 @@ static int block_til_ready(struct tty_struct *tty,
 	/* Block waiting for the carrier detect and the line to become free */
 
 	retval = 0;
-	add_wait_queue(&ch->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
-	spin_lock_irqsave(&epca_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	/* We dec count so that pc_close will know when to free things */
 	if (!tty_hung_up_p(filp))
-		ch->port.count--;
-	ch->port.blocked_open++;
+		port->count--;
+	port->blocked_open++;
 	while (1) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) ||
-				!(ch->port.flags & ASYNC_INITIALIZED)) {
-			if (ch->port.flags & ASYNC_HUP_NOTIFY)
+				!(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(ch->port.flags & ASYNC_CLOSING) &&
+		if (!(port->flags & ASYNC_CLOSING) &&
 			  (do_clocal || (ch->imodem & ch->dcd)))
 			break;
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
 			break;
 		}
-		spin_unlock_irqrestore(&epca_lock, flags);
+		spin_unlock_irqrestore(&port->lock, flags);
 		/*
 		 * Allow someone else to be scheduled. We will occasionally go
 		 * through this loop until one of the above conditions change.
@@ -859,27 +866,28 @@ static int block_til_ready(struct tty_struct *tty,
 		 * and prevent this loop from hogging the cpu.
 		 */
 		schedule();
-		spin_lock_irqsave(&epca_lock, flags);
+		spin_lock_irqsave(&port->lock, flags);
 	}
 
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&ch->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	if (!tty_hung_up_p(filp))
-		ch->port.count++;
-	ch->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 
-	spin_unlock_irqrestore(&epca_lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	if (retval)
 		return retval;
 
-	ch->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
 static int pc_open(struct tty_struct *tty, struct file *filp)
 {
 	struct channel *ch;
+	struct tty_port *port;
 	unsigned long flags;
 	int line, retval, boardnum;
 	struct board_chan __iomem *bc;
@@ -890,6 +898,7 @@ static int pc_open(struct tty_struct *tty, struct file *filp)
 		return -ENODEV;
 
 	ch = &digi_channels[line];
+	port = &ch->port;
 	boardnum = ch->boardnum;
 
 	/* Check status of board configured in system.  */
@@ -926,22 +935,24 @@ static int pc_open(struct tty_struct *tty, struct file *filp)
 		return -ENODEV;
 	}
 
-	spin_lock_irqsave(&epca_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	/*
 	 * Every time a channel is opened, increment a counter. This is
 	 * necessary because we do not wish to flush and shutdown the channel
 	 * until the last app holding the channel open, closes it.
 	 */
-	ch->port.count++;
+	port->count++;
 	/*
 	 * Set a kernel structures pointer to our local channel structure. This
 	 * way we can get to it when passed only a tty struct.
 	 */
 	tty->driver_data = ch;
+	port->tty = tty;
 	/*
 	 * If this is the first time the channel has been opened, initialize
 	 * the tty->termios struct otherwise let pc_close handle it.
 	 */
+	spin_lock(&epca_lock);
 	globalwinon(ch);
 	ch->statusflags = 0;
 
@@ -956,16 +967,16 @@ static int pc_open(struct tty_struct *tty, struct file *filp)
 	writew(head, &bc->rout);
 
 	/* Set the channels associated tty structure */
-	ch->port.tty = tty;
 
 	/*
 	 * The below routine generally sets up parity, baud, flow control
 	 * issues, etc.... It effect both control flags and input flags.
 	 */
 	epcaparam(tty, ch);
-	ch->port.flags |= ASYNC_INITIALIZED;
 	memoff(ch);
-	spin_unlock_irqrestore(&epca_lock, flags);
+	spin_unlock(&epca_lock);
+	port->flags |= ASYNC_INITIALIZED;
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	retval = block_til_ready(tty, filp, ch);
 	if (retval)
@@ -974,13 +985,15 @@ static int pc_open(struct tty_struct *tty, struct file *filp)
 	 * Set this again in case a hangup set it to zero while this open() was
 	 * waiting for the line...
 	 */
-	spin_lock_irqsave(&epca_lock, flags);
-	ch->port.tty = tty;
+	spin_lock_irqsave(&port->lock, flags);
+	port->tty = tty;
+	spin_lock(&epca_lock);
 	globalwinon(ch);
 	/* Enable Digi Data events */
 	writeb(1, &bc->idata);
 	memoff(ch);
-	spin_unlock_irqrestore(&epca_lock, flags);
+	spin_unlock(&epca_lock);
+	spin_unlock_irqrestore(&port->lock, flags);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 3969ffba71d39ced700d09d9cfde83174396299e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:48:04 +0000
Subject: tty: refcount the epca driver

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index e07d792..7a69705 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -175,7 +175,7 @@ static unsigned termios2digi_h(struct channel *ch, unsigned);
 static unsigned termios2digi_i(struct channel *ch, unsigned);
 static unsigned termios2digi_c(struct channel *ch, unsigned);
 static void epcaparam(struct tty_struct *, struct channel *);
-static void receive_data(struct channel *);
+static void receive_data(struct channel *, struct tty_struct *tty);
 static int pc_ioctl(struct tty_struct *, struct file *,
 			unsigned int, unsigned long);
 static int info_ioctl(struct tty_struct *, struct file *,
@@ -473,7 +473,7 @@ static void pc_close(struct tty_struct *tty, struct file *filp)
 	spin_lock_irqsave(&port->lock, flags);
 	tty->closing = 0;
 	ch->event = 0;
-	port->tty = NULL;
+	tty_port_tty_set(port, NULL);
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	if (port->blocked_open) {
@@ -1029,8 +1029,11 @@ static void __exit epca_module_exit(void)
 		}
 		ch = card_ptr[crd];
 		for (count = 0; count < bd->numports; count++, ch++) {
-			if (ch && ch->port.tty)
-				tty_hangup(ch->port.tty);
+			struct tty_struct *tty = tty_port_tty_get(&ch->port);
+			if (tty) {
+				tty_hangup(tty);
+				tty_kref_put(tty);
+			}
 		}
 	}
 	pci_unregister_driver(&epca_driver);
@@ -1441,7 +1444,7 @@ static void post_fep_init(unsigned int crd)
 		ch->boardnum   = crd;
 		ch->channelnum = i;
 		ch->magic      = EPCA_MAGIC;
-		ch->port.tty        = NULL;
+		tty_port_tty_set(&ch->port, NULL);
 
 		if (shrinkmem) {
 			fepcmd(ch, SETBUFFER, 32, 0, 0, 0);
@@ -1635,8 +1638,9 @@ static void doevent(int crd)
 		if (bc == NULL)
 			goto next;
 
+		tty = tty_port_tty_get(&ch->port);
 		if (event & DATA_IND)  { /* Begin DATA_IND */
-			receive_data(ch);
+			receive_data(ch, tty);
 			assertgwinon(ch);
 		} /* End DATA_IND */
 		/* else *//* Fix for DCD transition missed bug */
@@ -1651,7 +1655,6 @@ static void doevent(int crd)
 					pc_sched_event(ch, EPCA_EVENT_HANGUP);
 			}
 		}
-		tty = ch->port.tty;
 		if (tty) {
 			if (event & BREAK_IND) {
 				/* A break has been indicated */
@@ -1671,6 +1674,7 @@ static void doevent(int crd)
 					tty_wakeup(tty);
 				}
 			}
+			tty_kref_put(tty);
 		}
 next:
 		globalwinon(ch);
@@ -1965,11 +1969,10 @@ static void epcaparam(struct tty_struct *tty, struct channel *ch)
 }
 
 /* Caller holds lock */
-static void receive_data(struct channel *ch)
+static void receive_data(struct channel *ch, struct tty_struct *tty)
 {
 	unchar *rptr;
 	struct ktermios *ts = NULL;
-	struct tty_struct *tty;
 	struct board_chan __iomem *bc;
 	int dataToRead, wrapgap, bytesAvailable;
 	unsigned int tail, head;
@@ -1982,7 +1985,6 @@ static void receive_data(struct channel *ch)
 	globalwinon(ch);
 	if (ch->statusflags & RXSTOPPED)
 		return;
-	tty = ch->port.tty;
 	if (tty)
 		ts = tty->termios;
 	bc = ch->brdchan;
@@ -2042,7 +2044,7 @@ static void receive_data(struct channel *ch)
 	globalwinon(ch);
 	writew(tail, &bc->rout);
 	/* Must be called with global data */
-	tty_schedule_flip(ch->port.tty);
+	tty_schedule_flip(tty);
 }
 
 static int info_ioctl(struct tty_struct *tty, struct file *file,
@@ -2365,7 +2367,7 @@ static void do_softint(struct work_struct *work)
 	struct channel *ch = container_of(work, struct channel, tqueue);
 	/* Called in response to a modem change event */
 	if (ch && ch->magic == EPCA_MAGIC) {
-		struct tty_struct *tty = ch->port.tty;
+		struct tty_struct *tty = tty_port_tty_get(&ch->port);;
 
 		if (tty && tty->driver_data) {
 			if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) {
@@ -2374,6 +2376,7 @@ static void do_softint(struct work_struct *work)
 				ch->port.flags &= ~ASYNC_NORMAL_ACTIVE;
 			}
 		}
+		tty_kref_put(tty);
 	}
 }
 
-- 
cgit v0.10.2


From 6ed1dbaeadd62a026a93aa3ac8680d2dfe9f96b3 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:48:11 +0000
Subject: tty: Make epca use the port helpers

Now the locking is straight and the port kref usage is straight we can
replace lots of chunks of code with the standard port helpers

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index 7a69705..71225d1 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -432,58 +432,15 @@ static void pc_close(struct tty_struct *tty, struct file *filp)
 		return;
 	port = &ch->port;
 
-	spin_lock_irqsave(&port->lock, flags);
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&port->lock, flags);
-		return;
-	}
-	if (port->count-- > 1)  {
-		/* Begin channel is open more than once */
-		/*
-		 * Return without doing anything. Someone might still
-		 * be using the channel.
-		 */
-		spin_unlock_irqrestore(&port->lock, flags);
+	if (tty_port_close_start(port, tty, filp) == 0)
 		return;
-	}
-	/* Port open only once go ahead with shutdown & reset */
-	WARN_ON(port->count < 0);
 
-	/*
-	 * Let the rest of the driver know the channel is being closed.
-	 * This becomes important if an open is attempted before close
-	 * is finished.
-	 */
-	port->flags |= ASYNC_CLOSING;
-	tty->closing = 1;
-
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	if (port->flags & ASYNC_INITIALIZED)  {
-		/* Setup an event to indicate when the
-		   transmit buffer empties */
-		setup_empty_event(tty, ch);
-		/* 30 seconds timeout */
-		tty_wait_until_sent(tty, 3000);
-	}
 	pc_flush_buffer(tty);
-	tty_ldisc_flush(tty);
 	shutdown(ch, tty);
 
-	spin_lock_irqsave(&port->lock, flags);
-	tty->closing = 0;
-	ch->event = 0;
+	tty_port_close_end(port, tty);
+	ch->event = 0;	/* FIXME: review ch->event locking */
 	tty_port_tty_set(port, NULL);
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	if (port->blocked_open) {
-		if (ch->close_delay)
-			msleep_interruptible(jiffies_to_msecs(ch->close_delay));
-		wake_up_interruptible(&port->open_wait);
-	}
-	port->flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_INITIALIZED |
-							ASYNC_CLOSING);
-	wake_up_interruptible(&port->close_wait);
 }
 
 static void shutdown(struct channel *ch, struct tty_struct *tty)
@@ -527,7 +484,6 @@ static void shutdown(struct channel *ch, struct tty_struct *tty)
 static void pc_hangup(struct tty_struct *tty)
 {
 	struct channel *ch;
-	struct tty_port *port;
 
 	/*
 	 * verifyChannel returns the channel from the tty struct if it is
@@ -536,19 +492,13 @@ static void pc_hangup(struct tty_struct *tty)
 	ch = verifyChannel(tty);
 	if (ch != NULL) {
 		unsigned long flags;
-		port = &ch->port;
 
 		pc_flush_buffer(tty);
 		tty_ldisc_flush(tty);
 		shutdown(ch, tty);
 
-		spin_lock_irqsave(&port->lock, flags);
-		port->tty = NULL;
 		ch->event = 0;	/* FIXME: review locking of ch->event */
-		port->count = 0;
-		port->flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_INITIALIZED);
-		spin_unlock_irqrestore(&port->lock, flags);
-		wake_up_interruptible(&port->open_wait);
+		tty_port_hangup(&ch->port);
 	}
 }
 
@@ -792,98 +742,18 @@ static void pc_flush_chars(struct tty_struct *tty)
 	}
 }
 
-static int block_til_ready(struct tty_struct *tty,
-				struct file *filp, struct channel *ch)
+static int epca_carrier_raised(struct tty_port *port)
 {
-	DECLARE_WAITQUEUE(wait, current);
-	int retval, do_clocal = 0;
-	unsigned long flags;
-	struct tty_port *port = &ch->port;
-
-	if (tty_hung_up_p(filp)) {
-		if (port->flags & ASYNC_HUP_NOTIFY)
-			retval = -EAGAIN;
-		else
-			retval = -ERESTARTSYS;
-		return retval;
-	}
-
-	/*
-	 * If the device is in the middle of being closed, then block until
-	 * it's done, and then try again.
-	 */
-	if (port->flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->close_wait);
-
-		if (port->flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		else
-			return -ERESTARTSYS;
-	}
-
-	if (filp->f_flags & O_NONBLOCK)  {
-		/*
-		 * If non-blocking mode is set, then make the check up front
-		 * and then exit.
-		 */
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-	if (tty->termios->c_cflag & CLOCAL)
-		do_clocal = 1;
-	/* Block waiting for the carrier detect and the line to become free */
-
-	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
-
-	spin_lock_irqsave(&port->lock, flags);
-	/* We dec count so that pc_close will know when to free things */
-	if (!tty_hung_up_p(filp))
-		port->count--;
-	port->blocked_open++;
-	while (1) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) ||
-				!(port->flags & ASYNC_INITIALIZED)) {
-			if (port->flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		if (!(port->flags & ASYNC_CLOSING) &&
-			  (do_clocal || (ch->imodem & ch->dcd)))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		spin_unlock_irqrestore(&port->lock, flags);
-		/*
-		 * Allow someone else to be scheduled. We will occasionally go
-		 * through this loop until one of the above conditions change.
-		 * The below schedule call will allow other processes to enter
-		 * and prevent this loop from hogging the cpu.
-		 */
-		schedule();
-		spin_lock_irqsave(&port->lock, flags);
-	}
-
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
-	if (!tty_hung_up_p(filp))
-		port->count++;
-	port->blocked_open--;
-
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	if (retval)
-		return retval;
-
-	port->flags |= ASYNC_NORMAL_ACTIVE;
+	struct channel *ch = container_of(port, struct channel, port);
+	if (ch->imodem & ch->dcd)
+		return 1;
 	return 0;
 }
 
+static void epca_raise_dtr_rts(struct tty_port *port0
+{
+}
+
 static int pc_open(struct tty_struct *tty, struct file *filp)
 {
 	struct channel *ch;
@@ -978,7 +848,7 @@ static int pc_open(struct tty_struct *tty, struct file *filp)
 	port->flags |= ASYNC_INITIALIZED;
 	spin_unlock_irqrestore(&port->lock, flags);
 
-	retval = block_til_ready(tty, filp, ch);
+	retval = tty_port_block_til_ready(port, tty, filp);
 	if (retval)
 		return retval;
 	/*
@@ -1058,6 +928,11 @@ static const struct tty_operations pc_ops = {
 	.break_ctl = pc_send_break
 };
 
+static const struct tty_port_operations epca_port_ops = {
+	.carrier_raised = epca_carrier_raised,
+	.raise_dtr_rts = epca_raise_dtr_rts,
+};
+
 static int info_open(struct tty_struct *tty, struct file *filp)
 {
 	return 0;
@@ -1393,6 +1268,7 @@ static void post_fep_init(unsigned int crd)
 		u16 tseg, rseg;
 
 		tty_port_init(&ch->port);
+		ch->port.ops - &epca_port_ops;
 		ch->brdchan = bc;
 		ch->mailbox = gd;
 		INIT_WORK(&ch->tqueue, do_softint);
@@ -1526,7 +1402,7 @@ static void post_fep_init(unsigned int crd)
 		ch->fepstartca = 0;
 		ch->fepstopca = 0;
 
-		ch->close_delay = 50;
+		ch->port.close_delay = 50;
 
 		spin_unlock_irqrestore(&epca_lock, flags);
 	}
@@ -1647,7 +1523,7 @@ static void doevent(int crd)
 		if (event & MODEMCHG_IND) {
 			/* A modem signal change has been indicated */
 			ch->imodem = mstat;
-			if (ch->port.flags & ASYNC_CHECK_CD) {
+			if (test_bit(ASYNC_CHECK_CD, &ch->port.flags)) {
 				/* We are now receiving dcd */
 				if (mstat & ch->dcd)
 					wake_up_interruptible(&ch->port.open_wait);
@@ -1894,9 +1770,9 @@ static void epcaparam(struct tty_struct *tty, struct channel *ch)
 		 * that the driver will wait on carrier detect.
 		 */
 		if (ts->c_cflag & CLOCAL)
-			ch->port.flags &= ~ASYNC_CHECK_CD;
+			clear_bit(ASYNC_CHECK_CD, &ch->port.flags);
 		else
-			ch->port.flags |= ASYNC_CHECK_CD;
+			set_bit(ASYNC_CHECK_CD, &ch->port.flags);
 		mval = ch->m_dtr | ch->m_rts;
 	} /* End CBAUD not detected */
 	iflag = termios2digi_i(ch, ts->c_iflag);
@@ -2373,7 +2249,7 @@ static void do_softint(struct work_struct *work)
 			if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) {
 				tty_hangup(tty);
 				wake_up_interruptible(&ch->port.open_wait);
-				ch->port.flags &= ~ASYNC_NORMAL_ACTIVE;
+				clear_bit(ASYNC_NORMAL_ACTIVE, &ch->port.flags);
 			}
 		}
 		tty_kref_put(tty);
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index b3175f5..b580fcf 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -286,7 +286,8 @@ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct f
 	port->flags |= ASYNC_CLOSING;
 	tty->closing = 1;
 	spin_unlock_irqrestore(&port->lock, flags);
-	if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
+	if (port->flags & ASYNC_INITIALIZED &&
+			port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, port->closing_wait);
 	return 1;
 }
-- 
cgit v0.10.2


From c1314a49d7907b96d72f2c41f8927fc3c738e956 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:48:17 +0000
Subject: tty: Redo the rocket driver locking

Bring this driver into the port locking model

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index 71225d1..39ad820 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -164,8 +164,6 @@ static int pc_write_room(struct tty_struct *);
 static int pc_chars_in_buffer(struct tty_struct *);
 static void pc_flush_buffer(struct tty_struct *);
 static void pc_flush_chars(struct tty_struct *);
-static int block_til_ready(struct tty_struct *, struct file *,
-			struct channel *);
 static int pc_open(struct tty_struct *, struct file *);
 static void post_fep_init(unsigned int crd);
 static void epcapoll(unsigned long);
@@ -422,7 +420,6 @@ static void pc_close(struct tty_struct *tty, struct file *filp)
 {
 	struct channel *ch;
 	struct tty_port *port;
-	unsigned long flags;
 	/*
 	 * verifyChannel returns the channel from the tty struct if it is
 	 * valid. This serves as a sanity check.
@@ -491,8 +488,6 @@ static void pc_hangup(struct tty_struct *tty)
 	 */
 	ch = verifyChannel(tty);
 	if (ch != NULL) {
-		unsigned long flags;
-
 		pc_flush_buffer(tty);
 		tty_ldisc_flush(tty);
 		shutdown(ch, tty);
@@ -750,7 +745,7 @@ static int epca_carrier_raised(struct tty_port *port)
 	return 0;
 }
 
-static void epca_raise_dtr_rts(struct tty_port *port0
+static void epca_raise_dtr_rts(struct tty_port *port)
 {
 }
 
@@ -1268,7 +1263,7 @@ static void post_fep_init(unsigned int crd)
 		u16 tseg, rseg;
 
 		tty_port_init(&ch->port);
-		ch->port.ops - &epca_port_ops;
+		ch->port.ops = &epca_port_ops;
 		ch->brdchan = bc;
 		ch->mailbox = gd;
 		INIT_WORK(&ch->tqueue, do_softint);
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index 1e68cc2..efc3e5c 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -920,7 +920,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 #ifdef ROCKET_DEBUG_OPEN
 	printk(KERN_INFO "block_til_ready before block: ttyR%d, count = %d\n", info->line, port->count);
 #endif
-	spin_lock_irqsave(&info->slock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 
 #ifdef ROCKET_DISABLE_SIMUSAGE
 	info->flags |= ASYNC_NORMAL_ACTIVE;
@@ -932,7 +932,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 #endif
 	port->blocked_open++;
 
-	spin_unlock_irqrestore(&info->slock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	while (1) {
 		if (tty->termios->c_cflag & CBAUD)
@@ -961,13 +961,13 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&port->open_wait, &wait);
 
-	spin_lock_irqsave(&info->slock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 
 	if (extra_count)
 		port->count++;
 	port->blocked_open--;
 
-	spin_unlock_irqrestore(&info->slock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
 
 #ifdef ROCKET_DEBUG_OPEN
 	printk(KERN_INFO "block_til_ready after blocking: ttyR%d, count = %d\n",
@@ -1095,6 +1095,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 static void rp_close(struct tty_struct *tty, struct file *filp)
 {
 	struct r_port *info = tty->driver_data;
+	struct tty_port *port = &info->port;
 	unsigned long flags;
 	int timeout;
 	CHANNEL_t *cp;
@@ -1108,9 +1109,9 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 
 	if (tty_hung_up_p(filp))
 		return;
-	spin_lock_irqsave(&info->slock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 
-	if ((tty->count == 1) && (info->port.count != 1)) {
+	if (tty->count == 1 && port->count != 1) {
 		/*
 		 * Uh, oh.  tty->count is 1, which means that the tty
 		 * structure will be freed.  Info->count should always
@@ -1120,19 +1121,19 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 		 */
 		printk(KERN_WARNING "rp_close: bad serial port count; "
 			"tty->count is 1, info->port.count is %d\n", info->port.count);
-		info->port.count = 1;
+		port->count = 1;
 	}
-	if (--info->port.count < 0) {
+	if (--port->count < 0) {
 		printk(KERN_WARNING "rp_close: bad serial port count for "
 				"ttyR%d: %d\n", info->line, info->port.count);
-		info->port.count = 0;
+		port->count = 0;
 	}
-	if (info->port.count) {
-		spin_unlock_irqrestore(&info->slock, flags);
+	if (port->count) {
+		spin_unlock_irqrestore(&port->lock, flags);
 		return;
 	}
 	info->flags |= ASYNC_CLOSING;
-	spin_unlock_irqrestore(&info->slock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	cp = &info->channel;
 
@@ -1152,7 +1153,7 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 	 * Wait for the transmit buffer to clear
 	 */
 	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, info->port.closing_wait);
+		tty_wait_until_sent(tty, port->closing_wait);
 	/*
 	 * Before we drop DTR, make sure the UART transmitter
 	 * has completely drained; this is especially
@@ -1181,11 +1182,11 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 
 	clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]);
 
-	if (info->port.blocked_open) {
-		if (info->port.close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->port.close_delay));
+	if (port->blocked_open) {
+		if (port->close_delay) {
+			msleep_interruptible(jiffies_to_msecs(port->close_delay));
 		}
-		wake_up_interruptible(&info->port.open_wait);
+		wake_up_interruptible(&port->open_wait);
 	} else {
 		if (info->xmit_buf) {
 			free_page((unsigned long) info->xmit_buf);
-- 
cgit v0.10.2


From 21bed701da009b4192d9e86b3596cf210ac7369c Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:48:23 +0000
Subject: tty: make rocketport use standard port->flags

We need to this ready for using the standard helpers

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index efc3e5c..ca6fcdc 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -499,7 +499,7 @@ static void rp_handle_port(struct r_port *info)
 	if (!info)
 		return;
 
-	if ((info->flags & ASYNC_INITIALIZED) == 0) {
+	if ((info->port.flags & ASYNC_INITIALIZED) == 0) {
 		printk(KERN_WARNING "rp: WARNING: rp_handle_port called with "
 				"info->flags & NOT_INIT\n");
 		return;
@@ -892,11 +892,11 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	 * until it's done, and then try again.
 	 */
 	if (tty_hung_up_p(filp))
-		return ((info->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
+		return ((info->port.flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
 	if (info->flags & ASYNC_CLOSING) {
 		if (wait_for_completion_interruptible(&info->close_wait))
 			return -ERESTARTSYS;
-		return ((info->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
+		return ((info->port.flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
 	}
 
 	/*
@@ -904,7 +904,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	 * then make the check up front and then exit.
 	 */
 	if ((filp->f_flags & O_NONBLOCK) || (tty->flags & (1 << TTY_IO_ERROR))) {
-		info->flags |= ASYNC_NORMAL_ACTIVE;
+		info->port.flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 	if (tty->termios->c_cflag & CLOCAL)
@@ -923,7 +923,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	spin_lock_irqsave(&port->lock, flags);
 
 #ifdef ROCKET_DISABLE_SIMUSAGE
-	info->flags |= ASYNC_NORMAL_ACTIVE;
+	info->port.flags |= ASYNC_NORMAL_ACTIVE;
 #else
 	if (!tty_hung_up_p(filp)) {
 		extra_count = 1;
@@ -938,14 +938,14 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		if (tty->termios->c_cflag & CBAUD)
 			tty_port_raise_dtr_rts(port);
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(info->flags & ASYNC_INITIALIZED)) {
-			if (info->flags & ASYNC_HUP_NOTIFY)
+		if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)) {
+			if (info->port.flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(info->flags & ASYNC_CLOSING) &&
+		if (!(info->port.flags & ASYNC_CLOSING) &&
 			(do_clocal || tty_port_carrier_raised(port)))
 			break;
 		if (signal_pending(current)) {
@@ -954,7 +954,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		}
 #ifdef ROCKET_DEBUG_OPEN
 		printk(KERN_INFO "block_til_ready blocking: ttyR%d, count = %d, flags=0x%0x\n",
-		     info->line, port->count, info->flags);
+		     info->line, port->count, info->port.flags);
 #endif
 		schedule();	/*  Don't hold spinlock here, will hang PC */
 	}
@@ -975,7 +975,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 #endif
 	if (retval)
 		return retval;
-	info->flags |= ASYNC_NORMAL_ACTIVE;
+	info->port.flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
@@ -998,12 +998,12 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 	if (!page)
 		return -ENOMEM;
 
-	if (info->flags & ASYNC_CLOSING) {
+	if (info->port.flags & ASYNC_CLOSING) {
 		retval = wait_for_completion_interruptible(&info->close_wait);
 		free_page(page);
 		if (retval)
 			return retval;
-		return ((info->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
+		return ((info->port.flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
 	}
 
 	/*
@@ -1032,7 +1032,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 	/*
 	 * Info->count is now 1; so it's safe to sleep now.
 	 */
-	if ((info->flags & ASYNC_INITIALIZED) == 0) {
+	if ((info->port.flags & ASYNC_INITIALIZED) == 0) {
 		cp = &info->channel;
 		sSetRxTrigger(cp, TRIG_1);
 		if (sGetChanStatus(cp) & CD_ACT)
@@ -1056,7 +1056,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 		sEnRxFIFO(cp);
 		sEnTransmit(cp);
 
-		info->flags |= ASYNC_INITIALIZED;
+		info->port.flags |= ASYNC_INITIALIZED;
 
 		/*
 		 * Set up the tty->alt_speed kludge
@@ -1132,7 +1132,7 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 		spin_unlock_irqrestore(&port->lock, flags);
 		return;
 	}
-	info->flags |= ASYNC_CLOSING;
+	info->port.flags |= ASYNC_CLOSING;
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	cp = &info->channel;
@@ -1193,7 +1193,7 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 			info->xmit_buf = NULL;
 		}
 	}
-	info->flags &= ~(ASYNC_INITIALIZED | ASYNC_CLOSING | ASYNC_NORMAL_ACTIVE);
+	info->port.flags &= ~(ASYNC_INITIALIZED | ASYNC_CLOSING | ASYNC_NORMAL_ACTIVE);
 	tty->closing = 0;
 	complete_all(&info->close_wait);
 	atomic_dec(&rp_num_ports_open);
@@ -1650,14 +1650,14 @@ static void rp_hangup(struct tty_struct *tty)
 	printk(KERN_INFO "rp_hangup of ttyR%d...\n", info->line);
 #endif
 	rp_flush_buffer(tty);
-	if (info->flags & ASYNC_CLOSING)
+	if (info->port.flags & ASYNC_CLOSING)
 		return;
 	if (info->port.count)
 		atomic_dec(&rp_num_ports_open);
 	clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]);
 
 	info->port.count = 0;
-	info->flags &= ~ASYNC_NORMAL_ACTIVE;
+	info->port.flags &= ~ASYNC_NORMAL_ACTIVE;
 	info->port.tty = NULL;
 
 	cp = &info->channel;
@@ -1667,7 +1667,7 @@ static void rp_hangup(struct tty_struct *tty)
 	sDisCTSFlowCtl(cp);
 	sDisTxSoftFlowCtl(cp);
 	sClrTxXOFF(cp);
-	info->flags &= ~ASYNC_INITIALIZED;
+	info->port.flags &= ~ASYNC_INITIALIZED;
 
 	wake_up_interruptible(&info->port.open_wait);
 }
-- 
cgit v0.10.2


From 47b01b3a5fc7239f3e8d5d5cadc88afbea24d0c3 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:48:30 +0000
Subject: tty: kref the rocket driver

We will need this kref fitted to make full use of the port operations.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index ca6fcdc..b5e5e77 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -436,15 +436,15 @@ static void rp_do_transmit(struct r_port *info)
 #endif
 	if (!info)
 		return;
-	if (!info->port.tty) {
-		printk(KERN_WARNING "rp: WARNING %s called with "
-				"info->port.tty==NULL\n", __func__);
+	tty = tty_port_tty_get(&info->port);
+
+	if (tty == NULL) {
+		printk(KERN_WARNING "rp: WARNING %s called with tty==NULL\n", __func__);
 		clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]);
 		return;
 	}
 
 	spin_lock_irqsave(&info->slock, flags);
-	tty = info->port.tty;
 	info->xmit_fifo_room = TXFIFO_SIZE - sGetTxCnt(cp);
 
 	/*  Loop sending data to FIFO until done or FIFO full */
@@ -478,6 +478,7 @@ static void rp_do_transmit(struct r_port *info)
 	}
 
 	spin_unlock_irqrestore(&info->slock, flags);
+	tty_kref_put(tty);
 
 #ifdef ROCKET_DEBUG_INTR
 	printk(KERN_DEBUG "(%d,%d,%d,%d)...\n", info->xmit_cnt, info->xmit_head,
@@ -504,13 +505,13 @@ static void rp_handle_port(struct r_port *info)
 				"info->flags & NOT_INIT\n");
 		return;
 	}
-	if (!info->port.tty) {
+	tty = tty_port_tty_get(&info->port);
+	if (!tty) {
 		printk(KERN_WARNING "rp: WARNING: rp_handle_port called with "
-				"info->port.tty==NULL\n");
+				"tty==NULL\n");
 		return;
 	}
 	cp = &info->channel;
-	tty = info->port.tty;
 
 	IntMask = sGetChanIntID(cp) & info->intmask;
 #ifdef ROCKET_DEBUG_INTR
@@ -542,6 +543,7 @@ static void rp_handle_port(struct r_port *info)
 		printk(KERN_INFO "DSR change...\n");
 	}
 #endif
+	tty_kref_put(tty);
 }
 
 /*
@@ -710,7 +712,7 @@ static void init_r_port(int board, int aiop, int chan, struct pci_dev *pci_dev)
  *  Configures a rocketport port according to its termio settings.  Called from 
  *  user mode into the driver (exception handler).  *info CD manipulation is spinlock protected.
  */
-static void configure_r_port(struct r_port *info,
+static void configure_r_port(struct tty_struct *tty, struct r_port *info,
 			     struct ktermios *old_termios)
 {
 	unsigned cflag;
@@ -718,7 +720,7 @@ static void configure_r_port(struct r_port *info,
 	unsigned rocketMode;
 	int bits, baud, divisor;
 	CHANNEL_t *cp;
-	struct ktermios *t = info->port.tty->termios;
+	struct ktermios *t = tty->termios;
 
 	cp = &info->channel;
 	cflag = t->c_cflag;
@@ -751,7 +753,7 @@ static void configure_r_port(struct r_port *info,
 	}
 
 	/* baud rate */
-	baud = tty_get_baud_rate(info->port.tty);
+	baud = tty_get_baud_rate(tty);
 	if (!baud)
 		baud = 9600;
 	divisor = ((rp_baud_base[info->board] + (baud >> 1)) / baud) - 1;
@@ -769,7 +771,7 @@ static void configure_r_port(struct r_port *info,
 	sSetBaud(cp, divisor);
 
 	/* FIXME: Should really back compute a baud rate from the divisor */
-	tty_encode_baud_rate(info->port.tty, baud, baud);
+	tty_encode_baud_rate(tty, baud, baud);
 
 	if (cflag & CRTSCTS) {
 		info->intmask |= DELTA_CTS;
@@ -794,15 +796,15 @@ static void configure_r_port(struct r_port *info,
 	 * Handle software flow control in the board
 	 */
 #ifdef ROCKET_SOFT_FLOW
-	if (I_IXON(info->port.tty)) {
+	if (I_IXON(tty)) {
 		sEnTxSoftFlowCtl(cp);
-		if (I_IXANY(info->port.tty)) {
+		if (I_IXANY(tty)) {
 			sEnIXANY(cp);
 		} else {
 			sDisIXANY(cp);
 		}
-		sSetTxXONChar(cp, START_CHAR(info->port.tty));
-		sSetTxXOFFChar(cp, STOP_CHAR(info->port.tty));
+		sSetTxXONChar(cp, START_CHAR(tty));
+		sSetTxXOFFChar(cp, STOP_CHAR(tty));
 	} else {
 		sDisTxSoftFlowCtl(cp);
 		sDisIXANY(cp);
@@ -814,24 +816,24 @@ static void configure_r_port(struct r_port *info,
 	 * Set up ignore/read mask words
 	 */
 	info->read_status_mask = STMRCVROVRH | 0xFF;
-	if (I_INPCK(info->port.tty))
+	if (I_INPCK(tty))
 		info->read_status_mask |= STMFRAMEH | STMPARITYH;
-	if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
+	if (I_BRKINT(tty) || I_PARMRK(tty))
 		info->read_status_mask |= STMBREAKH;
 
 	/*
 	 * Characters to ignore
 	 */
 	info->ignore_status_mask = 0;
-	if (I_IGNPAR(info->port.tty))
+	if (I_IGNPAR(tty))
 		info->ignore_status_mask |= STMFRAMEH | STMPARITYH;
-	if (I_IGNBRK(info->port.tty)) {
+	if (I_IGNBRK(tty)) {
 		info->ignore_status_mask |= STMBREAKH;
 		/*
 		 * If we're ignoring parity and break indicators,
 		 * ignore overruns too.  (For real raw support).
 		 */
-		if (I_IGNPAR(info->port.tty))
+		if (I_IGNPAR(tty))
 			info->ignore_status_mask |= STMRCVROVRH;
 	}
 
@@ -1015,7 +1017,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 		info->xmit_buf = (unsigned char *) page;
 
 	tty->driver_data = info;
-	info->port.tty = tty;
+	tty_port_tty_set(&info->port, tty);
 
 	if (info->port.count++ == 0) {
 		atomic_inc(&rp_num_ports_open);
@@ -1062,15 +1064,15 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 		 * Set up the tty->alt_speed kludge
 		 */
 		if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_HI)
-			info->port.tty->alt_speed = 57600;
+			tty->alt_speed = 57600;
 		if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_VHI)
-			info->port.tty->alt_speed = 115200;
+			tty->alt_speed = 115200;
 		if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_SHI)
-			info->port.tty->alt_speed = 230400;
+			tty->alt_speed = 230400;
 		if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_WARP)
-			info->port.tty->alt_speed = 460800;
+			tty->alt_speed = 460800;
 
-		configure_r_port(info, NULL);
+		configure_r_port(tty, info, NULL);
 		if (tty->termios->c_cflag & CBAUD) {
 			sSetDTR(cp);
 			sSetRTS(cp);
@@ -1227,7 +1229,7 @@ static void rp_set_termios(struct tty_struct *tty,
 	/* Or CMSPAR */
 	tty->termios->c_cflag &= ~CMSPAR;
 
-	configure_r_port(info, old_termios);
+	configure_r_port(tty, info, old_termios);
 
 	cp = &info->channel;
 
@@ -1352,7 +1354,8 @@ static int get_config(struct r_port *info, struct rocket_config __user *retinfo)
 	return 0;
 }
 
-static int set_config(struct r_port *info, struct rocket_config __user *new_info)
+static int set_config(struct tty_struct *tty, struct r_port *info,
+					struct rocket_config __user *new_info)
 {
 	struct rocket_config new_serial;
 
@@ -1364,7 +1367,7 @@ static int set_config(struct r_port *info, struct rocket_config __user *new_info
 		if ((new_serial.flags & ~ROCKET_USR_MASK) != (info->flags & ~ROCKET_USR_MASK))
 			return -EPERM;
 		info->flags = ((info->flags & ~ROCKET_USR_MASK) | (new_serial.flags & ROCKET_USR_MASK));
-		configure_r_port(info, NULL);
+		configure_r_port(tty, info, NULL);
 		return 0;
 	}
 
@@ -1373,15 +1376,15 @@ static int set_config(struct r_port *info, struct rocket_config __user *new_info
 	info->port.closing_wait = new_serial.closing_wait;
 
 	if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_HI)
-		info->port.tty->alt_speed = 57600;
+		tty->alt_speed = 57600;
 	if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_VHI)
-		info->port.tty->alt_speed = 115200;
+		tty->alt_speed = 115200;
 	if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_SHI)
-		info->port.tty->alt_speed = 230400;
+		tty->alt_speed = 230400;
 	if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_WARP)
-		info->port.tty->alt_speed = 460800;
+		tty->alt_speed = 460800;
 
-	configure_r_port(info, NULL);
+	configure_r_port(tty, info, NULL);
 	return 0;
 }
 
@@ -1466,7 +1469,7 @@ static int rp_ioctl(struct tty_struct *tty, struct file *file,
 		ret = get_config(info, argp);
 		break;
 	case RCKP_SET_CONFIG:
-		ret = set_config(info, argp);
+		ret = set_config(tty, info, argp);
 		break;
 	case RCKP_GET_PORTS:
 		ret = get_ports(info, argp);
@@ -1658,7 +1661,7 @@ static void rp_hangup(struct tty_struct *tty)
 
 	info->port.count = 0;
 	info->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	info->port.tty = NULL;
+	tty_port_tty_set(&info->port, NULL);
 
 	cp = &info->channel;
 	sDisRxFIFO(cp);
@@ -1778,7 +1781,8 @@ static int rp_write(struct tty_struct *tty,
 
 	/*  Write remaining data into the port's xmit_buf */
 	while (1) {
-		if (!info->port.tty)		/* Seemingly obligatory check... */
+		/* Hung up ? */
+		if (!test_bit(ASYNC_NORMAL_ACTIVE, &info->port.flags))
 			goto end;
 		c = min(count, XMIT_BUF_SIZE - info->xmit_cnt - 1);
 		c = min(c, XMIT_BUF_SIZE - info->xmit_head);
-- 
cgit v0.10.2


From fba85e013f106a44e91ef5edec899fc56a7e61ee Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:48:39 +0000
Subject: tty: use port methods for the rocket driver

Now we have our ducks in order we can begin switching to the port
operations

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index b5e5e77..f59fc5c 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -879,108 +879,6 @@ static void raise_dtr_rts(struct tty_port *port)
 	sSetRTS(&info->channel);
 }
 
-/*  info->port.count is considered critical, protected by spinlocks.  */
-static int block_til_ready(struct tty_struct *tty, struct file *filp,
-			   struct r_port *info)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	struct tty_port *port = &info->port;
-	int retval;
-	int do_clocal = 0, extra_count = 0;
-	unsigned long flags;
-
-	/*
-	 * If the device is in the middle of being closed, then block
-	 * until it's done, and then try again.
-	 */
-	if (tty_hung_up_p(filp))
-		return ((info->port.flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
-	if (info->flags & ASYNC_CLOSING) {
-		if (wait_for_completion_interruptible(&info->close_wait))
-			return -ERESTARTSYS;
-		return ((info->port.flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
-	}
-
-	/*
-	 * If non-blocking mode is set, or the port is not enabled,
-	 * then make the check up front and then exit.
-	 */
-	if ((filp->f_flags & O_NONBLOCK) || (tty->flags & (1 << TTY_IO_ERROR))) {
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-	if (tty->termios->c_cflag & CLOCAL)
-		do_clocal = 1;
-
-	/*
-	 * Block waiting for the carrier detect and the line to become free.  While we are in
-	 * this loop, port->count is dropped by one, so that rp_close() knows when to free things.
-         * We restore it upon exit, either normal or abnormal.
-	 */
-	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
-#ifdef ROCKET_DEBUG_OPEN
-	printk(KERN_INFO "block_til_ready before block: ttyR%d, count = %d\n", info->line, port->count);
-#endif
-	spin_lock_irqsave(&port->lock, flags);
-
-#ifdef ROCKET_DISABLE_SIMUSAGE
-	info->port.flags |= ASYNC_NORMAL_ACTIVE;
-#else
-	if (!tty_hung_up_p(filp)) {
-		extra_count = 1;
-		port->count--;
-	}
-#endif
-	port->blocked_open++;
-
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	while (1) {
-		if (tty->termios->c_cflag & CBAUD)
-			tty_port_raise_dtr_rts(port);
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)) {
-			if (info->port.flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		if (!(info->port.flags & ASYNC_CLOSING) &&
-			(do_clocal || tty_port_carrier_raised(port)))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-#ifdef ROCKET_DEBUG_OPEN
-		printk(KERN_INFO "block_til_ready blocking: ttyR%d, count = %d, flags=0x%0x\n",
-		     info->line, port->count, info->port.flags);
-#endif
-		schedule();	/*  Don't hold spinlock here, will hang PC */
-	}
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
-
-	spin_lock_irqsave(&port->lock, flags);
-
-	if (extra_count)
-		port->count++;
-	port->blocked_open--;
-
-	spin_unlock_irqrestore(&port->lock, flags);
-
-#ifdef ROCKET_DEBUG_OPEN
-	printk(KERN_INFO "block_til_ready after blocking: ttyR%d, count = %d\n",
-	       info->line, port->count);
-#endif
-	if (retval)
-		return retval;
-	info->port.flags |= ASYNC_NORMAL_ACTIVE;
-	return 0;
-}
-
 /*
  *  Exception handler that opens a serial port.  Creates xmit_buf storage, fills in 
  *  port's r_port struct.  Initializes the port hardware.  
@@ -988,24 +886,26 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 static int rp_open(struct tty_struct *tty, struct file *filp)
 {
 	struct r_port *info;
+	struct tty_port *port;
 	int line = 0, retval;
 	CHANNEL_t *cp;
 	unsigned long page;
 
 	line = tty->index;
-	if ((line < 0) || (line >= MAX_RP_PORTS) || ((info = rp_table[line]) == NULL))
+	if (line < 0 || line >= MAX_RP_PORTS || ((info = rp_table[line]) == NULL))
 		return -ENXIO;
-
+	port = &info->port;
+	
 	page = __get_free_page(GFP_KERNEL);
 	if (!page)
 		return -ENOMEM;
 
-	if (info->port.flags & ASYNC_CLOSING) {
+	if (port->flags & ASYNC_CLOSING) {
 		retval = wait_for_completion_interruptible(&info->close_wait);
 		free_page(page);
 		if (retval)
 			return retval;
-		return ((info->port.flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
+		return ((port->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
 	}
 
 	/*
@@ -1017,9 +917,9 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 		info->xmit_buf = (unsigned char *) page;
 
 	tty->driver_data = info;
-	tty_port_tty_set(&info->port, tty);
+	tty_port_tty_set(port, tty);
 
-	if (info->port.count++ == 0) {
+	if (port->count++ == 0) {
 		atomic_inc(&rp_num_ports_open);
 
 #ifdef ROCKET_DEBUG_OPEN
@@ -1034,7 +934,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 	/*
 	 * Info->count is now 1; so it's safe to sleep now.
 	 */
-	if ((info->port.flags & ASYNC_INITIALIZED) == 0) {
+	if (!test_bit(ASYNC_INITIALIZED, &port->flags)) {
 		cp = &info->channel;
 		sSetRxTrigger(cp, TRIG_1);
 		if (sGetChanStatus(cp) & CD_ACT)
@@ -1058,7 +958,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 		sEnRxFIFO(cp);
 		sEnTransmit(cp);
 
-		info->port.flags |= ASYNC_INITIALIZED;
+		set_bit(ASYNC_INITIALIZED, &info->port.flags);
 
 		/*
 		 * Set up the tty->alt_speed kludge
@@ -1081,7 +981,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 	/*  Starts (or resets) the maint polling loop */
 	mod_timer(&rocket_timer, jiffies + POLL_PERIOD);
 
-	retval = block_til_ready(tty, filp, info);
+	retval = tty_port_block_til_ready(port, tty, filp);
 	if (retval) {
 #ifdef ROCKET_DEBUG_OPEN
 		printk(KERN_INFO "rp_open returning after block_til_ready with %d\n", retval);
@@ -1098,7 +998,6 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 {
 	struct r_port *info = tty->driver_data;
 	struct tty_port *port = &info->port;
-	unsigned long flags;
 	int timeout;
 	CHANNEL_t *cp;
 	
@@ -1109,53 +1008,10 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 	printk(KERN_INFO "rp_close ttyR%d, count = %d\n", info->line, info->port.count);
 #endif
 
-	if (tty_hung_up_p(filp))
+	if (tty_port_close_start(port, tty, filp) == 0)
 		return;
-	spin_lock_irqsave(&port->lock, flags);
-
-	if (tty->count == 1 && port->count != 1) {
-		/*
-		 * Uh, oh.  tty->count is 1, which means that the tty
-		 * structure will be freed.  Info->count should always
-		 * be one in these conditions.  If it's greater than
-		 * one, we've got real problems, since it means the
-		 * serial port won't be shutdown.
-		 */
-		printk(KERN_WARNING "rp_close: bad serial port count; "
-			"tty->count is 1, info->port.count is %d\n", info->port.count);
-		port->count = 1;
-	}
-	if (--port->count < 0) {
-		printk(KERN_WARNING "rp_close: bad serial port count for "
-				"ttyR%d: %d\n", info->line, info->port.count);
-		port->count = 0;
-	}
-	if (port->count) {
-		spin_unlock_irqrestore(&port->lock, flags);
-		return;
-	}
-	info->port.flags |= ASYNC_CLOSING;
-	spin_unlock_irqrestore(&port->lock, flags);
 
 	cp = &info->channel;
-
-	/*
-	 * Notify the line discpline to only process XON/XOFF characters
-	 */
-	tty->closing = 1;
-
-	/*
-	 * If transmission was throttled by the application request,
-	 * just flush the xmit buffer.
-	 */
-	if (tty->flow_stopped)
-		rp_flush_buffer(tty);
-
-	/*
-	 * Wait for the transmit buffer to clear
-	 */
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, port->closing_wait);
 	/*
 	 * Before we drop DTR, make sure the UART transmitter
 	 * has completely drained; this is especially
@@ -1184,6 +1040,9 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 
 	clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]);
 
+	/* We can't yet use tty_port_close_end as the buffer handling in this
+	   driver is a bit different to the usual */
+
 	if (port->blocked_open) {
 		if (port->close_delay) {
 			msleep_interruptible(jiffies_to_msecs(port->close_delay));
@@ -1197,6 +1056,8 @@ static void rp_close(struct tty_struct *tty, struct file *filp)
 	}
 	info->port.flags &= ~(ASYNC_INITIALIZED | ASYNC_CLOSING | ASYNC_NORMAL_ACTIVE);
 	tty->closing = 0;
+	tty_port_tty_set(port, NULL);
+	wake_up_interruptible(&port->close_wait);
 	complete_all(&info->close_wait);
 	atomic_dec(&rp_num_ports_open);
 
@@ -1659,9 +1520,7 @@ static void rp_hangup(struct tty_struct *tty)
 		atomic_dec(&rp_num_ports_open);
 	clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]);
 
-	info->port.count = 0;
-	info->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	tty_port_tty_set(&info->port, NULL);
+	tty_port_hangup(&info->port);
 
 	cp = &info->channel;
 	sDisRxFIFO(cp);
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index b580fcf..9b8004c 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -286,6 +286,9 @@ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct f
 	port->flags |= ASYNC_CLOSING;
 	tty->closing = 1;
 	spin_unlock_irqrestore(&port->lock, flags);
+	/* Don't block on a stalled port, just pull the chain */
+	if (tty->flow_stopped)
+		tty_driver_flush_buffer(tty);
 	if (port->flags & ASYNC_INITIALIZED &&
 			port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, port->closing_wait);
-- 
cgit v0.10.2


From eeb4613436f0f19a38f667ea3078821040559c68 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:48:47 +0000
Subject: synclink_cs: Convert to tty_port

Use the tty port operations, add refcounting, and refactor a bit to make the
refcounting work cleanly.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 4d64a02..dc073e1 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -138,20 +138,15 @@ struct _input_signal_events {
  */
 
 typedef struct _mgslpc_info {
+	struct tty_port		port;
 	void *if_ptr;	/* General purpose pointer (used by SPPP) */
 	int			magic;
-	int			flags;
-	int			count;		/* count of opens */
 	int			line;
-	unsigned short		close_delay;
-	unsigned short		closing_wait;	/* time to wait before closing */
 
 	struct mgsl_icount	icount;
 
-	struct tty_struct 	*tty;
 	int			timeout;
 	int			x_char;		/* xon/xoff character */
-	int			blocked_open;	/* # of blocked opens */
 	unsigned char		read_status_mask;
 	unsigned char		ignore_status_mask;
 
@@ -170,9 +165,6 @@ typedef struct _mgslpc_info {
 	int            rx_buf_count;   /* total number of rx buffers */
 	int            rx_frame_count; /* number of full rx buffers */
 
-	wait_queue_head_t	open_wait;
-	wait_queue_head_t	close_wait;
-
 	wait_queue_head_t	status_event_wait_q;
 	wait_queue_head_t	event_wait_q;
 	struct timer_list	tx_timer;	/* HDLC transmit timeout timer */
@@ -375,7 +367,7 @@ static void irq_enable(MGSLPC_INFO *info, unsigned char channel, unsigned short
 static void rx_start(MGSLPC_INFO *info);
 static void rx_stop(MGSLPC_INFO *info);
 
-static void tx_start(MGSLPC_INFO *info);
+static void tx_start(MGSLPC_INFO *info, struct tty_struct *tty);
 static void tx_stop(MGSLPC_INFO *info);
 static void tx_set_idle(MGSLPC_INFO *info);
 
@@ -389,7 +381,8 @@ static void async_mode(MGSLPC_INFO *info);
 
 static void tx_timeout(unsigned long context);
 
-static int ioctl_common(MGSLPC_INFO *info, unsigned int cmd, unsigned long arg);
+static int carrier_raised(struct tty_port *port);
+static void raise_dtr_rts(struct tty_port *port);
 
 #if SYNCLINK_GENERIC_HDLC
 #define dev_to_port(D) (dev_to_hdlc(D)->priv)
@@ -410,7 +403,7 @@ static void release_resources(MGSLPC_INFO *info);
 static void mgslpc_add_device(MGSLPC_INFO *info);
 static void mgslpc_remove_device(MGSLPC_INFO *info);
 
-static bool rx_get_frame(MGSLPC_INFO *info);
+static bool rx_get_frame(MGSLPC_INFO *info, struct tty_struct *tty);
 static void rx_reset_buffers(MGSLPC_INFO *info);
 static int  rx_alloc_buffers(MGSLPC_INFO *info);
 static void rx_free_buffers(MGSLPC_INFO *info);
@@ -421,7 +414,7 @@ static irqreturn_t mgslpc_isr(int irq, void *dev_id);
  * Bottom half interrupt handlers
  */
 static void bh_handler(struct work_struct *work);
-static void bh_transmit(MGSLPC_INFO *info);
+static void bh_transmit(MGSLPC_INFO *info, struct tty_struct *tty);
 static void bh_status(MGSLPC_INFO *info);
 
 /*
@@ -432,10 +425,10 @@ static int tiocmset(struct tty_struct *tty, struct file *file,
 		    unsigned int set, unsigned int clear);
 static int get_stats(MGSLPC_INFO *info, struct mgsl_icount __user *user_icount);
 static int get_params(MGSLPC_INFO *info, MGSL_PARAMS __user *user_params);
-static int set_params(MGSLPC_INFO *info, MGSL_PARAMS __user *new_params);
+static int set_params(MGSLPC_INFO *info, MGSL_PARAMS __user *new_params, struct tty_struct *tty);
 static int get_txidle(MGSLPC_INFO *info, int __user *idle_mode);
 static int set_txidle(MGSLPC_INFO *info, int idle_mode);
-static int set_txenable(MGSLPC_INFO *info, int enable);
+static int set_txenable(MGSLPC_INFO *info, int enable, struct tty_struct *tty);
 static int tx_abort(MGSLPC_INFO *info);
 static int set_rxenable(MGSLPC_INFO *info, int enable);
 static int wait_events(MGSLPC_INFO *info, int __user *mask);
@@ -474,7 +467,7 @@ static struct tty_driver *serial_driver;
 /* number of characters left in xmit buffer before we ask for more */
 #define WAKEUP_CHARS 256
 
-static void mgslpc_change_params(MGSLPC_INFO *info);
+static void mgslpc_change_params(MGSLPC_INFO *info, struct tty_struct *tty);
 static void mgslpc_wait_until_sent(struct tty_struct *tty, int timeout);
 
 /* PCMCIA prototypes */
@@ -517,6 +510,11 @@ static void ldisc_receive_buf(struct tty_struct *tty,
 	}
 }
 
+static const struct tty_port_operations mgslpc_port_ops = {
+	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts
+};
+
 static int mgslpc_probe(struct pcmcia_device *link)
 {
     MGSLPC_INFO *info;
@@ -532,12 +530,12 @@ static int mgslpc_probe(struct pcmcia_device *link)
     }
 
     info->magic = MGSLPC_MAGIC;
+    tty_port_init(&info->port);
+    info->port.ops = &mgslpc_port_ops;
     INIT_WORK(&info->task, bh_handler);
     info->max_frame_size = 4096;
-    info->close_delay = 5*HZ/10;
-    info->closing_wait = 30*HZ;
-    init_waitqueue_head(&info->open_wait);
-    init_waitqueue_head(&info->close_wait);
+    info->port.close_delay = 5*HZ/10;
+    info->port.closing_wait = 30*HZ;
     init_waitqueue_head(&info->status_event_wait_q);
     init_waitqueue_head(&info->event_wait_q);
     spin_lock_init(&info->lock);
@@ -784,7 +782,7 @@ static void tx_release(struct tty_struct *tty)
 
 	spin_lock_irqsave(&info->lock,flags);
 	if (!info->tx_enabled)
-	 	tx_start(info);
+	 	tx_start(info, tty);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
 
@@ -823,6 +821,7 @@ static int bh_action(MGSLPC_INFO *info)
 static void bh_handler(struct work_struct *work)
 {
 	MGSLPC_INFO *info = container_of(work, MGSLPC_INFO, task);
+	struct tty_struct *tty;
 	int action;
 
 	if (!info)
@@ -833,6 +832,7 @@ static void bh_handler(struct work_struct *work)
 			__FILE__,__LINE__,info->device_name);
 
 	info->bh_running = true;
+	tty = tty_port_tty_get(&info->port);
 
 	while((action = bh_action(info)) != 0) {
 
@@ -844,10 +844,10 @@ static void bh_handler(struct work_struct *work)
 		switch (action) {
 
 		case BH_RECEIVE:
-			while(rx_get_frame(info));
+			while(rx_get_frame(info, tty));
 			break;
 		case BH_TRANSMIT:
-			bh_transmit(info);
+			bh_transmit(info, tty);
 			break;
 		case BH_STATUS:
 			bh_status(info);
@@ -859,14 +859,14 @@ static void bh_handler(struct work_struct *work)
 		}
 	}
 
+	tty_kref_put(tty);
 	if (debug_level >= DEBUG_LEVEL_BH)
 		printk( "%s(%d):bh_handler(%s) exit\n",
 			__FILE__,__LINE__,info->device_name);
 }
 
-static void bh_transmit(MGSLPC_INFO *info)
+static void bh_transmit(MGSLPC_INFO *info, struct tty_struct *tty)
 {
-	struct tty_struct *tty = info->tty;
 	if (debug_level >= DEBUG_LEVEL_BH)
 		printk("bh_transmit() entry on %s\n", info->device_name);
 
@@ -945,12 +945,11 @@ static void rx_ready_hdlc(MGSLPC_INFO *info, int eom)
 	issue_command(info, CHA, CMD_RXFIFO);
 }
 
-static void rx_ready_async(MGSLPC_INFO *info, int tcd)
+static void rx_ready_async(MGSLPC_INFO *info, int tcd, struct tty_struct *tty)
 {
 	unsigned char data, status, flag;
 	int fifo_count;
 	int work = 0;
- 	struct tty_struct *tty = info->tty;
  	struct mgsl_icount *icount = &info->icount;
 
 	if (tcd) {
@@ -1013,7 +1012,7 @@ static void rx_ready_async(MGSLPC_INFO *info, int tcd)
 }
 
 
-static void tx_done(MGSLPC_INFO *info)
+static void tx_done(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	if (!info->tx_active)
 		return;
@@ -1042,7 +1041,7 @@ static void tx_done(MGSLPC_INFO *info)
 	else
 #endif
 	{
-		if (info->tty->stopped || info->tty->hw_stopped) {
+		if (tty->stopped || tty->hw_stopped) {
 			tx_stop(info);
 			return;
 		}
@@ -1050,7 +1049,7 @@ static void tx_done(MGSLPC_INFO *info)
 	}
 }
 
-static void tx_ready(MGSLPC_INFO *info)
+static void tx_ready(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	unsigned char fifo_count = 32;
 	int c;
@@ -1062,7 +1061,7 @@ static void tx_ready(MGSLPC_INFO *info)
 		if (!info->tx_active)
 			return;
 	} else {
-		if (info->tty->stopped || info->tty->hw_stopped) {
+		if (tty->stopped || tty->hw_stopped) {
 			tx_stop(info);
 			return;
 		}
@@ -1099,7 +1098,7 @@ static void tx_ready(MGSLPC_INFO *info)
 	}
 }
 
-static void cts_change(MGSLPC_INFO *info)
+static void cts_change(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	get_signals(info);
 	if ((info->cts_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
@@ -1112,14 +1111,14 @@ static void cts_change(MGSLPC_INFO *info)
 	wake_up_interruptible(&info->status_event_wait_q);
 	wake_up_interruptible(&info->event_wait_q);
 
-	if (info->flags & ASYNC_CTS_FLOW) {
-		if (info->tty->hw_stopped) {
+	if (info->port.flags & ASYNC_CTS_FLOW) {
+		if (tty->hw_stopped) {
 			if (info->serial_signals & SerialSignal_CTS) {
 				if (debug_level >= DEBUG_LEVEL_ISR)
 					printk("CTS tx start...");
-				if (info->tty)
-					info->tty->hw_stopped = 0;
-				tx_start(info);
+				if (tty)
+					tty->hw_stopped = 0;
+				tx_start(info, tty);
 				info->pending_bh |= BH_TRANSMIT;
 				return;
 			}
@@ -1127,8 +1126,8 @@ static void cts_change(MGSLPC_INFO *info)
 			if (!(info->serial_signals & SerialSignal_CTS)) {
 				if (debug_level >= DEBUG_LEVEL_ISR)
 					printk("CTS tx stop...");
-				if (info->tty)
-					info->tty->hw_stopped = 1;
+				if (tty)
+					tty->hw_stopped = 1;
 				tx_stop(info);
 			}
 		}
@@ -1136,7 +1135,7 @@ static void cts_change(MGSLPC_INFO *info)
 	info->pending_bh |= BH_STATUS;
 }
 
-static void dcd_change(MGSLPC_INFO *info)
+static void dcd_change(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	get_signals(info);
 	if ((info->dcd_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
@@ -1158,17 +1157,17 @@ static void dcd_change(MGSLPC_INFO *info)
 	wake_up_interruptible(&info->status_event_wait_q);
 	wake_up_interruptible(&info->event_wait_q);
 
-	if (info->flags & ASYNC_CHECK_CD) {
+	if (info->port.flags & ASYNC_CHECK_CD) {
 		if (debug_level >= DEBUG_LEVEL_ISR)
 			printk("%s CD now %s...", info->device_name,
 			       (info->serial_signals & SerialSignal_DCD) ? "on" : "off");
 		if (info->serial_signals & SerialSignal_DCD)
-			wake_up_interruptible(&info->open_wait);
+			wake_up_interruptible(&info->port.open_wait);
 		else {
 			if (debug_level >= DEBUG_LEVEL_ISR)
 				printk("doing serial hangup...");
-			if (info->tty)
-				tty_hangup(info->tty);
+			if (tty)
+				tty_hangup(tty);
 		}
 	}
 	info->pending_bh |= BH_STATUS;
@@ -1214,6 +1213,7 @@ static void ri_change(MGSLPC_INFO *info)
 static irqreturn_t mgslpc_isr(int dummy, void *dev_id)
 {
 	MGSLPC_INFO *info = dev_id;
+	struct tty_struct *tty;
 	unsigned short isr;
 	unsigned char gis, pis;
 	int count=0;
@@ -1224,6 +1224,8 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id)
 	if (!(info->p_dev->_locked))
 		return IRQ_HANDLED;
 
+	tty = tty_port_tty_get(&info->port);
+
 	spin_lock(&info->lock);
 
 	while ((gis = read_reg(info, CHA + GIS))) {
@@ -1239,9 +1241,9 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id)
 		if (gis & (BIT1 + BIT0)) {
 			isr = read_reg16(info, CHB + ISR);
 			if (isr & IRQ_DCD)
-				dcd_change(info);
+				dcd_change(info, tty);
 			if (isr & IRQ_CTS)
-				cts_change(info);
+				cts_change(info, tty);
 		}
 		if (gis & (BIT3 + BIT2))
 		{
@@ -1258,8 +1260,8 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id)
 			}
 			if (isr & IRQ_BREAK_ON) {
 				info->icount.brk++;
-				if (info->flags & ASYNC_SAK)
-					do_SAK(info->tty);
+				if (info->port.flags & ASYNC_SAK)
+					do_SAK(tty);
 			}
 			if (isr & IRQ_RXTIME) {
 				issue_command(info, CHA, CMD_RXFIFO_READ);
@@ -1268,7 +1270,7 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id)
 				if (info->params.mode == MGSL_MODE_HDLC)
 					rx_ready_hdlc(info, isr & IRQ_RXEOM);
 				else
-					rx_ready_async(info, isr & IRQ_RXEOM);
+					rx_ready_async(info, isr & IRQ_RXEOM, tty);
 			}
 
 			/* transmit IRQs */
@@ -1277,14 +1279,14 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id)
 					info->icount.txabort++;
 				else
 					info->icount.txunder++;
-				tx_done(info);
+				tx_done(info, tty);
 			}
 			else if (isr & IRQ_ALLSENT) {
 				info->icount.txok++;
-				tx_done(info);
+				tx_done(info, tty);
 			}
 			else if (isr & IRQ_TXFIFO)
-				tx_ready(info);
+				tx_ready(info, tty);
 		}
 		if (gis & BIT7) {
 			pis = read_reg(info, CHA + PIS);
@@ -1308,6 +1310,7 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id)
 	}
 
 	spin_unlock(&info->lock);
+	tty_kref_put(tty);
 
 	if (debug_level >= DEBUG_LEVEL_ISR)
 		printk("%s(%d):mgslpc_isr(%d)exit.\n",
@@ -1318,14 +1321,14 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id)
 
 /* Initialize and start device.
  */
-static int startup(MGSLPC_INFO * info)
+static int startup(MGSLPC_INFO * info, struct tty_struct *tty)
 {
 	int retval = 0;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):startup(%s)\n",__FILE__,__LINE__,info->device_name);
 
-	if (info->flags & ASYNC_INITIALIZED)
+	if (info->port.flags & ASYNC_INITIALIZED)
 		return 0;
 
 	if (!info->tx_buf) {
@@ -1352,30 +1355,30 @@ static int startup(MGSLPC_INFO * info)
 		retval = adapter_test(info);
 
 	if ( retval ) {
-  		if (capable(CAP_SYS_ADMIN) && info->tty)
-			set_bit(TTY_IO_ERROR, &info->tty->flags);
+  		if (capable(CAP_SYS_ADMIN) && tty)
+			set_bit(TTY_IO_ERROR, &tty->flags);
 		release_resources(info);
   		return retval;
   	}
 
 	/* program hardware for current parameters */
-	mgslpc_change_params(info);
+	mgslpc_change_params(info, tty);
 
-	if (info->tty)
-		clear_bit(TTY_IO_ERROR, &info->tty->flags);
+	if (tty)
+		clear_bit(TTY_IO_ERROR, &tty->flags);
 
-	info->flags |= ASYNC_INITIALIZED;
+	info->port.flags |= ASYNC_INITIALIZED;
 
 	return 0;
 }
 
 /* Called by mgslpc_close() and mgslpc_hangup() to shutdown hardware
  */
-static void shutdown(MGSLPC_INFO * info)
+static void shutdown(MGSLPC_INFO * info, struct tty_struct *tty)
 {
 	unsigned long flags;
 
-	if (!(info->flags & ASYNC_INITIALIZED))
+	if (!(info->port.flags & ASYNC_INITIALIZED))
 		return;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -1402,7 +1405,7 @@ static void shutdown(MGSLPC_INFO * info)
 	/* TODO:disable interrupts instead of reset to preserve signal states */
 	reset_device(info);
 
- 	if (!info->tty || info->tty->termios->c_cflag & HUPCL) {
+ 	if (!tty || tty->termios->c_cflag & HUPCL) {
  		info->serial_signals &= ~(SerialSignal_DTR + SerialSignal_RTS);
 		set_signals(info);
 	}
@@ -1411,13 +1414,13 @@ static void shutdown(MGSLPC_INFO * info)
 
 	release_resources(info);
 
-	if (info->tty)
-		set_bit(TTY_IO_ERROR, &info->tty->flags);
+	if (tty)
+		set_bit(TTY_IO_ERROR, &tty->flags);
 
-	info->flags &= ~ASYNC_INITIALIZED;
+	info->port.flags &= ~ASYNC_INITIALIZED;
 }
 
-static void mgslpc_program_hw(MGSLPC_INFO *info)
+static void mgslpc_program_hw(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	unsigned long flags;
 
@@ -1443,7 +1446,7 @@ static void mgslpc_program_hw(MGSLPC_INFO *info)
 	port_irq_enable(info, (unsigned char) PVR_DSR | PVR_RI);
 	get_signals(info);
 
-	if (info->netcount || info->tty->termios->c_cflag & CREAD)
+	if (info->netcount || (tty && (tty->termios->c_cflag & CREAD)))
 		rx_start(info);
 
 	spin_unlock_irqrestore(&info->lock,flags);
@@ -1451,19 +1454,19 @@ static void mgslpc_program_hw(MGSLPC_INFO *info)
 
 /* Reconfigure adapter based on new parameters
  */
-static void mgslpc_change_params(MGSLPC_INFO *info)
+static void mgslpc_change_params(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	unsigned cflag;
 	int bits_per_char;
 
-	if (!info->tty || !info->tty->termios)
+	if (!tty || !tty->termios)
 		return;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgslpc_change_params(%s)\n",
 			 __FILE__,__LINE__, info->device_name );
 
-	cflag = info->tty->termios->c_cflag;
+	cflag = tty->termios->c_cflag;
 
 	/* if B0 rate (hangup) specified then negate DTR and RTS */
 	/* otherwise assert DTR and RTS */
@@ -1510,7 +1513,7 @@ static void mgslpc_change_params(MGSLPC_INFO *info)
 	 * current data rate.
 	 */
 	if (info->params.data_rate <= 460800) {
-		info->params.data_rate = tty_get_baud_rate(info->tty);
+		info->params.data_rate = tty_get_baud_rate(tty);
 	}
 
 	if ( info->params.data_rate ) {
@@ -1520,24 +1523,24 @@ static void mgslpc_change_params(MGSLPC_INFO *info)
 	info->timeout += HZ/50;		/* Add .02 seconds of slop */
 
 	if (cflag & CRTSCTS)
-		info->flags |= ASYNC_CTS_FLOW;
+		info->port.flags |= ASYNC_CTS_FLOW;
 	else
-		info->flags &= ~ASYNC_CTS_FLOW;
+		info->port.flags &= ~ASYNC_CTS_FLOW;
 
 	if (cflag & CLOCAL)
-		info->flags &= ~ASYNC_CHECK_CD;
+		info->port.flags &= ~ASYNC_CHECK_CD;
 	else
-		info->flags |= ASYNC_CHECK_CD;
+		info->port.flags |= ASYNC_CHECK_CD;
 
 	/* process tty input control flags */
 
 	info->read_status_mask = 0;
-	if (I_INPCK(info->tty))
+	if (I_INPCK(tty))
 		info->read_status_mask |= BIT7 | BIT6;
-	if (I_IGNPAR(info->tty))
+	if (I_IGNPAR(tty))
 		info->ignore_status_mask |= BIT7 | BIT6;
 
-	mgslpc_program_hw(info);
+	mgslpc_program_hw(info, tty);
 }
 
 /* Add a character to the transmit buffer
@@ -1597,7 +1600,7 @@ static void mgslpc_flush_chars(struct tty_struct *tty)
 
 	spin_lock_irqsave(&info->lock,flags);
 	if (!info->tx_active)
-	 	tx_start(info);
+	 	tx_start(info, tty);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
 
@@ -1659,7 +1662,7 @@ start:
  	if (info->tx_count && !tty->stopped && !tty->hw_stopped) {
 		spin_lock_irqsave(&info->lock,flags);
 		if (!info->tx_active)
-		 	tx_start(info);
+		 	tx_start(info, tty);
 		spin_unlock_irqrestore(&info->lock,flags);
  	}
 cleanup:
@@ -1764,7 +1767,7 @@ static void mgslpc_send_xchar(struct tty_struct *tty, char ch)
 	if (ch) {
 		spin_lock_irqsave(&info->lock,flags);
 		if (!info->tx_enabled)
-		 	tx_start(info);
+		 	tx_start(info, tty);
 		spin_unlock_irqrestore(&info->lock,flags);
 	}
 }
@@ -1862,7 +1865,7 @@ static int get_params(MGSLPC_INFO * info, MGSL_PARAMS __user *user_params)
  *
  * Returns:	0 if success, otherwise error code
  */
-static int set_params(MGSLPC_INFO * info, MGSL_PARAMS __user *new_params)
+static int set_params(MGSLPC_INFO * info, MGSL_PARAMS __user *new_params, struct tty_struct *tty)
 {
  	unsigned long flags;
 	MGSL_PARAMS tmp_params;
@@ -1883,7 +1886,7 @@ static int set_params(MGSLPC_INFO * info, MGSL_PARAMS __user *new_params)
 	memcpy(&info->params,&tmp_params,sizeof(MGSL_PARAMS));
 	spin_unlock_irqrestore(&info->lock,flags);
 
- 	mgslpc_change_params(info);
+ 	mgslpc_change_params(info, tty);
 
 	return 0;
 }
@@ -1944,7 +1947,7 @@ static int set_interface(MGSLPC_INFO * info, int if_mode)
 	return 0;
 }
 
-static int set_txenable(MGSLPC_INFO * info, int enable)
+static int set_txenable(MGSLPC_INFO * info, int enable, struct tty_struct *tty)
 {
  	unsigned long flags;
 
@@ -1954,7 +1957,7 @@ static int set_txenable(MGSLPC_INFO * info, int enable)
 	spin_lock_irqsave(&info->lock,flags);
 	if (enable) {
 		if (!info->tx_enabled)
-			tx_start(info);
+			tx_start(info, tty);
 	} else {
 		if (info->tx_enabled)
 			tx_stop(info);
@@ -2263,6 +2266,11 @@ static int mgslpc_ioctl(struct tty_struct *tty, struct file * file,
 			unsigned int cmd, unsigned long arg)
 {
 	MGSLPC_INFO * info = (MGSLPC_INFO *)tty->driver_data;
+	int error;
+	struct mgsl_icount cnow;	/* kernel counter temps */
+	struct serial_icounter_struct __user *p_cuser;	/* user space */
+	void __user *argp = (void __user *)arg;
+	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgslpc_ioctl %s cmd=%08X\n", __FILE__,__LINE__,
@@ -2277,22 +2285,11 @@ static int mgslpc_ioctl(struct tty_struct *tty, struct file * file,
 		    return -EIO;
 	}
 
-	return ioctl_common(info, cmd, arg);
-}
-
-static int ioctl_common(MGSLPC_INFO *info, unsigned int cmd, unsigned long arg)
-{
-	int error;
-	struct mgsl_icount cnow;	/* kernel counter temps */
-	struct serial_icounter_struct __user *p_cuser;	/* user space */
-	void __user *argp = (void __user *)arg;
-	unsigned long flags;
-
 	switch (cmd) {
 	case MGSL_IOCGPARAMS:
 		return get_params(info, argp);
 	case MGSL_IOCSPARAMS:
-		return set_params(info, argp);
+		return set_params(info, argp, tty);
 	case MGSL_IOCGTXIDLE:
 		return get_txidle(info, argp);
 	case MGSL_IOCSTXIDLE:
@@ -2302,7 +2299,7 @@ static int ioctl_common(MGSLPC_INFO *info, unsigned int cmd, unsigned long arg)
 	case MGSL_IOCSIF:
 		return set_interface(info,(int)arg);
 	case MGSL_IOCTXENABLE:
-		return set_txenable(info,(int)arg);
+		return set_txenable(info,(int)arg, tty);
 	case MGSL_IOCRXENABLE:
 		return set_rxenable(info,(int)arg);
 	case MGSL_IOCTXABORT:
@@ -2369,7 +2366,7 @@ static void mgslpc_set_termios(struct tty_struct *tty, struct ktermios *old_term
 		== RELEVANT_IFLAG(old_termios->c_iflag)))
 	  return;
 
-	mgslpc_change_params(info);
+	mgslpc_change_params(info, tty);
 
 	/* Handle transition to B0 status */
 	if (old_termios->c_cflag & CBAUD &&
@@ -2404,81 +2401,34 @@ static void mgslpc_set_termios(struct tty_struct *tty, struct ktermios *old_term
 static void mgslpc_close(struct tty_struct *tty, struct file * filp)
 {
 	MGSLPC_INFO * info = (MGSLPC_INFO *)tty->driver_data;
+	struct tty_port *port = &info->port;
 
 	if (mgslpc_paranoia_check(info, tty->name, "mgslpc_close"))
 		return;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgslpc_close(%s) entry, count=%d\n",
-			 __FILE__,__LINE__, info->device_name, info->count);
-
-	if (!info->count)
-		return;
+			 __FILE__,__LINE__, info->device_name, port->count);
 
-	if (tty_hung_up_p(filp))
-		goto cleanup;
-
-	if ((tty->count == 1) && (info->count != 1)) {
-		/*
-		 * tty->count is 1 and the tty structure will be freed.
-		 * info->count should be one in this case.
-		 * if it's not, correct it so that the port is shutdown.
-		 */
-		printk("mgslpc_close: bad refcount; tty->count is 1, "
-		       "info->count is %d\n", info->count);
-		info->count = 1;
-	}
+	WARN_ON(!port->count);
 
-	info->count--;
-
-	/* if at least one open remaining, leave hardware active */
-	if (info->count)
+	if (tty_port_close_start(port, tty, filp) == 0)
 		goto cleanup;
 
-	info->flags |= ASYNC_CLOSING;
-
-	/* set tty->closing to notify line discipline to
-	 * only process XON/XOFF characters. Only the N_TTY
-	 * discipline appears to use this (ppp does not).
-	 */
-	tty->closing = 1;
-
-	/* wait for transmit data to clear all layers */
-
-	if (info->closing_wait != ASYNC_CLOSING_WAIT_NONE) {
-		if (debug_level >= DEBUG_LEVEL_INFO)
-			printk("%s(%d):mgslpc_close(%s) calling tty_wait_until_sent\n",
-				 __FILE__,__LINE__, info->device_name );
-		tty_wait_until_sent(tty, info->closing_wait);
-	}
-
- 	if (info->flags & ASYNC_INITIALIZED)
+ 	if (port->flags & ASYNC_INITIALIZED)
  		mgslpc_wait_until_sent(tty, info->timeout);
 
 	mgslpc_flush_buffer(tty);
 
 	tty_ldisc_flush(tty);
-
-	shutdown(info);
-
-	tty->closing = 0;
-	info->tty = NULL;
-
-	if (info->blocked_open) {
-		if (info->close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->close_delay));
-		}
-		wake_up_interruptible(&info->open_wait);
-	}
-
-	info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-
-	wake_up_interruptible(&info->close_wait);
-
+	shutdown(info, tty);
+	
+	tty_port_close_end(port, tty);
+	tty_port_tty_set(port, NULL);
 cleanup:
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgslpc_close(%s) exit, count=%d\n", __FILE__,__LINE__,
-			tty->driver->name, info->count);
+			tty->driver->name, port->count);
 }
 
 /* Wait until the transmitter is empty.
@@ -2498,7 +2448,7 @@ static void mgslpc_wait_until_sent(struct tty_struct *tty, int timeout)
 	if (mgslpc_paranoia_check(info, tty->name, "mgslpc_wait_until_sent"))
 		return;
 
-	if (!(info->flags & ASYNC_INITIALIZED))
+	if (!(info->port.flags & ASYNC_INITIALIZED))
 		goto exit;
 
 	orig_jiffies = jiffies;
@@ -2559,120 +2509,40 @@ static void mgslpc_hangup(struct tty_struct *tty)
 		return;
 
 	mgslpc_flush_buffer(tty);
-	shutdown(info);
-
-	info->count = 0;
-	info->flags &= ~ASYNC_NORMAL_ACTIVE;
-	info->tty = NULL;
-
-	wake_up_interruptible(&info->open_wait);
+	shutdown(info, tty);
+	tty_port_hangup(&info->port);
 }
 
-/* Block the current process until the specified port
- * is ready to be opened.
- */
-static int block_til_ready(struct tty_struct *tty, struct file *filp,
-			   MGSLPC_INFO *info)
+static int carrier_raised(struct tty_port *port)
 {
-	DECLARE_WAITQUEUE(wait, current);
-	int		retval;
-	bool		do_clocal = false;
-	bool		extra_count = false;
-	unsigned long	flags;
-
-	if (debug_level >= DEBUG_LEVEL_INFO)
-		printk("%s(%d):block_til_ready on %s\n",
-			 __FILE__,__LINE__, tty->driver->name );
-
-	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
-		/* nonblock mode is set or port is not enabled */
-		/* just verify that callout device is not active */
-		info->flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (tty->termios->c_cflag & CLOCAL)
-		do_clocal = true;
-
-	/* Wait for carrier detect and the line to become
-	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->count is dropped by one, so that
-	 * mgslpc_close() knows when to free things.  We restore it upon
-	 * exit, either normal or abnormal.
-	 */
-
-	retval = 0;
-	add_wait_queue(&info->open_wait, &wait);
-
-	if (debug_level >= DEBUG_LEVEL_INFO)
-		printk("%s(%d):block_til_ready before block on %s count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->count );
-
-	spin_lock_irqsave(&info->lock, flags);
-	if (!tty_hung_up_p(filp)) {
-		extra_count = true;
-		info->count--;
-	}
-	spin_unlock_irqrestore(&info->lock, flags);
-	info->blocked_open++;
-
-	while (1) {
-		if ((tty->termios->c_cflag & CBAUD)) {
-			spin_lock_irqsave(&info->lock,flags);
-			info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
-		 	set_signals(info);
-			spin_unlock_irqrestore(&info->lock,flags);
-		}
-
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (tty_hung_up_p(filp) || !(info->flags & ASYNC_INITIALIZED)){
-			retval = (info->flags & ASYNC_HUP_NOTIFY) ?
-					-EAGAIN : -ERESTARTSYS;
-			break;
-		}
-
-		spin_lock_irqsave(&info->lock,flags);
-	 	get_signals(info);
-		spin_unlock_irqrestore(&info->lock,flags);
-
- 		if (!(info->flags & ASYNC_CLOSING) &&
- 		    (do_clocal || (info->serial_signals & SerialSignal_DCD)) ) {
- 			break;
-		}
-
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-
-		if (debug_level >= DEBUG_LEVEL_INFO)
-			printk("%s(%d):block_til_ready blocking on %s count=%d\n",
-				 __FILE__,__LINE__, tty->driver->name, info->count );
-
-		schedule();
-	}
-
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->open_wait, &wait);
+	MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port);
+	unsigned long flags;
 
-	if (extra_count)
-		info->count++;
-	info->blocked_open--;
+	spin_lock_irqsave(&info->lock,flags);
+ 	get_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
 
-	if (debug_level >= DEBUG_LEVEL_INFO)
-		printk("%s(%d):block_til_ready after blocking on %s count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->count );
+	if (info->serial_signals & SerialSignal_DCD)
+		return 1;
+	return 0;
+}
 
-	if (!retval)
-		info->flags |= ASYNC_NORMAL_ACTIVE;
+static void raise_dtr_rts(struct tty_port *port)
+{
+	MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port);
+	unsigned long flags;
 
-	return retval;
+	spin_lock_irqsave(&info->lock,flags);
+	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	set_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
 }
 
+
 static int mgslpc_open(struct tty_struct *tty, struct file * filp)
 {
 	MGSLPC_INFO	*info;
+	struct tty_port *port;
 	int 			retval, line;
 	unsigned long flags;
 
@@ -2691,23 +2561,24 @@ static int mgslpc_open(struct tty_struct *tty, struct file * filp)
 	if (mgslpc_paranoia_check(info, tty->name, "mgslpc_open"))
 		return -ENODEV;
 
+	port = &info->port;
 	tty->driver_data = info;
-	info->tty = tty;
+	tty_port_tty_set(port, tty);
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgslpc_open(%s), old ref count = %d\n",
-			 __FILE__,__LINE__,tty->driver->name, info->count);
+			 __FILE__,__LINE__,tty->driver->name, port->count);
 
 	/* If port is closing, signal caller to try again */
-	if (tty_hung_up_p(filp) || info->flags & ASYNC_CLOSING){
-		if (info->flags & ASYNC_CLOSING)
-			interruptible_sleep_on(&info->close_wait);
-		retval = ((info->flags & ASYNC_HUP_NOTIFY) ?
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING){
+		if (port->flags & ASYNC_CLOSING)
+			interruptible_sleep_on(&port->close_wait);
+		retval = ((port->flags & ASYNC_HUP_NOTIFY) ?
 			-EAGAIN : -ERESTARTSYS);
 		goto cleanup;
 	}
 
-	info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	tty->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 	spin_lock_irqsave(&info->netlock, flags);
 	if (info->netcount) {
@@ -2715,17 +2586,19 @@ static int mgslpc_open(struct tty_struct *tty, struct file * filp)
 		spin_unlock_irqrestore(&info->netlock, flags);
 		goto cleanup;
 	}
-	info->count++;
+	spin_lock(&port->lock);
+	port->count++;
+	spin_unlock(&port->lock);
 	spin_unlock_irqrestore(&info->netlock, flags);
 
-	if (info->count == 1) {
+	if (port->count == 1) {
 		/* 1st open on this device, init hardware */
-		retval = startup(info);
+		retval = startup(info, tty);
 		if (retval < 0)
 			goto cleanup;
 	}
 
-	retval = block_til_ready(tty, filp, info);
+	retval = tty_port_block_til_ready(&info->port, tty, filp);
 	if (retval) {
 		if (debug_level >= DEBUG_LEVEL_INFO)
 			printk("%s(%d):block_til_ready(%s) returned %d\n",
@@ -2739,13 +2612,6 @@ static int mgslpc_open(struct tty_struct *tty, struct file * filp)
 	retval = 0;
 
 cleanup:
-	if (retval) {
-		if (tty->count == 1)
-			info->tty = NULL; /* tty layer will release tty struct */
-		if(info->count)
-			info->count--;
-	}
-
 	return retval;
 }
 
@@ -3500,7 +3366,7 @@ static void rx_start(MGSLPC_INFO *info)
 	info->rx_enabled = true;
 }
 
-static void tx_start(MGSLPC_INFO *info)
+static void tx_start(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	if (debug_level >= DEBUG_LEVEL_ISR)
 		printk("%s(%d):tx_start(%s)\n",
@@ -3524,11 +3390,11 @@ static void tx_start(MGSLPC_INFO *info)
 		if (info->params.mode == MGSL_MODE_ASYNC) {
 			if (!info->tx_active) {
 				info->tx_active = true;
-				tx_ready(info);
+				tx_ready(info, tty);
 			}
 		} else {
 			info->tx_active = true;
-			tx_ready(info);
+			tx_ready(info, tty);
 			mod_timer(&info->tx_timer, jiffies +
 					msecs_to_jiffies(5000));
 		}
@@ -3849,13 +3715,12 @@ static void rx_reset_buffers(MGSLPC_INFO *info)
  *
  * Returns true if frame returned, otherwise false
  */
-static bool rx_get_frame(MGSLPC_INFO *info)
+static bool rx_get_frame(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	unsigned short status;
 	RXBUF *buf;
 	unsigned int framesize = 0;
 	unsigned long flags;
-	struct tty_struct *tty = info->tty;
 	bool return_frame = false;
 
 	if (info->rx_frame_count == 0)
@@ -4075,7 +3940,11 @@ static void tx_timeout(unsigned long context)
 		hdlcdev_tx_done(info);
 	else
 #endif
-		bh_transmit(info);
+	{
+		struct tty_struct *tty = tty_port_tty_get(&info->port);
+		bh_transmit(info, tty);
+		tty_kref_put(tty);
+	}
 }
 
 #if SYNCLINK_GENERIC_HDLC
@@ -4094,11 +3963,12 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding,
 			  unsigned short parity)
 {
 	MGSLPC_INFO *info = dev_to_port(dev);
+	struct tty_struct *tty;
 	unsigned char  new_encoding;
 	unsigned short new_crctype;
 
 	/* return error if TTY interface open */
-	if (info->count)
+	if (info->port.count)
 		return -EBUSY;
 
 	switch (encoding)
@@ -4123,8 +3993,11 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding,
 	info->params.crc_type = new_crctype;
 
 	/* if network interface up, reprogram hardware */
-	if (info->netcount)
-		mgslpc_program_hw(info);
+	if (info->netcount) {
+		tty = tty_port_tty_get(&info->port);
+		mgslpc_program_hw(info, tty);
+		tty_kref_put(tty);
+	}
 
 	return 0;
 }
@@ -4165,8 +4038,11 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* start hardware transmitter if necessary */
 	spin_lock_irqsave(&info->lock,flags);
-	if (!info->tx_active)
-	 	tx_start(info);
+	if (!info->tx_active) {
+		struct tty_struct *tty = tty_port_tty_get(&info->port);
+	 	tx_start(info, tty);
+	 	tty_kref_put(tty);
+	}
 	spin_unlock_irqrestore(&info->lock,flags);
 
 	return 0;
@@ -4183,6 +4059,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev)
 static int hdlcdev_open(struct net_device *dev)
 {
 	MGSLPC_INFO *info = dev_to_port(dev);
+	struct tty_struct *tty;
 	int rc;
 	unsigned long flags;
 
@@ -4195,7 +4072,7 @@ static int hdlcdev_open(struct net_device *dev)
 
 	/* arbitrate between network and tty opens */
 	spin_lock_irqsave(&info->netlock, flags);
-	if (info->count != 0 || info->netcount != 0) {
+	if (info->port.count != 0 || info->netcount != 0) {
 		printk(KERN_WARNING "%s: hdlc_open returning busy\n", dev->name);
 		spin_unlock_irqrestore(&info->netlock, flags);
 		return -EBUSY;
@@ -4203,17 +4080,19 @@ static int hdlcdev_open(struct net_device *dev)
 	info->netcount=1;
 	spin_unlock_irqrestore(&info->netlock, flags);
 
+	tty = tty_port_tty_get(&info->port);
 	/* claim resources and init adapter */
-	if ((rc = startup(info)) != 0) {
+	if ((rc = startup(info, tty)) != 0) {
+		tty_kref_put(tty);
 		spin_lock_irqsave(&info->netlock, flags);
 		info->netcount=0;
 		spin_unlock_irqrestore(&info->netlock, flags);
 		return rc;
 	}
-
 	/* assert DTR and RTS, apply hardware settings */
 	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
-	mgslpc_program_hw(info);
+	mgslpc_program_hw(info, tty);
+	tty_kref_put(tty);
 
 	/* enable network layer transmit */
 	dev->trans_start = jiffies;
@@ -4241,6 +4120,7 @@ static int hdlcdev_open(struct net_device *dev)
 static int hdlcdev_close(struct net_device *dev)
 {
 	MGSLPC_INFO *info = dev_to_port(dev);
+	struct tty_struct *tty = tty_port_tty_get(&info->port);
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -4249,8 +4129,8 @@ static int hdlcdev_close(struct net_device *dev)
 	netif_stop_queue(dev);
 
 	/* shutdown adapter and release resources */
-	shutdown(info);
-
+	shutdown(info, tty);
+	tty_kref_put(tty);
 	hdlc_close(dev);
 
 	spin_lock_irqsave(&info->netlock, flags);
@@ -4281,7 +4161,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		printk("%s:hdlcdev_ioctl(%s)\n",__FILE__,dev->name);
 
 	/* return error if TTY interface open */
-	if (info->count)
+	if (info->port.count)
 		return -EBUSY;
 
 	if (cmd != SIOCWANDEV)
@@ -4354,8 +4234,11 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			info->params.clock_speed = 0;
 
 		/* if network interface up, reprogram hardware */
-		if (info->netcount)
-			mgslpc_program_hw(info);
+		if (info->netcount) {
+			struct tty_struct *tty = tty_port_tty_get(&info->port);
+			mgslpc_program_hw(info, tty);
+			tty_kref_put(tty);
+		}
 		return 0;
 
 	default:
-- 
cgit v0.10.2


From 6b447f04a9aecdf2a30c1a97e4b034ac7931bb70 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:48:56 +0000
Subject: tty: Drop the lock_kernel in the private ioctl hook

We don't need the BKL here any more so it can go. In a couple of spots the
driver requirements are not clear so push the lock down into the driver.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index fb6f293..ef6cfa5 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1054,6 +1054,8 @@ static int set_serial_info(struct tty_struct *tty,
 
 	if (copy_from_user(&new_serial, newinfo, sizeof(new_serial)))
 		return -EFAULT;
+
+	lock_kernel();
 	old_priv = *priv;
 
 	/* Do error checking and permission checking */
@@ -1069,8 +1071,10 @@ static int set_serial_info(struct tty_struct *tty,
 	}
 
 	if ((new_serial.baud_base != priv->baud_base) &&
-	    (new_serial.baud_base < 9600))
+	    (new_serial.baud_base < 9600)) {
+	    	unlock_kernel();
 		return -EINVAL;
+	}
 
 	/* Make the changes - these are privileged changes! */
 
@@ -1098,8 +1102,11 @@ check_and_exit:
 	     (priv->flags & ASYNC_SPD_MASK)) ||
 	    (((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST) &&
 	     (old_priv.custom_divisor != priv->custom_divisor))) {
+		unlock_kernel();
 		change_speed(tty, port);
 	}
+	else
+		unlock_kernel();
 	return 0;
 
 } /* set_serial_info */
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index dc36a05..fcd9082 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -878,6 +878,7 @@ static void mct_u232_break_ctl(struct tty_struct *tty, int break_state)
 
 	dbg("%sstate=%d", __func__, break_state);
 
+	/* LOCKING */
 	if (break_state)
 		lcr |= MCT_U232_SET_BREAK;
 
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index 07710cf..82930a7 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -721,10 +721,10 @@ static void mct_u232_break_ctl(struct tty_struct *tty, int break_state)
 
 	spin_lock_irqsave(&priv->lock, flags);
 	lcr = priv->last_lcr;
-	spin_unlock_irqrestore(&priv->lock, flags);
 
 	if (break_state)
 		lcr |= MCT_U232_SET_BREAK;
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	mct_u232_set_line_ctrl(serial, lcr);
 } /* mct_u232_break_ctl */
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index fda4a64..96a8c77 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -1343,6 +1343,7 @@ static void mos7840_break(struct tty_struct *tty, int break_state)
 	else
 		data = mos7840_port->shadowLCR & ~LCR_SET_BREAK;
 
+	/* FIXME: no locking on shadowLCR anywhere in driver */
 	mos7840_port->shadowLCR = data;
 	dbg("mcs7840_break mos7840_port->shadowLCR is %x\n",
 	    mos7840_port->shadowLCR);
@@ -2214,10 +2215,12 @@ static int mos7840_set_modem_info(struct moschip_port *mos7840_port,
 		break;
 	}
 
+	lock_kernel();
 	mos7840_port->shadowMCR = mcr;
 
 	Data = mos7840_port->shadowMCR;
 	status = mos7840_set_uart_reg(port, MODEM_CONTROL_REGISTER, Data);
+	unlock_kernel();
 	if (status < 0) {
 		dbg("setting MODEM_CONTROL_REGISTER Failed\n");
 		return -1;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 8d51890..080ade2 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -382,9 +382,7 @@ static int serial_ioctl(struct tty_struct *tty, struct file *file,
 	/* pass on to the driver specific version of this function
 	   if it is available */
 	if (port->serial->type->ioctl) {
-		lock_kernel();
 		retval = port->serial->type->ioctl(tty, file, cmd, arg);
-		unlock_kernel();
 	} else
 		retval = -ENOIOCTLCMD;
 	return retval;
@@ -413,11 +411,8 @@ static int serial_break(struct tty_struct *tty, int break_state)
 	WARN_ON(!port->port.count);
 	/* pass on to the driver specific version of this function
 	   if it is available */
-	if (port->serial->type->break_ctl) {
-		lock_kernel();
+	if (port->serial->type->break_ctl)
 		port->serial->type->break_ctl(tty, break_state);
-		unlock_kernel();
-	}
 	return 0;
 }
 
-- 
cgit v0.10.2


From 60c20fb8c00a2b23308ae4517f145383bc66d291 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Fri, 2 Jan 2009 13:49:04 +0000
Subject: serial: RS485 ioctl structure uses __u32 include linux/types.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the commit below a new struct serial_rs485 was introduced for a new
ioctl:

    commit c26c56c0f40e200e61d1390629c806f6adaffbcc
    Author: Alan Cox <alan@redhat.com>
    Date:   Mon Oct 13 10:37:48 2008 +0100

	tty: Cris has a nice RS485 ioctl so we should steal it

This structure uses the __u32 types for some of its members, which leads
to the following compile error:

    $ cc -I.../include -c X.c
    In file included from X.c:2: .../include/linux/serial.h:185:
		error: expected specifier-qualifier-list before ‘__u32’
    $

It seems that these types are appropriate for this structure as it is
to be exposed to userspace.  These types are available via linux/types.h
so move the include of that outside the __KERNEL__ section.

Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/serial.h b/include/linux/serial.h
index 1ea8d92..9136cc5 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -10,8 +10,9 @@
 #ifndef _LINUX_SERIAL_H
 #define _LINUX_SERIAL_H
 
-#ifdef __KERNEL__
 #include <linux/types.h>
+
+#ifdef __KERNEL__
 #include <asm/page.h>
 
 /*
-- 
cgit v0.10.2


From 6ef53066ff7991d5f9670340e92d42ee1776bbe4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 2 Jan 2009 13:49:13 +0000
Subject: __FUNCTION__ is gcc-specific, use __func__

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c
index dd8564d..529c0ff 100644
--- a/drivers/serial/bfin_sport_uart.c
+++ b/drivers/serial/bfin_sport_uart.c
@@ -99,7 +99,7 @@ static void sport_stop_tx(struct uart_port *port);
 
 static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value)
 {
-	pr_debug("%s value:%x\n", __FUNCTION__, value);
+	pr_debug("%s value:%x\n", __func__, value);
 	/* Place a Start and Stop bit */
 	__asm__ volatile (
 		"R2 = b#01111111100;\n\t"
@@ -110,7 +110,7 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value)
 		:"=r"(value)
 		:"0"(value)
 		:"R2", "R3");
-	pr_debug("%s value:%x\n", __FUNCTION__, value);
+	pr_debug("%s value:%x\n", __func__, value);
 
 	SPORT_PUT_TX(up, value);
 }
@@ -120,7 +120,7 @@ static inline unsigned int rx_one_byte(struct sport_uart_port *up)
 	unsigned int value, extract;
 
 	value = SPORT_GET_RX32(up);
-	pr_debug("%s value:%x\n", __FUNCTION__, value);
+	pr_debug("%s value:%x\n", __func__, value);
 
 	/* Extract 8 bits data */
 	__asm__ volatile (
@@ -151,12 +151,12 @@ static int sport_uart_setup(struct sport_uart_port *up, int sclk, int baud_rate)
 	/* Set TCR1 and TCR2 */
 	SPORT_PUT_TCR1(up, (LTFS | ITFS | TFSR | TLSBIT | ITCLK));
 	SPORT_PUT_TCR2(up, 10);
-	pr_debug("%s TCR1:%x, TCR2:%x\n", __FUNCTION__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up));
+	pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up));
 
 	/* Set RCR1 and RCR2 */
 	SPORT_PUT_RCR1(up, (RCKFE | LARFS | LRFS | RFSR | IRCLK));
 	SPORT_PUT_RCR2(up, 28);
-	pr_debug("%s RCR1:%x, RCR2:%x\n", __FUNCTION__, SPORT_GET_RCR1(up), SPORT_GET_RCR2(up));
+	pr_debug("%s RCR1:%x, RCR2:%x\n", __func__, SPORT_GET_RCR1(up), SPORT_GET_RCR2(up));
 
 	tclkdiv = sclk/(2 * baud_rate) - 1;
 	tfsdiv = 12;
@@ -166,7 +166,7 @@ static int sport_uart_setup(struct sport_uart_port *up, int sclk, int baud_rate)
 	SPORT_PUT_RCLKDIV(up, rclkdiv);
 	SSYNC();
 	pr_debug("%s sclk:%d, baud_rate:%d, tclkdiv:%d, tfsdiv:%d, rclkdiv:%d\n",
-			__FUNCTION__, sclk, baud_rate, tclkdiv, tfsdiv, rclkdiv);
+			__func__, sclk, baud_rate, tclkdiv, tfsdiv, rclkdiv);
 
 	return 0;
 }
@@ -231,7 +231,7 @@ static int sport_startup(struct uart_port *port)
 	char buffer[20];
 	int retval;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	memset(buffer, 20, '\0');
 	snprintf(buffer, 20, "%s rx", up->name);
 	retval = request_irq(up->rx_irq, sport_uart_rx_irq, IRQF_SAMPLE_RANDOM, buffer, up);
@@ -320,7 +320,7 @@ static unsigned int sport_tx_empty(struct uart_port *port)
 	unsigned int stat;
 
 	stat = SPORT_GET_STAT(up);
-	pr_debug("%s stat:%04x\n", __FUNCTION__, stat);
+	pr_debug("%s stat:%04x\n", __func__, stat);
 	if (stat & TXHRE) {
 		return TIOCSER_TEMT;
 	} else
@@ -329,13 +329,13 @@ static unsigned int sport_tx_empty(struct uart_port *port)
 
 static unsigned int sport_get_mctrl(struct uart_port *port)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	return (TIOCM_CTS | TIOCM_CD | TIOCM_DSR);
 }
 
 static void sport_set_mctrl(struct uart_port *port, unsigned int mctrl)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 }
 
 static void sport_stop_tx(struct uart_port *port)
@@ -343,7 +343,7 @@ static void sport_stop_tx(struct uart_port *port)
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 	unsigned int stat;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 
 	stat = SPORT_GET_STAT(up);
 	while(!(stat & TXHRE)) {
@@ -366,21 +366,21 @@ static void sport_start_tx(struct uart_port *port)
 {
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	/* Write data into SPORT FIFO before enable SPROT to transmit */
 	sport_uart_tx_chars(up);
 
 	/* Enable transmit, then an interrupt will generated */
 	SPORT_PUT_TCR1(up, (SPORT_GET_TCR1(up) | TSPEN));
 	SSYNC();
-	pr_debug("%s exit\n", __FUNCTION__);
+	pr_debug("%s exit\n", __func__);
 }
 
 static void sport_stop_rx(struct uart_port *port)
 {
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	/* Disable sport to stop rx */
 	SPORT_PUT_RCR1(up, (SPORT_GET_RCR1(up) & ~RSPEN));
 	SSYNC();
@@ -388,19 +388,19 @@ static void sport_stop_rx(struct uart_port *port)
 
 static void sport_enable_ms(struct uart_port *port)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 }
 
 static void sport_break_ctl(struct uart_port *port, int break_state)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 }
 
 static void sport_shutdown(struct uart_port *port)
 {
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 
 	/* Disable sport */
 	SPORT_PUT_TCR1(up, (SPORT_GET_TCR1(up) & ~TSPEN));
@@ -421,7 +421,7 @@ static void sport_shutdown(struct uart_port *port)
 static void sport_set_termios(struct uart_port *port,
 		struct termios *termios, struct termios *old)
 {
-	pr_debug("%s enter, c_cflag:%08x\n", __FUNCTION__, termios->c_cflag);
+	pr_debug("%s enter, c_cflag:%08x\n", __func__, termios->c_cflag);
 	uart_update_timeout(port, CS8 ,port->uartclk);
 }
 
@@ -429,18 +429,18 @@ static const char *sport_type(struct uart_port *port)
 {
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	return up->name;
 }
 
 static void sport_release_port(struct uart_port *port)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 }
 
 static int sport_request_port(struct uart_port *port)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	return 0;
 }
 
@@ -448,13 +448,13 @@ static void sport_config_port(struct uart_port *port, int flags)
 {
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	up->port.type = PORT_BFIN_SPORT;
 }
 
 static int sport_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	return 0;
 }
 
@@ -527,7 +527,7 @@ static int sport_uart_suspend(struct platform_device *dev, pm_message_t state)
 {
 	struct sport_uart_port *sport = platform_get_drvdata(dev);
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	if (sport)
 		uart_suspend_port(&sport_uart_reg, &sport->port);
 
@@ -538,7 +538,7 @@ static int sport_uart_resume(struct platform_device *dev)
 {
 	struct sport_uart_port *sport = platform_get_drvdata(dev);
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	if (sport)
 		uart_resume_port(&sport_uart_reg, &sport->port);
 
@@ -547,7 +547,7 @@ static int sport_uart_resume(struct platform_device *dev)
 
 static int sport_uart_probe(struct platform_device *dev)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	sport_uart_ports[dev->id].port.dev = &dev->dev;
 	uart_add_one_port(&sport_uart_reg, &sport_uart_ports[dev->id].port);
 	platform_set_drvdata(dev, &sport_uart_ports[dev->id]);
@@ -559,7 +559,7 @@ static int sport_uart_remove(struct platform_device *dev)
 {
 	struct sport_uart_port *sport = platform_get_drvdata(dev);
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	platform_set_drvdata(dev, NULL);
 
 	if (sport)
@@ -582,7 +582,7 @@ static int __init sport_uart_init(void)
 {
 	int ret;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	ret = uart_register_driver(&sport_uart_reg);
 	if (ret != 0) {
 		printk(KERN_ERR "Failed to register %s:%d\n",
@@ -597,13 +597,13 @@ static int __init sport_uart_init(void)
 	}
 
 
-	pr_debug("%s exit\n", __FUNCTION__);
+	pr_debug("%s exit\n", __func__);
 	return ret;
 }
 
 static void __exit sport_uart_exit(void)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	platform_driver_unregister(&sport_uart_driver);
 	uart_unregister_driver(&sport_uart_reg);
 }
-- 
cgit v0.10.2


From f751928e0ddf54ea4fe5546f35e99efc5b5d9938 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:49:21 +0000
Subject: tty: We want the port object to be persistent

Move the tty_port and uart_info bits around a little. By embedding the uart_info
into the uart_port we get rid of lots of corner case testing and also get the
ability to go port<->state<->info which is a bit more elegant than the current
data structures.

Downsides - we allocate a tiny bit more memory for unused ports, upside we've
removed as much code as it saved for most users..

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c
index a697914..3547558 100644
--- a/drivers/serial/jsm/jsm_tty.c
+++ b/drivers/serial/jsm/jsm_tty.c
@@ -272,7 +272,7 @@ static void jsm_tty_close(struct uart_port *port)
 	jsm_printk(CLOSE, INFO, &channel->ch_bd->pci_dev, "start\n");
 
 	bd = channel->ch_bd;
-	ts = channel->uart_port.info->port.tty->termios;
+	ts = port->info->port.tty->termios;
 
 	channel->ch_flags &= ~(CH_STOPI);
 
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 874786a..daeba1c 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -50,7 +50,7 @@ static struct lock_class_key port_lock_key;
 
 #define HIGH_BITS_OFFSET	((sizeof(long)-sizeof(int))*8)
 
-#define uart_users(state)	((state)->count + ((state)->info ? (state)->info->port.blocked_open : 0))
+#define uart_users(state)	((state)->count + (state)->info.port.blocked_open)
 
 #ifdef CONFIG_SERIAL_CORE_CONSOLE
 #define uart_console(port)	((port)->cons && (port)->cons->index == (port)->line)
@@ -94,7 +94,7 @@ static void __uart_start(struct tty_struct *tty)
 	struct uart_state *state = tty->driver_data;
 	struct uart_port *port = state->port;
 
-	if (!uart_circ_empty(&state->info->xmit) && state->info->xmit.buf &&
+	if (!uart_circ_empty(&state->info.xmit) && state->info.xmit.buf &&
 	    !tty->stopped && !tty->hw_stopped)
 		port->ops->start_tx(port);
 }
@@ -113,7 +113,7 @@ static void uart_start(struct tty_struct *tty)
 static void uart_tasklet_action(unsigned long data)
 {
 	struct uart_state *state = (struct uart_state *)data;
-	tty_wakeup(state->info->port.tty);
+	tty_wakeup(state->info.port.tty);
 }
 
 static inline void
@@ -139,7 +139,7 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear)
  */
 static int uart_startup(struct uart_state *state, int init_hw)
 {
-	struct uart_info *info = state->info;
+	struct uart_info *info = &state->info;
 	struct uart_port *port = state->port;
 	unsigned long page;
 	int retval = 0;
@@ -212,14 +212,15 @@ static int uart_startup(struct uart_state *state, int init_hw)
  */
 static void uart_shutdown(struct uart_state *state)
 {
-	struct uart_info *info = state->info;
+	struct uart_info *info = &state->info;
 	struct uart_port *port = state->port;
+	struct tty_struct *tty = info->port.tty;
 
 	/*
 	 * Set the TTY IO error marker
 	 */
-	if (info->port.tty)
-		set_bit(TTY_IO_ERROR, &info->port.tty->flags);
+	if (tty)
+		set_bit(TTY_IO_ERROR, &tty->flags);
 
 	if (info->flags & UIF_INITIALIZED) {
 		info->flags &= ~UIF_INITIALIZED;
@@ -227,7 +228,7 @@ static void uart_shutdown(struct uart_state *state)
 		/*
 		 * Turn off DTR and RTS early.
 		 */
-		if (!info->port.tty || (info->port.tty->termios->c_cflag & HUPCL))
+		if (!tty || (tty->termios->c_cflag & HUPCL))
 			uart_clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
 
 		/*
@@ -427,7 +428,7 @@ EXPORT_SYMBOL(uart_get_divisor);
 static void
 uart_change_speed(struct uart_state *state, struct ktermios *old_termios)
 {
-	struct tty_struct *tty = state->info->port.tty;
+	struct tty_struct *tty = state->info.port.tty;
 	struct uart_port *port = state->port;
 	struct ktermios *termios;
 
@@ -444,14 +445,14 @@ uart_change_speed(struct uart_state *state, struct ktermios *old_termios)
 	 * Set flags based on termios cflag
 	 */
 	if (termios->c_cflag & CRTSCTS)
-		state->info->flags |= UIF_CTS_FLOW;
+		state->info.flags |= UIF_CTS_FLOW;
 	else
-		state->info->flags &= ~UIF_CTS_FLOW;
+		state->info.flags &= ~UIF_CTS_FLOW;
 
 	if (termios->c_cflag & CLOCAL)
-		state->info->flags &= ~UIF_CHECK_CD;
+		state->info.flags &= ~UIF_CHECK_CD;
 	else
-		state->info->flags |= UIF_CHECK_CD;
+		state->info.flags |= UIF_CHECK_CD;
 
 	port->ops->set_termios(port, termios, old_termios);
 }
@@ -479,7 +480,7 @@ static int uart_put_char(struct tty_struct *tty, unsigned char ch)
 {
 	struct uart_state *state = tty->driver_data;
 
-	return __uart_put_char(state->port, &state->info->xmit, ch);
+	return __uart_put_char(state->port, &state->info.xmit, ch);
 }
 
 static void uart_flush_chars(struct tty_struct *tty)
@@ -500,13 +501,13 @@ uart_write(struct tty_struct *tty, const unsigned char *buf, int count)
 	 * This means you called this function _after_ the port was
 	 * closed.  No cookie for you.
 	 */
-	if (!state || !state->info) {
+	if (!state) {
 		WARN_ON(1);
 		return -EL3HLT;
 	}
 
 	port = state->port;
-	circ = &state->info->xmit;
+	circ = &state->info.xmit;
 
 	if (!circ->buf)
 		return 0;
@@ -537,7 +538,7 @@ static int uart_write_room(struct tty_struct *tty)
 	int ret;
 
 	spin_lock_irqsave(&state->port->lock, flags);
-	ret = uart_circ_chars_free(&state->info->xmit);
+	ret = uart_circ_chars_free(&state->info.xmit);
 	spin_unlock_irqrestore(&state->port->lock, flags);
 	return ret;
 }
@@ -549,7 +550,7 @@ static int uart_chars_in_buffer(struct tty_struct *tty)
 	int ret;
 
 	spin_lock_irqsave(&state->port->lock, flags);
-	ret = uart_circ_chars_pending(&state->info->xmit);
+	ret = uart_circ_chars_pending(&state->info.xmit);
 	spin_unlock_irqrestore(&state->port->lock, flags);
 	return ret;
 }
@@ -564,7 +565,7 @@ static void uart_flush_buffer(struct tty_struct *tty)
 	 * This means you called this function _after_ the port was
 	 * closed.  No cookie for you.
 	 */
-	if (!state || !state->info) {
+	if (!state) {
 		WARN_ON(1);
 		return;
 	}
@@ -573,7 +574,7 @@ static void uart_flush_buffer(struct tty_struct *tty)
 	pr_debug("uart_flush_buffer(%d) called\n", tty->index);
 
 	spin_lock_irqsave(&port->lock, flags);
-	uart_circ_clear(&state->info->xmit);
+	uart_circ_clear(&state->info.xmit);
 	if (port->ops->flush_buffer)
 		port->ops->flush_buffer(port);
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -837,15 +838,15 @@ static int uart_set_info(struct uart_state *state,
 	state->closing_wait    = closing_wait;
 	if (new_serial.xmit_fifo_size)
 		port->fifosize = new_serial.xmit_fifo_size;
-	if (state->info->port.tty)
-		state->info->port.tty->low_latency =
+	if (state->info.port.tty)
+		state->info.port.tty->low_latency =
 			(port->flags & UPF_LOW_LATENCY) ? 1 : 0;
 
  check_and_exit:
 	retval = 0;
 	if (port->type == PORT_UNKNOWN)
 		goto exit;
-	if (state->info->flags & UIF_INITIALIZED) {
+	if (state->info.flags & UIF_INITIALIZED) {
 		if (((old_flags ^ port->flags) & UPF_SPD_MASK) ||
 		    old_custom_divisor != port->custom_divisor) {
 			/*
@@ -858,7 +859,7 @@ static int uart_set_info(struct uart_state *state,
 				printk(KERN_NOTICE
 				       "%s sets custom speed on %s. This "
 				       "is deprecated.\n", current->comm,
-				       tty_name(state->info->port.tty, buf));
+				       tty_name(state->info.port.tty, buf));
 			}
 			uart_change_speed(state, NULL);
 		}
@@ -889,8 +890,8 @@ static int uart_get_lsr_info(struct uart_state *state,
 	 * interrupt happens).
 	 */
 	if (port->x_char ||
-	    ((uart_circ_chars_pending(&state->info->xmit) > 0) &&
-	     !state->info->port.tty->stopped && !state->info->port.tty->hw_stopped))
+	    ((uart_circ_chars_pending(&state->info.xmit) > 0) &&
+	     !state->info.port.tty->stopped && !state->info.port.tty->hw_stopped))
 		result &= ~TIOCSER_TEMT;
 
 	return put_user(result, value);
@@ -1017,7 +1018,7 @@ uart_wait_modem_status(struct uart_state *state, unsigned long arg)
 	port->ops->enable_ms(port);
 	spin_unlock_irq(&port->lock);
 
-	add_wait_queue(&state->info->delta_msr_wait, &wait);
+	add_wait_queue(&state->info.delta_msr_wait, &wait);
 	for (;;) {
 		spin_lock_irq(&port->lock);
 		memcpy(&cnow, &port->icount, sizeof(struct uart_icount));
@@ -1045,7 +1046,7 @@ uart_wait_modem_status(struct uart_state *state, unsigned long arg)
 	}
 
 	current->state = TASK_RUNNING;
-	remove_wait_queue(&state->info->delta_msr_wait, &wait);
+	remove_wait_queue(&state->info.delta_msr_wait, &wait);
 
 	return ret;
 }
@@ -1241,7 +1242,7 @@ static void uart_set_termios(struct tty_struct *tty,
 	 */
 	if (!(old_termios->c_cflag & CLOCAL) &&
 	    (tty->termios->c_cflag & CLOCAL))
-		wake_up_interruptible(&state->info->port.open_wait);
+		wake_up_interruptible(&info->port.open_wait);
 #endif
 }
 
@@ -1303,7 +1304,7 @@ static void uart_close(struct tty_struct *tty, struct file *filp)
 	 * At this point, we stop accepting input.  To do this, we
 	 * disable the receive line status interrupts.
 	 */
-	if (state->info->flags & UIF_INITIALIZED) {
+	if (state->info.flags & UIF_INITIALIZED) {
 		unsigned long flags;
 		spin_lock_irqsave(&port->lock, flags);
 		port->ops->stop_rx(port);
@@ -1322,9 +1323,9 @@ static void uart_close(struct tty_struct *tty, struct file *filp)
 	tty_ldisc_flush(tty);
 
 	tty->closing = 0;
-	state->info->port.tty = NULL;
+	state->info.port.tty = NULL;
 
-	if (state->info->port.blocked_open) {
+	if (state->info.port.blocked_open) {
 		if (state->close_delay)
 			msleep_interruptible(state->close_delay);
 	} else if (!uart_console(port)) {
@@ -1334,8 +1335,8 @@ static void uart_close(struct tty_struct *tty, struct file *filp)
 	/*
 	 * Wake up anyone trying to open this port.
 	 */
-	state->info->flags &= ~UIF_NORMAL_ACTIVE;
-	wake_up_interruptible(&state->info->port.open_wait);
+	state->info.flags &= ~UIF_NORMAL_ACTIVE;
+	wake_up_interruptible(&state->info.port.open_wait);
 
  done:
 	mutex_unlock(&state->mutex);
@@ -1409,19 +1410,20 @@ static void uart_wait_until_sent(struct tty_struct *tty, int timeout)
 static void uart_hangup(struct tty_struct *tty)
 {
 	struct uart_state *state = tty->driver_data;
+	struct uart_info *info = &state->info;
 
 	BUG_ON(!kernel_locked());
 	pr_debug("uart_hangup(%d)\n", state->port->line);
 
 	mutex_lock(&state->mutex);
-	if (state->info && state->info->flags & UIF_NORMAL_ACTIVE) {
+	if (info->flags & UIF_NORMAL_ACTIVE) {
 		uart_flush_buffer(tty);
 		uart_shutdown(state);
 		state->count = 0;
-		state->info->flags &= ~UIF_NORMAL_ACTIVE;
-		state->info->port.tty = NULL;
-		wake_up_interruptible(&state->info->port.open_wait);
-		wake_up_interruptible(&state->info->delta_msr_wait);
+		info->flags &= ~UIF_NORMAL_ACTIVE;
+		info->port.tty = NULL;
+		wake_up_interruptible(&info->port.open_wait);
+		wake_up_interruptible(&info->delta_msr_wait);
 	}
 	mutex_unlock(&state->mutex);
 }
@@ -1434,7 +1436,7 @@ static void uart_hangup(struct tty_struct *tty)
  */
 static void uart_update_termios(struct uart_state *state)
 {
-	struct tty_struct *tty = state->info->port.tty;
+	struct tty_struct *tty = state->info.port.tty;
 	struct uart_port *port = state->port;
 
 	if (uart_console(port) && port->cons->cflag) {
@@ -1469,7 +1471,7 @@ static int
 uart_block_til_ready(struct file *filp, struct uart_state *state)
 {
 	DECLARE_WAITQUEUE(wait, current);
-	struct uart_info *info = state->info;
+	struct uart_info *info = &state->info;
 	struct uart_port *port = state->port;
 	unsigned int mctrl;
 
@@ -1563,28 +1565,6 @@ static struct uart_state *uart_get(struct uart_driver *drv, int line)
 		ret = -ENXIO;
 		goto err_unlock;
 	}
-
-	/* BKL: RACE HERE - LEAK */
-	/* We should move this into the uart_state structure and kill off
-	   this whole complexity */
-	if (!state->info) {
-		state->info = kzalloc(sizeof(struct uart_info), GFP_KERNEL);
-		if (state->info) {
-			init_waitqueue_head(&state->info->port.open_wait);
-			init_waitqueue_head(&state->info->delta_msr_wait);
-
-			/*
-			 * Link the info into the other structures.
-			 */
-			state->port->info = state->info;
-
-			tasklet_init(&state->info->tlet, uart_tasklet_action,
-				     (unsigned long)state);
-		} else {
-			ret = -ENOMEM;
-			goto err_unlock;
-		}
-	}
 	return state;
 
  err_unlock:
@@ -1641,9 +1621,10 @@ static int uart_open(struct tty_struct *tty, struct file *filp)
 	 * Any failures from here onwards should not touch the count.
 	 */
 	tty->driver_data = state;
+	state->port->info = &state->info;
 	tty->low_latency = (state->port->flags & UPF_LOW_LATENCY) ? 1 : 0;
 	tty->alt_speed = 0;
-	state->info->port.tty = tty;
+	state->info.port.tty = tty;
 
 	/*
 	 * If the port is in the middle of closing, bail out now.
@@ -1676,8 +1657,8 @@ static int uart_open(struct tty_struct *tty, struct file *filp)
 	/*
 	 * If this is the first open to succeed, adjust things to suit.
 	 */
-	if (retval == 0 && !(state->info->flags & UIF_NORMAL_ACTIVE)) {
-		state->info->flags |= UIF_NORMAL_ACTIVE;
+	if (retval == 0 && !(state->info.flags & UIF_NORMAL_ACTIVE)) {
+		state->info.flags |= UIF_NORMAL_ACTIVE;
 
 		uart_update_termios(state);
 	}
@@ -2028,11 +2009,11 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *port)
 	}
 	port->suspended = 1;
 
-	if (state->info && state->info->flags & UIF_INITIALIZED) {
+	if (state->info.flags & UIF_INITIALIZED) {
 		const struct uart_ops *ops = port->ops;
 		int tries;
 
-		state->info->flags = (state->info->flags & ~UIF_INITIALIZED)
+		state->info.flags = (state->info.flags & ~UIF_INITIALIZED)
 				     | UIF_SUSPENDED;
 
 		spin_lock_irq(&port->lock);
@@ -2107,15 +2088,15 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port)
 		/*
 		 * If that's unset, use the tty termios setting.
 		 */
-		if (state->info && state->info->port.tty && termios.c_cflag == 0)
-			termios = *state->info->port.tty->termios;
+		if (state->info.port.tty && termios.c_cflag == 0)
+			termios = *state->info.port.tty->termios;
 
 		uart_change_pm(state, 0);
 		port->ops->set_termios(port, &termios, NULL);
 		console_start(port->cons);
 	}
 
-	if (state->info && state->info->flags & UIF_SUSPENDED) {
+	if (state->info.flags & UIF_SUSPENDED) {
 		const struct uart_ops *ops = port->ops;
 		int ret;
 
@@ -2130,7 +2111,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port)
 			ops->set_mctrl(port, port->mctrl);
 			ops->start_tx(port);
 			spin_unlock_irq(&port->lock);
-			state->info->flags |= UIF_INITIALIZED;
+			state->info.flags |= UIF_INITIALIZED;
 		} else {
 			/*
 			 * Failed to resume - maybe hardware went away?
@@ -2140,7 +2121,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port)
 			uart_shutdown(state);
 		}
 
-		state->info->flags &= ~UIF_SUSPENDED;
+		state->info.flags &= ~UIF_SUSPENDED;
 	}
 
 	mutex_unlock(&state->mutex);
@@ -2383,8 +2364,12 @@ int uart_register_driver(struct uart_driver *drv)
 
 		state->close_delay     = 500;	/* .5 seconds */
 		state->closing_wait    = 30000;	/* 30 seconds */
-
 		mutex_init(&state->mutex);
+
+		tty_port_init(&state->info.port);
+		init_waitqueue_head(&state->info.delta_msr_wait);
+		tasklet_init(&state->info.tlet, uart_tasklet_action,
+			     (unsigned long)state);
 	}
 
 	retval = tty_register_driver(normal);
@@ -2455,7 +2440,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *port)
 	state->pm_state = -1;
 
 	port->cons = drv->cons;
-	port->info = state->info;
+	port->info = &state->info;
 
 	/*
 	 * If this port is a console, then the spinlock is already
@@ -2527,18 +2512,11 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *port)
 	 */
 	tty_unregister_device(drv->tty_driver, port->line);
 
-	info = state->info;
+	info = &state->info;
 	if (info && info->port.tty)
 		tty_vhangup(info->port.tty);
 
 	/*
-	 * All users of this port should now be disconnected from
-	 * this driver, and the port shut down.  We should be the
-	 * only thread fiddling with this port from now on.
-	 */
-	state->info = NULL;
-
-	/*
 	 * Free the port IO and memory resources, if any.
 	 */
 	if (port->type != PORT_UNKNOWN)
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index feb3b93..2395969 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -316,35 +316,13 @@ struct uart_port {
 };
 
 /*
- * This is the state information which is persistent across opens.
- * The low level driver must not to touch any elements contained
- * within.
- */
-struct uart_state {
-	unsigned int		close_delay;		/* msec */
-	unsigned int		closing_wait;		/* msec */
-
-#define USF_CLOSING_WAIT_INF	(0)
-#define USF_CLOSING_WAIT_NONE	(~0U)
-
-	int			count;
-	int			pm_state;
-	struct uart_info	*info;
-	struct uart_port	*port;
-
-	struct mutex		mutex;
-};
-
-#define UART_XMIT_SIZE	PAGE_SIZE
-
-typedef unsigned int __bitwise__ uif_t;
-
-/*
  * This is the state information which is only valid when the port
- * is open; it may be freed by the core driver once the device has
+ * is open; it may be cleared the core driver once the device has
  * been closed.  Either the low level driver or the core can modify
  * stuff here.
  */
+typedef unsigned int __bitwise__ uif_t;
+
 struct uart_info {
 	struct tty_port		port;
 	struct circ_buf		xmit;
@@ -366,6 +344,29 @@ struct uart_info {
 	wait_queue_head_t	delta_msr_wait;
 };
 
+/*
+ * This is the state information which is persistent across opens.
+ * The low level driver must not to touch any elements contained
+ * within.
+ */
+struct uart_state {
+	unsigned int		close_delay;		/* msec */
+	unsigned int		closing_wait;		/* msec */
+
+#define USF_CLOSING_WAIT_INF	(0)
+#define USF_CLOSING_WAIT_NONE	(~0U)
+
+	int			count;
+	int			pm_state;
+	struct uart_info	info;
+	struct uart_port	*port;
+
+	struct mutex		mutex;
+};
+
+#define UART_XMIT_SIZE	PAGE_SIZE
+
+
 /* number of characters left in xmit buffer before we ask for more */
 #define WAKEUP_CHARS		256
 
@@ -439,8 +440,13 @@ int uart_resume_port(struct uart_driver *reg, struct uart_port *port);
 #define uart_circ_chars_free(circ)	\
 	(CIRC_SPACE((circ)->head, (circ)->tail, UART_XMIT_SIZE))
 
-#define uart_tx_stopped(portp)		\
-	((portp)->info->port.tty->stopped || (portp)->info->port.tty->hw_stopped)
+static inline int uart_tx_stopped(struct uart_port *port)
+{
+	struct tty_struct *tty = port->info->port.tty;
+	if(tty->stopped || tty->hw_stopped)
+		return 1;
+	return 0;
+}
 
 /*
  * The following are helper functions for the low level drivers.
@@ -451,7 +457,7 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch)
 #ifdef SUPPORT_SYSRQ
 	if (port->sysrq) {
 		if (ch && time_before(jiffies, port->sysrq)) {
-			handle_sysrq(ch, port->info ? port->info->port.tty : NULL);
+			handle_sysrq(ch, port->info->port.tty);
 			port->sysrq = 0;
 			return 1;
 		}
-- 
cgit v0.10.2


From bc3256288b8ff9787623805e53cf7c6d5a2b4591 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Fri, 2 Jan 2009 13:49:32 +0000
Subject: fix for tty-serial-move-port

Hi Alan

next-20081204 crashes with the following message:

BUG: unable to handle kernel paging request at ffff88007d320248
IP: [<ffffffff803de934>] uart_remove_one_port+0xef/0x111
                kfree(info);
     393:       49 8d 7d 10             lea    0x10(%r13),%rdi
     397:       e8 00 00 00 00          callq  39c <uart_remove_one_port+0xef>

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index daeba1c..9425ed6 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -2530,10 +2530,8 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *port)
 	/*
 	 * Kill the tasklet, and free resources.
 	 */
-	if (info) {
+	if (info)
 		tasklet_kill(&info->tlet);
-		kfree(info);
-	}
 
 	state->port = NULL;
 	mutex_unlock(&port_mutex);
-- 
cgit v0.10.2


From b430428a188e8a434325e251d0704af4b88b4711 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 2 Jan 2009 13:49:41 +0000
Subject: 8250: Don't clobber spinlocks.

In serial8250_isa_init_ports(), the port's lock is initialized.  We
should not overwrite it.  In early_serial_setup(), only copy in the
fields we need.  Since the early console code only uses a subset of
the fields, these are sufficient.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Tomaso Paoletti <tpaoletti@caviumnetworks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 303272a..8e28750 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -2752,12 +2752,23 @@ static struct uart_driver serial8250_reg = {
  */
 int __init early_serial_setup(struct uart_port *port)
 {
+	struct uart_port *p;
+
 	if (port->line >= ARRAY_SIZE(serial8250_ports))
 		return -ENODEV;
 
 	serial8250_isa_init_ports();
-	serial8250_ports[port->line].port	= *port;
-	serial8250_ports[port->line].port.ops	= &serial8250_pops;
+	p = &serial8250_ports[port->line].port;
+	p->iobase       = port->iobase;
+	p->membase      = port->membase;
+	p->irq          = port->irq;
+	p->uartclk      = port->uartclk;
+	p->fifosize     = port->fifosize;
+	p->regshift     = port->regshift;
+	p->iotype       = port->iotype;
+	p->flags        = port->flags;
+	p->mapbase      = port->mapbase;
+	p->private_data = port->private_data;
 	return 0;
 }
 
-- 
cgit v0.10.2


From 7d6a07d123b62bf4fa71867420c23da3ca36c995 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 2 Jan 2009 13:49:47 +0000
Subject: 8250: Serial driver changes to support future Cavium OCTEON serial
 patches.

In order to use Cavium OCTEON specific serial i/o drivers, we first
patch the 8250 driver to use replaceable I/O functions.  Compatible
I/O functions are added for existing iotypeS.

An added benefit of this change is that it makes it easy to factor
some of the existing special cases out to board/SOC specific support
code.

The alternative is to load up 8250.c with a bunch of OCTEON specific
iotype code and bug work-arounds.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Tomaso Paoletti <tpaoletti@caviumnetworks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 8e28750..849af9d 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -303,16 +303,16 @@ static const u8 au_io_out_map[] = {
 };
 
 /* sane hardware needs no mapping */
-static inline int map_8250_in_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_in_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_AU)
+	if (p->iotype != UPIO_AU)
 		return offset;
 	return au_io_in_map[offset];
 }
 
-static inline int map_8250_out_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_out_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_AU)
+	if (p->iotype != UPIO_AU)
 		return offset;
 	return au_io_out_map[offset];
 }
@@ -341,16 +341,16 @@ static const u8
 		[UART_SCR]	= 0x2c
 	};
 
-static inline int map_8250_in_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_in_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_RM9000)
+	if (p->iotype != UPIO_RM9000)
 		return offset;
 	return regmap_in[offset];
 }
 
-static inline int map_8250_out_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_out_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_RM9000)
+	if (p->iotype != UPIO_RM9000)
 		return offset;
 	return regmap_out[offset];
 }
@@ -363,108 +363,170 @@ static inline int map_8250_out_reg(struct uart_8250_port *up, int offset)
 
 #endif
 
-static unsigned int serial_in(struct uart_8250_port *up, int offset)
+static unsigned int hub6_serial_in(struct uart_port *p, int offset)
 {
-	unsigned int tmp;
-	offset = map_8250_in_reg(up, offset) << up->port.regshift;
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	outb(p->hub6 - 1 + offset, p->iobase);
+	return inb(p->iobase + 1);
+}
 
-	switch (up->port.iotype) {
-	case UPIO_HUB6:
-		outb(up->port.hub6 - 1 + offset, up->port.iobase);
-		return inb(up->port.iobase + 1);
+static void hub6_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	outb(p->hub6 - 1 + offset, p->iobase);
+	outb(value, p->iobase + 1);
+}
 
-	case UPIO_MEM:
-	case UPIO_DWAPB:
-		return readb(up->port.membase + offset);
+static unsigned int mem_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return readb(p->membase + offset);
+}
 
-	case UPIO_RM9000:
-	case UPIO_MEM32:
-		return readl(up->port.membase + offset);
+static void mem_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	writeb(value, p->membase + offset);
+}
+
+static void mem32_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	writel(value, p->membase + offset);
+}
+
+static unsigned int mem32_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return readl(p->membase + offset);
+}
 
 #ifdef CONFIG_SERIAL_8250_AU1X00
-	case UPIO_AU:
-		return __raw_readl(up->port.membase + offset);
+static unsigned int au_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return __raw_readl(p->membase + offset);
+}
+
+static void au_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	__raw_writel(value, p->membase + offset);
+}
 #endif
 
-	case UPIO_TSI:
-		if (offset == UART_IIR) {
-			tmp = readl(up->port.membase + (UART_IIR & ~3));
-			return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */
-		} else
-			return readb(up->port.membase + offset);
+static unsigned int tsi_serial_in(struct uart_port *p, int offset)
+{
+	unsigned int tmp;
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	if (offset == UART_IIR) {
+		tmp = readl(p->membase + (UART_IIR & ~3));
+		return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */
+	} else
+		return readb(p->membase + offset);
+}
 
-	default:
-		return inb(up->port.iobase + offset);
-	}
+static void tsi_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	if (!((offset == UART_IER) && (value & UART_IER_UUE)))
+		writeb(value, p->membase + offset);
 }
 
-static void
-serial_out(struct uart_8250_port *up, int offset, int value)
+static void dwapb_serial_out(struct uart_port *p, int offset, int value)
 {
-	/* Save the offset before it's remapped */
 	int save_offset = offset;
-	offset = map_8250_out_reg(up, offset) << up->port.regshift;
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	/* Save the LCR value so it can be re-written when a
+	 * Busy Detect interrupt occurs. */
+	if (save_offset == UART_LCR) {
+		struct uart_8250_port *up = (struct uart_8250_port *)p;
+		up->lcr = value;
+	}
+	writeb(value, p->membase + offset);
+	/* Read the IER to ensure any interrupt is cleared before
+	 * returning from ISR. */
+	if (save_offset == UART_TX || save_offset == UART_IER)
+		value = p->serial_in(p, UART_IER);
+}
 
-	switch (up->port.iotype) {
+static unsigned int io_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return inb(p->iobase + offset);
+}
+
+static void io_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	outb(value, p->iobase + offset);
+}
+
+static void set_io_from_upio(struct uart_port *p)
+{
+	switch (p->iotype) {
 	case UPIO_HUB6:
-		outb(up->port.hub6 - 1 + offset, up->port.iobase);
-		outb(value, up->port.iobase + 1);
+		p->serial_in = hub6_serial_in;
+		p->serial_out = hub6_serial_out;
 		break;
 
 	case UPIO_MEM:
-		writeb(value, up->port.membase + offset);
+		p->serial_in = mem_serial_in;
+		p->serial_out = mem_serial_out;
 		break;
 
 	case UPIO_RM9000:
 	case UPIO_MEM32:
-		writel(value, up->port.membase + offset);
+		p->serial_in = mem32_serial_in;
+		p->serial_out = mem32_serial_out;
 		break;
 
 #ifdef CONFIG_SERIAL_8250_AU1X00
 	case UPIO_AU:
-		__raw_writel(value, up->port.membase + offset);
+		p->serial_in = au_serial_in;
+		p->serial_out = au_serial_out;
 		break;
 #endif
 	case UPIO_TSI:
-		if (!((offset == UART_IER) && (value & UART_IER_UUE)))
-			writeb(value, up->port.membase + offset);
+		p->serial_in = tsi_serial_in;
+		p->serial_out = tsi_serial_out;
 		break;
 
 	case UPIO_DWAPB:
-		/* Save the LCR value so it can be re-written when a
-		 * Busy Detect interrupt occurs. */
-		if (save_offset == UART_LCR)
-			up->lcr = value;
-		writeb(value, up->port.membase + offset);
-		/* Read the IER to ensure any interrupt is cleared before
-		 * returning from ISR. */
-		if (save_offset == UART_TX || save_offset == UART_IER)
-			value = serial_in(up, UART_IER);
+		p->serial_in = mem_serial_in;
+		p->serial_out = dwapb_serial_out;
 		break;
 
 	default:
-		outb(value, up->port.iobase + offset);
+		p->serial_in = io_serial_in;
+		p->serial_out = io_serial_out;
+		break;
 	}
 }
 
 static void
 serial_out_sync(struct uart_8250_port *up, int offset, int value)
 {
-	switch (up->port.iotype) {
+	struct uart_port *p = &up->port;
+	switch (p->iotype) {
 	case UPIO_MEM:
 	case UPIO_MEM32:
 #ifdef CONFIG_SERIAL_8250_AU1X00
 	case UPIO_AU:
 #endif
 	case UPIO_DWAPB:
-		serial_out(up, offset, value);
-		serial_in(up, UART_LCR);	/* safe, no side-effects */
+		p->serial_out(p, offset, value);
+		p->serial_in(p, UART_LCR);	/* safe, no side-effects */
 		break;
 	default:
-		serial_out(up, offset, value);
+		p->serial_out(p, offset, value);
 	}
 }
 
+#define serial_in(up, offset)		\
+	(up->port.serial_in(&(up)->port, (offset)))
+#define serial_out(up, offset, value)	\
+	(up->port.serial_out(&(up)->port, (offset), (value)))
 /*
  * We used to support using pause I/O for certain machines.  We
  * haven't supported this for a while, but just in case it's badly
@@ -2576,6 +2638,7 @@ static void __init serial8250_isa_init_ports(void)
 		up->port.membase  = old_serial_port[i].iomem_base;
 		up->port.iotype   = old_serial_port[i].io_type;
 		up->port.regshift = old_serial_port[i].iomem_reg_shift;
+		set_io_from_upio(&up->port);
 		if (share_irqs)
 			up->port.flags |= UPF_SHARE_IRQ;
 	}
@@ -2769,6 +2832,13 @@ int __init early_serial_setup(struct uart_port *port)
 	p->flags        = port->flags;
 	p->mapbase      = port->mapbase;
 	p->private_data = port->private_data;
+
+	set_io_from_upio(p);
+	if (port->serial_in)
+		p->serial_in = port->serial_in;
+	if (port->serial_out)
+		p->serial_out = port->serial_out;
+
 	return 0;
 }
 
@@ -2833,6 +2903,8 @@ static int __devinit serial8250_probe(struct platform_device *dev)
 		port.mapbase		= p->mapbase;
 		port.hub6		= p->hub6;
 		port.private_data	= p->private_data;
+		port.serial_in		= p->serial_in;
+		port.serial_out		= p->serial_out;
 		port.dev		= &dev->dev;
 		if (share_irqs)
 			port.flags |= UPF_SHARE_IRQ;
@@ -2986,6 +3058,12 @@ int serial8250_register_port(struct uart_port *port)
 		uart->port.private_data = port->private_data;
 		if (port->dev)
 			uart->port.dev = port->dev;
+		set_io_from_upio(&uart->port);
+		/* Possibly override default I/O functions.  */
+		if (port->serial_in)
+			uart->port.serial_in = port->serial_in;
+		if (port->serial_out)
+			uart->port.serial_out = port->serial_out;
 
 		ret = uart_add_one_port(&serial8250_reg, &uart->port);
 		if (ret == 0)
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 3d37c94..77d83d9 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -28,6 +28,8 @@ struct plat_serial8250_port {
 	unsigned char	iotype;		/* UPIO_* */
 	unsigned char	hub6;
 	upf_t		flags;		/* UPF_* flags */
+	unsigned int	(*serial_in)(struct uart_port *, int);
+	void		(*serial_out)(struct uart_port *, int, int);
 };
 
 /*
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 2395969..60061f4 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -248,6 +248,8 @@ struct uart_port {
 	spinlock_t		lock;			/* port lock */
 	unsigned long		iobase;			/* in/out[bwl] */
 	unsigned char __iomem	*membase;		/* read/write[bwl] */
+	unsigned int		(*serial_in)(struct uart_port *, int);
+	void			(*serial_out)(struct uart_port *, int, int);
 	unsigned int		irq;			/* irq number */
 	unsigned int		uartclk;		/* base uart clock */
 	unsigned int		fifosize;		/* tx fifo size */
-- 
cgit v0.10.2


From 8e23fcc89c8091790903927449f8efb9b4e23960 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 2 Jan 2009 13:49:54 +0000
Subject: Serial: Allow port type to be specified when calling
 serial8250_register_port.

Add flag value UPF_FIXED_TYPE which specifies that the UART type is
known and should not be probed.  For this case the UARTs properties
are just copied out of the uart_config entry.

This allows us to keep SOC specific 8250 probe code out of 8250.c.  In
this case we know the serial hardware will not be changing as it is on
the same silicon as the CPU, and we can specify it with certainty in
the board/cpu setup code.

The alternative is to load up 8250.c with a bunch of OCTEON specific
special cases in the probing code.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 849af9d..3ae4974 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -2903,6 +2903,7 @@ static int __devinit serial8250_probe(struct platform_device *dev)
 		port.mapbase		= p->mapbase;
 		port.hub6		= p->hub6;
 		port.private_data	= p->private_data;
+		port.type		= p->type;
 		port.serial_in		= p->serial_in;
 		port.serial_out		= p->serial_out;
 		port.dev		= &dev->dev;
@@ -3058,6 +3059,14 @@ int serial8250_register_port(struct uart_port *port)
 		uart->port.private_data = port->private_data;
 		if (port->dev)
 			uart->port.dev = port->dev;
+
+		if (port->flags & UPF_FIXED_TYPE) {
+			uart->port.type = port->type;
+			uart->port.fifosize = uart_config[port->type].fifo_size;
+			uart->capabilities = uart_config[port->type].flags;
+			uart->tx_loadsz = uart_config[port->type].tx_loadsz;
+		}
+
 		set_io_from_upio(&uart->port);
 		/* Possibly override default I/O functions.  */
 		if (port->serial_in)
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 9425ed6..dc68b7e 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -2179,11 +2179,14 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
 	 * Now do the auto configuration stuff.  Note that config_port
 	 * is expected to claim the resources and map the port for us.
 	 */
-	flags = UART_CONFIG_TYPE;
+	flags = 0;
 	if (port->flags & UPF_AUTO_IRQ)
 		flags |= UART_CONFIG_IRQ;
 	if (port->flags & UPF_BOOT_AUTOCONF) {
-		port->type = PORT_UNKNOWN;
+		if (!(port->flags & UPF_FIXED_TYPE)) {
+			port->type = PORT_UNKNOWN;
+			flags |= UART_CONFIG_TYPE;
+		}
 		port->ops->config_port(port, flags);
 	}
 
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 77d83d9..d4d2a78 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -28,6 +28,7 @@ struct plat_serial8250_port {
 	unsigned char	iotype;		/* UPIO_* */
 	unsigned char	hub6;
 	upf_t		flags;		/* UPF_* flags */
+	unsigned int	type;		/* If UPF_FIXED_TYPE */
 	unsigned int	(*serial_in)(struct uart_port *, int);
 	void		(*serial_out)(struct uart_port *, int, int);
 };
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 60061f4..f155252 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -295,6 +295,8 @@ struct uart_port {
 #define UPF_MAGIC_MULTIPLIER	((__force upf_t) (1 << 16))
 #define UPF_CONS_FLOW		((__force upf_t) (1 << 23))
 #define UPF_SHARE_IRQ		((__force upf_t) (1 << 24))
+/* The exact UART type is known and should not be probed.  */
+#define UPF_FIXED_TYPE		((__force upf_t) (1 << 27))
 #define UPF_BOOT_AUTOCONF	((__force upf_t) (1 << 28))
 #define UPF_FIXED_PORT		((__force upf_t) (1 << 29))
 #define UPF_DEAD		((__force upf_t) (1 << 30))
-- 
cgit v0.10.2


From 6b06f19151c335ee0c5b61839fa4e6838182ebb8 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 2 Jan 2009 13:50:00 +0000
Subject: Serial: UART driver changes for Cavium OCTEON.

Cavium UART implementation is not covered by existing uart_configS.
Define a new uart_config (PORT_OCTEON) which is specified by OCTEON
platform device registration code.

Signed-off-by: Tomaso Paoletti <tpaoletti@caviumnetworks.com>
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 3ae4974..daa0056 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -279,6 +279,13 @@ static const struct serial8250_config uart_config[] = {
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.flags		= UART_CAP_FIFO,
 	},
+	[PORT_OCTEON] = {
+		.name		= "OCTEON",
+		.fifo_size	= 64,
+		.tx_loadsz	= 64,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO,
+	},
 };
 
 #if defined (CONFIG_SERIAL_8250_AU1X00)
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index f155252..b419984 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -40,7 +40,8 @@
 #define PORT_NS16550A	14
 #define PORT_XSCALE	15
 #define PORT_RM9000	16	/* PMC-Sierra RM9xxx internal UART */
-#define PORT_MAX_8250	16	/* max port ID */
+#define PORT_OCTEON	17	/* Cavium OCTEON internal UART */
+#define PORT_MAX_8250	17	/* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
-- 
cgit v0.10.2


From c847d47cb7b2fa78b17c9e17ed3fbd010ee3d3ca Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 2 Jan 2009 13:50:07 +0000
Subject: drivers/char/cyclades.c: cy_pci_probe: fix error path

We forgot to release resources in one case.

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=12137

Reported-by: Florian Lohoff <flo@rfc822.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 5e5b1dc..6a59f72 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -5010,7 +5010,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 		if (nchan == 0) {
 			dev_err(&pdev->dev, "Cyclom-Y PCI host card with no "
 					"Serial-Modules\n");
-			return -EIO;
+			goto err_unmap;
 		}
 	} else if (device_id == PCI_DEVICE_ID_CYCLOM_Z_Hi) {
 		struct RUNTIME_9060 __iomem *ctl_addr;
-- 
cgit v0.10.2


From ad36b88e2d22e9ef42797581d3ecea9feadd9488 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:50:20 +0000
Subject: tty: Fix an ircomm warning and note another bug

Roel Kluin noted that line is unsigned so one test is unneccessary. Also
add a warning for another flaw I noticed while making this change.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index e4e2cae..086d5ef 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -371,9 +371,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
 	IRDA_DEBUG(2, "%s()\n", __func__ );
 
 	line = tty->index;
-	if ((line < 0) || (line >= IRCOMM_TTY_PORTS)) {
+	if (line >= IRCOMM_TTY_PORTS)
 		return -ENODEV;
-	}
 
 	/* Check if instance already exists */
 	self = hashbin_lock_find(ircomm_tty, line, NULL);
@@ -405,6 +404,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
 		 * Force TTY into raw mode by default which is usually what
 		 * we want for IrCOMM and IrLPT. This way applications will
 		 * not have to twiddle with printcap etc.
+		 *
+		 * Note this is completely usafe and doesn't work properly
 		 */
 		tty->termios->c_iflag = 0;
 		tty->termios->c_oflag = 0;
-- 
cgit v0.10.2


From 58eb17f155704062d76729d1fb7e23d3559ca86a Mon Sep 17 00:00:00 2001
From: Denis Joseph Barrow <D.Barow@option.com>
Date: Fri, 2 Jan 2009 13:50:29 +0000
Subject: hso modem detect fix patch against Alan Cox'es tty tree

Fixed incorrect check for the modem port, this prevents
crashes caused by issueing a tiocmget_submit_urb
on endpoints which don't exist for non modem devices.

Signed-off-by: Denis Joseph Barrow <D.Barow@option.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index d974d97..148af34 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2663,7 +2663,7 @@ static struct hso_device *hso_create_bulk_serial_device(
 	serial->parent = hso_dev;
 	hso_dev->port_data.dev_serial = serial;
 
-	if (port & HSO_PORT_MODEM) {
+	if ((port & HSO_PORT_MASK) == HSO_PORT_MODEM) {
 		num_urbs = 2;
 		serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget),
 					   GFP_KERNEL);
-- 
cgit v0.10.2


From 11cd29b028be88b13717401496fe4953fb96be03 Mon Sep 17 00:00:00 2001
From: Denis Joseph Barrow <D.Barow@option.com>
Date: Fri, 2 Jan 2009 13:50:36 +0000
Subject: hso maintainers update patch

Added D.J. Barrow as maintainer of hso driver.

Signed-off-by: Denis Joseph Barrow <D.Barow@option.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index ceb32ee..d5fc534 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2049,6 +2049,12 @@ M:	mikulas@artax.karlin.mff.cuni.cz
 W:	http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi
 S:	Maintained
 
+HSO	3G Modem Driver (hso.c)
+P:	Denis Joseph Barrow
+M:	d.barow@option.com
+W:	http://www.pharscape.org
+S:	Maintained
+
 HTCPEN TOUCHSCREEN DRIVER
 P:	Pau Oliva Fora
 M:	pof@eslack.org
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 148af34..c4918b8 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -3,6 +3,8 @@
  * Driver for Option High Speed Mobile Devices.
  *
  *  Copyright (C) 2008 Option International
+ *                     Filip Aben <f.aben@option.com>
+ *                     Denis Joseph Barrow <d.barow@option.com>
  *  Copyright (C) 2007 Andrew Bird (Sphere Systems Ltd)
  *  			<ajb@spheresystems.co.uk>
  *  Copyright (C) 2008 Greg Kroah-Hartman <gregkh@suse.de>
-- 
cgit v0.10.2


From e65f0f8271b1b0452334e5da37fd35413a000de4 Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fleitner@redhat.com>
Date: Fri, 2 Jan 2009 13:50:43 +0000
Subject: serial_8250: support for Sealevel Systems Model 7803 COMM+8

Add support for Sealevel Systems Model 7803 COMM+8

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 2a2e1c7..c088146 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -2287,6 +2287,9 @@ static struct pci_device_id serial_pci_tbl[] = {
 	{	PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_COMM8,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b2_8_115200 },
+	{	PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_7803,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b2_8_460800 },
 	{	PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_UCOMM8,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b2_8_115200 },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index fa83dfe..218c73b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1796,6 +1796,7 @@
 #define PCI_DEVICE_ID_SEALEVEL_UCOMM232	0x7202
 #define PCI_DEVICE_ID_SEALEVEL_COMM4	0x7401
 #define PCI_DEVICE_ID_SEALEVEL_COMM8	0x7801
+#define PCI_DEVICE_ID_SEALEVEL_7803	0x7803
 #define PCI_DEVICE_ID_SEALEVEL_UCOMM8	0x7804
 
 #define PCI_VENDOR_ID_HYPERCOPE		0x1365
-- 
cgit v0.10.2


From 103ceffb9501531f6931df6aebc11a05189201f0 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Fri, 2 Jan 2009 23:43:25 +0530
Subject: x86: mpparse.c fix style problems

Impact: cleanup, fix style problems, more readable

Fixes style problems:

 WARNING: Use #include <linux/smp.h> instead of <asm/smp.h>
 WARNING: Use #include <linux/acpi.h> instead of <asm/acpi.h>
 WARNING: suspect code indent for conditional statements (8, 17)
 WARNING: space prohibited between function name and open parenthesis '('

total: 0 errors, 5 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 45e3b69..c5c5b8d 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -16,14 +16,14 @@
 #include <linux/bitops.h>
 #include <linux/acpi.h>
 #include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/acpi.h>
 
-#include <asm/smp.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
 #include <asm/pgalloc.h>
 #include <asm/io_apic.h>
 #include <asm/proto.h>
-#include <asm/acpi.h>
 #include <asm/bios_ebda.h>
 #include <asm/e820.h>
 #include <asm/trampoline.h>
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 #endif
 
 	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
-		 set_bit(m->mpc_busid, mp_bus_not_pci);
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
+		set_bit(m->mpc_busid, mp_bus_not_pci);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
 #endif
 	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
@@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 			x86_quirks->mpc_oem_pci_bus(m);
 
 		clear_bit(m->mpc_busid, mp_bus_not_pci);
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
 	} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
-- 
cgit v0.10.2


From e8e32326279cba3d049b4325111f76618953195c Mon Sep 17 00:00:00 2001
From: Ingo Brueckl <ib@wupperonline.de>
Date: Fri, 2 Jan 2009 14:42:00 +0100
Subject: Fix compiler warning in arch/x86/mm/init_32.c

Signed-off-by: Ingo Brueckl <ib@wupperonline.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8655b5b..f99a6c6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -435,8 +435,12 @@ static void __init set_highmem_pages_init(void)
 #endif /* !CONFIG_NUMA */
 
 #else
-# define permanent_kmaps_init(pgd_base)		do { } while (0)
-# define set_highmem_pages_init()	do { } while (0)
+static inline void permanent_kmaps_init(pgd_t *pgd_base)
+{
+}
+static inline void set_highmem_pages_init(void)
+{
+}
 #endif /* CONFIG_HIGHMEM */
 
 void __init native_pagetable_setup_start(pgd_t *base)
-- 
cgit v0.10.2


From 7ab21a8692094872298df172f54d55cba72fd308 Mon Sep 17 00:00:00 2001
From: Andy Spencer <spenceal@rose-hulman.edu>
Date: Fri, 2 Jan 2009 16:19:13 +0000
Subject: i8k: Enable i8k on Dell Precision Systems

Patch to enable i8k on Dell Precisions.

Signed-off-by: Andy Spencer <spenceal@rose-hulman.edu>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c
index b60d425..099fc89 100644
--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -485,6 +485,13 @@ static struct dmi_system_id __initdata i8k_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "MP061"),
 		},
 	},
+	{
+		.ident = "Dell Precision",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Precision"),
+		},
+	},
 	{ }
 };
 
-- 
cgit v0.10.2


From bef2a508b4276fd7897b2cb27df037d26361842c Mon Sep 17 00:00:00 2001
From: Federico Heinz <fheinz@vialibre.org.ar>
Date: Fri, 2 Jan 2009 16:19:23 +0000
Subject: i8k: Add Dell Vostro systems

This trivial patch adds support for i8k on the new Dell Vostro models.
I tested it on my Vostro 1400, and it works. It does print a warning
when loading the module:

	i8k: unable to get SMM BIOS version

But I couldn't figure out how to fix that. The module seems to work fine,
anyway...

Signed-off-by: Federico Heinz <fheinz@vialibre.org.ar>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c
index 099fc89..fc8cf7a 100644
--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -492,7 +492,14 @@ static struct dmi_system_id __initdata i8k_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Precision"),
 		},
 	},
-	{ }
+	{
+		.ident = "Dell Vostro",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Vostro"),
+		},
+	},
+        { }
 };
 
 /*
-- 
cgit v0.10.2


From 92cde4d5396c3b6cbf3192286b687f97a889dffe Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 2 Jan 2009 15:40:55 +1100
Subject: Update powerpc maintainers

Ben Herrenschmidt is taking over as the primary powerpc architecture
maintainer.  I'll still be around as his backup/deputy.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index d5fc534..3148de2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2641,13 +2641,13 @@ W:	http://www.hansenpartnership.com/voyager
 S:	Maintained
 
 LINUX FOR POWERPC (32-BIT AND 64-BIT)
-P:	Paul Mackerras
-M:	paulus@samba.org
 P:	Benjamin Herrenschmidt
 M:	benh@kernel.crashing.org
+P:	Paul Mackerras
+M:	paulus@samba.org
 W:	http://www.penguinppc.org/
 L:	linuxppc-dev@ozlabs.org
-T:	git kernel.org:/pub/scm/linux/kernel/git/paulus/powerpc.git
+T:	git kernel.org:/pub/scm/linux/kernel/git/benh/powerpc.git
 S:	Supported
 
 LINUX FOR POWER MACINTOSH
-- 
cgit v0.10.2


From 7943ecf161753ae92af74e7587c8438f221d55a5 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 23 Dec 2008 14:37:43 -0300
Subject: V4L/DVB (10132): v4l2-compat-ioctl32: remove dependency on videodev.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index d0e1bd3..26fdf1e 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -1065,8 +1065,9 @@ long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 		break;
 #endif
 	default:
-		v4l_print_ioctl("compat_ioctl32", cmd);
-		printk(KERN_CONT "\n");
+		printk(KERN_WARNING "compat_ioctl32: "
+			"unknown ioctl '%c', dir=%d, #%d (0x%08x)\n",
+			_IOC_TYPE(cmd), _IOC_DIR(cmd), _IOC_NR(cmd), cmd);
 		break;
 	}
 	return ret;
-- 
cgit v0.10.2


From 50a2a8b35edec09aff900a9b1c629776e11c5c88 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Mon, 22 Dec 2008 09:13:11 -0300
Subject: V4L/DVB (10133): v4l2-framework: use correct comment style.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt
index eeae76c..ba93442 100644
--- a/Documentation/video4linux/v4l2-framework.txt
+++ b/Documentation/video4linux/v4l2-framework.txt
@@ -410,7 +410,7 @@ for you.
 
 	err = video_register_device(vdev, VFL_TYPE_GRABBER, -1);
 	if (err) {
-		video_device_release(vdev); // or kfree(my_vdev);
+		video_device_release(vdev); /* or kfree(my_vdev); */
 		return err;
 	}
 
-- 
cgit v0.10.2


From dfa9a5ae679ff2d23caa995d0f55a19abaf0596e Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 23 Dec 2008 12:17:23 -0300
Subject: V4L/DVB (10134): v4l2 doc: set v4l2_dev instead of parent.

Update the documentation now that the v4l2_dev field is in.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt
index ba93442..38d054a 100644
--- a/Documentation/video4linux/v4l2-framework.txt
+++ b/Documentation/video4linux/v4l2-framework.txt
@@ -390,8 +390,7 @@ allocated memory.
 
 You should also set these fields:
 
-- parent: set to the parent device (same device as was used to register
-  v4l2_device).
+- v4l2_dev: set to the v4l2_device parent device.
 - name: set to something descriptive and unique.
 - fops: set to the file_operations struct.
 - ioctl_ops: if you use the v4l2_ioctl_ops to simplify ioctl maintenance
@@ -516,5 +515,4 @@ void *video_drvdata(struct file *file);
 
 You can go from a video_device struct to the v4l2_device struct using:
 
-struct v4l2_device *v4l2_dev = dev_get_drvdata(vdev->parent);
-
+struct v4l2_device *v4l2_dev = vdev->v4l2_dev;
-- 
cgit v0.10.2


From bec43661b1dc0075b7445223ba775674133b164d Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 30 Dec 2008 06:58:20 -0300
Subject: V4L/DVB (10135): v4l2: introduce v4l2_file_operations.

Introduce a struct v4l2_file_operations for v4l2 drivers.

Remove the unnecessary inode argument.

Move compat32 handling (and llseek) into the v4l2-dev core: this is now
handled in the v4l2 core and no longer in the drivers themselves.

Note that this changeset reverts an earlier patch that changed the return
type of__video_ioctl2 from int to long. This change will be reinstated
later in a much improved version.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/common/saa7146_fops.c b/drivers/media/common/saa7146_fops.c
index 7d844af..fad7fd8 100644
--- a/drivers/media/common/saa7146_fops.c
+++ b/drivers/media/common/saa7146_fops.c
@@ -192,9 +192,9 @@ void saa7146_buffer_timeout(unsigned long data)
 /********************************************************************************/
 /* file operations */
 
-static int fops_open(struct inode *inode, struct file *file)
+static int fops_open(struct file *file)
 {
-	unsigned int minor = iminor(inode);
+	unsigned int minor = video_devdata(file)->minor;
 	struct saa7146_dev *h = NULL, *dev = NULL;
 	struct list_head *list;
 	struct saa7146_fh *fh = NULL;
@@ -202,7 +202,7 @@ static int fops_open(struct inode *inode, struct file *file)
 
 	enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 
-	DEB_EE(("inode:%p, file:%p, minor:%d\n",inode,file,minor));
+	DEB_EE(("file:%p, minor:%d\n", file, minor));
 
 	if (mutex_lock_interruptible(&saa7146_devices_lock))
 		return -ERESTARTSYS;
@@ -255,7 +255,7 @@ static int fops_open(struct inode *inode, struct file *file)
 		if (dev->ext_vv_data->capabilities & V4L2_CAP_VBI_CAPTURE)
 			result = saa7146_vbi_uops.open(dev,file);
 		if (dev->ext_vv_data->vbi_fops.open)
-			dev->ext_vv_data->vbi_fops.open(inode, file);
+			dev->ext_vv_data->vbi_fops.open(file);
 	} else {
 		DEB_S(("initializing video...\n"));
 		result = saa7146_video_uops.open(dev,file);
@@ -280,12 +280,12 @@ out:
 	return result;
 }
 
-static int fops_release(struct inode *inode, struct file *file)
+static int fops_release(struct file *file)
 {
 	struct saa7146_fh  *fh  = file->private_data;
 	struct saa7146_dev *dev = fh->dev;
 
-	DEB_EE(("inode:%p, file:%p\n",inode,file));
+	DEB_EE(("file:%p\n", file));
 
 	if (mutex_lock_interruptible(&saa7146_devices_lock))
 		return -ERESTARTSYS;
@@ -294,7 +294,7 @@ static int fops_release(struct inode *inode, struct file *file)
 		if (dev->ext_vv_data->capabilities & V4L2_CAP_VBI_CAPTURE)
 			saa7146_vbi_uops.release(dev,file);
 		if (dev->ext_vv_data->vbi_fops.release)
-			dev->ext_vv_data->vbi_fops.release(inode, file);
+			dev->ext_vv_data->vbi_fops.release(file);
 	} else {
 		saa7146_video_uops.release(dev,file);
 	}
@@ -308,10 +308,10 @@ static int fops_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int fops_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static int fops_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 /*
-	DEB_EE(("inode:%p, file:%p, cmd:%d, arg:%li\n",inode, file, cmd, arg));
+	DEB_EE(("file:%p, cmd:%d, arg:%li\n", file, cmd, arg));
 */
 	return video_usercopy(file, cmd, arg, saa7146_video_do_ioctl);
 }
@@ -416,7 +416,7 @@ static ssize_t fops_write(struct file *file, const char __user *data, size_t cou
 	}
 }
 
-static const struct file_operations video_fops =
+static const struct v4l2_file_operations video_fops =
 {
 	.owner		= THIS_MODULE,
 	.open		= fops_open,
@@ -426,7 +426,6 @@ static const struct file_operations video_fops =
 	.poll		= fops_poll,
 	.mmap		= fops_mmap,
 	.ioctl		= fops_ioctl,
-	.llseek		= no_llseek,
 };
 
 static void vv_callback(struct saa7146_dev *dev, unsigned long status)
diff --git a/drivers/media/dvb/ttpci/av7110_v4l.c b/drivers/media/dvb/ttpci/av7110_v4l.c
index b4a0cc5..315ba6f 100644
--- a/drivers/media/dvb/ttpci/av7110_v4l.c
+++ b/drivers/media/dvb/ttpci/av7110_v4l.c
@@ -567,7 +567,7 @@ static int av7110_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int av7110_vbi_reset(struct inode *inode, struct file *file)
+static int av7110_vbi_reset(struct file *file)
 {
 	struct saa7146_fh *fh = file->private_data;
 	struct saa7146_dev *dev = fh->dev;
diff --git a/drivers/media/radio/dsbr100.c b/drivers/media/radio/dsbr100.c
index 5474a22..2014ebc 100644
--- a/drivers/media/radio/dsbr100.c
+++ b/drivers/media/radio/dsbr100.c
@@ -154,8 +154,8 @@ devices, that would be 76 and 91.  */
 static int usb_dsbr100_probe(struct usb_interface *intf,
 			     const struct usb_device_id *id);
 static void usb_dsbr100_disconnect(struct usb_interface *intf);
-static int usb_dsbr100_open(struct inode *inode, struct file *file);
-static int usb_dsbr100_close(struct inode *inode, struct file *file);
+static int usb_dsbr100_open(struct file *file);
+static int usb_dsbr100_close(struct file *file);
 static int usb_dsbr100_suspend(struct usb_interface *intf,
 						pm_message_t message);
 static int usb_dsbr100_resume(struct usb_interface *intf);
@@ -566,7 +566,7 @@ static int vidioc_s_audio(struct file *file, void *priv,
 	return 0;
 }
 
-static int usb_dsbr100_open(struct inode *inode, struct file *file)
+static int usb_dsbr100_open(struct file *file)
 {
 	struct dsbr100_device *radio = video_drvdata(file);
 	int retval;
@@ -593,7 +593,7 @@ static int usb_dsbr100_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int usb_dsbr100_close(struct inode *inode, struct file *file)
+static int usb_dsbr100_close(struct file *file)
 {
 	struct dsbr100_device *radio = video_drvdata(file);
 	int retval;
@@ -653,15 +653,11 @@ static void usb_dsbr100_video_device_release(struct video_device *videodev)
 }
 
 /* File system interface */
-static const struct file_operations usb_dsbr100_fops = {
+static const struct v4l2_file_operations usb_dsbr100_fops = {
 	.owner		= THIS_MODULE,
 	.open		= usb_dsbr100_open,
 	.release	= usb_dsbr100_close,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek		= no_llseek,
 };
 
 static const struct v4l2_ioctl_ops usb_dsbr100_ioctl_ops = {
diff --git a/drivers/media/radio/radio-aimslab.c b/drivers/media/radio/radio-aimslab.c
index dd6d3df..bfa13b8 100644
--- a/drivers/media/radio/radio-aimslab.c
+++ b/drivers/media/radio/radio-aimslab.c
@@ -374,26 +374,22 @@ static int vidioc_s_audio(struct file *file, void *priv,
 
 static struct rt_device rtrack_unit;
 
-static int rtrack_exclusive_open(struct inode *inode, struct file *file)
+static int rtrack_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &rtrack_unit.in_use) ? -EBUSY : 0;
 }
 
-static int rtrack_exclusive_release(struct inode *inode, struct file *file)
+static int rtrack_exclusive_release(struct file *file)
 {
 	clear_bit(0, &rtrack_unit.in_use);
 	return 0;
 }
 
-static const struct file_operations rtrack_fops = {
+static const struct v4l2_file_operations rtrack_fops = {
 	.owner		= THIS_MODULE,
 	.open           = rtrack_exclusive_open,
 	.release        = rtrack_exclusive_release,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops rtrack_ioctl_ops = {
diff --git a/drivers/media/radio/radio-aztech.c b/drivers/media/radio/radio-aztech.c
index d784895..5604e88 100644
--- a/drivers/media/radio/radio-aztech.c
+++ b/drivers/media/radio/radio-aztech.c
@@ -338,26 +338,22 @@ static int vidioc_s_ctrl (struct file *file, void *priv,
 
 static struct az_device aztech_unit;
 
-static int aztech_exclusive_open(struct inode *inode, struct file *file)
+static int aztech_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &aztech_unit.in_use) ? -EBUSY : 0;
 }
 
-static int aztech_exclusive_release(struct inode *inode, struct file *file)
+static int aztech_exclusive_release(struct file *file)
 {
 	clear_bit(0, &aztech_unit.in_use);
 	return 0;
 }
 
-static const struct file_operations aztech_fops = {
+static const struct v4l2_file_operations aztech_fops = {
 	.owner		= THIS_MODULE,
 	.open           = aztech_exclusive_open,
 	.release        = aztech_exclusive_release,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops aztech_ioctl_ops = {
diff --git a/drivers/media/radio/radio-cadet.c b/drivers/media/radio/radio-cadet.c
index bfd37f3..cb3075a 100644
--- a/drivers/media/radio/radio-cadet.c
+++ b/drivers/media/radio/radio-cadet.c
@@ -529,7 +529,7 @@ static int vidioc_s_audio(struct file *file, void *priv,
 }
 
 static int
-cadet_open(struct inode *inode, struct file *file)
+cadet_open(struct file *file)
 {
 	users++;
 	if (1 == users) init_waitqueue_head(&read_queue);
@@ -537,7 +537,7 @@ cadet_open(struct inode *inode, struct file *file)
 }
 
 static int
-cadet_release(struct inode *inode, struct file *file)
+cadet_release(struct file *file)
 {
 	users--;
 	if (0 == users){
@@ -557,17 +557,13 @@ cadet_poll(struct file *file, struct poll_table_struct *wait)
 }
 
 
-static const struct file_operations cadet_fops = {
+static const struct v4l2_file_operations cadet_fops = {
 	.owner		= THIS_MODULE,
 	.open		= cadet_open,
 	.release       	= cadet_release,
 	.read		= cadet_read,
 	.ioctl		= video_ioctl2,
 	.poll		= cadet_poll,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops cadet_ioctl_ops = {
diff --git a/drivers/media/radio/radio-gemtek-pci.c b/drivers/media/radio/radio-gemtek-pci.c
index e15bee6..0c96bf8 100644
--- a/drivers/media/radio/radio-gemtek-pci.c
+++ b/drivers/media/radio/radio-gemtek-pci.c
@@ -358,26 +358,22 @@ MODULE_DEVICE_TABLE( pci, gemtek_pci_id );
 
 static int mx = 1;
 
-static int gemtek_pci_exclusive_open(struct inode *inode, struct file *file)
+static int gemtek_pci_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &in_use) ? -EBUSY : 0;
 }
 
-static int gemtek_pci_exclusive_release(struct inode *inode, struct file *file)
+static int gemtek_pci_exclusive_release(struct file *file)
 {
 	clear_bit(0, &in_use);
 	return 0;
 }
 
-static const struct file_operations gemtek_pci_fops = {
+static const struct v4l2_file_operations gemtek_pci_fops = {
 	.owner		= THIS_MODULE,
 	.open           = gemtek_pci_exclusive_open,
 	.release        = gemtek_pci_exclusive_release,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops gemtek_pci_ioctl_ops = {
diff --git a/drivers/media/radio/radio-gemtek.c b/drivers/media/radio/radio-gemtek.c
index e13118d..2b68be7 100644
--- a/drivers/media/radio/radio-gemtek.c
+++ b/drivers/media/radio/radio-gemtek.c
@@ -394,26 +394,22 @@ static struct v4l2_queryctrl radio_qctrl[] = {
 	}
 };
 
-static int gemtek_exclusive_open(struct inode *inode, struct file *file)
+static int gemtek_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &in_use) ? -EBUSY : 0;
 }
 
-static int gemtek_exclusive_release(struct inode *inode, struct file *file)
+static int gemtek_exclusive_release(struct file *file)
 {
 	clear_bit(0, &in_use);
 	return 0;
 }
 
-static const struct file_operations gemtek_fops = {
+static const struct v4l2_file_operations gemtek_fops = {
 	.owner		= THIS_MODULE,
 	.open		= gemtek_exclusive_open,
 	.release	= gemtek_exclusive_release,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek		= no_llseek
 };
 
 static int vidioc_querycap(struct file *file, void *priv,
diff --git a/drivers/media/radio/radio-maestro.c b/drivers/media/radio/radio-maestro.c
index 4bf4d00..ba3a13a 100644
--- a/drivers/media/radio/radio-maestro.c
+++ b/drivers/media/radio/radio-maestro.c
@@ -79,12 +79,12 @@ static unsigned long in_use;
 
 static int maestro_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
 
-static int maestro_exclusive_open(struct inode *inode, struct file *file)
+static int maestro_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &in_use) ? -EBUSY : 0;
 }
 
-static int maestro_exclusive_release(struct inode *inode, struct file *file)
+static int maestro_exclusive_release(struct file *file)
 {
 	clear_bit(0, &in_use);
 	return 0;
@@ -110,15 +110,11 @@ static struct pci_driver maestro_r_driver = {
 	.remove		= __devexit_p(maestro_remove),
 };
 
-static const struct file_operations maestro_fops = {
+static const struct v4l2_file_operations maestro_fops = {
 	.owner		= THIS_MODULE,
 	.open           = maestro_exclusive_open,
 	.release        = maestro_exclusive_release,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 struct radio_device {
diff --git a/drivers/media/radio/radio-maxiradio.c b/drivers/media/radio/radio-maxiradio.c
index c777a17..c5dc00a 100644
--- a/drivers/media/radio/radio-maxiradio.c
+++ b/drivers/media/radio/radio-maxiradio.c
@@ -100,26 +100,22 @@ static unsigned long in_use;
 #define BITS2FREQ(x)	((x) * FREQ_STEP - FREQ_IF)
 
 
-static int maxiradio_exclusive_open(struct inode *inode, struct file *file)
+static int maxiradio_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &in_use) ? -EBUSY : 0;
 }
 
-static int maxiradio_exclusive_release(struct inode *inode, struct file *file)
+static int maxiradio_exclusive_release(struct file *file)
 {
 	clear_bit(0, &in_use);
 	return 0;
 }
 
-static const struct file_operations maxiradio_fops = {
+static const struct v4l2_file_operations maxiradio_fops = {
 	.owner		= THIS_MODULE,
 	.open           = maxiradio_exclusive_open,
 	.release        = maxiradio_exclusive_release,
 	.ioctl          = video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static struct radio_device
diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c
index e730edd..0747dc8 100644
--- a/drivers/media/radio/radio-mr800.c
+++ b/drivers/media/radio/radio-mr800.c
@@ -127,8 +127,8 @@ static struct v4l2_queryctrl radio_qctrl[] = {
 static int usb_amradio_probe(struct usb_interface *intf,
 			     const struct usb_device_id *id);
 static void usb_amradio_disconnect(struct usb_interface *intf);
-static int usb_amradio_open(struct inode *inode, struct file *file);
-static int usb_amradio_close(struct inode *inode, struct file *file);
+static int usb_amradio_open(struct file *file);
+static int usb_amradio_close(struct file *file);
 static int usb_amradio_suspend(struct usb_interface *intf,
 				pm_message_t message);
 static int usb_amradio_resume(struct usb_interface *intf);
@@ -500,7 +500,7 @@ static int vidioc_s_input(struct file *filp, void *priv, unsigned int i)
 }
 
 /* open device - amradio_start() and amradio_setfreq() */
-static int usb_amradio_open(struct inode *inode, struct file *file)
+static int usb_amradio_open(struct file *file)
 {
 	struct amradio_device *radio = video_get_drvdata(video_devdata(file));
 
@@ -525,7 +525,7 @@ static int usb_amradio_open(struct inode *inode, struct file *file)
 }
 
 /*close device */
-static int usb_amradio_close(struct inode *inode, struct file *file)
+static int usb_amradio_close(struct file *file)
 {
 	struct amradio_device *radio = video_get_drvdata(video_devdata(file));
 	int retval;
@@ -572,15 +572,11 @@ static int usb_amradio_resume(struct usb_interface *intf)
 }
 
 /* File system interface */
-static const struct file_operations usb_amradio_fops = {
+static const struct v4l2_file_operations usb_amradio_fops = {
 	.owner		= THIS_MODULE,
 	.open		= usb_amradio_open,
 	.release	= usb_amradio_close,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek		= no_llseek,
 };
 
 static const struct v4l2_ioctl_ops usb_amradio_ioctl_ops = {
diff --git a/drivers/media/radio/radio-rtrack2.c b/drivers/media/radio/radio-rtrack2.c
index 7704f24..2587227 100644
--- a/drivers/media/radio/radio-rtrack2.c
+++ b/drivers/media/radio/radio-rtrack2.c
@@ -280,26 +280,22 @@ static int vidioc_s_audio(struct file *file, void *priv,
 
 static struct rt_device rtrack2_unit;
 
-static int rtrack2_exclusive_open(struct inode *inode, struct file *file)
+static int rtrack2_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &rtrack2_unit.in_use) ? -EBUSY : 0;
 }
 
-static int rtrack2_exclusive_release(struct inode *inode, struct file *file)
+static int rtrack2_exclusive_release(struct file *file)
 {
 	clear_bit(0, &rtrack2_unit.in_use);
 	return 0;
 }
 
-static const struct file_operations rtrack2_fops = {
+static const struct v4l2_file_operations rtrack2_fops = {
 	.owner		= THIS_MODULE,
 	.open           = rtrack2_exclusive_open,
 	.release        = rtrack2_exclusive_release,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops rtrack2_ioctl_ops = {
diff --git a/drivers/media/radio/radio-sf16fmi.c b/drivers/media/radio/radio-sf16fmi.c
index 834d436..d358e48 100644
--- a/drivers/media/radio/radio-sf16fmi.c
+++ b/drivers/media/radio/radio-sf16fmi.c
@@ -280,26 +280,22 @@ static int vidioc_s_audio(struct file *file, void *priv,
 
 static struct fmi_device fmi_unit;
 
-static int fmi_exclusive_open(struct inode *inode, struct file *file)
+static int fmi_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &fmi_unit.in_use) ? -EBUSY : 0;
 }
 
-static int fmi_exclusive_release(struct inode *inode, struct file *file)
+static int fmi_exclusive_release(struct file *file)
 {
 	clear_bit(0, &fmi_unit.in_use);
 	return 0;
 }
 
-static const struct file_operations fmi_fops = {
+static const struct v4l2_file_operations fmi_fops = {
 	.owner		= THIS_MODULE,
 	.open           = fmi_exclusive_open,
 	.release        = fmi_exclusive_release,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops fmi_ioctl_ops = {
diff --git a/drivers/media/radio/radio-sf16fmr2.c b/drivers/media/radio/radio-sf16fmr2.c
index b1f47c3..92f17a3 100644
--- a/drivers/media/radio/radio-sf16fmr2.c
+++ b/drivers/media/radio/radio-sf16fmr2.c
@@ -396,26 +396,22 @@ static int vidioc_s_audio(struct file *file, void *priv,
 
 static struct fmr2_device fmr2_unit;
 
-static int fmr2_exclusive_open(struct inode *inode, struct file *file)
+static int fmr2_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &fmr2_unit.in_use) ? -EBUSY : 0;
 }
 
-static int fmr2_exclusive_release(struct inode *inode, struct file *file)
+static int fmr2_exclusive_release(struct file *file)
 {
 	clear_bit(0, &fmr2_unit.in_use);
 	return 0;
 }
 
-static const struct file_operations fmr2_fops = {
+static const struct v4l2_file_operations fmr2_fops = {
 	.owner          = THIS_MODULE,
 	.open           = fmr2_exclusive_open,
 	.release        = fmr2_exclusive_release,
 	.ioctl          = video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops fmr2_ioctl_ops = {
diff --git a/drivers/media/radio/radio-si470x.c b/drivers/media/radio/radio-si470x.c
index 3e18302..457445e 100644
--- a/drivers/media/radio/radio-si470x.c
+++ b/drivers/media/radio/radio-si470x.c
@@ -1075,7 +1075,7 @@ static unsigned int si470x_fops_poll(struct file *file,
 /*
  * si470x_fops_open - file open
  */
-static int si470x_fops_open(struct inode *inode, struct file *file)
+static int si470x_fops_open(struct file *file)
 {
 	struct si470x_device *radio = video_drvdata(file);
 	int retval;
@@ -1105,7 +1105,7 @@ done:
 /*
  * si470x_fops_release - file release
  */
-static int si470x_fops_release(struct inode *inode, struct file *file)
+static int si470x_fops_release(struct file *file)
 {
 	struct si470x_device *radio = video_drvdata(file);
 	int retval = 0;
@@ -1147,15 +1147,11 @@ done:
 /*
  * si470x_fops - file operations interface
  */
-static const struct file_operations si470x_fops = {
+static const struct v4l2_file_operations si470x_fops = {
 	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
 	.read		= si470x_fops_read,
 	.poll		= si470x_fops_poll,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
 	.open		= si470x_fops_open,
 	.release	= si470x_fops_release,
 };
diff --git a/drivers/media/radio/radio-terratec.c b/drivers/media/radio/radio-terratec.c
index 0abb186..0798d71 100644
--- a/drivers/media/radio/radio-terratec.c
+++ b/drivers/media/radio/radio-terratec.c
@@ -352,26 +352,22 @@ static int vidioc_s_audio(struct file *file, void *priv,
 
 static struct tt_device terratec_unit;
 
-static int terratec_exclusive_open(struct inode *inode, struct file *file)
+static int terratec_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &terratec_unit.in_use) ? -EBUSY : 0;
 }
 
-static int terratec_exclusive_release(struct inode *inode, struct file *file)
+static int terratec_exclusive_release(struct file *file)
 {
 	clear_bit(0, &terratec_unit.in_use);
 	return 0;
 }
 
-static const struct file_operations terratec_fops = {
+static const struct v4l2_file_operations terratec_fops = {
 	.owner		= THIS_MODULE,
 	.open           = terratec_exclusive_open,
 	.release        = terratec_exclusive_release,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops terratec_ioctl_ops = {
diff --git a/drivers/media/radio/radio-trust.c b/drivers/media/radio/radio-trust.c
index e7b111f..bdf9cb6 100644
--- a/drivers/media/radio/radio-trust.c
+++ b/drivers/media/radio/radio-trust.c
@@ -337,26 +337,22 @@ static int vidioc_s_audio(struct file *file, void *priv,
 	return 0;
 }
 
-static int trust_exclusive_open(struct inode *inode, struct file *file)
+static int trust_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &in_use) ? -EBUSY : 0;
 }
 
-static int trust_exclusive_release(struct inode *inode, struct file *file)
+static int trust_exclusive_release(struct file *file)
 {
 	clear_bit(0, &in_use);
 	return 0;
 }
 
-static const struct file_operations trust_fops = {
+static const struct v4l2_file_operations trust_fops = {
 	.owner		= THIS_MODULE,
 	.open           = trust_exclusive_open,
 	.release        = trust_exclusive_release,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops trust_ioctl_ops = {
diff --git a/drivers/media/radio/radio-typhoon.c b/drivers/media/radio/radio-typhoon.c
index 952ec35..5c3b319 100644
--- a/drivers/media/radio/radio-typhoon.c
+++ b/drivers/media/radio/radio-typhoon.c
@@ -330,26 +330,22 @@ static struct typhoon_device typhoon_unit =
 	.mutefreq	= CONFIG_RADIO_TYPHOON_MUTEFREQ,
 };
 
-static int typhoon_exclusive_open(struct inode *inode, struct file *file)
+static int typhoon_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &typhoon_unit.in_use) ? -EBUSY : 0;
 }
 
-static int typhoon_exclusive_release(struct inode *inode, struct file *file)
+static int typhoon_exclusive_release(struct file *file)
 {
 	clear_bit(0, &typhoon_unit.in_use);
 	return 0;
 }
 
-static const struct file_operations typhoon_fops = {
+static const struct v4l2_file_operations typhoon_fops = {
 	.owner		= THIS_MODULE,
 	.open           = typhoon_exclusive_open,
 	.release        = typhoon_exclusive_release,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops typhoon_ioctl_ops = {
diff --git a/drivers/media/radio/radio-zoltrix.c b/drivers/media/radio/radio-zoltrix.c
index 15b10ba..d2ac17e 100644
--- a/drivers/media/radio/radio-zoltrix.c
+++ b/drivers/media/radio/radio-zoltrix.c
@@ -401,27 +401,23 @@ static int vidioc_s_audio(struct file *file, void *priv,
 
 static struct zol_device zoltrix_unit;
 
-static int zoltrix_exclusive_open(struct inode *inode, struct file *file)
+static int zoltrix_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &zoltrix_unit.in_use) ? -EBUSY : 0;
 }
 
-static int zoltrix_exclusive_release(struct inode *inode, struct file *file)
+static int zoltrix_exclusive_release(struct file *file)
 {
 	clear_bit(0, &zoltrix_unit.in_use);
 	return 0;
 }
 
-static const struct file_operations zoltrix_fops =
+static const struct v4l2_file_operations zoltrix_fops =
 {
 	.owner		= THIS_MODULE,
 	.open           = zoltrix_exclusive_open,
 	.release        = zoltrix_exclusive_release,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops zoltrix_ioctl_ops = {
diff --git a/drivers/media/video/arv.c b/drivers/media/video/arv.c
index 2ba6abd..f18fb73 100644
--- a/drivers/media/video/arv.c
+++ b/drivers/media/video/arv.c
@@ -539,7 +539,7 @@ static int ar_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int ar_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+static int ar_ioctl(struct file *file, unsigned int cmd,
 		    unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, ar_do_ioctl);
@@ -744,27 +744,23 @@ void ar_release(struct video_device *vfd)
  ****************************************************************************/
 static struct ar_device ardev;
 
-static int ar_exclusive_open(struct inode *inode, struct file *file)
+static int ar_exclusive_open(struct file *file)
 {
 	return test_and_set_bit(0, &ardev.in_use) ? -EBUSY : 0;
 }
 
-static int ar_exclusive_release(struct inode *inode, struct file *file)
+static int ar_exclusive_release(struct file *file)
 {
 	clear_bit(0, &ardev.in_use);
 	return 0;
 }
 
-static const struct file_operations ar_fops = {
+static const struct v4l2_file_operations ar_fops = {
 	.owner		= THIS_MODULE,
 	.open		= ar_exclusive_open,
 	.release	= ar_exclusive_release,
 	.read		= ar_read,
 	.ioctl		= ar_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek		= no_llseek,
 };
 
 static struct video_device ar_template = {
diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index 9ec4cec..ebcb8e5 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c
@@ -3208,9 +3208,9 @@ err:
 	return POLLERR;
 }
 
-static int bttv_open(struct inode *inode, struct file *file)
+static int bttv_open(struct file *file)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	struct bttv *btv = NULL;
 	struct bttv_fh *fh;
 	enum v4l2_buf_type type = 0;
@@ -3291,7 +3291,7 @@ static int bttv_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int bttv_release(struct inode *inode, struct file *file)
+static int bttv_release(struct file *file)
 {
 	struct bttv_fh *fh = file->private_data;
 	struct bttv *btv = fh->btv;
@@ -3346,14 +3346,12 @@ bttv_mmap(struct file *file, struct vm_area_struct *vma)
 	return videobuf_mmap_mapper(bttv_queue(fh),vma);
 }
 
-static const struct file_operations bttv_fops =
+static const struct v4l2_file_operations bttv_fops =
 {
 	.owner	  = THIS_MODULE,
 	.open	  = bttv_open,
 	.release  = bttv_release,
 	.ioctl	  = video_ioctl2,
-	.compat_ioctl	= v4l_compat_ioctl32,
-	.llseek	  = no_llseek,
 	.read	  = bttv_read,
 	.mmap	  = bttv_mmap,
 	.poll     = bttv_poll,
@@ -3422,9 +3420,9 @@ static struct video_device bttv_video_template = {
 /* ----------------------------------------------------------------------- */
 /* radio interface                                                         */
 
-static int radio_open(struct inode *inode, struct file *file)
+static int radio_open(struct file *file)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	struct bttv *btv = NULL;
 	struct bttv_fh *fh;
 	unsigned int i;
@@ -3467,7 +3465,7 @@ static int radio_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int radio_release(struct inode *inode, struct file *file)
+static int radio_release(struct file *file)
 {
 	struct bttv_fh *fh = file->private_data;
 	struct bttv *btv = fh->btv;
@@ -3633,15 +3631,13 @@ static unsigned int radio_poll(struct file *file, poll_table *wait)
 	return cmd.result;
 }
 
-static const struct file_operations radio_fops =
+static const struct v4l2_file_operations radio_fops =
 {
 	.owner	  = THIS_MODULE,
 	.open	  = radio_open,
 	.read     = radio_read,
 	.release  = radio_release,
-	.compat_ioctl	= v4l_compat_ioctl32,
 	.ioctl	  = video_ioctl2,
-	.llseek	  = no_llseek,
 	.poll     = radio_poll,
 };
 
diff --git a/drivers/media/video/bw-qcam.c b/drivers/media/video/bw-qcam.c
index 17f80d0..0b02be5 100644
--- a/drivers/media/video/bw-qcam.c
+++ b/drivers/media/video/bw-qcam.c
@@ -863,7 +863,7 @@ static int qcam_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int qcam_ioctl(struct inode *inode, struct file *file,
+static int qcam_ioctl(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, qcam_do_ioctl);
@@ -893,7 +893,7 @@ static ssize_t qcam_read(struct file *file, char __user *buf,
 	return len;
 }
 
-static int qcam_exclusive_open(struct inode *inode, struct file *file)
+static int qcam_exclusive_open(struct file *file)
 {
 	struct video_device *dev = video_devdata(file);
 	struct qcam_device *qcam = (struct qcam_device *)dev;
@@ -901,7 +901,7 @@ static int qcam_exclusive_open(struct inode *inode, struct file *file)
 	return test_and_set_bit(0, &qcam->in_use) ? -EBUSY : 0;
 }
 
-static int qcam_exclusive_release(struct inode *inode, struct file *file)
+static int qcam_exclusive_release(struct file *file)
 {
 	struct video_device *dev = video_devdata(file);
 	struct qcam_device *qcam = (struct qcam_device *)dev;
@@ -910,16 +910,12 @@ static int qcam_exclusive_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static const struct file_operations qcam_fops = {
+static const struct v4l2_file_operations qcam_fops = {
 	.owner		= THIS_MODULE,
 	.open           = qcam_exclusive_open,
 	.release        = qcam_exclusive_release,
 	.ioctl          = qcam_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
 	.read		= qcam_read,
-	.llseek         = no_llseek,
 };
 static struct video_device qcam_template=
 {
diff --git a/drivers/media/video/c-qcam.c b/drivers/media/video/c-qcam.c
index 21c71eb..837c16d 100644
--- a/drivers/media/video/c-qcam.c
+++ b/drivers/media/video/c-qcam.c
@@ -665,7 +665,7 @@ static int qcam_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int qcam_ioctl(struct inode *inode, struct file *file,
+static int qcam_ioctl(struct file *file,
 		      unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, qcam_do_ioctl);
@@ -687,7 +687,7 @@ static ssize_t qcam_read(struct file *file, char __user *buf,
 	return len;
 }
 
-static int qcam_exclusive_open(struct inode *inode, struct file *file)
+static int qcam_exclusive_open(struct file *file)
 {
 	struct video_device *dev = video_devdata(file);
 	struct qcam_device *qcam = (struct qcam_device *)dev;
@@ -695,7 +695,7 @@ static int qcam_exclusive_open(struct inode *inode, struct file *file)
 	return test_and_set_bit(0, &qcam->in_use) ? -EBUSY : 0;
 }
 
-static int qcam_exclusive_release(struct inode *inode, struct file *file)
+static int qcam_exclusive_release(struct file *file)
 {
 	struct video_device *dev = video_devdata(file);
 	struct qcam_device *qcam = (struct qcam_device *)dev;
@@ -705,16 +705,12 @@ static int qcam_exclusive_release(struct inode *inode, struct file *file)
 }
 
 /* video device template */
-static const struct file_operations qcam_fops = {
+static const struct v4l2_file_operations qcam_fops = {
 	.owner		= THIS_MODULE,
 	.open           = qcam_exclusive_open,
 	.release        = qcam_exclusive_release,
 	.ioctl          = qcam_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
 	.read		= qcam_read,
-	.llseek         = no_llseek,
 };
 
 static struct video_device qcam_template=
diff --git a/drivers/media/video/cafe_ccic.c b/drivers/media/video/cafe_ccic.c
index 1740b9e..476171cf 100644
--- a/drivers/media/video/cafe_ccic.c
+++ b/drivers/media/video/cafe_ccic.c
@@ -1472,11 +1472,11 @@ static int cafe_v4l_mmap(struct file *filp, struct vm_area_struct *vma)
 
 
-static int cafe_v4l_open(struct inode *inode, struct file *filp)
+static int cafe_v4l_open(struct file *filp)
 {
 	struct cafe_camera *cam;
 
-	cam = cafe_find_dev(iminor(inode));
+	cam = cafe_find_dev(video_devdata(filp)->minor);
 	if (cam == NULL)
 		return -ENODEV;
 	filp->private_data = cam;
@@ -1494,7 +1494,7 @@ static int cafe_v4l_open(struct inode *inode, struct file *filp)
 }
 
 
-static int cafe_v4l_release(struct inode *inode, struct file *filp)
+static int cafe_v4l_release(struct file *filp)
 {
 	struct cafe_camera *cam = filp->private_data;
 
@@ -1759,7 +1759,7 @@ static void cafe_v4l_dev_release(struct video_device *vd)
  * clone it for specific real devices.
  */
 
-static const struct file_operations cafe_v4l_fops = {
+static const struct v4l2_file_operations cafe_v4l_fops = {
 	.owner = THIS_MODULE,
 	.open = cafe_v4l_open,
 	.release = cafe_v4l_release,
@@ -1767,7 +1767,6 @@ static const struct file_operations cafe_v4l_fops = {
 	.poll = cafe_v4l_poll,
 	.mmap = cafe_v4l_mmap,
 	.ioctl = video_ioctl2,
-	.llseek = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops cafe_v4l_ioctl_ops = {
diff --git a/drivers/media/video/cpia.c b/drivers/media/video/cpia.c
index 028a400..9925ec0 100644
--- a/drivers/media/video/cpia.c
+++ b/drivers/media/video/cpia.c
@@ -3148,7 +3148,7 @@ static void put_cam(struct cpia_camera_ops* ops)
 }
 
 /* ------------------------- V4L interface --------------------- */
-static int cpia_open(struct inode *inode, struct file *file)
+static int cpia_open(struct file *file)
 {
 	struct video_device *dev = video_devdata(file);
 	struct cam_data *cam = video_get_drvdata(dev);
@@ -3225,7 +3225,7 @@ static int cpia_open(struct inode *inode, struct file *file)
 	return err;
 }
 
-static int cpia_close(struct inode *inode, struct file *file)
+static int cpia_close(struct file *file)
 {
 	struct  video_device *dev = file->private_data;
 	struct cam_data *cam = video_get_drvdata(dev);
@@ -3720,7 +3720,7 @@ static int cpia_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return retval;
 }
 
-static int cpia_ioctl(struct inode *inode, struct file *file,
+static int cpia_ioctl(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, cpia_do_ioctl);
@@ -3780,17 +3780,13 @@ static int cpia_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static const struct file_operations cpia_fops = {
+static const struct v4l2_file_operations cpia_fops = {
 	.owner		= THIS_MODULE,
 	.open		= cpia_open,
 	.release       	= cpia_close,
 	.read		= cpia_read,
 	.mmap		= cpia_mmap,
 	.ioctl          = cpia_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static struct video_device cpia_template = {
diff --git a/drivers/media/video/cpia2/cpia2_v4l.c b/drivers/media/video/cpia2/cpia2_v4l.c
index 3c2d7ea..91870cc 100644
--- a/drivers/media/video/cpia2/cpia2_v4l.c
+++ b/drivers/media/video/cpia2/cpia2_v4l.c
@@ -239,7 +239,7 @@ static struct v4l2_queryctrl controls[] = {
  *  cpia2_open
  *
  *****************************************************************************/
-static int cpia2_open(struct inode *inode, struct file *file)
+static int cpia2_open(struct file *file)
 {
 	struct camera_data *cam = video_drvdata(file);
 	int retval = 0;
@@ -302,7 +302,7 @@ err_return:
  *  cpia2_close
  *
  *****************************************************************************/
-static int cpia2_close(struct inode *inode, struct file *file)
+static int cpia2_close(struct file *file)
 {
 	struct video_device *dev = video_devdata(file);
 	struct camera_data *cam = video_get_drvdata(dev);
@@ -1841,7 +1841,7 @@ static int cpia2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return retval;
 }
 
-static int cpia2_ioctl(struct inode *inode, struct file *file,
+static int cpia2_ioctl(struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, cpia2_do_ioctl);
@@ -1912,17 +1912,13 @@ static void reset_camera_struct_v4l(struct camera_data *cam)
 /***
  * The v4l video device structure initialized for this device
  ***/
-static const struct file_operations fops_template = {
+static const struct v4l2_file_operations fops_template = {
 	.owner		= THIS_MODULE,
 	.open		= cpia2_open,
 	.release	= cpia2_close,
 	.read		= cpia2_v4l_read,
 	.poll		= cpia2_v4l_poll,
 	.ioctl		= cpia2_ioctl,
-	.llseek		= no_llseek,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
 	.mmap		= cpia2_mmap,
 };
 
diff --git a/drivers/media/video/cx18/cx18-fileops.c b/drivers/media/video/cx18/cx18-fileops.c
index 425271a..055f6e0 100644
--- a/drivers/media/video/cx18/cx18-fileops.c
+++ b/drivers/media/video/cx18/cx18-fileops.c
@@ -552,7 +552,7 @@ void cx18_stop_capture(struct cx18_open_id *id, int gop_end)
 	}
 }
 
-int cx18_v4l2_close(struct inode *inode, struct file *filp)
+int cx18_v4l2_close(struct file *filp)
 {
 	struct cx18_open_id *id = filp->private_data;
 	struct cx18 *cx = id->cx;
@@ -650,12 +650,12 @@ static int cx18_serialized_open(struct cx18_stream *s, struct file *filp)
 	return 0;
 }
 
-int cx18_v4l2_open(struct inode *inode, struct file *filp)
+int cx18_v4l2_open(struct file *filp)
 {
 	int res, x, y = 0;
 	struct cx18 *cx = NULL;
 	struct cx18_stream *s = NULL;
-	int minor = iminor(inode);
+	int minor = video_devdata(filp)->minor;
 
 	/* Find which card this open was on */
 	spin_lock(&cx18_cards_lock);
diff --git a/drivers/media/video/cx18/cx18-fileops.h b/drivers/media/video/cx18/cx18-fileops.h
index 46da028..92e2d5d 100644
--- a/drivers/media/video/cx18/cx18-fileops.h
+++ b/drivers/media/video/cx18/cx18-fileops.h
@@ -22,12 +22,12 @@
  */
 
 /* Testing/Debugging */
-int cx18_v4l2_open(struct inode *inode, struct file *filp);
+int cx18_v4l2_open(struct file *filp);
 ssize_t cx18_v4l2_read(struct file *filp, char __user *buf, size_t count,
 		      loff_t *pos);
 ssize_t cx18_v4l2_write(struct file *filp, const char __user *buf, size_t count,
 		       loff_t *pos);
-int cx18_v4l2_close(struct inode *inode, struct file *filp);
+int cx18_v4l2_close(struct file *filp);
 unsigned int cx18_v4l2_enc_poll(struct file *filp, poll_table *wait);
 int cx18_start_capture(struct cx18_open_id *id);
 void cx18_stop_capture(struct cx18_open_id *id, int gop_end);
diff --git a/drivers/media/video/cx18/cx18-ioctl.c b/drivers/media/video/cx18/cx18-ioctl.c
index e608748..5023075 100644
--- a/drivers/media/video/cx18/cx18-ioctl.c
+++ b/drivers/media/video/cx18/cx18-ioctl.c
@@ -783,7 +783,7 @@ static int cx18_default(struct file *file, void *fh, int cmd, void *arg)
 	return 0;
 }
 
-int cx18_v4l2_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
+int cx18_v4l2_ioctl(struct file *filp, unsigned int cmd,
 		    unsigned long arg)
 {
 	struct video_device *vfd = video_devdata(filp);
@@ -795,7 +795,7 @@ int cx18_v4l2_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 
 	if (cx18_debug & CX18_DBGFLG_IOCTL)
 		vfd->debug = V4L2_DEBUG_IOCTL | V4L2_DEBUG_IOCTL_ARG;
-	res = video_ioctl2(inode, filp, cmd, arg);
+	res = video_ioctl2(filp, cmd, arg);
 	vfd->debug = 0;
 	mutex_unlock(&cx->serialize_lock);
 	return res;
diff --git a/drivers/media/video/cx18/cx18-ioctl.h b/drivers/media/video/cx18/cx18-ioctl.h
index 08fe24e..50b8d60 100644
--- a/drivers/media/video/cx18/cx18-ioctl.h
+++ b/drivers/media/video/cx18/cx18-ioctl.h
@@ -29,5 +29,5 @@ void cx18_set_funcs(struct video_device *vdev);
 int cx18_s_std(struct file *file, void *fh, v4l2_std_id *std);
 int cx18_s_frequency(struct file *file, void *fh, struct v4l2_frequency *vf);
 int cx18_s_input(struct file *file, void *fh, unsigned int inp);
-int cx18_v4l2_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
+int cx18_v4l2_ioctl(struct file *filp, unsigned int cmd,
 		    unsigned long arg);
diff --git a/drivers/media/video/cx18/cx18-streams.c b/drivers/media/video/cx18/cx18-streams.c
index 63c336c..89c1ec9 100644
--- a/drivers/media/video/cx18/cx18-streams.c
+++ b/drivers/media/video/cx18/cx18-streams.c
@@ -37,13 +37,12 @@
 
 #define CX18_DSP0_INTERRUPT_MASK     	0xd0004C
 
-static struct file_operations cx18_v4l2_enc_fops = {
+static struct v4l2_file_operations cx18_v4l2_enc_fops = {
 	.owner = THIS_MODULE,
 	.read = cx18_v4l2_read,
 	.open = cx18_v4l2_open,
 	/* FIXME change to video_ioctl2 if serialization lock can be removed */
 	.ioctl = cx18_v4l2_ioctl,
-	.compat_ioctl = v4l_compat_ioctl32,
 	.release = cx18_v4l2_close,
 	.poll = cx18_v4l2_enc_poll,
 };
@@ -61,49 +60,41 @@ static struct {
 	int num_offset;
 	int dma;
 	enum v4l2_buf_type buf_type;
-	struct file_operations *fops;
 } cx18_stream_info[] = {
 	{	/* CX18_ENC_STREAM_TYPE_MPG */
 		"encoder MPEG",
 		VFL_TYPE_GRABBER, 0,
 		PCI_DMA_FROMDEVICE, V4L2_BUF_TYPE_VIDEO_CAPTURE,
-		&cx18_v4l2_enc_fops
 	},
 	{	/* CX18_ENC_STREAM_TYPE_TS */
 		"TS",
 		VFL_TYPE_GRABBER, -1,
 		PCI_DMA_FROMDEVICE, V4L2_BUF_TYPE_VIDEO_CAPTURE,
-		&cx18_v4l2_enc_fops
 	},
 	{	/* CX18_ENC_STREAM_TYPE_YUV */
 		"encoder YUV",
 		VFL_TYPE_GRABBER, CX18_V4L2_ENC_YUV_OFFSET,
 		PCI_DMA_FROMDEVICE, V4L2_BUF_TYPE_VIDEO_CAPTURE,
-		&cx18_v4l2_enc_fops
 	},
 	{	/* CX18_ENC_STREAM_TYPE_VBI */
 		"encoder VBI",
 		VFL_TYPE_VBI, 0,
 		PCI_DMA_FROMDEVICE, V4L2_BUF_TYPE_VBI_CAPTURE,
-		&cx18_v4l2_enc_fops
 	},
 	{	/* CX18_ENC_STREAM_TYPE_PCM */
 		"encoder PCM audio",
 		VFL_TYPE_GRABBER, CX18_V4L2_ENC_PCM_OFFSET,
 		PCI_DMA_FROMDEVICE, V4L2_BUF_TYPE_PRIVATE,
-		&cx18_v4l2_enc_fops
 	},
 	{	/* CX18_ENC_STREAM_TYPE_IDX */
 		"encoder IDX",
 		VFL_TYPE_GRABBER, -1,
 		PCI_DMA_FROMDEVICE, V4L2_BUF_TYPE_VIDEO_CAPTURE,
-		&cx18_v4l2_enc_fops
 	},
 	{	/* CX18_ENC_STREAM_TYPE_RAD */
 		"encoder radio",
 		VFL_TYPE_RADIO, 0,
 		PCI_DMA_NONE, V4L2_BUF_TYPE_PRIVATE,
-		&cx18_v4l2_enc_fops
 	},
 };
 
@@ -184,7 +175,7 @@ static int cx18_prep_dev(struct cx18 *cx, int type)
 
 	s->v4l2dev->num = num;
 	s->v4l2dev->parent = &cx->dev->dev;
-	s->v4l2dev->fops = cx18_stream_info[type].fops;
+	s->v4l2dev->fops = &cx18_v4l2_enc_fops;
 	s->v4l2dev->release = video_device_release;
 	s->v4l2dev->tvnorms = V4L2_STD_ALL;
 	cx18_set_funcs(s->v4l2dev);
diff --git a/drivers/media/video/cx23885/cx23885-417.c b/drivers/media/video/cx23885/cx23885-417.c
index 798d240..d988813 100644
--- a/drivers/media/video/cx23885/cx23885-417.c
+++ b/drivers/media/video/cx23885/cx23885-417.c
@@ -1573,9 +1573,9 @@ static int vidioc_queryctrl(struct file *file, void *priv,
 	return cx23885_queryctrl(dev, c);
 }
 
-static int mpeg_open(struct inode *inode, struct file *file)
+static int mpeg_open(struct file *file)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	struct cx23885_dev *h, *dev = NULL;
 	struct list_head *list;
 	struct cx23885_fh *fh;
@@ -1617,7 +1617,7 @@ static int mpeg_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int mpeg_release(struct inode *inode, struct file *file)
+static int mpeg_release(struct file *file)
 {
 	struct cx23885_fh  *fh  = file->private_data;
 	struct cx23885_dev *dev = fh->dev;
@@ -1694,15 +1694,13 @@ static int mpeg_mmap(struct file *file, struct vm_area_struct *vma)
 	return videobuf_mmap_mapper(&fh->mpegq, vma);
 }
 
-static struct file_operations mpeg_fops = {
+static struct v4l2_file_operations mpeg_fops = {
 	.owner	       = THIS_MODULE,
 	.open	       = mpeg_open,
 	.release       = mpeg_release,
 	.read	       = mpeg_read,
 	.poll          = mpeg_poll,
 	.mmap	       = mpeg_mmap,
-	.ioctl	       = video_ioctl2,
-	.llseek        = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops mpeg_ioctl_ops = {
diff --git a/drivers/media/video/cx23885/cx23885-video.c b/drivers/media/video/cx23885/cx23885-video.c
index c742a10..637c4d0 100644
--- a/drivers/media/video/cx23885/cx23885-video.c
+++ b/drivers/media/video/cx23885/cx23885-video.c
@@ -718,9 +718,9 @@ static int get_resource(struct cx23885_fh *fh)
 	}
 }
 
-static int video_open(struct inode *inode, struct file *file)
+static int video_open(struct file *file)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	struct cx23885_dev *h, *dev = NULL;
 	struct cx23885_fh *fh;
 	struct list_head *list;
@@ -834,7 +834,7 @@ static unsigned int video_poll(struct file *file,
 	return 0;
 }
 
-static int video_release(struct inode *inode, struct file *file)
+static int video_release(struct file *file)
 {
 	struct cx23885_fh *fh = file->private_data;
 	struct cx23885_dev *dev = fh->dev;
@@ -1422,7 +1422,7 @@ int cx23885_video_irq(struct cx23885_dev *dev, u32 status)
 /* ----------------------------------------------------------- */
 /* exported stuff                                              */
 
-static const struct file_operations video_fops = {
+static const struct v4l2_file_operations video_fops = {
 	.owner	       = THIS_MODULE,
 	.open	       = video_open,
 	.release       = video_release,
@@ -1430,8 +1430,6 @@ static const struct file_operations video_fops = {
 	.poll          = video_poll,
 	.mmap	       = video_mmap,
 	.ioctl	       = video_ioctl2,
-	.compat_ioctl  = v4l_compat_ioctl32,
-	.llseek        = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops video_ioctl_ops = {
@@ -1479,13 +1477,11 @@ static struct video_device cx23885_video_template = {
 	.current_norm         = V4L2_STD_NTSC_M,
 };
 
-static const struct file_operations radio_fops = {
+static const struct v4l2_file_operations radio_fops = {
 	.owner         = THIS_MODULE,
 	.open          = video_open,
 	.release       = video_release,
 	.ioctl         = video_ioctl2,
-	.compat_ioctl  = v4l_compat_ioctl32,
-	.llseek        = no_llseek,
 };
 
 
diff --git a/drivers/media/video/cx88/cx88-blackbird.c b/drivers/media/video/cx88/cx88-blackbird.c
index e162a70..7f5b8bf 100644
--- a/drivers/media/video/cx88/cx88-blackbird.c
+++ b/drivers/media/video/cx88/cx88-blackbird.c
@@ -1049,16 +1049,16 @@ static int vidioc_s_std (struct file *file, void *priv, v4l2_std_id *id)
 
 /* FIXME: cx88_ioctl_hook not implemented */
 
-static int mpeg_open(struct inode *inode, struct file *file)
+static int mpeg_open(struct file *file)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	struct cx8802_dev *dev = NULL;
 	struct cx8802_fh *fh;
 	struct cx8802_driver *drv = NULL;
 	int err;
 
 	lock_kernel();
-	dev = cx8802_get_device(inode);
+	dev = cx8802_get_device(minor);
 
 	dprintk( 1, "%s\n", __func__);
 
@@ -1114,7 +1114,7 @@ static int mpeg_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int mpeg_release(struct inode *inode, struct file *file)
+static int mpeg_release(struct file *file)
 {
 	struct cx8802_fh  *fh  = file->private_data;
 	struct cx8802_dev *dev = fh->dev;
@@ -1132,7 +1132,7 @@ static int mpeg_release(struct inode *inode, struct file *file)
 	kfree(fh);
 
 	/* Make sure we release the hardware */
-	dev = cx8802_get_device(inode);
+	dev = cx8802_get_device(video_devdata(file)->minor);
 	if (dev == NULL)
 		return -ENODEV;
 
@@ -1178,7 +1178,7 @@ mpeg_mmap(struct file *file, struct vm_area_struct * vma)
 	return videobuf_mmap_mapper(&fh->mpegq, vma);
 }
 
-static const struct file_operations mpeg_fops =
+static const struct v4l2_file_operations mpeg_fops =
 {
 	.owner	       = THIS_MODULE,
 	.open	       = mpeg_open,
@@ -1187,7 +1187,6 @@ static const struct file_operations mpeg_fops =
 	.poll          = mpeg_poll,
 	.mmap	       = mpeg_mmap,
 	.ioctl	       = video_ioctl2,
-	.llseek        = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops mpeg_ioctl_ops = {
diff --git a/drivers/media/video/cx88/cx88-mpeg.c b/drivers/media/video/cx88/cx88-mpeg.c
index a04fee2..59164fc 100644
--- a/drivers/media/video/cx88/cx88-mpeg.c
+++ b/drivers/media/video/cx88/cx88-mpeg.c
@@ -578,9 +578,8 @@ static int cx8802_resume_common(struct pci_dev *pci_dev)
 
 #if defined(CONFIG_VIDEO_CX88_BLACKBIRD) || \
     defined(CONFIG_VIDEO_CX88_BLACKBIRD_MODULE)
-struct cx8802_dev * cx8802_get_device(struct inode *inode)
+struct cx8802_dev *cx8802_get_device(int minor)
 {
-	int minor = iminor(inode);
 	struct cx8802_dev *dev;
 
 	list_for_each_entry(dev, &cx8802_devlist, devlist)
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index b96ce99..b93b7ab 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -757,9 +757,9 @@ static int get_ressource(struct cx8800_fh *fh)
 	}
 }
 
-static int video_open(struct inode *inode, struct file *file)
+static int video_open(struct file *file)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	struct cx8800_dev *h,*dev = NULL;
 	struct cx88_core *core;
 	struct cx8800_fh *fh;
@@ -904,7 +904,7 @@ video_poll(struct file *file, struct poll_table_struct *wait)
 	return 0;
 }
 
-static int video_release(struct inode *inode, struct file *file)
+static int video_release(struct file *file)
 {
 	struct cx8800_fh  *fh  = file->private_data;
 	struct cx8800_dev *dev = fh->dev;
@@ -1693,7 +1693,7 @@ static irqreturn_t cx8800_irq(int irq, void *dev_id)
 /* ----------------------------------------------------------- */
 /* exported stuff                                              */
 
-static const struct file_operations video_fops =
+static const struct v4l2_file_operations video_fops =
 {
 	.owner	       = THIS_MODULE,
 	.open	       = video_open,
@@ -1702,8 +1702,6 @@ static const struct file_operations video_fops =
 	.poll          = video_poll,
 	.mmap	       = video_mmap,
 	.ioctl	       = video_ioctl2,
-	.compat_ioctl  = v4l_compat_ioctl32,
-	.llseek        = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops video_ioctl_ops = {
@@ -1752,14 +1750,12 @@ static struct video_device cx8800_video_template = {
 	.current_norm         = V4L2_STD_NTSC_M,
 };
 
-static const struct file_operations radio_fops =
+static const struct v4l2_file_operations radio_fops =
 {
 	.owner         = THIS_MODULE,
 	.open          = video_open,
 	.release       = video_release,
 	.ioctl         = video_ioctl2,
-	.compat_ioctl  = v4l_compat_ioctl32,
-	.llseek        = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops radio_ioctl_ops = {
diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h
index 20649b2..eb9ce30 100644
--- a/drivers/media/video/cx88/cx88.h
+++ b/drivers/media/video/cx88/cx88.h
@@ -643,7 +643,7 @@ int cx88_audio_thread(void *data);
 
 int cx8802_register_driver(struct cx8802_driver *drv);
 int cx8802_unregister_driver(struct cx8802_driver *drv);
-struct cx8802_dev * cx8802_get_device(struct inode *inode);
+struct cx8802_dev *cx8802_get_device(int minor);
 struct cx8802_driver * cx8802_get_driver(struct cx8802_dev *dev, enum cx88_board_type btype);
 
 /* ----------------------------------------------------------- */
diff --git a/drivers/media/video/em28xx/em28xx-core.c b/drivers/media/video/em28xx/em28xx-core.c
index f850451..819ccea 100644
--- a/drivers/media/video/em28xx/em28xx-core.c
+++ b/drivers/media/video/em28xx/em28xx-core.c
@@ -1000,12 +1000,11 @@ void em28xx_wake_i2c(struct em28xx *dev)
 static LIST_HEAD(em28xx_devlist);
 static DEFINE_MUTEX(em28xx_devlist_mutex);
 
-struct em28xx *em28xx_get_device(struct inode *inode,
+struct em28xx *em28xx_get_device(int minor,
 				 enum v4l2_buf_type *fh_type,
 				 int *has_radio)
 {
 	struct em28xx *h, *dev = NULL;
-	int minor = iminor(inode);
 
 	*fh_type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 	*has_radio = 0;
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index 5352753..9cb7c64 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -1582,15 +1582,15 @@ static int radio_queryctrl(struct file *file, void *priv,
  * em28xx_v4l2_open()
  * inits the device and starts isoc transfer
  */
-static int em28xx_v4l2_open(struct inode *inode, struct file *filp)
+static int em28xx_v4l2_open(struct file *filp)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(filp)->minor;
 	int errCode = 0, radio;
 	struct em28xx *dev;
 	enum v4l2_buf_type fh_type;
 	struct em28xx_fh *fh;
 
-	dev = em28xx_get_device(inode, &fh_type, &radio);
+	dev = em28xx_get_device(minor, &fh_type, &radio);
 
 	if (NULL == dev)
 		return -ENODEV;
@@ -1686,7 +1686,7 @@ void em28xx_release_analog_resources(struct em28xx *dev)
  * stops streaming and deallocates all resources allocated by the v4l2
  * calls and ioctls
  */
-static int em28xx_v4l2_close(struct inode *inode, struct file *filp)
+static int em28xx_v4l2_close(struct file *filp)
 {
 	struct em28xx_fh *fh  = filp->private_data;
 	struct em28xx    *dev = fh->dev;
@@ -1826,7 +1826,7 @@ static int em28xx_v4l2_mmap(struct file *filp, struct vm_area_struct *vma)
 	return rc;
 }
 
-static const struct file_operations em28xx_v4l_fops = {
+static const struct v4l2_file_operations em28xx_v4l_fops = {
 	.owner         = THIS_MODULE,
 	.open          = em28xx_v4l2_open,
 	.release       = em28xx_v4l2_close,
@@ -1834,8 +1834,6 @@ static const struct file_operations em28xx_v4l_fops = {
 	.poll          = em28xx_v4l2_poll,
 	.mmap          = em28xx_v4l2_mmap,
 	.ioctl	       = video_ioctl2,
-	.llseek        = no_llseek,
-	.compat_ioctl  = v4l_compat_ioctl32,
 };
 
 static const struct v4l2_ioctl_ops video_ioctl_ops = {
@@ -1890,13 +1888,11 @@ static const struct video_device em28xx_video_template = {
 	.current_norm               = V4L2_STD_PAL,
 };
 
-static const struct file_operations radio_fops = {
+static const struct v4l2_file_operations radio_fops = {
 	.owner         = THIS_MODULE,
 	.open          = em28xx_v4l2_open,
 	.release       = em28xx_v4l2_close,
 	.ioctl	       = video_ioctl2,
-	.compat_ioctl  = v4l_compat_ioctl32,
-	.llseek        = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops radio_ioctl_ops = {
diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h
index b5eddc2..afc5f6d 100644
--- a/drivers/media/video/em28xx/em28xx.h
+++ b/drivers/media/video/em28xx/em28xx.h
@@ -583,7 +583,7 @@ int em28xx_gpio_set(struct em28xx *dev, struct em28xx_reg_seq *gpio);
 void em28xx_wake_i2c(struct em28xx *dev);
 void em28xx_remove_from_devlist(struct em28xx *dev);
 void em28xx_add_into_devlist(struct em28xx *dev);
-struct em28xx *em28xx_get_device(struct inode *inode,
+struct em28xx *em28xx_get_device(int minor,
 				 enum v4l2_buf_type *fh_type,
 				 int *has_radio);
 int em28xx_register_extension(struct em28xx_ops *dev);
diff --git a/drivers/media/video/et61x251/et61x251_core.c b/drivers/media/video/et61x251/et61x251_core.c
index 83c0711..3aeb879 100644
--- a/drivers/media/video/et61x251/et61x251_core.c
+++ b/drivers/media/video/et61x251/et61x251_core.c
@@ -1206,7 +1206,7 @@ static void et61x251_release_resources(struct kref *kref)
 }
 
 
-static int et61x251_open(struct inode* inode, struct file* filp)
+static int et61x251_open(struct file *filp)
 {
 	struct et61x251_device* cam;
 	int err = 0;
@@ -1291,7 +1291,7 @@ out:
 }
 
 
-static int et61x251_release(struct inode* inode, struct file* filp)
+static int et61x251_release(struct file *filp)
 {
 	struct et61x251_device* cam;
 
@@ -2392,8 +2392,8 @@ et61x251_vidioc_s_parm(struct et61x251_device* cam, void __user * arg)
 }
 
 
-static int et61x251_ioctl_v4l2(struct inode* inode, struct file* filp,
-			       unsigned int cmd, void __user * arg)
+static int et61x251_ioctl_v4l2(struct file *filp,
+			       unsigned int cmd, void __user *arg)
 {
 	struct et61x251_device *cam = video_drvdata(filp);
 
@@ -2487,7 +2487,7 @@ static int et61x251_ioctl_v4l2(struct inode* inode, struct file* filp,
 }
 
 
-static int et61x251_ioctl(struct inode* inode, struct file* filp,
+static int et61x251_ioctl(struct file *filp,
 			 unsigned int cmd, unsigned long arg)
 {
 	struct et61x251_device *cam = video_drvdata(filp);
@@ -2511,7 +2511,7 @@ static int et61x251_ioctl(struct inode* inode, struct file* filp,
 
 	V4LDBG(3, "et61x251", cmd);
 
-	err = et61x251_ioctl_v4l2(inode, filp, cmd, (void __user *)arg);
+	err = et61x251_ioctl_v4l2(filp, cmd, (void __user *)arg);
 
 	mutex_unlock(&cam->fileop_mutex);
 
@@ -2519,18 +2519,14 @@ static int et61x251_ioctl(struct inode* inode, struct file* filp,
 }
 
 
-static const struct file_operations et61x251_fops = {
+static const struct v4l2_file_operations et61x251_fops = {
 	.owner = THIS_MODULE,
 	.open =    et61x251_open,
 	.release = et61x251_release,
 	.ioctl =   et61x251_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
 	.read =    et61x251_read,
 	.poll =    et61x251_poll,
 	.mmap =    et61x251_mmap,
-	.llseek =  no_llseek,
 };
 
 /*****************************************************************************/
diff --git a/drivers/media/video/gspca/gspca.c b/drivers/media/video/gspca/gspca.c
index 8b9f3bd..5e36b9a 100644
--- a/drivers/media/video/gspca/gspca.c
+++ b/drivers/media/video/gspca/gspca.c
@@ -875,7 +875,7 @@ static void gspca_release(struct video_device *vfd)
 	kfree(gspca_dev);
 }
 
-static int dev_open(struct inode *inode, struct file *file)
+static int dev_open(struct file *file)
 {
 	struct gspca_dev *gspca_dev;
 	int ret;
@@ -922,7 +922,7 @@ out:
 	return ret;
 }
 
-static int dev_close(struct inode *inode, struct file *file)
+static int dev_close(struct file *file)
 {
 	struct gspca_dev *gspca_dev = file->private_data;
 
@@ -1802,17 +1802,13 @@ out:
 	return ret;
 }
 
-static struct file_operations dev_fops = {
+static struct v4l2_file_operations dev_fops = {
 	.owner = THIS_MODULE,
 	.open = dev_open,
 	.release = dev_close,
 	.read = dev_read,
 	.mmap = dev_mmap,
-	.unlocked_ioctl = __video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
-	.llseek = no_llseek,
+	.unlocked_ioctl = video_ioctl2,
 	.poll	= dev_poll,
 };
 
diff --git a/drivers/media/video/ivtv/ivtv-fileops.c b/drivers/media/video/ivtv/ivtv-fileops.c
index 5eb5875..d594bc2 100644
--- a/drivers/media/video/ivtv/ivtv-fileops.c
+++ b/drivers/media/video/ivtv/ivtv-fileops.c
@@ -831,7 +831,7 @@ static void ivtv_stop_decoding(struct ivtv_open_id *id, int flags, u64 pts)
 	ivtv_release_stream(s);
 }
 
-int ivtv_v4l2_close(struct inode *inode, struct file *filp)
+int ivtv_v4l2_close(struct file *filp)
 {
 	struct ivtv_open_id *id = filp->private_data;
 	struct ivtv *itv = id->itv;
@@ -978,7 +978,7 @@ static int ivtv_serialized_open(struct ivtv_stream *s, struct file *filp)
 	return 0;
 }
 
-int ivtv_v4l2_open(struct inode *inode, struct file *filp)
+int ivtv_v4l2_open(struct file *filp)
 {
 	int res;
 	struct ivtv *itv = NULL;
diff --git a/drivers/media/video/ivtv/ivtv-fileops.h b/drivers/media/video/ivtv/ivtv-fileops.h
index df81e79..049a292 100644
--- a/drivers/media/video/ivtv/ivtv-fileops.h
+++ b/drivers/media/video/ivtv/ivtv-fileops.h
@@ -22,12 +22,12 @@
 #define IVTV_FILEOPS_H
 
 /* Testing/Debugging */
-int ivtv_v4l2_open(struct inode *inode, struct file *filp);
+int ivtv_v4l2_open(struct file *filp);
 ssize_t ivtv_v4l2_read(struct file *filp, char __user *buf, size_t count,
 		      loff_t * pos);
 ssize_t ivtv_v4l2_write(struct file *filp, const char __user *buf, size_t count,
 		       loff_t * pos);
-int ivtv_v4l2_close(struct inode *inode, struct file *filp);
+int ivtv_v4l2_close(struct file *filp);
 unsigned int ivtv_v4l2_enc_poll(struct file *filp, poll_table * wait);
 unsigned int ivtv_v4l2_dec_poll(struct file *filp, poll_table * wait);
 int ivtv_start_capture(struct ivtv_open_id *id);
diff --git a/drivers/media/video/ivtv/ivtv-ioctl.c b/drivers/media/video/ivtv/ivtv-ioctl.c
index cd990a4..a6cd024 100644
--- a/drivers/media/video/ivtv/ivtv-ioctl.c
+++ b/drivers/media/video/ivtv/ivtv-ioctl.c
@@ -1827,7 +1827,7 @@ static long ivtv_serialized_ioctl(struct ivtv *itv, struct file *filp,
 
 	if (ivtv_debug & IVTV_DBGFLG_IOCTL)
 		vfd->debug = V4L2_DEBUG_IOCTL | V4L2_DEBUG_IOCTL_ARG;
-	ret = __video_ioctl2(filp, cmd, arg);
+	ret = video_ioctl2(filp, cmd, arg);
 	vfd->debug = 0;
 	return ret;
 }
diff --git a/drivers/media/video/ivtv/ivtv-streams.c b/drivers/media/video/ivtv/ivtv-streams.c
index f77d764..854a950 100644
--- a/drivers/media/video/ivtv/ivtv-streams.c
+++ b/drivers/media/video/ivtv/ivtv-streams.c
@@ -43,24 +43,22 @@
 #include "ivtv-cards.h"
 #include "ivtv-streams.h"
 
-static const struct file_operations ivtv_v4l2_enc_fops = {
+static const struct v4l2_file_operations ivtv_v4l2_enc_fops = {
 	.owner = THIS_MODULE,
 	.read = ivtv_v4l2_read,
 	.write = ivtv_v4l2_write,
 	.open = ivtv_v4l2_open,
 	.unlocked_ioctl = ivtv_v4l2_ioctl,
-	.compat_ioctl = v4l_compat_ioctl32,
 	.release = ivtv_v4l2_close,
 	.poll = ivtv_v4l2_enc_poll,
 };
 
-static const struct file_operations ivtv_v4l2_dec_fops = {
+static const struct v4l2_file_operations ivtv_v4l2_dec_fops = {
 	.owner = THIS_MODULE,
 	.read = ivtv_v4l2_read,
 	.write = ivtv_v4l2_write,
 	.open = ivtv_v4l2_open,
 	.unlocked_ioctl = ivtv_v4l2_ioctl,
-	.compat_ioctl = v4l_compat_ioctl32,
 	.release = ivtv_v4l2_close,
 	.poll = ivtv_v4l2_dec_poll,
 };
@@ -78,7 +76,7 @@ static struct {
 	int num_offset;
 	int dma, pio;
 	enum v4l2_buf_type buf_type;
-	const struct file_operations *fops;
+	const struct v4l2_file_operations *fops;
 } ivtv_stream_info[] = {
 	{	/* IVTV_ENC_STREAM_TYPE_MPG */
 		"encoder MPG",
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index 6418f4a..c408e61 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -841,7 +841,7 @@ again:
 /* video4linux integration                                                  */
 /****************************************************************************/
 
-static int meye_open(struct inode *inode, struct file *file)
+static int meye_open(struct file *file)
 {
 	int i;
 
@@ -863,7 +863,7 @@ static int meye_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int meye_release(struct inode *inode, struct file *file)
+static int meye_release(struct file *file)
 {
 	mchip_hic_stop();
 	mchip_dma_free();
@@ -1684,17 +1684,13 @@ static int meye_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static const struct file_operations meye_fops = {
+static const struct v4l2_file_operations meye_fops = {
 	.owner		= THIS_MODULE,
 	.open		= meye_open,
 	.release	= meye_release,
 	.mmap		= meye_mmap,
 	.ioctl		= video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
 	.poll		= meye_poll,
-	.llseek		= no_llseek,
 };
 
 static const struct v4l2_ioctl_ops meye_ioctl_ops = {
diff --git a/drivers/media/video/omap24xxcam.c b/drivers/media/video/omap24xxcam.c
index 85c3c7c..73eb656 100644
--- a/drivers/media/video/omap24xxcam.c
+++ b/drivers/media/video/omap24xxcam.c
@@ -1454,9 +1454,9 @@ static int omap24xxcam_mmap(struct file *file, struct vm_area_struct *vma)
 	return rval;
 }
 
-static int omap24xxcam_open(struct inode *inode, struct file *file)
+static int omap24xxcam_open(struct file *file)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	struct omap24xxcam_device *cam = omap24xxcam.priv;
 	struct omap24xxcam_fh *fh;
 	struct v4l2_format format;
@@ -1511,7 +1511,7 @@ out_try_module_get:
 	return -ENODEV;
 }
 
-static int omap24xxcam_release(struct inode *inode, struct file *file)
+static int omap24xxcam_release(struct file *file)
 {
 	struct omap24xxcam_fh *fh = file->private_data;
 	struct omap24xxcam_device *cam = fh->cam;
@@ -1559,8 +1559,7 @@ static int omap24xxcam_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations omap24xxcam_fops = {
-	.llseek	 = no_llseek,
+static struct v4l2_file_operations omap24xxcam_fops = {
 	.ioctl	 = video_ioctl2,
 	.poll	 = omap24xxcam_poll,
 	.mmap	 = omap24xxcam_mmap,
diff --git a/drivers/media/video/ov511.c b/drivers/media/video/ov511.c
index 6ee9b69..f1754dc 100644
--- a/drivers/media/video/ov511.c
+++ b/drivers/media/video/ov511.c
@@ -3915,7 +3915,7 @@ ov51x_dealloc(struct usb_ov511 *ov)
  ***************************************************************************/
 
 static int
-ov51x_v4l1_open(struct inode *inode, struct file *file)
+ov51x_v4l1_open(struct file *file)
 {
 	struct video_device *vdev = video_devdata(file);
 	struct usb_ov511 *ov = video_get_drvdata(vdev);
@@ -3972,7 +3972,7 @@ out:
 }
 
 static int
-ov51x_v4l1_close(struct inode *inode, struct file *file)
+ov51x_v4l1_close(struct file *file)
 {
 	struct video_device *vdev = file->private_data;
 	struct usb_ov511 *ov = video_get_drvdata(vdev);
@@ -4450,7 +4450,7 @@ redo:
 }
 
 static int
-ov51x_v4l1_ioctl(struct inode *inode, struct file *file,
+ov51x_v4l1_ioctl(struct file *file,
 		 unsigned int cmd, unsigned long arg)
 {
 	struct video_device *vdev = file->private_data;
@@ -4661,17 +4661,13 @@ ov51x_v4l1_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static const struct file_operations ov511_fops = {
+static const struct v4l2_file_operations ov511_fops = {
 	.owner =	THIS_MODULE,
 	.open =		ov51x_v4l1_open,
 	.release =	ov51x_v4l1_close,
 	.read =		ov51x_v4l1_read,
 	.mmap =		ov51x_v4l1_mmap,
 	.ioctl =	ov51x_v4l1_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
-	.llseek =	no_llseek,
 };
 
 static struct video_device vdev_template = {
diff --git a/drivers/media/video/pms.c b/drivers/media/video/pms.c
index 45730fa..24f2b3d 100644
--- a/drivers/media/video/pms.c
+++ b/drivers/media/video/pms.c
@@ -862,7 +862,7 @@ static int pms_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int pms_ioctl(struct inode *inode, struct file *file,
+static int pms_ioctl(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, pms_do_ioctl);
@@ -881,7 +881,7 @@ static ssize_t pms_read(struct file *file, char __user *buf,
 	return len;
 }
 
-static int pms_exclusive_open(struct inode *inode, struct file *file)
+static int pms_exclusive_open(struct file *file)
 {
 	struct video_device *v = video_devdata(file);
 	struct pms_device *pd = (struct pms_device *)v;
@@ -889,7 +889,7 @@ static int pms_exclusive_open(struct inode *inode, struct file *file)
 	return test_and_set_bit(0, &pd->in_use) ? -EBUSY : 0;
 }
 
-static int pms_exclusive_release(struct inode *inode, struct file *file)
+static int pms_exclusive_release(struct file *file)
 {
 	struct video_device *v = video_devdata(file);
 	struct pms_device *pd = (struct pms_device *)v;
@@ -898,16 +898,12 @@ static int pms_exclusive_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static const struct file_operations pms_fops = {
+static const struct v4l2_file_operations pms_fops = {
 	.owner		= THIS_MODULE,
 	.open           = pms_exclusive_open,
 	.release        = pms_exclusive_release,
 	.ioctl          = pms_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
 	.read           = pms_read,
-	.llseek         = no_llseek,
 };
 
 static struct video_device pms_template=
diff --git a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
index 52af1c4..50554b4 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
@@ -948,7 +948,7 @@ static void pvr2_v4l2_internal_check(struct pvr2_channel *chp)
 }
 
 
-static int pvr2_v4l2_ioctl(struct inode *inode, struct file *file,
+static int pvr2_v4l2_ioctl(struct file *file,
 			   unsigned int cmd, unsigned long arg)
 {
 
@@ -960,7 +960,7 @@ static int pvr2_v4l2_ioctl(struct inode *inode, struct file *file,
 }
 
 
-static int pvr2_v4l2_release(struct inode *inode, struct file *file)
+static int pvr2_v4l2_release(struct file *file)
 {
 	struct pvr2_v4l2_fh *fhp = file->private_data;
 	struct pvr2_v4l2 *vp = fhp->vhead;
@@ -1008,7 +1008,7 @@ static int pvr2_v4l2_release(struct inode *inode, struct file *file)
 }
 
 
-static int pvr2_v4l2_open(struct inode *inode, struct file *file)
+static int pvr2_v4l2_open(struct file *file)
 {
 	struct pvr2_v4l2_dev *dip; /* Our own context pointer */
 	struct pvr2_v4l2_fh *fhp;
@@ -1235,13 +1235,12 @@ static unsigned int pvr2_v4l2_poll(struct file *file, poll_table *wait)
 }
 
 
-static const struct file_operations vdev_fops = {
+static const struct v4l2_file_operations vdev_fops = {
 	.owner      = THIS_MODULE,
 	.open       = pvr2_v4l2_open,
 	.release    = pvr2_v4l2_release,
 	.read       = pvr2_v4l2_read,
 	.ioctl      = pvr2_v4l2_ioctl,
-	.llseek     = no_llseek,
 	.poll       = pvr2_v4l2_poll,
 };
 
diff --git a/drivers/media/video/pwc/pwc-if.c b/drivers/media/video/pwc/pwc-if.c
index 1ce9da1..315337b 100644
--- a/drivers/media/video/pwc/pwc-if.c
+++ b/drivers/media/video/pwc/pwc-if.c
@@ -142,16 +142,16 @@ static struct {
 
 /***/
 
-static int pwc_video_open(struct inode *inode, struct file *file);
-static int pwc_video_close(struct inode *inode, struct file *file);
+static int pwc_video_open(struct file *file);
+static int pwc_video_close(struct file *file);
 static ssize_t pwc_video_read(struct file *file, char __user *buf,
 			  size_t count, loff_t *ppos);
 static unsigned int pwc_video_poll(struct file *file, poll_table *wait);
-static int  pwc_video_ioctl(struct inode *inode, struct file *file,
+static int  pwc_video_ioctl(struct file *file,
 			    unsigned int ioctlnr, unsigned long arg);
 static int  pwc_video_mmap(struct file *file, struct vm_area_struct *vma);
 
-static const struct file_operations pwc_fops = {
+static const struct v4l2_file_operations pwc_fops = {
 	.owner =	THIS_MODULE,
 	.open =		pwc_video_open,
 	.release =     	pwc_video_close,
@@ -159,10 +159,6 @@ static const struct file_operations pwc_fops = {
 	.poll =		pwc_video_poll,
 	.mmap =		pwc_video_mmap,
 	.ioctl =        pwc_video_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
-	.llseek =       no_llseek,
 };
 static struct video_device pwc_template = {
 	.name =		"Philips Webcam",	/* Filled in later */
@@ -1104,7 +1100,7 @@ static const char *pwc_sensor_type_to_string(unsigned int sensor_type)
 /***************************************************************************/
 /* Video4Linux functions */
 
-static int pwc_video_open(struct inode *inode, struct file *file)
+static int pwc_video_open(struct file *file)
 {
 	int i, ret;
 	struct video_device *vdev = video_devdata(file);
@@ -1224,7 +1220,7 @@ static void pwc_cleanup(struct pwc_device *pdev)
 }
 
 /* Note that all cleanup is done in the reverse order as in _open */
-static int pwc_video_close(struct inode *inode, struct file *file)
+static int pwc_video_close(struct file *file)
 {
 	struct video_device *vdev = file->private_data;
 	struct pwc_device *pdev;
@@ -1399,7 +1395,7 @@ static unsigned int pwc_video_poll(struct file *file, poll_table *wait)
 	return 0;
 }
 
-static int pwc_video_ioctl(struct inode *inode, struct file *file,
+static int pwc_video_ioctl(struct file *file,
 			   unsigned int cmd, unsigned long arg)
 {
 	struct video_device *vdev = file->private_data;
diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c
index 3c3f8cf..13f85ad 100644
--- a/drivers/media/video/s2255drv.c
+++ b/drivers/media/video/s2255drv.c
@@ -1502,9 +1502,9 @@ static int vidioc_s_jpegcomp(struct file *file, void *priv,
 	dprintk(2, "setting jpeg quality %d\n", jc->quality);
 	return 0;
 }
-static int s2255_open(struct inode *inode, struct file *file)
+static int s2255_open(struct file *file)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	struct s2255_dev *h, *dev = NULL;
 	struct s2255_fh *fh;
 	struct list_head *list;
@@ -1711,11 +1711,11 @@ static void s2255_destroy(struct kref *kref)
 	mutex_unlock(&dev->open_lock);
 }
 
-static int s2255_close(struct inode *inode, struct file *file)
+static int s2255_close(struct file *file)
 {
 	struct s2255_fh *fh = file->private_data;
 	struct s2255_dev *dev = fh->dev;
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	if (!dev)
 		return -ENODEV;
 
@@ -1759,15 +1759,13 @@ static int s2255_mmap_v4l(struct file *file, struct vm_area_struct *vma)
 	return ret;
 }
 
-static const struct file_operations s2255_fops_v4l = {
+static const struct v4l2_file_operations s2255_fops_v4l = {
 	.owner = THIS_MODULE,
 	.open = s2255_open,
 	.release = s2255_close,
 	.poll = s2255_poll,
 	.ioctl = video_ioctl2,	/* V4L2 ioctl handler */
-	.compat_ioctl = v4l_compat_ioctl32,
 	.mmap = s2255_mmap_v4l,
-	.llseek = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops s2255_ioctl_ops = {
diff --git a/drivers/media/video/saa5246a.c b/drivers/media/video/saa5246a.c
index f159441..018dee5 100644
--- a/drivers/media/video/saa5246a.c
+++ b/drivers/media/video/saa5246a.c
@@ -944,7 +944,7 @@ static inline unsigned int vtx_fix_command(unsigned int cmd)
 /*
  *	Handle the locking
  */
-static int saa5246a_ioctl(struct inode *inode, struct file *file,
+static int saa5246a_ioctl(struct file *file,
 			 unsigned int cmd, unsigned long arg)
 {
 	struct saa5246a_device *t = video_drvdata(file);
@@ -957,7 +957,7 @@ static int saa5246a_ioctl(struct inode *inode, struct file *file,
 	return err;
 }
 
-static int saa5246a_open(struct inode *inode, struct file *file)
+static int saa5246a_open(struct file *file)
 {
 	struct saa5246a_device *t = video_drvdata(file);
 
@@ -999,7 +999,7 @@ static int saa5246a_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int saa5246a_release(struct inode *inode, struct file *file)
+static int saa5246a_release(struct file *file)
 {
 	struct saa5246a_device *t = video_drvdata(file);
 
@@ -1018,12 +1018,11 @@ static int saa5246a_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static const struct file_operations saa_fops = {
+static const struct v4l2_file_operations saa_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = saa5246a_open,
 	.release = saa5246a_release,
 	.ioctl	 = saa5246a_ioctl,
-	.llseek	 = no_llseek,
 };
 
 static struct video_device saa_template =
diff --git a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c
index 6ef3aff..e73bb73 100644
--- a/drivers/media/video/saa5249.c
+++ b/drivers/media/video/saa5249.c
@@ -479,7 +479,7 @@ static inline unsigned int vtx_fix_command(unsigned int cmd)
  *	Handle the locking
  */
 
-static int saa5249_ioctl(struct inode *inode, struct file *file,
+static int saa5249_ioctl(struct file *file,
 			 unsigned int cmd, unsigned long arg)
 {
 	struct saa5249_device *t = video_drvdata(file);
@@ -492,7 +492,7 @@ static int saa5249_ioctl(struct inode *inode, struct file *file,
 	return err;
 }
 
-static int saa5249_open(struct inode *inode, struct file *file)
+static int saa5249_open(struct file *file)
 {
 	struct saa5249_device *t = video_drvdata(file);
 	int pgbuf;
@@ -529,7 +529,7 @@ static int saa5249_open(struct inode *inode, struct file *file)
 
 
-static int saa5249_release(struct inode *inode, struct file *file)
+static int saa5249_release(struct file *file)
 {
 	struct saa5249_device *t = video_drvdata(file);
 
@@ -539,15 +539,11 @@ static int saa5249_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static const struct file_operations saa_fops = {
+static const struct v4l2_file_operations saa_fops = {
 	.owner		= THIS_MODULE,
 	.open		= saa5249_open,
 	.release       	= saa5249_release,
 	.ioctl          = saa5249_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek         = no_llseek,
 };
 
 static struct video_device saa_template =
diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c
index 7f40511..3beba48 100644
--- a/drivers/media/video/saa7134/saa7134-empress.c
+++ b/drivers/media/video/saa7134/saa7134-empress.c
@@ -83,9 +83,9 @@ static int ts_init_encoder(struct saa7134_dev* dev)
 
 /* ------------------------------------------------------------------ */
 
-static int ts_open(struct inode *inode, struct file *file)
+static int ts_open(struct file *file)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	struct saa7134_dev *dev;
 	int err;
 
@@ -119,7 +119,7 @@ done:
 	return err;
 }
 
-static int ts_release(struct inode *inode, struct file *file)
+static int ts_release(struct file *file)
 {
 	struct saa7134_dev *dev = file->private_data;
 
@@ -437,7 +437,7 @@ static int empress_g_std(struct file *file, void *priv, v4l2_std_id *id)
 	return 0;
 }
 
-static const struct file_operations ts_fops =
+static const struct v4l2_file_operations ts_fops =
 {
 	.owner	  = THIS_MODULE,
 	.open	  = ts_open,
@@ -446,7 +446,6 @@ static const struct file_operations ts_fops =
 	.poll	  = ts_poll,
 	.mmap	  = ts_mmap,
 	.ioctl	  = video_ioctl2,
-	.llseek   = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops ts_ioctl_ops = {
diff --git a/drivers/media/video/saa7134/saa7134-video.c b/drivers/media/video/saa7134/saa7134-video.c
index 02bb674..6b2ab57 100644
--- a/drivers/media/video/saa7134/saa7134-video.c
+++ b/drivers/media/video/saa7134/saa7134-video.c
@@ -1326,9 +1326,9 @@ static int saa7134_resource(struct saa7134_fh *fh)
 	return 0;
 }
 
-static int video_open(struct inode *inode, struct file *file)
+static int video_open(struct file *file)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	struct saa7134_dev *dev;
 	struct saa7134_fh *fh;
 	enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
@@ -1462,7 +1462,7 @@ err:
 	return POLLERR;
 }
 
-static int video_release(struct inode *inode, struct file *file)
+static int video_release(struct file *file)
 {
 	struct saa7134_fh  *fh  = file->private_data;
 	struct saa7134_dev *dev = fh->dev;
@@ -2377,7 +2377,7 @@ static int radio_queryctrl(struct file *file, void *priv,
 	return 0;
 }
 
-static const struct file_operations video_fops =
+static const struct v4l2_file_operations video_fops =
 {
 	.owner	  = THIS_MODULE,
 	.open	  = video_open,
@@ -2386,8 +2386,6 @@ static const struct file_operations video_fops =
 	.poll     = video_poll,
 	.mmap	  = video_mmap,
 	.ioctl	  = video_ioctl2,
-	.compat_ioctl	= v4l_compat_ioctl32,
-	.llseek   = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops video_ioctl_ops = {
@@ -2441,13 +2439,11 @@ static const struct v4l2_ioctl_ops video_ioctl_ops = {
 #endif
 };
 
-static const struct file_operations radio_fops = {
+static const struct v4l2_file_operations radio_fops = {
 	.owner	  = THIS_MODULE,
 	.open	  = video_open,
 	.release  = video_release,
 	.ioctl	  = video_ioctl2,
-	.compat_ioctl	= v4l_compat_ioctl32,
-	.llseek   = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops radio_ioctl_ops = {
diff --git a/drivers/media/video/se401.c b/drivers/media/video/se401.c
index d652f25..5b27f323 100644
--- a/drivers/media/video/se401.c
+++ b/drivers/media/video/se401.c
@@ -932,7 +932,7 @@ static void usb_se401_remove_disconnected (struct usb_se401 *se401)
  ***************************************************************************/
 
 
-static int se401_open(struct inode *inode, struct file *file)
+static int se401_open(struct file *file)
 {
 	struct video_device *dev = video_devdata(file);
 	struct usb_se401 *se401 = (struct usb_se401 *)dev;
@@ -954,7 +954,7 @@ static int se401_open(struct inode *inode, struct file *file)
 	return err;
 }
 
-static int se401_close(struct inode *inode, struct file *file)
+static int se401_close(struct file *file)
 {
 	struct video_device *dev = file->private_data;
 	struct usb_se401 *se401 = (struct usb_se401 *)dev;
@@ -1138,7 +1138,7 @@ static int se401_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int se401_ioctl(struct inode *inode, struct file *file,
+static int se401_ioctl(struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, se401_do_ioctl);
@@ -1222,17 +1222,13 @@ static int se401_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static const struct file_operations se401_fops = {
+static const struct v4l2_file_operations se401_fops = {
 	.owner =	THIS_MODULE,
 	.open =         se401_open,
 	.release =      se401_close,
 	.read =         se401_read,
 	.mmap =         se401_mmap,
 	.ioctl =        se401_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
-	.llseek =       no_llseek,
 };
 static struct video_device se401_template = {
 	.name =         "se401 USB camera",
diff --git a/drivers/media/video/sn9c102/sn9c102_core.c b/drivers/media/video/sn9c102/sn9c102_core.c
index 01a8efb8..c2582e2 100644
--- a/drivers/media/video/sn9c102/sn9c102_core.c
+++ b/drivers/media/video/sn9c102/sn9c102_core.c
@@ -1746,7 +1746,7 @@ static void sn9c102_release_resources(struct kref *kref)
 }
 
 
-static int sn9c102_open(struct inode* inode, struct file* filp)
+static int sn9c102_open(struct file *filp)
 {
 	struct sn9c102_device* cam;
 	int err = 0;
@@ -1857,7 +1857,7 @@ out:
 }
 
 
-static int sn9c102_release(struct inode* inode, struct file* filp)
+static int sn9c102_release(struct file *filp)
 {
 	struct sn9c102_device* cam;
 
@@ -3092,8 +3092,8 @@ sn9c102_vidioc_s_audio(struct sn9c102_device* cam, void __user * arg)
 }
 
 
-static int sn9c102_ioctl_v4l2(struct inode* inode, struct file* filp,
-			      unsigned int cmd, void __user * arg)
+static int sn9c102_ioctl_v4l2(struct file *filp,
+			      unsigned int cmd, void __user *arg)
 {
 	struct sn9c102_device *cam = video_drvdata(filp);
 
@@ -3196,7 +3196,7 @@ static int sn9c102_ioctl_v4l2(struct inode* inode, struct file* filp,
 }
 
 
-static int sn9c102_ioctl(struct inode* inode, struct file* filp,
+static int sn9c102_ioctl(struct file *filp,
 			 unsigned int cmd, unsigned long arg)
 {
 	struct sn9c102_device *cam = video_drvdata(filp);
@@ -3220,7 +3220,7 @@ static int sn9c102_ioctl(struct inode* inode, struct file* filp,
 
 	V4LDBG(3, "sn9c102", cmd);
 
-	err = sn9c102_ioctl_v4l2(inode, filp, cmd, (void __user *)arg);
+	err = sn9c102_ioctl_v4l2(filp, cmd, (void __user *)arg);
 
 	mutex_unlock(&cam->fileop_mutex);
 
@@ -3229,18 +3229,14 @@ static int sn9c102_ioctl(struct inode* inode, struct file* filp,
 
 /*****************************************************************************/
 
-static const struct file_operations sn9c102_fops = {
+static const struct v4l2_file_operations sn9c102_fops = {
 	.owner = THIS_MODULE,
 	.open = sn9c102_open,
 	.release = sn9c102_release,
 	.ioctl = sn9c102_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
 	.read = sn9c102_read,
 	.poll = sn9c102_poll,
 	.mmap = sn9c102_mmap,
-	.llseek = no_llseek,
 };
 
 /*****************************************************************************/
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 90077cb..9986e02 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -256,7 +256,7 @@ static void soc_camera_free_user_formats(struct soc_camera_device *icd)
 	vfree(icd->user_formats);
 }
 
-static int soc_camera_open(struct inode *inode, struct file *file)
+static int soc_camera_open(struct file *file)
 {
 	struct video_device *vdev;
 	struct soc_camera_device *icd;
@@ -330,7 +330,7 @@ emgd:
 	return ret;
 }
 
-static int soc_camera_close(struct inode *inode, struct file *file)
+static int soc_camera_close(struct file *file)
 {
 	struct soc_camera_file *icf = file->private_data;
 	struct soc_camera_device *icd = icf->icd;
@@ -400,7 +400,7 @@ static unsigned int soc_camera_poll(struct file *file, poll_table *pt)
 	return ici->ops->poll(file, pt);
 }
 
-static struct file_operations soc_camera_fops = {
+static struct v4l2_file_operations soc_camera_fops = {
 	.owner		= THIS_MODULE,
 	.open		= soc_camera_open,
 	.release	= soc_camera_close,
@@ -408,7 +408,6 @@ static struct file_operations soc_camera_fops = {
 	.read		= soc_camera_read,
 	.mmap		= soc_camera_mmap,
 	.poll		= soc_camera_poll,
-	.llseek		= no_llseek,
 };
 
 static int soc_camera_s_fmt_vid_cap(struct file *file, void *priv,
diff --git a/drivers/media/video/stk-webcam.c b/drivers/media/video/stk-webcam.c
index f9516d0..26378cf 100644
--- a/drivers/media/video/stk-webcam.c
+++ b/drivers/media/video/stk-webcam.c
@@ -664,7 +664,7 @@ static void stk_free_buffers(struct stk_camera *dev)
 
 /* v4l file operations */
 
-static int v4l_stk_open(struct inode *inode, struct file *fp)
+static int v4l_stk_open(struct file *fp)
 {
 	struct stk_camera *dev;
 	struct video_device *vdev;
@@ -684,7 +684,7 @@ static int v4l_stk_open(struct inode *inode, struct file *fp)
 	return 0;
 }
 
-static int v4l_stk_release(struct inode *inode, struct file *fp)
+static int v4l_stk_release(struct file *fp)
 {
 	struct stk_camera *dev = fp->private_data;
 
@@ -1281,7 +1281,7 @@ static int stk_vidioc_enum_framesizes(struct file *filp,
 	}
 }
 
-static struct file_operations v4l_stk_fops = {
+static struct v4l2_file_operations v4l_stk_fops = {
 	.owner = THIS_MODULE,
 	.open = v4l_stk_open,
 	.release = v4l_stk_release,
@@ -1289,10 +1289,6 @@ static struct file_operations v4l_stk_fops = {
 	.poll = v4l_stk_poll,
 	.mmap = v4l_stk_mmap,
 	.ioctl = video_ioctl2,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
-	.llseek = no_llseek
 };
 
 static const struct v4l2_ioctl_ops v4l_stk_ioctl_ops = {
diff --git a/drivers/media/video/stradis.c b/drivers/media/video/stradis.c
index bbad54f..10d2608 100644
--- a/drivers/media/video/stradis.c
+++ b/drivers/media/video/stradis.c
@@ -1275,7 +1275,7 @@ static void make_clip_tab(struct saa7146 *saa, struct video_clip *cr, int ncr)
 		clip_draw_rectangle(clipmap, 0, 0, 1024, -saa->win.y);
 }
 
-static int saa_ioctl(struct inode *inode, struct file *file,
+static int saa_ioctl(struct file *file,
 		     unsigned int cmd, unsigned long argl)
 {
 	struct saa7146 *saa = file->private_data;
@@ -1877,7 +1877,7 @@ static ssize_t saa_write(struct file *file, const char __user * buf,
 	return count;
 }
 
-static int saa_open(struct inode *inode, struct file *file)
+static int saa_open(struct file *file)
 {
 	struct video_device *vdev = video_devdata(file);
 	struct saa7146 *saa = container_of(vdev, struct saa7146, video_dev);
@@ -1895,7 +1895,7 @@ static int saa_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int saa_release(struct inode *inode, struct file *file)
+static int saa_release(struct file *file)
 {
 	struct saa7146 *saa = file->private_data;
 	saa->user--;
@@ -1906,16 +1906,12 @@ static int saa_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static const struct file_operations saa_fops = {
+static const struct v4l2_file_operations saa_fops = {
 	.owner = THIS_MODULE,
 	.open = saa_open,
 	.release = saa_release,
 	.ioctl = saa_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
 	.read = saa_read,
-	.llseek = no_llseek,
 	.write = saa_write,
 	.mmap = saa_mmap,
 };
diff --git a/drivers/media/video/stv680.c b/drivers/media/video/stv680.c
index 42acc92..0783b0a 100644
--- a/drivers/media/video/stv680.c
+++ b/drivers/media/video/stv680.c
@@ -1080,7 +1080,7 @@ static int stv680_newframe (struct usb_stv *stv680, int framenr)
  * Video4Linux
  *********************************************************************/
 
-static int stv_open (struct inode *inode, struct file *file)
+static int stv_open(struct file *file)
 {
 	struct video_device *dev = video_devdata(file);
 	struct usb_stv *stv680 = video_get_drvdata(dev);
@@ -1106,7 +1106,7 @@ static int stv_open (struct inode *inode, struct file *file)
 	return err;
 }
 
-static int stv_close (struct inode *inode, struct file *file)
+static int stv_close(struct file *file)
 {
 	struct video_device *dev = file->private_data;
 	struct usb_stv *stv680 = video_get_drvdata(dev);
@@ -1299,7 +1299,7 @@ static int stv680_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int stv680_ioctl(struct inode *inode, struct file *file,
+static int stv680_ioctl(struct file *file,
 			unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, stv680_do_ioctl);
@@ -1391,17 +1391,13 @@ static ssize_t stv680_read (struct file *file, char __user *buf,
 	return realcount;
 }				/* stv680_read */
 
-static const struct file_operations stv680_fops = {
+static const struct v4l2_file_operations stv680_fops = {
 	.owner =	THIS_MODULE,
 	.open =		stv_open,
 	.release =     	stv_close,
 	.read =		stv680_read,
 	.mmap =		stv680_mmap,
 	.ioctl =        stv680_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
-	.llseek =       no_llseek,
 };
 static struct video_device stv680_template = {
 	.name =		"STV0680 USB camera",
diff --git a/drivers/media/video/usbvideo/usbvideo.c b/drivers/media/video/usbvideo/usbvideo.c
index 148a1f9..9bf8243 100644
--- a/drivers/media/video/usbvideo/usbvideo.c
+++ b/drivers/media/video/usbvideo/usbvideo.c
@@ -41,13 +41,13 @@ module_param(video_nr, int, 0);
 static void usbvideo_Disconnect(struct usb_interface *intf);
 static void usbvideo_CameraRelease(struct uvd *uvd);
 
-static int usbvideo_v4l_ioctl(struct inode *inode, struct file *file,
+static int usbvideo_v4l_ioctl(struct file *file,
 			      unsigned int cmd, unsigned long arg);
 static int usbvideo_v4l_mmap(struct file *file, struct vm_area_struct *vma);
-static int usbvideo_v4l_open(struct inode *inode, struct file *file);
+static int usbvideo_v4l_open(struct file *file);
 static ssize_t usbvideo_v4l_read(struct file *file, char __user *buf,
 			     size_t count, loff_t *ppos);
-static int usbvideo_v4l_close(struct inode *inode, struct file *file);
+static int usbvideo_v4l_close(struct file *file);
 
 static int usbvideo_StartDataPump(struct uvd *uvd);
 static void usbvideo_StopDataPump(struct uvd *uvd);
@@ -942,17 +942,13 @@ static int usbvideo_find_struct(struct usbvideo *cams)
 	return rv;
 }
 
-static const struct file_operations usbvideo_fops = {
+static const struct v4l2_file_operations usbvideo_fops = {
 	.owner =  THIS_MODULE,
 	.open =   usbvideo_v4l_open,
 	.release =usbvideo_v4l_close,
 	.read =   usbvideo_v4l_read,
 	.mmap =   usbvideo_v4l_mmap,
 	.ioctl =  usbvideo_v4l_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
-	.llseek = no_llseek,
 };
 static const struct video_device usbvideo_template = {
 	.fops =       &usbvideo_fops,
@@ -1113,7 +1109,7 @@ static int usbvideo_v4l_mmap(struct file *file, struct vm_area_struct *vma)
  * 27-Jan-2000 Used USBVIDEO_NUMSBUF as number of URB buffers.
  * 24-May-2000 Corrected to prevent race condition (MOD_xxx_USE_COUNT).
  */
-static int usbvideo_v4l_open(struct inode *inode, struct file *file)
+static int usbvideo_v4l_open(struct file *file)
 {
 	struct video_device *dev = video_devdata(file);
 	struct uvd *uvd = (struct uvd *) dev;
@@ -1233,7 +1229,7 @@ static int usbvideo_v4l_open(struct inode *inode, struct file *file)
  * 27-Jan-2000 Used USBVIDEO_NUMSBUF as number of URB buffers.
  * 24-May-2000 Moved MOD_DEC_USE_COUNT outside of code that can sleep.
  */
-static int usbvideo_v4l_close(struct inode *inode, struct file *file)
+static int usbvideo_v4l_close(struct file *file)
 {
 	struct video_device *dev = file->private_data;
 	struct uvd *uvd = (struct uvd *) dev;
@@ -1501,7 +1497,7 @@ static int usbvideo_v4l_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int usbvideo_v4l_ioctl(struct inode *inode, struct file *file,
+static int usbvideo_v4l_ioctl(struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, usbvideo_v4l_do_ioctl);
diff --git a/drivers/media/video/usbvideo/vicam.c b/drivers/media/video/usbvideo/vicam.c
index 4602597..53197a4 100644
--- a/drivers/media/video/usbvideo/vicam.c
+++ b/drivers/media/video/usbvideo/vicam.c
@@ -230,7 +230,7 @@ set_camera_power(struct vicam_camera *cam, int state)
 }
 
 static int
-vicam_ioctl(struct inode *inode, struct file *file, unsigned int ioctlnr, unsigned long arg)
+vicam_ioctl(struct file *file, unsigned int ioctlnr, unsigned long arg)
 {
 	void __user *user_arg = (void __user *)arg;
 	struct vicam_camera *cam = file->private_data;
@@ -470,7 +470,7 @@ vicam_ioctl(struct inode *inode, struct file *file, unsigned int ioctlnr, unsign
 }
 
 static int
-vicam_open(struct inode *inode, struct file *file)
+vicam_open(struct file *file)
 {
 	struct vicam_camera *cam = video_drvdata(file);
 
@@ -536,7 +536,7 @@ vicam_open(struct inode *inode, struct file *file)
 }
 
 static int
-vicam_close(struct inode *inode, struct file *file)
+vicam_close(struct file *file)
 {
 	struct vicam_camera *cam = file->private_data;
 	int open_count;
@@ -783,17 +783,13 @@ vicam_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static const struct file_operations vicam_fops = {
+static const struct v4l2_file_operations vicam_fops = {
 	.owner		= THIS_MODULE,
 	.open		= vicam_open,
 	.release	= vicam_close,
 	.read		= vicam_read,
 	.mmap		= vicam_mmap,
 	.ioctl		= vicam_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek		= no_llseek,
 };
 
 static struct video_device vicam_template = {
diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c
index 85661b1..21456b8 100644
--- a/drivers/media/video/usbvision/usbvision-video.c
+++ b/drivers/media/video/usbvision/usbvision-video.c
@@ -355,7 +355,7 @@ static void usbvision_remove_sysfs(struct video_device *vdev)
  * then allocates buffers needed for video processing.
  *
  */
-static int usbvision_v4l2_open(struct inode *inode, struct file *file)
+static int usbvision_v4l2_open(struct file *file)
 {
 	struct usb_usbvision *usbvision = video_drvdata(file);
 	int errCode = 0;
@@ -432,7 +432,7 @@ static int usbvision_v4l2_open(struct inode *inode, struct file *file)
  * allocated in usbvision_v4l2_open().
  *
  */
-static int usbvision_v4l2_close(struct inode *inode, struct file *file)
+static int usbvision_v4l2_close(struct file *file)
 {
 	struct usb_usbvision *usbvision = video_drvdata(file);
 
@@ -1178,7 +1178,7 @@ static int usbvision_v4l2_mmap(struct file *file, struct vm_area_struct *vma)
  * Here comes the stuff for radio on usbvision based devices
  *
  */
-static int usbvision_radio_open(struct inode *inode, struct file *file)
+static int usbvision_radio_open(struct file *file)
 {
 	struct usb_usbvision *usbvision = video_drvdata(file);
 	int errCode = 0;
@@ -1228,7 +1228,7 @@ out:
 }
 
 
-static int usbvision_radio_close(struct inode *inode, struct file *file)
+static int usbvision_radio_close(struct file *file)
 {
 	struct usb_usbvision *usbvision = video_drvdata(file);
 	int errCode = 0;
@@ -1266,13 +1266,13 @@ static int usbvision_radio_close(struct inode *inode, struct file *file)
  * Here comes the stuff for vbi on usbvision based devices
  *
  */
-static int usbvision_vbi_open(struct inode *inode, struct file *file)
+static int usbvision_vbi_open(struct file *file)
 {
 	/* TODO */
 	return -ENODEV;
 }
 
-static int usbvision_vbi_close(struct inode *inode, struct file *file)
+static int usbvision_vbi_close(struct file *file)
 {
 	/* TODO */
 	return -ENODEV;
@@ -1285,7 +1285,7 @@ static int usbvision_do_vbi_ioctl(struct file *file,
 	return -ENOIOCTLCMD;
 }
 
-static int usbvision_vbi_ioctl(struct inode *inode, struct file *file,
+static int usbvision_vbi_ioctl(struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, usbvision_do_vbi_ioctl);
@@ -1297,16 +1297,14 @@ static int usbvision_vbi_ioctl(struct inode *inode, struct file *file,
 //
 
 // Video template
-static const struct file_operations usbvision_fops = {
+static const struct v4l2_file_operations usbvision_fops = {
 	.owner             = THIS_MODULE,
 	.open		= usbvision_v4l2_open,
 	.release	= usbvision_v4l2_close,
 	.read		= usbvision_v4l2_read,
 	.mmap		= usbvision_v4l2_mmap,
 	.ioctl		= video_ioctl2,
-	.llseek		= no_llseek,
 /* 	.poll          = video_poll, */
-	.compat_ioctl  = v4l_compat_ioctl32,
 };
 
 static const struct v4l2_ioctl_ops usbvision_ioctl_ops = {
@@ -1355,13 +1353,11 @@ static struct video_device usbvision_video_template = {
 
 
 // Radio template
-static const struct file_operations usbvision_radio_fops = {
+static const struct v4l2_file_operations usbvision_radio_fops = {
 	.owner             = THIS_MODULE,
 	.open		= usbvision_radio_open,
 	.release	= usbvision_radio_close,
 	.ioctl		= video_ioctl2,
-	.llseek		= no_llseek,
-	.compat_ioctl  = v4l_compat_ioctl32,
 };
 
 static const struct v4l2_ioctl_ops usbvision_radio_ioctl_ops = {
@@ -1392,13 +1388,11 @@ static struct video_device usbvision_radio_template = {
 };
 
 // vbi template
-static const struct file_operations usbvision_vbi_fops = {
+static const struct v4l2_file_operations usbvision_vbi_fops = {
 	.owner             = THIS_MODULE,
 	.open		= usbvision_vbi_open,
 	.release	= usbvision_vbi_close,
 	.ioctl		= usbvision_vbi_ioctl,
-	.llseek		= no_llseek,
-	.compat_ioctl  = v4l_compat_ioctl32,
 };
 
 static struct video_device usbvision_vbi_template=
diff --git a/drivers/media/video/uvc/uvc_v4l2.c b/drivers/media/video/uvc/uvc_v4l2.c
index afcc693..df9e937 100644
--- a/drivers/media/video/uvc/uvc_v4l2.c
+++ b/drivers/media/video/uvc/uvc_v4l2.c
@@ -406,7 +406,7 @@ static int uvc_has_privileges(struct uvc_fh *handle)
  * V4L2 file operations
  */
 
-static int uvc_v4l2_open(struct inode *inode, struct file *file)
+static int uvc_v4l2_open(struct file *file)
 {
 	struct uvc_video_device *video;
 	struct uvc_fh *handle;
@@ -444,7 +444,7 @@ done:
 	return ret;
 }
 
-static int uvc_v4l2_release(struct inode *inode, struct file *file)
+static int uvc_v4l2_release(struct file *file)
 {
 	struct uvc_video_device *video = video_drvdata(file);
 	struct uvc_fh *handle = (struct uvc_fh *)file->private_data;
@@ -996,7 +996,7 @@ static int uvc_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return ret;
 }
 
-static int uvc_v4l2_ioctl(struct inode *inode, struct file *file,
+static int uvc_v4l2_ioctl(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
 	if (uvc_trace_param & UVC_TRACE_IOCTL) {
@@ -1097,13 +1097,11 @@ static unsigned int uvc_v4l2_poll(struct file *file, poll_table *wait)
 	return uvc_queue_poll(&video->queue, file, wait);
 }
 
-struct file_operations uvc_fops = {
+const struct v4l2_file_operations uvc_fops = {
 	.owner		= THIS_MODULE,
 	.open		= uvc_v4l2_open,
 	.release	= uvc_v4l2_release,
 	.ioctl		= uvc_v4l2_ioctl,
-	.compat_ioctl	= v4l_compat_ioctl32,
-	.llseek		= no_llseek,
 	.read		= uvc_v4l2_read,
 	.mmap		= uvc_v4l2_mmap,
 	.poll		= uvc_v4l2_poll,
diff --git a/drivers/media/video/uvc/uvcvideo.h b/drivers/media/video/uvc/uvcvideo.h
index 896b791..bcf4361 100644
--- a/drivers/media/video/uvc/uvcvideo.h
+++ b/drivers/media/video/uvc/uvcvideo.h
@@ -753,7 +753,7 @@ static inline int uvc_queue_streaming(struct uvc_video_queue *queue)
 }
 
 /* V4L2 interface */
-extern struct file_operations uvc_fops;
+extern const struct v4l2_file_operations uvc_fops;
 
 /* Video */
 extern int uvc_video_init(struct uvc_video_device *video);
diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index 26fdf1e..b4f3914 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -1072,12 +1072,7 @@ long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 	}
 	return ret;
 }
-#else
-long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	return -ENOIOCTLCMD;
-}
-#endif
 EXPORT_SYMBOL_GPL(v4l_compat_ioctl32);
+#endif
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c
index 7ad6711..0000134 100644
--- a/drivers/media/video/v4l2-dev.c
+++ b/drivers/media/video/v4l2-dev.c
@@ -31,6 +31,7 @@
 
 #include <media/v4l2-common.h>
 #include <media/v4l2-device.h>
+#include <media/v4l2-ioctl.h>
 
 #define VIDEO_NUM_DEVICES	256
 #define VIDEO_NAME              "video4linux"
@@ -182,7 +183,7 @@ static int v4l2_ioctl(struct inode *inode, struct file *filp,
 		return -ENOTTY;
 	/* Allow ioctl to continue even if the device was unregistered.
 	   Things like dequeueing buffers might still be useful. */
-	return vdev->fops->ioctl(inode, filp, cmd, arg);
+	return vdev->fops->ioctl(filp, cmd, arg);
 }
 
 static long v4l2_unlocked_ioctl(struct file *filp,
@@ -197,20 +198,6 @@ static long v4l2_unlocked_ioctl(struct file *filp,
 	return vdev->fops->unlocked_ioctl(filp, cmd, arg);
 }
 
-#ifdef CONFIG_COMPAT
-static long v4l2_compat_ioctl(struct file *filp,
-		unsigned int cmd, unsigned long arg)
-{
-	struct video_device *vdev = video_devdata(filp);
-
-	if (!vdev->fops->compat_ioctl)
-		return -ENOIOCTLCMD;
-	/* Allow ioctl to continue even if the device was unregistered.
-	   Things like dequeueing buffers might still be useful. */
-	return vdev->fops->compat_ioctl(filp, cmd, arg);
-}
-#endif
-
 static int v4l2_mmap(struct file *filp, struct vm_area_struct *vm)
 {
 	struct video_device *vdev = video_devdata(filp);
@@ -239,7 +226,7 @@ static int v4l2_open(struct inode *inode, struct file *filp)
 	/* and increase the device refcount */
 	video_get(vdev);
 	mutex_unlock(&videodev_lock);
-	ret = vdev->fops->open(inode, filp);
+	ret = vdev->fops->open(filp);
 	/* decrease the refcount in case of an error */
 	if (ret)
 		video_put(vdev);
@@ -250,7 +237,7 @@ static int v4l2_open(struct inode *inode, struct file *filp)
 static int v4l2_release(struct inode *inode, struct file *filp)
 {
 	struct video_device *vdev = video_devdata(filp);
-	int ret = vdev->fops->release(inode, filp);
+	int ret = vdev->fops->release(filp);
 
 	/* decrease the refcount unconditionally since the release()
 	   return value is ignored. */
@@ -266,7 +253,7 @@ static const struct file_operations v4l2_unlocked_fops = {
 	.mmap = v4l2_mmap,
 	.unlocked_ioctl = v4l2_unlocked_ioctl,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l2_compat_ioctl,
+	.compat_ioctl = v4l_compat_ioctl32,
 #endif
 	.release = v4l2_release,
 	.poll = v4l2_poll,
@@ -281,7 +268,7 @@ static const struct file_operations v4l2_fops = {
 	.mmap = v4l2_mmap,
 	.ioctl = v4l2_ioctl,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l2_compat_ioctl,
+	.compat_ioctl = v4l_compat_ioctl32,
 #endif
 	.release = v4l2_release,
 	.poll = v4l2_poll,
diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index b063381..3b834f4 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -1852,7 +1852,7 @@ static int __video_do_ioctl(struct file *file,
 	return ret;
 }
 
-long __video_ioctl2(struct file *file,
+int video_ioctl2(struct file *file,
 	       unsigned int cmd, unsigned long arg)
 {
 	char	sbuf[128];
@@ -1944,11 +1944,4 @@ out:
 	kfree(mbuf);
 	return err;
 }
-EXPORT_SYMBOL(__video_ioctl2);
-
-int video_ioctl2(struct inode *inode, struct file *file,
-	       unsigned int cmd, unsigned long arg)
-{
-	return __video_ioctl2(file, cmd, arg);
-}
 EXPORT_SYMBOL(video_ioctl2);
diff --git a/drivers/media/video/vino.c b/drivers/media/video/vino.c
index a72a361..63863fa 100644
--- a/drivers/media/video/vino.c
+++ b/drivers/media/video/vino.c
@@ -4019,7 +4019,7 @@ out:
 
 /* File operations */
 
-static int vino_open(struct inode *inode, struct file *file)
+static int vino_open(struct file *file)
 {
 	struct vino_channel_settings *vcs = video_drvdata(file);
 	int ret = 0;
@@ -4050,7 +4050,7 @@ static int vino_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
-static int vino_close(struct inode *inode, struct file *file)
+static int vino_close(struct file *file)
 {
 	struct vino_channel_settings *vcs = video_drvdata(file);
 	dprintk("close():\n");
@@ -4343,7 +4343,7 @@ static int vino_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int vino_ioctl(struct inode *inode, struct file *file,
+static int vino_ioctl(struct file *file,
 		      unsigned int cmd, unsigned long arg)
 {
 	struct vino_channel_settings *vcs = video_drvdata(file);
@@ -4364,14 +4364,13 @@ static int vino_ioctl(struct inode *inode, struct file *file,
 /* __initdata */
 static int vino_init_stage;
 
-static const struct file_operations vino_fops = {
+static const struct v4l2_file_operations vino_fops = {
 	.owner		= THIS_MODULE,
 	.open		= vino_open,
 	.release	= vino_close,
 	.ioctl		= vino_ioctl,
 	.mmap		= vino_mmap,
 	.poll		= vino_poll,
-	.llseek		= no_llseek,
 };
 
 static struct video_device v4l_device_template = {
diff --git a/drivers/media/video/vivi.c b/drivers/media/video/vivi.c
index e15e48f..81d5aa5 100644
--- a/drivers/media/video/vivi.c
+++ b/drivers/media/video/vivi.c
@@ -1024,9 +1024,9 @@ static int vidioc_s_ctrl(struct file *file, void *priv,
 	File operations for the device
    ------------------------------------------------------------------*/
 
-static int vivi_open(struct inode *inode, struct file *file)
+static int vivi_open(struct file *file)
 {
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 	struct vivi_dev *dev;
 	struct vivi_fh *fh = NULL;
 	int i;
@@ -1127,13 +1127,13 @@ vivi_poll(struct file *file, struct poll_table_struct *wait)
 	return videobuf_poll_stream(file, q, wait);
 }
 
-static int vivi_close(struct inode *inode, struct file *file)
+static int vivi_close(struct file *file)
 {
 	struct vivi_fh         *fh = file->private_data;
 	struct vivi_dev *dev       = fh->dev;
 	struct vivi_dmaqueue *vidq = &dev->vidq;
 
-	int minor = iminor(inode);
+	int minor = video_devdata(file)->minor;
 
 	vivi_stop_thread(vidq);
 	videobuf_stop(&fh->vb_vidq);
@@ -1195,16 +1195,14 @@ static int vivi_mmap(struct file *file, struct vm_area_struct *vma)
 	return ret;
 }
 
-static const struct file_operations vivi_fops = {
+static const struct v4l2_file_operations vivi_fops = {
 	.owner		= THIS_MODULE,
 	.open           = vivi_open,
 	.release        = vivi_close,
 	.read           = vivi_read,
 	.poll		= vivi_poll,
 	.ioctl          = video_ioctl2, /* V4L2 ioctl handler */
-	.compat_ioctl   = v4l_compat_ioctl32,
 	.mmap           = vivi_mmap,
-	.llseek         = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops vivi_ioctl_ops = {
diff --git a/drivers/media/video/w9966.c b/drivers/media/video/w9966.c
index 56c570c..91500f5 100644
--- a/drivers/media/video/w9966.c
+++ b/drivers/media/video/w9966.c
@@ -180,19 +180,19 @@ static int w9966_i2c_wbyte(struct w9966_dev* cam, int data);
 static int w9966_i2c_rbyte(struct w9966_dev* cam);
 #endif
 
-static int w9966_v4l_ioctl(struct inode *inode, struct file *file,
+static int w9966_v4l_ioctl(struct file *file,
 			   unsigned int cmd, unsigned long arg);
 static ssize_t w9966_v4l_read(struct file *file, char __user *buf,
 			      size_t count, loff_t *ppos);
 
-static int w9966_exclusive_open(struct inode *inode, struct file *file)
+static int w9966_exclusive_open(struct file *file)
 {
 	struct w9966_dev *cam = video_drvdata(file);
 
 	return test_and_set_bit(0, &cam->in_use) ? -EBUSY : 0;
 }
 
-static int w9966_exclusive_release(struct inode *inode, struct file *file)
+static int w9966_exclusive_release(struct file *file)
 {
 	struct w9966_dev *cam = video_drvdata(file);
 
@@ -200,16 +200,12 @@ static int w9966_exclusive_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static const struct file_operations w9966_fops = {
+static const struct v4l2_file_operations w9966_fops = {
 	.owner		= THIS_MODULE,
 	.open           = w9966_exclusive_open,
 	.release        = w9966_exclusive_release,
 	.ioctl          = w9966_v4l_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
 	.read           = w9966_v4l_read,
-	.llseek         = no_llseek,
 };
 static struct video_device w9966_template = {
 	.name           = W9966_DRIVERNAME,
@@ -877,7 +873,7 @@ static int w9966_v4l_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int w9966_v4l_ioctl(struct inode *inode, struct file *file,
+static int w9966_v4l_ioctl(struct file *file,
 			   unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, w9966_v4l_do_ioctl);
diff --git a/drivers/media/video/w9968cf.c b/drivers/media/video/w9968cf.c
index 4dfb43b..159b4ed 100644
--- a/drivers/media/video/w9968cf.c
+++ b/drivers/media/video/w9968cf.c
@@ -399,13 +399,13 @@ MODULE_PARM_DESC(specific_debug,
  ****************************************************************************/
 
 /* Video4linux interface */
-static const struct file_operations w9968cf_fops;
-static int w9968cf_open(struct inode*, struct file*);
-static int w9968cf_release(struct inode*, struct file*);
-static int w9968cf_mmap(struct file*, struct vm_area_struct*);
-static int w9968cf_ioctl(struct inode*, struct file*, unsigned, unsigned long);
-static ssize_t w9968cf_read(struct file*, char __user *, size_t, loff_t*);
-static int w9968cf_v4l_ioctl(struct inode*, struct file*, unsigned int,
+static const struct v4l2_file_operations w9968cf_fops;
+static int w9968cf_open(struct file *);
+static int w9968cf_release(struct file *);
+static int w9968cf_mmap(struct file *, struct vm_area_struct *);
+static int w9968cf_ioctl(struct file *, unsigned, unsigned long);
+static ssize_t w9968cf_read(struct file *, char __user *, size_t, loff_t *);
+static int w9968cf_v4l_ioctl(struct file *, unsigned int,
 			     void __user *);
 
 /* USB-specific */
@@ -2662,7 +2662,7 @@ static void w9968cf_release_resources(struct w9968cf_device* cam)
  * Video4Linux interface                                                    *
  ****************************************************************************/
 
-static int w9968cf_open(struct inode* inode, struct file* filp)
+static int w9968cf_open(struct file *filp)
 {
 	struct w9968cf_device* cam;
 	int err;
@@ -2748,7 +2748,7 @@ deallocate_memory:
 }
 
 
-static int w9968cf_release(struct inode* inode, struct file* filp)
+static int w9968cf_release(struct file *filp)
 {
 	struct w9968cf_device* cam;
 
@@ -2886,7 +2886,7 @@ static int w9968cf_mmap(struct file* filp, struct vm_area_struct *vma)
 
 
 static int
-w9968cf_ioctl(struct inode* inode, struct file* filp,
+w9968cf_ioctl(struct file *filp,
 	      unsigned int cmd, unsigned long arg)
 {
 	struct w9968cf_device* cam;
@@ -2909,15 +2909,15 @@ w9968cf_ioctl(struct inode* inode, struct file* filp,
 		return -EIO;
 	}
 
-	err = w9968cf_v4l_ioctl(inode, filp, cmd, (void __user *)arg);
+	err = w9968cf_v4l_ioctl(filp, cmd, (void __user *)arg);
 
 	mutex_unlock(&cam->fileop_mutex);
 	return err;
 }
 
 
-static int w9968cf_v4l_ioctl(struct inode* inode, struct file* filp,
-			     unsigned int cmd, void __user * arg)
+static int w9968cf_v4l_ioctl(struct file *filp,
+			     unsigned int cmd, void __user *arg)
 {
 	struct w9968cf_device* cam;
 	const char* v4l1_ioctls[] = {
@@ -3456,17 +3456,13 @@ ioctl_fail:
 }
 
 
-static const struct file_operations w9968cf_fops = {
+static const struct v4l2_file_operations w9968cf_fops = {
 	.owner =   THIS_MODULE,
 	.open =    w9968cf_open,
 	.release = w9968cf_release,
 	.read =    w9968cf_read,
 	.ioctl =   w9968cf_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
 	.mmap =    w9968cf_mmap,
-	.llseek =  no_llseek,
 };
 
 
diff --git a/drivers/media/video/zc0301/zc0301_core.c b/drivers/media/video/zc0301/zc0301_core.c
index 9d00e60..46590f6 100644
--- a/drivers/media/video/zc0301/zc0301_core.c
+++ b/drivers/media/video/zc0301/zc0301_core.c
@@ -649,7 +649,7 @@ static void zc0301_release_resources(struct kref *kref)
 }
 
 
-static int zc0301_open(struct inode* inode, struct file* filp)
+static int zc0301_open(struct file *filp)
 {
 	struct zc0301_device* cam;
 	int err = 0;
@@ -733,7 +733,7 @@ out:
 }
 
 
-static int zc0301_release(struct inode* inode, struct file* filp)
+static int zc0301_release(struct file *filp)
 {
 	struct zc0301_device* cam;
 
@@ -1793,8 +1793,8 @@ zc0301_vidioc_s_parm(struct zc0301_device* cam, void __user * arg)
 }
 
 
-static int zc0301_ioctl_v4l2(struct inode* inode, struct file* filp,
-			     unsigned int cmd, void __user * arg)
+static int zc0301_ioctl_v4l2(struct file *filp,
+			     unsigned int cmd, void __user *arg)
 {
 	struct zc0301_device *cam = video_drvdata(filp);
 
@@ -1888,7 +1888,7 @@ static int zc0301_ioctl_v4l2(struct inode* inode, struct file* filp,
 }
 
 
-static int zc0301_ioctl(struct inode* inode, struct file* filp,
+static int zc0301_ioctl(struct file *filp,
 			unsigned int cmd, unsigned long arg)
 {
 	struct zc0301_device *cam = video_drvdata(filp);
@@ -1912,7 +1912,7 @@ static int zc0301_ioctl(struct inode* inode, struct file* filp,
 
 	V4LDBG(3, "zc0301", cmd);
 
-	err = zc0301_ioctl_v4l2(inode, filp, cmd, (void __user *)arg);
+	err = zc0301_ioctl_v4l2(filp, cmd, (void __user *)arg);
 
 	mutex_unlock(&cam->fileop_mutex);
 
@@ -1920,18 +1920,14 @@ static int zc0301_ioctl(struct inode* inode, struct file* filp,
 }
 
 
-static const struct file_operations zc0301_fops = {
+static const struct v4l2_file_operations zc0301_fops = {
 	.owner =   THIS_MODULE,
 	.open =    zc0301_open,
 	.release = zc0301_release,
 	.ioctl =   zc0301_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
-#endif
 	.read =    zc0301_read,
 	.poll =    zc0301_poll,
 	.mmap =    zc0301_mmap,
-	.llseek =  no_llseek,
 };
 
 /*****************************************************************************/
diff --git a/drivers/media/video/zoran/zoran_driver.c b/drivers/media/video/zoran/zoran_driver.c
index 00b97d9..ce4a5e5 100644
--- a/drivers/media/video/zoran/zoran_driver.c
+++ b/drivers/media/video/zoran/zoran_driver.c
@@ -1197,10 +1197,9 @@ zoran_close_end_session (struct file *file)
  */
 
 static int
-zoran_open (struct inode *inode,
-	    struct file  *file)
+zoran_open(struct file  *file)
 {
-	unsigned int minor = iminor(inode);
+	unsigned int minor = video_devdata(file)->minor;
 	struct zoran *zr = NULL;
 	struct zoran_fh *fh;
 	int i, res, first_open = 0, have_module_locks = 0;
@@ -1340,8 +1339,7 @@ open_unlock_and_return:
 }
 
 static int
-zoran_close (struct inode *inode,
-	     struct file  *file)
+zoran_close(struct file  *file)
 {
 	struct zoran_fh *fh = file->private_data;
 	struct zoran *zr = fh->zr;
@@ -4192,10 +4190,9 @@ static int zoran_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 
 
 static int
-zoran_ioctl (struct inode *inode,
-	     struct file  *file,
-	     unsigned int  cmd,
-	     unsigned long arg)
+zoran_ioctl(struct file  *file,
+	    unsigned int  cmd,
+	    unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, zoran_do_ioctl);
 }
@@ -4620,15 +4617,11 @@ zoran_mmap (struct file           *file,
 	return 0;
 }
 
-static const struct file_operations zoran_fops = {
+static const struct v4l2_file_operations zoran_fops = {
 	.owner = THIS_MODULE,
 	.open = zoran_open,
 	.release = zoran_close,
 	.ioctl = zoran_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= v4l_compat_ioctl32,
-#endif
-	.llseek = no_llseek,
 	.read = zoran_read,
 	.write = zoran_write,
 	.mmap = zoran_mmap,
diff --git a/drivers/media/video/zr364xx.c b/drivers/media/video/zr364xx.c
index a1d81ed..bf68ed9 100644
--- a/drivers/media/video/zr364xx.c
+++ b/drivers/media/video/zr364xx.c
@@ -634,7 +634,7 @@ static int zr364xx_vidioc_streamoff(struct file *file, void *priv,
 
 
 /* open the camera */
-static int zr364xx_open(struct inode *inode, struct file *file)
+static int zr364xx_open(struct file *file)
 {
 	struct video_device *vdev = video_devdata(file);
 	struct zr364xx_camera *cam = video_get_drvdata(vdev);
@@ -688,7 +688,7 @@ out:
 
 
 /* release the camera */
-static int zr364xx_release(struct inode *inode, struct file *file)
+static int zr364xx_release(struct file *file)
 {
 	struct video_device *vdev = video_devdata(file);
 	struct zr364xx_camera *cam;
@@ -761,14 +761,13 @@ static int zr364xx_mmap(struct file *file, struct vm_area_struct *vma)
 }
 
 
-static const struct file_operations zr364xx_fops = {
+static const struct v4l2_file_operations zr364xx_fops = {
 	.owner = THIS_MODULE,
 	.open = zr364xx_open,
 	.release = zr364xx_release,
 	.read = zr364xx_read,
 	.mmap = zr364xx_mmap,
 	.ioctl = video_ioctl2,
-	.llseek = no_llseek,
 };
 
 static const struct v4l2_ioctl_ops zr364xx_ioctl_ops = {
diff --git a/include/media/saa7146_vv.h b/include/media/saa7146_vv.h
index 6bbb0d9..fd7f4fe 100644
--- a/include/media/saa7146_vv.h
+++ b/include/media/saa7146_vv.h
@@ -179,7 +179,7 @@ struct saa7146_ext_vv
 	struct saa7146_extension_ioctls *ioctls;
 	int (*ioctl)(struct saa7146_fh*, unsigned int cmd, void *arg);
 
-	struct file_operations vbi_fops;
+	struct v4l2_file_operations vbi_fops;
 };
 
 struct saa7146_use_ops  {
diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
index 0a88d1d..4d8ce34 100644
--- a/include/media/v4l2-dev.h
+++ b/include/media/v4l2-dev.h
@@ -25,6 +25,7 @@
 #define VFL_TYPE_MAX		4
 
 struct v4l2_ioctl_callbacks;
+struct video_device;
 struct v4l2_device;
 
 /* Flag to mark the video_device struct as unregistered.
@@ -32,6 +33,18 @@ struct v4l2_device;
    device access. It is set by video_unregister_device. */
 #define V4L2_FL_UNREGISTERED	(0)
 
+struct v4l2_file_operations {
+	struct module *owner;
+	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
+	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
+	unsigned int (*poll) (struct file *, struct poll_table_struct *);
+	int (*ioctl) (struct file *, unsigned int, unsigned long);
+	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
+	int (*mmap) (struct file *, struct vm_area_struct *);
+	int (*open) (struct file *);
+	int (*release) (struct file *);
+};
+
 /*
  * Newer version of video_device, handled by videodev2.c
  * 	This version moves redundant code from video device code to
@@ -41,7 +54,7 @@ struct v4l2_device;
 struct video_device
 {
 	/* device ops */
-	const struct file_operations *fops;
+	const struct v4l2_file_operations *fops;
 
 	/* sysfs */
 	struct device dev;		/* v4l device */
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index fcdb58c..835af43 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -286,27 +286,18 @@ int v4l_compat_translate_ioctl(struct file *file,
 #define v4l_compat_translate_ioctl(file, cmd, arg, ioctl) (-EINVAL)
 #endif
 
+#ifdef CONFIG_COMPAT
 /* 32 Bits compatibility layer for 64 bits processors */
 extern long v4l_compat_ioctl32(struct file *file, unsigned int cmd,
 				unsigned long arg);
+#endif
 
 /* Include support for obsoleted stuff */
 extern int video_usercopy(struct file *file, unsigned int cmd,
 				unsigned long arg, v4l2_kioctl func);
 
 /* Standard handlers for V4L ioctl's */
-
-/* This prototype is used on fops.unlocked_ioctl */
-extern long __video_ioctl2(struct file *file,
-			unsigned int cmd, unsigned long arg);
-
-/* This prototype is used on fops.ioctl
- * Since fops.ioctl enables Kernel Big Lock, it is preferred
- * to use __video_ioctl2 instead.
- * It should be noticed that there's no lock code inside
- * video_ioctl2().
- */
-extern int video_ioctl2(struct inode *inode, struct file *file,
+extern int video_ioctl2(struct file *file,
 			unsigned int cmd, unsigned long arg);
 
 #endif /* _V4L2_IOCTL_H */
diff --git a/include/sound/tea575x-tuner.h b/include/sound/tea575x-tuner.h
index b6870cb..426899e 100644
--- a/include/sound/tea575x-tuner.h
+++ b/include/sound/tea575x-tuner.h
@@ -36,7 +36,7 @@ struct snd_tea575x_ops {
 struct snd_tea575x {
 	struct snd_card *card;
 	struct video_device vd;		/* video device */
-	struct file_operations fops;
+	struct v4l2_file_operations fops;
 	int dev_nr;			/* requested device number + 1 */
 	int vd_registered;		/* video device is registered */
 	int tea5759;			/* 5759 chip is present */
diff --git a/sound/i2c/other/tea575x-tuner.c b/sound/i2c/other/tea575x-tuner.c
index 549b4eb..90f416c 100644
--- a/sound/i2c/other/tea575x-tuner.c
+++ b/sound/i2c/other/tea575x-tuner.c
@@ -84,7 +84,7 @@ static void snd_tea575x_set_freq(struct snd_tea575x *tea)
  * Linux Video interface
  */
 
-static int snd_tea575x_ioctl(struct inode *inode, struct file *file,
+static int snd_tea575x_ioctl(struct file *file,
 			     unsigned int cmd, unsigned long data)
 {
 	struct snd_tea575x *tea = video_drvdata(file);
@@ -174,14 +174,14 @@ static void snd_tea575x_release(struct video_device *vfd)
 {
 }
 
-static int snd_tea575x_exclusive_open(struct inode *inode, struct file *file)
+static int snd_tea575x_exclusive_open(struct file *file)
 {
 	struct snd_tea575x *tea = video_drvdata(file);
 
 	return test_and_set_bit(0, &tea->in_use) ? -EBUSY : 0;
 }
 
-static int snd_tea575x_exclusive_release(struct inode *inode, struct file *file)
+static int snd_tea575x_exclusive_release(struct file *file)
 {
 	struct snd_tea575x *tea = video_drvdata(file);
 
-- 
cgit v0.10.2


From c7dd09dabc278b03980c8e93d0eee3843b5ad514 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 23 Dec 2008 13:42:25 -0300
Subject: V4L/DVB (10136): v4l2 doc: update v4l2-framework.txt

Mention the new v4l2_file_operations struct.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt
index 38d054a..3b483c1 100644
--- a/Documentation/video4linux/v4l2-framework.txt
+++ b/Documentation/video4linux/v4l2-framework.txt
@@ -392,13 +392,16 @@ You should also set these fields:
 
 - v4l2_dev: set to the v4l2_device parent device.
 - name: set to something descriptive and unique.
-- fops: set to the file_operations struct.
+- fops: set to the v4l2_file_operations struct.
 - ioctl_ops: if you use the v4l2_ioctl_ops to simplify ioctl maintenance
   (highly recommended to use this and it might become compulsory in the
   future!), then set this to your v4l2_ioctl_ops struct.
 
-If you use v4l2_ioctl_ops, then you should set .unlocked_ioctl to
-__video_ioctl2 or .ioctl to video_ioctl2 in your file_operations struct.
+If you use v4l2_ioctl_ops, then you should set either .unlocked_ioctl or
+.ioctl to video_ioctl2 in your v4l2_file_operations struct.
+
+The v4l2_file_operations struct is a subset of file_operations. The main
+difference is that the inode argument is omitted since it is never used.
 
 
 video_device registration
-- 
cgit v0.10.2


From 4ab9203b1b7e45f1beae7eb0c67d663a26257a69 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 23 Dec 2008 15:02:16 -0300
Subject: V4L/DVB (10137): v4l2-compat32: only build if needed

Add CONFIG_COMPAT check in Makefile.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile
index 1611c33..72f6d03 100644
--- a/drivers/media/video/Makefile
+++ b/drivers/media/video/Makefile
@@ -12,7 +12,10 @@ omap2cam-objs	:=	omap24xxcam.o omap24xxcam-dma.o
 
 videodev-objs	:=	v4l2-dev.o v4l2-ioctl.o v4l2-device.o v4l2-subdev.o
 
-obj-$(CONFIG_VIDEO_DEV) += videodev.o v4l2-compat-ioctl32.o v4l2-int-device.o
+obj-$(CONFIG_VIDEO_DEV) += videodev.o v4l2-int-device.o
+ifeq ($(CONFIG_COMPAT),y)
+  obj-$(CONFIG_VIDEO_DEV) += v4l2-compat-ioctl32.o
+endif
 
 obj-$(CONFIG_VIDEO_V4L2_COMMON) += v4l2-common.o
 
-- 
cgit v0.10.2


From 069b747931f13eda289c1d59a09ecc8162281a76 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 30 Dec 2008 07:04:34 -0300
Subject: V4L/DVB (10138): v4l2-ioctl: change to long return type to match
 unlocked_ioctl.

Since internal to v4l2 the ioctl prototype is the same regardless of it
being called through .ioctl or .unlocked_ioctl, we need to convert it all
to the long return type of unlocked_ioctl.

Thanks to Jean-Francois Moine for posting an initial patch for this and
thus bringing it to our attention.

Cc: Jean-Francois Moine <moinejf@free.fr>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/common/saa7146_fops.c b/drivers/media/common/saa7146_fops.c
index fad7fd8..cf06f4d 100644
--- a/drivers/media/common/saa7146_fops.c
+++ b/drivers/media/common/saa7146_fops.c
@@ -308,7 +308,7 @@ static int fops_release(struct file *file)
 	return 0;
 }
 
-static int fops_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long fops_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 /*
 	DEB_EE(("file:%p, cmd:%d, arg:%li\n", file, cmd, arg));
diff --git a/drivers/media/common/saa7146_video.c b/drivers/media/common/saa7146_video.c
index 101b01d..6098b62 100644
--- a/drivers/media/common/saa7146_video.c
+++ b/drivers/media/common/saa7146_video.c
@@ -834,13 +834,14 @@ static int video_end(struct saa7146_fh *fh, struct file *file)
  * copying is done already, arg is a kernel pointer.
  */
 
-int saa7146_video_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+long saa7146_video_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct saa7146_fh *fh  = file->private_data;
 	struct saa7146_dev *dev = fh->dev;
 	struct saa7146_vv *vv = dev->vv_data;
 
-	int err = 0, result = 0, ee = 0;
+	long err = 0;
+	int result = 0, ee = 0;
 
 	struct saa7146_use_ops *ops;
 	struct videobuf_queue *q;
diff --git a/drivers/media/dvb/ttpci/av7110_v4l.c b/drivers/media/dvb/ttpci/av7110_v4l.c
index 315ba6f..c5b9c70 100644
--- a/drivers/media/dvb/ttpci/av7110_v4l.c
+++ b/drivers/media/dvb/ttpci/av7110_v4l.c
@@ -316,7 +316,7 @@ static int av7110_dvb_c_switch(struct saa7146_fh *fh)
 	return 0;
 }
 
-static int av7110_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg)
+static long av7110_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg)
 {
 	struct saa7146_dev *dev = fh->dev;
 	struct av7110 *av7110 = (struct av7110*) dev->ext_priv;
diff --git a/drivers/media/dvb/ttpci/budget-av.c b/drivers/media/dvb/ttpci/budget-av.c
index f996cef..4182121 100644
--- a/drivers/media/dvb/ttpci/budget-av.c
+++ b/drivers/media/dvb/ttpci/budget-av.c
@@ -1493,7 +1493,7 @@ static struct saa7146_extension_ioctls ioctls[] = {
 	{0, 0}
 };
 
-static int av_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg)
+static long av_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg)
 {
 	struct saa7146_dev *dev = fh->dev;
 	struct budget_av *budget_av = (struct budget_av *) dev->ext_priv;
diff --git a/drivers/media/video/arv.c b/drivers/media/video/arv.c
index f18fb73..d137bac 100644
--- a/drivers/media/video/arv.c
+++ b/drivers/media/video/arv.c
@@ -396,7 +396,7 @@ out_up:
 	return ret;
 }
 
-static int ar_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long ar_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *dev = video_devdata(file);
 	struct ar_device *ar = video_get_drvdata(dev);
@@ -539,7 +539,7 @@ static int ar_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int ar_ioctl(struct file *file, unsigned int cmd,
+static long ar_ioctl(struct file *file, unsigned int cmd,
 		    unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, ar_do_ioctl);
diff --git a/drivers/media/video/bw-qcam.c b/drivers/media/video/bw-qcam.c
index 0b02be5..10dbd4a 100644
--- a/drivers/media/video/bw-qcam.c
+++ b/drivers/media/video/bw-qcam.c
@@ -706,7 +706,7 @@ static long qc_capture(struct qcam_device * q, char __user *buf, unsigned long l
  *	Video4linux interfacing
  */
 
-static int qcam_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long qcam_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *dev = video_devdata(file);
 	struct qcam_device *qcam=(struct qcam_device *)dev;
@@ -863,7 +863,7 @@ static int qcam_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int qcam_ioctl(struct file *file,
+static long qcam_ioctl(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, qcam_do_ioctl);
diff --git a/drivers/media/video/c-qcam.c b/drivers/media/video/c-qcam.c
index 837c16d..85cf177 100644
--- a/drivers/media/video/c-qcam.c
+++ b/drivers/media/video/c-qcam.c
@@ -500,7 +500,7 @@ static long qc_capture(struct qcam_device *q, char __user *buf, unsigned long le
  *	Video4linux interfacing
  */
 
-static int qcam_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long qcam_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *dev = video_devdata(file);
 	struct qcam_device *qcam=(struct qcam_device *)dev;
@@ -665,7 +665,7 @@ static int qcam_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int qcam_ioctl(struct file *file,
+static long qcam_ioctl(struct file *file,
 		      unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, qcam_do_ioctl);
diff --git a/drivers/media/video/cpia.c b/drivers/media/video/cpia.c
index 9925ec0..c3b0c8c 100644
--- a/drivers/media/video/cpia.c
+++ b/drivers/media/video/cpia.c
@@ -3333,7 +3333,7 @@ static ssize_t cpia_read(struct file *file, char __user *buf,
 	return cam->decompressed_frame.count;
 }
 
-static int cpia_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long cpia_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *dev = file->private_data;
 	struct cam_data *cam = video_get_drvdata(dev);
@@ -3720,7 +3720,7 @@ static int cpia_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return retval;
 }
 
-static int cpia_ioctl(struct file *file,
+static long cpia_ioctl(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, cpia_do_ioctl);
diff --git a/drivers/media/video/cpia2/cpia2_v4l.c b/drivers/media/video/cpia2/cpia2_v4l.c
index 91870cc..9c25894 100644
--- a/drivers/media/video/cpia2/cpia2_v4l.c
+++ b/drivers/media/video/cpia2/cpia2_v4l.c
@@ -1572,10 +1572,10 @@ static int ioctl_dqbuf(void *arg,struct camera_data *cam, struct file *file)
  *  cpia2_ioctl
  *
  *****************************************************************************/
-static int cpia2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long cpia2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct camera_data *cam = video_drvdata(file);
-	int retval = 0;
+	long retval = 0;
 
 	if (!cam)
 		return -ENOTTY;
@@ -1841,7 +1841,7 @@ static int cpia2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return retval;
 }
 
-static int cpia2_ioctl(struct file *file,
+static long cpia2_ioctl(struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, cpia2_do_ioctl);
diff --git a/drivers/media/video/cx18/cx18-ioctl.c b/drivers/media/video/cx18/cx18-ioctl.c
index 5023075..8aa152b 100644
--- a/drivers/media/video/cx18/cx18-ioctl.c
+++ b/drivers/media/video/cx18/cx18-ioctl.c
@@ -755,7 +755,7 @@ static int cx18_log_status(struct file *file, void *fh)
 	return 0;
 }
 
-static int cx18_default(struct file *file, void *fh, int cmd, void *arg)
+static long cx18_default(struct file *file, void *fh, int cmd, void *arg)
 {
 	struct cx18 *cx = ((struct cx18_open_id *)fh)->cx;
 
@@ -783,13 +783,13 @@ static int cx18_default(struct file *file, void *fh, int cmd, void *arg)
 	return 0;
 }
 
-int cx18_v4l2_ioctl(struct file *filp, unsigned int cmd,
+long cx18_v4l2_ioctl(struct file *filp, unsigned int cmd,
 		    unsigned long arg)
 {
 	struct video_device *vfd = video_devdata(filp);
 	struct cx18_open_id *id = (struct cx18_open_id *)filp->private_data;
 	struct cx18 *cx = id->cx;
-	int res;
+	long res;
 
 	mutex_lock(&cx->serialize_lock);
 
diff --git a/drivers/media/video/cx18/cx18-ioctl.h b/drivers/media/video/cx18/cx18-ioctl.h
index 50b8d60..e2ca0d1 100644
--- a/drivers/media/video/cx18/cx18-ioctl.h
+++ b/drivers/media/video/cx18/cx18-ioctl.h
@@ -29,5 +29,5 @@ void cx18_set_funcs(struct video_device *vdev);
 int cx18_s_std(struct file *file, void *fh, v4l2_std_id *std);
 int cx18_s_frequency(struct file *file, void *fh, struct v4l2_frequency *vf);
 int cx18_s_input(struct file *file, void *fh, unsigned int inp);
-int cx18_v4l2_ioctl(struct file *filp, unsigned int cmd,
+long cx18_v4l2_ioctl(struct file *filp, unsigned int cmd,
 		    unsigned long arg);
diff --git a/drivers/media/video/et61x251/et61x251_core.c b/drivers/media/video/et61x251/et61x251_core.c
index 3aeb879..d1c1e45 100644
--- a/drivers/media/video/et61x251/et61x251_core.c
+++ b/drivers/media/video/et61x251/et61x251_core.c
@@ -2392,7 +2392,7 @@ et61x251_vidioc_s_parm(struct et61x251_device* cam, void __user * arg)
 }
 
 
-static int et61x251_ioctl_v4l2(struct file *filp,
+static long et61x251_ioctl_v4l2(struct file *filp,
 			       unsigned int cmd, void __user *arg)
 {
 	struct et61x251_device *cam = video_drvdata(filp);
@@ -2487,11 +2487,11 @@ static int et61x251_ioctl_v4l2(struct file *filp,
 }
 
 
-static int et61x251_ioctl(struct file *filp,
+static long et61x251_ioctl(struct file *filp,
 			 unsigned int cmd, unsigned long arg)
 {
 	struct et61x251_device *cam = video_drvdata(filp);
-	int err = 0;
+	long err = 0;
 
 	if (mutex_lock_interruptible(&cam->fileop_mutex))
 		return -ERESTARTSYS;
diff --git a/drivers/media/video/hexium_gemini.c b/drivers/media/video/hexium_gemini.c
index 352f84d..79393d1 100644
--- a/drivers/media/video/hexium_gemini.c
+++ b/drivers/media/video/hexium_gemini.c
@@ -306,7 +306,7 @@ static int hexium_detach(struct saa7146_dev *dev)
 	return 0;
 }
 
-static int hexium_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg)
+static long hexium_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg)
 {
 	struct saa7146_dev *dev = fh->dev;
 	struct hexium *hexium = (struct hexium *) dev->ext_priv;
diff --git a/drivers/media/video/hexium_orion.c b/drivers/media/video/hexium_orion.c
index 8d3c148..074bec7 100644
--- a/drivers/media/video/hexium_orion.c
+++ b/drivers/media/video/hexium_orion.c
@@ -370,7 +370,7 @@ static int hexium_detach(struct saa7146_dev *dev)
 	return 0;
 }
 
-static int hexium_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg)
+static long hexium_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg)
 {
 	struct saa7146_dev *dev = fh->dev;
 	struct hexium *hexium = (struct hexium *) dev->ext_priv;
diff --git a/drivers/media/video/ivtv/ivtv-ioctl.c b/drivers/media/video/ivtv/ivtv-ioctl.c
index a6cd024..1f6ca93 100644
--- a/drivers/media/video/ivtv/ivtv-ioctl.c
+++ b/drivers/media/video/ivtv/ivtv-ioctl.c
@@ -1725,7 +1725,7 @@ static int ivtv_decoder_ioctls(struct file *filp, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int ivtv_default(struct file *file, void *fh, int cmd, void *arg)
+static long ivtv_default(struct file *file, void *fh, int cmd, void *arg)
 {
 	struct ivtv *itv = ((struct ivtv_open_id *)fh)->itv;
 
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index c408e61..b76e33d 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -1577,7 +1577,7 @@ static int vidioc_streamoff(struct file *file, void *fh, enum v4l2_buf_type i)
 	return 0;
 }
 
-static int vidioc_default(struct file *file, void *fh, int cmd, void *arg)
+static long vidioc_default(struct file *file, void *fh, int cmd, void *arg)
 {
 	switch (cmd) {
 	case MEYEIOC_G_PARAMS:
diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c
index a622dbb..b8577ad 100644
--- a/drivers/media/video/msp3400-driver.c
+++ b/drivers/media/video/msp3400-driver.c
@@ -483,7 +483,7 @@ static int msp_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl)
 }
 
 #ifdef CONFIG_VIDEO_ALLOW_V4L1
-static int msp_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
+static long msp_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
 {
 	struct msp_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/mxb.c b/drivers/media/video/mxb.c
index 7f13028..e3cbe14 100644
--- a/drivers/media/video/mxb.c
+++ b/drivers/media/video/mxb.c
@@ -489,7 +489,7 @@ static int mxb_detach(struct saa7146_dev *dev)
 	return 0;
 }
 
-static int mxb_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg)
+static long mxb_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg)
 {
 	struct saa7146_dev *dev = fh->dev;
 	struct mxb *mxb = (struct mxb *)dev->ext_priv;
diff --git a/drivers/media/video/ov511.c b/drivers/media/video/ov511.c
index f1754dc..9af5532 100644
--- a/drivers/media/video/ov511.c
+++ b/drivers/media/video/ov511.c
@@ -4010,7 +4010,7 @@ ov51x_v4l1_close(struct file *file)
 }
 
 /* Do not call this function directly! */
-static int
+static long
 ov51x_v4l1_ioctl_internal(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *vdev = file->private_data;
@@ -4449,7 +4449,7 @@ redo:
 	return 0;
 }
 
-static int
+static long
 ov51x_v4l1_ioctl(struct file *file,
 		 unsigned int cmd, unsigned long arg)
 {
diff --git a/drivers/media/video/pms.c b/drivers/media/video/pms.c
index 24f2b3d..a1ad38f 100644
--- a/drivers/media/video/pms.c
+++ b/drivers/media/video/pms.c
@@ -680,7 +680,7 @@ static int pms_capture(struct pms_device *dev, char __user *buf, int rgb555, int
  *	Video4linux interfacing
  */
 
-static int pms_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long pms_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *dev = video_devdata(file);
 	struct pms_device *pd=(struct pms_device *)dev;
@@ -862,7 +862,7 @@ static int pms_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int pms_ioctl(struct file *file,
+static long pms_ioctl(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, pms_do_ioctl);
diff --git a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
index 50554b4..b9aedce 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
@@ -168,13 +168,13 @@ static const char *get_v4l_name(int v4l_type)
  * This is part of Video 4 Linux API. The procedure handles ioctl() calls.
  *
  */
-static int pvr2_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long pvr2_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct pvr2_v4l2_fh *fh = file->private_data;
 	struct pvr2_v4l2 *vp = fh->vhead;
 	struct pvr2_v4l2_dev *dev_info = fh->dev_info;
 	struct pvr2_hdw *hdw = fh->channel.mc_head->hdw;
-	int ret = -EINVAL;
+	long ret = -EINVAL;
 
 	if (pvrusb2_debug & PVR2_TRACE_V4LIOCTL) {
 		v4l_print_ioctl(pvr2_hdw_get_driver_name(hdw),cmd);
@@ -871,20 +871,20 @@ static int pvr2_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	if (ret < 0) {
 		if (pvrusb2_debug & PVR2_TRACE_V4LIOCTL) {
 			pvr2_trace(PVR2_TRACE_V4LIOCTL,
-				   "pvr2_v4l2_do_ioctl failure, ret=%d",ret);
+				   "pvr2_v4l2_do_ioctl failure, ret=%ld", ret);
 		} else {
 			if (pvrusb2_debug & PVR2_TRACE_V4LIOCTL) {
 				pvr2_trace(PVR2_TRACE_V4LIOCTL,
-					   "pvr2_v4l2_do_ioctl failure, ret=%d"
-					   " command was:",ret);
+					   "pvr2_v4l2_do_ioctl failure, ret=%ld"
+					   " command was:", ret);
 				v4l_print_ioctl(pvr2_hdw_get_driver_name(hdw),
 						cmd);
 			}
 		}
 	} else {
 		pvr2_trace(PVR2_TRACE_V4LIOCTL,
-			   "pvr2_v4l2_do_ioctl complete, ret=%d (0x%x)",
-			   ret,ret);
+			   "pvr2_v4l2_do_ioctl complete, ret=%ld (0x%lx)",
+			   ret, ret);
 	}
 	return ret;
 }
@@ -948,7 +948,7 @@ static void pvr2_v4l2_internal_check(struct pvr2_channel *chp)
 }
 
 
-static int pvr2_v4l2_ioctl(struct file *file,
+static long pvr2_v4l2_ioctl(struct file *file,
 			   unsigned int cmd, unsigned long arg)
 {
 
diff --git a/drivers/media/video/pwc/pwc-ctrl.c b/drivers/media/video/pwc/pwc-ctrl.c
index c665302..f9fbe02 100644
--- a/drivers/media/video/pwc/pwc-ctrl.c
+++ b/drivers/media/video/pwc/pwc-ctrl.c
@@ -1266,9 +1266,9 @@ int pwc_get_cmos_sensor(struct pwc_device *pdev, int *sensor)
 /* copy local variable to arg */
 #define ARG_OUT(ARG_name) /* nothing */
 
-int pwc_ioctl(struct pwc_device *pdev, unsigned int cmd, void *arg)
+long pwc_ioctl(struct pwc_device *pdev, unsigned int cmd, void *arg)
 {
-	int ret = 0;
+	long ret = 0;
 
 	switch(cmd) {
 	case VIDIOCPWCRUSER:
diff --git a/drivers/media/video/pwc/pwc-if.c b/drivers/media/video/pwc/pwc-if.c
index 315337b..39fbc97 100644
--- a/drivers/media/video/pwc/pwc-if.c
+++ b/drivers/media/video/pwc/pwc-if.c
@@ -147,7 +147,7 @@ static int pwc_video_close(struct file *file);
 static ssize_t pwc_video_read(struct file *file, char __user *buf,
 			  size_t count, loff_t *ppos);
 static unsigned int pwc_video_poll(struct file *file, poll_table *wait);
-static int  pwc_video_ioctl(struct file *file,
+static long  pwc_video_ioctl(struct file *file,
 			    unsigned int ioctlnr, unsigned long arg);
 static int  pwc_video_mmap(struct file *file, struct vm_area_struct *vma);
 
@@ -1395,12 +1395,12 @@ static unsigned int pwc_video_poll(struct file *file, poll_table *wait)
 	return 0;
 }
 
-static int pwc_video_ioctl(struct file *file,
+static long pwc_video_ioctl(struct file *file,
 			   unsigned int cmd, unsigned long arg)
 {
 	struct video_device *vdev = file->private_data;
 	struct pwc_device *pdev;
-	int r = -ENODEV;
+	long r = -ENODEV;
 
 	if (!vdev)
 		goto out;
diff --git a/drivers/media/video/pwc/pwc-v4l.c b/drivers/media/video/pwc/pwc-v4l.c
index d7c1473..bc0a464 100644
--- a/drivers/media/video/pwc/pwc-v4l.c
+++ b/drivers/media/video/pwc/pwc-v4l.c
@@ -337,7 +337,7 @@ static int pwc_vidioc_set_fmt(struct pwc_device *pdev, struct v4l2_format *f)
 
 }
 
-int pwc_video_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+long pwc_video_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *vdev = video_devdata(file);
 	struct pwc_device *pdev;
diff --git a/drivers/media/video/pwc/pwc.h b/drivers/media/video/pwc/pwc.h
index c046a25..01411fb 100644
--- a/drivers/media/video/pwc/pwc.h
+++ b/drivers/media/video/pwc/pwc.h
@@ -337,10 +337,10 @@ extern int pwc_get_dynamic_noise(struct pwc_device *pdev, int *noise);
 extern int pwc_camera_power(struct pwc_device *pdev, int power);
 
 /* Private ioctl()s; see pwc-ioctl.h */
-extern int pwc_ioctl(struct pwc_device *pdev, unsigned int cmd, void *arg);
+extern long pwc_ioctl(struct pwc_device *pdev, unsigned int cmd, void *arg);
 
 /** Functions in pwc-v4l.c */
-extern int pwc_video_do_ioctl(struct file *file, unsigned int cmd, void *arg);
+extern long pwc_video_do_ioctl(struct file *file, unsigned int cmd, void *arg);
 
 /** pwc-uncompress.c */
 /* Expand frame to image, possibly including decompression. Uses read_frame and fill_image */
diff --git a/drivers/media/video/saa5246a.c b/drivers/media/video/saa5246a.c
index 018dee5..e637e44 100644
--- a/drivers/media/video/saa5246a.c
+++ b/drivers/media/video/saa5246a.c
@@ -804,7 +804,7 @@ static inline int saa5246a_stop_dau(struct saa5246a_device *t,
  *
  *  Returns 0 if successful
  */
-static int do_saa5246a_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long do_saa5246a_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct saa5246a_device *t = video_drvdata(file);
 
@@ -944,11 +944,11 @@ static inline unsigned int vtx_fix_command(unsigned int cmd)
 /*
  *	Handle the locking
  */
-static int saa5246a_ioctl(struct file *file,
+static long saa5246a_ioctl(struct file *file,
 			 unsigned int cmd, unsigned long arg)
 {
 	struct saa5246a_device *t = video_drvdata(file);
-	int err;
+	long err;
 
 	cmd = vtx_fix_command(cmd);
 	mutex_lock(&t->lock);
diff --git a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c
index e73bb73..e297651 100644
--- a/drivers/media/video/saa5249.c
+++ b/drivers/media/video/saa5249.c
@@ -190,7 +190,7 @@ static int i2c_getdata(struct saa5249_device *t, int count, u8 *buf)
  *	Standard character-device-driver functions
  */
 
-static int do_saa5249_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long do_saa5249_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	static int virtual_mode = false;
 	struct saa5249_device *t = video_drvdata(file);
@@ -479,11 +479,11 @@ static inline unsigned int vtx_fix_command(unsigned int cmd)
  *	Handle the locking
  */
 
-static int saa5249_ioctl(struct file *file,
+static long saa5249_ioctl(struct file *file,
 			 unsigned int cmd, unsigned long arg)
 {
 	struct saa5249_device *t = video_drvdata(file);
-	int err;
+	long err;
 
 	cmd = vtx_fix_command(cmd);
 	mutex_lock(&t->lock);
diff --git a/drivers/media/video/se401.c b/drivers/media/video/se401.c
index 5b27f323..5990ab3 100644
--- a/drivers/media/video/se401.c
+++ b/drivers/media/video/se401.c
@@ -975,7 +975,7 @@ static int se401_close(struct file *file)
 	return 0;
 }
 
-static int se401_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long se401_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *vdev = file->private_data;
 	struct usb_se401 *se401 = (struct usb_se401 *)vdev;
@@ -1138,7 +1138,7 @@ static int se401_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int se401_ioctl(struct file *file,
+static long se401_ioctl(struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, se401_do_ioctl);
diff --git a/drivers/media/video/sn9c102/sn9c102_core.c b/drivers/media/video/sn9c102/sn9c102_core.c
index c2582e2..23edfdc 100644
--- a/drivers/media/video/sn9c102/sn9c102_core.c
+++ b/drivers/media/video/sn9c102/sn9c102_core.c
@@ -3092,7 +3092,7 @@ sn9c102_vidioc_s_audio(struct sn9c102_device* cam, void __user * arg)
 }
 
 
-static int sn9c102_ioctl_v4l2(struct file *filp,
+static long sn9c102_ioctl_v4l2(struct file *filp,
 			      unsigned int cmd, void __user *arg)
 {
 	struct sn9c102_device *cam = video_drvdata(filp);
@@ -3196,7 +3196,7 @@ static int sn9c102_ioctl_v4l2(struct file *filp,
 }
 
 
-static int sn9c102_ioctl(struct file *filp,
+static long sn9c102_ioctl(struct file *filp,
 			 unsigned int cmd, unsigned long arg)
 {
 	struct sn9c102_device *cam = video_drvdata(filp);
diff --git a/drivers/media/video/stradis.c b/drivers/media/video/stradis.c
index 10d2608..0eb3130 100644
--- a/drivers/media/video/stradis.c
+++ b/drivers/media/video/stradis.c
@@ -1275,7 +1275,7 @@ static void make_clip_tab(struct saa7146 *saa, struct video_clip *cr, int ncr)
 		clip_draw_rectangle(clipmap, 0, 0, 1024, -saa->win.y);
 }
 
-static int saa_ioctl(struct file *file,
+static long saa_ioctl(struct file *file,
 		     unsigned int cmd, unsigned long argl)
 {
 	struct saa7146 *saa = file->private_data;
diff --git a/drivers/media/video/stv680.c b/drivers/media/video/stv680.c
index 0783b0a..75f286f 100644
--- a/drivers/media/video/stv680.c
+++ b/drivers/media/video/stv680.c
@@ -1132,7 +1132,7 @@ static int stv_close(struct file *file)
 	return 0;
 }
 
-static int stv680_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long stv680_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *vdev = file->private_data;
 	struct usb_stv *stv680 = video_get_drvdata(vdev);
@@ -1299,7 +1299,7 @@ static int stv680_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int stv680_ioctl(struct file *file,
+static long stv680_ioctl(struct file *file,
 			unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, stv680_do_ioctl);
diff --git a/drivers/media/video/tda9840.c b/drivers/media/video/tda9840.c
index 2644e0d..6afb705 100644
--- a/drivers/media/video/tda9840.c
+++ b/drivers/media/video/tda9840.c
@@ -137,7 +137,7 @@ static int tda9840_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *t)
 	return 0;
 }
 
-static int tda9840_ioctl(struct v4l2_subdev *sd, unsigned cmd, void *arg)
+static long tda9840_ioctl(struct v4l2_subdev *sd, unsigned cmd, void *arg)
 {
 	int byte;
 
diff --git a/drivers/media/video/tea6415c.c b/drivers/media/video/tea6415c.c
index 31dde86..7519fd1 100644
--- a/drivers/media/video/tea6415c.c
+++ b/drivers/media/video/tea6415c.c
@@ -122,7 +122,7 @@ static int switch_matrix(struct i2c_client *client, int i, int o)
 	return ret;
 }
 
-static int tea6415c_ioctl(struct v4l2_subdev *sd, unsigned cmd, void *arg)
+static long tea6415c_ioctl(struct v4l2_subdev *sd, unsigned cmd, void *arg)
 {
 	if (cmd == TEA6415C_SWITCH) {
 		struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/tea6420.c b/drivers/media/video/tea6420.c
index 38e519f..081e74f 100644
--- a/drivers/media/video/tea6420.c
+++ b/drivers/media/video/tea6420.c
@@ -90,7 +90,7 @@ static int tea6420_switch(struct i2c_client *client, int i, int o, int g)
 	return 0;
 }
 
-static int tea6420_ioctl(struct v4l2_subdev *sd, unsigned cmd, void *arg)
+static long tea6420_ioctl(struct v4l2_subdev *sd, unsigned cmd, void *arg)
 {
 	if (cmd == TEA6420_SWITCH) {
 		struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c
index 97d7509..30640fb 100644
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -800,7 +800,7 @@ static int tuner_s_standby(struct v4l2_subdev *sd, u32 standby)
 }
 
 #ifdef CONFIG_VIDEO_ALLOW_V4L1
-static int tuner_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
+static long tuner_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
 {
 	struct tuner *t = to_tuner(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/usbvideo/usbvideo.c b/drivers/media/video/usbvideo/usbvideo.c
index 9bf8243..dea8b32 100644
--- a/drivers/media/video/usbvideo/usbvideo.c
+++ b/drivers/media/video/usbvideo/usbvideo.c
@@ -41,7 +41,7 @@ module_param(video_nr, int, 0);
 static void usbvideo_Disconnect(struct usb_interface *intf);
 static void usbvideo_CameraRelease(struct uvd *uvd);
 
-static int usbvideo_v4l_ioctl(struct file *file,
+static long usbvideo_v4l_ioctl(struct file *file,
 			      unsigned int cmd, unsigned long arg);
 static int usbvideo_v4l_mmap(struct file *file, struct vm_area_struct *vma);
 static int usbvideo_v4l_open(struct file *file);
@@ -1277,7 +1277,7 @@ static int usbvideo_v4l_close(struct file *file)
  * History:
  * 22-Jan-2000 Corrected VIDIOCSPICT to reject unsupported settings.
  */
-static int usbvideo_v4l_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long usbvideo_v4l_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct uvd *uvd = file->private_data;
 
@@ -1497,7 +1497,7 @@ static int usbvideo_v4l_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int usbvideo_v4l_ioctl(struct file *file,
+static long usbvideo_v4l_ioctl(struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, usbvideo_v4l_do_ioctl);
diff --git a/drivers/media/video/usbvideo/vicam.c b/drivers/media/video/usbvideo/vicam.c
index 53197a4..2f11063 100644
--- a/drivers/media/video/usbvideo/vicam.c
+++ b/drivers/media/video/usbvideo/vicam.c
@@ -229,12 +229,12 @@ set_camera_power(struct vicam_camera *cam, int state)
 	return 0;
 }
 
-static int
+static long
 vicam_ioctl(struct file *file, unsigned int ioctlnr, unsigned long arg)
 {
 	void __user *user_arg = (void __user *)arg;
 	struct vicam_camera *cam = file->private_data;
-	int retval = 0;
+	long retval = 0;
 
 	if (!cam)
 		return -ENODEV;
diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c
index 21456b8..7c61c6d 100644
--- a/drivers/media/video/usbvision/usbvision-video.c
+++ b/drivers/media/video/usbvision/usbvision-video.c
@@ -1278,14 +1278,14 @@ static int usbvision_vbi_close(struct file *file)
 	return -ENODEV;
 }
 
-static int usbvision_do_vbi_ioctl(struct file *file,
+static long usbvision_do_vbi_ioctl(struct file *file,
 				 unsigned int cmd, void *arg)
 {
 	/* TODO */
 	return -ENOIOCTLCMD;
 }
 
-static int usbvision_vbi_ioctl(struct file *file,
+static long usbvision_vbi_ioctl(struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, usbvision_do_vbi_ioctl);
diff --git a/drivers/media/video/uvc/uvc_v4l2.c b/drivers/media/video/uvc/uvc_v4l2.c
index df9e937..fa150ff 100644
--- a/drivers/media/video/uvc/uvc_v4l2.c
+++ b/drivers/media/video/uvc/uvc_v4l2.c
@@ -472,12 +472,12 @@ static int uvc_v4l2_release(struct file *file)
 	return 0;
 }
 
-static int uvc_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long uvc_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *vdev = video_devdata(file);
 	struct uvc_video_device *video = video_get_drvdata(vdev);
 	struct uvc_fh *handle = (struct uvc_fh *)file->private_data;
-	int ret = 0;
+	long ret = 0;
 
 	switch (cmd) {
 	/* Query capabilities */
@@ -996,7 +996,7 @@ static int uvc_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return ret;
 }
 
-static int uvc_v4l2_ioctl(struct file *file,
+static long uvc_v4l2_ioctl(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
 	if (uvc_trace_param & UVC_TRACE_IOCTL) {
diff --git a/drivers/media/video/v4l1-compat.c b/drivers/media/video/v4l1-compat.c
index f13c0a9..d450cab 100644
--- a/drivers/media/video/v4l1-compat.c
+++ b/drivers/media/video/v4l1-compat.c
@@ -267,12 +267,12 @@ done:
 
 /* ----------------------------------------------------------------- */
 
-static noinline int v4l1_compat_get_capabilities(
+static noinline long v4l1_compat_get_capabilities(
 					struct video_capability *cap,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_framebuffer fbuf;
 	struct v4l2_capability *cap2;
 
@@ -286,13 +286,13 @@ static noinline int v4l1_compat_get_capabilities(
 
 	err = drv(file, VIDIOC_QUERYCAP, cap2);
 	if (err < 0) {
-		dprintk("VIDIOCGCAP / VIDIOC_QUERYCAP: %d\n", err);
+		dprintk("VIDIOCGCAP / VIDIOC_QUERYCAP: %ld\n", err);
 		goto done;
 	}
 	if (cap2->capabilities & V4L2_CAP_VIDEO_OVERLAY) {
 		err = drv(file, VIDIOC_G_FBUF, &fbuf);
 		if (err < 0) {
-			dprintk("VIDIOCGCAP / VIDIOC_G_FBUF: %d\n", err);
+			dprintk("VIDIOCGCAP / VIDIOC_G_FBUF: %ld\n", err);
 			memset(&fbuf, 0, sizeof(fbuf));
 		}
 		err = 0;
@@ -324,12 +324,12 @@ done:
 	return err;
 }
 
-static noinline int v4l1_compat_get_frame_buffer(
+static noinline long v4l1_compat_get_frame_buffer(
 					struct video_buffer *buffer,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_framebuffer fbuf;
 
 	memset(buffer, 0, sizeof(*buffer));
@@ -337,7 +337,7 @@ static noinline int v4l1_compat_get_frame_buffer(
 
 	err = drv(file, VIDIOC_G_FBUF, &fbuf);
 	if (err < 0) {
-		dprintk("VIDIOCGFBUF / VIDIOC_G_FBUF: %d\n", err);
+		dprintk("VIDIOCGFBUF / VIDIOC_G_FBUF: %ld\n", err);
 		goto done;
 	}
 	buffer->base   = fbuf.base;
@@ -378,12 +378,12 @@ done:
 	return err;
 }
 
-static noinline int v4l1_compat_set_frame_buffer(
+static noinline long v4l1_compat_set_frame_buffer(
 					struct video_buffer *buffer,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_framebuffer fbuf;
 
 	memset(&fbuf, 0, sizeof(fbuf));
@@ -410,16 +410,16 @@ static noinline int v4l1_compat_set_frame_buffer(
 	fbuf.fmt.bytesperline = buffer->bytesperline;
 	err = drv(file, VIDIOC_S_FBUF, &fbuf);
 	if (err < 0)
-		dprintk("VIDIOCSFBUF / VIDIOC_S_FBUF: %d\n", err);
+		dprintk("VIDIOCSFBUF / VIDIOC_S_FBUF: %ld\n", err);
 	return err;
 }
 
-static noinline int v4l1_compat_get_win_cap_dimensions(
+static noinline long v4l1_compat_get_win_cap_dimensions(
 					struct video_window *win,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_format *fmt;
 
 	fmt = kzalloc(sizeof(*fmt), GFP_KERNEL);
@@ -432,7 +432,7 @@ static noinline int v4l1_compat_get_win_cap_dimensions(
 	fmt->type = V4L2_BUF_TYPE_VIDEO_OVERLAY;
 	err = drv(file, VIDIOC_G_FMT, fmt);
 	if (err < 0)
-		dprintk("VIDIOCGWIN / VIDIOC_G_WIN: %d\n", err);
+		dprintk("VIDIOCGWIN / VIDIOC_G_WIN: %ld\n", err);
 	if (err == 0) {
 		win->x         = fmt->fmt.win.w.left;
 		win->y         = fmt->fmt.win.w.top;
@@ -447,7 +447,7 @@ static noinline int v4l1_compat_get_win_cap_dimensions(
 	fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 	err = drv(file, VIDIOC_G_FMT, fmt);
 	if (err < 0) {
-		dprintk("VIDIOCGWIN / VIDIOC_G_FMT: %d\n", err);
+		dprintk("VIDIOCGWIN / VIDIOC_G_FMT: %ld\n", err);
 		goto done;
 	}
 	win->x         = 0;
@@ -462,12 +462,12 @@ done:
 	return err;
 }
 
-static noinline int v4l1_compat_set_win_cap_dimensions(
+static noinline long v4l1_compat_set_win_cap_dimensions(
 					struct video_window *win,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err, err1, err2;
+	long err, err1, err2;
 	struct v4l2_format *fmt;
 
 	fmt = kzalloc(sizeof(*fmt), GFP_KERNEL);
@@ -479,7 +479,7 @@ static noinline int v4l1_compat_set_win_cap_dimensions(
 	drv(file, VIDIOC_STREAMOFF, &fmt->type);
 	err1 = drv(file, VIDIOC_G_FMT, fmt);
 	if (err1 < 0)
-		dprintk("VIDIOCSWIN / VIDIOC_G_FMT: %d\n", err1);
+		dprintk("VIDIOCSWIN / VIDIOC_G_FMT: %ld\n", err1);
 	if (err1 == 0) {
 		fmt->fmt.pix.width  = win->width;
 		fmt->fmt.pix.height = win->height;
@@ -487,7 +487,7 @@ static noinline int v4l1_compat_set_win_cap_dimensions(
 		fmt->fmt.pix.bytesperline = 0;
 		err = drv(file, VIDIOC_S_FMT, fmt);
 		if (err < 0)
-			dprintk("VIDIOCSWIN / VIDIOC_S_FMT #1: %d\n",
+			dprintk("VIDIOCSWIN / VIDIOC_S_FMT #1: %ld\n",
 				err);
 		win->width  = fmt->fmt.pix.width;
 		win->height = fmt->fmt.pix.height;
@@ -504,7 +504,7 @@ static noinline int v4l1_compat_set_win_cap_dimensions(
 	fmt->fmt.win.clipcount = win->clipcount;
 	err2 = drv(file, VIDIOC_S_FMT, fmt);
 	if (err2 < 0)
-		dprintk("VIDIOCSWIN / VIDIOC_S_FMT #2: %d\n", err2);
+		dprintk("VIDIOCSWIN / VIDIOC_S_FMT #2: %ld\n", err2);
 
 	if (err1 != 0 && err2 != 0)
 		err = err1;
@@ -514,12 +514,12 @@ static noinline int v4l1_compat_set_win_cap_dimensions(
 	return err;
 }
 
-static noinline int v4l1_compat_turn_preview_on_off(
+static noinline long v4l1_compat_turn_preview_on_off(
 					int *on,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	enum v4l2_buf_type captype = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 
 	if (0 == *on) {
@@ -530,16 +530,16 @@ static noinline int v4l1_compat_turn_preview_on_off(
 	}
 	err = drv(file, VIDIOC_OVERLAY, on);
 	if (err < 0)
-		dprintk("VIDIOCCAPTURE / VIDIOC_PREVIEW: %d\n", err);
+		dprintk("VIDIOCCAPTURE / VIDIOC_PREVIEW: %ld\n", err);
 	return err;
 }
 
-static noinline int v4l1_compat_get_input_info(
+static noinline long v4l1_compat_get_input_info(
 					struct video_channel *chan,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_input	input2;
 	v4l2_std_id    		sid;
 
@@ -548,7 +548,7 @@ static noinline int v4l1_compat_get_input_info(
 	err = drv(file, VIDIOC_ENUMINPUT, &input2);
 	if (err < 0) {
 		dprintk("VIDIOCGCHAN / VIDIOC_ENUMINPUT: "
-			"channel=%d err=%d\n", chan->channel, err);
+			"channel=%d err=%ld\n", chan->channel, err);
 		goto done;
 	}
 	chan->channel = input2.index;
@@ -569,7 +569,7 @@ static noinline int v4l1_compat_get_input_info(
 	chan->norm = 0;
 	err = drv(file, VIDIOC_G_STD, &sid);
 	if (err < 0)
-		dprintk("VIDIOCGCHAN / VIDIOC_G_STD: %d\n", err);
+		dprintk("VIDIOCGCHAN / VIDIOC_G_STD: %ld\n", err);
 	if (err == 0) {
 		if (sid & V4L2_STD_PAL)
 			chan->norm = VIDEO_MODE_PAL;
@@ -582,17 +582,17 @@ done:
 	return err;
 }
 
-static noinline int v4l1_compat_set_input(
+static noinline long v4l1_compat_set_input(
 					struct video_channel *chan,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	v4l2_std_id sid = 0;
 
 	err = drv(file, VIDIOC_S_INPUT, &chan->channel);
 	if (err < 0)
-		dprintk("VIDIOCSCHAN / VIDIOC_S_INPUT: %d\n", err);
+		dprintk("VIDIOCSCHAN / VIDIOC_S_INPUT: %ld\n", err);
 	switch (chan->norm) {
 	case VIDEO_MODE_PAL:
 		sid = V4L2_STD_PAL;
@@ -607,17 +607,17 @@ static noinline int v4l1_compat_set_input(
 	if (0 != sid) {
 		err = drv(file, VIDIOC_S_STD, &sid);
 		if (err < 0)
-			dprintk("VIDIOCSCHAN / VIDIOC_S_STD: %d\n", err);
+			dprintk("VIDIOCSCHAN / VIDIOC_S_STD: %ld\n", err);
 	}
 	return err;
 }
 
-static noinline int v4l1_compat_get_picture(
+static noinline long v4l1_compat_get_picture(
 					struct video_picture *pict,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_format *fmt;
 
 	fmt = kzalloc(sizeof(*fmt), GFP_KERNEL);
@@ -640,7 +640,7 @@ static noinline int v4l1_compat_get_picture(
 	fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 	err = drv(file, VIDIOC_G_FMT, fmt);
 	if (err < 0) {
-		dprintk("VIDIOCGPICT / VIDIOC_G_FMT: %d\n", err);
+		dprintk("VIDIOCGPICT / VIDIOC_G_FMT: %ld\n", err);
 		goto done;
 	}
 
@@ -654,12 +654,12 @@ done:
 	return err;
 }
 
-static noinline int v4l1_compat_set_picture(
+static noinline long v4l1_compat_set_picture(
 					struct video_picture *pict,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_framebuffer fbuf;
 	int mem_err = 0, ovl_err = 0;
 	struct v4l2_format *fmt;
@@ -694,7 +694,7 @@ static noinline int v4l1_compat_set_picture(
 	   support memory capture.  Trying to set the memory capture
 	   parameters would be pointless.  */
 	if (err < 0) {
-		dprintk("VIDIOCSPICT / VIDIOC_G_FMT: %d\n", err);
+		dprintk("VIDIOCSPICT / VIDIOC_G_FMT: %ld\n", err);
 		mem_err = -1000;  /* didn't even try */
 	} else if (fmt->fmt.pix.pixelformat !=
 		 palette_to_pixelformat(pict->palette)) {
@@ -711,7 +711,7 @@ static noinline int v4l1_compat_set_picture(
 	   support overlay.  Trying to set the overlay parameters
 	   would be quite pointless.  */
 	if (err < 0) {
-		dprintk("VIDIOCSPICT / VIDIOC_G_FBUF: %d\n", err);
+		dprintk("VIDIOCSPICT / VIDIOC_G_FBUF: %ld\n", err);
 		ovl_err = -1000;  /* didn't even try */
 	} else if (fbuf.fmt.pixelformat !=
 		 palette_to_pixelformat(pict->palette)) {
@@ -736,12 +736,13 @@ static noinline int v4l1_compat_set_picture(
 	return err;
 }
 
-static noinline int v4l1_compat_get_tuner(
+static noinline long v4l1_compat_get_tuner(
 					struct video_tuner *tun,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err, i;
+	long err;
+	int i;
 	struct v4l2_tuner	tun2;
 	struct v4l2_standard	std2;
 	v4l2_std_id    		sid;
@@ -749,7 +750,7 @@ static noinline int v4l1_compat_get_tuner(
 	memset(&tun2, 0, sizeof(tun2));
 	err = drv(file, VIDIOC_G_TUNER, &tun2);
 	if (err < 0) {
-		dprintk("VIDIOCGTUNER / VIDIOC_G_TUNER: %d\n", err);
+		dprintk("VIDIOCGTUNER / VIDIOC_G_TUNER: %ld\n", err);
 		goto done;
 	}
 	memcpy(tun->name, tun2.name,
@@ -775,7 +776,7 @@ static noinline int v4l1_compat_get_tuner(
 
 	err = drv(file, VIDIOC_G_STD, &sid);
 	if (err < 0)
-		dprintk("VIDIOCGTUNER / VIDIOC_G_STD: %d\n", err);
+		dprintk("VIDIOCGTUNER / VIDIOC_G_STD: %ld\n", err);
 	if (err == 0) {
 		if (sid & V4L2_STD_PAL)
 			tun->mode = VIDEO_MODE_PAL;
@@ -794,12 +795,12 @@ done:
 	return err;
 }
 
-static noinline int v4l1_compat_select_tuner(
+static noinline long v4l1_compat_select_tuner(
 					struct video_tuner *tun,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_tuner	t;/*84 bytes on x86_64*/
 	memset(&t, 0, sizeof(t));
 
@@ -807,34 +808,34 @@ static noinline int v4l1_compat_select_tuner(
 
 	err = drv(file, VIDIOC_S_INPUT, &t);
 	if (err < 0)
-		dprintk("VIDIOCSTUNER / VIDIOC_S_INPUT: %d\n", err);
+		dprintk("VIDIOCSTUNER / VIDIOC_S_INPUT: %ld\n", err);
 	return err;
 }
 
-static noinline int v4l1_compat_get_frequency(
+static noinline long v4l1_compat_get_frequency(
 					unsigned long *freq,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_frequency   freq2;
 	memset(&freq2, 0, sizeof(freq2));
 
 	freq2.tuner = 0;
 	err = drv(file, VIDIOC_G_FREQUENCY, &freq2);
 	if (err < 0)
-		dprintk("VIDIOCGFREQ / VIDIOC_G_FREQUENCY: %d\n", err);
+		dprintk("VIDIOCGFREQ / VIDIOC_G_FREQUENCY: %ld\n", err);
 	if (0 == err)
 		*freq = freq2.frequency;
 	return err;
 }
 
-static noinline int v4l1_compat_set_frequency(
+static noinline long v4l1_compat_set_frequency(
 					unsigned long *freq,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_frequency   freq2;
 	memset(&freq2, 0, sizeof(freq2));
 
@@ -842,16 +843,17 @@ static noinline int v4l1_compat_set_frequency(
 	freq2.frequency = *freq;
 	err = drv(file, VIDIOC_S_FREQUENCY, &freq2);
 	if (err < 0)
-		dprintk("VIDIOCSFREQ / VIDIOC_S_FREQUENCY: %d\n", err);
+		dprintk("VIDIOCSFREQ / VIDIOC_S_FREQUENCY: %ld\n", err);
 	return err;
 }
 
-static noinline int v4l1_compat_get_audio(
+static noinline long v4l1_compat_get_audio(
 					struct video_audio *aud,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err, i;
+	long err;
+	int i;
 	struct v4l2_queryctrl	qctrl2;
 	struct v4l2_audio	aud2;
 	struct v4l2_tuner	tun2;
@@ -859,7 +861,7 @@ static noinline int v4l1_compat_get_audio(
 
 	err = drv(file, VIDIOC_G_AUDIO, &aud2);
 	if (err < 0) {
-		dprintk("VIDIOCGAUDIO / VIDIOC_G_AUDIO: %d\n", err);
+		dprintk("VIDIOCGAUDIO / VIDIOC_G_AUDIO: %ld\n", err);
 		goto done;
 	}
 	memcpy(aud->name, aud2.name,
@@ -903,7 +905,7 @@ static noinline int v4l1_compat_get_audio(
 	memset(&tun2, 0, sizeof(tun2));
 	err = drv(file, VIDIOC_G_TUNER, &tun2);
 	if (err < 0) {
-		dprintk("VIDIOCGAUDIO / VIDIOC_G_TUNER: %d\n", err);
+		dprintk("VIDIOCGAUDIO / VIDIOC_G_TUNER: %ld\n", err);
 		err = 0;
 		goto done;
 	}
@@ -918,12 +920,12 @@ done:
 	return err;
 }
 
-static noinline int v4l1_compat_set_audio(
+static noinline long v4l1_compat_set_audio(
 					struct video_audio *aud,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_audio	aud2;
 	struct v4l2_tuner	tun2;
 
@@ -933,7 +935,7 @@ static noinline int v4l1_compat_set_audio(
 	aud2.index = aud->audio;
 	err = drv(file, VIDIOC_S_AUDIO, &aud2);
 	if (err < 0) {
-		dprintk("VIDIOCSAUDIO / VIDIOC_S_AUDIO: %d\n", err);
+		dprintk("VIDIOCSAUDIO / VIDIOC_S_AUDIO: %ld\n", err);
 		goto done;
 	}
 
@@ -950,7 +952,7 @@ static noinline int v4l1_compat_set_audio(
 
 	err = drv(file, VIDIOC_G_TUNER, &tun2);
 	if (err < 0)
-		dprintk("VIDIOCSAUDIO / VIDIOC_G_TUNER: %d\n", err);
+		dprintk("VIDIOCSAUDIO / VIDIOC_G_TUNER: %ld\n", err);
 	if (err == 0) {
 		switch (aud->mode) {
 		default:
@@ -967,19 +969,19 @@ static noinline int v4l1_compat_set_audio(
 		}
 		err = drv(file, VIDIOC_S_TUNER, &tun2);
 		if (err < 0)
-			dprintk("VIDIOCSAUDIO / VIDIOC_S_TUNER: %d\n", err);
+			dprintk("VIDIOCSAUDIO / VIDIOC_S_TUNER: %ld\n", err);
 	}
 	err = 0;
 done:
 	return err;
 }
 
-static noinline int v4l1_compat_capture_frame(
+static noinline long v4l1_compat_capture_frame(
 					struct video_mmap *mm,
 					struct file *file,
 					v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	enum v4l2_buf_type      captype = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 	struct v4l2_buffer	buf;
 	struct v4l2_format	*fmt;
@@ -994,7 +996,7 @@ static noinline int v4l1_compat_capture_frame(
 	fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 	err = drv(file, VIDIOC_G_FMT, fmt);
 	if (err < 0) {
-		dprintk("VIDIOCMCAPTURE / VIDIOC_G_FMT: %d\n", err);
+		dprintk("VIDIOCMCAPTURE / VIDIOC_G_FMT: %ld\n", err);
 		goto done;
 	}
 	if (mm->width   != fmt->fmt.pix.width  ||
@@ -1010,7 +1012,7 @@ static noinline int v4l1_compat_capture_frame(
 		fmt->fmt.pix.bytesperline = 0;
 		err = drv(file, VIDIOC_S_FMT, fmt);
 		if (err < 0) {
-			dprintk("VIDIOCMCAPTURE / VIDIOC_S_FMT: %d\n", err);
+			dprintk("VIDIOCMCAPTURE / VIDIOC_S_FMT: %ld\n", err);
 			goto done;
 		}
 	}
@@ -1018,28 +1020,28 @@ static noinline int v4l1_compat_capture_frame(
 	buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 	err = drv(file, VIDIOC_QUERYBUF, &buf);
 	if (err < 0) {
-		dprintk("VIDIOCMCAPTURE / VIDIOC_QUERYBUF: %d\n", err);
+		dprintk("VIDIOCMCAPTURE / VIDIOC_QUERYBUF: %ld\n", err);
 		goto done;
 	}
 	err = drv(file, VIDIOC_QBUF, &buf);
 	if (err < 0) {
-		dprintk("VIDIOCMCAPTURE / VIDIOC_QBUF: %d\n", err);
+		dprintk("VIDIOCMCAPTURE / VIDIOC_QBUF: %ld\n", err);
 		goto done;
 	}
 	err = drv(file, VIDIOC_STREAMON, &captype);
 	if (err < 0)
-		dprintk("VIDIOCMCAPTURE / VIDIOC_STREAMON: %d\n", err);
+		dprintk("VIDIOCMCAPTURE / VIDIOC_STREAMON: %ld\n", err);
 done:
 	kfree(fmt);
 	return err;
 }
 
-static noinline int v4l1_compat_sync(
+static noinline long v4l1_compat_sync(
 				int *i,
 				struct file *file,
 				v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	enum v4l2_buf_type captype = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 	struct v4l2_buffer buf;
 	struct poll_wqueues *pwq;
@@ -1050,7 +1052,7 @@ static noinline int v4l1_compat_sync(
 	err = drv(file, VIDIOC_QUERYBUF, &buf);
 	if (err < 0) {
 		/*  No such buffer */
-		dprintk("VIDIOCSYNC / VIDIOC_QUERYBUF: %d\n", err);
+		dprintk("VIDIOCSYNC / VIDIOC_QUERYBUF: %ld\n", err);
 		goto done;
 	}
 	if (!(buf.flags & V4L2_BUF_FLAG_MAPPED)) {
@@ -1062,7 +1064,7 @@ static noinline int v4l1_compat_sync(
 	/* make sure capture actually runs so we don't block forever */
 	err = drv(file, VIDIOC_STREAMON, &captype);
 	if (err < 0) {
-		dprintk("VIDIOCSYNC / VIDIOC_STREAMON: %d\n", err);
+		dprintk("VIDIOCSYNC / VIDIOC_STREAMON: %ld\n", err);
 		goto done;
 	}
 
@@ -1076,7 +1078,7 @@ static noinline int v4l1_compat_sync(
 			break;
 		err = drv(file, VIDIOC_QUERYBUF, &buf);
 		if (err < 0)
-			dprintk("VIDIOCSYNC / VIDIOC_QUERYBUF: %d\n", err);
+			dprintk("VIDIOCSYNC / VIDIOC_QUERYBUF: %ld\n", err);
 	}
 	kfree(pwq);
 	if (!(buf.flags & V4L2_BUF_FLAG_DONE)) /* not done */
@@ -1084,18 +1086,18 @@ static noinline int v4l1_compat_sync(
 	do {
 		err = drv(file, VIDIOC_DQBUF, &buf);
 		if (err < 0)
-			dprintk("VIDIOCSYNC / VIDIOC_DQBUF: %d\n", err);
+			dprintk("VIDIOCSYNC / VIDIOC_DQBUF: %ld\n", err);
 	} while (err == 0 && buf.index != *i);
 done:
 	return err;
 }
 
-static noinline int v4l1_compat_get_vbi_format(
+static noinline long v4l1_compat_get_vbi_format(
 				struct vbi_format *fmt,
 				struct file *file,
 				v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_format *fmt2;
 
 	fmt2 = kzalloc(sizeof(*fmt2), GFP_KERNEL);
@@ -1107,7 +1109,7 @@ static noinline int v4l1_compat_get_vbi_format(
 
 	err = drv(file, VIDIOC_G_FMT, fmt2);
 	if (err < 0) {
-		dprintk("VIDIOCGVBIFMT / VIDIOC_G_FMT: %d\n", err);
+		dprintk("VIDIOCGVBIFMT / VIDIOC_G_FMT: %ld\n", err);
 		goto done;
 	}
 	if (fmt2->fmt.vbi.sample_format != V4L2_PIX_FMT_GREY) {
@@ -1128,12 +1130,12 @@ done:
 	return err;
 }
 
-static noinline int v4l1_compat_set_vbi_format(
+static noinline long v4l1_compat_set_vbi_format(
 				struct vbi_format *fmt,
 				struct file *file,
 				v4l2_kioctl drv)
 {
-	int err;
+	long err;
 	struct v4l2_format	*fmt2 = NULL;
 
 	if (VIDEO_PALETTE_RAW != fmt->sample_format) {
@@ -1157,7 +1159,7 @@ static noinline int v4l1_compat_set_vbi_format(
 	fmt2->fmt.vbi.flags            = fmt->flags;
 	err = drv(file, VIDIOC_TRY_FMT, fmt2);
 	if (err < 0) {
-		dprintk("VIDIOCSVBIFMT / VIDIOC_TRY_FMT: %d\n", err);
+		dprintk("VIDIOCSVBIFMT / VIDIOC_TRY_FMT: %ld\n", err);
 		goto done;
 	}
 
@@ -1174,7 +1176,7 @@ static noinline int v4l1_compat_set_vbi_format(
 	}
 	err = drv(file, VIDIOC_S_FMT, fmt2);
 	if (err < 0)
-		dprintk("VIDIOCSVBIFMT / VIDIOC_S_FMT: %d\n", err);
+		dprintk("VIDIOCSVBIFMT / VIDIOC_S_FMT: %ld\n", err);
 done:
 	kfree(fmt2);
 	return err;
@@ -1183,13 +1185,13 @@ done:
 /*
  *	This function is exported.
  */
-int
+long
 v4l_compat_translate_ioctl(struct file		*file,
 			   int			cmd,
 			   void			*arg,
 			   v4l2_kioctl          drv)
 {
-	int err;
+	long err;
 
 	switch (cmd) {
 	case VIDIOCGCAP:	/* capability */
diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index b4f3914..28861e4 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -222,9 +222,9 @@ static int get_microcode32(struct video_code *kp, struct video_code32 __user *up
 
 #endif
 
-static int native_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	int ret = -ENOIOCTLCMD;
+	long ret = -ENOIOCTLCMD;
 
 	if (file->f_op->unlocked_ioctl)
 		ret = file->f_op->unlocked_ioctl(file, cmd, arg);
@@ -705,7 +705,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext
 #define VIDIOC_G_OUTPUT32	_IOR ('V', 46, s32)
 #define VIDIOC_S_OUTPUT32	_IOWR('V', 47, s32)
 
-static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	union {
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
@@ -726,7 +726,7 @@ static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 	} karg;
 	void __user *up = compat_ptr(arg);
 	int compatible_arg = 1;
-	int err = 0;
+	long err = 0;
 
 	/* First, convert the command. */
 	switch (cmd) {
@@ -939,7 +939,7 @@ static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 
 long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	int ret = -ENOIOCTLCMD;
+	long ret = -ENOIOCTLCMD;
 
 	if (!file->f_op->ioctl && !file->f_op->unlocked_ioctl)
 		return ret;
diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index 3b834f4..8f629ef 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -392,14 +392,14 @@ video_fix_command(unsigned int cmd)
 /*
  * Obsolete usercopy function - Should be removed soon
  */
-int
+long
 video_usercopy(struct file *file, unsigned int cmd, unsigned long arg,
 		v4l2_kioctl func)
 {
 	char	sbuf[128];
 	void    *mbuf = NULL;
 	void	*parg = NULL;
-	int	err  = -EINVAL;
+	long	err  = -EINVAL;
 	int     is_ext_ctrl;
 	size_t  ctrls_size = 0;
 	void __user *user_ptr = NULL;
@@ -623,13 +623,13 @@ static int check_fmt(const struct v4l2_ioctl_ops *ops, enum v4l2_buf_type type)
 	return -EINVAL;
 }
 
-static int __video_do_ioctl(struct file *file,
+static long __video_do_ioctl(struct file *file,
 		unsigned int cmd, void *arg)
 {
 	struct video_device *vfd = video_devdata(file);
 	const struct v4l2_ioctl_ops *ops = vfd->ioctl_ops;
 	void *fh = file->private_data;
-	int ret = -EINVAL;
+	long ret = -EINVAL;
 
 	if ((vfd->debug & V4L2_DEBUG_IOCTL) &&
 				!(vfd->debug & V4L2_DEBUG_IOCTL_ARG)) {
@@ -1845,20 +1845,20 @@ static int __video_do_ioctl(struct file *file,
 	if (vfd->debug & V4L2_DEBUG_IOCTL_ARG) {
 		if (ret < 0) {
 			v4l_print_ioctl(vfd->name, cmd);
-			printk(KERN_CONT " error %d\n", ret);
+			printk(KERN_CONT " error %ld\n", ret);
 		}
 	}
 
 	return ret;
 }
 
-int video_ioctl2(struct file *file,
+long video_ioctl2(struct file *file,
 	       unsigned int cmd, unsigned long arg)
 {
 	char	sbuf[128];
 	void    *mbuf = NULL;
 	void	*parg = NULL;
-	int	err  = -EINVAL;
+	long	err  = -EINVAL;
 	int     is_ext_ctrl;
 	size_t  ctrls_size = 0;
 	void __user *user_ptr = NULL;
diff --git a/drivers/media/video/vino.c b/drivers/media/video/vino.c
index 63863fa..88bf845 100644
--- a/drivers/media/video/vino.c
+++ b/drivers/media/video/vino.c
@@ -4237,7 +4237,7 @@ error:
 	return ret;
 }
 
-static int vino_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long vino_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct vino_channel_settings *vcs = video_drvdata(file);
 
@@ -4343,11 +4343,11 @@ static int vino_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int vino_ioctl(struct file *file,
+static long vino_ioctl(struct file *file,
 		      unsigned int cmd, unsigned long arg)
 {
 	struct vino_channel_settings *vcs = video_drvdata(file);
-	int ret;
+	long ret;
 
 	if (mutex_lock_interruptible(&vcs->mutex))
 		return -EINTR;
diff --git a/drivers/media/video/w9966.c b/drivers/media/video/w9966.c
index 91500f5..038ff32 100644
--- a/drivers/media/video/w9966.c
+++ b/drivers/media/video/w9966.c
@@ -180,7 +180,7 @@ static int w9966_i2c_wbyte(struct w9966_dev* cam, int data);
 static int w9966_i2c_rbyte(struct w9966_dev* cam);
 #endif
 
-static int w9966_v4l_ioctl(struct file *file,
+static long w9966_v4l_ioctl(struct file *file,
 			   unsigned int cmd, unsigned long arg);
 static ssize_t w9966_v4l_read(struct file *file, char __user *buf,
 			      size_t count, loff_t *ppos);
@@ -723,7 +723,7 @@ static int w9966_wReg_i2c(struct w9966_dev* cam, int reg, int data)
  *	Video4linux interfacing
  */
 
-static int w9966_v4l_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long w9966_v4l_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct w9966_dev *cam = video_drvdata(file);
 
@@ -873,7 +873,7 @@ static int w9966_v4l_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int w9966_v4l_ioctl(struct file *file,
+static long w9966_v4l_ioctl(struct file *file,
 			   unsigned int cmd, unsigned long arg)
 {
 	return video_usercopy(file, cmd, arg, w9966_v4l_do_ioctl);
diff --git a/drivers/media/video/w9968cf.c b/drivers/media/video/w9968cf.c
index 159b4ed..a3997b7 100644
--- a/drivers/media/video/w9968cf.c
+++ b/drivers/media/video/w9968cf.c
@@ -403,9 +403,9 @@ static const struct v4l2_file_operations w9968cf_fops;
 static int w9968cf_open(struct file *);
 static int w9968cf_release(struct file *);
 static int w9968cf_mmap(struct file *, struct vm_area_struct *);
-static int w9968cf_ioctl(struct file *, unsigned, unsigned long);
+static long w9968cf_ioctl(struct file *, unsigned, unsigned long);
 static ssize_t w9968cf_read(struct file *, char __user *, size_t, loff_t *);
-static int w9968cf_v4l_ioctl(struct file *, unsigned int,
+static long w9968cf_v4l_ioctl(struct file *, unsigned int,
 			     void __user *);
 
 /* USB-specific */
@@ -2885,12 +2885,12 @@ static int w9968cf_mmap(struct file* filp, struct vm_area_struct *vma)
 }
 
 
-static int
+static long
 w9968cf_ioctl(struct file *filp,
 	      unsigned int cmd, unsigned long arg)
 {
 	struct w9968cf_device* cam;
-	int err;
+	long err;
 
 	cam = (struct w9968cf_device*)video_get_drvdata(video_devdata(filp));
 
@@ -2916,7 +2916,7 @@ w9968cf_ioctl(struct file *filp,
 }
 
 
-static int w9968cf_v4l_ioctl(struct file *filp,
+static long w9968cf_v4l_ioctl(struct file *filp,
 			     unsigned int cmd, void __user *arg)
 {
 	struct w9968cf_device* cam;
diff --git a/drivers/media/video/zc0301/zc0301_core.c b/drivers/media/video/zc0301/zc0301_core.c
index 46590f6..9697104 100644
--- a/drivers/media/video/zc0301/zc0301_core.c
+++ b/drivers/media/video/zc0301/zc0301_core.c
@@ -1793,7 +1793,7 @@ zc0301_vidioc_s_parm(struct zc0301_device* cam, void __user * arg)
 }
 
 
-static int zc0301_ioctl_v4l2(struct file *filp,
+static long zc0301_ioctl_v4l2(struct file *filp,
 			     unsigned int cmd, void __user *arg)
 {
 	struct zc0301_device *cam = video_drvdata(filp);
@@ -1888,7 +1888,7 @@ static int zc0301_ioctl_v4l2(struct file *filp,
 }
 
 
-static int zc0301_ioctl(struct file *filp,
+static long zc0301_ioctl(struct file *filp,
 			unsigned int cmd, unsigned long arg)
 {
 	struct zc0301_device *cam = video_drvdata(filp);
diff --git a/drivers/media/video/zoran/zoran_driver.c b/drivers/media/video/zoran/zoran_driver.c
index ce4a5e5..b58b9dd 100644
--- a/drivers/media/video/zoran/zoran_driver.c
+++ b/drivers/media/video/zoran/zoran_driver.c
@@ -1938,7 +1938,7 @@ zoran_set_input (struct zoran *zr,
  *   ioctl routine
  */
 
-static int zoran_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+static long zoran_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct zoran_fh *fh = file->private_data;
 	struct zoran *zr = fh->zr;
@@ -4189,7 +4189,7 @@ static int zoran_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 }
 
 
-static int
+static long
 zoran_ioctl(struct file  *file,
 	    unsigned int  cmd,
 	    unsigned long arg)
diff --git a/include/media/saa7146_vv.h b/include/media/saa7146_vv.h
index fd7f4fe..c8d0b23 100644
--- a/include/media/saa7146_vv.h
+++ b/include/media/saa7146_vv.h
@@ -177,7 +177,7 @@ struct saa7146_ext_vv
 	int (*std_callback)(struct saa7146_dev*, struct saa7146_standard *);
 
 	struct saa7146_extension_ioctls *ioctls;
-	int (*ioctl)(struct saa7146_fh*, unsigned int cmd, void *arg);
+	long (*ioctl)(struct saa7146_fh *, unsigned int cmd, void *arg);
 
 	struct v4l2_file_operations vbi_fops;
 };
@@ -216,7 +216,7 @@ void saa7146_set_gpio(struct saa7146_dev *saa, u8 pin, u8 data);
 extern struct saa7146_use_ops saa7146_video_uops;
 int saa7146_start_preview(struct saa7146_fh *fh);
 int saa7146_stop_preview(struct saa7146_fh *fh);
-int saa7146_video_do_ioctl(struct file *file, unsigned int cmd, void *arg);
+long saa7146_video_do_ioctl(struct file *file, unsigned int cmd, void *arg);
 
 /* from saa7146_vbi.c */
 extern struct saa7146_use_ops saa7146_vbi_uops;
diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
index 4d8ce34..e36faab 100644
--- a/include/media/v4l2-dev.h
+++ b/include/media/v4l2-dev.h
@@ -38,7 +38,7 @@ struct v4l2_file_operations {
 	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
-	int (*ioctl) (struct file *, unsigned int, unsigned long);
+	long (*ioctl) (struct file *, unsigned int, unsigned long);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
 	int (*open) (struct file *);
diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h
index 97b283a..9bf4ccc 100644
--- a/include/media/v4l2-device.h
+++ b/include/media/v4l2-device.h
@@ -80,7 +80,7 @@ void v4l2_device_unregister_subdev(struct v4l2_subdev *sd);
 #define __v4l2_device_call_subdevs_until_err(dev, cond, o, f, args...)  \
 ({ 									\
 	struct v4l2_subdev *sd; 					\
-	int err = 0; 							\
+	long err = 0; 							\
 									\
 	list_for_each_entry(sd, &(dev)->subdevs, list) { 		\
 		if ((cond) && sd->ops->o && sd->ops->o->f) 		\
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index 835af43..172c396 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -239,7 +239,7 @@ struct v4l2_ioctl_ops {
 					   struct v4l2_frmivalenum *fival);
 
 	/* For other private ioctls */
-	int (*vidioc_default)	       (struct file *file, void *fh,
+	long (*vidioc_default)	       (struct file *file, void *fh,
 					int cmd, void *arg);
 };
 
@@ -277,10 +277,10 @@ extern const char *v4l2_field_names[];
 extern const char *v4l2_type_names[];
 
 /*  Compatibility layer interface  --  v4l1-compat module */
-typedef int (*v4l2_kioctl)(struct file *file,
+typedef long (*v4l2_kioctl)(struct file *file,
 			   unsigned int cmd, void *arg);
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
-int v4l_compat_translate_ioctl(struct file *file,
+long v4l_compat_translate_ioctl(struct file *file,
 			       int cmd, void *arg, v4l2_kioctl driver_ioctl);
 #else
 #define v4l_compat_translate_ioctl(file, cmd, arg, ioctl) (-EINVAL)
@@ -293,11 +293,11 @@ extern long v4l_compat_ioctl32(struct file *file, unsigned int cmd,
 #endif
 
 /* Include support for obsoleted stuff */
-extern int video_usercopy(struct file *file, unsigned int cmd,
+extern long video_usercopy(struct file *file, unsigned int cmd,
 				unsigned long arg, v4l2_kioctl func);
 
 /* Standard handlers for V4L ioctl's */
-extern int video_ioctl2(struct file *file,
+extern long video_ioctl2(struct file *file,
 			unsigned int cmd, unsigned long arg);
 
 #endif /* _V4L2_IOCTL_H */
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index ceef016..2517344 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -79,7 +79,7 @@ struct v4l2_subdev_core_ops {
 	int (*g_ctrl)(struct v4l2_subdev *sd, struct v4l2_control *ctrl);
 	int (*s_ctrl)(struct v4l2_subdev *sd, struct v4l2_control *ctrl);
 	int (*querymenu)(struct v4l2_subdev *sd, struct v4l2_querymenu *qm);
-	int (*ioctl)(struct v4l2_subdev *sd, unsigned int cmd, void *arg);
+	long (*ioctl)(struct v4l2_subdev *sd, unsigned int cmd, void *arg);
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	int (*g_register)(struct v4l2_subdev *sd, struct v4l2_register *reg);
 	int (*s_register)(struct v4l2_subdev *sd, struct v4l2_register *reg);
diff --git a/sound/i2c/other/tea575x-tuner.c b/sound/i2c/other/tea575x-tuner.c
index 90f416c..9d98a66 100644
--- a/sound/i2c/other/tea575x-tuner.c
+++ b/sound/i2c/other/tea575x-tuner.c
@@ -84,7 +84,7 @@ static void snd_tea575x_set_freq(struct snd_tea575x *tea)
  * Linux Video interface
  */
 
-static int snd_tea575x_ioctl(struct file *file,
+static long snd_tea575x_ioctl(struct file *file,
 			     unsigned int cmd, unsigned long data)
 {
 	struct snd_tea575x *tea = video_drvdata(file);
-- 
cgit v0.10.2


From 9bb7cde793f0637cfbdd21c04050ffcef33a5624 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 30 Dec 2008 06:42:40 -0300
Subject: V4L/DVB (10139): v4l: rename v4l_compat_ioctl32 to
 v4l2_compat_ioctl32

This rename prevents conflicts with the older compat_ioctl32 module.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index 28861e4..ec81b97 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -937,7 +937,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
 	return err;
 }
 
-long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
+long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	long ret = -ENOIOCTLCMD;
 
@@ -1072,7 +1072,7 @@ long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 	}
 	return ret;
 }
-EXPORT_SYMBOL_GPL(v4l_compat_ioctl32);
+EXPORT_SYMBOL_GPL(v4l2_compat_ioctl32);
 #endif
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c
index 0000134..13f87c2 100644
--- a/drivers/media/video/v4l2-dev.c
+++ b/drivers/media/video/v4l2-dev.c
@@ -253,7 +253,7 @@ static const struct file_operations v4l2_unlocked_fops = {
 	.mmap = v4l2_mmap,
 	.unlocked_ioctl = v4l2_unlocked_ioctl,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
+	.compat_ioctl = v4l2_compat_ioctl32,
 #endif
 	.release = v4l2_release,
 	.poll = v4l2_poll,
@@ -268,7 +268,7 @@ static const struct file_operations v4l2_fops = {
 	.mmap = v4l2_mmap,
 	.ioctl = v4l2_ioctl,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl = v4l_compat_ioctl32,
+	.compat_ioctl = v4l2_compat_ioctl32,
 #endif
 	.release = v4l2_release,
 	.poll = v4l2_poll,
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index 172c396..bf0e723 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -288,7 +288,7 @@ long v4l_compat_translate_ioctl(struct file *file,
 
 #ifdef CONFIG_COMPAT
 /* 32 Bits compatibility layer for 64 bits processors */
-extern long v4l_compat_ioctl32(struct file *file, unsigned int cmd,
+extern long v4l2_compat_ioctl32(struct file *file, unsigned int cmd,
 				unsigned long arg);
 #endif
 
-- 
cgit v0.10.2


From da1b5c95e49bb564ae8c61ed135d34ed09acbb66 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 30 Dec 2008 07:07:53 -0300
Subject: V4L/DVB (10140): gp8psk: fix incorrect return code (EINVAL instead of
 -EINVAL)

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/dvb/dvb-usb/gp8psk.c b/drivers/media/dvb/dvb-usb/gp8psk.c
index c1da962..3dd6843 100644
--- a/drivers/media/dvb/dvb-usb/gp8psk.c
+++ b/drivers/media/dvb/dvb-usb/gp8psk.c
@@ -187,7 +187,7 @@ int gp8psk_bcm4500_reload(struct dvb_usb_device *d)
 	/* load BCM4500 firmware */
 	if (gp_product_id == USB_PID_GENPIX_8PSK_REV_1_WARM)
 		if (gp8psk_load_bcm4500fw(d))
-			return EINVAL;
+			return -EINVAL;
 	return 0;
 }
 
-- 
cgit v0.10.2


From aecde8b53b8ee1330a5a8206200f0d6b8845a6e0 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 30 Dec 2008 07:14:19 -0300
Subject: V4L/DVB (10141): v4l2: debugging API changed to match against driver
 name instead of ID.

Since the i2c driver ID will be removed in the near future we have to
modify the v4l2 debugging API to use the driver name instead of driver ID.

Note that this API is not used in applications other than v4l2-dbg.cpp
as it is for debugging and testing only.

Should anyone use the old VIDIOC_G_CHIP_IDENT, then this will be logged
with a warning that it is deprecated and will be removed in 2.6.30.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt
index 3b483c1..ff12437 100644
--- a/Documentation/video4linux/v4l2-framework.txt
+++ b/Documentation/video4linux/v4l2-framework.txt
@@ -184,7 +184,7 @@ may be NULL if the subdev driver does not support anything from that category.
 It looks like this:
 
 struct v4l2_subdev_core_ops {
-	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip);
+	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip);
 	int (*log_status)(struct v4l2_subdev *sd);
 	int (*init)(struct v4l2_subdev *sd, u32 val);
 	...
diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index ebcb8e5..d2f43bd 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c
@@ -2039,7 +2039,7 @@ static int bttv_log_status(struct file *file, void *f)
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int bttv_g_register(struct file *file, void *f,
-					struct v4l2_register *reg)
+					struct v4l2_dbg_register *reg)
 {
 	struct bttv_fh *fh = f;
 	struct bttv *btv = fh->btv;
@@ -2047,18 +2047,19 @@ static int bttv_g_register(struct file *file, void *f,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 
 	/* bt848 has a 12-bit register space */
 	reg->reg &= 0xfff;
 	reg->val = btread(reg->reg);
+	reg->size = 1;
 
 	return 0;
 }
 
 static int bttv_s_register(struct file *file, void *f,
-					struct v4l2_register *reg)
+					struct v4l2_dbg_register *reg)
 {
 	struct bttv_fh *fh = f;
 	struct bttv *btv = fh->btv;
@@ -2066,7 +2067,7 @@ static int bttv_s_register(struct file *file, void *f,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 
 	/* bt848 has a 12-bit register space */
diff --git a/drivers/media/video/cafe_ccic.c b/drivers/media/video/cafe_ccic.c
index 476171cf..34a39d2 100644
--- a/drivers/media/video/cafe_ccic.c
+++ b/drivers/media/video/cafe_ccic.c
@@ -859,7 +859,7 @@ static int __cafe_cam_reset(struct cafe_camera *cam)
  */
 static int cafe_cam_init(struct cafe_camera *cam)
 {
-	struct v4l2_chip_ident chip = { V4L2_CHIP_MATCH_I2C_ADDR, 0, 0, 0 };
+	struct v4l2_dbg_chip_ident chip;
 	int ret;
 
 	mutex_lock(&cam->s_mutex);
@@ -869,8 +869,9 @@ static int cafe_cam_init(struct cafe_camera *cam)
 	ret = __cafe_cam_reset(cam);
 	if (ret)
 		goto out;
-	chip.match_chip = cam->sensor->addr;
-	ret = __cafe_cam_cmd(cam, VIDIOC_G_CHIP_IDENT, &chip);
+	chip.match.type = V4L2_CHIP_MATCH_I2C_ADDR;
+	chip.match.addr = cam->sensor->addr;
+	ret = __cafe_cam_cmd(cam, VIDIOC_DBG_G_CHIP_IDENT, &chip);
 	if (ret)
 		goto out;
 	cam->sensor_type = chip.ident;
diff --git a/drivers/media/video/cs5345.c b/drivers/media/video/cs5345.c
index 70fcd0d..14bebf8 100644
--- a/drivers/media/video/cs5345.c
+++ b/drivers/media/video/cs5345.c
@@ -95,25 +95,24 @@ static int cs5345_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl)
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int cs5345_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int cs5345_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
+	reg->size = 1;
 	reg->val = cs5345_read(sd, reg->reg & 0x1f);
 	return 0;
 }
 
-static int cs5345_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int cs5345_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -122,7 +121,7 @@ static int cs5345_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
 }
 #endif
 
-static int cs5345_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int cs5345_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/cs53l32a.c b/drivers/media/video/cs53l32a.c
index cb65d51..7292a63 100644
--- a/drivers/media/video/cs53l32a.c
+++ b/drivers/media/video/cs53l32a.c
@@ -102,7 +102,7 @@ static int cs53l32a_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl)
 	return 0;
 }
 
-static int cs53l32a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int cs53l32a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/cx18/cx18-i2c.c b/drivers/media/video/cx18/cx18-i2c.c
index 8941f58..83e1c63 100644
--- a/drivers/media/video/cx18/cx18-i2c.c
+++ b/drivers/media/video/cx18/cx18-i2c.c
@@ -242,7 +242,7 @@ int cx18_call_i2c_client(struct cx18 *cx, int addr, unsigned cmd, void *arg)
 			return retval;
 		}
 	}
-	if (cmd != VIDIOC_G_CHIP_IDENT)
+	if (cmd != VIDIOC_DBG_G_CHIP_IDENT)
 		CX18_ERR("i2c addr 0x%02x not found for cmd 0x%x!\n",
 			       addr, cmd);
 	return -ENODEV;
@@ -268,17 +268,6 @@ static int cx18_i2c_id_addr(struct cx18 *cx, u32 id)
 	return retval;
 }
 
-/* Find the i2c device name matching the DRIVERID */
-static const char *cx18_i2c_id_name(u32 id)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(hw_driverids); i++)
-		if (hw_driverids[i] == id)
-			return hw_devicenames[i];
-	return "unknown device";
-}
-
 /* Find the i2c device name matching the CX18_HW_ flag */
 static const char *cx18_i2c_hw_name(u32 hw)
 {
@@ -326,21 +315,6 @@ int cx18_i2c_hw(struct cx18 *cx, u32 hw, unsigned int cmd, void *arg)
 	return cx18_call_i2c_client(cx, addr, cmd, arg);
 }
 
-/* Calls i2c device based on I2C driver ID. */
-int cx18_i2c_id(struct cx18 *cx, u32 id, unsigned int cmd, void *arg)
-{
-	int addr;
-
-	addr = cx18_i2c_id_addr(cx, id);
-	if (addr < 0) {
-		if (cmd != VIDIOC_G_CHIP_IDENT)
-			CX18_ERR("i2c ID 0x%08x (%s) not found for cmd 0x%x!\n",
-				id, cx18_i2c_id_name(id), cmd);
-		return addr;
-	}
-	return cx18_call_i2c_client(cx, addr, cmd, arg);
-}
-
 /* broadcast cmd for all I2C clients and for the gpio subsystem */
 void cx18_call_i2c_clients(struct cx18 *cx, unsigned int cmd, void *arg)
 {
diff --git a/drivers/media/video/cx18/cx18-i2c.h b/drivers/media/video/cx18/cx18-i2c.h
index 113c3f9..4869739 100644
--- a/drivers/media/video/cx18/cx18-i2c.h
+++ b/drivers/media/video/cx18/cx18-i2c.h
@@ -23,7 +23,6 @@
 
 int cx18_i2c_hw_addr(struct cx18 *cx, u32 hw);
 int cx18_i2c_hw(struct cx18 *cx, u32 hw, unsigned int cmd, void *arg);
-int cx18_i2c_id(struct cx18 *cx, u32 id, unsigned int cmd, void *arg);
 int cx18_call_i2c_client(struct cx18 *cx, int addr, unsigned cmd, void *arg);
 void cx18_call_i2c_clients(struct cx18 *cx, unsigned int cmd, void *arg);
 int cx18_i2c_register(struct cx18 *cx, unsigned idx);
diff --git a/drivers/media/video/cx18/cx18-ioctl.c b/drivers/media/video/cx18/cx18-ioctl.c
index 8aa152b..7086aab 100644
--- a/drivers/media/video/cx18/cx18-ioctl.c
+++ b/drivers/media/video/cx18/cx18-ioctl.c
@@ -254,30 +254,24 @@ static int cx18_s_fmt_sliced_vbi_cap(struct file *file, void *fh,
 }
 
 static int cx18_g_chip_ident(struct file *file, void *fh,
-				struct v4l2_chip_ident *chip)
+				struct v4l2_dbg_chip_ident *chip)
 {
 	struct cx18 *cx = ((struct cx18_open_id *)fh)->cx;
 
 	chip->ident = V4L2_IDENT_NONE;
 	chip->revision = 0;
-	if (chip->match_type == V4L2_CHIP_MATCH_HOST) {
-		if (v4l2_chip_match_host(chip->match_type, chip->match_chip))
-			chip->ident = V4L2_IDENT_CX23418;
+	if (v4l2_chip_match_host(&chip->match)) {
+		chip->ident = V4L2_IDENT_CX23418;
 		return 0;
 	}
-	if (chip->match_type == V4L2_CHIP_MATCH_I2C_DRIVER)
-		return cx18_i2c_id(cx, chip->match_chip, VIDIOC_G_CHIP_IDENT,
-					chip);
-	if (chip->match_type == V4L2_CHIP_MATCH_I2C_ADDR)
-		return cx18_call_i2c_client(cx, chip->match_chip,
-						VIDIOC_G_CHIP_IDENT, chip);
-	return -EINVAL;
+	cx18_call_i2c_clients(cx, VIDIOC_DBG_G_CHIP_IDENT, chip);
+	return 0;
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int cx18_cxc(struct cx18 *cx, unsigned int cmd, void *arg)
 {
-	struct v4l2_register *regs = arg;
+	struct v4l2_dbg_register *regs = arg;
 	unsigned long flags;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -286,6 +280,7 @@ static int cx18_cxc(struct cx18 *cx, unsigned int cmd, void *arg)
 		return -EINVAL;
 
 	spin_lock_irqsave(&cx18_cards_lock, flags);
+	regs->size = 4;
 	if (cmd == VIDIOC_DBG_G_REGISTER)
 		regs->val = cx18_read_enc(cx, regs->reg);
 	else
@@ -295,31 +290,25 @@ static int cx18_cxc(struct cx18 *cx, unsigned int cmd, void *arg)
 }
 
 static int cx18_g_register(struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx18 *cx = ((struct cx18_open_id *)fh)->cx;
 
-	if (v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (v4l2_chip_match_host(&reg->match))
 		return cx18_cxc(cx, VIDIOC_DBG_G_REGISTER, reg);
-	if (reg->match_type == V4L2_CHIP_MATCH_I2C_DRIVER)
-		return cx18_i2c_id(cx, reg->match_chip, VIDIOC_DBG_G_REGISTER,
-					reg);
-	return cx18_call_i2c_client(cx, reg->match_chip, VIDIOC_DBG_G_REGISTER,
-					reg);
+	cx18_call_i2c_clients(cx, VIDIOC_DBG_G_REGISTER, reg);
+	return 0;
 }
 
 static int cx18_s_register(struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx18 *cx = ((struct cx18_open_id *)fh)->cx;
 
-	if (v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (v4l2_chip_match_host(&reg->match))
 		return cx18_cxc(cx, VIDIOC_DBG_S_REGISTER, reg);
-	if (reg->match_type == V4L2_CHIP_MATCH_I2C_DRIVER)
-		return cx18_i2c_id(cx, reg->match_chip, VIDIOC_DBG_S_REGISTER,
-					reg);
-	return cx18_call_i2c_client(cx, reg->match_chip, VIDIOC_DBG_S_REGISTER,
-					reg);
+	cx18_call_i2c_clients(cx, VIDIOC_DBG_S_REGISTER, reg);
+	return 0;
 }
 #endif
 
diff --git a/drivers/media/video/cx23885/cx23885-video.c b/drivers/media/video/cx23885/cx23885-video.c
index 637c4d0..2d81c4d 100644
--- a/drivers/media/video/cx23885/cx23885-video.c
+++ b/drivers/media/video/cx23885/cx23885-video.c
@@ -1326,11 +1326,11 @@ static int vidioc_s_frequency(struct file *file, void *priv,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int vidioc_g_register(struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx23885_dev *dev = ((struct cx23885_fh *)fh)->dev;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 
 	cx23885_call_i2c_clients(&dev->i2c_bus[2], VIDIOC_DBG_G_REGISTER, reg);
@@ -1339,11 +1339,11 @@ static int vidioc_g_register(struct file *file, void *fh,
 }
 
 static int vidioc_s_register(struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx23885_dev *dev = ((struct cx23885_fh *)fh)->dev;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 
 	cx23885_call_i2c_clients(&dev->i2c_bus[2], VIDIOC_DBG_S_REGISTER, reg);
diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c
index 2ad2771..88f2fd3 100644
--- a/drivers/media/video/cx25840/cx25840-core.c
+++ b/drivers/media/video/cx25840/cx25840-core.c
@@ -1120,25 +1120,24 @@ static int cx25840_init(struct v4l2_subdev *sd, u32 val)
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int cx25840_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int cx25840_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
+	reg->size = 1;
 	reg->val = cx25840_read(client, reg->reg & 0x0fff);
 	return 0;
 }
 
-static int cx25840_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int cx25840_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -1362,7 +1361,7 @@ static int cx25840_reset(struct v4l2_subdev *sd, u32 val)
 	return 0;
 }
 
-static int cx25840_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int cx25840_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct cx25840_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index b93b7ab..791e69d 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -1447,25 +1447,26 @@ static int vidioc_s_frequency (struct file *file, void *priv,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int vidioc_g_register (struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx88_core *core = ((struct cx8800_fh*)fh)->dev->core;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 	/* cx2388x has a 24-bit register space */
-	reg->val = cx_read(reg->reg&0xffffff);
+	reg->val = cx_read(reg->reg & 0xffffff);
+	reg->size = 4;
 	return 0;
 }
 
 static int vidioc_s_register (struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx88_core *core = ((struct cx8800_fh*)fh)->dev->core;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
-	cx_write(reg->reg&0xffffff, reg->val);
+	cx_write(reg->reg & 0xffffff, reg->val);
 	return 0;
 }
 #endif
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index 9cb7c64..416b691 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -1154,7 +1154,7 @@ static int em28xx_reg_len(int reg)
 }
 
 static int vidioc_g_chip_ident(struct file *file, void *priv,
-	       struct v4l2_chip_ident *chip)
+	       struct v4l2_dbg_chip_ident *chip)
 {
 	struct em28xx_fh      *fh  = priv;
 	struct em28xx         *dev = fh->dev;
@@ -1162,20 +1162,20 @@ static int vidioc_g_chip_ident(struct file *file, void *priv,
 	chip->ident = V4L2_IDENT_NONE;
 	chip->revision = 0;
 
-	em28xx_i2c_call_clients(dev, VIDIOC_G_CHIP_IDENT, chip);
+	em28xx_i2c_call_clients(dev, VIDIOC_DBG_G_CHIP_IDENT, chip);
 
 	return 0;
 }
 
 
 static int vidioc_g_register(struct file *file, void *priv,
-			     struct v4l2_register *reg)
+			     struct v4l2_dbg_register *reg)
 {
 	struct em28xx_fh      *fh  = priv;
 	struct em28xx         *dev = fh->dev;
 	int ret;
 
-	switch (reg->match_type) {
+	switch (reg->match.type) {
 	case V4L2_CHIP_MATCH_AC97:
 		mutex_lock(&dev->lock);
 		ret = em28xx_read_ac97(dev, reg->reg);
@@ -1184,6 +1184,7 @@ static int vidioc_g_register(struct file *file, void *priv,
 			return ret;
 
 		reg->val = ret;
+		reg->size = 1;
 		return 0;
 	case V4L2_CHIP_MATCH_I2C_DRIVER:
 		em28xx_i2c_call_clients(dev, VIDIOC_DBG_G_REGISTER, reg);
@@ -1192,12 +1193,13 @@ static int vidioc_g_register(struct file *file, void *priv,
 		/* Not supported yet */
 		return -EINVAL;
 	default:
-		if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+		if (!v4l2_chip_match_host(&reg->match))
 			return -EINVAL;
 	}
 
 	/* Match host */
-	if (em28xx_reg_len(reg->reg) == 1) {
+	reg->size = em28xx_reg_len(reg->reg);
+	if (reg->size == 1) {
 		mutex_lock(&dev->lock);
 		ret = em28xx_read_reg(dev, reg->reg);
 		mutex_unlock(&dev->lock);
@@ -1207,7 +1209,7 @@ static int vidioc_g_register(struct file *file, void *priv,
 
 		reg->val = ret;
 	} else {
-		__le64 val = 0;
+		__le16 val = 0;
 		mutex_lock(&dev->lock);
 		ret = em28xx_read_reg_req_len(dev, USB_REQ_GET_STATUS,
 						   reg->reg, (char *)&val, 2);
@@ -1215,21 +1217,21 @@ static int vidioc_g_register(struct file *file, void *priv,
 		if (ret < 0)
 			return ret;
 
-		reg->val = le64_to_cpu(val);
+		reg->val = le16_to_cpu(val);
 	}
 
 	return 0;
 }
 
 static int vidioc_s_register(struct file *file, void *priv,
-			     struct v4l2_register *reg)
+			     struct v4l2_dbg_register *reg)
 {
 	struct em28xx_fh      *fh  = priv;
 	struct em28xx         *dev = fh->dev;
-	__le64 buf;
+	__le16 buf;
 	int    rc;
 
-	switch (reg->match_type) {
+	switch (reg->match.type) {
 	case V4L2_CHIP_MATCH_AC97:
 		mutex_lock(&dev->lock);
 		rc = em28xx_write_ac97(dev, reg->reg, reg->val);
@@ -1243,12 +1245,12 @@ static int vidioc_s_register(struct file *file, void *priv,
 		/* Not supported yet */
 		return -EINVAL;
 	default:
-		if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+		if (!v4l2_chip_match_host(&reg->match))
 			return -EINVAL;
 	}
 
 	/* Match host */
-	buf = cpu_to_le64(reg->val);
+	buf = cpu_to_le16(reg->val);
 
 	mutex_lock(&dev->lock);
 	rc = em28xx_write_regs(dev, reg->reg, (char *)&buf,
diff --git a/drivers/media/video/ivtv/ivtv-driver.c b/drivers/media/video/ivtv/ivtv-driver.c
index 08b7629..e8e5921 100644
--- a/drivers/media/video/ivtv/ivtv-driver.c
+++ b/drivers/media/video/ivtv/ivtv-driver.c
@@ -902,18 +902,19 @@ static void ivtv_load_and_init_modules(struct ivtv *itv)
 	}
 
 	if (hw & IVTV_HW_SAA711X) {
-		struct v4l2_chip_ident v = { V4L2_CHIP_MATCH_I2C_DRIVER, I2C_DRIVERID_SAA711X };
+		struct v4l2_dbg_chip_ident v;
 
 		/* determine the exact saa711x model */
 		itv->hw_flags &= ~IVTV_HW_SAA711X;
 
+		v.match.type = V4L2_CHIP_MATCH_I2C_DRIVER;
+		strlcpy(v.match.name, "saa7115", sizeof(v.match.name));
 		ivtv_call_hw(itv, IVTV_HW_SAA711X, core, g_chip_ident, &v);
 		if (v.ident == V4L2_IDENT_SAA7114) {
 			itv->hw_flags |= IVTV_HW_SAA7114;
 			/* VBI is not yet supported by the saa7114 driver. */
 			itv->v4l2_cap &= ~(V4L2_CAP_SLICED_VBI_CAPTURE|V4L2_CAP_VBI_CAPTURE);
-		}
-		else {
+		} else {
 			itv->hw_flags |= IVTV_HW_SAA7115;
 		}
 		itv->vbi.raw_decoder_line_size = 1443;
diff --git a/drivers/media/video/ivtv/ivtv-ioctl.c b/drivers/media/video/ivtv/ivtv-ioctl.c
index 1f6ca93..f6b3ef6 100644
--- a/drivers/media/video/ivtv/ivtv-ioctl.c
+++ b/drivers/media/video/ivtv/ivtv-ioctl.c
@@ -674,19 +674,19 @@ static int ivtv_s_fmt_vid_out_overlay(struct file *file, void *fh, struct v4l2_f
 	return ret;
 }
 
-static int ivtv_g_chip_ident(struct file *file, void *fh, struct v4l2_chip_ident *chip)
+static int ivtv_g_chip_ident(struct file *file, void *fh, struct v4l2_dbg_chip_ident *chip)
 {
 	struct ivtv *itv = ((struct ivtv_open_id *)fh)->itv;
 
 	chip->ident = V4L2_IDENT_NONE;
 	chip->revision = 0;
-	if (chip->match_type == V4L2_CHIP_MATCH_HOST) {
-		if (v4l2_chip_match_host(chip->match_type, chip->match_chip))
+	if (chip->match.type == V4L2_CHIP_MATCH_HOST) {
+		if (v4l2_chip_match_host(&chip->match))
 			chip->ident = itv->has_cx23415 ? V4L2_IDENT_CX23415 : V4L2_IDENT_CX23416;
 		return 0;
 	}
-	if (chip->match_type != V4L2_CHIP_MATCH_I2C_DRIVER &&
-	    chip->match_type != V4L2_CHIP_MATCH_I2C_ADDR)
+	if (chip->match.type != V4L2_CHIP_MATCH_I2C_DRIVER &&
+	    chip->match.type != V4L2_CHIP_MATCH_I2C_ADDR)
 		return -EINVAL;
 	/* TODO: is this correct? */
 	return ivtv_call_all_err(itv, core, g_chip_ident, chip);
@@ -695,7 +695,7 @@ static int ivtv_g_chip_ident(struct file *file, void *fh, struct v4l2_chip_ident
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int ivtv_itvc(struct ivtv *itv, unsigned int cmd, void *arg)
 {
-	struct v4l2_register *regs = arg;
+	struct v4l2_dbg_register *regs = arg;
 	volatile u8 __iomem *reg_start;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -710,6 +710,7 @@ static int ivtv_itvc(struct ivtv *itv, unsigned int cmd, void *arg)
 	else
 		return -EINVAL;
 
+	regs->size = 4;
 	if (cmd == VIDIOC_DBG_G_REGISTER)
 		regs->val = readl(regs->reg + reg_start);
 	else
@@ -717,11 +718,11 @@ static int ivtv_itvc(struct ivtv *itv, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int ivtv_g_register(struct file *file, void *fh, struct v4l2_register *reg)
+static int ivtv_g_register(struct file *file, void *fh, struct v4l2_dbg_register *reg)
 {
 	struct ivtv *itv = ((struct ivtv_open_id *)fh)->itv;
 
-	if (v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (v4l2_chip_match_host(&reg->match))
 		return ivtv_itvc(itv, VIDIOC_DBG_G_REGISTER, reg);
 	/* TODO: subdev errors should not be ignored, this should become a
 	   subdev helper function. */
@@ -729,11 +730,11 @@ static int ivtv_g_register(struct file *file, void *fh, struct v4l2_register *re
 	return 0;
 }
 
-static int ivtv_s_register(struct file *file, void *fh, struct v4l2_register *reg)
+static int ivtv_s_register(struct file *file, void *fh, struct v4l2_dbg_register *reg)
 {
 	struct ivtv *itv = ((struct ivtv_open_id *)fh)->itv;
 
-	if (v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (v4l2_chip_match_host(&reg->match))
 		return ivtv_itvc(itv, VIDIOC_DBG_S_REGISTER, reg);
 	/* TODO: subdev errors should not be ignored, this should become a
 	   subdev helper function. */
diff --git a/drivers/media/video/m52790.c b/drivers/media/video/m52790.c
index 07be14a..de397ef 100644
--- a/drivers/media/video/m52790.c
+++ b/drivers/media/video/m52790.c
@@ -80,29 +80,28 @@ static int m52790_s_routing(struct v4l2_subdev *sd, const struct v4l2_routing *r
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int m52790_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int m52790_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct m52790_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	if (reg->reg != 0)
 		return -EINVAL;
+	reg->size = 1;
 	reg->val = state->input | state->output;
 	return 0;
 }
 
-static int m52790_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int m52790_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct m52790_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -115,7 +114,7 @@ static int m52790_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
 }
 #endif
 
-static int m52790_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int m52790_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c
index b8577ad..4d7a918 100644
--- a/drivers/media/video/msp3400-driver.c
+++ b/drivers/media/video/msp3400-driver.c
@@ -733,7 +733,7 @@ static int msp_queryctrl(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc)
 	return 0;
 }
 
-static int msp_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int msp_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct msp_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/mt9m001.c b/drivers/media/video/mt9m001.c
index 1a1a124..c1bf75e 100644
--- a/drivers/media/video/mt9m001.c
+++ b/drivers/media/video/mt9m001.c
@@ -343,14 +343,14 @@ static int mt9m001_try_fmt(struct soc_camera_device *icd,
 }
 
 static int mt9m001_get_chip_id(struct soc_camera_device *icd,
-			       struct v4l2_chip_ident *id)
+			       struct v4l2_dbg_chip_ident *id)
 {
 	struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd);
 
-	if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR)
+	if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR)
 		return -EINVAL;
 
-	if (id->match_chip != mt9m001->client->addr)
+	if (id->match.addr != mt9m001->client->addr)
 		return -ENODEV;
 
 	id->ident	= mt9m001->model;
@@ -361,16 +361,17 @@ static int mt9m001_get_chip_id(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int mt9m001_get_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9m001->client->addr)
+	if (reg->match.addr != mt9m001->client->addr)
 		return -ENODEV;
 
+	reg->size = 2;
 	reg->val = reg_read(icd, reg->reg);
 
 	if (reg->val > 0xffff)
@@ -380,14 +381,14 @@ static int mt9m001_get_register(struct soc_camera_device *icd,
 }
 
 static int mt9m001_set_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9m001->client->addr)
+	if (reg->match.addr != mt9m001->client->addr)
 		return -ENODEV;
 
 	if (reg_write(icd, reg->reg, reg->val) < 0)
diff --git a/drivers/media/video/mt9m111.c b/drivers/media/video/mt9m111.c
index c89ea41..5b8e209 100644
--- a/drivers/media/video/mt9m111.c
+++ b/drivers/media/video/mt9m111.c
@@ -514,14 +514,14 @@ static int mt9m111_try_fmt(struct soc_camera_device *icd,
 }
 
 static int mt9m111_get_chip_id(struct soc_camera_device *icd,
-			       struct v4l2_chip_ident *id)
+			       struct v4l2_dbg_chip_ident *id)
 {
 	struct mt9m111 *mt9m111 = container_of(icd, struct mt9m111, icd);
 
-	if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR)
+	if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR)
 		return -EINVAL;
 
-	if (id->match_chip != mt9m111->client->addr)
+	if (id->match.addr != mt9m111->client->addr)
 		return -ENODEV;
 
 	id->ident	= mt9m111->model;
@@ -532,18 +532,19 @@ static int mt9m111_get_chip_id(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int mt9m111_get_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	int val;
 
 	struct mt9m111 *mt9m111 = container_of(icd, struct mt9m111, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff)
 		return -EINVAL;
-	if (reg->match_chip != mt9m111->client->addr)
+	if (reg->match.addr != mt9m111->client->addr)
 		return -ENODEV;
 
 	val = mt9m111_reg_read(icd, reg->reg);
+	reg->size = 2;
 	reg->val = (u64)val;
 
 	if (reg->val > 0xffff)
@@ -553,14 +554,14 @@ static int mt9m111_get_register(struct soc_camera_device *icd,
 }
 
 static int mt9m111_set_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9m111 *mt9m111 = container_of(icd, struct mt9m111, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9m111->client->addr)
+	if (reg->match.addr != mt9m111->client->addr)
 		return -ENODEV;
 
 	if (mt9m111_reg_write(icd, reg->reg, reg->val) < 0)
diff --git a/drivers/media/video/mt9t031.c b/drivers/media/video/mt9t031.c
index 1a9d539..349d8e3 100644
--- a/drivers/media/video/mt9t031.c
+++ b/drivers/media/video/mt9t031.c
@@ -326,14 +326,14 @@ static int mt9t031_try_fmt(struct soc_camera_device *icd,
 }
 
 static int mt9t031_get_chip_id(struct soc_camera_device *icd,
-			       struct v4l2_chip_ident *id)
+			       struct v4l2_dbg_chip_ident *id)
 {
 	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
 
-	if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR)
+	if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR)
 		return -EINVAL;
 
-	if (id->match_chip != mt9t031->client->addr)
+	if (id->match.addr != mt9t031->client->addr)
 		return -ENODEV;
 
 	id->ident	= mt9t031->model;
@@ -344,14 +344,14 @@ static int mt9t031_get_chip_id(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int mt9t031_get_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9t031->client->addr)
+	if (reg->match.addr != mt9t031->client->addr)
 		return -ENODEV;
 
 	reg->val = reg_read(icd, reg->reg);
@@ -363,14 +363,14 @@ static int mt9t031_get_register(struct soc_camera_device *icd,
 }
 
 static int mt9t031_set_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9t031->client->addr)
+	if (reg->match.addr != mt9t031->client->addr)
 		return -ENODEV;
 
 	if (reg_write(icd, reg->reg, reg->val) < 0)
diff --git a/drivers/media/video/mt9v022.c b/drivers/media/video/mt9v022.c
index 14a5f9c..b04c8cb 100644
--- a/drivers/media/video/mt9v022.c
+++ b/drivers/media/video/mt9v022.c
@@ -422,14 +422,14 @@ static int mt9v022_try_fmt(struct soc_camera_device *icd,
 }
 
 static int mt9v022_get_chip_id(struct soc_camera_device *icd,
-			       struct v4l2_chip_ident *id)
+			       struct v4l2_dbg_chip_ident *id)
 {
 	struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd);
 
-	if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR)
+	if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR)
 		return -EINVAL;
 
-	if (id->match_chip != mt9v022->client->addr)
+	if (id->match.addr != mt9v022->client->addr)
 		return -ENODEV;
 
 	id->ident	= mt9v022->model;
@@ -440,16 +440,17 @@ static int mt9v022_get_chip_id(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int mt9v022_get_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9v022->client->addr)
+	if (reg->match.addr != mt9v022->client->addr)
 		return -ENODEV;
 
+	reg->size = 2;
 	reg->val = reg_read(icd, reg->reg);
 
 	if (reg->val > 0xffff)
@@ -459,14 +460,14 @@ static int mt9v022_get_register(struct soc_camera_device *icd,
 }
 
 static int mt9v022_set_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9v022->client->addr)
+	if (reg->match.addr != mt9v022->client->addr)
 		return -ENODEV;
 
 	if (reg_write(icd, reg->reg, reg->val) < 0)
diff --git a/drivers/media/video/ov7670.c b/drivers/media/video/ov7670.c
index ea032f5..ca26b0c 100644
--- a/drivers/media/video/ov7670.c
+++ b/drivers/media/video/ov7670.c
@@ -1310,7 +1310,7 @@ static int ov7670_command(struct i2c_client *client, unsigned int cmd,
 		void *arg)
 {
 	switch (cmd) {
-	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_DBG_G_CHIP_IDENT:
 		return v4l2_chip_ident_i2c_client(client, arg, V4L2_IDENT_OV7670, 0);
 
 	case VIDIOC_INT_RESET:
diff --git a/drivers/media/video/ov772x.c b/drivers/media/video/ov772x.c
index 54b736f..3c9e0ba 100644
--- a/drivers/media/video/ov772x.c
+++ b/drivers/media/video/ov772x.c
@@ -724,7 +724,7 @@ static unsigned long ov772x_query_bus_param(struct soc_camera_device *icd)
 }
 
 static int ov772x_get_chip_id(struct soc_camera_device *icd,
-			      struct v4l2_chip_ident   *id)
+			      struct v4l2_dbg_chip_ident   *id)
 {
 	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
 
@@ -736,11 +736,12 @@ static int ov772x_get_chip_id(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int ov772x_get_register(struct soc_camera_device *icd,
-			       struct v4l2_register *reg)
+			       struct v4l2_dbg_register *reg)
 {
 	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
 	int                 ret;
 
+	reg->size = 1;
 	if (reg->reg > 0xff)
 		return -EINVAL;
 
@@ -754,7 +755,7 @@ static int ov772x_get_register(struct soc_camera_device *icd,
 }
 
 static int ov772x_set_register(struct soc_camera_device *icd,
-			       struct v4l2_register *reg)
+			       struct v4l2_dbg_register *reg)
 {
 	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
 
diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw.c b/drivers/media/video/pvrusb2/pvrusb2-hdw.c
index 4358079..8fb92ac 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-hdw.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-hdw.c
@@ -4732,26 +4732,25 @@ static int pvr2_hdw_get_eeprom_addr(struct pvr2_hdw *hdw)
 
 
 int pvr2_hdw_register_access(struct pvr2_hdw *hdw,
-			     u32 match_type, u32 match_chip, u64 reg_id,
-			     int setFl,u64 *val_ptr)
+			     struct v4l2_dbg_match *match, u64 reg_id,
+			     int setFl, u64 *val_ptr)
 {
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	struct pvr2_i2c_client *cp;
-	struct v4l2_register req;
+	struct v4l2_dbg_register req;
 	int stat = 0;
 	int okFl = 0;
 
 	if (!capable(CAP_SYS_ADMIN)) return -EPERM;
 
-	req.match_type = match_type;
-	req.match_chip = match_chip;
+	req.match = *match;
 	req.reg = reg_id;
 	if (setFl) req.val = *val_ptr;
 	mutex_lock(&hdw->i2c_list_lock); do {
 		list_for_each_entry(cp, &hdw->i2c_clients, list) {
 			if (!v4l2_chip_match_i2c_client(
 				    cp->client,
-				    req.match_type, req.match_chip)) {
+				    &req.match)) {
 				continue;
 			}
 			stat = pvr2_i2c_client_cmd(
diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw.h b/drivers/media/video/pvrusb2/pvrusb2-hdw.h
index 49482d1..1b4fec3 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-hdw.h
+++ b/drivers/media/video/pvrusb2/pvrusb2-hdw.h
@@ -242,8 +242,8 @@ void pvr2_hdw_v4l_store_minor_number(struct pvr2_hdw *,
    setFl   - true to set the register, false to read it
    val_ptr - storage location for source / result. */
 int pvr2_hdw_register_access(struct pvr2_hdw *,
-			     u32 match_type, u32 match_chip,u64 reg_id,
-			     int setFl,u64 *val_ptr);
+			     struct v4l2_dbg_match *match, u64 reg_id,
+			     int setFl, u64 *val_ptr);
 
 /* The following entry points are all lower level things you normally don't
    want to worry about. */
diff --git a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
index b9aedce..878fd52 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
@@ -851,11 +851,11 @@ static long pvr2_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	case VIDIOC_DBG_G_REGISTER:
 	{
 		u64 val;
-		struct v4l2_register *req = (struct v4l2_register *)arg;
+		struct v4l2_dbg_register *req = (struct v4l2_dbg_register *)arg;
 		if (cmd == VIDIOC_DBG_S_REGISTER) val = req->val;
 		ret = pvr2_hdw_register_access(
-			hdw,req->match_type,req->match_chip,req->reg,
-			cmd == VIDIOC_DBG_S_REGISTER,&val);
+			hdw, &req->match, req->reg,
+			cmd == VIDIOC_DBG_S_REGISTER, &val);
 		if (cmd == VIDIOC_DBG_G_REGISTER) req->val = val;
 		break;
 	}
diff --git a/drivers/media/video/saa7115.c b/drivers/media/video/saa7115.c
index 22708ec..46c796c 100644
--- a/drivers/media/video/saa7115.c
+++ b/drivers/media/video/saa7115.c
@@ -1371,25 +1371,24 @@ static int saa711x_g_vbi_data(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_dat
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int saa711x_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa711x_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = saa711x_read(sd, reg->reg & 0xff);
+	reg->size = 1;
 	return 0;
 }
 
-static int saa711x_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa711x_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -1398,7 +1397,7 @@ static int saa711x_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
 }
 #endif
 
-static int saa711x_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int saa711x_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct saa711x_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/saa7127.c b/drivers/media/video/saa7127.c
index bfc8565..d6848f7 100644
--- a/drivers/media/video/saa7127.c
+++ b/drivers/media/video/saa7127.c
@@ -623,25 +623,24 @@ static int saa7127_s_vbi_data(struct v4l2_subdev *sd, const struct v4l2_sliced_v
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int saa7127_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa7127_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = saa7127_read(sd, reg->reg & 0xff);
+	reg->size = 1;
 	return 0;
 }
 
-static int saa7127_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa7127_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -650,7 +649,7 @@ static int saa7127_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
 }
 #endif
 
-static int saa7127_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int saa7127_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct saa7127_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c
index 1fb6ecc..1fee6e8 100644
--- a/drivers/media/video/saa7134/saa6752hs.c
+++ b/drivers/media/video/saa7134/saa6752hs.c
@@ -838,7 +838,7 @@ saa6752hs_command(struct i2c_client *client, unsigned int cmd, void *arg)
 		h->standard = *((v4l2_std_id *) arg);
 		break;
 
-	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_DBG_G_CHIP_IDENT:
 		return v4l2_chip_ident_i2c_client(client,
 				arg, h->chip, h->revision);
 
diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c
index 3beba48..c9d8beb 100644
--- a/drivers/media/video/saa7134/saa7134-empress.c
+++ b/drivers/media/video/saa7134/saa7134-empress.c
@@ -405,7 +405,7 @@ static int empress_querymenu(struct file *file, void *priv,
 }
 
 static int empress_g_chip_ident(struct file *file, void *fh,
-	       struct v4l2_chip_ident *chip)
+	       struct v4l2_dbg_chip_ident *chip)
 {
 	struct saa7134_dev *dev = file->private_data;
 
@@ -413,12 +413,12 @@ static int empress_g_chip_ident(struct file *file, void *fh,
 	chip->revision = 0;
 	if (dev->mpeg_i2c_client == NULL)
 		return -EINVAL;
-	if (chip->match_type == V4L2_CHIP_MATCH_I2C_DRIVER &&
-	    chip->match_chip == I2C_DRIVERID_SAA6752HS)
-		return saa7134_i2c_call_saa6752(dev, VIDIOC_G_CHIP_IDENT, chip);
-	if (chip->match_type == V4L2_CHIP_MATCH_I2C_ADDR &&
-	    chip->match_chip == dev->mpeg_i2c_client->addr)
-		return saa7134_i2c_call_saa6752(dev, VIDIOC_G_CHIP_IDENT, chip);
+	if (chip->match.type == V4L2_CHIP_MATCH_I2C_DRIVER &&
+	    !strcmp(chip->match.name, "saa6752hs"))
+		return saa7134_i2c_call_saa6752(dev, VIDIOC_DBG_G_CHIP_IDENT, chip);
+	if (chip->match.type == V4L2_CHIP_MATCH_I2C_ADDR &&
+	    chip->match.addr == dev->mpeg_i2c_client->addr)
+		return saa7134_i2c_call_saa6752(dev, VIDIOC_DBG_G_CHIP_IDENT, chip);
 	return -EINVAL;
 }
 
diff --git a/drivers/media/video/saa7134/saa7134-video.c b/drivers/media/video/saa7134/saa7134-video.c
index 6b2ab57..a1f7e35 100644
--- a/drivers/media/video/saa7134/saa7134-video.c
+++ b/drivers/media/video/saa7134/saa7134-video.c
@@ -2247,24 +2247,25 @@ static int saa7134_g_parm(struct file *file, void *fh,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int vidioc_g_register (struct file *file, void *priv,
-			      struct v4l2_register *reg)
+			      struct v4l2_dbg_register *reg)
 {
 	struct saa7134_fh *fh = priv;
 	struct saa7134_dev *dev = fh->dev;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 	reg->val = saa_readb(reg->reg);
+	reg->size = 1;
 	return 0;
 }
 
 static int vidioc_s_register (struct file *file, void *priv,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct saa7134_fh *fh = priv;
 	struct saa7134_dev *dev = fh->dev;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 	saa_writeb(reg->reg&0xffffff, reg->val);
 	return 0;
diff --git a/drivers/media/video/saa717x.c b/drivers/media/video/saa717x.c
index 9befca6..454ad1d 100644
--- a/drivers/media/video/saa717x.c
+++ b/drivers/media/video/saa717x.c
@@ -1171,25 +1171,26 @@ static int saa717x_queryctrl(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc)
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int saa717x_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa717x_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client, reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = saa717x_read(sd, reg->reg);
+	reg->size = 1;
 	return 0;
 }
 
-static int saa717x_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa717x_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 	u16 addr = reg->reg & 0xffff;
 	u8 val = reg->val & 0xff;
 
-	if (!v4l2_chip_match_i2c_client(client, reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 9986e02..fcb05f0 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -699,7 +699,7 @@ static int soc_camera_s_crop(struct file *file, void *fh,
 }
 
 static int soc_camera_g_chip_ident(struct file *file, void *fh,
-				   struct v4l2_chip_ident *id)
+				   struct v4l2_dbg_chip_ident *id)
 {
 	struct soc_camera_file *icf = file->private_data;
 	struct soc_camera_device *icd = icf->icd;
@@ -712,7 +712,7 @@ static int soc_camera_g_chip_ident(struct file *file, void *fh,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int soc_camera_g_register(struct file *file, void *fh,
-				 struct v4l2_register *reg)
+				 struct v4l2_dbg_register *reg)
 {
 	struct soc_camera_file *icf = file->private_data;
 	struct soc_camera_device *icd = icf->icd;
@@ -724,7 +724,7 @@ static int soc_camera_g_register(struct file *file, void *fh,
 }
 
 static int soc_camera_s_register(struct file *file, void *fh,
-				 struct v4l2_register *reg)
+				 struct v4l2_dbg_register *reg)
 {
 	struct soc_camera_file *icf = file->private_data;
 	struct soc_camera_device *icd = icf->icd;
diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c
index d0c794d..5aeccb3 100644
--- a/drivers/media/video/tvaudio.c
+++ b/drivers/media/video/tvaudio.c
@@ -1762,7 +1762,7 @@ static int tvaudio_s_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *fr
 	return 0;
 }
 
-static int tvaudio_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int tvaudio_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/tvp5150.c b/drivers/media/video/tvp5150.c
index a388a9f..2cd64ef 100644
--- a/drivers/media/video/tvp5150.c
+++ b/drivers/media/video/tvp5150.c
@@ -963,7 +963,7 @@ static int tvp5150_g_fmt(struct v4l2_subdev *sd, struct v4l2_format *fmt)
 
 
 static int tvp5150_g_chip_ident(struct v4l2_subdev *sd,
-				struct v4l2_chip_ident *chip)
+				struct v4l2_dbg_chip_ident *chip)
 {
 	int rev;
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -977,25 +977,24 @@ static int tvp5150_g_chip_ident(struct v4l2_subdev *sd,
 
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int tvp5150_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int tvp5150_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = tvp5150_read(sd, reg->reg & 0xff);
+	reg->size = 1;
 	return 0;
 }
 
-static int tvp5150_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int tvp5150_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
diff --git a/drivers/media/video/tw9910.c b/drivers/media/video/tw9910.c
index d5cdc4b..52c0357 100644
--- a/drivers/media/video/tw9910.c
+++ b/drivers/media/video/tw9910.c
@@ -575,7 +575,7 @@ static unsigned long tw9910_query_bus_param(struct soc_camera_device *icd)
 }
 
 static int tw9910_get_chip_id(struct soc_camera_device *icd,
-			      struct v4l2_chip_ident *id)
+			      struct v4l2_dbg_chip_ident *id)
 {
 	id->ident = V4L2_IDENT_TW9910;
 	id->revision = 0;
@@ -606,7 +606,7 @@ static int tw9910_enum_input(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int tw9910_get_register(struct soc_camera_device *icd,
-			       struct v4l2_register *reg)
+			       struct v4l2_dbg_register *reg)
 {
 	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
 	int ret;
@@ -627,7 +627,7 @@ static int tw9910_get_register(struct soc_camera_device *icd,
 }
 
 static int tw9910_set_register(struct soc_camera_device *icd,
-			       struct v4l2_register *reg)
+			       struct v4l2_dbg_register *reg)
 {
 	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
 
diff --git a/drivers/media/video/upd64031a.c b/drivers/media/video/upd64031a.c
index 7a609a3..4f16eff 100644
--- a/drivers/media/video/upd64031a.c
+++ b/drivers/media/video/upd64031a.c
@@ -147,7 +147,7 @@ static int upd64031a_s_routing(struct v4l2_subdev *sd, const struct v4l2_routing
 	return upd64031a_s_frequency(sd, NULL);
 }
 
-static int upd64031a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int upd64031a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
@@ -162,25 +162,24 @@ static int upd64031a_log_status(struct v4l2_subdev *sd)
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int upd64031a_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int upd64031a_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = upd64031a_read(sd, reg->reg & 0xff);
+	reg->size = 1;
 	return 0;
 }
 
-static int upd64031a_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int upd64031a_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
diff --git a/drivers/media/video/upd64083.c b/drivers/media/video/upd64083.c
index 58412cb..4b712f6 100644
--- a/drivers/media/video/upd64083.c
+++ b/drivers/media/video/upd64083.c
@@ -120,25 +120,24 @@ static int upd64083_s_routing(struct v4l2_subdev *sd, const struct v4l2_routing
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int upd64083_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int upd64083_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = upd64083_read(sd, reg->reg & 0xff);
+	reg->size = 1;
 	return 0;
 }
 
-static int upd64083_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int upd64083_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -147,7 +146,7 @@ static int upd64083_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg
 }
 #endif
 
-static int upd64083_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int upd64083_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c
index 7c61c6d..2be5e47 100644
--- a/drivers/media/video/usbvision/usbvision-video.c
+++ b/drivers/media/video/usbvision/usbvision-video.c
@@ -477,12 +477,12 @@ static int usbvision_v4l2_close(struct file *file)
  */
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int vidioc_g_register (struct file *file, void *priv,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct usb_usbvision *usbvision = video_drvdata(file);
 	int errCode;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 	/* NT100x has a 8-bit register space */
 	errCode = usbvision_read_reg(usbvision, reg->reg&0xff);
@@ -492,16 +492,17 @@ static int vidioc_g_register (struct file *file, void *priv,
 		return errCode;
 	}
 	reg->val = errCode;
+	reg->size = 1;
 	return 0;
 }
 
 static int vidioc_s_register (struct file *file, void *priv,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct usb_usbvision *usbvision = video_drvdata(file);
 	int errCode;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 	/* NT100x has a 8-bit register space */
 	errCode = usbvision_write_reg(usbvision, reg->reg&0xff, reg->val);
diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index c676b0b..b8f2be8 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -797,11 +797,11 @@ u32 v4l2_ctrl_next(const u32 * const * ctrl_classes, u32 id)
 }
 EXPORT_SYMBOL(v4l2_ctrl_next);
 
-int v4l2_chip_match_host(u32 match_type, u32 match_chip)
+int v4l2_chip_match_host(const struct v4l2_dbg_match *match)
 {
-	switch (match_type) {
+	switch (match->type) {
 	case V4L2_CHIP_MATCH_HOST:
-		return match_chip == 0;
+		return match->addr == 0;
 	default:
 		return 0;
 	}
@@ -809,23 +809,34 @@ int v4l2_chip_match_host(u32 match_type, u32 match_chip)
 EXPORT_SYMBOL(v4l2_chip_match_host);
 
 #if defined(CONFIG_I2C) || (defined(CONFIG_I2C_MODULE) && defined(MODULE))
-int v4l2_chip_match_i2c_client(struct i2c_client *c, u32 match_type, u32 match_chip)
+int v4l2_chip_match_i2c_client(struct i2c_client *c, const struct v4l2_dbg_match *match)
 {
-	switch (match_type) {
+	int len;
+
+	if (c == NULL || match == NULL)
+		return 0;
+
+	switch (match->type) {
 	case V4L2_CHIP_MATCH_I2C_DRIVER:
-		return (c != NULL && c->driver != NULL && c->driver->id == match_chip);
+		if (c->driver == NULL || c->driver->driver.name == NULL)
+			return 0;
+		len = strlen(c->driver->driver.name);
+		/* legacy drivers have a ' suffix, don't try to match that */
+		if (len && c->driver->driver.name[len - 1] == '\'')
+			len--;
+		return len && !strncmp(c->driver->driver.name, match->name, len);
 	case V4L2_CHIP_MATCH_I2C_ADDR:
-		return (c != NULL && c->addr == match_chip);
+		return c->addr == match->addr;
 	default:
 		return 0;
 	}
 }
 EXPORT_SYMBOL(v4l2_chip_match_i2c_client);
 
-int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_chip_ident *chip,
+int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_dbg_chip_ident *chip,
 		u32 ident, u32 revision)
 {
-	if (!v4l2_chip_match_i2c_client(c, chip->match_type, chip->match_chip))
+	if (!v4l2_chip_match_i2c_client(c, &chip->match))
 		return 0;
 	if (chip->ident == V4L2_IDENT_NONE) {
 		chip->ident = ident;
diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index ec81b97..110376b 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -1046,7 +1046,8 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 	case VIDIOC_TRY_ENCODER_CMD:
 	case VIDIOC_DBG_S_REGISTER:
 	case VIDIOC_DBG_G_REGISTER:
-	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_DBG_G_CHIP_IDENT:
+	case VIDIOC_G_CHIP_IDENT_OLD:
 	case VIDIOC_S_HW_FREQ_SEEK:
 		ret = do_video_ioctl(file, cmd, arg);
 		break;
diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index 8f629ef..52d687b 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -266,7 +266,7 @@ static const char *v4l2_ioctls[] = {
 	[_IOC_NR(VIDIOC_DBG_S_REGISTER)]   = "VIDIOC_DBG_S_REGISTER",
 	[_IOC_NR(VIDIOC_DBG_G_REGISTER)]   = "VIDIOC_DBG_G_REGISTER",
 
-	[_IOC_NR(VIDIOC_G_CHIP_IDENT)]     = "VIDIOC_G_CHIP_IDENT",
+	[_IOC_NR(VIDIOC_DBG_G_CHIP_IDENT)] = "VIDIOC_DBG_G_CHIP_IDENT",
 	[_IOC_NR(VIDIOC_S_HW_FREQ_SEEK)]   = "VIDIOC_S_HW_FREQ_SEEK",
 #endif
 };
@@ -1720,7 +1720,7 @@ static long __video_do_ioctl(struct file *file,
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	case VIDIOC_DBG_G_REGISTER:
 	{
-		struct v4l2_register *p = arg;
+		struct v4l2_dbg_register *p = arg;
 
 		if (!capable(CAP_SYS_ADMIN))
 			ret = -EPERM;
@@ -1730,7 +1730,7 @@ static long __video_do_ioctl(struct file *file,
 	}
 	case VIDIOC_DBG_S_REGISTER:
 	{
-		struct v4l2_register *p = arg;
+		struct v4l2_dbg_register *p = arg;
 
 		if (!capable(CAP_SYS_ADMIN))
 			ret = -EPERM;
@@ -1739,9 +1739,9 @@ static long __video_do_ioctl(struct file *file,
 		break;
 	}
 #endif
-	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_DBG_G_CHIP_IDENT:
 	{
-		struct v4l2_chip_ident *p = arg;
+		struct v4l2_dbg_chip_ident *p = arg;
 
 		if (!ops->vidioc_g_chip_ident)
 			break;
@@ -1750,6 +1750,11 @@ static long __video_do_ioctl(struct file *file,
 			dbgarg(cmd, "chip_ident=%u, revision=0x%x\n", p->ident, p->revision);
 		break;
 	}
+	case VIDIOC_G_CHIP_IDENT_OLD:
+		printk(KERN_ERR "VIDIOC_G_CHIP_IDENT has been deprecated and will disappear in 2.6.30.\n");
+		printk(KERN_ERR "It is a debugging ioctl and must not be used in applications!\n");
+		return -EINVAL;
+
 	case VIDIOC_S_HW_FREQ_SEEK:
 	{
 		struct v4l2_hw_freq_seek *p = arg;
diff --git a/drivers/media/video/v4l2-subdev.c b/drivers/media/video/v4l2-subdev.c
index e3612f2..fbe9cc0 100644
--- a/drivers/media/video/v4l2-subdev.c
+++ b/drivers/media/video/v4l2-subdev.c
@@ -37,7 +37,7 @@ int v4l2_subdev_command(struct v4l2_subdev *sd, unsigned cmd, void *arg)
 		return v4l2_subdev_call(sd, core, queryctrl, arg);
 	case VIDIOC_LOG_STATUS:
 		return v4l2_subdev_call(sd, core, log_status);
-	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_DBG_G_CHIP_IDENT:
 		return v4l2_subdev_call(sd, core, g_chip_ident, arg);
 	case VIDIOC_INT_S_STANDBY:
 		return v4l2_subdev_call(sd, core, s_standby, arg ? (*(u32 *)arg) : 0);
diff --git a/drivers/media/video/vp27smpx.c b/drivers/media/video/vp27smpx.c
index f72b859..5d73f66 100644
--- a/drivers/media/video/vp27smpx.c
+++ b/drivers/media/video/vp27smpx.c
@@ -113,7 +113,7 @@ static int vp27smpx_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *vt)
 	return 0;
 }
 
-static int vp27smpx_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int vp27smpx_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/wm8739.c b/drivers/media/video/wm8739.c
index 12a31e7..f2864d5 100644
--- a/drivers/media/video/wm8739.c
+++ b/drivers/media/video/wm8739.c
@@ -233,7 +233,7 @@ static int wm8739_queryctrl(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc)
 	return -EINVAL;
 }
 
-static int wm8739_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int wm8739_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/wm8775.c b/drivers/media/video/wm8775.c
index d0220b0..53fcd4284 100644
--- a/drivers/media/video/wm8775.c
+++ b/drivers/media/video/wm8775.c
@@ -130,7 +130,7 @@ static int wm8775_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl)
 	return 0;
 }
 
-static int wm8775_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int wm8775_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 1f126e3..5571dbe 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1370,25 +1370,41 @@ struct v4l2_streamparm {
 /*
  *	A D V A N C E D   D E B U G G I N G
  *
- *	NOTE: EXPERIMENTAL API
+ *	NOTE: EXPERIMENTAL API, NEVER RELY ON THIS IN APPLICATIONS!
+ *	FOR DEBUGGING, TESTING AND INTERNAL USE ONLY!
  */
 
 /* VIDIOC_DBG_G_REGISTER and VIDIOC_DBG_S_REGISTER */
 
 #define V4L2_CHIP_MATCH_HOST       0  /* Match against chip ID on host (0 for the host) */
-#define V4L2_CHIP_MATCH_I2C_DRIVER 1  /* Match against I2C driver ID */
+#define V4L2_CHIP_MATCH_I2C_DRIVER 1  /* Match against I2C driver name */
 #define V4L2_CHIP_MATCH_I2C_ADDR   2  /* Match against I2C 7-bit address */
 #define V4L2_CHIP_MATCH_AC97       3  /* Match against anciliary AC97 chip */
 
-struct v4l2_register {
-	__u32 match_type; /* Match type */
-	__u32 match_chip; /* Match this chip, meaning determined by match_type */
+struct v4l2_dbg_match {
+	__u32 type; /* Match type */
+	union {     /* Match this chip, meaning determined by type */
+		__u32 addr;
+		char name[32];
+	};
+} __attribute__ ((packed));
+
+struct v4l2_dbg_register {
+	struct v4l2_dbg_match match;
+	__u32 size;	/* register size in bytes */
 	__u64 reg;
 	__u64 val;
-};
+} __attribute__ ((packed));
+
+/* VIDIOC_DBG_G_CHIP_IDENT */
+struct v4l2_dbg_chip_ident {
+	struct v4l2_dbg_match match;
+	__u32 ident;       /* chip identifier as specified in <media/v4l2-chip-ident.h> */
+	__u32 revision;    /* chip revision, chip specific */
+} __attribute__ ((packed));
 
-/* VIDIOC_G_CHIP_IDENT */
-struct v4l2_chip_ident {
+/* VIDIOC_G_CHIP_IDENT_OLD: Deprecated, do not use */
+struct v4l2_chip_ident_old {
 	__u32 match_type;  /* Match type */
 	__u32 match_chip;  /* Match this chip, meaning determined by match_type */
 	__u32 ident;       /* chip identifier as specified in <media/v4l2-chip-ident.h> */
@@ -1460,13 +1476,22 @@ struct v4l2_chip_ident {
 #define VIDIOC_G_ENC_INDEX       _IOR('V', 76, struct v4l2_enc_idx)
 #define VIDIOC_ENCODER_CMD      _IOWR('V', 77, struct v4l2_encoder_cmd)
 #define VIDIOC_TRY_ENCODER_CMD  _IOWR('V', 78, struct v4l2_encoder_cmd)
+#endif
 
-/* Experimental, only implemented if CONFIG_VIDEO_ADV_DEBUG is defined */
-#define	VIDIOC_DBG_S_REGISTER 	 _IOW('V', 79, struct v4l2_register)
-#define	VIDIOC_DBG_G_REGISTER 	_IOWR('V', 80, struct v4l2_register)
-
-#define VIDIOC_G_CHIP_IDENT     _IOWR('V', 81, struct v4l2_chip_ident)
+#if 1
+/* Experimental, meant for debugging, testing and internal use.
+   Only implemented if CONFIG_VIDEO_ADV_DEBUG is defined.
+   You must be root to use these ioctls. Never use these in applications! */
+#define	VIDIOC_DBG_S_REGISTER 	 _IOW('V', 79, struct v4l2_dbg_register)
+#define	VIDIOC_DBG_G_REGISTER 	_IOWR('V', 80, struct v4l2_dbg_register)
+
+/* Experimental, meant for debugging, testing and internal use.
+   Never use this ioctl in applications! */
+#define VIDIOC_DBG_G_CHIP_IDENT _IOWR('V', 81, struct v4l2_dbg_chip_ident)
+/* This is deprecated and will go away in 2.6.30 */
+#define VIDIOC_G_CHIP_IDENT_OLD _IOWR('V', 81, struct v4l2_chip_ident_old)
 #endif
+
 #define VIDIOC_S_HW_FREQ_SEEK	 _IOW('V', 82, struct v4l2_hw_freq_seek)
 /* Reminder: when adding new ioctls please add support for them to
    drivers/media/video/v4l2-compat-ioctl32.c as well! */
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index 425b6a9..7440d92 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -164,12 +164,12 @@ struct soc_camera_ops {
 	unsigned long (*query_bus_param)(struct soc_camera_device *);
 	int (*set_bus_param)(struct soc_camera_device *, unsigned long);
 	int (*get_chip_id)(struct soc_camera_device *,
-			   struct v4l2_chip_ident *);
+			   struct v4l2_dbg_chip_ident *);
 	int (*set_std)(struct soc_camera_device *, v4l2_std_id *);
 	int (*enum_input)(struct soc_camera_device *, struct v4l2_input *);
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-	int (*get_register)(struct soc_camera_device *, struct v4l2_register *);
-	int (*set_register)(struct soc_camera_device *, struct v4l2_register *);
+	int (*get_register)(struct soc_camera_device *, struct v4l2_dbg_register *);
+	int (*set_register)(struct soc_camera_device *, struct v4l2_dbg_register *);
 #endif
 	int (*get_control)(struct soc_camera_device *, struct v4l2_control *);
 	int (*set_control)(struct soc_camera_device *, struct v4l2_control *);
diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h
index 43dbb65..9aaf652 100644
--- a/include/media/v4l2-chip-ident.h
+++ b/include/media/v4l2-chip-ident.h
@@ -2,7 +2,7 @@
     v4l2 chip identifiers header
 
     This header provides a list of chip identifiers that can be returned
-    through the VIDIOC_G_CHIP_IDENT ioctl.
+    through the VIDIOC_DBG_G_CHIP_IDENT ioctl.
 
     Copyright (C) 2007 Hans Verkuil <hverkuil@xs4all.nl>
 
@@ -24,7 +24,7 @@
 #ifndef V4L2_CHIP_IDENT_H_
 #define V4L2_CHIP_IDENT_H_
 
-/* VIDIOC_G_CHIP_IDENT: identifies the actual chip installed on the board */
+/* VIDIOC_DBG_G_CHIP_IDENT: identifies the actual chip installed on the board */
 enum {
 	/* general idents: reserved range 0-49 */
 	V4L2_IDENT_NONE      = 0,       /* No chip matched */
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index f99c866..95e74f1 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -114,10 +114,10 @@ u32 v4l2_ctrl_next(const u32 * const *ctrl_classes, u32 id);
 /* Register/chip ident helper function */
 
 struct i2c_client; /* forward reference */
-int v4l2_chip_match_i2c_client(struct i2c_client *c, u32 id_type, u32 chip_id);
-int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_chip_ident *chip,
+int v4l2_chip_match_i2c_client(struct i2c_client *c, const struct v4l2_dbg_match *match);
+int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_dbg_chip_ident *chip,
 		u32 ident, u32 revision);
-int v4l2_chip_match_host(u32 id_type, u32 chip_id);
+int v4l2_chip_match_host(const struct v4l2_dbg_match *match);
 
 /* ------------------------------------------------------------------------- */
 
diff --git a/include/media/v4l2-int-device.h b/include/media/v4l2-int-device.h
index ecda3c7..fbf5855 100644
--- a/include/media/v4l2-int-device.h
+++ b/include/media/v4l2-int-device.h
@@ -219,7 +219,7 @@ enum v4l2_int_ioctl_num {
 	vidioc_int_reset_num,
 	/* VIDIOC_INT_INIT */
 	vidioc_int_init_num,
-	/* VIDIOC_INT_G_CHIP_IDENT */
+	/* VIDIOC_DBG_G_CHIP_IDENT */
 	vidioc_int_g_chip_ident_num,
 
 	/*
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index bf0e723..b01c044 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -225,12 +225,12 @@ struct v4l2_ioctl_ops {
 	/* Debugging ioctls */
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	int (*vidioc_g_register)       (struct file *file, void *fh,
-					struct v4l2_register *reg);
+					struct v4l2_dbg_register *reg);
 	int (*vidioc_s_register)       (struct file *file, void *fh,
-					struct v4l2_register *reg);
+					struct v4l2_dbg_register *reg);
 #endif
 	int (*vidioc_g_chip_ident)     (struct file *file, void *fh,
-					struct v4l2_chip_ident *chip);
+					struct v4l2_dbg_chip_ident *chip);
 
 	int (*vidioc_enum_framesizes)   (struct file *file, void *fh,
 					 struct v4l2_frmsizeenum *fsize);
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 2517344..37b09e5 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -69,7 +69,7 @@ struct tuner_setup;
    not yet implemented) since ops provide proper type-checking.
  */
 struct v4l2_subdev_core_ops {
-	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip);
+	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip);
 	int (*log_status)(struct v4l2_subdev *sd);
 	int (*init)(struct v4l2_subdev *sd, u32 val);
 	int (*s_standby)(struct v4l2_subdev *sd, u32 standby);
@@ -81,8 +81,8 @@ struct v4l2_subdev_core_ops {
 	int (*querymenu)(struct v4l2_subdev *sd, struct v4l2_querymenu *qm);
 	long (*ioctl)(struct v4l2_subdev *sd, unsigned int cmd, void *arg);
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-	int (*g_register)(struct v4l2_subdev *sd, struct v4l2_register *reg);
-	int (*s_register)(struct v4l2_subdev *sd, struct v4l2_register *reg);
+	int (*g_register)(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg);
+	int (*s_register)(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg);
 #endif
 };
 
-- 
cgit v0.10.2


From 9ed55375919bc30c448c6dd5107e8d593f96856f Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Tue, 30 Dec 2008 16:40:00 -0300
Subject: V4L/DVB (10144): cx24116: build fix

Add missed MODULE check to eliminate inapropriate
declaration being choosed which causes a build error.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/dvb/frontends/cx24116.h b/drivers/media/dvb/frontends/cx24116.h
index 4cb3ddd..b1b76b4 100644
--- a/drivers/media/dvb/frontends/cx24116.h
+++ b/drivers/media/dvb/frontends/cx24116.h
@@ -37,7 +37,8 @@ struct cx24116_config {
 	u8 mpg_clk_pos_pol:0x02;
 };
 
-#if defined(CONFIG_DVB_CX24116) || defined(CONFIG_DVB_CX24116_MODULE)
+#if defined(CONFIG_DVB_CX24116) || \
+	(defined(CONFIG_DVB_CX24116_MODULE) && defined(MODULE))
 extern struct dvb_frontend *cx24116_attach(
 	const struct cx24116_config *config,
 	struct i2c_adapter *i2c);
-- 
cgit v0.10.2


From f347535a6065be6f9e65526fa82c088d68040f42 Mon Sep 17 00:00:00 2001
From: roel kluin <roel.kluin@gmail.com>
Date: Wed, 26 Nov 2008 22:03:18 -0300
Subject: V4L/DVB (10148): cx23885: unsigned cx23417_mailbox cannot be negative

Unsigned cx23417_mailbox cannot be negative

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/cx23885/cx23885-417.c b/drivers/media/video/cx23885/cx23885-417.c
index d988813..8f1db57 100644
--- a/drivers/media/video/cx23885/cx23885-417.c
+++ b/drivers/media/video/cx23885/cx23885-417.c
@@ -1027,12 +1027,13 @@ static int cx23885_initialize_codec(struct cx23885_dev *dev)
 			printk(KERN_ERR "%s() f/w load failed\n", __func__);
 			return retval;
 		}
-		dev->cx23417_mailbox = cx23885_find_mailbox(dev);
-		if (dev->cx23417_mailbox < 0) {
+		retval = cx23885_find_mailbox(dev);
+		if (retval < 0) {
 			printk(KERN_ERR "%s() mailbox < 0, error\n",
 				__func__);
 			return -1;
 		}
+		dev->cx23417_mailbox = retval;
 		retval = cx23885_api_cmd(dev, CX2341X_ENC_PING_FW, 0, 0);
 		if (retval < 0) {
 			printk(KERN_ERR
-- 
cgit v0.10.2


From fbe9834a4a67a21a405043af727073acd103f842 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 12 Nov 2008 12:00:18 -0300
Subject: V4L/DVB (10149): ttusb-budget: make it depend on PCI

Since dvb-ttusb-budget.c relies on pci_alloc_consistent and
pci_free_consistent, make it depend on PCI in Kconfig.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/dvb/ttusb-budget/Kconfig b/drivers/media/dvb/ttusb-budget/Kconfig
index f546bcc..2663ae3 100644
--- a/drivers/media/dvb/ttusb-budget/Kconfig
+++ b/drivers/media/dvb/ttusb-budget/Kconfig
@@ -1,6 +1,6 @@
 config DVB_TTUSB_BUDGET
 	tristate "Technotrend/Hauppauge Nova-USB devices"
-	depends on DVB_CORE && USB && I2C
+	depends on DVB_CORE && USB && I2C && PCI
 	select DVB_CX22700 if !DVB_FE_CUSTOMISE
 	select DVB_TDA1004X if !DVB_FE_CUSTOMISE
 	select DVB_VES1820 if !DVB_FE_CUSTOMISE
-- 
cgit v0.10.2


From 91f7c130c277a08ebef92ac23ed60adc62e505e0 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 12 Nov 2008 12:04:28 -0300
Subject: V4L/DVB (10150): ttusb-dec: make it depend on PCI

Since ttusb_dec.c relies on pci_alloc_consistent and
pci_free_consistent, make it depend on PCI in Kconfig.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/dvb/ttusb-dec/Kconfig b/drivers/media/dvb/ttusb-dec/Kconfig
index d5f48a3..290254a 100644
--- a/drivers/media/dvb/ttusb-dec/Kconfig
+++ b/drivers/media/dvb/ttusb-dec/Kconfig
@@ -1,6 +1,6 @@
 config DVB_TTUSB_DEC
 	tristate "Technotrend/Hauppauge USB DEC devices"
-	depends on DVB_CORE && USB && INPUT
+	depends on DVB_CORE && USB && INPUT && PCI
 	select CRC32
 	help
 	  Support for external USB adapters designed by Technotrend and
-- 
cgit v0.10.2


From 899a6f67b90206c330bd93c7c8f3f8bb8b80397a Mon Sep 17 00:00:00 2001
From: Dmitri Belimov <d.belimov@gmail.com>
Date: Tue, 23 Dec 2008 03:50:09 -0300
Subject: V4L/DVB (10151): Fix I2C bridge error in zl10353

Fix I2C bridge error in zl10353 if no tunner attached to internal I2C
bus of zl10353 chip.

When set enable bridge from internal I2C bus to the main I2C bus
(saa7134) the main I2C bus stopped very hardly. No any communication. In
our next board we solder additional resistors to internal I2C bus.

Signed-off-by: Beholder Intl. Ltd. Dmitry Belimov <d.belimov@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/dvb/frontends/zl10353.c b/drivers/media/dvb/frontends/zl10353.c
index 5506f80..170720b 100644
--- a/drivers/media/dvb/frontends/zl10353.c
+++ b/drivers/media/dvb/frontends/zl10353.c
@@ -587,8 +587,15 @@ static int zl10353_init(struct dvb_frontend *fe)
 
 static int zl10353_i2c_gate_ctrl(struct dvb_frontend* fe, int enable)
 {
+	struct zl10353_state *state = fe->demodulator_priv;
 	u8 val = 0x0a;
 
+	if (state->config.no_tuner) {
+		/* No tuner attached to the internal I2C bus */
+		/* If set enable I2C bridge, the main I2C bus stopped hardly */
+		return 0;
+	}
+
 	if (enable)
 		val |= 0x10;
 
-- 
cgit v0.10.2


From f204ae40ad79bbf50d85427a5cf39fcebdb4a993 Mon Sep 17 00:00:00 2001
From: Dmitri Belimov <d.belimov@gmail.com>
Date: Tue, 23 Dec 2008 03:51:38 -0300
Subject: V4L/DVB (10152): Change configuration of the Beholder H6 card

Signed-off-by: Beholder Intl. Ltd. Dmitry Belimov <d.belimov@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/saa7134/saa7134-cards.c b/drivers/media/video/saa7134/saa7134-cards.c
index a2e3f67..e240b4b 100644
--- a/drivers/media/video/saa7134/saa7134-cards.c
+++ b/drivers/media/video/saa7134/saa7134-cards.c
@@ -4462,6 +4462,7 @@ struct saa7134_board saa7134_boards[] = {
 		.tuner_addr     = ADDR_UNSET,
 		.radio_addr     = ADDR_UNSET,
 		.tda9887_conf   = TDA9887_PRESENT,
+		.mpeg           = SAA7134_MPEG_DVB,
 		.inputs         = {{
 			.name = name_tv,
 			.vmux = 3,
@@ -4480,8 +4481,6 @@ struct saa7134_board saa7134_boards[] = {
 			.name = name_radio,
 			.amux = LINE2,
 		},
-		/* no DVB support for now */
-		/* .mpeg           = SAA7134_MPEG_DVB, */
 	},
 	[SAA7134_BOARD_ASUSTeK_TIGER_3IN1] = {
 		.name           = "Asus Tiger 3in1",
@@ -6025,6 +6024,7 @@ int saa7134_board_init1(struct saa7134_dev *dev)
 	case SAA7134_BOARD_BEHOLD_M6:
 	case SAA7134_BOARD_BEHOLD_M63:
 	case SAA7134_BOARD_BEHOLD_M6_EXTRA:
+	case SAA7134_BOARD_BEHOLD_H6:
 		dev->has_remote = SAA7134_REMOTE_I2C;
 		break;
 	case SAA7134_BOARD_AVERMEDIA_A169_B:
-- 
cgit v0.10.2


From 47aeba5addd88b178438ba9000600b9844ca0ee1 Mon Sep 17 00:00:00 2001
From: Dmitri Belimov <d.belimov@gmail.com>
Date: Tue, 23 Dec 2008 03:53:03 -0300
Subject: V4L/DVB (10153): Add the Beholder H6 card to DVB-T part of sources.

Signed-off-by: Beholder Intl. Ltd. Dmitry Belimov <d.belimov@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/saa7134/saa7134-dvb.c b/drivers/media/video/saa7134/saa7134-dvb.c
index d9a5652..a25a740 100644
--- a/drivers/media/video/saa7134/saa7134-dvb.c
+++ b/drivers/media/video/saa7134/saa7134-dvb.c
@@ -49,6 +49,8 @@
 #include "lnbp21.h"
 #include "tuner-simple.h"
 
+#include "zl10353.h"
+
 MODULE_AUTHOR("Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]");
 MODULE_LICENSE("GPL");
 
@@ -854,6 +856,12 @@ static struct tda1004x_config ads_tech_duo_config = {
 	.request_firmware = philips_tda1004x_request_firmware
 };
 
+static struct zl10353_config behold_h6_config = {
+	.demod_address = 0x1e>>1,
+	.no_tuner      = 1,
+	.parallel_ts   = 1,
+};
+
 /* ==================================================================
  * tda10086 based DVB-S cards, helper functions
  */
@@ -1357,6 +1365,16 @@ static int dvb_init(struct saa7134_dev *dev)
 					 &tda827x_cfg_0) < 0)
 			goto dettach_frontend;
 		break;
+	case SAA7134_BOARD_BEHOLD_H6:
+		dev->dvb.frontend = dvb_attach(zl10353_attach,
+						&behold_h6_config,
+						&dev->i2c_adap);
+		if (dev->dvb.frontend) {
+			dvb_attach(simple_tuner_attach, dev->dvb.frontend,
+				   &dev->i2c_adap, 0x61,
+				   TUNER_PHILIPS_FMD1216ME_MK3);
+		}
+		break;
 	default:
 		wprintk("Huh? unknown DVB card?\n");
 		break;
-- 
cgit v0.10.2


From b0c4be8cffb3f466759ddf621a74a10093537521 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Tue, 30 Dec 2008 19:10:09 -0300
Subject: V4L/DVB (10154): saa7134: fix a merge conflict on Behold H6 board

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/saa7134/saa7134-dvb.c b/drivers/media/video/saa7134/saa7134-dvb.c
index a25a740..0776ecf 100644
--- a/drivers/media/video/saa7134/saa7134-dvb.c
+++ b/drivers/media/video/saa7134/saa7134-dvb.c
@@ -1366,11 +1366,11 @@ static int dvb_init(struct saa7134_dev *dev)
 			goto dettach_frontend;
 		break;
 	case SAA7134_BOARD_BEHOLD_H6:
-		dev->dvb.frontend = dvb_attach(zl10353_attach,
+		fe0->dvb.frontend = dvb_attach(zl10353_attach,
 						&behold_h6_config,
 						&dev->i2c_adap);
-		if (dev->dvb.frontend) {
-			dvb_attach(simple_tuner_attach, dev->dvb.frontend,
+		if (fe0->dvb.frontend) {
+			dvb_attach(simple_tuner_attach, fe0->dvb.frontend,
 				   &dev->i2c_adap, 0x61,
 				   TUNER_PHILIPS_FMD1216ME_MK3);
 		}
-- 
cgit v0.10.2


From 46a60cfef581307d8273919182ae939d44ff7cca Mon Sep 17 00:00:00 2001
From: Fabio Belavenuto <belavenuto@gmail.com>
Date: Tue, 30 Dec 2008 19:27:09 -0300
Subject: V4L/DVB (10155): Add TEA5764 radio driver

Add support for radio driver TEA5764 from NXP.
This chip is connected in pxa I2C bus in EZX phones
from Motorola, the chip is used in phone model A1200.
This driver is for OpenEZX project (www.openezx.org)
Tested with A1200 phone, openezx kernel and fm-tools

[mchehab@redhat.com: Fixed CodingStyle and solved some merge conflicts]
Signed-off-by: Fabio Belavenuto <belavenuto@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/radio/Kconfig b/drivers/media/radio/Kconfig
index 5189c4e..3315cac 100644
--- a/drivers/media/radio/Kconfig
+++ b/drivers/media/radio/Kconfig
@@ -387,4 +387,23 @@ config USB_MR800
 	  To compile this driver as a module, choose M here: the
 	  module will be called radio-mr800.
 
+config RADIO_TEA5764
+	tristate "TEA5764 I2C FM radio support"
+	depends on I2C && VIDEO_V4L2
+	---help---
+	  Say Y here if you want to use the TEA5764 FM chip found in
+	  EZX phones. This FM chip is present in EZX phones from Motorola,
+	  connected to internal pxa I2C bus.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called radio-tea5764.
+
+config RADIO_TEA5764_XTAL
+	bool "TEA5764 crystal reference"
+	depends on RADIO_TEA5764=y
+	default y
+	help
+	  Say Y here if TEA5764 have a 32768 Hz crystal in circuit, say N
+	  here if TEA5764 reference frequency is connected in FREQIN.
+
 endif # RADIO_ADAPTERS
diff --git a/drivers/media/radio/Makefile b/drivers/media/radio/Makefile
index 240ec63c..0f2b35b 100644
--- a/drivers/media/radio/Makefile
+++ b/drivers/media/radio/Makefile
@@ -19,5 +19,6 @@ obj-$(CONFIG_RADIO_MAESTRO) += radio-maestro.o
 obj-$(CONFIG_USB_DSBR) += dsbr100.o
 obj-$(CONFIG_USB_SI470X) += radio-si470x.o
 obj-$(CONFIG_USB_MR800) += radio-mr800.o
+obj-$(CONFIG_RADIO_TEA5764) += radio-tea5764.o
 
 EXTRA_CFLAGS += -Isound
diff --git a/drivers/media/radio/radio-tea5764.c b/drivers/media/radio/radio-tea5764.c
new file mode 100644
index 0000000..4d35308
--- /dev/null
+++ b/drivers/media/radio/radio-tea5764.c
@@ -0,0 +1,634 @@
+/*
+ * driver/media/radio/radio-tea5764.c
+ *
+ * Driver for TEA5764 radio chip for linux 2.6.
+ * This driver is for TEA5764 chip from NXP, used in EZX phones from Motorola.
+ * The I2C protocol is used for communicate with chip.
+ *
+ * Based in radio-tea5761.c Copyright (C) 2005 Nokia Corporation
+ *
+ *  Copyright (c) 2008 Fabio Belavenuto <belavenuto@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * History:
+ * 2008-12-06   Fabio Belavenuto <belavenuto@gmail.com>
+ *              initial code
+ *
+ * TODO:
+ *  add platform_data support for IRQs platform dependencies
+ *  add RDS support
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>			/* Initdata			*/
+#include <linux/videodev2.h>		/* kernel radio structs		*/
+#include <linux/i2c.h>			/* I2C				*/
+#include <media/v4l2-common.h>
+#include <media/v4l2-ioctl.h>
+#include <linux/version.h>      	/* for KERNEL_VERSION MACRO     */
+
+#define DRIVER_VERSION	"v0.01"
+#define RADIO_VERSION	KERNEL_VERSION(0, 0, 1)
+
+#define DRIVER_AUTHOR	"Fabio Belavenuto <belavenuto@gmail.com>"
+#define DRIVER_DESC	"A driver for the TEA5764 radio chip for EZX Phones."
+
+#define PINFO(format, ...)\
+	printk(KERN_INFO KBUILD_MODNAME ": "\
+		DRIVER_VERSION ": " format "\n", ## __VA_ARGS__)
+#define PWARN(format, ...)\
+	printk(KERN_WARNING KBUILD_MODNAME ": "\
+		DRIVER_VERSION ": " format "\n", ## __VA_ARGS__)
+# define PDEBUG(format, ...)\
+	printk(KERN_DEBUG KBUILD_MODNAME ": "\
+		DRIVER_VERSION ": " format "\n", ## __VA_ARGS__)
+
+/* Frequency limits in MHz -- these are European values.  For Japanese
+devices, that would be 76000 and 91000.  */
+#define FREQ_MIN  87500
+#define FREQ_MAX 108000
+#define FREQ_MUL 16
+
+/* TEA5764 registers */
+#define TEA5764_MANID		0x002b
+#define TEA5764_CHIPID		0x5764
+
+#define TEA5764_INTREG_BLMSK	0x0001
+#define TEA5764_INTREG_FRRMSK	0x0002
+#define TEA5764_INTREG_LEVMSK	0x0008
+#define TEA5764_INTREG_IFMSK	0x0010
+#define TEA5764_INTREG_BLMFLAG	0x0100
+#define TEA5764_INTREG_FRRFLAG	0x0200
+#define TEA5764_INTREG_LEVFLAG	0x0800
+#define TEA5764_INTREG_IFFLAG	0x1000
+
+#define TEA5764_FRQSET_SUD	0x8000
+#define TEA5764_FRQSET_SM	0x4000
+
+#define TEA5764_TNCTRL_PUPD1	0x8000
+#define TEA5764_TNCTRL_PUPD0	0x4000
+#define TEA5764_TNCTRL_BLIM	0x2000
+#define TEA5764_TNCTRL_SWPM	0x1000
+#define TEA5764_TNCTRL_IFCTC	0x0800
+#define TEA5764_TNCTRL_AFM	0x0400
+#define TEA5764_TNCTRL_SMUTE	0x0200
+#define TEA5764_TNCTRL_SNC	0x0100
+#define TEA5764_TNCTRL_MU	0x0080
+#define TEA5764_TNCTRL_SSL1	0x0040
+#define TEA5764_TNCTRL_SSL0	0x0020
+#define TEA5764_TNCTRL_HLSI	0x0010
+#define TEA5764_TNCTRL_MST	0x0008
+#define TEA5764_TNCTRL_SWP	0x0004
+#define TEA5764_TNCTRL_DTC	0x0002
+#define TEA5764_TNCTRL_AHLSI	0x0001
+
+#define TEA5764_TUNCHK_LEVEL(x)	(((x) & 0x00F0) >> 4)
+#define TEA5764_TUNCHK_IFCNT(x) (((x) & 0xFE00) >> 9)
+#define TEA5764_TUNCHK_TUNTO	0x0100
+#define TEA5764_TUNCHK_LD	0x0008
+#define TEA5764_TUNCHK_STEREO	0x0004
+
+#define TEA5764_TESTREG_TRIGFR	0x0800
+
+struct tea5764_regs {
+	u16 intreg;				/* INTFLAG & INTMSK */
+	u16 frqset;				/* FRQSETMSB & FRQSETLSB */
+	u16 tnctrl;				/* TNCTRL1 & TNCTRL2 */
+	u16 frqchk;				/* FRQCHKMSB & FRQCHKLSB */
+	u16 tunchk;				/* IFCHK & LEVCHK */
+	u16 testreg;				/* TESTBITS & TESTMODE */
+	u16 rdsstat;				/* RDSSTAT1 & RDSSTAT2 */
+	u16 rdslb;				/* RDSLBMSB & RDSLBLSB */
+	u16 rdspb;				/* RDSPBMSB & RDSPBLSB */
+	u16 rdsbc;				/* RDSBBC & RDSGBC */
+	u16 rdsctrl;				/* RDSCTRL1 & RDSCTRL2 */
+	u16 rdsbbl;				/* PAUSEDET & RDSBBL */
+	u16 manid;				/* MANID1 & MANID2 */
+	u16 chipid;				/* CHIPID1 & CHIPID2 */
+} __attribute__ ((packed));
+
+struct tea5764_write_regs {
+	u8 intreg;				/* INTMSK */
+	u16 frqset;				/* FRQSETMSB & FRQSETLSB */
+	u16 tnctrl;				/* TNCTRL1 & TNCTRL2 */
+	u16 testreg;				/* TESTBITS & TESTMODE */
+	u16 rdsctrl;				/* RDSCTRL1 & RDSCTRL2 */
+	u16 rdsbbl;				/* PAUSEDET & RDSBBL */
+} __attribute__ ((packed));
+
+#ifndef RADIO_TEA5764_XTAL
+#define RADIO_TEA5764_XTAL 1
+#endif
+
+static int radio_nr = -1;
+static int use_xtal = RADIO_TEA5764_XTAL;
+
+struct tea5764_device {
+	struct i2c_client		*i2c_client;
+	struct video_device		*videodev;
+	struct tea5764_regs		regs;
+	struct mutex			mutex;
+	int				users;
+};
+
+/* I2C code related */
+int tea5764_i2c_read(struct tea5764_device *radio)
+{
+	int i;
+	u16 *p = (u16 *) &radio->regs;
+
+	struct i2c_msg msgs[1] = {
+		{ radio->i2c_client->addr, I2C_M_RD, sizeof(radio->regs),
+			(void *)&radio->regs },
+	};
+	if (i2c_transfer(radio->i2c_client->adapter, msgs, 1) != 1)
+		return -EIO;
+	for (i = 0; i < sizeof(struct tea5764_regs) / sizeof(u16); i++)
+		p[i] = __be16_to_cpu(p[i]);
+
+	return 0;
+}
+
+int tea5764_i2c_write(struct tea5764_device *radio)
+{
+	struct tea5764_write_regs wr;
+	struct tea5764_regs *r = &radio->regs;
+	struct i2c_msg msgs[1] = {
+		{ radio->i2c_client->addr, 0, sizeof(wr), (void *) &wr },
+	};
+	wr.intreg  = r->intreg & 0xff;
+	wr.frqset  = __cpu_to_be16(r->frqset);
+	wr.tnctrl  = __cpu_to_be16(r->tnctrl);
+	wr.testreg = __cpu_to_be16(r->testreg);
+	wr.rdsctrl = __cpu_to_be16(r->rdsctrl);
+	wr.rdsbbl  = __cpu_to_be16(r->rdsbbl);
+	if (i2c_transfer(radio->i2c_client->adapter, msgs, 1) != 1)
+		return -EIO;
+	return 0;
+}
+
+/* V4L2 code related */
+static struct v4l2_queryctrl radio_qctrl[] = {
+	{
+		.id            = V4L2_CID_AUDIO_MUTE,
+		.name          = "Mute",
+		.minimum       = 0,
+		.maximum       = 1,
+		.default_value = 1,
+		.type          = V4L2_CTRL_TYPE_BOOLEAN,
+	}
+};
+
+static void tea5764_power_up(struct tea5764_device *radio)
+{
+	struct tea5764_regs *r = &radio->regs;
+
+	if (!(r->tnctrl & TEA5764_TNCTRL_PUPD0)) {
+		r->tnctrl &= ~(TEA5764_TNCTRL_AFM | TEA5764_TNCTRL_MU |
+			       TEA5764_TNCTRL_HLSI);
+		if (!use_xtal)
+			r->testreg |= TEA5764_TESTREG_TRIGFR;
+		else
+			r->testreg &= ~TEA5764_TESTREG_TRIGFR;
+
+		r->tnctrl |= TEA5764_TNCTRL_PUPD0;
+		tea5764_i2c_write(radio);
+	}
+}
+
+static void tea5764_power_down(struct tea5764_device *radio)
+{
+	struct tea5764_regs *r = &radio->regs;
+
+	if (r->tnctrl & TEA5764_TNCTRL_PUPD0) {
+		r->tnctrl &= ~TEA5764_TNCTRL_PUPD0;
+		tea5764_i2c_write(radio);
+	}
+}
+
+static void tea5764_set_freq(struct tea5764_device *radio, int freq)
+{
+	struct tea5764_regs *r = &radio->regs;
+
+	/* formula: (freq [+ or -] 225000) / 8192 */
+	if (r->tnctrl & TEA5764_TNCTRL_HLSI)
+		r->frqset = (freq + 225000) / 8192;
+	else
+		r->frqset = (freq - 225000) / 8192;
+}
+
+static int tea5764_get_freq(struct tea5764_device *radio)
+{
+	struct tea5764_regs *r = &radio->regs;
+
+	if (r->tnctrl & TEA5764_TNCTRL_HLSI)
+		return (r->frqchk * 8192) - 225000;
+	else
+		return (r->frqchk * 8192) + 225000;
+}
+
+/* tune an frequency, freq is defined by v4l's TUNER_LOW, i.e. 1/16th kHz */
+static void tea5764_tune(struct tea5764_device *radio, int freq)
+{
+	tea5764_set_freq(radio, freq);
+	if (tea5764_i2c_write(radio))
+		PWARN("Could not set frequency!");
+}
+
+static void tea5764_set_audout_mode(struct tea5764_device *radio, int audmode)
+{
+	struct tea5764_regs *r = &radio->regs;
+	int tnctrl = r->tnctrl;
+
+	if (audmode == V4L2_TUNER_MODE_MONO)
+		r->tnctrl |= TEA5764_TNCTRL_MST;
+	else
+		r->tnctrl &= ~TEA5764_TNCTRL_MST;
+	if (tnctrl != r->tnctrl)
+		tea5764_i2c_write(radio);
+}
+
+static int tea5764_get_audout_mode(struct tea5764_device *radio)
+{
+	struct tea5764_regs *r = &radio->regs;
+
+	if (r->tnctrl & TEA5764_TNCTRL_MST)
+		return V4L2_TUNER_MODE_MONO;
+	else
+		return V4L2_TUNER_MODE_STEREO;
+}
+
+static void tea5764_mute(struct tea5764_device *radio, int on)
+{
+	struct tea5764_regs *r = &radio->regs;
+	int tnctrl = r->tnctrl;
+
+	if (on)
+		r->tnctrl |= TEA5764_TNCTRL_MU;
+	else
+		r->tnctrl &= ~TEA5764_TNCTRL_MU;
+	if (tnctrl != r->tnctrl)
+		tea5764_i2c_write(radio);
+}
+
+static int tea5764_is_muted(struct tea5764_device *radio)
+{
+	return radio->regs.tnctrl & TEA5764_TNCTRL_MU;
+}
+
+/* V4L2 vidioc */
+static int vidioc_querycap(struct file *file, void  *priv,
+					struct v4l2_capability *v)
+{
+	struct tea5764_device *radio = video_drvdata(file);
+	struct video_device *dev = radio->videodev;
+
+	strlcpy(v->driver, dev->dev.driver->name, sizeof(v->driver));
+	strlcpy(v->card, dev->name, sizeof(v->card));
+	snprintf(v->bus_info, sizeof(v->bus_info), "I2C:%s", dev->dev.bus_id);
+	v->version = RADIO_VERSION;
+	v->capabilities = V4L2_CAP_TUNER | V4L2_CAP_RADIO;
+	return 0;
+}
+
+static int vidioc_g_tuner(struct file *file, void *priv,
+				struct v4l2_tuner *v)
+{
+	struct tea5764_device *radio = video_drvdata(file);
+	struct tea5764_regs *r = &radio->regs;
+
+	if (v->index > 0)
+		return -EINVAL;
+
+	memset(v, 0, sizeof(v));
+	strcpy(v->name, "FM");
+	v->type = V4L2_TUNER_RADIO;
+	tea5764_i2c_read(radio);
+	v->rangelow   = FREQ_MIN * FREQ_MUL;
+	v->rangehigh  = FREQ_MAX * FREQ_MUL;
+	v->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO;
+	if (r->tunchk & TEA5764_TUNCHK_STEREO)
+			v->rxsubchans = V4L2_TUNER_SUB_STEREO;
+	v->audmode = tea5764_get_audout_mode(radio);
+	v->signal = TEA5764_TUNCHK_LEVEL(r->tunchk) * 0xffff / 0xf;
+	v->afc = TEA5764_TUNCHK_IFCNT(r->tunchk);
+
+	return 0;
+}
+
+static int vidioc_s_tuner(struct file *file, void *priv,
+				struct v4l2_tuner *v)
+{
+	struct tea5764_device *radio = video_drvdata(file);
+
+	if (v->index > 0)
+		return -EINVAL;
+
+	tea5764_set_audout_mode(radio, v->audmode);
+	return 0;
+}
+
+static int vidioc_s_frequency(struct file *file, void *priv,
+				struct v4l2_frequency *f)
+{
+	struct tea5764_device *radio = video_drvdata(file);
+
+	if (f->tuner != 0)
+		return -EINVAL;
+	if (f->frequency == 0) {
+		/* We special case this as a power down control. */
+		tea5764_power_down(radio);
+	}
+	if (f->frequency < (FREQ_MIN * FREQ_MUL))
+		return -EINVAL;
+	if (f->frequency > (FREQ_MAX * FREQ_MUL))
+		return -EINVAL;
+	tea5764_power_up(radio);
+	tea5764_tune(radio, (f->frequency * 125) / 2);
+	return 0;
+}
+
+static int vidioc_g_frequency(struct file *file, void *priv,
+				struct v4l2_frequency *f)
+{
+	struct tea5764_device *radio = video_drvdata(file);
+	struct tea5764_regs *r = &radio->regs;
+
+	tea5764_i2c_read(radio);
+	memset(f, 0, sizeof(f));
+	f->type = V4L2_TUNER_RADIO;
+	if (r->tnctrl & TEA5764_TNCTRL_PUPD0)
+		f->frequency = (tea5764_get_freq(radio) * 2) / 125;
+	else
+		f->frequency = 0;
+
+	return 0;
+}
+
+static int vidioc_queryctrl(struct file *file, void *priv,
+			    struct v4l2_queryctrl *qc)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(radio_qctrl); i++) {
+		if (qc->id && qc->id == radio_qctrl[i].id) {
+			memcpy(qc, &(radio_qctrl[i]), sizeof(*qc));
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
+static int vidioc_g_ctrl(struct file *file, void *priv,
+			    struct v4l2_control *ctrl)
+{
+	struct tea5764_device *radio = video_drvdata(file);
+
+	switch (ctrl->id) {
+	case V4L2_CID_AUDIO_MUTE:
+		tea5764_i2c_read(radio);
+		ctrl->value = tea5764_is_muted(radio) ? 1 : 0;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int vidioc_s_ctrl(struct file *file, void *priv,
+			    struct v4l2_control *ctrl)
+{
+	struct tea5764_device *radio = video_drvdata(file);
+
+	switch (ctrl->id) {
+	case V4L2_CID_AUDIO_MUTE:
+		tea5764_mute(radio, ctrl->value);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int vidioc_g_input(struct file *filp, void *priv, unsigned int *i)
+{
+	*i = 0;
+	return 0;
+}
+
+static int vidioc_s_input(struct file *filp, void *priv, unsigned int i)
+{
+	if (i != 0)
+		return -EINVAL;
+	return 0;
+}
+
+static int vidioc_g_audio(struct file *file, void *priv,
+			   struct v4l2_audio *a)
+{
+	if (a->index > 1)
+		return -EINVAL;
+
+	strcpy(a->name, "Radio");
+	a->capability = V4L2_AUDCAP_STEREO;
+	return 0;
+}
+
+static int vidioc_s_audio(struct file *file, void *priv,
+			   struct v4l2_audio *a)
+{
+	if (a->index != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int tea5764_open(struct file *file)
+{
+	/* Currently we support only one device */
+	int minor = video_devdata(file)->minor;
+	struct tea5764_device *radio = video_drvdata(file);
+
+	if (radio->videodev->minor != minor)
+		return -ENODEV;
+
+	mutex_lock(&radio->mutex);
+	/* Only exclusive access */
+	if (radio->users) {
+		mutex_unlock(&radio->mutex);
+		return -EBUSY;
+	}
+	radio->users++;
+	mutex_unlock(&radio->mutex);
+	file->private_data = radio;
+	return 0;
+}
+
+static int tea5764_close(struct file *file)
+{
+	struct tea5764_device *radio = video_drvdata(file);
+
+	if (!radio)
+		return -ENODEV;
+	mutex_lock(&radio->mutex);
+	radio->users--;
+	mutex_unlock(&radio->mutex);
+	return 0;
+}
+
+/* File system interface */
+static const struct v4l2_file_operations tea5764_fops = {
+	.owner		= THIS_MODULE,
+	.open           = tea5764_open,
+	.release        = tea5764_close,
+	.ioctl		= video_ioctl2,
+};
+
+static const struct v4l2_ioctl_ops tea5764_ioctl_ops = {
+	.vidioc_querycap    = vidioc_querycap,
+	.vidioc_g_tuner     = vidioc_g_tuner,
+	.vidioc_s_tuner     = vidioc_s_tuner,
+	.vidioc_g_audio     = vidioc_g_audio,
+	.vidioc_s_audio     = vidioc_s_audio,
+	.vidioc_g_input     = vidioc_g_input,
+	.vidioc_s_input     = vidioc_s_input,
+	.vidioc_g_frequency = vidioc_g_frequency,
+	.vidioc_s_frequency = vidioc_s_frequency,
+	.vidioc_queryctrl   = vidioc_queryctrl,
+	.vidioc_g_ctrl      = vidioc_g_ctrl,
+	.vidioc_s_ctrl      = vidioc_s_ctrl,
+};
+
+/* V4L2 interface */
+static struct video_device tea5764_radio_template = {
+	.name		= "TEA5764 FM-Radio",
+	.fops           = &tea5764_fops,
+	.ioctl_ops 	= &tea5764_ioctl_ops,
+	.release	= video_device_release,
+};
+
+/* I2C probe: check if the device exists and register with v4l if it is */
+static int __devinit tea5764_i2c_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct tea5764_device *radio;
+	struct tea5764_regs *r;
+	int ret;
+
+	PDEBUG("probe");
+	radio = kmalloc(sizeof(struct tea5764_device), GFP_KERNEL);
+	if (!radio)
+		return -ENOMEM;
+
+	mutex_init(&radio->mutex);
+	radio->i2c_client = client;
+	ret = tea5764_i2c_read(radio);
+	if (ret)
+		goto errfr;
+	r = &radio->regs;
+	PDEBUG("chipid = %04X, manid = %04X", r->chipid, r->manid);
+	if (r->chipid != TEA5764_CHIPID ||
+		(r->manid & 0x0fff) != TEA5764_MANID) {
+		PWARN("This chip is not a TEA5764!");
+		ret = -EINVAL;
+		goto errfr;
+	}
+
+	radio->videodev = video_device_alloc();
+	if (!(radio->videodev)) {
+		ret = -ENOMEM;
+		goto errfr;
+	}
+	memcpy(radio->videodev, &tea5764_radio_template,
+		sizeof(tea5764_radio_template));
+
+	i2c_set_clientdata(client, radio);
+	video_set_drvdata(radio->videodev, radio);
+
+	ret = video_register_device(radio->videodev, VFL_TYPE_RADIO, radio_nr);
+	if (ret < 0) {
+		PWARN("Could not register video device!");
+		goto errrel;
+	}
+
+	/* initialize and power off the chip */
+	tea5764_i2c_read(radio);
+	tea5764_set_audout_mode(radio, V4L2_TUNER_MODE_STEREO);
+	tea5764_mute(radio, 1);
+	tea5764_power_down(radio);
+
+	PINFO("registered.");
+	return 0;
+errrel:
+	video_device_release(radio->videodev);
+errfr:
+	kfree(radio);
+	return ret;
+}
+
+static int __devexit tea5764_i2c_remove(struct i2c_client *client)
+{
+	struct tea5764_device *radio = i2c_get_clientdata(client);
+
+	PDEBUG("remove");
+	if (radio) {
+		tea5764_power_down(radio);
+		video_unregister_device(radio->videodev);
+		kfree(radio);
+	}
+	return 0;
+}
+
+/* I2C subsystem interface */
+static const struct i2c_device_id tea5764_id[] = {
+	{ "radio-tea5764", 0 },
+	{ }					/* Terminating entry */
+};
+MODULE_DEVICE_TABLE(i2c, tea5764_id);
+
+static struct i2c_driver tea5764_i2c_driver = {
+	.driver = {
+		.name = "radio-tea5764",
+		.owner = THIS_MODULE,
+	},
+	.probe = tea5764_i2c_probe,
+	.remove = __devexit_p(tea5764_i2c_remove),
+	.id_table = tea5764_id,
+};
+
+/* init the driver */
+static int __init tea5764_init(void)
+{
+	int ret = i2c_add_driver(&tea5764_i2c_driver);
+
+	printk(KERN_INFO KBUILD_MODNAME ": " DRIVER_VERSION ": "
+		DRIVER_DESC "\n");
+	return ret;
+}
+
+/* cleanup the driver */
+static void __exit tea5764_exit(void)
+{
+	i2c_del_driver(&tea5764_i2c_driver);
+}
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
+module_param(use_xtal, int, 1);
+MODULE_PARM_DESC(use_xtal, "Chip have a xtal connected in board");
+module_param(radio_nr, int, 0);
+MODULE_PARM_DESC(radio_nr, "video4linux device number to use");
+
+module_init(tea5764_init);
+module_exit(tea5764_exit);
-- 
cgit v0.10.2


From 6a2d802ca01bd83b860145e7497a7a049c354cd7 Mon Sep 17 00:00:00 2001
From: Pham Thanh Nam <phamthanhnam.ptn@gmail.com>
Date: Tue, 30 Dec 2008 23:26:09 -0300
Subject: V4L/DVB (10156): saa7134: Add support for Avermedia AVer TV GO 007 FM
 Plus

This patch adds support for Avermedia AVer TV GO 007 FM Plus (M15C) on
saa7134 driver (PCI ID 1461:f31d).

Signed-off-by: Pham Thanh Nam <phamthanhnam.ptn@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134
index 335aef4..b8d4705 100644
--- a/Documentation/video4linux/CARDLIST.saa7134
+++ b/Documentation/video4linux/CARDLIST.saa7134
@@ -152,3 +152,4 @@
 151 -> ADS Tech Instant HDTV                    [1421:0380]
 152 -> Asus Tiger Rev:1.00                      [1043:4857]
 153 -> Kworld Plus TV Analog Lite PCI           [17de:7128]
+154 -> Avermedia AVerTV GO 007 FM Plus          [1461:f31d]
diff --git a/drivers/media/video/saa7134/saa7134-cards.c b/drivers/media/video/saa7134/saa7134-cards.c
index e240b4b..e9c471c 100644
--- a/drivers/media/video/saa7134/saa7134-cards.c
+++ b/drivers/media/video/saa7134/saa7134-cards.c
@@ -4642,6 +4642,38 @@ struct saa7134_board saa7134_boards[] = {
 			.amux = 2,
 		},
 	},
+	[SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS] = {
+		.name           = "Avermedia AVerTV GO 007 FM Plus",
+		.audio_clock    = 0x00187de7,
+		.tuner_type     = TUNER_PHILIPS_TDA8290,
+		.radio_type     = UNSET,
+		.tuner_addr	= ADDR_UNSET,
+		.radio_addr	= ADDR_UNSET,
+		.gpiomask       = 0x00300003,
+		/* .gpiomask       = 0x8c240003, */
+		.inputs         = { {
+			.name = name_tv,
+			.vmux = 1,
+			.amux = TV,
+			.tv   = 1,
+			.gpio = 0x01,
+		}, {
+			.name = name_svideo,
+			.vmux = 6,
+			.amux = LINE1,
+			.gpio = 0x02,
+		} },
+		.radio = {
+			.name = name_radio,
+			.amux = TV,
+			.gpio = 0x00300001,
+		},
+		.mute = {
+			.name = name_mute,
+			.amux = TV,
+			.gpio = 0x01,
+		},
+	},
 };
 
 const unsigned int saa7134_bcount = ARRAY_SIZE(saa7134_boards);
@@ -5739,6 +5771,13 @@ struct pci_device_id saa7134_pci_tbl[] = {
 		.subdevice    = PCI_ANY_ID,
 		.driver_data  = SAA7134_BOARD_UNKNOWN,
 	},{
+		.vendor       = PCI_VENDOR_ID_PHILIPS,
+		.device       = PCI_DEVICE_ID_PHILIPS_SAA7133,
+		.subvendor    = 0x1461, /* Avermedia Technologies Inc */
+		.subdevice    = 0xf31d,
+		.driver_data  = SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS,
+
+	}, {
 		/* --- end of list --- */
 	}
 };
@@ -5929,6 +5968,7 @@ int saa7134_board_init1(struct saa7134_dev *dev)
 	case SAA7134_BOARD_GENIUS_TVGO_A11MCE:
 	case SAA7134_BOARD_REAL_ANGEL_220:
 	case SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG:
+	case SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS:
 		dev->has_remote = SAA7134_REMOTE_GPIO;
 		break;
 	case SAA7134_BOARD_FLYDVBS_LR300:
diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c
index d2124f6..8a106d3 100644
--- a/drivers/media/video/saa7134/saa7134-input.c
+++ b/drivers/media/video/saa7134/saa7134-input.c
@@ -449,6 +449,7 @@ int saa7134_input_init1(struct saa7134_dev *dev)
 	case SAA7134_BOARD_AVERMEDIA_STUDIO_507:
 	case SAA7134_BOARD_AVERMEDIA_GO_007_FM:
 	case SAA7134_BOARD_AVERMEDIA_M102:
+	case SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS:
 		ir_codes     = ir_codes_avermedia;
 		mask_keycode = 0x0007C8;
 		mask_keydown = 0x000010;
diff --git a/drivers/media/video/saa7134/saa7134.h b/drivers/media/video/saa7134/saa7134.h
index f6c1fcc..14ee265 100644
--- a/drivers/media/video/saa7134/saa7134.h
+++ b/drivers/media/video/saa7134/saa7134.h
@@ -276,6 +276,7 @@ struct saa7134_format {
 #define SAA7134_BOARD_ADS_INSTANT_HDTV_PCI  151
 #define SAA7134_BOARD_ASUSTeK_TIGER         152
 #define SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG 153
+#define SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS 154
 
 #define SAA7134_MAXBOARDS 32
 #define SAA7134_INPUT_MAX 8
-- 
cgit v0.10.2


From 5e6de7d9a1a373414a41a7441100f90b71c6119f Mon Sep 17 00:00:00 2001
From: Mark Lord <mlord@pobox.com>
Date: Wed, 3 Dec 2008 15:26:15 -0300
Subject: V4L/DVB (10157): Add USB ID for the Sil4701 radio from DealExtreme

Signed-off-by: Mark Lord <mlord@pobox.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[tobias.lorenz@gmx.net: Code beautifications and documentation added]
Signed-off-by: Tobias Lorenz <tobias.lorenz@gmx.net>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/Documentation/video4linux/si470x.txt b/Documentation/video4linux/si470x.txt
index 11c5fd2..49679e6 100644
--- a/Documentation/video4linux/si470x.txt
+++ b/Documentation/video4linux/si470x.txt
@@ -41,6 +41,7 @@ chips are known to work:
 - 10c4:818a: Silicon Labs USB FM Radio Reference Design
 - 06e1:a155: ADS/Tech FM Radio Receiver (formerly Instant FM Music) (RDX-155-EF)
 - 1b80:d700: KWorld USB FM Radio SnapMusic Mobile 700 (FM700)
+- 10c5:819a: DealExtreme USB Radio
 
 
 Software
diff --git a/drivers/media/radio/radio-si470x.c b/drivers/media/radio/radio-si470x.c
index 457445e..67cbce8 100644
--- a/drivers/media/radio/radio-si470x.c
+++ b/drivers/media/radio/radio-si470x.c
@@ -96,6 +96,8 @@
  * 2008-10-20	Alexey Klimov <klimov.linux@gmail.com>
  * 		- add support for KWorld USB FM Radio FM700
  * 		- blacklisted KWorld radio in hid-core.c and hid-ids.h
+ * 2008-12-03	Mark Lord <mlord@pobox.com>
+ *		- add support for DealExtreme USB Radio
  *
  * ToDo:
  * - add firmware download/update support
@@ -138,6 +140,8 @@ static struct usb_device_id si470x_usb_driver_id_table[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x06e1, 0xa155, USB_CLASS_HID, 0, 0) },
 	/* KWorld USB FM Radio SnapMusic Mobile 700 (FM700) */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x1b80, 0xd700, USB_CLASS_HID, 0, 0) },
+	/* DealExtreme USB Radio */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x10c5, 0x819a, USB_CLASS_HID, 0, 0) },
 	/* Terminating entry */
 	{ }
 };
-- 
cgit v0.10.2


From 87ea5f9d389717ff6da60dc014ce79ae14b7947c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Wed, 31 Dec 2008 06:37:50 -0300
Subject: V4L/DVB (10160): em28xx: update chip id for em2710

em2710 uses the same chip ID as em2820 (0x12).

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/em28xx/em28xx-reg.h b/drivers/media/video/em28xx/em28xx-reg.h
index 65dcb91..24e39c5 100644
--- a/drivers/media/video/em28xx/em28xx-reg.h
+++ b/drivers/media/video/em28xx/em28xx-reg.h
@@ -160,7 +160,7 @@
 
 /* FIXME: Need to be populated with the other chip ID's */
 enum em28xx_chip_id {
-	CHIP_ID_EM2820 = 18,
+	CHIP_ID_EM2820 = 18,	/* Also used by em2710 */
 	CHIP_ID_EM2840 = 20,
 	CHIP_ID_EM2750 = 33,
 	CHIP_ID_EM2860 = 34,
-- 
cgit v0.10.2


From 4a87d7c4f259efa89ae6627c49e403eef51a3058 Mon Sep 17 00:00:00 2001
From: Pham Thanh Nam <phamthanhnam.ptn@gmail.com>
Date: Wed, 31 Dec 2008 06:57:19 -0300
Subject: V4L/DVB (10161): saa7134: fix autodetection for AVer TV GO 007 FM
 Plus

This patch fixes autodetection for Avermedia AVerTV GO 007 FM Plus (M15C)
(PCI ID 1461:f31d).

Signed-off-by: Pham Thanh Nam <phamthanhnam.ptn@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/saa7134/saa7134-cards.c b/drivers/media/video/saa7134/saa7134-cards.c
index e9c471c..e2febcd 100644
--- a/drivers/media/video/saa7134/saa7134-cards.c
+++ b/drivers/media/video/saa7134/saa7134-cards.c
@@ -5733,6 +5733,13 @@ struct pci_device_id saa7134_pci_tbl[] = {
 		.subdevice    = 0x7128,
 		.driver_data  = SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG,
 	}, {
+		.vendor       = PCI_VENDOR_ID_PHILIPS,
+		.device       = PCI_DEVICE_ID_PHILIPS_SAA7133,
+		.subvendor    = 0x1461, /* Avermedia Technologies Inc */
+		.subdevice    = 0xf31d,
+		.driver_data  = SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS,
+
+	}, {
 		/* --- boards without eeprom + subsystem ID --- */
 		.vendor       = PCI_VENDOR_ID_PHILIPS,
 		.device       = PCI_DEVICE_ID_PHILIPS_SAA7134,
@@ -5771,13 +5778,6 @@ struct pci_device_id saa7134_pci_tbl[] = {
 		.subdevice    = PCI_ANY_ID,
 		.driver_data  = SAA7134_BOARD_UNKNOWN,
 	},{
-		.vendor       = PCI_VENDOR_ID_PHILIPS,
-		.device       = PCI_DEVICE_ID_PHILIPS_SAA7133,
-		.subvendor    = 0x1461, /* Avermedia Technologies Inc */
-		.subdevice    = 0xf31d,
-		.driver_data  = SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS,
-
-	}, {
 		/* --- end of list --- */
 	}
 };
-- 
cgit v0.10.2


From 0b82c5d6748a15758875f78ac772c6d48ebead2a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Wed, 31 Dec 2008 09:34:18 -0300
Subject: V4L/DVB (10162): tuner-simple: Fix tuner type set message

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/common/tuners/tuner-simple.c b/drivers/media/common/tuners/tuner-simple.c
index fb3f3b3..1a21191 100644
--- a/drivers/media/common/tuners/tuner-simple.c
+++ b/drivers/media/common/tuners/tuner-simple.c
@@ -1059,7 +1059,12 @@ struct dvb_frontend *simple_tuner_attach(struct dvb_frontend *fe,
 	memcpy(&fe->ops.tuner_ops, &simple_tuner_ops,
 	       sizeof(struct dvb_tuner_ops));
 
-	tuner_info("type set to %d (%s)\n", type, priv->tun->name);
+	if (type != priv->type)
+		tuner_warn("couldn't set type to %d. Using %d (%s) instead\n",
+			    type, priv->type, priv->tun->name);
+	else
+		tuner_info("type set to %d (%s)\n",
+			   priv->type, priv->tun->name);
 
 	if ((debug) || ((atv_input[priv->nr] > 0) ||
 			(dtv_input[priv->nr] > 0))) {
-- 
cgit v0.10.2


From 9baed99ee7a834b1f2599e13f219087f01c63f38 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Wed, 31 Dec 2008 09:37:33 -0300
Subject: V4L/DVB (10163): em28xx: allocate adev together with struct em28xx
 dev

Some devices require different setups on struct_audio. Due to that, we
may need to change some fields at dev.adev during device probe. So, this
patch moves the dynamic memory allocation of adev at em28xx-alsa to the
dynamic allocation of struct em28xx dev that happens during device
probe.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/em28xx/em28xx-audio.c b/drivers/media/video/em28xx/em28xx-audio.c
index 15c03f0..94378cc 100644
--- a/drivers/media/video/em28xx/em28xx-audio.c
+++ b/drivers/media/video/em28xx/em28xx-audio.c
@@ -62,9 +62,9 @@ static int em28xx_isoc_audio_deinit(struct em28xx *dev)
 
 	dprintk("Stopping isoc\n");
 	for (i = 0; i < EM28XX_AUDIO_BUFS; i++) {
-		usb_unlink_urb(dev->adev->urb[i]);
-		usb_free_urb(dev->adev->urb[i]);
-		dev->adev->urb[i] = NULL;
+		usb_unlink_urb(dev->adev.urb[i]);
+		usb_free_urb(dev->adev.urb[i]);
+		dev->adev.urb[i] = NULL;
 	}
 
 	return 0;
@@ -81,8 +81,8 @@ static void em28xx_audio_isocirq(struct urb *urb)
 	unsigned int             stride;
 	struct snd_pcm_substream *substream;
 	struct snd_pcm_runtime   *runtime;
-	if (dev->adev->capture_pcm_substream) {
-		substream = dev->adev->capture_pcm_substream;
+	if (dev->adev.capture_pcm_substream) {
+		substream = dev->adev.capture_pcm_substream;
 		runtime = substream->runtime;
 		stride = runtime->frame_bits >> 3;
 
@@ -95,7 +95,7 @@ static void em28xx_audio_isocirq(struct urb *urb)
 			if (!length)
 				continue;
 
-			oldptr = dev->adev->hwptr_done_capture;
+			oldptr = dev->adev.hwptr_done_capture;
 			if (oldptr + length >= runtime->buffer_size) {
 				unsigned int cnt =
 				    runtime->buffer_size - oldptr;
@@ -110,16 +110,16 @@ static void em28xx_audio_isocirq(struct urb *urb)
 
 			snd_pcm_stream_lock(substream);
 
-			dev->adev->hwptr_done_capture += length;
-			if (dev->adev->hwptr_done_capture >=
+			dev->adev.hwptr_done_capture += length;
+			if (dev->adev.hwptr_done_capture >=
 			    runtime->buffer_size)
-				dev->adev->hwptr_done_capture -=
+				dev->adev.hwptr_done_capture -=
 				    runtime->buffer_size;
 
-			dev->adev->capture_transfer_done += length;
-			if (dev->adev->capture_transfer_done >=
+			dev->adev.capture_transfer_done += length;
+			if (dev->adev.capture_transfer_done >=
 			    runtime->period_size) {
-				dev->adev->capture_transfer_done -=
+				dev->adev.capture_transfer_done -=
 				    runtime->period_size;
 				period_elapsed = 1;
 			}
@@ -131,7 +131,7 @@ static void em28xx_audio_isocirq(struct urb *urb)
 	}
 	urb->status = 0;
 
-	if (dev->adev->shutdown)
+	if (dev->adev.shutdown)
 		return;
 
 	status = usb_submit_urb(urb, GFP_ATOMIC);
@@ -154,17 +154,17 @@ static int em28xx_init_audio_isoc(struct em28xx *dev)
 		struct urb *urb;
 		int j, k;
 
-		dev->adev->transfer_buffer[i] = kmalloc(sb_size, GFP_ATOMIC);
-		if (!dev->adev->transfer_buffer[i])
+		dev->adev.transfer_buffer[i] = kmalloc(sb_size, GFP_ATOMIC);
+		if (!dev->adev.transfer_buffer[i])
 			return -ENOMEM;
 
-		memset(dev->adev->transfer_buffer[i], 0x80, sb_size);
+		memset(dev->adev.transfer_buffer[i], 0x80, sb_size);
 		urb = usb_alloc_urb(EM28XX_NUM_AUDIO_PACKETS, GFP_ATOMIC);
 		if (!urb) {
 			em28xx_errdev("usb_alloc_urb failed!\n");
 			for (j = 0; j < i; j++) {
-				usb_free_urb(dev->adev->urb[j]);
-				kfree(dev->adev->transfer_buffer[j]);
+				usb_free_urb(dev->adev.urb[j]);
+				kfree(dev->adev.transfer_buffer[j]);
 			}
 			return -ENOMEM;
 		}
@@ -173,7 +173,7 @@ static int em28xx_init_audio_isoc(struct em28xx *dev)
 		urb->context = dev;
 		urb->pipe = usb_rcvisocpipe(dev->udev, 0x83);
 		urb->transfer_flags = URB_ISO_ASAP;
-		urb->transfer_buffer = dev->adev->transfer_buffer[i];
+		urb->transfer_buffer = dev->adev.transfer_buffer[i];
 		urb->interval = 1;
 		urb->complete = em28xx_audio_isocirq;
 		urb->number_of_packets = EM28XX_NUM_AUDIO_PACKETS;
@@ -185,11 +185,11 @@ static int em28xx_init_audio_isoc(struct em28xx *dev)
 			urb->iso_frame_desc[j].length =
 			    EM28XX_AUDIO_MAX_PACKET_SIZE;
 		}
-		dev->adev->urb[i] = urb;
+		dev->adev.urb[i] = urb;
 	}
 
 	for (i = 0; i < EM28XX_AUDIO_BUFS; i++) {
-		errCode = usb_submit_urb(dev->adev->urb[i], GFP_ATOMIC);
+		errCode = usb_submit_urb(dev->adev.urb[i], GFP_ATOMIC);
 		if (errCode) {
 			em28xx_isoc_audio_deinit(dev);
 
@@ -202,16 +202,16 @@ static int em28xx_init_audio_isoc(struct em28xx *dev)
 
 static int em28xx_cmd(struct em28xx *dev, int cmd, int arg)
 {
-	dprintk("%s transfer\n", (dev->adev->capture_stream == STREAM_ON)?
+	dprintk("%s transfer\n", (dev->adev.capture_stream == STREAM_ON) ?
 				 "stop" : "start");
 
 	switch (cmd) {
 	case EM28XX_CAPTURE_STREAM_EN:
-		if (dev->adev->capture_stream == STREAM_OFF && arg == 1) {
-			dev->adev->capture_stream = STREAM_ON;
+		if (dev->adev.capture_stream == STREAM_OFF && arg == 1) {
+			dev->adev.capture_stream = STREAM_ON;
 			em28xx_init_audio_isoc(dev);
-		} else if (dev->adev->capture_stream == STREAM_ON && arg == 0) {
-			dev->adev->capture_stream = STREAM_OFF;
+		} else if (dev->adev.capture_stream == STREAM_ON && arg == 0) {
+			dev->adev.capture_stream = STREAM_OFF;
 			em28xx_isoc_audio_deinit(dev);
 		} else {
 			printk(KERN_ERR "An underrun very likely occurred. "
@@ -289,17 +289,17 @@ static int snd_em28xx_capture_open(struct snd_pcm_substream *substream)
 		goto err;
 
 	runtime->hw = snd_em28xx_hw_capture;
-	if (dev->alt == 0 && dev->adev->users == 0) {
+	if (dev->alt == 0 && dev->adev.users == 0) {
 		int errCode;
 		dev->alt = 7;
 		errCode = usb_set_interface(dev->udev, 0, 7);
 		dprintk("changing alternate number to 7\n");
 	}
 
-	dev->adev->users++;
+	dev->adev.users++;
 
 	snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS);
-	dev->adev->capture_pcm_substream = substream;
+	dev->adev.capture_pcm_substream = substream;
 	runtime->private_data = dev;
 
 	return 0;
@@ -311,7 +311,7 @@ err:
 static int snd_em28xx_pcm_close(struct snd_pcm_substream *substream)
 {
 	struct em28xx *dev = snd_pcm_substream_chip(substream);
-	dev->adev->users--;
+	dev->adev.users--;
 
 	dprintk("closing device\n");
 
@@ -320,10 +320,10 @@ static int snd_em28xx_pcm_close(struct snd_pcm_substream *substream)
 	em28xx_audio_analog_set(dev);
 	mutex_unlock(&dev->lock);
 
-	if (dev->adev->users == 0 && dev->adev->shutdown == 1) {
-		dprintk("audio users: %d\n", dev->adev->users);
+	if (dev->adev.users == 0 && dev->adev.shutdown == 1) {
+		dprintk("audio users: %d\n", dev->adev.users);
 		dprintk("disabling audio stream!\n");
-		dev->adev->shutdown = 0;
+		dev->adev.shutdown = 0;
 		dprintk("released lock\n");
 		em28xx_cmd(dev, EM28XX_CAPTURE_STREAM_EN, 0);
 	}
@@ -356,7 +356,7 @@ static int snd_em28xx_hw_capture_free(struct snd_pcm_substream *substream)
 
 	dprintk("Stop capture, if needed\n");
 
-	if (dev->adev->capture_stream == STREAM_ON)
+	if (dev->adev.capture_stream == STREAM_ON)
 		em28xx_cmd(dev, EM28XX_CAPTURE_STREAM_EN, 0);
 
 	return 0;
@@ -379,7 +379,7 @@ static int snd_em28xx_capture_trigger(struct snd_pcm_substream *substream,
 		em28xx_cmd(dev, EM28XX_CAPTURE_STREAM_EN, 1);
 		return 0;
 	case SNDRV_PCM_TRIGGER_STOP:
-		dev->adev->shutdown = 1;
+		dev->adev.shutdown = 1;
 		return 0;
 	default:
 		return -EINVAL;
@@ -393,7 +393,7 @@ static snd_pcm_uframes_t snd_em28xx_capture_pointer(struct snd_pcm_substream
 
 	snd_pcm_uframes_t hwptr_done;
 	dev = snd_pcm_substream_chip(substream);
-	hwptr_done = dev->adev->hwptr_done_capture;
+	hwptr_done = dev->adev.hwptr_done_capture;
 
 	return hwptr_done;
 }
@@ -420,7 +420,7 @@ static struct snd_pcm_ops snd_em28xx_pcm_capture = {
 
 static int em28xx_audio_init(struct em28xx *dev)
 {
-	struct em28xx_audio *adev;
+	struct em28xx_audio *adev = &dev->adev;
 	struct snd_pcm      *pcm;
 	struct snd_card     *card;
 	static int          devnr;
@@ -438,16 +438,9 @@ static int em28xx_audio_init(struct em28xx *dev)
 	printk(KERN_INFO "em28xx-audio.c: Copyright (C) 2006 Markus "
 			 "Rechberger\n");
 
-	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
-	if (!adev) {
-		printk(KERN_ERR "em28xx-audio.c: out of memory\n");
-		return -1;
-	}
 	card = snd_card_new(index[devnr], "Em28xx Audio", THIS_MODULE, 0);
-	if (card == NULL) {
-		kfree(adev);
+	if (card == NULL)
 		return -ENOMEM;
-	}
 
 	spin_lock_init(&adev->slock);
 	err = snd_pcm_new(card, "Em28xx Audio", 0, 0, 1, &pcm);
@@ -471,7 +464,6 @@ static int em28xx_audio_init(struct em28xx *dev)
 	}
 	adev->sndcard = card;
 	adev->udev = dev->udev;
-	dev->adev = adev;
 
 	return 0;
 }
@@ -488,10 +480,9 @@ static int em28xx_audio_fini(struct em28xx *dev)
 		return 0;
 	}
 
-	if (dev->adev) {
-		snd_card_free(dev->adev->sndcard);
-		kfree(dev->adev);
-		dev->adev = NULL;
+	if (dev->adev.sndcard) {
+		snd_card_free(dev->adev.sndcard);
+		dev->adev.sndcard = NULL;
 	}
 
 	return 0;
diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h
index afc5f6d..6c6b94a 100644
--- a/drivers/media/video/em28xx/em28xx.h
+++ b/drivers/media/video/em28xx/em28xx.h
@@ -473,7 +473,7 @@ struct em28xx {
 	unsigned long i2c_hash;	/* i2c devicelist hash -
 				   for boards with generic ID */
 
-	struct em28xx_audio *adev;
+	struct em28xx_audio adev;
 
 	/* states */
 	enum em28xx_dev_state state;
-- 
cgit v0.10.2


From cb889a2f3515b140bef193cf6ffcdb099349b8aa Mon Sep 17 00:00:00 2001
From: Klaus Schmidinger <Klaus.Schmidinger@cadsoft.de>
Date: Wed, 31 Dec 2008 14:11:23 -0300
Subject: V4L/DVB (10164): Add missing S2 caps flag to S2API

The attached patch adds a capability flag that allows an application
to determine whether a particular device can handle "second generation
modulation" transponders. This is necessary in order for applications
to be able to decide which device to use for a given channel in
a multi device environment, where DVB-S and DVB-S2 devices are mixed.

It is assumed that a device capable of handling "second generation
modulation" can implicitly handle "first generation modulation".
The flag is not named anything with DVBS2 in order to allow its
use with future DVBT2 devices as well (should they ever come).

Signed-off by: Klaus Schmidinger <Klaus.Schmidinger@cadsoft.de>

Acked-by: Steven Toth <stoth@linuxtv.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index 79a8ed8..926d28d 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -63,6 +63,7 @@ typedef enum fe_caps {
 	FE_CAN_8VSB			= 0x200000,
 	FE_CAN_16VSB			= 0x400000,
 	FE_HAS_EXTENDED_CAPS		= 0x800000,   // We need more bitspace for newer APIs, indicate this.
+	FE_CAN_2G_MODULATION		= 0x10000000, // frontend supports "2nd generation modulation" (DVB-S2)
 	FE_NEEDS_BENDING		= 0x20000000, // not supported anymore, don't use (frontend requires frequency bending)
 	FE_CAN_RECOVER			= 0x40000000, // frontend can recover from a cable unplug automatically
 	FE_CAN_MUTE_TS			= 0x80000000  // frontend can stop spurious TS data output
-- 
cgit v0.10.2


From faed4aa586f0c16020676481033665e959916c6a Mon Sep 17 00:00:00 2001
From: Klaus Schmidinger <Klaus.Schmidinger@cadsoft.de>
Date: Wed, 31 Dec 2008 14:13:56 -0300
Subject: V4L/DVB (10165): Add FE_CAN_2G_MODULATION flag to frontends that
 support DVB-S2

Report to userspace that cx24116 and stv0899 drivers support DVB-S2.

Signed-off by: Klaus Schmidinger <Klaus.Schmidinger@cadsoft.de>

Acked-by: Steven Toth <stoth@linuxtv.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/dvb/frontends/cx24116.c b/drivers/media/dvb/frontends/cx24116.c
index 9b6c89e..4f514d3 100644
--- a/drivers/media/dvb/frontends/cx24116.c
+++ b/drivers/media/dvb/frontends/cx24116.c
@@ -1463,6 +1463,7 @@ static struct dvb_frontend_ops cx24116_ops = {
 			FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_4_5 | FE_CAN_FEC_5_6 | FE_CAN_FEC_6_7 |
 			FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
+			FE_CAN_2G_MODULATION |
 			FE_CAN_QPSK | FE_CAN_RECOVER
 	},
 
diff --git a/drivers/media/dvb/frontends/stb0899_drv.c b/drivers/media/dvb/frontends/stb0899_drv.c
index 5288201..bee28f7 100644
--- a/drivers/media/dvb/frontends/stb0899_drv.c
+++ b/drivers/media/dvb/frontends/stb0899_drv.c
@@ -1618,6 +1618,7 @@ static struct dvb_frontend_ops stb0899_ops = {
 
 		.caps 			= FE_CAN_INVERSION_AUTO	|
 					  FE_CAN_FEC_AUTO	|
+					  FE_CAN_2G_MODULATION	|
 					  FE_CAN_QPSK
 	},
 
-- 
cgit v0.10.2


From e4cda3e0728156c6be1d03e72ef20ea811da4ad5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Wed, 31 Dec 2008 14:26:57 -0300
Subject: V4L/DVB (10166): dvb frontend: stop using non-C99 compliant comments

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index 926d28d..55026b1 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -62,11 +62,11 @@ typedef enum fe_caps {
 	FE_CAN_HIERARCHY_AUTO		= 0x100000,
 	FE_CAN_8VSB			= 0x200000,
 	FE_CAN_16VSB			= 0x400000,
-	FE_HAS_EXTENDED_CAPS		= 0x800000,   // We need more bitspace for newer APIs, indicate this.
-	FE_CAN_2G_MODULATION		= 0x10000000, // frontend supports "2nd generation modulation" (DVB-S2)
-	FE_NEEDS_BENDING		= 0x20000000, // not supported anymore, don't use (frontend requires frequency bending)
-	FE_CAN_RECOVER			= 0x40000000, // frontend can recover from a cable unplug automatically
-	FE_CAN_MUTE_TS			= 0x80000000  // frontend can stop spurious TS data output
+	FE_HAS_EXTENDED_CAPS		= 0x800000,   /* We need more bitspace for newer APIs, indicate this. */
+	FE_CAN_2G_MODULATION		= 0x10000000, /* frontend supports "2nd generation modulation" (DVB-S2) */
+	FE_NEEDS_BENDING		= 0x20000000, /* not supported anymore, don't use (frontend requires frequency bending) */
+	FE_CAN_RECOVER			= 0x40000000, /* frontend can recover from a cable unplug automatically */
+	FE_CAN_MUTE_TS			= 0x80000000  /* frontend can stop spurious TS data output */
 } fe_caps_t;
 
 
@@ -122,15 +122,15 @@ typedef enum fe_sec_mini_cmd {
 
 
 typedef enum fe_status {
-	FE_HAS_SIGNAL	= 0x01,   /*  found something above the noise level */
-	FE_HAS_CARRIER	= 0x02,   /*  found a DVB signal  */
-	FE_HAS_VITERBI	= 0x04,   /*  FEC is stable  */
-	FE_HAS_SYNC	= 0x08,   /*  found sync bytes  */
-	FE_HAS_LOCK	= 0x10,   /*  everything's working... */
-	FE_TIMEDOUT	= 0x20,   /*  no lock within the last ~2 seconds */
-	FE_REINIT	= 0x40    /*  frontend was reinitialized,  */
-} fe_status_t;			  /*  application is recommended to reset */
-				  /*  DiSEqC, tone and parameters */
+	FE_HAS_SIGNAL	= 0x01,   /* found something above the noise level */
+	FE_HAS_CARRIER	= 0x02,   /* found a DVB signal  */
+	FE_HAS_VITERBI	= 0x04,   /* FEC is stable  */
+	FE_HAS_SYNC	= 0x08,   /* found sync bytes  */
+	FE_HAS_LOCK	= 0x10,   /* everything's working... */
+	FE_TIMEDOUT	= 0x20,   /* no lock within the last ~2 seconds */
+	FE_REINIT	= 0x40    /* frontend was reinitialized,  */
+} fe_status_t;			  /* application is recommended to reset */
+				  /* DiSEqC, tone and parameters */
 
 typedef enum fe_spectral_inversion {
 	INVERSION_OFF,
-- 
cgit v0.10.2


From dd72f31b4fa87c68e16484a3ed3e4d1843ad7f06 Mon Sep 17 00:00:00 2001
From: Michael Krufky <mkrufky@linuxtv.org>
Date: Fri, 28 Nov 2008 01:02:56 -0300
Subject: V4L/DVB (10167): sms1xxx: add support for inverted gpio

negative gpio values signify inverted polarity

Signed-off-by: Michael Krufky <mkrufky@linuxtv.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/dvb/siano/sms-cards.c b/drivers/media/dvb/siano/sms-cards.c
index fd62e0b..5a9882f 100644
--- a/drivers/media/dvb/siano/sms-cards.c
+++ b/drivers/media/dvb/siano/sms-cards.c
@@ -131,9 +131,10 @@ struct sms_board *sms_get_board(int id)
 	return &sms_boards[id];
 }
 
-static int sms_set_gpio(struct smscore_device_t *coredev, u32 pin, int enable)
+static int sms_set_gpio(struct smscore_device_t *coredev, int pin, int enable)
 {
-	int ret;
+	int lvl, ret;
+	u32 gpio;
 	struct smscore_gpio_config gpioconfig = {
 		.direction            = SMS_GPIO_DIRECTION_OUTPUT,
 		.pullupdown           = SMS_GPIO_PULLUPDOWN_NONE,
@@ -145,12 +146,20 @@ static int sms_set_gpio(struct smscore_device_t *coredev, u32 pin, int enable)
 	if (pin == 0)
 		return -EINVAL;
 
-	ret = smscore_configure_gpio(coredev, pin, &gpioconfig);
+	if (pin < 0) {
+		/* inverted gpio */
+		gpio = pin * -1;
+		lvl = enable ? 0 : 1;
+	} else {
+		gpio = pin;
+		lvl = enable ? 1 : 0;
+	}
 
+	ret = smscore_configure_gpio(coredev, gpio, &gpioconfig);
 	if (ret < 0)
 		return ret;
 
-	return smscore_set_gpio(coredev, pin, enable);
+	return smscore_set_gpio(coredev, gpio, lvl);
 }
 
 int sms_board_setup(struct smscore_device_t *coredev)
-- 
cgit v0.10.2


From f4c82548d4fa86fb3606f6ee219b070b88592a1e Mon Sep 17 00:00:00 2001
From: Michael Krufky <mkrufky@linuxtv.org>
Date: Mon, 15 Dec 2008 17:28:41 -0300
Subject: V4L/DVB (10168): sms1xxx: fix inverted gpio for lna control on tiger
 r2

The GPIO logic for LNA control on the Tiger r2 devices was inverted.
This patch corrects the problem.

Signed-off-by: Michael Krufky <mkrufky@linuxtv.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/dvb/siano/sms-cards.c b/drivers/media/dvb/siano/sms-cards.c
index 5a9882f..4307e4e 100644
--- a/drivers/media/dvb/siano/sms-cards.c
+++ b/drivers/media/dvb/siano/sms-cards.c
@@ -120,7 +120,7 @@ static struct sms_board sms_boards[] = {
 		.name	= "Hauppauge WinTV MiniCard",
 		.type	= SMS_NOVA_B0,
 		.fw[DEVICE_MODE_DVBT_BDA] = "sms1xxx-hcw-55xxx-dvbt-02.fw",
-		.lna_ctrl  = 1,
+		.lna_ctrl  = -1,
 	},
 };
 
-- 
cgit v0.10.2


From 6b55009e1dc7c2a66c8f5fad67045f0536c9bbd8 Mon Sep 17 00:00:00 2001
From: Michael Krufky <mkrufky@linuxtv.org>
Date: Fri, 2 Jan 2009 15:55:29 -0300
Subject: V4L/DVB (10170): tuner-simple: prevent possible OOPS caused by divide
 by zero error

A user reported the following OOPS with his pcHDTV HD5500 card, which
uses a cx88 PCI bridge with a LG-TDVS-H06xF frontend module, made up
of a TUA6034 tuner, TDA988x IF demod, and LG DT3303 ATSC/QAM demod.

Somehow, tuner-core gets loaded before the digital driver configures
the tuner, and tuner-core somehow incorrectly sets the tuner type to
LG NTSC (TAPE series) instead of LG TDVS-H06xF.  This tuner type does
not have the tuning stepsize defined, so an OOPS occurs during the
digital tune function.

We still dont know how the type gets set incorrectly in the first place.
The user has a tainted kernel with a binary nividia module, which COULD
have something to do with this, but it's hard to say for sure.

Nevertheless, to avoid this division by zero, we should check that
stepsize is defined.  If stepsize is not defined, print an error and
bail out on the tune request.

cx8800 0000:05:01.0: PCI INT A -> GSI 19 (level, low) -> IRQ 19
cx88[0]: subsystem: 7063:5500, board: pcHDTV HD5500 HDTV [card=47,autodetected], frontend(s): 1
cx88[0]: TV tuner type 47, Radio tuner type -1
tuner' 2-0043: chip found @ 0x86 (cx88[0])
tda9887 2-0043: creating new instance
tda9887 2-0043: tda988[5/6/7] found
tuner' 2-0061: chip found @ 0xc2 (cx88[0])
tuner-simple 2-0061: creating new instance
tuner-simple 2-0061: type set to 47 (LG NTSC (TAPE series))
cx88[0]/0: found at 0000:05:01.0, rev: 5, irq: 19, latency: 32, mmio: 0xea000000
cx88[0]/0: registered device video1 [v4l2]
cx88[0]/0: registered device vbi1
cx88_audio 0000:05:01.1: PCI INT A -> GSI 19 (level, low) -> IRQ 19
cx88[0]/1: CX88x/0: ALSA support for cx2388x boards
cx88[0]/2: cx2388x 8802 Driver Manager
cx88-mpeg driver manager 0000:05:01.2: PCI INT A -> GSI 19 (level, low) -> IRQ 19
cx88[0]/2: found at 0000:05:01.2, rev: 5, irq: 19, latency: 32, mmio: 0xec000000
cx8802_probe() allocating 1 frontend(s)
cx88/2: cx2388x dvb driver version 0.0.6 loaded
cx88/2: registering cx8802 driver, type: dvb access: shared
cx88[0]/2: subsystem: 7063:5500, board: pcHDTV HD5500 HDTV [card=47]
cx88[0]/2: cx2388x based DVB/ATSC card
tuner-simple 2-0061: attaching existing instance
tuner-simple 2-0061: type set to 64 (LG NTSC (TAPE series))
tda9887 2-0043: attaching existing instance
DVB: registering new adapter (cx88[0])
DVB: registering adapter 0 frontend 0 (LG Electronics LGDT3303 VSB/QAM Frontend)...
[snip]
stepsize=0
divide error: 0000 [1] SMP
CPU 1
Modules linked in: nls_utf8 fuse sco bridge stp bnep l2cap bluetooth sunrpc nf_conntrack_netbios_ns nf_conntrack_ftp ip6t_REJECT nf_conntrack_ipv6 ip6table_filter ip6_tables ipv6 cpufreq_ondemand acpi_cpufreq freq_table xfs lgdt330x dm_multipath cx88_dvb cx88_vp3054_i2c uinput tda9887 tda8290 snd_emu10k1_synth snd_emux_synth snd_seq_virmidi snd_seq_midi_emul tuner_simple tuner_types tuner msp3400 cx8800 cx88_alsa cx8802 snd_emu10k1 cx88xx snd_rawmidi snd_ac97_codec ir_common ac97_bus saa7115 snd_seq_dummy snd_seq_oss snd_seq_midi_event videobuf_dvb snd_seq dvb_core snd_pcm_oss snd_mixer_oss snd_pcm snd_seq_device videobuf_dma_sg ppdev parport_pc snd_timer videobuf_core snd_page_alloc btcx_risc emu10k1_gp ivtv i2c_algo_bit cx2341x snd_util_mem snd_hwdep nvidia(P) gameport v4l2_common i2c_i801 snd soundcore parport videodev v4l1_compat v4l2_compat_ioctl32 tveeprom i2c_core pcspkr iTCO_wdt iTCO_vendor_support sky2 joydev floppy shpchp ata_generic pata_acpi pata_jmicron [last unloaded: microcode]
Pid: 3553, comm: kdvb-ad-0-fe-0 Tainted: P          2.6.27.9-159.fc10.x86_64 #1
RIP: 0010:[<ffffffffa09bc37a>]  [<ffffffffa09bc37a>] simple_dvb_calc_regs+0xab/0x281 [tuner_simple]
RSP: 0018:ffff8800605dfd30  EFLAGS: 00010246
RAX: 000000000365c040 RBX: ffff8800605dfdb0 RCX: ffff88007acb8c10
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000246
RBP: ffff8800605dfda0 R08: ffff8800605dfba0 R09: 0000000000000082
R10: 00000010e73c9df1 R11: 0000000100000000 R12: ffff88007ac29c00
R13: ffff88007ac29c00 R14: ffff88007acbb408 R15: ffffffffa09b6fb0
FS:  0000000000000000(0000) GS:ffff88007f804880(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 00000000004e8f40 CR3: 000000007114e000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process kdvb-ad-0-fe-0 (pid: 3553, threadinfo ffff8800605de000, task ffff88006fca0000)
Stack:  ffff8800605dfd40 00000000ffffffa1 ffff88007c055860 0000000000000001
 ffff8800605dfda0 ffff8800605dfda0 ffff88007acb8c10 ffffffffa004e48c
 8e01880000000390 ffff88007acb8c10 ffff88007ac29c00 0000000000000000
Call Trace:
 [<ffffffffa004e48c>] ? i2c_transfer+0x80/0x8b [i2c_core]
 [<ffffffffa09bc768>] simple_dvb_set_params+0x3e/0x9b [tuner_simple]
 [<ffffffffa0a0335a>] lgdt330x_set_parameters+0x188/0x1b9 [lgdt330x]
 [<ffffffffa08c9116>] dvb_frontend_swzigzag_autotune+0x18e/0x1b5 [dvb_core]
 [<ffffffffa08c9f6a>] dvb_frontend_swzigzag+0x1bc/0x21e [dvb_core]
 [<ffffffffa08ca4f4>] dvb_frontend_thread+0x528/0x62b [dvb_core]
 [<ffffffff810551e1>] ? autoremove_wake_function+0x0/0x38
 [<ffffffffa08c9fcc>] ? dvb_frontend_thread+0x0/0x62b [dvb_core]
 [<ffffffff81054e9b>] kthread+0x49/0x76
 [<ffffffff810116e9>] child_rip+0xa/0x11
 [<ffffffff81010a07>] ? restore_args+0x0/0x30
 [<ffffffff81054e52>] ? kthread+0x0/0x76
 [<ffffffff810116df>] ? child_rip+0x0/0x11

Code: 48 8b 05 2a 4e 00 00 41 8b 77 1c 31 d2 0f b7 40 0a 89 f1 03 45 d0 d1 e9 03 0d 23 4e 00 00 69 c0 24 f4 00 00 8d 04 01 48 8b 4d c0 <f7> f6 8a 55 d6 88 53 04 41 89 c4 c1 e8 08 88 43 01 8a 45 d7 44
RIP  [<ffffffffa09bc37a>] simple_dvb_calc_regs+0xab/0x281 [tuner_simple]
 RSP <ffff8800605dfd30>

Signed-off-by: Michael Krufky <mkrufky@linuxtv.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/common/tuners/tuner-simple.c b/drivers/media/common/tuners/tuner-simple.c
index 1a21191..de7adaf 100644
--- a/drivers/media/common/tuners/tuner-simple.c
+++ b/drivers/media/common/tuners/tuner-simple.c
@@ -820,6 +820,15 @@ static u32 simple_dvb_configure(struct dvb_frontend *fe, u8 *buf,
 	int ret;
 	unsigned frequency = params->frequency / 62500;
 
+	if (!tun->stepsize) {
+		/* tuner-core was loaded before the digital tuner was
+		 * configured and somehow picked the wrong tuner type */
+		tuner_err("attempt to treat tuner %d (%s) as digital tuner "
+			  "without stepsize defined.\n",
+			  priv->type, priv->tun->name);
+		return 0; /* failure */
+	}
+
 	t_params = simple_tuner_params(fe, TUNER_PARAM_TYPE_DIGITAL);
 	ret = simple_config_lookup(fe, t_params, &frequency, &config, &cb);
 	if (ret < 0)
-- 
cgit v0.10.2


From cfb2a494bb7dca9cf8d1632fbed14b34db051980 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 26 Dec 2008 22:41:18 +0100
Subject: m68k: fix recursive dependency in Kconfig

We had a recursive dependency between MMU_MOTOROLA and MMU_SUN3
Fix it by dropping the unused dependencies on MMU_MOTOROLA.

MMU_MOTOROLA is set to y only using select so any dependencies
are anyway ignored.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Roman Zippel <zippel@linux-m68k.org>

diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 836fb66..c825bde 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -280,7 +280,6 @@ config M68060
 
 config MMU_MOTOROLA
 	bool
-	depends on MMU && !MMU_SUN3
 
 config MMU_SUN3
 	bool
-- 
cgit v0.10.2


From eaa2a87460eca27ce725d63bbcf3b2da053828b7 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 26 Dec 2008 21:07:57 +0100
Subject: kconfig: explain symbol value defaults

Added a few comments - no functional change.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index 9d4cba1..455f2c8 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -65,9 +65,13 @@ enum symbol_type {
 	S_UNKNOWN, S_BOOLEAN, S_TRISTATE, S_INT, S_HEX, S_STRING, S_OTHER
 };
 
+/* enum values are used as index to symbol.def[] */
 enum {
 	S_DEF_USER,		/* main user value */
-	S_DEF_AUTO,
+	S_DEF_AUTO,		/* values read from auto.conf */
+	S_DEF_DEF3,		/* Reserved for UI usage */
+	S_DEF_DEF4,		/* Reserved for UI usage */
+	S_DEF_COUNT
 };
 
 struct symbol {
@@ -75,7 +79,7 @@ struct symbol {
 	char *name;
 	enum symbol_type type;
 	struct symbol_value curr;
-	struct symbol_value def[4];
+	struct symbol_value def[S_DEF_COUNT];
 	tristate visible;
 	int flags;
 	struct property *prop;
-- 
cgit v0.10.2


From 5b2cf365a8e9bbf781939e941ed548c9743fdeea Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 26 Dec 2008 21:25:00 +0100
Subject: kconfig: add comments to symbol flags

No functional changes - only comments.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index 455f2c8..0bdb58e 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -88,22 +88,24 @@ struct symbol {
 
 #define for_all_symbols(i, sym) for (i = 0; i < 257; i++) for (sym = symbol_hash[i]; sym; sym = sym->next) if (sym->type != S_OTHER)
 
-#define SYMBOL_CONST		0x0001
-#define SYMBOL_CHECK		0x0008
-#define SYMBOL_CHOICE		0x0010
-#define SYMBOL_CHOICEVAL	0x0020
-#define SYMBOL_VALID		0x0080
-#define SYMBOL_OPTIONAL		0x0100
-#define SYMBOL_WRITE		0x0200
-#define SYMBOL_CHANGED		0x0400
-#define SYMBOL_AUTO		0x1000
-#define SYMBOL_CHECKED		0x2000
-#define SYMBOL_WARNED		0x8000
-#define SYMBOL_DEF		0x10000
-#define SYMBOL_DEF_USER		0x10000
-#define SYMBOL_DEF_AUTO		0x20000
-#define SYMBOL_DEF3		0x40000
-#define SYMBOL_DEF4		0x80000
+#define SYMBOL_CONST      0x0001  /* symbol is const */
+#define SYMBOL_CHECK      0x0008  /* used during dependency checking */
+#define SYMBOL_CHOICE     0x0010  /* start of a choice block (null name) */
+#define SYMBOL_CHOICEVAL  0x0020  /* used as a value in a choice block */
+#define SYMBOL_VALID      0x0080  /* set when symbol.curr is calculated */
+#define SYMBOL_OPTIONAL   0x0100  /* choice is optional - values can be 'n' */
+#define SYMBOL_WRITE      0x0200  /* ? */
+#define SYMBOL_CHANGED    0x0400  /* ? */
+#define SYMBOL_AUTO       0x1000  /* value from environment variable */
+#define SYMBOL_CHECKED    0x2000  /* used during dependency checking */
+#define SYMBOL_WARNED     0x8000  /* warning has been issued */
+
+/* Set when symbol.def[] is used */
+#define SYMBOL_DEF        0x10000  /* First bit of SYMBOL_DEF */
+#define SYMBOL_DEF_USER   0x10000  /* symbol.def[S_DEF_USER] is valid */
+#define SYMBOL_DEF_AUTO   0x20000  /* symbol.def[S_DEF_AUTO] is valid */
+#define SYMBOL_DEF3       0x40000  /* symbol.def[S_DEF_3] is valid */
+#define SYMBOL_DEF4       0x80000  /* symbol.def[S_DEF_4] is valid */
 
 #define SYMBOL_MAXLENGTH	256
 #define SYMBOL_HASHSIZE		257
-- 
cgit v0.10.2


From cf82607a904d3b2ed3d66f8799f00d1099c1849c Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 26 Dec 2008 21:32:31 +0100
Subject: kconfig: struct property commented

No functional changes

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index 0bdb58e..6408fef 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -111,21 +111,41 @@ struct symbol {
 #define SYMBOL_HASHSIZE		257
 #define SYMBOL_HASHMASK		0xff
 
+/* A property represent the config options that can be associated
+ * with a config "symbol".
+ * Sample:
+ * config FOO
+ *         default y
+ *         prompt "foo prompt"
+ *         select BAR
+ * config BAZ
+ *         int "BAZ Value"
+ *         range 1..255
+ */
 enum prop_type {
-	P_UNKNOWN, P_PROMPT, P_COMMENT, P_MENU, P_DEFAULT, P_CHOICE,
-	P_SELECT, P_RANGE, P_ENV
+	P_UNKNOWN,
+	P_PROMPT,   /* prompt "foo prompt" or "BAZ Value" */
+	P_COMMENT,  /* text associated with a comment */
+	P_MENU,     /* prompt associated with a menuconfig option */
+	P_DEFAULT,  /* default y */
+	P_CHOICE,   /* choice value */
+	P_SELECT,   /* select BAR */
+	P_RANGE,    /* range 7..100 (for a symbol) */
+	P_ENV,      /* value from environment variable */
 };
 
 struct property {
-	struct property *next;
-	struct symbol *sym;
-	enum prop_type type;
-	const char *text;
+	struct property *next;     /* next property - null if last */
+	struct symbol *sym;        /* the symbol for which the property is associated */
+	enum prop_type type;       /* type of property */
+	const char *text;          /* the prompt value - P_PROMPT, P_MENU, P_COMMENT */
 	struct expr_value visible;
-	struct expr *expr;
-	struct menu *menu;
-	struct file *file;
-	int lineno;
+	struct expr *expr;         /* the optional conditional part of the property */
+	struct menu *menu;         /* the menu the property are associated with
+	                            * valid for: P_SELECT, P_RANGE, P_CHOICE,
+	                            * P_PROMPT, P_DEFAULT, P_MENU, P_COMMENT */
+	struct file *file;         /* what file was this property defined */
+	int lineno;                /* what lineno was this property defined */
 };
 
 #define for_all_properties(sym, st, tok) \
-- 
cgit v0.10.2


From 7826005e5a53645d7aab7c13eda76126eadebf0b Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sat, 27 Dec 2008 21:51:59 +0100
Subject: kconfig: improve error messages for bad source statements

We now say where we detect the second source of a file,
and where we detect a recursively source of the same file.
This makes it easier to fix such errors.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Roman Zippel <zippel@linux-m68k.org>

diff --git a/scripts/kconfig/lex.zconf.c_shipped b/scripts/kconfig/lex.zconf.c_shipped
index 7342ce0..dc3e818 100644
--- a/scripts/kconfig/lex.zconf.c_shipped
+++ b/scripts/kconfig/lex.zconf.c_shipped
@@ -2370,11 +2370,14 @@ void zconf_nextfile(const char *name)
 	current_buf = buf;
 
 	if (file->flags & FILE_BUSY) {
-		printf("recursive scan (%s)?\n", name);
+		printf("%s:%d: do not source '%s' from itself\n",
+		       zconf_curname(), zconf_lineno(), name);
 		exit(1);
 	}
 	if (file->flags & FILE_SCANNED) {
-		printf("file %s already scanned?\n", name);
+		printf("%s:%d: file '%s' is already sourced from '%s'\n",
+		       zconf_curname(), zconf_lineno(), name,
+		       file->parent->name);
 		exit(1);
 	}
 	file->flags |= FILE_BUSY;
diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l
index 5164ef7..21ff69c 100644
--- a/scripts/kconfig/zconf.l
+++ b/scripts/kconfig/zconf.l
@@ -314,11 +314,14 @@ void zconf_nextfile(const char *name)
 	current_buf = buf;
 
 	if (file->flags & FILE_BUSY) {
-		printf("recursive scan (%s)?\n", name);
+		printf("%s:%d: do not source '%s' from itself\n",
+		       zconf_curname(), zconf_lineno(), name);
 		exit(1);
 	}
 	if (file->flags & FILE_SCANNED) {
-		printf("file %s already scanned?\n", name);
+		printf("%s:%d: file '%s' is already sourced from '%s'\n",
+		       zconf_curname(), zconf_lineno(), name,
+		       file->parent->name);
 		exit(1);
 	}
 	file->flags |= FILE_BUSY;
-- 
cgit v0.10.2


From 46b8af50ba5c072b74740c5fa8ba08e6eabb22f8 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Sat, 27 Dec 2008 02:43:36 -0500
Subject: headers_check.pl: disallow extern's

Since prototypes with "extern" refer to kernel functions, they make no
sense in userspace, so reject them automatically.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
[sam: made it into a warning]
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl
index 488a3b1..5bdd975 100644
--- a/scripts/headers_check.pl
+++ b/scripts/headers_check.pl
@@ -14,7 +14,9 @@
 #    Only include files located in asm* and linux* are checked.
 #    The rest are assumed to be system include files.
 #
-# 2) TODO: check for leaked CONFIG_ symbols
+# 2) It is checked that prototypes does not use "extern"
+#
+# 3) TODO: check for leaked CONFIG_ symbols
 
 use strict;
 
@@ -33,6 +35,7 @@ foreach my $file (@files) {
 	while ($line = <FH>) {
 		$lineno++;
 		check_include();
+		check_prototypes();
 	}
 	close FH;
 }
@@ -54,3 +57,10 @@ sub check_include
 		}
 	}
 }
+
+sub check_prototypes
+{
+	if ($line =~ m/^\s*extern\b/) {
+		printf STDERR "$filename:$lineno: extern's make no sense in userspace\n";
+	}
+}
-- 
cgit v0.10.2


From 7e557a2509f9e1477c10295b74e29e4e93fa2392 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sat, 27 Dec 2008 19:52:20 +0100
Subject: kbuild: check for leaked CONFIG_ symbols to userspace

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl
index 5bdd975..72924a7 100644
--- a/scripts/headers_check.pl
+++ b/scripts/headers_check.pl
@@ -16,7 +16,7 @@
 #
 # 2) It is checked that prototypes does not use "extern"
 #
-# 3) TODO: check for leaked CONFIG_ symbols
+# 3) Check for leaked CONFIG_ symbols
 
 use strict;
 
@@ -36,6 +36,7 @@ foreach my $file (@files) {
 		$lineno++;
 		check_include();
 		check_prototypes();
+		check_config();
 	}
 	close FH;
 }
@@ -64,3 +65,11 @@ sub check_prototypes
 		printf STDERR "$filename:$lineno: extern's make no sense in userspace\n";
 	}
 }
+
+sub check_config
+{
+	if ($line =~ m/[^a-zA-Z0-9_]+CONFIG_([a-zA-Z0-9]+)[^a-zA-Z0-9]/) {
+		printf STDERR "$filename:$lineno: leaks CONFIG_$1 to userspace where it is not valid\n";
+	}
+}
+
-- 
cgit v0.10.2


From 4307184f2b9240d0443bdf944c7b9eac044fe67b Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Sat, 27 Dec 2008 03:23:15 -0500
Subject: kbuild: in headers_install autoconvert asm/inline/volatile to __xxx__

Headers in userspace should be using the __xxx__ form of the asm, inline,
and volatile keywords.  Since people like to revert these things without
realizing what's going on, have the headers install step autoconvert these
keywords.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/headers_install.pl b/scripts/headers_install.pl
index 7d2b414..c6ae405 100644
--- a/scripts/headers_install.pl
+++ b/scripts/headers_install.pl
@@ -36,6 +36,9 @@ foreach my $file (@files) {
 		$line =~ s/\s__attribute_const__\s/ /g;
 		$line =~ s/\s__attribute_const__$//g;
 		$line =~ s/^#include <linux\/compiler.h>//;
+		$line =~ s/(^|\s)(inline)\b/$1__$2__/g;
+		$line =~ s/(^|\s)(asm)\b(\s|[(]|$)/$1__$2__$3/g;
+		$line =~ s/(^|\s|[(])(volatile)\b(\s|[(]|$)/$1__$2__$3/g;
 		printf OUTFILE "%s", $line;
 	}
 	close OUTFILE;
-- 
cgit v0.10.2


From 80a7d1d991e35b0370c0396f36f6a076869a6bac Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Sat, 27 Dec 2008 22:38:44 +0100
Subject: kbuild: disable sparse warning "returning void-valued expression"

The sparse warning -Wreturn-void ("returning void-valued expression")
is off by default, but it is enabled with -Wall, so add
-Wno-return-void to CHECKFLAGS to disable it.

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/Makefile b/Makefile
index d13a969..f900666 100644
--- a/Makefile
+++ b/Makefile
@@ -321,7 +321,8 @@ KALLSYMS	= scripts/kallsyms
 PERL		= perl
 CHECK		= sparse
 
-CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF)
+CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
+		  -Wbitwise -Wno-return-void $(CF)
 MODFLAGS	= -DMODULE
 CFLAGS_MODULE   = $(MODFLAGS)
 AFLAGS_MODULE   = $(MODFLAGS)
-- 
cgit v0.10.2


From 2af238e455ef5fd31c2f7a06c2db3f13d843b9bf Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 29 Feb 2008 14:21:53 -0800
Subject: kbuild: make *config usage docs

Create a kconfig user assistance guide, with a few tips and hints
about using menuconfig, xconfig, and gconfig.

Mostly contains user interface, environment variables, and search topics,
along with mini.config/custom.config usage.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/Documentation/kbuild/00-INDEX b/Documentation/kbuild/00-INDEX
index 1146442..54a118a 100644
--- a/Documentation/kbuild/00-INDEX
+++ b/Documentation/kbuild/00-INDEX
@@ -4,5 +4,7 @@ kconfig-language.txt
 	- specification of Config Language, the language in Kconfig files
 makefiles.txt
 	- developer information for linux kernel makefiles
+kconfig.txt
+	- usage help for make *config
 modules.txt
 	- how to build modules and to install them
diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt
new file mode 100644
index 0000000..26a7c0a
--- /dev/null
+++ b/Documentation/kbuild/kconfig.txt
@@ -0,0 +1,188 @@
+This file contains some assistance for using "make *config".
+
+Use "make help" to list all of the possible configuration targets.
+
+The xconfig ('qconf') and menuconfig ('mconf') programs also
+have embedded help text.  Be sure to check it for navigation,
+search, and other general help text.
+
+======================================================================
+General
+--------------------------------------------------
+
+New kernel releases often introduce new config symbols.  Often more
+important, new kernel releases may rename config symbols.  When
+this happens, using a previously working .config file and running
+"make oldconfig" won't necessarily produce a working new kernel
+for you, so you may find that you need to see what NEW kernel
+symbols have been introduced.
+
+To see a list of new config symbols when using "make oldconfig", use
+
+	cp user/some/old.config .config
+	yes "" | make oldconfig >conf.new
+
+and the config program will list as (NEW) any new symbols that have
+unknown values.  Of course, the .config file is also updated with
+new (default) values, so you can use:
+
+	grep "(NEW)" conf.new
+
+to see the new config symbols or you can 'diff' the previous and
+new .config files to see the differences:
+
+	diff .config.old .config | less
+
+(Yes, we need something better here.)
+
+
+======================================================================
+menuconfig
+--------------------------------------------------
+
+SEARCHING for CONFIG symbols
+
+Searching in menuconfig:
+
+	The Search function searches for kernel configuration symbol
+	names, so you have to know something close to what you are
+	looking for.
+
+	Example:
+		/hotplug
+		This lists all config symbols that contain "hotplug",
+		e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG.
+
+	For search help, enter / followed TAB-TAB-TAB (to highlight
+	<Help>) and Enter.  This will tell you that you can also use
+	regular expressions (regexes) in the search string, so if you
+	are not interested in MEMORY_HOTPLUG, you could try
+
+		/^hotplug
+
+
+______________________________________________________________________
+Color Themes for 'menuconfig'
+
+It is possible to select different color themes using the variable
+MENUCONFIG_COLOR.  To select a theme use:
+
+	make MENUCONFIG_COLOR=<theme> menuconfig
+
+Available themes are:
+  mono       => selects colors suitable for monochrome displays
+  blackbg    => selects a color scheme with black background
+  classic    => theme with blue background. The classic look
+  bluetitle  => a LCD friendly version of classic. (default)
+
+______________________________________________________________________
+Environment variables in 'menuconfig'
+
+KCONFIG_ALLCONFIG
+--------------------------------------------------
+(partially based on lkml email from/by Rob Landley, re: miniconfig)
+--------------------------------------------------
+The allyesconfig/allmodconfig/allnoconfig/randconfig variants can
+also use the environment variable KCONFIG_ALLCONFIG as a flag or a
+filename that contains config symbols that the user requires to be
+set to a specific value.  If KCONFIG_ALLCONFIG is used without a
+filename, "make *config" checks for a file named
+"all{yes/mod/no/random}.config" (corresponding to the *config command
+that was used) for symbol values that are to be forced.  If this file
+is not found, it checks for a file named "all.config" to contain forced
+values.
+
+This enables you to create "miniature" config (miniconfig) or custom
+config files containing just the config symbols that you are interested
+in.  Then the kernel config system generates the full .config file,
+including dependencies of your miniconfig file, based on the miniconfig
+file.
+
+This 'KCONFIG_ALLCONFIG' file is a config file which contains
+(usually a subset of all) preset config symbols.  These variable
+settings are still subject to normal dependency checks.
+
+Examples:
+	KCONFIG_ALLCONFIG=custom-notebook.config make allnoconfig
+or
+	KCONFIG_ALLCONFIG=mini.config make allnoconfig
+or
+	make KCONFIG_ALLCONFIG=mini.config allnoconfig
+
+These examples will disable most options (allnoconfig) but enable or
+disable the options that are explicitly listed in the specified
+mini-config files.
+
+KCONFIG_NOSILENTUPDATE
+--------------------------------------------------
+If this variable has a non-blank value, it prevents silent kernel
+config udpates (requires explicit updates).
+
+KCONFIG_CONFIG
+--------------------------------------------------
+This environment variable can be used to specify a default kernel config
+file name to override the default name of ".config".
+
+KCONFIG_OVERWRITECONFIG
+--------------------------------------------------
+If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
+break symlinks when .config is a symlink to somewhere else.
+
+KCONFIG_NOTIMESTAMP
+--------------------------------------------------
+If this environment variable exists and is non-null, the timestamp line
+in generated .config files is omitted.
+
+KCONFIG_AUTOCONFIG
+--------------------------------------------------
+This environment variable can be set to specify the path & name of the
+"auto.conf" file.  Its default value is "include/config/auto.conf".
+
+KCONFIG_AUTOHEADER
+--------------------------------------------------
+This environment variable can be set to specify the path & name of the
+"autoconf.h" (header) file.  Its default value is "include/linux/autoconf.h".
+
+______________________________________________________________________
+menuconfig User Interface Options
+----------------------------------------------------------------------
+MENUCONFIG_MODE
+--------------------------------------------------
+This mode shows all sub-menus in one large tree.
+
+Example:
+	MENUCONFIG_MODE=single_menu make menuconfig
+
+======================================================================
+xconfig
+--------------------------------------------------
+
+Searching in xconfig:
+
+	The Search function searches for kernel configuration symbol
+	names, so you have to know something close to what you are
+	looking for.
+
+	Example:
+		Ctrl-F hotplug
+	or
+		Menu: File, Search, hotplug
+
+	lists all config symbol entries that contain "hotplug" in
+	the symbol name.  In this Search dialog, you may change the
+	config setting for any of the entries that are not grayed out.
+	You can also enter a different search string without having
+	to return to the main menu.
+
+
+======================================================================
+gconfig
+--------------------------------------------------
+
+Searching in gconfig:
+
+	None (gconfig isn't maintained as well as xconfig or menuconfig);
+	however, gconfig does have a few more viewing choices than
+	xconfig does.
+
+###
diff --git a/README b/README
index 159912c..90a0765 100644
--- a/README
+++ b/README
@@ -52,11 +52,11 @@ DOCUMENTATION:
 
  - The Documentation/DocBook/ subdirectory contains several guides for
    kernel developers and users.  These guides can be rendered in a
-   number of formats:  PostScript (.ps), PDF, and HTML, among others.
-   After installation, "make psdocs", "make pdfdocs", or "make htmldocs"
-   will render the documentation in the requested format.
+   number of formats:  PostScript (.ps), PDF, HTML, & man-pages, among others.
+   After installation, "make psdocs", "make pdfdocs", "make htmldocs",
+   or "make mandocs" will render the documentation in the requested format.
 
-INSTALLING the kernel:
+INSTALLING the kernel source:
 
  - If you install the full sources, put the kernel tarball in a
    directory where you have permissions (eg. your home directory) and
@@ -187,14 +187,9 @@ CONFIGURING the kernel:
 	"make randconfig"  Create a ./.config file by setting symbol
 			   values to random values.
 
-   The allyesconfig/allmodconfig/allnoconfig/randconfig variants can
-   also use the environment variable KCONFIG_ALLCONFIG to specify a
-   filename that contains config options that the user requires to be
-   set to a specific value.  If KCONFIG_ALLCONFIG=filename is not used,
-   "make *config" checks for a file named "all{yes/mod/no/random}.config"
-   for symbol values that are to be forced.  If this file is not found,
-   it checks for a file named "all.config" to contain forced values.
-   
+   You can find more information on using the Linux kernel config tools
+   in Documentation/kbuild/make-configs.txt.
+
 	NOTES on "make config":
 	- having unnecessary drivers will make the kernel bigger, and can
 	  under some circumstances lead to problems: probing for a
@@ -231,6 +226,19 @@ COMPILING the kernel:
  - If you configured any of the parts of the kernel as `modules', you
    will also have to do "make modules_install".
 
+ - Verbose kernel compile/build output:
+
+   Normally the kernel build system runs in a fairly quiet mode (but not
+   totally silent).  However, sometimes you or other kernel developers need
+   to see compile, link, or other commands exactly as they are executed.
+   For this, use "verbose" build mode.  This is done by inserting
+   "V=1" in the "make" command.  E.g.:
+
+	make V=1 all
+
+   To have the build system also tell the reason for the rebuild of each
+   target, use "V=2".  The default is "V=0".
+
  - Keep a backup kernel handy in case something goes wrong.  This is 
    especially true for the development releases, since each new release
    contains new code which has not been debugged.  Make sure you keep a
-- 
cgit v0.10.2


From acc08b516f25b79cfcff310e51d95048bfcf7b0d Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Mon, 29 Dec 2008 13:45:52 +0100
Subject: kbuild: document environment variables

Add kbuild.txt to Documentation/kbuild
More stuff can be added later - at least we have
som of the varous environment variables documented now.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/Documentation/kbuild/00-INDEX b/Documentation/kbuild/00-INDEX
index 54a118a..e8d2b6d 100644
--- a/Documentation/kbuild/00-INDEX
+++ b/Documentation/kbuild/00-INDEX
@@ -1,10 +1,12 @@
 00-INDEX
-    	- this file: info on the kernel build process
+	- this file: info on the kernel build process
+kbuild.txt
+	- developer information on kbuild
+kconfig.txt
+	- usage help for make *config
 kconfig-language.txt
 	- specification of Config Language, the language in Kconfig files
 makefiles.txt
 	- developer information for linux kernel makefiles
-kconfig.txt
-	- usage help for make *config
 modules.txt
 	- how to build modules and to install them
diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
new file mode 100644
index 0000000..5177184
--- /dev/null
+++ b/Documentation/kbuild/kbuild.txt
@@ -0,0 +1,126 @@
+Environment variables
+
+KCPPFLAGS
+--------------------------------------------------
+Additional options to pass when preprocessing. The preprocessing options
+will be used in all cases where kbuild do preprocessing including
+building C files and assembler files.
+
+KAFLAGS
+--------------------------------------------------
+Additional options to the assembler.
+
+KCFLAGS
+--------------------------------------------------
+Additional options to the C compiler.
+
+KBUILD_VERBOSE
+--------------------------------------------------
+Set the kbuild verbosity. Can be assinged same values as "V=...".
+See make help for the full list.
+Setting "V=..." takes precedence over KBUILD_VERBOSE.
+
+KBUILD_EXTMOD
+--------------------------------------------------
+Set the directory to look for the kernel source when building external
+modules.
+The directory can be specified in several ways:
+1) Use "M=..." on the command line
+2) Environmnet variable KBUILD_EXTMOD
+3) Environmnet variable SUBDIRS
+The possibilities are listed in the order they take precedence.
+Using "M=..." will always override the others.
+
+KBUILD_OUTPUT
+--------------------------------------------------
+Specify the output directory when building the kernel.
+The output directory can also be specificed using "O=...".
+Setting "O=..." takes precedence over KBUILD_OUTPUT
+
+ARCH
+--------------------------------------------------
+Set ARCH to the architecture to be built.
+In most cases the name of the architecture is the same as the
+directory name found in the arch/ directory.
+But some architectures suach as x86 and sparc has aliases.
+x86: i386 for 32 bit, x86_64 for 64 bit
+sparc: sparc for 32 bit, sparc64 for 64 bit
+
+CROSS_COMPILE
+--------------------------------------------------
+Specify an optional fixed part of the binutils filename.
+CROSS_COMPILE can be a part of the filename or the full path.
+
+CROSS_COMPILE is also used for ccache is some setups.
+
+CF
+--------------------------------------------------
+Additional options for sparse.
+CF is often used on the command-line like this:
+
+    make CF=-Wbitwise C=2
+
+INSTALL_PATH
+--------------------------------------------------
+INSTALL_PATH specifies where to place the updated kernel and system map
+images. Default is /boot, but you can set it to other values
+
+
+MODLIB
+--------------------------------------------------
+Specify where to install modules.
+The default value is:
+
+     $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
+
+The value can be overridden in which case the default value is ignored.
+
+INSTALL_MOD_PATH
+--------------------------------------------------
+INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
+relocations required by build roots.  This is not defined in the
+makefile but the argument can be passed to make if needed.
+
+INSTALL_MOD_STRIP
+--------------------------------------------------
+INSTALL_MOD_STRIP, if defined, will cause modules to be
+stripped after they are installed.  If INSTALL_MOD_STRIP is '1', then
+the default option --strip-debug will be used.  Otherwise,
+INSTALL_MOD_STRIP will used as the options to the strip command.
+
+INSTALL_FW_PATH
+--------------------------------------------------
+INSTALL_FW_PATH specify where to install the firmware blobs.
+The default value is:
+
+    $(INSTALL_MOD_PATH)/lib/firmware
+
+The value can be overridden in which case the default value is ignored.
+
+INSTALL_HDR_PATH
+--------------------------------------------------
+INSTALL_HDR_PATH specify where to install user space headers when
+executing "make headers_*".
+The default value is:
+
+    $(objtree)/usr
+
+$(objtree) is the directory where output files are saved.
+The output directory is often set using "O=..." on the commandline.
+
+The value can be overridden in which case the default value is ignored.
+
+KBUILD_MODPOST_WARN
+--------------------------------------------------
+KBUILD_MODPOST_WARN can be set to avoid error out in case of undefined
+symbols in the final module linking stage.
+
+KBUILD_MODPOST_FINAL
+--------------------------------------------------
+KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules.
+This is solely usefull to speed up test compiles.
+
+KBUILD_EXTRA_SYMBOLS
+--------------------------------------------------
+For modules use symbols from another modules.
+See more details in modules.txt.
-- 
cgit v0.10.2


From 521b0c774d1350aac18f5cd35831469a4e879d72 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Tue, 30 Dec 2008 10:20:08 +0100
Subject: kbuild: drop debugging leftover in tags.sh

Noticed by Jike.

Reported-by: "Jike Song" <albcamus@gmail.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/tags.sh b/scripts/tags.sh
index 4e75472..9e3451d 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -84,7 +84,6 @@ docscope()
 
 exuberant()
 {
-	all_sources > all
 	all_sources | xargs $1 -a                               \
 	-I __initdata,__exitdata,__acquires,__releases          \
 	-I __read_mostly,____cacheline_aligned                  \
-- 
cgit v0.10.2


From 483b41218fa9d5172312a9e294aaf78e22b266e6 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Tue, 30 Dec 2008 11:34:58 +0100
Subject: kbuild: add checks for include of linux/types in userspace headers

If we see __[us](8|16|32|64) then we must include <linux/types.h>
If wee see include of <asm/types.h> then we recommend <linux/types.h>

Original script from Mike but modified by me.

Cc: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl
index 72924a7..b62c3196 100644
--- a/scripts/headers_check.pl
+++ b/scripts/headers_check.pl
@@ -34,9 +34,11 @@ foreach my $file (@files) {
 	$lineno = 0;
 	while ($line = <FH>) {
 		$lineno++;
-		check_include();
-		check_prototypes();
-		check_config();
+		&check_include();
+		&check_asm_types();
+		&check_sizetypes();
+		&check_prototypes();
+		&check_config();
 	}
 	close FH;
 }
@@ -73,3 +75,42 @@ sub check_config
 	}
 }
 
+my $linux_asm_types;
+sub check_asm_types()
+{
+	if ($lineno == 1) {
+		$linux_asm_types = 0;
+	} elsif ($linux_asm_types >= 1) {
+		return;
+	}
+	if ($line =~ m/^\s*#\s*include\s+<asm\/types.h>/) {
+		$linux_asm_types = 1;
+		printf STDERR "$filename:$lineno: " .
+		"include of <linux/types.h> is preferred over <asm/types.h>\n"
+		# Warn until headers are all fixed
+		#$ret = 1;
+	}
+}
+
+my $linux_types;
+sub check_sizetypes
+{
+	if ($lineno == 1) {
+		$linux_types = 0;
+	} elsif ($linux_types >= 1) {
+		return;
+	}
+	if ($line =~ m/^\s*#\s*include\s+<linux\/types.h>/) {
+		$linux_types = 1;
+		return;
+	}
+	if ($line =~ m/__[us](8|16|32|64)\b/) {
+		printf STDERR "$filename:$lineno: " .
+		              "found __[us]{8,16,32,64} type " .
+		              "without #include <linux/types.h>\n";
+		$linux_types = 2;
+		# Warn until headers are all fixed
+		#$ret = 1;
+	}
+}
+
-- 
cgit v0.10.2


From b67ff8ce122f3353bd741db48ce1756c12fb5f2d Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 31 Dec 2008 09:32:30 +0100
Subject: kbuild: ignore a few files in headers_check

The new check for asm/types.h and linux/types.h had
a few false positives.

o We cannot let linux/types.h include linux/types.h
o The int-ll64.h and int-ll64.h define the types
  and are included by linux/types.h

Handle this by hardcoding the filenames in the headers_check script.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl
index b62c3196..db30fac 100644
--- a/scripts/headers_check.pl
+++ b/scripts/headers_check.pl
@@ -78,6 +78,9 @@ sub check_config
 my $linux_asm_types;
 sub check_asm_types()
 {
+	if ($filename =~ /types.h|int-l64.h|int-ll64.h/o) {
+		return;
+	}
 	if ($lineno == 1) {
 		$linux_asm_types = 0;
 	} elsif ($linux_asm_types >= 1) {
@@ -95,6 +98,9 @@ sub check_asm_types()
 my $linux_types;
 sub check_sizetypes
 {
+	if ($filename =~ /types.h|int-l64.h|int-ll64.h/o) {
+		return;
+	}
 	if ($lineno == 1) {
 		$linux_types = 0;
 	} elsif ($linux_types >= 1) {
-- 
cgit v0.10.2


From 6680598b44ed3c0052d155522eb21fc5a00de5f3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 2 Jan 2009 18:53:14 +0100
Subject: Disallow gcc versions 3.{0,1}

GCC 3.0 and 3.1 are too old to build a working kernel.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
[ This check got dropped as obsolete when I simplified the gcc header
  inclusion mess in f153b82121b0366fe0e5f9553545cce237335175, but Willy
  Tarreau reports actually having those old versions still..  -Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h
index 2befe65..8005eff 100644
--- a/include/linux/compiler-gcc3.h
+++ b/include/linux/compiler-gcc3.h
@@ -2,6 +2,10 @@
 #error "Please don't include <linux/compiler-gcc3.h> directly, include <linux/compiler.h> instead."
 #endif
 
+#if __GNUC_MINOR__ < 2
+# error Sorry, your compiler is too old - please upgrade it.
+#endif
+
 #if __GNUC_MINOR__ >= 3
 # define __used			__attribute__((__used__))
 #else
-- 
cgit v0.10.2


From 79ff56ebd3edfb16f8badc558cb439b203a3298f Mon Sep 17 00:00:00 2001
From: Roland Dreier <rdreier@cisco.com>
Date: Tue, 30 Dec 2008 20:18:00 -0800
Subject: swiotlb: add missing __init annotations

Impact: cleanup, reduce kernel size a bit

The current kernel build warns:

    WARNING: vmlinux.o(.text+0x11458): Section mismatch in reference from the function swiotlb_alloc_boot() to the function .init.text:__alloc_bootmem_low()
    The function swiotlb_alloc_boot() references
    the function __init __alloc_bootmem_low().
    This is often because swiotlb_alloc_boot lacks a __init
    annotation or the annotation of __alloc_bootmem_low is wrong.

    WARNING: vmlinux.o(.text+0x1011f2): Section mismatch in reference from the function swiotlb_late_init_with_default_size() to the function .init.text:__alloc_bootmem_low()
    The function swiotlb_late_init_with_default_size() references
    the function __init __alloc_bootmem_low().
    This is often because swiotlb_late_init_with_default_size lacks a __init
    annotation or the annotation of __alloc_bootmem_low is wrong.

and indeed the functions calling __alloc_bootmem_low() can be marked
__init as well.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 242c344..8cba374 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -13,7 +13,7 @@
 
 int swiotlb __read_mostly;
 
-void *swiotlb_alloc_boot(size_t size, unsigned long nslabs)
+void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
 {
 	return alloc_bootmem_low_pages(size);
 }
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index fa2dc4e..b6d0aae 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -116,7 +116,7 @@ setup_io_tlb_npages(char *str)
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */
 
-void * __weak swiotlb_alloc_boot(size_t size, unsigned long nslabs)
+void * __weak __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
 {
 	return alloc_bootmem_low_pages(size);
 }
-- 
cgit v0.10.2


From a66963a966881238d2738185e6f1adae1447f830 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 18:14:04 -0800
Subject: sparc: delete unused config symbols

There is no need to define a config symbol if
it is never set to any value. Undefined symbols equal
to 'n'.

GENERIC_GPIO looks like it is similar but
it is set using select in some other file so
it must be kept.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 0a94d9c..002f7b4 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -188,14 +188,6 @@ config ARCH_MAY_HAVE_PC_FDC
 	bool
 	default y
 
-config ARCH_HAS_ILOG2_U32
-	bool
-	default n
-
-config ARCH_HAS_ILOG2_U64
-	bool
-	default n
-
 config EMULATED_CMPXCHG
 	bool
 	default y if SPARC32
@@ -442,26 +434,6 @@ config SERIAL_CONSOLE
 endmenu
 
 menu "Bus options (PCI etc.)"
-config ISA
-	bool
-	help
-	  ISA is found on Espresso only and is not supported currently.
-
-config ISAPNP
-	bool
-	help
-	  ISAPNP is not supported
-
-config EISA
-	bool
-	help
-	  EISA is not supported.
-
-config MCA
-	bool
-	help
-	  MCA is not supported.
-
 config SBUS
 	bool
 	default y
-- 
cgit v0.10.2


From a508228a9ed2c2b582cec7833b60f55d12789219 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 18:34:50 -0800
Subject: sparc: unify posix_types.h

The posix types differed so much in their definition
that they are kept in separate blocks.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 89c260a..57bcc1f 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -3,8 +3,6 @@ include include/asm-generic/Kbuild.asm
 
 header-y += ipcbuf_32.h
 header-y += ipcbuf_64.h
-header-y += posix_types_32.h
-header-y += posix_types_64.h
 header-y += ptrace_32.h
 header-y += ptrace_64.h
 header-y += sigcontext_32.h
diff --git a/arch/sparc/include/asm/posix_types.h b/arch/sparc/include/asm/posix_types.h
index 03a0e09..98d6ebb 100644
--- a/arch/sparc/include/asm/posix_types.h
+++ b/arch/sparc/include/asm/posix_types.h
@@ -1,8 +1,155 @@
-#ifndef ___ASM_SPARC_POSIX_TYPES_H
-#define ___ASM_SPARC_POSIX_TYPES_H
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ */
+
+#ifndef __SPARC_POSIX_TYPES_H
+#define __SPARC_POSIX_TYPES_H
+
 #if defined(__sparc__) && defined(__arch64__)
-#include <asm/posix_types_64.h>
+/* sparc 64 bit */
+typedef unsigned long          __kernel_size_t;
+typedef long                   __kernel_ssize_t;
+typedef long                   __kernel_ptrdiff_t;
+typedef long                   __kernel_time_t;
+typedef long                   __kernel_clock_t;
+typedef int                    __kernel_pid_t;
+typedef int                    __kernel_ipc_pid_t;
+typedef unsigned int           __kernel_uid_t;
+typedef unsigned int           __kernel_gid_t;
+typedef unsigned long          __kernel_ino_t;
+typedef unsigned int           __kernel_mode_t;
+typedef unsigned short         __kernel_umode_t;
+typedef unsigned int           __kernel_nlink_t;
+typedef int                    __kernel_daddr_t;
+typedef long                   __kernel_off_t;
+typedef char *                 __kernel_caddr_t;
+typedef unsigned short	       __kernel_uid16_t;
+typedef unsigned short	       __kernel_gid16_t;
+typedef int                    __kernel_clockid_t;
+typedef int                    __kernel_timer_t;
+
+typedef unsigned short 	       __kernel_old_uid_t;
+typedef unsigned short         __kernel_old_gid_t;
+typedef __kernel_uid_t	       __kernel_uid32_t;
+typedef __kernel_gid_t	       __kernel_gid32_t;
+
+typedef unsigned int	       __kernel_old_dev_t;
+
+/* Note this piece of asymmetry from the v9 ABI.  */
+typedef int		       __kernel_suseconds_t;
+
 #else
-#include <asm/posix_types_32.h>
-#endif
+/* sparc 32 bit */
+
+typedef unsigned int           __kernel_size_t;
+typedef int                    __kernel_ssize_t;
+typedef long int               __kernel_ptrdiff_t;
+typedef long                   __kernel_time_t;
+typedef long		       __kernel_suseconds_t;
+typedef long                   __kernel_clock_t;
+typedef int                    __kernel_pid_t;
+typedef unsigned short         __kernel_ipc_pid_t;
+typedef unsigned short         __kernel_uid_t;
+typedef unsigned short         __kernel_gid_t;
+typedef unsigned long          __kernel_ino_t;
+typedef unsigned short         __kernel_mode_t;
+typedef unsigned short         __kernel_umode_t;
+typedef short                  __kernel_nlink_t;
+typedef long                   __kernel_daddr_t;
+typedef long                   __kernel_off_t;
+typedef char *                 __kernel_caddr_t;
+typedef unsigned short	       __kernel_uid16_t;
+typedef unsigned short	       __kernel_gid16_t;
+typedef unsigned int	       __kernel_uid32_t;
+typedef unsigned int	       __kernel_gid32_t;
+typedef unsigned short	       __kernel_old_uid_t;
+typedef unsigned short	       __kernel_old_gid_t;
+typedef unsigned short	       __kernel_old_dev_t;
+typedef int                    __kernel_clockid_t;
+typedef int                    __kernel_timer_t;
+
+#endif /* defined(__sparc__) && defined(__arch64__) */
+
+#ifdef __GNUC__
+typedef long long              __kernel_loff_t;
 #endif
+
+typedef struct {
+	int	val[2];
+} __kernel_fsid_t;
+
+#ifdef __KERNEL__
+
+#undef __FD_SET
+static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+	unsigned long _tmp = fd / __NFDBITS;
+	unsigned long _rem = fd % __NFDBITS;
+	fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
+}
+
+#undef __FD_CLR
+static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+	unsigned long _tmp = fd / __NFDBITS;
+	unsigned long _rem = fd % __NFDBITS;
+	fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
+}
+
+#undef __FD_ISSET
+static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p)
+{
+	unsigned long _tmp = fd / __NFDBITS;
+	unsigned long _rem = fd % __NFDBITS;
+	return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant cases (8 or 32 longs,
+ * for 256 and 1024-bit fd_sets respectively)
+ */
+#undef __FD_ZERO
+static inline void __FD_ZERO(__kernel_fd_set *p)
+{
+	unsigned long *tmp = p->fds_bits;
+	int i;
+
+	if (__builtin_constant_p(__FDSET_LONGS)) {
+		switch (__FDSET_LONGS) {
+			case 32:
+			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+			  tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+			  tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
+			  tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
+			  tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0;
+			  tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0;
+			  tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0;
+			  tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0;
+			  return;
+			case 16:
+			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+			  tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+			  tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
+			  tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
+			  return;
+			case 8:
+			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+			  tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+			  return;
+			case 4:
+			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+			  return;
+		}
+	}
+	i = __FDSET_LONGS;
+	while (i) {
+		i--;
+		*tmp = 0;
+		tmp++;
+	}
+}
+
+#endif /* __KERNEL__ */
+#endif /* __SPARC_POSIX_TYPES_H */
diff --git a/arch/sparc/include/asm/posix_types_32.h b/arch/sparc/include/asm/posix_types_32.h
deleted file mode 100644
index 6bb6eb1..0000000
--- a/arch/sparc/include/asm/posix_types_32.h
+++ /dev/null
@@ -1,118 +0,0 @@
-#ifndef __ARCH_SPARC_POSIX_TYPES_H
-#define __ARCH_SPARC_POSIX_TYPES_H
-
-/*
- * This file is generally used by user-level software, so you need to
- * be a little careful about namespace pollution etc.  Also, we cannot
- * assume GCC is being used.
- */
-
-typedef unsigned int           __kernel_size_t;
-typedef int                    __kernel_ssize_t;
-typedef long int               __kernel_ptrdiff_t;
-typedef long                   __kernel_time_t;
-typedef long		       __kernel_suseconds_t;
-typedef long                   __kernel_clock_t;
-typedef int                    __kernel_pid_t;
-typedef unsigned short         __kernel_ipc_pid_t;
-typedef unsigned short         __kernel_uid_t;
-typedef unsigned short         __kernel_gid_t;
-typedef unsigned long          __kernel_ino_t;
-typedef unsigned short         __kernel_mode_t;
-typedef unsigned short         __kernel_umode_t;
-typedef short                  __kernel_nlink_t;
-typedef long                   __kernel_daddr_t;
-typedef long                   __kernel_off_t;
-typedef char *                 __kernel_caddr_t;
-typedef unsigned short	       __kernel_uid16_t;
-typedef unsigned short	       __kernel_gid16_t;
-typedef unsigned int	       __kernel_uid32_t;
-typedef unsigned int	       __kernel_gid32_t;
-typedef unsigned short	       __kernel_old_uid_t;
-typedef unsigned short	       __kernel_old_gid_t;
-typedef unsigned short	       __kernel_old_dev_t;
-typedef int                    __kernel_clockid_t;
-typedef int                    __kernel_timer_t;
-
-#ifdef __GNUC__
-typedef long long	__kernel_loff_t;
-#endif
-
-typedef struct {
-	int	val[2];
-} __kernel_fsid_t;
-
-#if defined(__KERNEL__)
-
-#undef __FD_SET
-static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
-{
-	unsigned long _tmp = fd / __NFDBITS;
-	unsigned long _rem = fd % __NFDBITS;
-	fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
-}
-
-#undef __FD_CLR
-static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
-{
-	unsigned long _tmp = fd / __NFDBITS;
-	unsigned long _rem = fd % __NFDBITS;
-	fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
-}
-
-#undef __FD_ISSET
-static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p)
-{
-	unsigned long _tmp = fd / __NFDBITS;
-	unsigned long _rem = fd % __NFDBITS;
-	return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
-}
-
-/*
- * This will unroll the loop for the normal constant cases (8 or 32 longs,
- * for 256 and 1024-bit fd_sets respectively)
- */
-#undef __FD_ZERO
-static inline void __FD_ZERO(__kernel_fd_set *p)
-{
-	unsigned long *tmp = p->fds_bits;
-	int i;
-
-	if (__builtin_constant_p(__FDSET_LONGS)) {
-		switch (__FDSET_LONGS) {
-			case 32:
-			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
-			  tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
-			  tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
-			  tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
-			  tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0;
-			  tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0;
-			  tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0;
-			  tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0;
-			  return;
-			case 16:
-			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
-			  tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
-			  tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
-			  tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
-			  return;
-			case 8:
-			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
-			  tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
-			  return;
-			case 4:
-			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
-			  return;
-		}
-	}
-	i = __FDSET_LONGS;
-	while (i) {
-		i--;
-		*tmp = 0;
-		tmp++;
-	}
-}
-
-#endif /* defined(__KERNEL__) */
-
-#endif /* !(__ARCH_SPARC_POSIX_TYPES_H) */
diff --git a/arch/sparc/include/asm/posix_types_64.h b/arch/sparc/include/asm/posix_types_64.h
deleted file mode 100644
index ba8f932..0000000
--- a/arch/sparc/include/asm/posix_types_64.h
+++ /dev/null
@@ -1,122 +0,0 @@
-#ifndef __ARCH_SPARC64_POSIX_TYPES_H
-#define __ARCH_SPARC64_POSIX_TYPES_H
-
-/*
- * This file is generally used by user-level software, so you need to
- * be a little careful about namespace pollution etc.  Also, we cannot
- * assume GCC is being used.
- */
-
-typedef unsigned long          __kernel_size_t;
-typedef long                   __kernel_ssize_t;
-typedef long                   __kernel_ptrdiff_t;
-typedef long                   __kernel_time_t;
-typedef long                   __kernel_clock_t;
-typedef int                    __kernel_pid_t;
-typedef int                    __kernel_ipc_pid_t;
-typedef unsigned int           __kernel_uid_t;
-typedef unsigned int           __kernel_gid_t;
-typedef unsigned long          __kernel_ino_t;
-typedef unsigned int           __kernel_mode_t;
-typedef unsigned short         __kernel_umode_t;
-typedef unsigned int           __kernel_nlink_t;
-typedef int                    __kernel_daddr_t;
-typedef long                   __kernel_off_t;
-typedef char *                 __kernel_caddr_t;
-typedef unsigned short	       __kernel_uid16_t;
-typedef unsigned short	       __kernel_gid16_t;
-typedef int                    __kernel_clockid_t;
-typedef int                    __kernel_timer_t;
-
-typedef unsigned short 	       __kernel_old_uid_t;
-typedef unsigned short         __kernel_old_gid_t;
-typedef __kernel_uid_t	       __kernel_uid32_t;
-typedef __kernel_gid_t	       __kernel_gid32_t;
-
-typedef unsigned int	       __kernel_old_dev_t;
-
-/* Note this piece of asymmetry from the v9 ABI.  */
-typedef int		       __kernel_suseconds_t;
-
-#ifdef __GNUC__
-typedef long long              __kernel_loff_t;
-#endif
-
-typedef struct {
-	int	val[2];
-} __kernel_fsid_t;
-
-#if defined(__KERNEL__)
-
-#undef __FD_SET
-static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
-{
-	unsigned long _tmp = fd / __NFDBITS;
-	unsigned long _rem = fd % __NFDBITS;
-	fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
-}
-
-#undef __FD_CLR
-static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
-{
-	unsigned long _tmp = fd / __NFDBITS;
-	unsigned long _rem = fd % __NFDBITS;
-	fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
-}
-
-#undef __FD_ISSET
-static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p)
-{
-	unsigned long _tmp = fd / __NFDBITS;
-	unsigned long _rem = fd % __NFDBITS;
-	return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
-}
-
-/*
- * This will unroll the loop for the normal constant cases (8 or 32 longs,
- * for 256 and 1024-bit fd_sets respectively)
- */
-#undef __FD_ZERO
-static inline void __FD_ZERO(__kernel_fd_set *p)
-{
-	unsigned long *tmp = p->fds_bits;
-	int i;
-
-	if (__builtin_constant_p(__FDSET_LONGS)) {
-		switch (__FDSET_LONGS) {
-			case 32:
-			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
-			  tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
-			  tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
-			  tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
-			  tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0;
-			  tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0;
-			  tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0;
-			  tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0;
-			  return;
-			case 16:
-			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
-			  tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
-			  tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
-			  tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
-			  return;
-			case 8:
-			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
-			  tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
-			  return;
-			case 4:
-			  tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
-			  return;
-		}
-	}
-	i = __FDSET_LONGS;
-	while (i) {
-		i--;
-		*tmp = 0;
-		tmp++;
-	}
-}
-
-#endif /* defined(__KERNEL__) */
-
-#endif /* !(__ARCH_SPARC64_POSIX_TYPES_H) */
-- 
cgit v0.10.2


From 104e28059d771274b545b4772a27c5c8f9af2767 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 18:39:10 -0800
Subject: sparc32: drop __old_kernel_stat

sparc32 does not define __ARCH_WANT_OLD_STAT so
we do not use this structure neither do we support it.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/stat_32.h b/arch/sparc/include/asm/stat_32.h
index 2299e1d5..643d572 100644
--- a/arch/sparc/include/asm/stat_32.h
+++ b/arch/sparc/include/asm/stat_32.h
@@ -3,20 +3,6 @@
 
 #include <linux/types.h>
 
-struct __old_kernel_stat {
-	unsigned short st_dev;
-	unsigned short st_ino;
-	unsigned short st_mode;
-	unsigned short st_nlink;
-	unsigned short st_uid;
-	unsigned short st_gid;
-	unsigned short st_rdev;
-	unsigned long  st_size;
-	unsigned long  st_atime;
-	unsigned long  st_mtime;
-	unsigned long  st_ctime;
-};
-
 struct stat {
 	unsigned short	st_dev;
 	unsigned long	st_ino;
-- 
cgit v0.10.2


From 085219f79cad89291699bd2bfb21c9fdabafe65f Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 18:47:34 -0800
Subject: sparc32: use proper types in struct stat

Like sparc64 use proper types in struct stat

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/stat_32.h b/arch/sparc/include/asm/stat_32.h
index 643d572..45b3ee4 100644
--- a/arch/sparc/include/asm/stat_32.h
+++ b/arch/sparc/include/asm/stat_32.h
@@ -5,21 +5,21 @@
 
 struct stat {
 	unsigned short	st_dev;
-	unsigned long	st_ino;
-	unsigned short	st_mode;
+	ino_t		st_ino;
+	mode_t		st_mode;
 	short		st_nlink;
-	unsigned short	st_uid;
-	unsigned short	st_gid;
+	uid_t		st_uid;
+	gid_t		st_gid;
 	unsigned short	st_rdev;
-	long		st_size;
-	long		st_atime;
+	off_t		st_size;
+	time_t		st_atime;
 	unsigned long	st_atime_nsec;
-	long		st_mtime;
+	time_t		st_mtime;
 	unsigned long	st_mtime_nsec;
-	long		st_ctime;
+	time_t		st_ctime;
 	unsigned long	st_ctime_nsec;
-	long		st_blksize;
-	long		st_blocks;
+	off_t		st_blksize;
+	off_t		st_blocks;
 	unsigned long	__unused4[2];
 };
 
-- 
cgit v0.10.2


From 12aa0b17328a01490c9e53904767ca59596f9ea1 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 18:48:21 -0800
Subject: sparc: unify stat.h

To my suprise struct stat64 was not equal on sparc 32 and sparc64,
so there was really nothing to share here.
Unify the files by adding their respective content to stat.h.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 57bcc1f..42cf483 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -11,8 +11,6 @@ header-y += siginfo_32.h
 header-y += siginfo_64.h
 header-y += signal_32.h
 header-y += signal_64.h
-header-y += stat_32.h
-header-y += stat_64.h
 
 header-y += apc.h
 header-y += asi.h
diff --git a/arch/sparc/include/asm/stat.h b/arch/sparc/include/asm/stat.h
index d815301..55db5ec 100644
--- a/arch/sparc/include/asm/stat.h
+++ b/arch/sparc/include/asm/stat.h
@@ -1,8 +1,107 @@
-#ifndef ___ASM_SPARC_STAT_H
-#define ___ASM_SPARC_STAT_H
+#ifndef __SPARC_STAT_H
+#define __SPARC_STAT_H
+
+#include <linux/types.h>
+
 #if defined(__sparc__) && defined(__arch64__)
-#include <asm/stat_64.h>
+/* 64 bit sparc */
+struct stat {
+	unsigned   st_dev;
+	ino_t   st_ino;
+	mode_t  st_mode;
+	short   st_nlink;
+	uid_t   st_uid;
+	gid_t   st_gid;
+	unsigned   st_rdev;
+	off_t   st_size;
+	time_t  st_atime;
+	time_t  st_mtime;
+	time_t  st_ctime;
+	off_t   st_blksize;
+	off_t   st_blocks;
+	unsigned long  __unused4[2];
+};
+
+struct stat64 {
+	unsigned long	st_dev;
+	unsigned long	st_ino;
+	unsigned long	st_nlink;
+
+	unsigned int	st_mode;
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+	unsigned int	__pad0;
+
+	unsigned long	st_rdev;
+	long		st_size;
+	long		st_blksize;
+	long		st_blocks;
+
+	unsigned long	st_atime;
+	unsigned long	st_atime_nsec;
+	unsigned long	st_mtime;
+	unsigned long	st_mtime_nsec;
+	unsigned long	st_ctime;
+	unsigned long	st_ctime_nsec;
+	long		__unused[3];
+};
+
 #else
-#include <asm/stat_32.h>
-#endif
-#endif
+/* 32 bit sparc */
+struct stat {
+	unsigned short	st_dev;
+	ino_t		st_ino;
+	mode_t		st_mode;
+	short		st_nlink;
+	uid_t		st_uid;
+	gid_t		st_gid;
+	unsigned short	st_rdev;
+	off_t		st_size;
+	time_t		st_atime;
+	unsigned long	st_atime_nsec;
+	time_t		st_mtime;
+	unsigned long	st_mtime_nsec;
+	time_t		st_ctime;
+	unsigned long	st_ctime_nsec;
+	off_t		st_blksize;
+	off_t		st_blocks;
+	unsigned long	__unused4[2];
+};
+
+#define STAT_HAVE_NSEC 1
+
+struct stat64 {
+	unsigned long long st_dev;
+
+	unsigned long long st_ino;
+
+	unsigned int	st_mode;
+	unsigned int	st_nlink;
+
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+
+	unsigned long long st_rdev;
+
+	unsigned char	__pad3[8];
+
+	long long	st_size;
+	unsigned int	st_blksize;
+
+	unsigned char	__pad4[8];
+	unsigned int	st_blocks;
+
+	unsigned int	st_atime;
+	unsigned int	st_atime_nsec;
+
+	unsigned int	st_mtime;
+	unsigned int	st_mtime_nsec;
+
+	unsigned int	st_ctime;
+	unsigned int	st_ctime_nsec;
+
+	unsigned int	__unused4;
+	unsigned int	__unused5;
+};
+#endif /* defined(__sparc__) && defined(__arch64__) */
+#endif /* __SPARC_STAT_H */
diff --git a/arch/sparc/include/asm/stat_32.h b/arch/sparc/include/asm/stat_32.h
deleted file mode 100644
index 45b3ee4..0000000
--- a/arch/sparc/include/asm/stat_32.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#ifndef _SPARC_STAT_H
-#define _SPARC_STAT_H
-
-#include <linux/types.h>
-
-struct stat {
-	unsigned short	st_dev;
-	ino_t		st_ino;
-	mode_t		st_mode;
-	short		st_nlink;
-	uid_t		st_uid;
-	gid_t		st_gid;
-	unsigned short	st_rdev;
-	off_t		st_size;
-	time_t		st_atime;
-	unsigned long	st_atime_nsec;
-	time_t		st_mtime;
-	unsigned long	st_mtime_nsec;
-	time_t		st_ctime;
-	unsigned long	st_ctime_nsec;
-	off_t		st_blksize;
-	off_t		st_blocks;
-	unsigned long	__unused4[2];
-};
-
-#define STAT_HAVE_NSEC 1
-
-struct stat64 {
-	unsigned long long st_dev;
-
-	unsigned long long st_ino;
-
-	unsigned int	st_mode;
-	unsigned int	st_nlink;
-
-	unsigned int	st_uid;
-	unsigned int	st_gid;
-
-	unsigned long long st_rdev;
-
-	unsigned char	__pad3[8];
-
-	long long	st_size;
-	unsigned int	st_blksize;
-
-	unsigned char	__pad4[8];
-	unsigned int	st_blocks;
-
-	unsigned int	st_atime;
-	unsigned int	st_atime_nsec;
-
-	unsigned int	st_mtime;
-	unsigned int	st_mtime_nsec;
-
-	unsigned int	st_ctime;
-	unsigned int	st_ctime_nsec;
-
-	unsigned int	__unused4;
-	unsigned int	__unused5;
-};
-
-#endif
diff --git a/arch/sparc/include/asm/stat_64.h b/arch/sparc/include/asm/stat_64.h
deleted file mode 100644
index 9650fde..0000000
--- a/arch/sparc/include/asm/stat_64.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef _SPARC64_STAT_H
-#define _SPARC64_STAT_H
-
-#include <linux/types.h>
-
-struct stat {
-	unsigned   st_dev;
-	ino_t   st_ino;
-	mode_t  st_mode;
-	short   st_nlink;
-	uid_t   st_uid;
-	gid_t   st_gid;
-	unsigned   st_rdev;
-	off_t   st_size;
-	time_t  st_atime;
-	time_t  st_mtime;
-	time_t  st_ctime;
-	off_t   st_blksize;
-	off_t   st_blocks;
-	unsigned long  __unused4[2];
-};
-
-struct stat64 {
-	unsigned long	st_dev;
-	unsigned long	st_ino;
-	unsigned long	st_nlink;
-
-	unsigned int	st_mode;
-	unsigned int	st_uid;
-	unsigned int	st_gid;
-	unsigned int	__pad0;
-
-	unsigned long	st_rdev;
-	long		st_size;
-	long		st_blksize;
-	long		st_blocks;
-
-	unsigned long	st_atime;
-	unsigned long	st_atime_nsec;
-	unsigned long	st_mtime;
-	unsigned long	st_mtime_nsec;
-	unsigned long	st_ctime;
-	unsigned long	st_ctime_nsec;
-	long		__unused[3];
-};
-
-#endif
-- 
cgit v0.10.2


From a0381a9480fffc6269d06f79da5fa5c511621c29 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 2 Jan 2009 19:12:46 -0800
Subject: sparc: Kill bogus comment about IRQF_SHARED in pci_psycho.c

Noticed by Geert Uytterhoeven.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/kernel/pci_psycho.c b/arch/sparc/kernel/pci_psycho.c
index dfb3ec8..3b34344 100644
--- a/arch/sparc/kernel/pci_psycho.c
+++ b/arch/sparc/kernel/pci_psycho.c
@@ -307,10 +307,7 @@ static void psycho_register_error_handlers(struct pci_pbm_info *pbm)
 
 	/* We really mean to ignore the return result here.  Two
 	 * PCI controller share the same interrupt numbers and
-	 * drive the same front-end hardware.  Whichever of the
-	 * two get in here first will register the IRQ handler
-	 * the second will just error out since we do not pass in
-	 * IRQF_SHARED.
+	 * drive the same front-end hardware.
 	 */
 	err = request_irq(op->irqs[1], psycho_ue_intr, IRQF_SHARED,
 			  "PSYCHO_UE", pbm);
-- 
cgit v0.10.2


From 55d646feee9c0ced63f4189aca4ba7c9508b75f9 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 19:17:47 -0800
Subject: sparc64: prepare signal_64 for unification

o add a sparc32 only definition
o fix a few style issues (white space errors etc).
o include compiler.h (for __user)

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/signal_64.h b/arch/sparc/include/asm/signal_64.h
index ab1509a..41535e7 100644
--- a/arch/sparc/include/asm/signal_64.h
+++ b/arch/sparc/include/asm/signal_64.h
@@ -1,7 +1,8 @@
-#ifndef _ASMSPARC64_SIGNAL_H
-#define _ASMSPARC64_SIGNAL_H
+#ifndef __SPARC_SIGNAL_H
+#define __SPARC_SIGNAL_H
 
 #include <asm/sigcontext.h>
+#include <linux/compiler.h>
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
@@ -83,8 +84,8 @@
 
 #define __OLD_NSIG	32
 #define __NEW_NSIG      64
-#define _NSIG_BPW     	64
-#define _NSIG_WORDS   	(__NEW_NSIG / _NSIG_BPW)
+#define _NSIG_BPW       64
+#define _NSIG_WORDS     (__NEW_NSIG / _NSIG_BPW)
 
 #define SIGRTMIN       32
 #define SIGRTMAX       __NEW_NSIG
@@ -156,20 +157,35 @@ struct sigstack {
 #define MINSIGSTKSZ	4096
 #define SIGSTKSZ	16384
 
+#ifdef __KERNEL__
+/*
+ * DJHR
+ * SA_STATIC_ALLOC is used for the sparc32 system to indicate that this
+ * interrupt handler's irq structure should be statically allocated
+ * by the request_irq routine.
+ * The alternative is that arch/sparc/kernel/irq.c has carnal knowledge
+ * of interrupt usage and that sucks. Also without a flag like this
+ * it may be possible for the free_irq routine to attempt to free
+ * statically allocated data.. which is NOT GOOD.
+ *
+ */
+#define SA_STATIC_ALLOC         0x8000
+#endif
+
 #include <asm-generic/signal.h>
 
 struct __new_sigaction {
 	__sighandler_t		sa_handler;
 	unsigned long		sa_flags;
-	__sigrestore_t 		sa_restorer;  /* not used by Linux/SPARC yet */
+	__sigrestore_t		sa_restorer;  /* not used by Linux/SPARC yet */
 	__new_sigset_t		sa_mask;
 };
 
 struct __old_sigaction {
-	__sighandler_t  	sa_handler;
-	__old_sigset_t  	sa_mask;
-	unsigned long   	sa_flags;
-	void 			(*sa_restorer)(void);     /* not used by Linux/SPARC yet */
+	__sighandler_t		sa_handler;
+	__old_sigset_t		sa_mask;
+	unsigned long		sa_flags;
+	void			(*sa_restorer)(void);  /* not used by Linux/SPARC yet */
 };
 
 typedef struct sigaltstack {
@@ -181,8 +197,8 @@ typedef struct sigaltstack {
 #ifdef __KERNEL__
 
 struct k_sigaction {
-	struct __new_sigaction 	sa;
-	void __user		*ka_restorer;
+	struct			__new_sigaction sa;
+	void			__user *ka_restorer;
 };
 
 #define ptrace_signal_deliver(regs, cookie) do { } while (0)
@@ -191,4 +207,4 @@ struct k_sigaction {
 
 #endif /* !(__ASSEMBLY__) */
 
-#endif /* !(_ASMSPARC64_SIGNAL_H) */
+#endif /* !(__SPARC_SIGNAL_H) */
-- 
cgit v0.10.2


From ece93487c31607558f4b91f378fcee4b43956dbc Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 19:21:06 -0800
Subject: sparc: unify signal.h

They were almost identical and with the preapration
patch nothing was needed to be added.

The unified version contains a few sparc64 only definitions
but they are kept as is and not protected by ifdef/endif.
The unified version exports a bit more to userspace then the
32 bit version did.
This is not considered fatal.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 42cf483..fe724d4 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -9,8 +9,6 @@ header-y += sigcontext_32.h
 header-y += sigcontext_64.h
 header-y += siginfo_32.h
 header-y += siginfo_64.h
-header-y += signal_32.h
-header-y += signal_64.h
 
 header-y += apc.h
 header-y += asi.h
diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h
index 27ab05d..41535e7 100644
--- a/arch/sparc/include/asm/signal.h
+++ b/arch/sparc/include/asm/signal.h
@@ -1,8 +1,210 @@
-#ifndef ___ASM_SPARC_SIGNAL_H
-#define ___ASM_SPARC_SIGNAL_H
-#if defined(__sparc__) && defined(__arch64__)
-#include <asm/signal_64.h>
+#ifndef __SPARC_SIGNAL_H
+#define __SPARC_SIGNAL_H
+
+#include <asm/sigcontext.h>
+#include <linux/compiler.h>
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#include <linux/personality.h>
+#include <linux/types.h>
+#endif
+#endif
+
+/* On the Sparc the signal handlers get passed a 'sub-signal' code
+ * for certain signal types, which we document here.
+ */
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define    SUBSIG_STACK       0
+#define    SUBSIG_ILLINST     2
+#define    SUBSIG_PRIVINST    3
+#define    SUBSIG_BADTRAP(t)  (0x80 + (t))
+
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+
+#define SIGEMT           7
+#define    SUBSIG_TAG    10
+
+#define SIGFPE		 8
+#define    SUBSIG_FPDISABLED     0x400
+#define    SUBSIG_FPERROR        0x404
+#define    SUBSIG_FPINTOVFL      0x001
+#define    SUBSIG_FPSTSIG        0x002
+#define    SUBSIG_IDIVZERO       0x014
+#define    SUBSIG_FPINEXACT      0x0c4
+#define    SUBSIG_FPDIVZERO      0x0c8
+#define    SUBSIG_FPUNFLOW       0x0cc
+#define    SUBSIG_FPOPERROR      0x0d0
+#define    SUBSIG_FPOVFLOW       0x0d4
+
+#define SIGKILL		 9
+#define SIGBUS          10
+#define    SUBSIG_BUSTIMEOUT    1
+#define    SUBSIG_ALIGNMENT     2
+#define    SUBSIG_MISCERROR     5
+
+#define SIGSEGV		11
+#define    SUBSIG_NOMAPPING     3
+#define    SUBSIG_PROTECTION    4
+#define    SUBSIG_SEGERROR      5
+
+#define SIGSYS		12
+
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGURG          16
+
+/* SunOS values which deviate from the Linux/i386 ones */
+#define SIGSTOP		17
+#define SIGTSTP		18
+#define SIGCONT		19
+#define SIGCHLD		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGIO		23
+#define SIGPOLL		SIGIO   /* SysV name for SIGIO */
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGLOST		29
+#define SIGPWR		SIGLOST
+#define SIGUSR1		30
+#define SIGUSR2		31
+
+/* Most things should be clean enough to redefine this at will, if care
+   is taken to make libc match.  */
+
+#define __OLD_NSIG	32
+#define __NEW_NSIG      64
+#define _NSIG_BPW       64
+#define _NSIG_WORDS     (__NEW_NSIG / _NSIG_BPW)
+
+#define SIGRTMIN       32
+#define SIGRTMAX       __NEW_NSIG
+
+#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__)
+#define _NSIG			__NEW_NSIG
+#define __new_sigset_t		sigset_t
+#define __new_sigaction		sigaction
+#define __new_sigaction32	sigaction32
+#define __old_sigset_t		old_sigset_t
+#define __old_sigaction		old_sigaction
+#define __old_sigaction32	old_sigaction32
 #else
-#include <asm/signal_32.h>
+#define _NSIG			__OLD_NSIG
+#define NSIG			_NSIG
+#define __old_sigset_t		sigset_t
+#define __old_sigaction		sigaction
+#define __old_sigaction32	sigaction32
 #endif
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long __old_sigset_t;            /* at least 32 bits */
+
+typedef struct {
+       unsigned long sig[_NSIG_WORDS];
+} __new_sigset_t;
+
+/* A SunOS sigstack */
+struct sigstack {
+	/* XXX 32-bit pointers pinhead XXX */
+	char *the_stack;
+	int   cur_status;
+};
+
+/* Sigvec flags */
+#define _SV_SSTACK    1u    /* This signal handler should use sig-stack */
+#define _SV_INTR      2u    /* Sig return should not restart system call */
+#define _SV_RESET     4u    /* Set handler to SIG_DFL upon taken signal */
+#define _SV_IGNCHILD  8u    /* Do not send SIGCHLD */
+
+/*
+ * sa_flags values: SA_STACK is not currently supported, but will allow the
+ * usage of signal stacks by using the (now obsolete) sa_restorer field in
+ * the sigaction structure as a stack pointer. This is now possible due to
+ * the changes in signal handling. LBT 010493.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ */
+#define SA_NOCLDSTOP	_SV_IGNCHILD
+#define SA_STACK	_SV_SSTACK
+#define SA_ONSTACK	_SV_SSTACK
+#define SA_RESTART	_SV_INTR
+#define SA_ONESHOT	_SV_RESET
+#define SA_NOMASK	0x20u
+#define SA_NOCLDWAIT    0x100u
+#define SA_SIGINFO      0x200u
+
+
+#define SIG_BLOCK          0x01	/* for blocking signals */
+#define SIG_UNBLOCK        0x02	/* for unblocking signals */
+#define SIG_SETMASK        0x04	/* for setting the signal mask */
+
+/*
+ * sigaltstack controls
+ */
+#define SS_ONSTACK	1
+#define SS_DISABLE	2
+
+#define MINSIGSTKSZ	4096
+#define SIGSTKSZ	16384
+
+#ifdef __KERNEL__
+/*
+ * DJHR
+ * SA_STATIC_ALLOC is used for the sparc32 system to indicate that this
+ * interrupt handler's irq structure should be statically allocated
+ * by the request_irq routine.
+ * The alternative is that arch/sparc/kernel/irq.c has carnal knowledge
+ * of interrupt usage and that sucks. Also without a flag like this
+ * it may be possible for the free_irq routine to attempt to free
+ * statically allocated data.. which is NOT GOOD.
+ *
+ */
+#define SA_STATIC_ALLOC         0x8000
 #endif
+
+#include <asm-generic/signal.h>
+
+struct __new_sigaction {
+	__sighandler_t		sa_handler;
+	unsigned long		sa_flags;
+	__sigrestore_t		sa_restorer;  /* not used by Linux/SPARC yet */
+	__new_sigset_t		sa_mask;
+};
+
+struct __old_sigaction {
+	__sighandler_t		sa_handler;
+	__old_sigset_t		sa_mask;
+	unsigned long		sa_flags;
+	void			(*sa_restorer)(void);  /* not used by Linux/SPARC yet */
+};
+
+typedef struct sigaltstack {
+	void			__user *ss_sp;
+	int			ss_flags;
+	size_t			ss_size;
+} stack_t;
+
+#ifdef __KERNEL__
+
+struct k_sigaction {
+	struct			__new_sigaction sa;
+	void			__user *ka_restorer;
+};
+
+#define ptrace_signal_deliver(regs, cookie) do { } while (0)
+
+#endif /* !(__KERNEL__) */
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(__SPARC_SIGNAL_H) */
diff --git a/arch/sparc/include/asm/signal_32.h b/arch/sparc/include/asm/signal_32.h
deleted file mode 100644
index 96a60ab..0000000
--- a/arch/sparc/include/asm/signal_32.h
+++ /dev/null
@@ -1,207 +0,0 @@
-#ifndef _ASMSPARC_SIGNAL_H
-#define _ASMSPARC_SIGNAL_H
-
-#include <asm/sigcontext.h>
-#include <linux/compiler.h>
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-#include <linux/personality.h>
-#include <linux/types.h>
-#endif
-#endif
-
-/* On the Sparc the signal handlers get passed a 'sub-signal' code
- * for certain signal types, which we document here.
- */
-#define SIGHUP		 1
-#define SIGINT		 2
-#define SIGQUIT		 3
-#define SIGILL		 4
-#define    SUBSIG_STACK       0
-#define    SUBSIG_ILLINST     2
-#define    SUBSIG_PRIVINST    3
-#define    SUBSIG_BADTRAP(t)  (0x80 + (t))
-
-#define SIGTRAP		 5
-#define SIGABRT		 6
-#define SIGIOT		 6
-
-#define SIGEMT           7
-#define    SUBSIG_TAG    10
-
-#define SIGFPE		 8
-#define    SUBSIG_FPDISABLED     0x400
-#define    SUBSIG_FPERROR        0x404
-#define    SUBSIG_FPINTOVFL      0x001
-#define    SUBSIG_FPSTSIG        0x002
-#define    SUBSIG_IDIVZERO       0x014
-#define    SUBSIG_FPINEXACT      0x0c4
-#define    SUBSIG_FPDIVZERO      0x0c8
-#define    SUBSIG_FPUNFLOW       0x0cc
-#define    SUBSIG_FPOPERROR      0x0d0
-#define    SUBSIG_FPOVFLOW       0x0d4
-
-#define SIGKILL		 9
-#define SIGBUS          10
-#define    SUBSIG_BUSTIMEOUT    1
-#define    SUBSIG_ALIGNMENT     2
-#define    SUBSIG_MISCERROR     5
-
-#define SIGSEGV		11
-#define    SUBSIG_NOMAPPING     3
-#define    SUBSIG_PROTECTION    4
-#define    SUBSIG_SEGERROR      5
-
-#define SIGSYS		12
-
-#define SIGPIPE		13
-#define SIGALRM		14
-#define SIGTERM		15
-#define SIGURG          16
-
-/* SunOS values which deviate from the Linux/i386 ones */
-#define SIGSTOP		17
-#define SIGTSTP		18
-#define SIGCONT		19
-#define SIGCHLD		20
-#define SIGTTIN		21
-#define SIGTTOU		22
-#define SIGIO		23
-#define SIGPOLL		SIGIO   /* SysV name for SIGIO */
-#define SIGXCPU		24
-#define SIGXFSZ		25
-#define SIGVTALRM	26
-#define SIGPROF		27
-#define SIGWINCH	28
-#define SIGLOST		29
-#define SIGPWR		SIGLOST
-#define SIGUSR1		30
-#define SIGUSR2		31
-
-/* Most things should be clean enough to redefine this at will, if care
- * is taken to make libc match.
- */
-
-#define __OLD_NSIG	32
-#define __NEW_NSIG	64
-#define _NSIG_BPW	32
-#define _NSIG_WORDS	(__NEW_NSIG / _NSIG_BPW)
-
-#define SIGRTMIN	32
-#define SIGRTMAX	__NEW_NSIG
-
-#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__)
-#define	_NSIG		__NEW_NSIG
-#define __new_sigset_t	sigset_t
-#define __new_sigaction	sigaction
-#define __old_sigset_t	old_sigset_t
-#define __old_sigaction	old_sigaction
-#else
-#define _NSIG		__OLD_NSIG
-#define __old_sigset_t	sigset_t
-#define __old_sigaction	sigaction
-#endif
-
-#ifndef __ASSEMBLY__
-
-typedef unsigned long __old_sigset_t;
-
-typedef struct {
-	unsigned long	sig[_NSIG_WORDS];
-} __new_sigset_t;
-
-
-#ifdef __KERNEL__
-/* A SunOS sigstack */
-struct sigstack {
-	char *the_stack;
-	int   cur_status;
-};
-#endif
-
-/* Sigvec flags */
-#define _SV_SSTACK    1u    /* This signal handler should use sig-stack */
-#define _SV_INTR      2u    /* Sig return should not restart system call */
-#define _SV_RESET     4u    /* Set handler to SIG_DFL upon taken signal */
-#define _SV_IGNCHILD  8u    /* Do not send SIGCHLD */
-
-/*
- * sa_flags values: SA_STACK is not currently supported, but will allow the
- * usage of signal stacks by using the (now obsolete) sa_restorer field in
- * the sigaction structure as a stack pointer. This is now possible due to
- * the changes in signal handling. LBT 010493.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- */
-#define SA_NOCLDSTOP	_SV_IGNCHILD
-#define SA_STACK	_SV_SSTACK
-#define SA_ONSTACK	_SV_SSTACK
-#define SA_RESTART	_SV_INTR
-#define SA_ONESHOT	_SV_RESET
-#define SA_NOMASK	0x20u
-#define SA_NOCLDWAIT	0x100u
-#define SA_SIGINFO	0x200u
-
-#define SIG_BLOCK          0x01	/* for blocking signals */
-#define SIG_UNBLOCK        0x02	/* for unblocking signals */
-#define SIG_SETMASK        0x04	/* for setting the signal mask */
-
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
-#define MINSIGSTKSZ	4096
-#define SIGSTKSZ	16384
-
-#ifdef __KERNEL__
-/*
- * DJHR
- * SA_STATIC_ALLOC is used for the SPARC system to indicate that this
- * interrupt handler's irq structure should be statically allocated
- * by the request_irq routine.
- * The alternative is that arch/sparc/kernel/irq.c has carnal knowledge
- * of interrupt usage and that sucks. Also without a flag like this
- * it may be possible for the free_irq routine to attempt to free
- * statically allocated data.. which is NOT GOOD.
- *
- */
-#define SA_STATIC_ALLOC		0x8000
-#endif
-
-#include <asm-generic/signal.h>
-
-#ifdef __KERNEL__
-struct __new_sigaction {
-	__sighandler_t	sa_handler;
-	unsigned long	sa_flags;
-	void		(*sa_restorer)(void);	/* Not used by Linux/SPARC */
-	__new_sigset_t	sa_mask;
-};
-
-struct k_sigaction {
-	struct __new_sigaction	sa;
-	void			__user *ka_restorer;
-};
-
-struct __old_sigaction {
-	__sighandler_t	sa_handler;
-	__old_sigset_t	sa_mask;
-	unsigned long	sa_flags;
-	void		(*sa_restorer) (void);	/* not used by Linux/SPARC */
-};
-
-typedef struct sigaltstack {
-	void		__user *ss_sp;
-	int		ss_flags;
-	size_t		ss_size;
-} stack_t;
-
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
-#endif /* !(__KERNEL__) */
-
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* !(_ASMSPARC_SIGNAL_H) */
diff --git a/arch/sparc/include/asm/signal_64.h b/arch/sparc/include/asm/signal_64.h
deleted file mode 100644
index 41535e7..0000000
--- a/arch/sparc/include/asm/signal_64.h
+++ /dev/null
@@ -1,210 +0,0 @@
-#ifndef __SPARC_SIGNAL_H
-#define __SPARC_SIGNAL_H
-
-#include <asm/sigcontext.h>
-#include <linux/compiler.h>
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-#include <linux/personality.h>
-#include <linux/types.h>
-#endif
-#endif
-
-/* On the Sparc the signal handlers get passed a 'sub-signal' code
- * for certain signal types, which we document here.
- */
-#define SIGHUP		 1
-#define SIGINT		 2
-#define SIGQUIT		 3
-#define SIGILL		 4
-#define    SUBSIG_STACK       0
-#define    SUBSIG_ILLINST     2
-#define    SUBSIG_PRIVINST    3
-#define    SUBSIG_BADTRAP(t)  (0x80 + (t))
-
-#define SIGTRAP		 5
-#define SIGABRT		 6
-#define SIGIOT		 6
-
-#define SIGEMT           7
-#define    SUBSIG_TAG    10
-
-#define SIGFPE		 8
-#define    SUBSIG_FPDISABLED     0x400
-#define    SUBSIG_FPERROR        0x404
-#define    SUBSIG_FPINTOVFL      0x001
-#define    SUBSIG_FPSTSIG        0x002
-#define    SUBSIG_IDIVZERO       0x014
-#define    SUBSIG_FPINEXACT      0x0c4
-#define    SUBSIG_FPDIVZERO      0x0c8
-#define    SUBSIG_FPUNFLOW       0x0cc
-#define    SUBSIG_FPOPERROR      0x0d0
-#define    SUBSIG_FPOVFLOW       0x0d4
-
-#define SIGKILL		 9
-#define SIGBUS          10
-#define    SUBSIG_BUSTIMEOUT    1
-#define    SUBSIG_ALIGNMENT     2
-#define    SUBSIG_MISCERROR     5
-
-#define SIGSEGV		11
-#define    SUBSIG_NOMAPPING     3
-#define    SUBSIG_PROTECTION    4
-#define    SUBSIG_SEGERROR      5
-
-#define SIGSYS		12
-
-#define SIGPIPE		13
-#define SIGALRM		14
-#define SIGTERM		15
-#define SIGURG          16
-
-/* SunOS values which deviate from the Linux/i386 ones */
-#define SIGSTOP		17
-#define SIGTSTP		18
-#define SIGCONT		19
-#define SIGCHLD		20
-#define SIGTTIN		21
-#define SIGTTOU		22
-#define SIGIO		23
-#define SIGPOLL		SIGIO   /* SysV name for SIGIO */
-#define SIGXCPU		24
-#define SIGXFSZ		25
-#define SIGVTALRM	26
-#define SIGPROF		27
-#define SIGWINCH	28
-#define SIGLOST		29
-#define SIGPWR		SIGLOST
-#define SIGUSR1		30
-#define SIGUSR2		31
-
-/* Most things should be clean enough to redefine this at will, if care
-   is taken to make libc match.  */
-
-#define __OLD_NSIG	32
-#define __NEW_NSIG      64
-#define _NSIG_BPW       64
-#define _NSIG_WORDS     (__NEW_NSIG / _NSIG_BPW)
-
-#define SIGRTMIN       32
-#define SIGRTMAX       __NEW_NSIG
-
-#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__)
-#define _NSIG			__NEW_NSIG
-#define __new_sigset_t		sigset_t
-#define __new_sigaction		sigaction
-#define __new_sigaction32	sigaction32
-#define __old_sigset_t		old_sigset_t
-#define __old_sigaction		old_sigaction
-#define __old_sigaction32	old_sigaction32
-#else
-#define _NSIG			__OLD_NSIG
-#define NSIG			_NSIG
-#define __old_sigset_t		sigset_t
-#define __old_sigaction		sigaction
-#define __old_sigaction32	sigaction32
-#endif
-
-#ifndef __ASSEMBLY__
-
-typedef unsigned long __old_sigset_t;            /* at least 32 bits */
-
-typedef struct {
-       unsigned long sig[_NSIG_WORDS];
-} __new_sigset_t;
-
-/* A SunOS sigstack */
-struct sigstack {
-	/* XXX 32-bit pointers pinhead XXX */
-	char *the_stack;
-	int   cur_status;
-};
-
-/* Sigvec flags */
-#define _SV_SSTACK    1u    /* This signal handler should use sig-stack */
-#define _SV_INTR      2u    /* Sig return should not restart system call */
-#define _SV_RESET     4u    /* Set handler to SIG_DFL upon taken signal */
-#define _SV_IGNCHILD  8u    /* Do not send SIGCHLD */
-
-/*
- * sa_flags values: SA_STACK is not currently supported, but will allow the
- * usage of signal stacks by using the (now obsolete) sa_restorer field in
- * the sigaction structure as a stack pointer. This is now possible due to
- * the changes in signal handling. LBT 010493.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- */
-#define SA_NOCLDSTOP	_SV_IGNCHILD
-#define SA_STACK	_SV_SSTACK
-#define SA_ONSTACK	_SV_SSTACK
-#define SA_RESTART	_SV_INTR
-#define SA_ONESHOT	_SV_RESET
-#define SA_NOMASK	0x20u
-#define SA_NOCLDWAIT    0x100u
-#define SA_SIGINFO      0x200u
-
-
-#define SIG_BLOCK          0x01	/* for blocking signals */
-#define SIG_UNBLOCK        0x02	/* for unblocking signals */
-#define SIG_SETMASK        0x04	/* for setting the signal mask */
-
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
-#define MINSIGSTKSZ	4096
-#define SIGSTKSZ	16384
-
-#ifdef __KERNEL__
-/*
- * DJHR
- * SA_STATIC_ALLOC is used for the sparc32 system to indicate that this
- * interrupt handler's irq structure should be statically allocated
- * by the request_irq routine.
- * The alternative is that arch/sparc/kernel/irq.c has carnal knowledge
- * of interrupt usage and that sucks. Also without a flag like this
- * it may be possible for the free_irq routine to attempt to free
- * statically allocated data.. which is NOT GOOD.
- *
- */
-#define SA_STATIC_ALLOC         0x8000
-#endif
-
-#include <asm-generic/signal.h>
-
-struct __new_sigaction {
-	__sighandler_t		sa_handler;
-	unsigned long		sa_flags;
-	__sigrestore_t		sa_restorer;  /* not used by Linux/SPARC yet */
-	__new_sigset_t		sa_mask;
-};
-
-struct __old_sigaction {
-	__sighandler_t		sa_handler;
-	__old_sigset_t		sa_mask;
-	unsigned long		sa_flags;
-	void			(*sa_restorer)(void);  /* not used by Linux/SPARC yet */
-};
-
-typedef struct sigaltstack {
-	void			__user *ss_sp;
-	int			ss_flags;
-	size_t			ss_size;
-} stack_t;
-
-#ifdef __KERNEL__
-
-struct k_sigaction {
-	struct			__new_sigaction sa;
-	void			__user *ka_restorer;
-};
-
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
-#endif /* !(__KERNEL__) */
-
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* !(__SPARC_SIGNAL_H) */
-- 
cgit v0.10.2


From 4d7b92ad572b4bd4d92fc80911641bb6cba3b99c Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 19:32:59 -0800
Subject: sparc: add '32' suffix to reg_window, sigcontext, __siginfo_t

Renaming a few types to contain a 32 suffix makes the
type names compatible with sparc64 and thus makes sharing
between the two a lot easier.

Note: None of these definitions are expected part of the
stable ABI towards userspace.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index 2ae67a2..09521c6 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -99,7 +99,7 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc,
 			     "st\t%%g0, [%0 + %3 + 0x3c]"
 			     : /* no outputs */
 			     : "r" (regs),
-			       "r" (sp - sizeof(struct reg_window)),
+			       "r" (sp - sizeof(struct reg_window32)),
 			       "r" (zero),
 			       "i" ((const unsigned long)(&((struct pt_regs *)0)->u_regs[0]))
 			     : "memory");
diff --git a/arch/sparc/include/asm/ptrace_32.h b/arch/sparc/include/asm/ptrace_32.h
index 4cef450..acb2d89 100644
--- a/arch/sparc/include/asm/ptrace_32.h
+++ b/arch/sparc/include/asm/ptrace_32.h
@@ -41,7 +41,7 @@ struct pt_regs {
 #define UREG_RETPC     UREG_I7
 
 /* A register window */
-struct reg_window {
+struct reg_window32 {
 	unsigned long locals[8];
 	unsigned long ins[8];
 };
diff --git a/arch/sparc/include/asm/sigcontext_32.h b/arch/sparc/include/asm/sigcontext_32.h
index c5fb60d..756e996 100644
--- a/arch/sparc/include/asm/sigcontext_32.h
+++ b/arch/sparc/include/asm/sigcontext_32.h
@@ -9,7 +9,7 @@
 #define __SUNOS_MAXWIN   31
 
 /* This is what SunOS does, so shall I. */
-struct sigcontext {
+struct sigcontext32 {
 	int sigc_onstack;      /* state to restore */
 	int sigc_mask;         /* sigmask to restore */
 	int sigc_sp;           /* stack pointer */
@@ -28,10 +28,7 @@ struct sigcontext {
 	char *sigc_spbuf[__SUNOS_MAXWIN];
 
 	/* Windows to restore after signal */
-	struct {
-		unsigned long	locals[8];
-		unsigned long	ins[8];
-	} sigc_wbuf[__SUNOS_MAXWIN];
+	struct reg_window32 sigc_wbuf[__SUNOS_MAXWIN];
 };
 
 typedef struct {
@@ -43,7 +40,7 @@ typedef struct {
 		unsigned long u_regs[16]; /* globals and ins */
 	}		si_regs;
 	int		si_mask;
-} __siginfo_t;
+} __siginfo32_t;
 
 typedef struct {
 	unsigned   long si_float_regs [32];
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index 80fe547..0f7b0e5 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -45,7 +45,7 @@ struct thread_info {
 	/* A place to store user windows and stack pointers
 	 * when the stack needs inspection.
 	 */
-	struct reg_window	reg_window[NSWINS];	/* align for ldd! */
+	struct reg_window32	reg_window[NSWINS];	/* align for ldd! */
 	unsigned long		rwbuf_stkptrs[NSWINS];
 	unsigned long		w_saved;
 
diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c
index 757805c..04df4ed 100644
--- a/arch/sparc/kernel/kgdb_32.c
+++ b/arch/sparc/kernel/kgdb_32.c
@@ -14,14 +14,14 @@ extern unsigned long trapbase;
 
 void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 {
-	struct reg_window *win;
+	struct reg_window32 *win;
 	int i;
 
 	gdb_regs[GDB_G0] = 0;
 	for (i = 0; i < 15; i++)
 		gdb_regs[GDB_G1 + i] = regs->u_regs[UREG_G1 + i];
 
-	win = (struct reg_window *) regs->u_regs[UREG_FP];
+	win = (struct reg_window32 *) regs->u_regs[UREG_FP];
 	for (i = 0; i < 8; i++)
 		gdb_regs[GDB_L0 + i] = win->locals[i];
 	for (i = 0; i < 8; i++)
@@ -43,7 +43,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 {
 	struct thread_info *t = task_thread_info(p);
-	struct reg_window *win;
+	struct reg_window32 *win;
 	int i;
 
 	for (i = GDB_G0; i < GDB_G6; i++)
@@ -55,7 +55,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 	gdb_regs[GDB_SP] = t->ksp;
 	gdb_regs[GDB_O7] = 0;
 
-	win = (struct reg_window *) t->ksp;
+	win = (struct reg_window32 *) t->ksp;
 	for (i = 0; i < 8; i++)
 		gdb_regs[GDB_L0 + i] = win->locals[i];
 	for (i = 0; i < 8; i++)
@@ -77,7 +77,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 
 void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 {
-	struct reg_window *win;
+	struct reg_window32 *win;
 	int i;
 
 	for (i = 0; i < 15; i++)
@@ -96,7 +96,7 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 	regs->npc = gdb_regs[GDB_NPC];
 	regs->y = gdb_regs[GDB_Y];
 
-	win = (struct reg_window *) regs->u_regs[UREG_FP];
+	win = (struct reg_window32 *) regs->u_regs[UREG_FP];
 	for (i = 0; i < 8; i++)
 		win->locals[i] = gdb_regs[GDB_L0 + i];
 	for (i = 0; i < 8; i++)
diff --git a/arch/sparc/kernel/muldiv.c b/arch/sparc/kernel/muldiv.c
index ba960c0..6ce1021 100644
--- a/arch/sparc/kernel/muldiv.c
+++ b/arch/sparc/kernel/muldiv.c
@@ -60,7 +60,7 @@ static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
 }
 
 #define fetch_reg(reg, regs) ({						\
-	struct reg_window __user *win;					\
+	struct reg_window32 __user *win;					\
 	register unsigned long ret;					\
 									\
 	if (!(reg)) ret = 0;						\
@@ -68,7 +68,7 @@ static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
 		ret = regs->u_regs[(reg)];				\
 	} else {							\
 		/* Ho hum, the slightly complicated case. */		\
-		win = (struct reg_window __user *)regs->u_regs[UREG_FP];\
+		win = (struct reg_window32 __user *)regs->u_regs[UREG_FP];\
 		if (get_user (ret, &win->locals[(reg) - 16])) return -1;\
 	}								\
 	ret;								\
@@ -77,7 +77,7 @@ static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
 static inline int
 store_reg(unsigned int result, unsigned int reg, struct pt_regs *regs)
 {
-	struct reg_window __user *win;
+	struct reg_window32 __user *win;
 
 	if (!reg)
 		return 0;
@@ -86,7 +86,7 @@ store_reg(unsigned int result, unsigned int reg, struct pt_regs *regs)
 		return 0;
 	} else {
 		/* need to use put_user() in this case: */
-		win = (struct reg_window __user *) regs->u_regs[UREG_FP];
+		win = (struct reg_window32 __user *) regs->u_regs[UREG_FP];
 		return (put_user(result, &win->locals[reg - 16]));
 	}
 }
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 69d9315..5a8d8ce 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -180,13 +180,13 @@ static DEFINE_SPINLOCK(sparc_backtrace_lock);
 
 void __show_backtrace(unsigned long fp)
 {
-	struct reg_window *rw;
+	struct reg_window32 *rw;
 	unsigned long flags;
 	int cpu = smp_processor_id();
 
 	spin_lock_irqsave(&sparc_backtrace_lock, flags);
 
-	rw = (struct reg_window *)fp;
+	rw = (struct reg_window32 *)fp;
         while(rw && (((unsigned long) rw) >= PAGE_OFFSET) &&
             !(((unsigned long) rw) & 0x7)) {
 		printk("CPU[%d]: ARGS[%08lx,%08lx,%08lx,%08lx,%08lx,%08lx] "
@@ -196,7 +196,7 @@ void __show_backtrace(unsigned long fp)
 		       rw->ins[6],
 		       rw->ins[7]);
 		printk("%pS\n", (void *) rw->ins[7]);
-		rw = (struct reg_window *) rw->ins[6];
+		rw = (struct reg_window32 *) rw->ins[6];
 	}
 	spin_unlock_irqrestore(&sparc_backtrace_lock, flags);
 }
@@ -258,7 +258,7 @@ void show_stackframe(struct sparc_stackf *sf)
 
 void show_regs(struct pt_regs *r)
 {
-	struct reg_window *rw = (struct reg_window *) r->u_regs[14];
+	struct reg_window32 *rw = (struct reg_window32 *) r->u_regs[14];
 
         printk("PSR: %08lx PC: %08lx NPC: %08lx Y: %08lx    %s\n",
 	       r->psr, r->pc, r->npc, r->y, print_tainted());
@@ -287,7 +287,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
 {
 	unsigned long pc, fp;
 	unsigned long task_base;
-	struct reg_window *rw;
+	struct reg_window32 *rw;
 	int count = 0;
 
 	if (tsk != NULL)
@@ -301,7 +301,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
 		if (fp < (task_base + sizeof(struct thread_info)) ||
 		    fp >= (task_base + (PAGE_SIZE << 1)))
 			break;
-		rw = (struct reg_window *) fp;
+		rw = (struct reg_window32 *) fp;
 		pc = rw->ins[7];
 		printk("[%08lx : ", pc);
 		printk("%pS ] ", (void *) pc);
@@ -679,7 +679,7 @@ unsigned long get_wchan(struct task_struct *task)
 	unsigned long pc, fp, bias = 0;
 	unsigned long task_base = (unsigned long) task;
         unsigned long ret = 0;
-	struct reg_window *rw;
+	struct reg_window32 *rw;
 	int count = 0;
 
 	if (!task || task == current ||
@@ -692,7 +692,7 @@ unsigned long get_wchan(struct task_struct *task)
 		if (fp < (task_base + sizeof(struct thread_info)) ||
 		    fp >= (task_base + (2 * PAGE_SIZE)))
 			break;
-		rw = (struct reg_window *) fp;
+		rw = (struct reg_window32 *) fp;
 		pc = rw->ins[7];
 		if (!in_sched_functions(pc)) {
 			ret = pc;
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index c94f91c..181d069 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -34,7 +34,7 @@ extern void fpload(unsigned long *fpregs, unsigned long *fsr);
 
 struct signal_frame {
 	struct sparc_stackf	ss;
-	__siginfo_t		info;
+	__siginfo32_t		info;
 	__siginfo_fpu_t __user	*fpu_save;
 	unsigned long		insns[2] __attribute__ ((aligned (8)));
 	unsigned int		extramask[_NSIG_WORDS - 1];
@@ -351,7 +351,7 @@ static void setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	err |= __copy_to_user(sf->extramask, &oldset->sig[1],
 			      (_NSIG_WORDS - 1) * sizeof(unsigned int));
 	err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-			      sizeof(struct reg_window));
+			      sizeof(struct reg_window32));
 	if (err)
 		goto sigsegv;
 	
@@ -433,7 +433,7 @@ static void setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	err |= __put_user(current->sas_ss_size, &sf->stack.ss_size);
 	
 	err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-			      sizeof(struct reg_window));	
+			      sizeof(struct reg_window32));
 
 	err |= copy_siginfo_to_user(&sf->info, info);
 
diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
index 716f394..213645b 100644
--- a/arch/sparc/kernel/traps_32.c
+++ b/arch/sparc/kernel/traps_32.c
@@ -67,7 +67,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
 	__RESTORE; __RESTORE; __RESTORE; __RESTORE;
 
 	{
-		struct reg_window *rw = (struct reg_window *)regs->u_regs[UREG_FP];
+		struct reg_window32 *rw = (struct reg_window32 *)regs->u_regs[UREG_FP];
 
 		/* Stop the back trace when we hit userland or we
 		 * find some badly aligned kernel stack. Set an upper
@@ -79,7 +79,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
 		      !(((unsigned long) rw) & 0x7)) {
 			printk("Caller[%08lx]: %pS\n", rw->ins[7],
 			       (void *) rw->ins[7]);
-			rw = (struct reg_window *)rw->ins[6];
+			rw = (struct reg_window32 *)rw->ins[6];
 		}
 	}
 	printk("Instruction DUMP:");
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
index c2a28c5..6b1e6cd 100644
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -97,26 +97,26 @@ static inline int sign_extend_imm13(int imm)
 
 static inline unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 {
-	struct reg_window *win;
+	struct reg_window32 *win;
 
 	if(reg < 16)
 		return (!reg ? 0 : regs->u_regs[reg]);
 
 	/* Ho hum, the slightly complicated case. */
-	win = (struct reg_window *) regs->u_regs[UREG_FP];
+	win = (struct reg_window32 *) regs->u_regs[UREG_FP];
 	return win->locals[reg - 16]; /* yes, I know what this does... */
 }
 
 static inline unsigned long safe_fetch_reg(unsigned int reg, struct pt_regs *regs)
 {
-	struct reg_window __user *win;
+	struct reg_window32 __user *win;
 	unsigned long ret;
 
 	if (reg < 16)
 		return (!reg ? 0 : regs->u_regs[reg]);
 
 	/* Ho hum, the slightly complicated case. */
-	win = (struct reg_window __user *) regs->u_regs[UREG_FP];
+	win = (struct reg_window32 __user *) regs->u_regs[UREG_FP];
 
 	if ((unsigned long)win & 3)
 		return -1;
@@ -129,11 +129,11 @@ static inline unsigned long safe_fetch_reg(unsigned int reg, struct pt_regs *reg
 
 static inline unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs)
 {
-	struct reg_window *win;
+	struct reg_window32 *win;
 
 	if(reg < 16)
 		return &regs->u_regs[reg];
-	win = (struct reg_window *) regs->u_regs[UREG_FP];
+	win = (struct reg_window32 *) regs->u_regs[UREG_FP];
 	return &win->locals[reg - 16];
 }
 
diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c
index 9cc93ea..f24d298 100644
--- a/arch/sparc/kernel/windows.c
+++ b/arch/sparc/kernel/windows.c
@@ -42,7 +42,7 @@ static inline void shift_window_buffer(int first_win, int last_win, struct threa
 
 	for(i = first_win; i < last_win; i++) {
 		tp->rwbuf_stkptrs[i] = tp->rwbuf_stkptrs[i+1];
-		memcpy(&tp->reg_window[i], &tp->reg_window[i+1], sizeof(struct reg_window));
+		memcpy(&tp->reg_window[i], &tp->reg_window[i+1], sizeof(struct reg_window32));
 	}
 }
 
@@ -70,7 +70,7 @@ void synchronize_user_stack(void)
 
 		/* Ok, let it rip. */
 		if (copy_to_user((char __user *) sp, &tp->reg_window[window],
-				 sizeof(struct reg_window)))
+				 sizeof(struct reg_window32)))
 			continue;
 
 		shift_window_buffer(window, tp->w_saved - 1, tp);
@@ -119,7 +119,7 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who)
 
 		if ((sp & 7) ||
 		    copy_to_user((char __user *) sp, &tp->reg_window[window],
-				 sizeof(struct reg_window)))
+				 sizeof(struct reg_window32)))
 			do_exit(SIGILL);
 	}
 	tp->w_saved = 0;
-- 
cgit v0.10.2


From bd703d88a2dbeb6c7945345de427eedf78ef89c6 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 19:34:46 -0800
Subject: sparc: unify sigcontext.h

With the renamed types in place the unification was straightforward.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index fe724d4..b0a3814 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -5,8 +5,6 @@ header-y += ipcbuf_32.h
 header-y += ipcbuf_64.h
 header-y += ptrace_32.h
 header-y += ptrace_64.h
-header-y += sigcontext_32.h
-header-y += sigcontext_64.h
 header-y += siginfo_32.h
 header-y += siginfo_64.h
 
diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h
index e92de7e..a1607d1 100644
--- a/arch/sparc/include/asm/sigcontext.h
+++ b/arch/sparc/include/asm/sigcontext.h
@@ -1,8 +1,96 @@
-#ifndef ___ASM_SPARC_SIGCONTEXT_H
-#define ___ASM_SPARC_SIGCONTEXT_H
-#if defined(__sparc__) && defined(__arch64__)
-#include <asm/sigcontext_64.h>
+#ifndef __SPARC_SIGCONTEXT_H
+#define __SPARC_SIGCONTEXT_H
+
+#ifdef __KERNEL__
+#include <asm/ptrace.h>
+
+#ifndef __ASSEMBLY__
+
+#define __SUNOS_MAXWIN   31
+
+/* This is what SunOS does, so shall I unless we use new 32bit signals or rt signals. */
+struct sigcontext32 {
+	int sigc_onstack;      /* state to restore */
+	int sigc_mask;         /* sigmask to restore */
+	int sigc_sp;           /* stack pointer */
+	int sigc_pc;           /* program counter */
+	int sigc_npc;          /* next program counter */
+	int sigc_psr;          /* for condition codes etc */
+	int sigc_g1;           /* User uses these two registers */
+	int sigc_o0;           /* within the trampoline code. */
+
+	/* Now comes information regarding the users window set
+	 * at the time of the signal.
+	 */
+	int sigc_oswins;       /* outstanding windows */
+
+	/* stack ptrs for each regwin buf */
+	unsigned sigc_spbuf[__SUNOS_MAXWIN];
+
+	/* Windows to restore after signal */
+	struct reg_window32 sigc_wbuf[__SUNOS_MAXWIN];
+};
+
+
+/* This is what we use for 32bit new non-rt signals. */
+
+typedef struct {
+	struct {
+		unsigned int psr;
+		unsigned int pc;
+		unsigned int npc;
+		unsigned int y;
+		unsigned int u_regs[16]; /* globals and ins */
+	}			si_regs;
+	int			si_mask;
+} __siginfo32_t;
+
+#ifdef CONFIG_SPARC64
+typedef struct {
+	unsigned   int si_float_regs [64];
+	unsigned   long si_fsr;
+	unsigned   long si_gsr;
+	unsigned   long si_fprs;
+} __siginfo_fpu_t;
+
+/* This is what SunOS doesn't, so we have to write this alone
+   and do it properly. */
+struct sigcontext {
+	/* The size of this array has to match SI_MAX_SIZE from siginfo.h */
+	char			sigc_info[128];
+	struct {
+		unsigned long	u_regs[16]; /* globals and ins */
+		unsigned long	tstate;
+		unsigned long	tpc;
+		unsigned long	tnpc;
+		unsigned int	y;
+		unsigned int	fprs;
+	}			sigc_regs;
+	__siginfo_fpu_t *	sigc_fpu_save;
+	struct {
+		void	*	ss_sp;
+		int		ss_flags;
+		unsigned long	ss_size;
+	}			sigc_stack;
+	unsigned long		sigc_mask;
+};
+
 #else
-#include <asm/sigcontext_32.h>
-#endif
-#endif
+
+typedef struct {
+	unsigned long si_float_regs [32];
+	unsigned long si_fsr;
+	unsigned long si_fpqdepth;
+	struct {
+		unsigned long *insn_addr;
+		unsigned long insn;
+	} si_fpqueue [16];
+} __siginfo_fpu_t;
+#endif /* (CONFIG_SPARC64) */
+
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* (__KERNEL__) */
+
+#endif /* !(__SPARC_SIGCONTEXT_H) */
diff --git a/arch/sparc/include/asm/sigcontext_32.h b/arch/sparc/include/asm/sigcontext_32.h
deleted file mode 100644
index 756e996..0000000
--- a/arch/sparc/include/asm/sigcontext_32.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef __SPARC_SIGCONTEXT_H
-#define __SPARC_SIGCONTEXT_H
-
-#ifdef __KERNEL__
-#include <asm/ptrace.h>
-
-#ifndef __ASSEMBLY__
-
-#define __SUNOS_MAXWIN   31
-
-/* This is what SunOS does, so shall I. */
-struct sigcontext32 {
-	int sigc_onstack;      /* state to restore */
-	int sigc_mask;         /* sigmask to restore */
-	int sigc_sp;           /* stack pointer */
-	int sigc_pc;           /* program counter */
-	int sigc_npc;          /* next program counter */
-	int sigc_psr;          /* for condition codes etc */
-	int sigc_g1;           /* User uses these two registers */
-	int sigc_o0;           /* within the trampoline code. */
-
-	/* Now comes information regarding the users window set
-	 * at the time of the signal.
-	 */
-	int sigc_oswins;       /* outstanding windows */
-
-	/* stack ptrs for each regwin buf */
-	char *sigc_spbuf[__SUNOS_MAXWIN];
-
-	/* Windows to restore after signal */
-	struct reg_window32 sigc_wbuf[__SUNOS_MAXWIN];
-};
-
-typedef struct {
-	struct {
-		unsigned long psr;
-		unsigned long pc;
-		unsigned long npc;
-		unsigned long y;
-		unsigned long u_regs[16]; /* globals and ins */
-	}		si_regs;
-	int		si_mask;
-} __siginfo32_t;
-
-typedef struct {
-	unsigned   long si_float_regs [32];
-	unsigned   long si_fsr;
-	unsigned   long si_fpqdepth;
-	struct {
-		unsigned long *insn_addr;
-		unsigned long insn;
-	} si_fpqueue [16];
-} __siginfo_fpu_t;
-
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* (__KERNEL__) */
-
-#endif /* !(__SPARC_SIGCONTEXT_H) */
diff --git a/arch/sparc/include/asm/sigcontext_64.h b/arch/sparc/include/asm/sigcontext_64.h
deleted file mode 100644
index 1c868d6..0000000
--- a/arch/sparc/include/asm/sigcontext_64.h
+++ /dev/null
@@ -1,87 +0,0 @@
-#ifndef __SPARC64_SIGCONTEXT_H
-#define __SPARC64_SIGCONTEXT_H
-
-#ifdef __KERNEL__
-#include <asm/ptrace.h>
-#endif
-
-#ifndef __ASSEMBLY__
-
-#ifdef __KERNEL__
-
-#define __SUNOS_MAXWIN   31
-
-/* This is what SunOS does, so shall I unless we use new 32bit signals or rt signals. */
-struct sigcontext32 {
-	int sigc_onstack;      /* state to restore */
-	int sigc_mask;         /* sigmask to restore */
-	int sigc_sp;           /* stack pointer */
-	int sigc_pc;           /* program counter */
-	int sigc_npc;          /* next program counter */
-	int sigc_psr;          /* for condition codes etc */
-	int sigc_g1;           /* User uses these two registers */
-	int sigc_o0;           /* within the trampoline code. */
-
-	/* Now comes information regarding the users window set
-	 * at the time of the signal.
-	 */
-	int sigc_oswins;       /* outstanding windows */
-
-	/* stack ptrs for each regwin buf */
-	unsigned sigc_spbuf[__SUNOS_MAXWIN];
-
-	/* Windows to restore after signal */
-	struct reg_window32 sigc_wbuf[__SUNOS_MAXWIN];
-};
-
-#endif
-
-#ifdef __KERNEL__
-
-/* This is what we use for 32bit new non-rt signals. */
-
-typedef struct {
-	struct {
-		unsigned int psr;
-		unsigned int pc;
-		unsigned int npc;
-		unsigned int y;
-		unsigned int u_regs[16]; /* globals and ins */
-	}			si_regs;
-	int			si_mask;
-} __siginfo32_t;
-
-#endif
-
-typedef struct {
-	unsigned   int si_float_regs [64];
-	unsigned   long si_fsr;
-	unsigned   long si_gsr;
-	unsigned   long si_fprs;
-} __siginfo_fpu_t;
-
-/* This is what SunOS doesn't, so we have to write this alone
-   and do it properly. */
-struct sigcontext {
-	/* The size of this array has to match SI_MAX_SIZE from siginfo.h */
-	char			sigc_info[128];
-	struct {
-		unsigned long	u_regs[16]; /* globals and ins */
-		unsigned long	tstate;
-		unsigned long	tpc;
-		unsigned long	tnpc;
-		unsigned int	y;
-		unsigned int	fprs;
-	}			sigc_regs;
-	__siginfo_fpu_t *	sigc_fpu_save;
-	struct {
-		void	*	ss_sp;
-		int		ss_flags;
-		unsigned long	ss_size;
-	}			sigc_stack;
-	unsigned long		sigc_mask;
-};
-
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* !(__SPARC64_SIGCONTEXT_H) */
-- 
cgit v0.10.2


From f3ec38d5135ca4bff0132c0782da6da4663ae0e5 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 19:42:12 -0800
Subject: sparc: unify ptrace.h

The two ptrace.h implementations are very alike but
the small differences required two set of ifdef/else/endif pairs.

The definition of reg_window32 could have been shared but
that would have required several updates in sparc32 code as
all printk formatting for example assume it is longs.

sparc_stackf looked like anohter candidate to share if the 32
bit was renamed to sparc_stackf32.
But it contains two pointers in the sparc32 version which would
have been 64 bit in the sparc64 version so it was non-trivial.
Using a set of accessor macros could do the trick if pursued later.

The sparc64 specific definitions are not protected by
ifdef - as it should not be required to do so.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index b0a3814..f79249f 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -3,8 +3,6 @@ include include/asm-generic/Kbuild.asm
 
 header-y += ipcbuf_32.h
 header-y += ipcbuf_64.h
-header-y += ptrace_32.h
-header-y += ptrace_64.h
 header-y += siginfo_32.h
 header-y += siginfo_64.h
 
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
index 6dcbe2e..30b0b79 100644
--- a/arch/sparc/include/asm/ptrace.h
+++ b/arch/sparc/include/asm/ptrace.h
@@ -1,8 +1,448 @@
-#ifndef ___ASM_SPARC_PTRACE_H
-#define ___ASM_SPARC_PTRACE_H
+#ifndef __SPARC_PTRACE_H
+#define __SPARC_PTRACE_H
+
 #if defined(__sparc__) && defined(__arch64__)
-#include <asm/ptrace_64.h>
+/* 64 bit sparc */
+#include <asm/pstate.h>
+
+/* This struct defines the way the registers are stored on the
+ * stack during a system call and basically all traps.
+ */
+
+/* This magic value must have the low 9 bits clear,
+ * as that is where we encode the %tt value, see below.
+ */
+#define PT_REGS_MAGIC 0x57ac6c00
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+struct pt_regs {
+	unsigned long u_regs[16]; /* globals and ins */
+	unsigned long tstate;
+	unsigned long tpc;
+	unsigned long tnpc;
+	unsigned int y;
+
+	/* We encode a magic number, PT_REGS_MAGIC, along
+	 * with the %tt (trap type) register value at trap
+	 * entry time.  The magic number allows us to identify
+	 * accurately a trap stack frame in the stack
+	 * unwinder, and the %tt value allows us to test
+	 * things like "in a system call" etc. for an arbitray
+	 * process.
+	 *
+	 * The PT_REGS_MAGIC is choosen such that it can be
+	 * loaded completely using just a sethi instruction.
+	 */
+	unsigned int magic;
+};
+
+struct pt_regs32 {
+	unsigned int psr;
+	unsigned int pc;
+	unsigned int npc;
+	unsigned int y;
+	unsigned int u_regs[16]; /* globals and ins */
+};
+
+/* A V9 register window */
+struct reg_window {
+	unsigned long locals[8];
+	unsigned long ins[8];
+};
+
+/* A 32-bit register window. */
+struct reg_window32 {
+	unsigned int locals[8];
+	unsigned int ins[8];
+};
+
+/* A V9 Sparc stack frame */
+struct sparc_stackf {
+	unsigned long locals[8];
+        unsigned long ins[6];
+	struct sparc_stackf *fp;
+	unsigned long callers_pc;
+	char *structptr;
+	unsigned long xargs[6];
+	unsigned long xxargs[1];
+};
+
+/* A 32-bit Sparc stack frame */
+struct sparc_stackf32 {
+	unsigned int locals[8];
+        unsigned int ins[6];
+	unsigned int fp;
+	unsigned int callers_pc;
+	unsigned int structptr;
+	unsigned int xargs[6];
+	unsigned int xxargs[1];
+};
+
+struct sparc_trapf {
+	unsigned long locals[8];
+	unsigned long ins[8];
+	unsigned long _unused;
+	struct pt_regs *regs;
+};
+#endif /* (!__ASSEMBLY__) */
 #else
-#include <asm/ptrace_32.h>
+/* 32 bit sparc */
+
+#include <asm/psr.h>
+
+/* This struct defines the way the registers are stored on the
+ * stack during a system call and basically all traps.
+ */
+#ifndef __ASSEMBLY__
+
+struct pt_regs {
+	unsigned long psr;
+	unsigned long pc;
+	unsigned long npc;
+	unsigned long y;
+	unsigned long u_regs[16]; /* globals and ins */
+};
+
+/* A 32-bit register window. */
+struct reg_window32 {
+	unsigned long locals[8];
+	unsigned long ins[8];
+};
+
+/* A Sparc stack frame */
+struct sparc_stackf {
+	unsigned long locals[8];
+        unsigned long ins[6];
+	struct sparc_stackf *fp;
+	unsigned long callers_pc;
+	char *structptr;
+	unsigned long xargs[6];
+	unsigned long xxargs[1];
+};
+#endif /* (!__ASSEMBLY__) */
+
+#endif /* (defined(__sparc__) && defined(__arch64__))*/
+
+#ifndef __ASSEMBLY__
+
+#define TRACEREG_SZ	sizeof(struct pt_regs)
+#define STACKFRAME_SZ	sizeof(struct sparc_stackf)
+
+#define TRACEREG32_SZ	sizeof(struct pt_regs32)
+#define STACKFRAME32_SZ	sizeof(struct sparc_stackf32)
+
+#endif /* (!__ASSEMBLY__) */
+
+#define UREG_G0        0
+#define UREG_G1        1
+#define UREG_G2        2
+#define UREG_G3        3
+#define UREG_G4        4
+#define UREG_G5        5
+#define UREG_G6        6
+#define UREG_G7        7
+#define UREG_I0        8
+#define UREG_I1        9
+#define UREG_I2        10
+#define UREG_I3        11
+#define UREG_I4        12
+#define UREG_I5        13
+#define UREG_I6        14
+#define UREG_I7        15
+#define UREG_FP        UREG_I6
+#define UREG_RETPC     UREG_I7
+
+#if defined(__sparc__) && defined(__arch64__)
+/* 64 bit sparc */
+
+#ifndef __ASSEMBLY__
+
+#ifdef __KERNEL__
+
+#include <linux/threads.h>
+#include <asm/system.h>
+
+static inline int pt_regs_trap_type(struct pt_regs *regs)
+{
+	return regs->magic & 0x1ff;
+}
+
+static inline bool pt_regs_is_syscall(struct pt_regs *regs)
+{
+	return (regs->tstate & TSTATE_SYSCALL);
+}
+
+static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
+{
+	return (regs->tstate &= ~TSTATE_SYSCALL);
+}
+
+#define arch_ptrace_stop_needed(exit_code, info) \
+({	flush_user_windows(); \
+	get_thread_wsaved() != 0; \
+})
+
+#define arch_ptrace_stop(exit_code, info) \
+	synchronize_user_stack()
+
+struct global_reg_snapshot {
+	unsigned long		tstate;
+	unsigned long		tpc;
+	unsigned long		tnpc;
+	unsigned long		o7;
+	unsigned long		i7;
+	unsigned long		rpc;
+	struct thread_info	*thread;
+	unsigned long		pad1;
+};
+extern struct global_reg_snapshot global_reg_snapshot[NR_CPUS];
+
+#define force_successful_syscall_return()	    \
+do {	current_thread_info()->syscall_noerror = 1; \
+} while (0)
+#define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV))
+#define instruction_pointer(regs) ((regs)->tpc)
+#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
+#define regs_return_value(regs) ((regs)->u_regs[UREG_I0])
+#ifdef CONFIG_SMP
+extern unsigned long profile_pc(struct pt_regs *);
+#else
+#define profile_pc(regs) instruction_pointer(regs)
 #endif
+extern void show_regs(struct pt_regs *);
+#endif /* (__KERNEL__) */
+
+#else /* __ASSEMBLY__ */
+/* For assembly code. */
+#define TRACEREG_SZ		0xa0
+#define STACKFRAME_SZ		0xc0
+
+#define TRACEREG32_SZ		0x50
+#define STACKFRAME32_SZ		0x60
+#endif /* __ASSEMBLY__ */
+
+#else /* (defined(__sparc__) && defined(__arch64__)) */
+
+/* 32 bit sparc */
+
+#ifndef __ASSEMBLY__
+
+#ifdef __KERNEL__
+
+#include <asm/system.h>
+
+static inline bool pt_regs_is_syscall(struct pt_regs *regs)
+{
+	return (regs->psr & PSR_SYSCALL);
+}
+
+static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
+{
+	return (regs->psr &= ~PSR_SYSCALL);
+}
+
+#define arch_ptrace_stop_needed(exit_code, info) \
+({	flush_user_windows(); \
+	current_thread_info()->w_saved != 0;	\
+})
+
+#define arch_ptrace_stop(exit_code, info) \
+	synchronize_user_stack()
+
+#define user_mode(regs) (!((regs)->psr & PSR_PS))
+#define instruction_pointer(regs) ((regs)->pc)
+#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
+unsigned long profile_pc(struct pt_regs *);
+extern void show_regs(struct pt_regs *);
+#endif /* (__KERNEL__) */
+
+#else /* (!__ASSEMBLY__) */
+/* For assembly code. */
+#define TRACEREG_SZ       0x50
+#define STACKFRAME_SZ     0x60
+#endif /* (!__ASSEMBLY__) */
+
+#endif /* (defined(__sparc__) && defined(__arch64__)) */
+
+#ifdef __KERNEL__
+#define STACK_BIAS		2047
 #endif
+
+/* These are for pt_regs. */
+#define PT_V9_G0     0x00
+#define PT_V9_G1     0x08
+#define PT_V9_G2     0x10
+#define PT_V9_G3     0x18
+#define PT_V9_G4     0x20
+#define PT_V9_G5     0x28
+#define PT_V9_G6     0x30
+#define PT_V9_G7     0x38
+#define PT_V9_I0     0x40
+#define PT_V9_I1     0x48
+#define PT_V9_I2     0x50
+#define PT_V9_I3     0x58
+#define PT_V9_I4     0x60
+#define PT_V9_I5     0x68
+#define PT_V9_I6     0x70
+#define PT_V9_FP     PT_V9_I6
+#define PT_V9_I7     0x78
+#define PT_V9_TSTATE 0x80
+#define PT_V9_TPC    0x88
+#define PT_V9_TNPC   0x90
+#define PT_V9_Y      0x98
+#define PT_V9_MAGIC  0x9c
+#define PT_TSTATE	PT_V9_TSTATE
+#define PT_TPC		PT_V9_TPC
+#define PT_TNPC		PT_V9_TNPC
+
+/* These for pt_regs32. */
+#define PT_PSR    0x0
+#define PT_PC     0x4
+#define PT_NPC    0x8
+#define PT_Y      0xc
+#define PT_G0     0x10
+#define PT_WIM    PT_G0
+#define PT_G1     0x14
+#define PT_G2     0x18
+#define PT_G3     0x1c
+#define PT_G4     0x20
+#define PT_G5     0x24
+#define PT_G6     0x28
+#define PT_G7     0x2c
+#define PT_I0     0x30
+#define PT_I1     0x34
+#define PT_I2     0x38
+#define PT_I3     0x3c
+#define PT_I4     0x40
+#define PT_I5     0x44
+#define PT_I6     0x48
+#define PT_FP     PT_I6
+#define PT_I7     0x4c
+
+/* Reg_window offsets */
+#define RW_V9_L0     0x00
+#define RW_V9_L1     0x08
+#define RW_V9_L2     0x10
+#define RW_V9_L3     0x18
+#define RW_V9_L4     0x20
+#define RW_V9_L5     0x28
+#define RW_V9_L6     0x30
+#define RW_V9_L7     0x38
+#define RW_V9_I0     0x40
+#define RW_V9_I1     0x48
+#define RW_V9_I2     0x50
+#define RW_V9_I3     0x58
+#define RW_V9_I4     0x60
+#define RW_V9_I5     0x68
+#define RW_V9_I6     0x70
+#define RW_V9_I7     0x78
+
+#define RW_L0     0x00
+#define RW_L1     0x04
+#define RW_L2     0x08
+#define RW_L3     0x0c
+#define RW_L4     0x10
+#define RW_L5     0x14
+#define RW_L6     0x18
+#define RW_L7     0x1c
+#define RW_I0     0x20
+#define RW_I1     0x24
+#define RW_I2     0x28
+#define RW_I3     0x2c
+#define RW_I4     0x30
+#define RW_I5     0x34
+#define RW_I6     0x38
+#define RW_I7     0x3c
+
+/* Stack_frame offsets */
+#define SF_V9_L0     0x00
+#define SF_V9_L1     0x08
+#define SF_V9_L2     0x10
+#define SF_V9_L3     0x18
+#define SF_V9_L4     0x20
+#define SF_V9_L5     0x28
+#define SF_V9_L6     0x30
+#define SF_V9_L7     0x38
+#define SF_V9_I0     0x40
+#define SF_V9_I1     0x48
+#define SF_V9_I2     0x50
+#define SF_V9_I3     0x58
+#define SF_V9_I4     0x60
+#define SF_V9_I5     0x68
+#define SF_V9_FP     0x70
+#define SF_V9_PC     0x78
+#define SF_V9_RETP   0x80
+#define SF_V9_XARG0  0x88
+#define SF_V9_XARG1  0x90
+#define SF_V9_XARG2  0x98
+#define SF_V9_XARG3  0xa0
+#define SF_V9_XARG4  0xa8
+#define SF_V9_XARG5  0xb0
+#define SF_V9_XXARG  0xb8
+
+#define SF_L0     0x00
+#define SF_L1     0x04
+#define SF_L2     0x08
+#define SF_L3     0x0c
+#define SF_L4     0x10
+#define SF_L5     0x14
+#define SF_L6     0x18
+#define SF_L7     0x1c
+#define SF_I0     0x20
+#define SF_I1     0x24
+#define SF_I2     0x28
+#define SF_I3     0x2c
+#define SF_I4     0x30
+#define SF_I5     0x34
+#define SF_FP     0x38
+#define SF_PC     0x3c
+#define SF_RETP   0x40
+#define SF_XARG0  0x44
+#define SF_XARG1  0x48
+#define SF_XARG2  0x4c
+#define SF_XARG3  0x50
+#define SF_XARG4  0x54
+#define SF_XARG5  0x58
+#define SF_XXARG  0x5c
+
+#ifdef __KERNEL__
+
+/* global_reg_snapshot offsets */
+#define GR_SNAP_TSTATE	0x00
+#define GR_SNAP_TPC	0x08
+#define GR_SNAP_TNPC	0x10
+#define GR_SNAP_O7	0x18
+#define GR_SNAP_I7	0x20
+#define GR_SNAP_RPC	0x28
+#define GR_SNAP_THREAD	0x30
+#define GR_SNAP_PAD1	0x38
+
+#endif  /*  __KERNEL__  */
+
+/* Stuff for the ptrace system call */
+#define PTRACE_SPARC_DETACH       11
+#define PTRACE_GETREGS            12
+#define PTRACE_SETREGS            13
+#define PTRACE_GETFPREGS          14
+#define PTRACE_SETFPREGS          15
+#define PTRACE_READDATA           16
+#define PTRACE_WRITEDATA          17
+#define PTRACE_READTEXT           18
+#define PTRACE_WRITETEXT          19
+#define PTRACE_GETFPAREGS         20
+#define PTRACE_SETFPAREGS         21
+
+/* There are for debugging 64-bit processes, either from a 32 or 64 bit
+ * parent.  Thus their complements are for debugging 32-bit processes only.
+ */
+
+#define PTRACE_GETREGS64	  22
+#define PTRACE_SETREGS64	  23
+/* PTRACE_SYSCALL is 24 */
+#define PTRACE_GETFPREGS64	  25
+#define PTRACE_SETFPREGS64	  26
+
+#endif /* !(__SPARC_PTRACE_H) */
diff --git a/arch/sparc/include/asm/ptrace_32.h b/arch/sparc/include/asm/ptrace_32.h
deleted file mode 100644
index acb2d89..0000000
--- a/arch/sparc/include/asm/ptrace_32.h
+++ /dev/null
@@ -1,186 +0,0 @@
-#ifndef _SPARC_PTRACE_H
-#define _SPARC_PTRACE_H
-
-#include <asm/psr.h>
-
-/* This struct defines the way the registers are stored on the
- * stack during a system call and basically all traps.
- */
-
-#ifndef __ASSEMBLY__
-
-#include <linux/types.h>
-
-struct pt_regs {
-	unsigned long psr;
-	unsigned long pc;
-	unsigned long npc;
-	unsigned long y;
-	unsigned long u_regs[16]; /* globals and ins */
-};
-
-#define UREG_G0        0
-#define UREG_G1        1
-#define UREG_G2        2
-#define UREG_G3        3
-#define UREG_G4        4
-#define UREG_G5        5
-#define UREG_G6        6
-#define UREG_G7        7
-#define UREG_I0        8
-#define UREG_I1        9
-#define UREG_I2        10
-#define UREG_I3        11
-#define UREG_I4        12
-#define UREG_I5        13
-#define UREG_I6        14
-#define UREG_I7        15
-#define UREG_WIM       UREG_G0
-#define UREG_FADDR     UREG_G0
-#define UREG_FP        UREG_I6
-#define UREG_RETPC     UREG_I7
-
-/* A register window */
-struct reg_window32 {
-	unsigned long locals[8];
-	unsigned long ins[8];
-};
-
-/* A Sparc stack frame */
-struct sparc_stackf {
-	unsigned long locals[8];
-        unsigned long ins[6];
-	struct sparc_stackf *fp;
-	unsigned long callers_pc;
-	char *structptr;
-	unsigned long xargs[6];
-	unsigned long xxargs[1];
-};
-
-#define TRACEREG_SZ   sizeof(struct pt_regs)
-#define STACKFRAME_SZ sizeof(struct sparc_stackf)
-
-#ifdef __KERNEL__
-
-#include <asm/system.h>
-
-static inline bool pt_regs_is_syscall(struct pt_regs *regs)
-{
-	return (regs->psr & PSR_SYSCALL);
-}
-
-static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
-{
-	return (regs->psr &= ~PSR_SYSCALL);
-}
-
-#define arch_ptrace_stop_needed(exit_code, info) \
-({	flush_user_windows(); \
-	current_thread_info()->w_saved != 0;	\
-})
-
-#define arch_ptrace_stop(exit_code, info) \
-	synchronize_user_stack()
-
-#define user_mode(regs) (!((regs)->psr & PSR_PS))
-#define instruction_pointer(regs) ((regs)->pc)
-#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
-unsigned long profile_pc(struct pt_regs *);
-extern void show_regs(struct pt_regs *);
-#endif
-
-#else /* __ASSEMBLY__ */
-/* For assembly code. */
-#define TRACEREG_SZ       0x50
-#define STACKFRAME_SZ     0x60
-#endif
-
-/*
- * The asm-offsets.h is a generated file, so we cannot include it.
- * It may be OK for glibc headers, but it's utterly pointless for C code.
- * The assembly code using those offsets has to include it explicitly.
- */
-/* #include <asm/asm-offsets.h> */
-
-/* These are for pt_regs. */
-#define PT_PSR    0x0
-#define PT_PC     0x4
-#define PT_NPC    0x8
-#define PT_Y      0xc
-#define PT_G0     0x10
-#define PT_WIM    PT_G0
-#define PT_G1     0x14
-#define PT_G2     0x18
-#define PT_G3     0x1c
-#define PT_G4     0x20
-#define PT_G5     0x24
-#define PT_G6     0x28
-#define PT_G7     0x2c
-#define PT_I0     0x30
-#define PT_I1     0x34
-#define PT_I2     0x38
-#define PT_I3     0x3c
-#define PT_I4     0x40
-#define PT_I5     0x44
-#define PT_I6     0x48
-#define PT_FP     PT_I6
-#define PT_I7     0x4c
-
-/* Reg_window offsets */
-#define RW_L0     0x00
-#define RW_L1     0x04
-#define RW_L2     0x08
-#define RW_L3     0x0c
-#define RW_L4     0x10
-#define RW_L5     0x14
-#define RW_L6     0x18
-#define RW_L7     0x1c
-#define RW_I0     0x20
-#define RW_I1     0x24
-#define RW_I2     0x28
-#define RW_I3     0x2c
-#define RW_I4     0x30
-#define RW_I5     0x34
-#define RW_I6     0x38
-#define RW_I7     0x3c
-
-/* Stack_frame offsets */
-#define SF_L0     0x00
-#define SF_L1     0x04
-#define SF_L2     0x08
-#define SF_L3     0x0c
-#define SF_L4     0x10
-#define SF_L5     0x14
-#define SF_L6     0x18
-#define SF_L7     0x1c
-#define SF_I0     0x20
-#define SF_I1     0x24
-#define SF_I2     0x28
-#define SF_I3     0x2c
-#define SF_I4     0x30
-#define SF_I5     0x34
-#define SF_FP     0x38
-#define SF_PC     0x3c
-#define SF_RETP   0x40
-#define SF_XARG0  0x44
-#define SF_XARG1  0x48
-#define SF_XARG2  0x4c
-#define SF_XARG3  0x50
-#define SF_XARG4  0x54
-#define SF_XARG5  0x58
-#define SF_XXARG  0x5c
-
-/* Stuff for the ptrace system call */
-#define PTRACE_SPARC_DETACH       11
-#define PTRACE_GETREGS            12
-#define PTRACE_SETREGS            13
-#define PTRACE_GETFPREGS          14
-#define PTRACE_SETFPREGS          15
-#define PTRACE_READDATA           16
-#define PTRACE_WRITEDATA          17
-#define PTRACE_READTEXT           18
-#define PTRACE_WRITETEXT          19
-#define PTRACE_GETFPAREGS         20
-#define PTRACE_SETFPAREGS         21
-
-#endif /* !(_SPARC_PTRACE_H) */
diff --git a/arch/sparc/include/asm/ptrace_64.h b/arch/sparc/include/asm/ptrace_64.h
deleted file mode 100644
index cd6fbfc..0000000
--- a/arch/sparc/include/asm/ptrace_64.h
+++ /dev/null
@@ -1,356 +0,0 @@
-#ifndef _SPARC64_PTRACE_H
-#define _SPARC64_PTRACE_H
-
-#include <asm/pstate.h>
-
-/* This struct defines the way the registers are stored on the
- * stack during a system call and basically all traps.
- */
-
-/* This magic value must have the low 9 bits clear,
- * as that is where we encode the %tt value, see below.
- */
-#define PT_REGS_MAGIC 0x57ac6c00
-
-#ifndef __ASSEMBLY__
-
-#include <linux/types.h>
-
-struct pt_regs {
-	unsigned long u_regs[16]; /* globals and ins */
-	unsigned long tstate;
-	unsigned long tpc;
-	unsigned long tnpc;
-	unsigned int y;
-
-	/* We encode a magic number, PT_REGS_MAGIC, along
-	 * with the %tt (trap type) register value at trap
-	 * entry time.  The magic number allows us to identify
-	 * accurately a trap stack frame in the stack
-	 * unwinder, and the %tt value allows us to test
-	 * things like "in a system call" etc. for an arbitray
-	 * process.
-	 *
-	 * The PT_REGS_MAGIC is choosen such that it can be
-	 * loaded completely using just a sethi instruction.
-	 */
-	unsigned int magic;
-};
-
-struct pt_regs32 {
-	unsigned int psr;
-	unsigned int pc;
-	unsigned int npc;
-	unsigned int y;
-	unsigned int u_regs[16]; /* globals and ins */
-};
-
-#define UREG_G0        0
-#define UREG_G1        1
-#define UREG_G2        2
-#define UREG_G3        3
-#define UREG_G4        4
-#define UREG_G5        5
-#define UREG_G6        6
-#define UREG_G7        7
-#define UREG_I0        8
-#define UREG_I1        9
-#define UREG_I2        10
-#define UREG_I3        11
-#define UREG_I4        12
-#define UREG_I5        13
-#define UREG_I6        14
-#define UREG_I7        15
-#define UREG_FP        UREG_I6
-#define UREG_RETPC     UREG_I7
-
-/* A V9 register window */
-struct reg_window {
-	unsigned long locals[8];
-	unsigned long ins[8];
-};
-
-/* A 32-bit register window. */
-struct reg_window32 {
-	unsigned int locals[8];
-	unsigned int ins[8];
-};
-
-/* A V9 Sparc stack frame */
-struct sparc_stackf {
-	unsigned long locals[8];
-        unsigned long ins[6];
-	struct sparc_stackf *fp;
-	unsigned long callers_pc;
-	char *structptr;
-	unsigned long xargs[6];
-	unsigned long xxargs[1];
-};
-
-/* A 32-bit Sparc stack frame */
-struct sparc_stackf32 {
-	unsigned int locals[8];
-        unsigned int ins[6];
-	unsigned int fp;
-	unsigned int callers_pc;
-	unsigned int structptr;
-	unsigned int xargs[6];
-	unsigned int xxargs[1];
-};
-
-struct sparc_trapf {
-	unsigned long locals[8];
-	unsigned long ins[8];
-	unsigned long _unused;
-	struct pt_regs *regs;
-};
-
-#define TRACEREG_SZ	sizeof(struct pt_regs)
-#define STACKFRAME_SZ	sizeof(struct sparc_stackf)
-
-#define TRACEREG32_SZ	sizeof(struct pt_regs32)
-#define STACKFRAME32_SZ	sizeof(struct sparc_stackf32)
-
-#ifdef __KERNEL__
-
-#include <linux/threads.h>
-#include <asm/system.h>
-
-static inline int pt_regs_trap_type(struct pt_regs *regs)
-{
-	return regs->magic & 0x1ff;
-}
-
-static inline bool pt_regs_is_syscall(struct pt_regs *regs)
-{
-	return (regs->tstate & TSTATE_SYSCALL);
-}
-
-static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
-{
-	return (regs->tstate &= ~TSTATE_SYSCALL);
-}
-
-#define arch_ptrace_stop_needed(exit_code, info) \
-({	flush_user_windows(); \
-	get_thread_wsaved() != 0; \
-})
-
-#define arch_ptrace_stop(exit_code, info) \
-	synchronize_user_stack()
-
-struct global_reg_snapshot {
-	unsigned long		tstate;
-	unsigned long		tpc;
-	unsigned long		tnpc;
-	unsigned long		o7;
-	unsigned long		i7;
-	unsigned long		rpc;
-	struct thread_info	*thread;
-	unsigned long		pad1;
-};
-extern struct global_reg_snapshot global_reg_snapshot[NR_CPUS];
-
-#define force_successful_syscall_return()	    \
-do {	current_thread_info()->syscall_noerror = 1; \
-} while (0)
-#define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV))
-#define instruction_pointer(regs) ((regs)->tpc)
-#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
-#define regs_return_value(regs) ((regs)->u_regs[UREG_I0])
-#ifdef CONFIG_SMP
-extern unsigned long profile_pc(struct pt_regs *);
-#else
-#define profile_pc(regs) instruction_pointer(regs)
-#endif
-extern void show_regs(struct pt_regs *);
-#endif
-
-#else /* __ASSEMBLY__ */
-/* For assembly code. */
-#define TRACEREG_SZ		0xa0
-#define STACKFRAME_SZ		0xc0
-
-#define TRACEREG32_SZ		0x50
-#define STACKFRAME32_SZ		0x60
-#endif
-
-#ifdef __KERNEL__
-#define STACK_BIAS		2047
-#endif
-
-/* These are for pt_regs. */
-#define PT_V9_G0     0x00
-#define PT_V9_G1     0x08
-#define PT_V9_G2     0x10
-#define PT_V9_G3     0x18
-#define PT_V9_G4     0x20
-#define PT_V9_G5     0x28
-#define PT_V9_G6     0x30
-#define PT_V9_G7     0x38
-#define PT_V9_I0     0x40
-#define PT_V9_I1     0x48
-#define PT_V9_I2     0x50
-#define PT_V9_I3     0x58
-#define PT_V9_I4     0x60
-#define PT_V9_I5     0x68
-#define PT_V9_I6     0x70
-#define PT_V9_FP     PT_V9_I6
-#define PT_V9_I7     0x78
-#define PT_V9_TSTATE 0x80
-#define PT_V9_TPC    0x88
-#define PT_V9_TNPC   0x90
-#define PT_V9_Y      0x98
-#define PT_V9_MAGIC  0x9c
-#define PT_TSTATE	PT_V9_TSTATE
-#define PT_TPC		PT_V9_TPC
-#define PT_TNPC		PT_V9_TNPC
-
-/* These for pt_regs32. */
-#define PT_PSR    0x0
-#define PT_PC     0x4
-#define PT_NPC    0x8
-#define PT_Y      0xc
-#define PT_G0     0x10
-#define PT_WIM    PT_G0
-#define PT_G1     0x14
-#define PT_G2     0x18
-#define PT_G3     0x1c
-#define PT_G4     0x20
-#define PT_G5     0x24
-#define PT_G6     0x28
-#define PT_G7     0x2c
-#define PT_I0     0x30
-#define PT_I1     0x34
-#define PT_I2     0x38
-#define PT_I3     0x3c
-#define PT_I4     0x40
-#define PT_I5     0x44
-#define PT_I6     0x48
-#define PT_FP     PT_I6
-#define PT_I7     0x4c
-
-/* Reg_window offsets */
-#define RW_V9_L0     0x00
-#define RW_V9_L1     0x08
-#define RW_V9_L2     0x10
-#define RW_V9_L3     0x18
-#define RW_V9_L4     0x20
-#define RW_V9_L5     0x28
-#define RW_V9_L6     0x30
-#define RW_V9_L7     0x38
-#define RW_V9_I0     0x40
-#define RW_V9_I1     0x48
-#define RW_V9_I2     0x50
-#define RW_V9_I3     0x58
-#define RW_V9_I4     0x60
-#define RW_V9_I5     0x68
-#define RW_V9_I6     0x70
-#define RW_V9_I7     0x78
-
-#define RW_L0     0x00
-#define RW_L1     0x04
-#define RW_L2     0x08
-#define RW_L3     0x0c
-#define RW_L4     0x10
-#define RW_L5     0x14
-#define RW_L6     0x18
-#define RW_L7     0x1c
-#define RW_I0     0x20
-#define RW_I1     0x24
-#define RW_I2     0x28
-#define RW_I3     0x2c
-#define RW_I4     0x30
-#define RW_I5     0x34
-#define RW_I6     0x38
-#define RW_I7     0x3c
-
-/* Stack_frame offsets */
-#define SF_V9_L0     0x00
-#define SF_V9_L1     0x08
-#define SF_V9_L2     0x10
-#define SF_V9_L3     0x18
-#define SF_V9_L4     0x20
-#define SF_V9_L5     0x28
-#define SF_V9_L6     0x30
-#define SF_V9_L7     0x38
-#define SF_V9_I0     0x40
-#define SF_V9_I1     0x48
-#define SF_V9_I2     0x50
-#define SF_V9_I3     0x58
-#define SF_V9_I4     0x60
-#define SF_V9_I5     0x68
-#define SF_V9_FP     0x70
-#define SF_V9_PC     0x78
-#define SF_V9_RETP   0x80
-#define SF_V9_XARG0  0x88
-#define SF_V9_XARG1  0x90
-#define SF_V9_XARG2  0x98
-#define SF_V9_XARG3  0xa0
-#define SF_V9_XARG4  0xa8
-#define SF_V9_XARG5  0xb0
-#define SF_V9_XXARG  0xb8
-
-#define SF_L0     0x00
-#define SF_L1     0x04
-#define SF_L2     0x08
-#define SF_L3     0x0c
-#define SF_L4     0x10
-#define SF_L5     0x14
-#define SF_L6     0x18
-#define SF_L7     0x1c
-#define SF_I0     0x20
-#define SF_I1     0x24
-#define SF_I2     0x28
-#define SF_I3     0x2c
-#define SF_I4     0x30
-#define SF_I5     0x34
-#define SF_FP     0x38
-#define SF_PC     0x3c
-#define SF_RETP   0x40
-#define SF_XARG0  0x44
-#define SF_XARG1  0x48
-#define SF_XARG2  0x4c
-#define SF_XARG3  0x50
-#define SF_XARG4  0x54
-#define SF_XARG5  0x58
-#define SF_XXARG  0x5c
-
-#ifdef __KERNEL__
-
-/* global_reg_snapshot offsets */
-#define GR_SNAP_TSTATE	0x00
-#define GR_SNAP_TPC	0x08
-#define GR_SNAP_TNPC	0x10
-#define GR_SNAP_O7	0x18
-#define GR_SNAP_I7	0x20
-#define GR_SNAP_RPC	0x28
-#define GR_SNAP_THREAD	0x30
-#define GR_SNAP_PAD1	0x38
-
-#endif  /*  __KERNEL__  */
-
-/* Stuff for the ptrace system call */
-#define PTRACE_SPARC_DETACH       11
-#define PTRACE_GETREGS            12
-#define PTRACE_SETREGS            13
-#define PTRACE_GETFPREGS          14
-#define PTRACE_SETFPREGS          15
-#define PTRACE_READDATA           16
-#define PTRACE_WRITEDATA          17
-#define PTRACE_READTEXT           18
-#define PTRACE_WRITETEXT          19
-#define PTRACE_GETFPAREGS         20
-#define PTRACE_SETFPAREGS         21
-
-/* There are for debugging 64-bit processes, either from a 32 or 64 bit
- * parent.  Thus their complements are for debugging 32-bit processes only.
- */
-
-#define PTRACE_GETREGS64	  22
-#define PTRACE_SETREGS64	  23
-/* PTRACE_SYSCALL is 24 */
-#define PTRACE_GETFPREGS64	  25
-#define PTRACE_SETFPREGS64	  26
-
-#endif /* !(_SPARC64_PTRACE_H) */
-- 
cgit v0.10.2


From 0999769e6cad9b0e5abb7c513c0c3f16821f0884 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 3 Jan 2009 15:37:14 +1030
Subject: cris: define __fls

Like fls, but can't be handed 0 and returns the bit number.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h
index c0e62f8..9e69cfb 100644
--- a/arch/cris/include/asm/bitops.h
+++ b/arch/cris/include/asm/bitops.h
@@ -148,6 +148,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 #define ffs kernel_ffs
 
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/hweight.h>
 #include <asm-generic/bitops/find.h>
-- 
cgit v0.10.2


From 1eae29bcc25b8a12bd6f416304c8aea1d576807e Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 21:10:04 -0800
Subject: sparc: unify siginfo.h

Trivial unification where the sparc64 specific
parts are protected using a signle ifdef/endif pair.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index f79249f..930ba02 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -3,8 +3,6 @@ include include/asm-generic/Kbuild.asm
 
 header-y += ipcbuf_32.h
 header-y += ipcbuf_64.h
-header-y += siginfo_32.h
-header-y += siginfo_64.h
 
 header-y += apc.h
 header-y += asi.h
diff --git a/arch/sparc/include/asm/siginfo.h b/arch/sparc/include/asm/siginfo.h
index bd81f8d..988e5d8 100644
--- a/arch/sparc/include/asm/siginfo.h
+++ b/arch/sparc/include/asm/siginfo.h
@@ -1,8 +1,37 @@
-#ifndef ___ASM_SPARC_SIGINFO_H
-#define ___ASM_SPARC_SIGINFO_H
+#ifndef __SPARC_SIGINFO_H
+#define __SPARC_SIGINFO_H
+
 #if defined(__sparc__) && defined(__arch64__)
-#include <asm/siginfo_64.h>
-#else
-#include <asm/siginfo_32.h>
-#endif
-#endif
+
+#define SI_PAD_SIZE32	((SI_MAX_SIZE/sizeof(int)) - 3)
+#define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
+#define __ARCH_SI_BAND_T int
+
+#endif /* defined(__sparc__) && defined(__arch64__) */
+
+
+#define __ARCH_SI_TRAPNO
+
+#include <asm-generic/siginfo.h>
+
+#ifdef __KERNEL__
+
+#include <linux/compat.h>
+
+#ifdef CONFIG_COMPAT
+
+struct compat_siginfo;
+
+#endif /* CONFIG_COMPAT */
+
+#endif /* __KERNEL__ */
+
+#define SI_NOINFO	32767		/* no information in siginfo_t */
+
+/*
+ * SIGEMT si_codes
+ */
+#define EMT_TAGOVF	(__SI_FAULT|1)	/* tag overflow */
+#define NSIGEMT		1
+
+#endif /* !(__SPARC_SIGINFO_H) */
diff --git a/arch/sparc/include/asm/siginfo_32.h b/arch/sparc/include/asm/siginfo_32.h
deleted file mode 100644
index 3c71af1..0000000
--- a/arch/sparc/include/asm/siginfo_32.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _SPARC_SIGINFO_H
-#define _SPARC_SIGINFO_H
-
-#define __ARCH_SI_UID_T		unsigned int
-#define __ARCH_SI_TRAPNO
-
-#include <asm-generic/siginfo.h>
-
-#define SI_NOINFO	32767		/* no information in siginfo_t */
-
-/*
- * SIGEMT si_codes
- */
-#define EMT_TAGOVF	(__SI_FAULT|1)	/* tag overflow */
-#define NSIGEMT		1
-
-#endif /* !(_SPARC_SIGINFO_H) */
diff --git a/arch/sparc/include/asm/siginfo_64.h b/arch/sparc/include/asm/siginfo_64.h
deleted file mode 100644
index c96e6c3..0000000
--- a/arch/sparc/include/asm/siginfo_64.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _SPARC64_SIGINFO_H
-#define _SPARC64_SIGINFO_H
-
-#define SI_PAD_SIZE32	((SI_MAX_SIZE/sizeof(int)) - 3)
-
-#define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
-#define __ARCH_SI_TRAPNO
-#define __ARCH_SI_BAND_T int
-
-#include <asm-generic/siginfo.h>
-
-#ifdef __KERNEL__
-
-#include <linux/compat.h>
-
-#ifdef CONFIG_COMPAT
-
-struct compat_siginfo;
-
-#endif /* CONFIG_COMPAT */
-
-#endif /* __KERNEL__ */
-
-#define SI_NOINFO	32767		/* no information in siginfo_t */
-
-/*
- * SIGEMT si_codes
- */
-#define EMT_TAGOVF	(__SI_FAULT|1)	/* tag overflow */
-#define NSIGEMT		1
-
-#endif
-- 
cgit v0.10.2


From 3011618d9a010b33b7e67cb26df9bc79c948f67b Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 21:10:48 -0800
Subject: sparc: remove ebus definitions from openprom*.h

Looks like leftovers from the removal of the special ebus layer.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/openprom_32.h b/arch/sparc/include/asm/openprom_32.h
index 875da35..3868057 100644
--- a/arch/sparc/include/asm/openprom_32.h
+++ b/arch/sparc/include/asm/openprom_32.h
@@ -239,17 +239,6 @@ struct linux_prom_pci_assigned_addresses {
 	unsigned int size_lo;
 };
 
-struct linux_prom_ebus_ranges {
-	unsigned int child_phys_hi;
-	unsigned int child_phys_lo;
-
-	unsigned int parent_phys_hi;
-	unsigned int parent_phys_mid;
-	unsigned int parent_phys_lo;
-
-	unsigned int size;
-};
-
 #endif /* !(__ASSEMBLY__) */
 
 #endif /* !(__SPARC_OPENPROM_H) */
diff --git a/arch/sparc/include/asm/openprom_64.h b/arch/sparc/include/asm/openprom_64.h
index b69e4a8..955d7f6 100644
--- a/arch/sparc/include/asm/openprom_64.h
+++ b/arch/sparc/include/asm/openprom_64.h
@@ -249,32 +249,6 @@ struct linux_prom_pci_intmask {
 	unsigned int interrupt;
 };
 
-struct linux_prom_ebus_ranges {
-	unsigned int child_phys_hi;
-	unsigned int child_phys_lo;
-
-	unsigned int parent_phys_hi;
-	unsigned int parent_phys_mid;
-	unsigned int parent_phys_lo;
-
-	unsigned int size;
-};
-
-struct linux_prom_ebus_intmap {
-	unsigned int phys_hi;
-	unsigned int phys_lo;
-
-	unsigned int interrupt;
-
-	int          cnode;
-	unsigned int cinterrupt;
-};
-
-struct linux_prom_ebus_intmask {
-	unsigned int phys_hi;
-	unsigned int phys_lo;
-	unsigned int interrupt;
-};
 #endif /* !(__ASSEMBLY__) */
 
 #endif /* !(__SPARC64_OPENPROM_H) */
-- 
cgit v0.10.2


From b608c3fe3cffcb3ebc87ffdec134286859d4a44e Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 21:12:40 -0800
Subject: sparc: remove linux_prom_pci_assigned_addresses from openprom_32.h

It is not used anywhere in the tree so drop it.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/openprom_32.h b/arch/sparc/include/asm/openprom_32.h
index 3868057..f8b4bf7 100644
--- a/arch/sparc/include/asm/openprom_32.h
+++ b/arch/sparc/include/asm/openprom_32.h
@@ -229,16 +229,6 @@ struct linux_prom_pci_ranges {
 	unsigned int size_lo;
 };
 
-struct linux_prom_pci_assigned_addresses {
-	unsigned int which_io;
-
-	unsigned int phys_hi;
-	unsigned int phys_lo;
-
-	unsigned int size_hi;
-	unsigned int size_lo;
-};
-
 #endif /* !(__ASSEMBLY__) */
 
 #endif /* !(__SPARC_OPENPROM_H) */
-- 
cgit v0.10.2


From bb5b52bbd5e2cd40b193f34c11eec19864080dcf Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 21:13:52 -0800
Subject: sparc: prepare openprom for unification

Align the sparc and sparc64 versions so differences are minimal.
A few data types are changed to better reflect there actual usage.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/openprom_32.h b/arch/sparc/include/asm/openprom_32.h
index f8b4bf7..2e58db5 100644
--- a/arch/sparc/include/asm/openprom_32.h
+++ b/arch/sparc/include/asm/openprom_32.h
@@ -47,7 +47,7 @@ struct linux_dev_v2_funcs {
 
 struct linux_mlist_v0 {
 	struct linux_mlist_v0 *theres_more;
-	char *start_adr;
+	unsigned int start_adr;
 	unsigned num_bytes;
 };
 
@@ -182,9 +182,9 @@ struct linux_nodeops {
 #define PROMINTR_MAX    15
 
 struct linux_prom_registers {
-	unsigned int which_io;         /* is this in OBIO space? */
-	unsigned int phys_addr;        /* The physical address of this register */
-	unsigned int reg_size;         /* How many bytes does this register take up? */
+	unsigned int which_io;	/* is this in OBIO space? */
+	unsigned int phys_addr;	/* The physical address of this register */
+	unsigned int reg_size;	/* How many bytes does this register take up? */
 };
 
 struct linux_prom_irqs {
diff --git a/arch/sparc/include/asm/openprom_64.h b/arch/sparc/include/asm/openprom_64.h
index 955d7f6..db3711e 100644
--- a/arch/sparc/include/asm/openprom_64.h
+++ b/arch/sparc/include/asm/openprom_64.h
@@ -44,7 +44,7 @@ struct linux_dev_v2_funcs {
 
 struct linux_mlist_v0 {
 	struct linux_mlist_v0 *theres_more;
-	unsigned start_adr;
+	unsigned int start_adr;
 	unsigned num_bytes;
 };
 
@@ -167,9 +167,9 @@ struct linux_romvec {
 struct linux_nodeops {
 	int (*no_nextnode)(int node);
 	int (*no_child)(int node);
-	int (*no_proplen)(int node, char *name);
-	int (*no_getprop)(int node, char *name, char *val);
-	int (*no_setprop)(int node, char *name, char *val, int len);
+	int (*no_proplen)(int node, const char *name);
+	int (*no_getprop)(int node, const char *name, char *val);
+	int (*no_setprop)(int node, const char *name, char *val, int len);
 	char * (*no_nextprop)(int node, char *name);
 };
 
@@ -179,9 +179,9 @@ struct linux_nodeops {
 #define PROMINTR_MAX    32
 
 struct linux_prom_registers {
-	unsigned which_io;	/* hi part of physical address			*/
-	unsigned phys_addr;	/* The physical address of this register	*/
-	int reg_size;		/* How many bytes does this register take up?	*/
+	unsigned int which_io;	/* hi part of physical address */
+	unsigned int phys_addr;	/* The physical address of this register */
+	unsigned int reg_size;	/* How many bytes does this register take up? */
 };
 
 struct linux_prom64_registers {
-- 
cgit v0.10.2


From 640cc590bd6112424f4c248fe839af28a06b54c4 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 21:14:35 -0800
Subject: sparc64: delete unused linux_prom64_ranges from openprom_64.h

It was not used over the whole tree - so drop it.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/openprom_64.h b/arch/sparc/include/asm/openprom_64.h
index db3711e..8148900 100644
--- a/arch/sparc/include/asm/openprom_64.h
+++ b/arch/sparc/include/asm/openprom_64.h
@@ -203,12 +203,6 @@ struct linux_prom_ranges {
 	unsigned int or_size;
 };
 
-struct linux_prom64_ranges {
-	unsigned long ot_child_base;		/* Bus feels this */
-	unsigned long ot_parent_base;		/* CPU looks from here */
-	unsigned long or_size;
-};
-
 /* Ranges and reg properties are a bit different for PCI. */
 struct linux_prom_pci_registers {
 	unsigned int phys_hi;
-- 
cgit v0.10.2


From 7c59d28d0e798fff1ebfedcf7821cbd5513091bd Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 21:15:25 -0800
Subject: sparc: unify openprom.h

After the preparational steps the unification was simple.

The linux_prom_pci_registers definition did not look like
it could be unified at first look since the structure is assigned
using prop_getproperty() / of_get_property() so the structure
is assumed to come direct form the prom.

The LINUX_OPPROM_MAGIC was kept even if it is not used by the kernel
on the assumption that userspace may require it.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 930ba02..58f9b3a 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -11,8 +11,6 @@ header-y += envctrl.h
 header-y += fbio.h
 header-y += jsflash.h
 header-y += openprom.h
-header-y += openprom_32.h
-header-y += openprom_64.h
 header-y += openpromio.h
 header-y += perfctr.h
 header-y += psrcompat.h
diff --git a/arch/sparc/include/asm/openprom.h b/arch/sparc/include/asm/openprom.h
index aaeae905..963e1a4 100644
--- a/arch/sparc/include/asm/openprom.h
+++ b/arch/sparc/include/asm/openprom.h
@@ -1,8 +1,277 @@
-#ifndef ___ASM_SPARC_OPENPROM_H
-#define ___ASM_SPARC_OPENPROM_H
+#ifndef __SPARC_OPENPROM_H
+#define __SPARC_OPENPROM_H
+
+/* openprom.h:  Prom structures and defines for access to the OPENBOOT
+ *              prom routines and data areas.
+ *
+ * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+/* Empirical constants... */
+#define LINUX_OPPROM_MAGIC      0x10010407
+
+#ifndef __ASSEMBLY__
+/* V0 prom device operations. */
+struct linux_dev_v0_funcs {
+	int (*v0_devopen)(char *device_str);
+	int (*v0_devclose)(int dev_desc);
+	int (*v0_rdblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
+	int (*v0_wrblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
+	int (*v0_wrnetdev)(int dev_desc, int num_bytes, char *buf);
+	int (*v0_rdnetdev)(int dev_desc, int num_bytes, char *buf);
+	int (*v0_rdchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
+	int (*v0_wrchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
+	int (*v0_seekdev)(int dev_desc, long logical_offst, int from);
+};
+
+/* V2 and later prom device operations. */
+struct linux_dev_v2_funcs {
+	int (*v2_inst2pkg)(int d);	/* Convert ihandle to phandle */
+	char * (*v2_dumb_mem_alloc)(char *va, unsigned sz);
+	void (*v2_dumb_mem_free)(char *va, unsigned sz);
+
+	/* To map devices into virtual I/O space. */
+	char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned paddr, unsigned sz);
+	void (*v2_dumb_munmap)(char *virta, unsigned size);
+
+	int (*v2_dev_open)(char *devpath);
+	void (*v2_dev_close)(int d);
+	int (*v2_dev_read)(int d, char *buf, int nbytes);
+	int (*v2_dev_write)(int d, char *buf, int nbytes);
+	int (*v2_dev_seek)(int d, int hi, int lo);
+
+	/* Never issued (multistage load support) */
+	void (*v2_wheee2)(void);
+	void (*v2_wheee3)(void);
+};
+
+struct linux_mlist_v0 {
+	struct linux_mlist_v0 *theres_more;
+	unsigned int start_adr;
+	unsigned num_bytes;
+};
+
+struct linux_mem_v0 {
+	struct linux_mlist_v0 **v0_totphys;
+	struct linux_mlist_v0 **v0_prommap;
+	struct linux_mlist_v0 **v0_available; /* What we can use */
+};
+
+/* Arguments sent to the kernel from the boot prompt. */
+struct linux_arguments_v0 {
+	char *argv[8];
+	char args[100];
+	char boot_dev[2];
+	int boot_dev_ctrl;
+	int boot_dev_unit;
+	int dev_partition;
+	char *kernel_file_name;
+	void *aieee1;           /* XXX */
+};
+
+/* V2 and up boot things. */
+struct linux_bootargs_v2 {
+	char **bootpath;
+	char **bootargs;
+	int *fd_stdin;
+	int *fd_stdout;
+};
+
+/* The top level PROM vector. */
+struct linux_romvec {
+	/* Version numbers. */
+	unsigned int pv_magic_cookie;
+	unsigned int pv_romvers;
+	unsigned int pv_plugin_revision;
+	unsigned int pv_printrev;
+
+	/* Version 0 memory descriptors. */
+	struct linux_mem_v0 pv_v0mem;
+
+	/* Node operations. */
+	struct linux_nodeops *pv_nodeops;
+
+	char **pv_bootstr;
+	struct linux_dev_v0_funcs pv_v0devops;
+
+	char *pv_stdin;
+	char *pv_stdout;
+#define	PROMDEV_KBD	0		/* input from keyboard */
+#define	PROMDEV_SCREEN	0		/* output to screen */
+#define	PROMDEV_TTYA	1		/* in/out to ttya */
+#define	PROMDEV_TTYB	2		/* in/out to ttyb */
+
+	/* Blocking getchar/putchar.  NOT REENTRANT! (grr) */
+	int (*pv_getchar)(void);
+	void (*pv_putchar)(int ch);
+
+	/* Non-blocking variants. */
+	int (*pv_nbgetchar)(void);
+	int (*pv_nbputchar)(int ch);
+
+	void (*pv_putstr)(char *str, int len);
+
+	/* Miscellany. */
+	void (*pv_reboot)(char *bootstr);
+	void (*pv_printf)(__const__ char *fmt, ...);
+	void (*pv_abort)(void);
+	__volatile__ int *pv_ticks;
+	void (*pv_halt)(void);
+	void (**pv_synchook)(void);
+
+	/* Evaluate a forth string, not different proto for V0 and V2->up. */
+	union {
+		void (*v0_eval)(int len, char *str);
+		void (*v2_eval)(char *str);
+	} pv_fortheval;
+
+	struct linux_arguments_v0 **pv_v0bootargs;
+
+	/* Get ether address. */
+	unsigned int (*pv_enaddr)(int d, char *enaddr);
+
+	struct linux_bootargs_v2 pv_v2bootargs;
+	struct linux_dev_v2_funcs pv_v2devops;
+
+	int filler[15];
+
+	/* This one is sun4c/sun4 only. */
+	void (*pv_setctxt)(int ctxt, char *va, int pmeg);
+
+	/* Prom version 3 Multiprocessor routines. This stuff is crazy.
+	 * No joke. Calling these when there is only one cpu probably
+	 * crashes the machine, have to test this. :-)
+	 */
+
+	/* v3_cpustart() will start the cpu 'whichcpu' in mmu-context
+	 * 'thiscontext' executing at address 'prog_counter'
+	 */
+	int (*v3_cpustart)(unsigned int whichcpu, int ctxtbl_ptr,
+			   int thiscontext, char *prog_counter);
+
+	/* v3_cpustop() will cause cpu 'whichcpu' to stop executing
+	 * until a resume cpu call is made.
+	 */
+	int (*v3_cpustop)(unsigned int whichcpu);
+
+	/* v3_cpuidle() will idle cpu 'whichcpu' until a stop or
+	 * resume cpu call is made.
+	 */
+	int (*v3_cpuidle)(unsigned int whichcpu);
+
+	/* v3_cpuresume() will resume processor 'whichcpu' executing
+	 * starting with whatever 'pc' and 'npc' were left at the
+	 * last 'idle' or 'stop' call.
+	 */
+	int (*v3_cpuresume)(unsigned int whichcpu);
+};
+
+/* Routines for traversing the prom device tree. */
+struct linux_nodeops {
+	int (*no_nextnode)(int node);
+	int (*no_child)(int node);
+	int (*no_proplen)(int node, const char *name);
+	int (*no_getprop)(int node, const char *name, char *val);
+	int (*no_setprop)(int node, const char *name, char *val, int len);
+	char * (*no_nextprop)(int node, char *name);
+};
+
+/* More fun PROM structures for device probing. */
 #if defined(__sparc__) && defined(__arch64__)
-#include <asm/openprom_64.h>
+#define PROMREG_MAX     24
+#define PROMVADDR_MAX   16
+#define PROMINTR_MAX    32
 #else
-#include <asm/openprom_32.h>
+#define PROMREG_MAX     16
+#define PROMVADDR_MAX   16
+#define PROMINTR_MAX    15
 #endif
+
+struct linux_prom_registers {
+	unsigned int which_io;	/* hi part of physical address */
+	unsigned int phys_addr;	/* The physical address of this register */
+	unsigned int reg_size;	/* How many bytes does this register take up? */
+};
+
+struct linux_prom64_registers {
+	unsigned long phys_addr;
+	unsigned long reg_size;
+};
+
+struct linux_prom_irqs {
+	int pri;    /* IRQ priority */
+	int vector; /* This is foobar, what does it do? */
+};
+
+/* Element of the "ranges" vector */
+struct linux_prom_ranges {
+	unsigned int ot_child_space;
+	unsigned int ot_child_base;		/* Bus feels this */
+	unsigned int ot_parent_space;
+	unsigned int ot_parent_base;		/* CPU looks from here */
+	unsigned int or_size;
+};
+
+/*
+ * Ranges and reg properties are a bit different for PCI.
+ */
+#if defined(__sparc__) && defined(__arch64__)
+struct linux_prom_pci_registers {
+	unsigned int phys_hi;
+	unsigned int phys_mid;
+	unsigned int phys_lo;
+
+	unsigned int size_hi;
+	unsigned int size_lo;
+};
+#else
+struct linux_prom_pci_registers {
+	/*
+	 * We don't know what information this field contain.
+	 * We guess, PCI device function is in bits 15:8
+	 * So, ...
+	 */
+	unsigned int which_io;  /* Let it be which_io */
+
+	unsigned int phys_hi;
+	unsigned int phys_lo;
+
+	unsigned int size_hi;
+	unsigned int size_lo;
+};
+
 #endif
+
+struct linux_prom_pci_ranges {
+	unsigned int child_phys_hi;	/* Only certain bits are encoded here. */
+	unsigned int child_phys_mid;
+	unsigned int child_phys_lo;
+
+	unsigned int parent_phys_hi;
+	unsigned int parent_phys_lo;
+
+	unsigned int size_hi;
+	unsigned int size_lo;
+};
+
+struct linux_prom_pci_intmap {
+	unsigned int phys_hi;
+	unsigned int phys_mid;
+	unsigned int phys_lo;
+
+	unsigned int interrupt;
+
+	int          cnode;
+	unsigned int cinterrupt;
+};
+
+struct linux_prom_pci_intmask {
+	unsigned int phys_hi;
+	unsigned int phys_mid;
+	unsigned int phys_lo;
+	unsigned int interrupt;
+};
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(__SPARC_OPENPROM_H) */
diff --git a/arch/sparc/include/asm/openprom_32.h b/arch/sparc/include/asm/openprom_32.h
deleted file mode 100644
index 2e58db5..0000000
--- a/arch/sparc/include/asm/openprom_32.h
+++ /dev/null
@@ -1,234 +0,0 @@
-#ifndef __SPARC_OPENPROM_H
-#define __SPARC_OPENPROM_H
-
-/* openprom.h:  Prom structures and defines for access to the OPENBOOT
- *              prom routines and data areas.
- *
- * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
- */
-
-/* Empirical constants... */
-#define	LINUX_OPPROM_MAGIC      0x10010407
-
-#ifndef __ASSEMBLY__
-/* V0 prom device operations. */
-struct linux_dev_v0_funcs {
-	int (*v0_devopen)(char *device_str);
-	int (*v0_devclose)(int dev_desc);
-	int (*v0_rdblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
-	int (*v0_wrblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
-	int (*v0_wrnetdev)(int dev_desc, int num_bytes, char *buf);
-	int (*v0_rdnetdev)(int dev_desc, int num_bytes, char *buf);
-	int (*v0_rdchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
-	int (*v0_wrchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
-	int (*v0_seekdev)(int dev_desc, long logical_offst, int from);
-};
-
-/* V2 and later prom device operations. */
-struct linux_dev_v2_funcs {
-	int (*v2_inst2pkg)(int d);	/* Convert ihandle to phandle */
-	char * (*v2_dumb_mem_alloc)(char *va, unsigned sz);
-	void (*v2_dumb_mem_free)(char *va, unsigned sz);
-
-	/* To map devices into virtual I/O space. */
-	char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned paddr, unsigned sz);
-	void (*v2_dumb_munmap)(char *virta, unsigned size);
-
-	int (*v2_dev_open)(char *devpath);
-	void (*v2_dev_close)(int d);
-	int (*v2_dev_read)(int d, char *buf, int nbytes);
-	int (*v2_dev_write)(int d, char *buf, int nbytes);
-	int (*v2_dev_seek)(int d, int hi, int lo);
-
-	/* Never issued (multistage load support) */
-	void (*v2_wheee2)(void);
-	void (*v2_wheee3)(void);
-};
-
-struct linux_mlist_v0 {
-	struct linux_mlist_v0 *theres_more;
-	unsigned int start_adr;
-	unsigned num_bytes;
-};
-
-struct linux_mem_v0 {
-	struct linux_mlist_v0 **v0_totphys;
-	struct linux_mlist_v0 **v0_prommap;
-	struct linux_mlist_v0 **v0_available; /* What we can use */
-};
-
-/* Arguments sent to the kernel from the boot prompt. */
-struct linux_arguments_v0 {
-	char *argv[8];
-	char args[100];
-	char boot_dev[2];
-	int boot_dev_ctrl;
-	int boot_dev_unit;
-	int dev_partition;
-	char *kernel_file_name;
-	void *aieee1;           /* XXX */
-};
-
-/* V2 and up boot things. */
-struct linux_bootargs_v2 {
-	char **bootpath;
-	char **bootargs;
-	int *fd_stdin;
-	int *fd_stdout;
-};
-
-/* The top level PROM vector. */
-struct linux_romvec {
-	/* Version numbers. */
-	unsigned int pv_magic_cookie;
-	unsigned int pv_romvers;
-	unsigned int pv_plugin_revision;
-	unsigned int pv_printrev;
-
-	/* Version 0 memory descriptors. */
-	struct linux_mem_v0 pv_v0mem;
-
-	/* Node operations. */
-	struct linux_nodeops *pv_nodeops;
-
-	char **pv_bootstr;
-	struct linux_dev_v0_funcs pv_v0devops;
-
-	char *pv_stdin;
-	char *pv_stdout;
-#define	PROMDEV_KBD	0		/* input from keyboard */
-#define	PROMDEV_SCREEN	0		/* output to screen */
-#define	PROMDEV_TTYA	1		/* in/out to ttya */
-#define	PROMDEV_TTYB	2		/* in/out to ttyb */
-
-	/* Blocking getchar/putchar.  NOT REENTRANT! (grr) */
-	int (*pv_getchar)(void);
-	void (*pv_putchar)(int ch);
-
-	/* Non-blocking variants. */
-	int (*pv_nbgetchar)(void);
-	int (*pv_nbputchar)(int ch);
-
-	void (*pv_putstr)(char *str, int len);
-
-	/* Miscellany. */
-	void (*pv_reboot)(char *bootstr);
-	void (*pv_printf)(__const__ char *fmt, ...);
-	void (*pv_abort)(void);
-	__volatile__ int *pv_ticks;
-	void (*pv_halt)(void);
-	void (**pv_synchook)(void);
-
-	/* Evaluate a forth string, not different proto for V0 and V2->up. */
-	union {
-		void (*v0_eval)(int len, char *str);
-		void (*v2_eval)(char *str);
-	} pv_fortheval;
-
-	struct linux_arguments_v0 **pv_v0bootargs;
-
-	/* Get ether address. */
-	unsigned int (*pv_enaddr)(int d, char *enaddr);
-
-	struct linux_bootargs_v2 pv_v2bootargs;
-	struct linux_dev_v2_funcs pv_v2devops;
-
-	int filler[15];
-
-	/* This one is sun4c/sun4 only. */
-	void (*pv_setctxt)(int ctxt, char *va, int pmeg);
-
-	/* Prom version 3 Multiprocessor routines. This stuff is crazy.
-	 * No joke. Calling these when there is only one cpu probably
-	 * crashes the machine, have to test this. :-)
-	 */
-
-	/* v3_cpustart() will start the cpu 'whichcpu' in mmu-context
-	 * 'thiscontext' executing at address 'prog_counter'
-	 */
-	int (*v3_cpustart)(unsigned int whichcpu, int ctxtbl_ptr,
-			   int thiscontext, char *prog_counter);
-
-	/* v3_cpustop() will cause cpu 'whichcpu' to stop executing
-	 * until a resume cpu call is made.
-	 */
-	int (*v3_cpustop)(unsigned int whichcpu);
-
-	/* v3_cpuidle() will idle cpu 'whichcpu' until a stop or
-	 * resume cpu call is made.
-	 */
-	int (*v3_cpuidle)(unsigned int whichcpu);
-
-	/* v3_cpuresume() will resume processor 'whichcpu' executing
-	 * starting with whatever 'pc' and 'npc' were left at the
-	 * last 'idle' or 'stop' call.
-	 */
-	int (*v3_cpuresume)(unsigned int whichcpu);
-};
-
-/* Routines for traversing the prom device tree. */
-struct linux_nodeops {
-	int (*no_nextnode)(int node);
-	int (*no_child)(int node);
-	int (*no_proplen)(int node, const char *name);
-	int (*no_getprop)(int node, const char *name, char *val);
-	int (*no_setprop)(int node, const char *name, char *val, int len);
-	char * (*no_nextprop)(int node, char *name);
-};
-
-/* More fun PROM structures for device probing. */
-#define PROMREG_MAX     16
-#define PROMVADDR_MAX   16
-#define PROMINTR_MAX    15
-
-struct linux_prom_registers {
-	unsigned int which_io;	/* is this in OBIO space? */
-	unsigned int phys_addr;	/* The physical address of this register */
-	unsigned int reg_size;	/* How many bytes does this register take up? */
-};
-
-struct linux_prom_irqs {
-	int pri;    /* IRQ priority */
-	int vector; /* This is foobar, what does it do? */
-};
-
-/* Element of the "ranges" vector */
-struct linux_prom_ranges {
-	unsigned int ot_child_space;
-	unsigned int ot_child_base;		/* Bus feels this */
-	unsigned int ot_parent_space;
-	unsigned int ot_parent_base;		/* CPU looks from here */
-	unsigned int or_size;
-};
-
-/* Ranges and reg properties are a bit different for PCI. */
-struct linux_prom_pci_registers {
-	/*
-	 * We don't know what information this field contain.
-	 * We guess, PCI device function is in bits 15:8
-	 * So, ...
-	 */
-	unsigned int which_io;  /* Let it be which_io */
-
-	unsigned int phys_hi;
-	unsigned int phys_lo;
-
-	unsigned int size_hi;
-	unsigned int size_lo;
-};
-
-struct linux_prom_pci_ranges {
-	unsigned int child_phys_hi;	/* Only certain bits are encoded here. */
-	unsigned int child_phys_mid;
-	unsigned int child_phys_lo;
-
-	unsigned int parent_phys_hi;
-	unsigned int parent_phys_lo;
-
-	unsigned int size_hi;
-	unsigned int size_lo;
-};
-
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* !(__SPARC_OPENPROM_H) */
diff --git a/arch/sparc/include/asm/openprom_64.h b/arch/sparc/include/asm/openprom_64.h
deleted file mode 100644
index 8148900..0000000
--- a/arch/sparc/include/asm/openprom_64.h
+++ /dev/null
@@ -1,248 +0,0 @@
-#ifndef __SPARC64_OPENPROM_H
-#define __SPARC64_OPENPROM_H
-
-/* openprom.h:  Prom structures and defines for access to the OPENBOOT
- *              prom routines and data areas.
- *
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
- */
-
-#ifndef __ASSEMBLY__
-/* V0 prom device operations. */
-struct linux_dev_v0_funcs {
-	int (*v0_devopen)(char *device_str);
-	int (*v0_devclose)(int dev_desc);
-	int (*v0_rdblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
-	int (*v0_wrblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
-	int (*v0_wrnetdev)(int dev_desc, int num_bytes, char *buf);
-	int (*v0_rdnetdev)(int dev_desc, int num_bytes, char *buf);
-	int (*v0_rdchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
-	int (*v0_wrchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
-	int (*v0_seekdev)(int dev_desc, long logical_offst, int from);
-};
-
-/* V2 and later prom device operations. */
-struct linux_dev_v2_funcs {
-	int (*v2_inst2pkg)(int d);	/* Convert ihandle to phandle */
-	char * (*v2_dumb_mem_alloc)(char *va, unsigned sz);
-	void (*v2_dumb_mem_free)(char *va, unsigned sz);
-
-	/* To map devices into virtual I/O space. */
-	char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned paddr, unsigned sz);
-	void (*v2_dumb_munmap)(char *virta, unsigned size);
-
-	int (*v2_dev_open)(char *devpath);
-	void (*v2_dev_close)(int d);
-	int (*v2_dev_read)(int d, char *buf, int nbytes);
-	int (*v2_dev_write)(int d, char *buf, int nbytes);
-	int (*v2_dev_seek)(int d, int hi, int lo);
-
-	/* Never issued (multistage load support) */
-	void (*v2_wheee2)(void);
-	void (*v2_wheee3)(void);
-};
-
-struct linux_mlist_v0 {
-	struct linux_mlist_v0 *theres_more;
-	unsigned int start_adr;
-	unsigned num_bytes;
-};
-
-struct linux_mem_v0 {
-	struct linux_mlist_v0 **v0_totphys;
-	struct linux_mlist_v0 **v0_prommap;
-	struct linux_mlist_v0 **v0_available; /* What we can use */
-};
-
-/* Arguments sent to the kernel from the boot prompt. */
-struct linux_arguments_v0 {
-	char *argv[8];
-	char args[100];
-	char boot_dev[2];
-	int boot_dev_ctrl;
-	int boot_dev_unit;
-	int dev_partition;
-	char *kernel_file_name;
-	void *aieee1;           /* XXX */
-};
-
-/* V2 and up boot things. */
-struct linux_bootargs_v2 {
-	char **bootpath;
-	char **bootargs;
-	int *fd_stdin;
-	int *fd_stdout;
-};
-
-/* The top level PROM vector. */
-struct linux_romvec {
-	/* Version numbers. */
-	unsigned int pv_magic_cookie;
-	unsigned int pv_romvers;
-	unsigned int pv_plugin_revision;
-	unsigned int pv_printrev;
-
-	/* Version 0 memory descriptors. */
-	struct linux_mem_v0 pv_v0mem;
-
-	/* Node operations. */
-	struct linux_nodeops *pv_nodeops;
-
-	char **pv_bootstr;
-	struct linux_dev_v0_funcs pv_v0devops;
-
-	char *pv_stdin;
-	char *pv_stdout;
-#define	PROMDEV_KBD	0		/* input from keyboard */
-#define	PROMDEV_SCREEN	0		/* output to screen */
-#define	PROMDEV_TTYA	1		/* in/out to ttya */
-#define	PROMDEV_TTYB	2		/* in/out to ttyb */
-
-	/* Blocking getchar/putchar.  NOT REENTRANT! (grr) */
-	int (*pv_getchar)(void);
-	void (*pv_putchar)(int ch);
-
-	/* Non-blocking variants. */
-	int (*pv_nbgetchar)(void);
-	int (*pv_nbputchar)(int ch);
-
-	void (*pv_putstr)(char *str, int len);
-
-	/* Miscellany. */
-	void (*pv_reboot)(char *bootstr);
-	void (*pv_printf)(__const__ char *fmt, ...);
-	void (*pv_abort)(void);
-	__volatile__ int *pv_ticks;
-	void (*pv_halt)(void);
-	void (**pv_synchook)(void);
-
-	/* Evaluate a forth string, not different proto for V0 and V2->up. */
-	union {
-		void (*v0_eval)(int len, char *str);
-		void (*v2_eval)(char *str);
-	} pv_fortheval;
-
-	struct linux_arguments_v0 **pv_v0bootargs;
-
-	/* Get ether address. */
-	unsigned int (*pv_enaddr)(int d, char *enaddr);
-
-	struct linux_bootargs_v2 pv_v2bootargs;
-	struct linux_dev_v2_funcs pv_v2devops;
-
-	int filler[15];
-
-	/* This one is sun4c/sun4 only. */
-	void (*pv_setctxt)(int ctxt, char *va, int pmeg);
-
-	/* Prom version 3 Multiprocessor routines. This stuff is crazy.
-	 * No joke. Calling these when there is only one cpu probably
-	 * crashes the machine, have to test this. :-)
-	 */
-
-	/* v3_cpustart() will start the cpu 'whichcpu' in mmu-context
-	 * 'thiscontext' executing at address 'prog_counter'
-	 */
-	int (*v3_cpustart)(unsigned int whichcpu, int ctxtbl_ptr,
-			   int thiscontext, char *prog_counter);
-
-	/* v3_cpustop() will cause cpu 'whichcpu' to stop executing
-	 * until a resume cpu call is made.
-	 */
-	int (*v3_cpustop)(unsigned int whichcpu);
-
-	/* v3_cpuidle() will idle cpu 'whichcpu' until a stop or
-	 * resume cpu call is made.
-	 */
-	int (*v3_cpuidle)(unsigned int whichcpu);
-
-	/* v3_cpuresume() will resume processor 'whichcpu' executing
-	 * starting with whatever 'pc' and 'npc' were left at the
-	 * last 'idle' or 'stop' call.
-	 */
-	int (*v3_cpuresume)(unsigned int whichcpu);
-};
-
-/* Routines for traversing the prom device tree. */
-struct linux_nodeops {
-	int (*no_nextnode)(int node);
-	int (*no_child)(int node);
-	int (*no_proplen)(int node, const char *name);
-	int (*no_getprop)(int node, const char *name, char *val);
-	int (*no_setprop)(int node, const char *name, char *val, int len);
-	char * (*no_nextprop)(int node, char *name);
-};
-
-/* More fun PROM structures for device probing. */
-#define PROMREG_MAX     24
-#define PROMVADDR_MAX   16
-#define PROMINTR_MAX    32
-
-struct linux_prom_registers {
-	unsigned int which_io;	/* hi part of physical address */
-	unsigned int phys_addr;	/* The physical address of this register */
-	unsigned int reg_size;	/* How many bytes does this register take up? */
-};
-
-struct linux_prom64_registers {
-	unsigned long phys_addr;
-	unsigned long reg_size;
-};
-
-struct linux_prom_irqs {
-	int pri;    /* IRQ priority */
-	int vector; /* This is foobar, what does it do? */
-};
-
-/* Element of the "ranges" vector */
-struct linux_prom_ranges {
-	unsigned int ot_child_space;
-	unsigned int ot_child_base;		/* Bus feels this */
-	unsigned int ot_parent_space;
-	unsigned int ot_parent_base;		/* CPU looks from here */
-	unsigned int or_size;
-};
-
-/* Ranges and reg properties are a bit different for PCI. */
-struct linux_prom_pci_registers {
-	unsigned int phys_hi;
-	unsigned int phys_mid;
-	unsigned int phys_lo;
-
-	unsigned int size_hi;
-	unsigned int size_lo;
-};
-
-struct linux_prom_pci_ranges {
-	unsigned int child_phys_hi;	/* Only certain bits are encoded here. */
-	unsigned int child_phys_mid;
-	unsigned int child_phys_lo;
-
-	unsigned int parent_phys_hi;
-	unsigned int parent_phys_lo;
-
-	unsigned int size_hi;
-	unsigned int size_lo;
-};
-
-struct linux_prom_pci_intmap {
-	unsigned int phys_hi;
-	unsigned int phys_mid;
-	unsigned int phys_lo;
-
-	unsigned int interrupt;
-
-	int          cnode;
-	unsigned int cinterrupt;
-};
-
-struct linux_prom_pci_intmask {
-	unsigned int phys_hi;
-	unsigned int phys_mid;
-	unsigned int phys_lo;
-	unsigned int interrupt;
-};
-
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* !(__SPARC64_OPENPROM_H) */
-- 
cgit v0.10.2


From 2ef4c01e180902a0197f959f84d4ae1d8eb18888 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 21:31:13 -0800
Subject: sparc: fix warning in userspace header jsflash.h

Fix following warnings in jsflash.h:

jsflash.h:11: include of <linux/types.h> is preferred over <asm/types.h>
jsflash.h:24: found __[us]{8,16,32,64} type without #include <linux/types.h>

Fixed by changing the include to <linux/types.h>

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/jsflash.h b/arch/sparc/include/asm/jsflash.h
index 3457f29..0717d9e 100644
--- a/arch/sparc/include/asm/jsflash.h
+++ b/arch/sparc/include/asm/jsflash.h
@@ -8,7 +8,7 @@
 #define _SPARC_JSFLASH_H
 
 #ifndef _SPARC_TYPES_H
-#include <asm/types.h>
+#include <linux/types.h>
 #endif
 
 /*
-- 
cgit v0.10.2


From fffeeb413704b742dd1d08a3b5a0070a72ab52e1 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 21:31:58 -0800
Subject: sparc: fix warnings in userspace header byteorder.h

Fix following warnings in byteorder.h:

byteorder.h:4: include of <linux/types.h> is preferred over <asm/types.h>
byteorder.h:9: leaks CONFIG_SPARC32 to userspace where it is not valid
byteorder.h:13: leaks CONFIG_SPARC64 to userspace where it is not valid
byteorder.h:14: found __[us]{8,16,32,64} type without #include <linux/types.h>
byteorder.h:47: leaks CONFIG_SPARC64 to userspace where it is not valid

- changed to use include <linux/types.h> as suggested
- use preprocessor defined symbols to distingush between 32 and 64 bit

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/byteorder.h b/arch/sparc/include/asm/byteorder.h
index 5a70f13..738414b 100644
--- a/arch/sparc/include/asm/byteorder.h
+++ b/arch/sparc/include/asm/byteorder.h
@@ -1,16 +1,12 @@
 #ifndef _SPARC_BYTEORDER_H
 #define _SPARC_BYTEORDER_H
 
-#include <asm/types.h>
+#include <linux/types.h>
 #include <asm/asi.h>
 
 #define __BIG_ENDIAN
 
-#ifdef CONFIG_SPARC32
-#define __SWAB_64_THRU_32__
-#endif
-
-#ifdef CONFIG_SPARC64
+#if defined(__sparc__) && defined(__arch64__)
 static inline __u16 __arch_swab16p(const __u16 *addr)
 {
 	__u16 ret;
@@ -44,7 +40,9 @@ static inline __u64 __arch_swab64p(const __u64 *addr)
 }
 #define __arch_swab64p __arch_swab64p
 
-#endif /* CONFIG_SPARC64 */
+#else
+#define __SWAB_64_THRU_32__
+#endif /* defined(__sparc__) && defined(__arch64__) */
 
 #include <linux/byteorder.h>
 
-- 
cgit v0.10.2


From 220483fec0cbc3710c828a236c9f1099d5de537a Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 21:32:34 -0800
Subject: sparc: fix warning in userspace header traps.h

Fix following warning:
traps.h:23: extern's make no sense in userspace

Add an ifdef __KERNEL__ block that cover the
extern definition and a few related things that neither
is for userspace.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/traps.h b/arch/sparc/include/asm/traps.h
index bebdbf8..4becd66 100644
--- a/arch/sparc/include/asm/traps.h
+++ b/arch/sparc/include/asm/traps.h
@@ -10,7 +10,7 @@
 #define NUM_SPARC_TRAPS  255
 
 #ifndef __ASSEMBLY__
-
+#ifdef __KERNEL__
 /* This is for V8 compliant Sparc CPUS */
 struct tt_entry {
 	unsigned long inst_one;
@@ -29,7 +29,7 @@ static inline unsigned long get_tbr(void)
 	__asm__ __volatile__("rd %%tbr, %0\n\t" : "=r" (tbr));
 	return tbr;
 }
-
+#endif /* (__KERNEL__) */
 #endif /* !(__ASSEMBLY__) */
 
 /* For patching the trap table at boot time, we need to know how to
-- 
cgit v0.10.2


From 65579f3cfbc55a6d5ed0469a6f069ada6f810a3e Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 21:33:05 -0800
Subject: sparc: drop get_tbr() in traps.h

get_tbr() has no users in the whole tree -drop it.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/traps.h b/arch/sparc/include/asm/traps.h
index 4becd66..3aa62dd 100644
--- a/arch/sparc/include/asm/traps.h
+++ b/arch/sparc/include/asm/traps.h
@@ -22,13 +22,6 @@ struct tt_entry {
 /* We set this to _start in system setup. */
 extern struct tt_entry *sparc_ttable;
 
-static inline unsigned long get_tbr(void)
-{
-	unsigned long tbr;
-
-	__asm__ __volatile__("rd %%tbr, %0\n\t" : "=r" (tbr));
-	return tbr;
-}
 #endif /* (__KERNEL__) */
 #endif /* !(__ASSEMBLY__) */
 
-- 
cgit v0.10.2


From f54c88cb6c6928f259b95abb4477970df2bd3d55 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 2 Jan 2009 21:33:54 -0800
Subject: sparc: remove NO_PROC_ID - it is no longer used

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h
index 8408d9d..58101dc 100644
--- a/arch/sparc/include/asm/smp_32.h
+++ b/arch/sparc/include/asm/smp_32.h
@@ -170,7 +170,4 @@ void smp_setup_cpu_possible_map(void);
 #define smp_setup_cpu_possible_map() do { } while (0)
 
 #endif /* !(SMP) */
-
-#define NO_PROC_ID            0xFF
-
 #endif /* !(_SPARC_SMP_H) */
-- 
cgit v0.10.2


From ee38e5140bafbf40e1bd25ab917ac8db54a27799 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 3 Jan 2009 16:14:05 +1030
Subject: frv: define __fls

Like fls, but can't be handed 0 and returns the bit number.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h
index 39456ba..287f6f6 100644
--- a/include/asm-frv/bitops.h
+++ b/include/asm-frv/bitops.h
@@ -339,6 +339,19 @@ int __ffs(unsigned long x)
 	return 31 - bit;
 }
 
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	unsigned long bit;
+	asm("scan %1,gr0,%0" : "=r"(bit) : "r"(word));
+	return bit;
+}
+
 /*
  * special slimline version of fls() for calculating ilog2_u32()
  * - note: no protection against n == 0
-- 
cgit v0.10.2


From 9ddabc2a29163e4b243d10c5e06fc5584073d7ad Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 3 Jan 2009 16:16:04 +1030
Subject: h8300: define __fls

Like fls, but can't be handed 0 and returns the bit number.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
index cb18e3b..cb9ddf5 100644
--- a/arch/h8300/include/asm/bitops.h
+++ b/arch/h8300/include/asm/bitops.h
@@ -207,6 +207,7 @@ static __inline__ unsigned long __ffs(unsigned long word)
 #endif /* __KERNEL__ */
 
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #endif /* _H8300_BITOPS_H */
-- 
cgit v0.10.2


From 16a206260ee70f181de6a3672678545859589ef2 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 3 Jan 2009 16:16:54 +1030
Subject: m32r: define __fls

Like fls, but can't be handed 0 and returns the bit number.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/asm-m32r/bitops.h b/include/asm-m32r/bitops.h
index 6dc9b81..aaddf0d 100644
--- a/include/asm-m32r/bitops.h
+++ b/include/asm-m32r/bitops.h
@@ -251,6 +251,7 @@ static __inline__ int test_and_change_bit(int nr, volatile void * addr)
 #include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
-- 
cgit v0.10.2


From 5c134dad43443aa9c9606eaf47c378a6b9c5c597 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 3 Jan 2009 16:19:03 +1030
Subject: mn10300: define __fls

Like fls, but can't be handed 0 and returns the bit number.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/asm-mn10300/bitops.h b/include/asm-mn10300/bitops.h
index cc6d40c..0b610f4 100644
--- a/include/asm-mn10300/bitops.h
+++ b/include/asm-mn10300/bitops.h
@@ -196,6 +196,17 @@ int fls(int x)
 }
 
 /**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	return __ilog2_u32(word);
+}
+
+/**
  * ffs - find first bit set
  * @x: the word to search
  *
-- 
cgit v0.10.2


From 5ece5c5192d065c229da01e7b347c1d3877b59fa Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 3 Jan 2009 16:21:08 +1030
Subject: xtensa: define __fls

Like fls, but can't be handed 0 and returns the bit number.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h
index 23261e8..6c39303 100644
--- a/include/asm-xtensa/bitops.h
+++ b/include/asm-xtensa/bitops.h
@@ -82,6 +82,16 @@ static inline int fls (unsigned int x)
 	return 32 - __cntlz(x);
 }
 
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	return 31 - __cntlz(word);
+}
 #else
 
 /* Use the generic implementation if we don't have the nsa/nsau instructions. */
@@ -90,6 +100,7 @@ static inline int fls (unsigned int x)
 # include <asm-generic/bitops/__ffs.h>
 # include <asm-generic/bitops/ffz.h>
 # include <asm-generic/bitops/fls.h>
+# include <asm-generic/bitops/__fls.h>
 
 #endif
 
-- 
cgit v0.10.2


From fa8efd50b353a36dbcd2c47a55335f002af4deb0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 2 Jan 2009 21:54:25 -0800
Subject: sparc: Update 64-bit defconfig.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index 05d19a3..cde19ae 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -1,27 +1,27 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28-rc4
-# Mon Nov 10 12:35:09 2008
+# Linux kernel version: 2.6.28
+# Fri Jan  2 18:14:26 2009
 #
 CONFIG_SPARC=y
 CONFIG_SPARC64=y
+CONFIG_ARCH_DEFCONFIG="arch/sparc/configs/sparc64_defconfig"
+CONFIG_BITS=64
+CONFIG_64BIT=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CMOS_UPDATE=y
 CONFIG_GENERIC_CLOCKEVENTS=y
-CONFIG_64BIT=y
-CONFIG_MMU=y
 CONFIG_IOMMU_HELPER=y
 CONFIG_QUICKLIST=y
 CONFIG_STACKTRACE_SUPPORT=y
 CONFIG_LOCKDEP_SUPPORT=y
-CONFIG_ARCH_MAY_HAVE_PC_FDC=y
-# CONFIG_ARCH_HAS_ILOG2_U32 is not set
-# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
 CONFIG_AUDIT_ARCH=y
 CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_MMU=y
 CONFIG_ARCH_NO_VIRT_TO_BUS=y
 CONFIG_OF=y
-CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
@@ -86,6 +86,7 @@ CONFIG_SLUB_DEBUG=y
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
 # CONFIG_MARKERS is not set
 CONFIG_OPROFILE=m
 CONFIG_HAVE_OPROFILE=y
@@ -127,34 +128,40 @@ CONFIG_DEFAULT_AS=y
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="anticipatory"
 CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
 # CONFIG_FREEZER is not set
 
 #
 # Processor type and features
 #
-CONFIG_SPARC64_PAGE_SIZE_8KB=y
-# CONFIG_SPARC64_PAGE_SIZE_64KB is not set
-CONFIG_SECCOMP=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=64
 CONFIG_HZ_100=y
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_300 is not set
 # CONFIG_HZ_1000 is not set
 CONFIG_HZ=100
 CONFIG_SCHED_HRTICK=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_SPARC64_SMP=y
+CONFIG_SPARC64_PAGE_SIZE_8KB=y
+# CONFIG_SPARC64_PAGE_SIZE_64KB is not set
+CONFIG_SECCOMP=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_TICK_ONESHOT=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=64
 # CONFIG_CPU_FREQ is not set
 CONFIG_US3_MC=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_FIND_NEXT_BIT=y
-CONFIG_GENERIC_HWEIGHT=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_HUGETLB_PAGE_SIZE_4MB=y
 # CONFIG_HUGETLB_PAGE_SIZE_512K is not set
 # CONFIG_HUGETLB_PAGE_SIZE_64K is not set
@@ -183,10 +190,18 @@ CONFIG_PHYS_ADDR_T_64BIT=y
 CONFIG_ZONE_DMA_FLAG=0
 CONFIG_NR_QUICK=1
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_SCHED_SMT=y
+CONFIG_SCHED_MC=y
+# CONFIG_PREEMPT_NONE is not set
+CONFIG_PREEMPT_VOLUNTARY=y
+# CONFIG_PREEMPT is not set
+# CONFIG_CMDLINE_BOOL is not set
+
+#
+# Bus options (PCI etc.)
+#
 CONFIG_SBUS=y
 CONFIG_SBUSCHAR=y
-CONFIG_SUN_AUXIO=y
-CONFIG_SUN_IO=y
 CONFIG_SUN_LDOMS=y
 CONFIG_PCI=y
 CONFIG_PCI_DOMAINS=y
@@ -195,7 +210,9 @@ CONFIG_ARCH_SUPPORTS_MSI=y
 CONFIG_PCI_MSI=y
 # CONFIG_PCI_LEGACY is not set
 # CONFIG_PCI_DEBUG is not set
+# CONFIG_PCCARD is not set
 CONFIG_SUN_OPENPROMFS=m
+CONFIG_SPARC64_PCI=y
 
 #
 # Executable file formats
@@ -207,17 +224,13 @@ CONFIG_COMPAT_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=m
 CONFIG_COMPAT=y
 CONFIG_SYSVIPC_COMPAT=y
-CONFIG_SCHED_SMT=y
-CONFIG_SCHED_MC=y
-# CONFIG_PREEMPT_NONE is not set
-CONFIG_PREEMPT_VOLUNTARY=y
-# CONFIG_PREEMPT is not set
-# CONFIG_CMDLINE_BOOL is not set
 CONFIG_NET=y
 
 #
 # Networking options
 #
+# CONFIG_NET_NS is not set
+CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
@@ -314,6 +327,7 @@ CONFIG_VLAN_8021Q=m
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
 # CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
 
 #
 # Network testing
@@ -330,8 +344,8 @@ CONFIG_WIRELESS=y
 # CONFIG_CFG80211 is not set
 CONFIG_WIRELESS_OLD_REGULATORY=y
 # CONFIG_WIRELESS_EXT is not set
+# CONFIG_LIB80211 is not set
 # CONFIG_MAC80211 is not set
-# CONFIG_IEEE80211 is not set
 # CONFIG_RFKILL is not set
 # CONFIG_NET_9P is not set
 
@@ -378,8 +392,10 @@ CONFIG_MISC_DEVICES=y
 # CONFIG_EEPROM_93CX6 is not set
 # CONFIG_SGI_IOC4 is not set
 # CONFIG_TIFM_CORE is not set
+# CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_HP_ILO is not set
+# CONFIG_C2PORT is not set
 CONFIG_HAVE_IDE=y
 CONFIG_IDE=y
 
@@ -387,6 +403,7 @@ CONFIG_IDE=y
 # Please see Documentation/ide/ide.txt for help/info on IDE drives
 #
 CONFIG_IDE_TIMINGS=y
+CONFIG_IDE_ATAPI=y
 # CONFIG_BLK_DEV_IDE_SATA is not set
 CONFIG_IDE_GD=y
 CONFIG_IDE_GD_ATA=y
@@ -394,7 +411,6 @@ CONFIG_IDE_GD_ATA=y
 CONFIG_BLK_DEV_IDECD=y
 CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y
 # CONFIG_BLK_DEV_IDETAPE is not set
-# CONFIG_BLK_DEV_IDESCSI is not set
 # CONFIG_IDE_TASK_IOCTL is not set
 CONFIG_IDE_PROC_FS=y
 
@@ -477,6 +493,7 @@ CONFIG_SCSI_FC_ATTRS=y
 # CONFIG_SCSI_SRP_ATTRS is not set
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
+# CONFIG_SCSI_CXGB3_ISCSI is not set
 # CONFIG_BLK_DEV_3W_XXXX_RAID is not set
 # CONFIG_SCSI_3W_9XXX is not set
 # CONFIG_SCSI_ACARD is not set
@@ -490,6 +507,8 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
+# CONFIG_LIBFC is not set
+# CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
 # CONFIG_SCSI_IPS is not set
@@ -564,6 +583,9 @@ CONFIG_PHYLIB=m
 # CONFIG_BROADCOM_PHY is not set
 # CONFIG_ICPLUS_PHY is not set
 # CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
 # CONFIG_MDIO_BITBANG is not set
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=m
@@ -590,7 +612,6 @@ CONFIG_NET_PCI=y
 # CONFIG_ADAPTEC_STARFIRE is not set
 # CONFIG_B44 is not set
 # CONFIG_FORCEDETH is not set
-# CONFIG_EEPRO100 is not set
 # CONFIG_E100 is not set
 # CONFIG_FEALNX is not set
 # CONFIG_NATSEMI is not set
@@ -600,6 +621,7 @@ CONFIG_NET_PCI=y
 # CONFIG_R6040 is not set
 # CONFIG_SIS900 is not set
 # CONFIG_EPIC100 is not set
+# CONFIG_SMSC9420 is not set
 # CONFIG_SUNDANCE is not set
 # CONFIG_TLAN is not set
 # CONFIG_VIA_RHINE is not set
@@ -629,6 +651,7 @@ CONFIG_BNX2=m
 # CONFIG_JME is not set
 CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
+CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_CHELSIO_T3 is not set
 # CONFIG_ENIC is not set
 # CONFIG_IXGBE is not set
@@ -778,6 +801,7 @@ CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=m
@@ -870,6 +894,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_ADM1029 is not set
 # CONFIG_SENSORS_ADM1031 is not set
 # CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ADT7462 is not set
 # CONFIG_SENSORS_ADT7470 is not set
 # CONFIG_SENSORS_ADT7473 is not set
 # CONFIG_SENSORS_ATXP1 is not set
@@ -919,11 +944,11 @@ CONFIG_HWMON=y
 # CONFIG_THERMAL is not set
 # CONFIG_THERMAL_HWMON is not set
 # CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
 
 #
 # Sonics Silicon Backplane
 #
-CONFIG_SSB_POSSIBLE=y
 # CONFIG_SSB is not set
 
 #
@@ -1071,6 +1096,7 @@ CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
 CONFIG_SND_PCM_OSS_PLUGINS=y
 CONFIG_SND_SEQUENCER_OSS=y
+# CONFIG_SND_HRTIMER is not set
 # CONFIG_SND_DYNAMIC_MINORS is not set
 CONFIG_SND_SUPPORT_OLD_API=y
 CONFIG_SND_VERBOSE_PROCFS=y
@@ -1242,11 +1268,11 @@ CONFIG_USB_UHCI_HCD=m
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
 #
 
 #
-# may also be needed; see USB_STORAGE Help for more information
+# see USB_STORAGE Help for more information
 #
 CONFIG_USB_STORAGE=m
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -1337,6 +1363,7 @@ CONFIG_RTC_INTF_DEV=y
 # CONFIG_RTC_DRV_M41T80 is not set
 # CONFIG_RTC_DRV_S35390A is not set
 # CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
 
 #
 # SPI RTC drivers
@@ -1365,7 +1392,6 @@ CONFIG_RTC_DRV_STARFIRE=y
 # CONFIG_DMADEVICES is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
-CONFIG_STAGING_EXCLUDE_BUILD=y
 
 #
 # Misc Linux/SPARC drivers
@@ -1544,6 +1570,7 @@ CONFIG_SCHEDSTATS=y
 # CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
 # CONFIG_DEBUG_KOBJECT is not set
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
@@ -1552,6 +1579,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
 CONFIG_DEBUG_MEMORY_INIT=y
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
 # CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
@@ -1560,8 +1588,12 @@ CONFIG_DEBUG_MEMORY_INIT=y
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_LKDTM is not set
 # CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
 
 #
 # Tracers
@@ -1571,7 +1603,9 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_PRINTK_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
@@ -1600,11 +1634,16 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_FIPS is not set
 CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
 CONFIG_CRYPTO_AEAD=y
+CONFIG_CRYPTO_AEAD2=y
 CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
 CONFIG_CRYPTO_HASH=y
-CONFIG_CRYPTO_RNG=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
 CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
 CONFIG_CRYPTO_GF128MUL=m
 CONFIG_CRYPTO_NULL=m
 # CONFIG_CRYPTO_CRYPTD is not set
-- 
cgit v0.10.2


From bc7a166dd1530965aa80966f267235f067c5fddf Mon Sep 17 00:00:00 2001
From: Ulrich Dangel <uli@spamt.net>
Date: Fri, 2 Jan 2009 19:30:13 +0100
Subject: ALSA: hda - add basic jack reporting functions to patch_conexant.c

Added functions to report jack sense.
As CXT5051_PORTB_EVENT has the same value as CONEXANT_MIC_EVENT two input
devices for the microphone will be created if using CXT5051.

Signed-off-by: Ulrich Dangel <uli@spamt.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index b20e1ce..e0eebfb 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -25,6 +25,8 @@
 #include <linux/slab.h>
 #include <linux/pci.h>
 #include <sound/core.h>
+#include <sound/jack.h>
+
 #include "hda_codec.h"
 #include "hda_local.h"
 
@@ -37,8 +39,21 @@
 #define CONEXANT_HP_EVENT	0x37
 #define CONEXANT_MIC_EVENT	0x38
 
+/* Conexant 5051 specific */
+
+#define CXT5051_SPDIF_OUT	0x1C
+#define CXT5051_PORTB_EVENT	0x38
+#define CXT5051_PORTC_EVENT	0x39
 
 
+struct conexant_jack {
+
+	hda_nid_t nid;
+	int type;
+	struct snd_jack *jack;
+
+};
+
 struct conexant_spec {
 
 	struct snd_kcontrol_new *mixers[5];
@@ -83,6 +98,9 @@ struct conexant_spec {
 
 	unsigned int spdif_route;
 
+	/* jack detection */
+	struct snd_array jacks;
+
 	/* dynamic controls, init_verbs and input_mux */
 	struct auto_pin_cfg autocfg;
 	struct hda_input_mux private_imux;
@@ -329,6 +347,86 @@ static int conexant_mux_enum_put(struct snd_kcontrol *kcontrol,
 				     &spec->cur_mux[adc_idx]);
 }
 
+static int conexant_add_jack(struct hda_codec *codec,
+		hda_nid_t nid, int type)
+{
+	struct conexant_spec *spec;
+	struct conexant_jack *jack;
+	const char *name;
+
+	spec = codec->spec;
+	snd_array_init(&spec->jacks, sizeof(*jack), 32);
+	jack = snd_array_new(&spec->jacks);
+	name = (type == SND_JACK_HEADPHONE) ? "Headphone" : "Mic" ;
+
+	if (!jack)
+		return -ENOMEM;
+
+	jack->nid = nid;
+	jack->type = type;
+
+	return snd_jack_new(codec->bus->card, name, type, &jack->jack);
+}
+
+static void conexant_report_jack(struct hda_codec *codec, hda_nid_t nid)
+{
+	struct conexant_spec *spec = codec->spec;
+	struct conexant_jack *jacks = spec->jacks.list;
+
+	if (jacks) {
+		int i;
+		for (i = 0; i < spec->jacks.used; i++) {
+			if (jacks->nid == nid) {
+				unsigned int present;
+				present = snd_hda_codec_read(codec, nid, 0,
+						AC_VERB_GET_PIN_SENSE, 0) &
+					AC_PINSENSE_PRESENCE;
+
+				present = (present) ? jacks->type : 0 ;
+
+				snd_jack_report(jacks->jack,
+						present);
+			}
+			jacks++;
+		}
+	}
+}
+
+static int conexant_init_jacks(struct hda_codec *codec)
+{
+#ifdef CONFIG_SND_JACK
+	struct conexant_spec *spec = codec->spec;
+	int i;
+
+	for (i = 0; i < spec->num_init_verbs; i++) {
+		const struct hda_verb *hv;
+
+		hv = spec->init_verbs[i];
+		while (hv->nid) {
+			int err = 0;
+			switch (hv->param ^ AC_USRSP_EN) {
+			case CONEXANT_HP_EVENT:
+				err = conexant_add_jack(codec, hv->nid,
+						SND_JACK_HEADPHONE);
+				conexant_report_jack(codec, hv->nid);
+				break;
+			case CXT5051_PORTC_EVENT:
+			case CONEXANT_MIC_EVENT:
+				err = conexant_add_jack(codec, hv->nid,
+						SND_JACK_MICROPHONE);
+				conexant_report_jack(codec, hv->nid);
+				break;
+			}
+			if (err < 0)
+				return err;
+			++hv;
+		}
+	}
+#endif
+	return 0;
+
+}
+
 static int conexant_init(struct hda_codec *codec)
 {
 	struct conexant_spec *spec = codec->spec;
@@ -341,6 +439,16 @@ static int conexant_init(struct hda_codec *codec)
 
 static void conexant_free(struct hda_codec *codec)
 {
+#ifdef CONFIG_SND_JACK
+	struct conexant_spec *spec = codec->spec;
+	if (spec->jacks.list) {
+		struct conexant_jack *jacks = spec->jacks.list;
+		int i;
+		for (i = 0; i < spec->jacks.used; i++)
+			snd_device_free(codec->bus->card, &jacks[i].jack);
+		snd_array_free(&spec->jacks);
+	}
+#endif
 	kfree(codec->spec);
 }
 
@@ -1526,9 +1634,6 @@ static int patch_cxt5047(struct hda_codec *codec)
 /* Conexant 5051 specific */
 static hda_nid_t cxt5051_dac_nids[1] = { 0x10 };
 static hda_nid_t cxt5051_adc_nids[2] = { 0x14, 0x15 };
-#define CXT5051_SPDIF_OUT	0x1C
-#define CXT5051_PORTB_EVENT	0x38
-#define CXT5051_PORTC_EVENT	0x39
 
 static struct hda_channel_mode cxt5051_modes[1] = {
 	{ 2, NULL },
-- 
cgit v0.10.2


From acf26c0cad5ba00dcafa633805e4660e90c1eac0 Mon Sep 17 00:00:00 2001
From: Ulrich Dangel <uli@spamt.net>
Date: Fri, 2 Jan 2009 19:30:14 +0100
Subject: ALSA: hda - cxt5051 report jack state

Signed-off-by: Ulrich Dangel <uli@spamt.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index e0eebfb..75de40a 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -1713,6 +1713,7 @@ static void cxt5051_hp_automute(struct hda_codec *codec)
 static void cxt5051_hp_unsol_event(struct hda_codec *codec,
 				   unsigned int res)
 {
+	int nid = (res & AC_UNSOL_RES_SUBTAG) >> 20;
 	switch (res >> 26) {
 	case CONEXANT_HP_EVENT:
 		cxt5051_hp_automute(codec);
@@ -1724,6 +1725,7 @@ static void cxt5051_hp_unsol_event(struct hda_codec *codec,
 		cxt5051_portc_automic(codec);
 		break;
 	}
+	conexant_report_jack(codec, nid);
 }
 
 static struct snd_kcontrol_new cxt5051_mixers[] = {
@@ -1798,6 +1800,7 @@ static struct hda_verb cxt5051_init_verbs[] = {
 static int cxt5051_init(struct hda_codec *codec)
 {
 	conexant_init(codec);
+	conexant_init_jacks(codec);
 	if (codec->patch_ops.unsol_event) {
 		cxt5051_hp_automute(codec);
 		cxt5051_portb_automic(codec);
-- 
cgit v0.10.2


From 015ab17dc2e9de805c26e74f498b12ee5e8de07e Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 14:04:20 +0000
Subject: intel-iommu: remove some unused struct intel_iommu fields

The seg, saved_msg and sysdev fields appear to be unused since
before the code was first merged.

linux/msi.h is not needed in linux/intel-iommu.h anymore since
there is no longer a reference to struct msi_msg. The MSI code
in drivers/pci/intel-iommu.c still has linux/msi.h included
via linux/dmar.h.

linux/sysdev.h isn't needed because there is no reference to
struct sys_device.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5c8baa4..8e5a445 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -27,7 +27,6 @@
 #include <linux/slab.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/dmar.h>
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3d017cf..1bff7bf 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -23,8 +23,6 @@
 #define _INTEL_IOMMU_H_
 
 #include <linux/types.h>
-#include <linux/msi.h>
-#include <linux/sysdev.h>
 #include <linux/iova.h>
 #include <linux/io.h>
 #include <linux/dma_remapping.h>
@@ -289,7 +287,6 @@ struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
 	u64		cap;
 	u64		ecap;
-	int		seg;
 	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
 	spinlock_t	register_lock; /* protect register handling */
 	int		seq_id;	/* sequence id of the iommu */
@@ -302,8 +299,6 @@ struct intel_iommu {
 
 	unsigned int irq;
 	unsigned char name[7];    /* Device Name */
-	struct msi_msg saved_msg;
-	struct sys_device sysdev;
 	struct iommu_flush flush;
 #endif
 	struct q_inval  *qi;            /* Queued invalidation info */
-- 
cgit v0.10.2


From 519a05491586dad04e687660e54c57882315b22b Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 14:21:13 +0000
Subject: intel-iommu: make init_dmars() static

init_dmars() is not used outside of drivers/pci/intel-iommu.c

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8e5a445..95ae3a9 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1589,7 +1589,7 @@ static inline void iommu_prepare_isa(void)
 }
 #endif /* !CONFIG_DMAR_FLPY_WA */
 
-int __init init_dmars(void)
+static int __init init_dmars(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct dmar_rmrr_unit *rmrr;
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 952df39..cf92c49 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -141,7 +141,6 @@ struct device_domain_info {
 	struct dmar_domain *domain; /* pointer to domain */
 };
 
-extern int init_dmars(void);
 extern void free_dmar_iommu(struct intel_iommu *iommu);
 
 extern int dmar_disabled;
-- 
cgit v0.10.2


From f27be03b271851fd54529f292c0f25b4c1f1a553 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:43 +0000
Subject: intel-iommu: move DMA_32/64BIT_PFN into intel-iommu.c

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 95ae3a9..6fadbb9b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -53,6 +53,9 @@
 
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
 
+#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
+#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
 
 static void flush_unmaps_timeout(unsigned long data);
 
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index cf92c49..2e5a5c0 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -9,11 +9,6 @@
 #define VTD_PAGE_MASK		(((u64)-1) << VTD_PAGE_SHIFT)
 #define VTD_PAGE_ALIGN(addr)	(((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
 
-#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
-#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
-
-
 /*
  * 0: Present
  * 1-11: Reserved
-- 
cgit v0.10.2


From 46b08e1a76b758193b0e7b889c6486a16eb1e9e2 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:44 +0000
Subject: intel-iommu: move root entry defs from dma_remapping.h

We keep the struct root_entry forward declaration for the
pointer in struct intel_iommu.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6fadbb9b..29bf2d8 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -57,6 +57,39 @@
 #define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
 #define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
 
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+	u64	val;
+	u64	rsvd1;
+};
+#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+	return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+	root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+	root->val |= value & VTD_PAGE_MASK;
+}
+
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+	return (struct context_entry *)
+		(root_present(root)?phys_to_virt(
+		root->val & VTD_PAGE_MASK) :
+		NULL);
+}
+
 static void flush_unmaps_timeout(unsigned long data);
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 2e5a5c0..d852166 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -9,39 +9,7 @@
 #define VTD_PAGE_MASK		(((u64)-1) << VTD_PAGE_SHIFT)
 #define VTD_PAGE_ALIGN(addr)	(((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
 
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
-	u64	val;
-	u64	rsvd1;
-};
-#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
-	return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
-	root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
-	root->val |= value & VTD_PAGE_MASK;
-}
-
-struct context_entry;
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
-	return (struct context_entry *)
-		(root_present(root)?phys_to_virt(
-		root->val & VTD_PAGE_MASK) :
-		NULL);
-}
+struct root_entry;
 
 /*
  * low 64 bits:
-- 
cgit v0.10.2


From 7a8fc25e0cc6e75fa6fdb0a856490e324218550b Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:45 +0000
Subject: intel-iommu: move context entry defs out from dma_remapping.h

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 29bf2d8..9d06f4b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -90,6 +90,44 @@ get_context_addr_from_root(struct root_entry *root)
 		NULL);
 }
 
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+	u64 lo;
+	u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
+#define context_address_width(c) ((c).hi &  7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+	do { \
+		(c).lo &= (((u64)-1) << 4) | 3; \
+		(c).lo |= ((val) & 3) << 2; \
+	} while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+	do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
 static void flush_unmaps_timeout(unsigned long data);
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index d852166..9a88f7d 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -12,44 +12,6 @@
 struct root_entry;
 
 /*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
-	u64 lo;
-	u64 hi;
-};
-#define context_present(c) ((c).lo & 1)
-#define context_fault_disable(c) (((c).lo >> 1) & 1)
-#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
-#define context_address_width(c) ((c).hi &  7)
-#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
-
-#define context_set_present(c) do {(c).lo |= 1;} while (0)
-#define context_set_fault_enable(c) \
-	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
-#define context_set_translation_type(c, val) \
-	do { \
-		(c).lo &= (((u64)-1) << 4) | 3; \
-		(c).lo |= ((val) & 3) << 2; \
-	} while (0)
-#define CONTEXT_TT_MULTI_LEVEL 0
-#define context_set_address_root(c, val) \
-	do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
-#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
-#define context_set_domain_id(c, val) \
-	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
-#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
-
-/*
  * 0: readable
  * 1: writable
  * 2-6: reserved
-- 
cgit v0.10.2


From 622ba12a4c2148999bda9b891bfd0c6ddcb6c57e Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:46 +0000
Subject: intel-iommu: move DMA PTE defs out of dma_remapping.h

DMA_PTE_READ/WRITE are needed by kvm.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 9d06f4b..26c5402 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -128,6 +128,28 @@ struct context_entry {
 	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
 #define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
 
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physcial address
+ */
+struct dma_pte {
+	u64 val;
+};
+#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
+#define dma_set_pte_addr(p, addr) do {\
+		(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
 static void flush_unmaps_timeout(unsigned long data);
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 9a88f7d..9d5874e 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -11,31 +11,9 @@
 
 struct root_entry;
 
-/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-11: available
- * 12-63: Host physcial address
- */
-struct dma_pte {
-	u64 val;
-};
-#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
-
 #define DMA_PTE_READ (1)
 #define DMA_PTE_WRITE (2)
 
-#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
-#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
-#define dma_set_pte_prot(p, prot) \
-		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
-#define dma_set_pte_addr(p, addr) do {\
-		(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
-#define dma_pte_present(p) (((p).val & 3) != 0)
-
 struct intel_iommu;
 
 struct dmar_domain {
-- 
cgit v0.10.2


From 99126f7ce14aff5f9371b2fa81fddb82be815794 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:47 +0000
Subject: intel-iommu: move struct dmar_domain def out dma_remapping.h

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 26c5402..97c36b2 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -150,6 +150,24 @@ struct dma_pte {
 		(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
 #define dma_pte_present(p) (((p).val & 3) != 0)
 
+struct dmar_domain {
+	int	id;			/* domain id */
+	struct intel_iommu *iommu;	/* back pointer to owning iommu */
+
+	struct list_head devices; 	/* all devices' list */
+	struct iova_domain iovad;	/* iova's that belong to this domain */
+
+	struct dma_pte	*pgd;		/* virtual address */
+	spinlock_t	mapping_lock;	/* page table lock */
+	int		gaw;		/* max guest address width */
+
+	/* adjusted guest address width, 0 is level 2 30-bit */
+	int		agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+	int		flags;
+};
+
 static void flush_unmaps_timeout(unsigned long data);
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 9d5874e..3330144 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -9,30 +9,12 @@
 #define VTD_PAGE_MASK		(((u64)-1) << VTD_PAGE_SHIFT)
 #define VTD_PAGE_ALIGN(addr)	(((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
 
-struct root_entry;
-
 #define DMA_PTE_READ (1)
 #define DMA_PTE_WRITE (2)
 
 struct intel_iommu;
-
-struct dmar_domain {
-	int	id;			/* domain id */
-	struct intel_iommu *iommu;	/* back pointer to owning iommu */
-
-	struct list_head devices; 	/* all devices' list */
-	struct iova_domain iovad;	/* iova's that belong to this domain */
-
-	struct dma_pte	*pgd;		/* virtual address */
-	spinlock_t	mapping_lock;	/* page table lock */
-	int		gaw;		/* max guest address width */
-
-	/* adjusted guest address width, 0 is level 2 30-bit */
-	int		agaw;
-
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
-	int		flags;
-};
+struct dmar_domain;
+struct root_entry;
 
 /* PCI domain-device relationship */
 struct device_domain_info {
-- 
cgit v0.10.2


From a647dacbb1389aa6a5fa631766c1eaea35905890 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:48 +0000
Subject: intel-iommu: move struct device_domain_info out of dma_remapping.h

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 97c36b2..f23a020 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -168,6 +168,16 @@ struct dmar_domain {
 	int		flags;
 };
 
+/* PCI domain-device relationship */
+struct device_domain_info {
+	struct list_head link;	/* link to domain siblings */
+	struct list_head global; /* link to global list */
+	u8 bus;			/* PCI bus numer */
+	u8 devfn;		/* PCI devfn number */
+	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+	struct dmar_domain *domain; /* pointer to domain */
+};
+
 static void flush_unmaps_timeout(unsigned long data);
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 3330144..4ef5f6b 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -16,16 +16,6 @@ struct intel_iommu;
 struct dmar_domain;
 struct root_entry;
 
-/* PCI domain-device relationship */
-struct device_domain_info {
-	struct list_head link;	/* link to domain siblings */
-	struct list_head global; /* link to global list */
-	u8 bus;			/* PCI bus numer */
-	u8 devfn;		/* PCI devfn number */
-	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
-	struct dmar_domain *domain; /* pointer to domain */
-};
-
 extern void free_dmar_iommu(struct intel_iommu *iommu);
 
 extern int dmar_disabled;
-- 
cgit v0.10.2


From 58fa7304a2c2bfd46e505c293ef779aa1d9715c2 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:49 +0000
Subject: intel-iommu: kill off duplicate def of dmar_disabled

This is only used in dmar.c and intel-iommu.h, so dma_remapping.h
seems like the appropriate place for it.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index f1984fc..f284407 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -144,7 +144,6 @@ struct dmar_rmrr_unit {
 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 /* Intel DMAR  initialization functions */
 extern int intel_iommu_init(void);
-extern int dmar_disabled;
 #else
 static inline int intel_iommu_init(void)
 {
-- 
cgit v0.10.2


From 2abd7e167c1b281f99bb58d302225872bfae9123 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:50 +0000
Subject: intel-iommu: move iommu_prepare_gfx_mapping() out of dma_remapping.h

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index f23a020..c1c59a6 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1686,6 +1686,11 @@ static void __init iommu_prepare_gfx_mapping(void)
 			printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
 	}
 }
+#else /* !CONFIG_DMAR_GFX_WA */
+static inline void iommu_prepare_gfx_mapping(void)
+{
+	return;
+}
 #endif
 
 #ifdef CONFIG_DMAR_FLOPPY_WA
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 4ef5f6b..7799a85 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -20,11 +20,4 @@ extern void free_dmar_iommu(struct intel_iommu *iommu);
 
 extern int dmar_disabled;
 
-#ifndef CONFIG_DMAR_GFX_WA
-static inline void iommu_prepare_gfx_mapping(void)
-{
-	return;
-}
-#endif /* !CONFIG_DMAR_GFX_WA */
-
 #endif
-- 
cgit v0.10.2


From c07e7d217bef198422b7eface456ecfd4bb1ab87 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Fri, 21 Nov 2008 16:54:46 +0000
Subject: intel-iommu: trivially inline context entry macros

Some macros were unused, so I just dropped them:

  context_fault_disable
  context_translation_type
  context_address_root
  context_address_width
  context_domain_id

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index c1c59a6..3be931b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -105,28 +105,53 @@ struct context_entry {
 	u64 lo;
 	u64 hi;
 };
-#define context_present(c) ((c).lo & 1)
-#define context_fault_disable(c) (((c).lo >> 1) & 1)
-#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
-#define context_address_width(c) ((c).hi &  7)
-#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
-
-#define context_set_present(c) do {(c).lo |= 1;} while (0)
-#define context_set_fault_enable(c) \
-	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
-#define context_set_translation_type(c, val) \
-	do { \
-		(c).lo &= (((u64)-1) << 4) | 3; \
-		(c).lo |= ((val) & 3) << 2; \
-	} while (0)
+
+static inline bool context_present(struct context_entry *context)
+{
+	return (context->lo & 1);
+}
+static inline void context_set_present(struct context_entry *context)
+{
+	context->lo |= 1;
+}
+
+static inline void context_set_fault_enable(struct context_entry *context)
+{
+	context->lo &= (((u64)-1) << 2) | 1;
+}
+
 #define CONTEXT_TT_MULTI_LEVEL 0
-#define context_set_address_root(c, val) \
-	do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
-#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
-#define context_set_domain_id(c, val) \
-	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
-#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
+static inline void context_set_translation_type(struct context_entry *context,
+						unsigned long value)
+{
+	context->lo &= (((u64)-1) << 4) | 3;
+	context->lo |= (value & 3) << 2;
+}
+
+static inline void context_set_address_root(struct context_entry *context,
+					    unsigned long value)
+{
+	context->lo |= value & VTD_PAGE_MASK;
+}
+
+static inline void context_set_address_width(struct context_entry *context,
+					     unsigned long value)
+{
+	context->hi |= value & 7;
+}
+
+static inline void context_set_domain_id(struct context_entry *context,
+					 unsigned long value)
+{
+	context->hi |= (value & ((1 << 16) - 1)) << 8;
+}
+
+static inline void context_clear_entry(struct context_entry *context)
+{
+	context->lo = 0;
+	context->hi = 0;
+}
 
 /*
  * 0: readable
@@ -349,7 +374,7 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
 		ret = 0;
 		goto out;
 	}
-	ret = context_present(context[devfn]);
+	ret = context_present(&context[devfn]);
 out:
 	spin_unlock_irqrestore(&iommu->lock, flags);
 	return ret;
@@ -365,7 +390,7 @@ static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
 	root = &iommu->root_entry[bus];
 	context = get_context_addr_from_root(root);
 	if (context) {
-		context_clear_entry(context[devfn]);
+		context_clear_entry(&context[devfn]);
 		__iommu_flush_cache(iommu, &context[devfn], \
 			sizeof(*context));
 	}
@@ -1284,17 +1309,17 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	if (!context)
 		return -ENOMEM;
 	spin_lock_irqsave(&iommu->lock, flags);
-	if (context_present(*context)) {
+	if (context_present(context)) {
 		spin_unlock_irqrestore(&iommu->lock, flags);
 		return 0;
 	}
 
-	context_set_domain_id(*context, domain->id);
-	context_set_address_width(*context, domain->agaw);
-	context_set_address_root(*context, virt_to_phys(domain->pgd));
-	context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
-	context_set_fault_enable(*context);
-	context_set_present(*context);
+	context_set_domain_id(context, domain->id);
+	context_set_address_width(context, domain->agaw);
+	context_set_address_root(context, virt_to_phys(domain->pgd));
+	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
+	context_set_fault_enable(context);
+	context_set_present(context);
 	__iommu_flush_cache(iommu, context, sizeof(*context));
 
 	/* it's a non-present to present mapping */
-- 
cgit v0.10.2


From 19c239ce3d089fee339d1ab7e97b43d6f0557ce5 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Fri, 21 Nov 2008 16:56:53 +0000
Subject: intel-iommu: trivially inline DMA PTE macros

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 3be931b..213a5c8 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -164,16 +164,41 @@ static inline void context_clear_entry(struct context_entry *context)
 struct dma_pte {
 	u64 val;
 };
-#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
 
-#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
-#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
-#define dma_set_pte_prot(p, prot) \
-		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
-#define dma_set_pte_addr(p, addr) do {\
-		(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
-#define dma_pte_present(p) (((p).val & 3) != 0)
+static inline void dma_clear_pte(struct dma_pte *pte)
+{
+	pte->val = 0;
+}
+
+static inline void dma_set_pte_readable(struct dma_pte *pte)
+{
+	pte->val |= DMA_PTE_READ;
+}
+
+static inline void dma_set_pte_writable(struct dma_pte *pte)
+{
+	pte->val |= DMA_PTE_WRITE;
+}
+
+static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
+{
+	pte->val = (pte->val & ~3) | (prot & 3);
+}
+
+static inline u64 dma_pte_addr(struct dma_pte *pte)
+{
+	return (pte->val & VTD_PAGE_MASK);
+}
+
+static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
+{
+	pte->val |= (addr & VTD_PAGE_MASK);
+}
+
+static inline bool dma_pte_present(struct dma_pte *pte)
+{
+	return (pte->val & 3) != 0;
+}
 
 struct dmar_domain {
 	int	id;			/* domain id */
@@ -487,7 +512,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
 		if (level == 1)
 			break;
 
-		if (!dma_pte_present(*pte)) {
+		if (!dma_pte_present(pte)) {
 			tmp_page = alloc_pgtable_page();
 
 			if (!tmp_page) {
@@ -497,16 +522,16 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
 			}
 			__iommu_flush_cache(domain->iommu, tmp_page,
 					PAGE_SIZE);
-			dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
+			dma_set_pte_addr(pte, virt_to_phys(tmp_page));
 			/*
 			 * high level table always sets r/w, last level page
 			 * table control read/write
 			 */
-			dma_set_pte_readable(*pte);
-			dma_set_pte_writable(*pte);
+			dma_set_pte_readable(pte);
+			dma_set_pte_writable(pte);
 			__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
 		}
-		parent = phys_to_virt(dma_pte_addr(*pte));
+		parent = phys_to_virt(dma_pte_addr(pte));
 		level--;
 	}
 
@@ -529,9 +554,9 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
 		if (level == total)
 			return pte;
 
-		if (!dma_pte_present(*pte))
+		if (!dma_pte_present(pte))
 			break;
-		parent = phys_to_virt(dma_pte_addr(*pte));
+		parent = phys_to_virt(dma_pte_addr(pte));
 		total--;
 	}
 	return NULL;
@@ -546,7 +571,7 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
 	pte = dma_addr_level_pte(domain, addr, 1);
 
 	if (pte) {
-		dma_clear_pte(*pte);
+		dma_clear_pte(pte);
 		__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
 	}
 }
@@ -593,8 +618,8 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 			pte = dma_addr_level_pte(domain, tmp, level);
 			if (pte) {
 				free_pgtable_page(
-					phys_to_virt(dma_pte_addr(*pte)));
-				dma_clear_pte(*pte);
+					phys_to_virt(dma_pte_addr(pte)));
+				dma_clear_pte(pte);
 				__iommu_flush_cache(domain->iommu,
 						pte, sizeof(*pte));
 			}
@@ -1421,9 +1446,9 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
 		/* We don't need lock here, nobody else
 		 * touches the iova range
 		 */
-		BUG_ON(dma_pte_addr(*pte));
-		dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
-		dma_set_pte_prot(*pte, prot);
+		BUG_ON(dma_pte_addr(pte));
+		dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
+		dma_set_pte_prot(pte, prot);
 		__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
 		start_pfn++;
 		index++;
@@ -2584,7 +2609,7 @@ u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
 	pte = addr_to_dma_pte(domain, iova);
 
 	if (pte)
-		pfn = dma_pte_addr(*pte);
+		pfn = dma_pte_addr(pte);
 
 	return pfn >> VTD_PAGE_SHIFT;
 }
-- 
cgit v0.10.2


From 2e824f79240476d57a8589f46232cabf151efe90 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Mon, 22 Dec 2008 16:54:58 +0800
Subject: VT-d: fix segment number being ignored when searching DRHD

On platforms with multiple PCI segments, any of the segments can have a DRHD
with INCLUDE_PCI_ALL flag. So need to check the DRHD's segment number against
the PCI device's when searching its DRHD.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 691b3ad..5f164ff 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -191,26 +191,17 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 {
 	struct acpi_dmar_hardware_unit *drhd;
-	static int include_all;
 	int ret = 0;
 
 	drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
 
-	if (!dmaru->include_all)
-		ret = dmar_parse_dev_scope((void *)(drhd + 1),
+	if (dmaru->include_all)
+		return 0;
+
+	ret = dmar_parse_dev_scope((void *)(drhd + 1),
 				((void *)drhd) + drhd->header.length,
 				&dmaru->devices_cnt, &dmaru->devices,
 				drhd->segment);
-	else {
-		/* Only allow one INCLUDE_ALL */
-		if (include_all) {
-			printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
-				"device scope is allowed\n");
-			ret = -EINVAL;
-		}
-		include_all = 1;
-	}
-
 	if (ret) {
 		list_del(&dmaru->list);
 		kfree(dmaru);
@@ -384,12 +375,21 @@ int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
 struct dmar_drhd_unit *
 dmar_find_matched_drhd_unit(struct pci_dev *dev)
 {
-	struct dmar_drhd_unit *drhd = NULL;
+	struct dmar_drhd_unit *dmaru = NULL;
+	struct acpi_dmar_hardware_unit *drhd;
+
+	list_for_each_entry(dmaru, &dmar_drhd_units, list) {
+		drhd = container_of(dmaru->hdr,
+				    struct acpi_dmar_hardware_unit,
+				    header);
+
+		if (dmaru->include_all &&
+		    drhd->segment == pci_domain_nr(dev->bus))
+			return dmaru;
 
-	list_for_each_entry(drhd, &dmar_drhd_units, list) {
-		if (drhd->include_all || dmar_pci_device_match(drhd->devices,
-						drhd->devices_cnt, dev))
-			return drhd;
+		if (dmar_pci_device_match(dmaru->devices,
+					  dmaru->devices_cnt, dev))
+			return dmaru;
 	}
 
 	return NULL;
-- 
cgit v0.10.2


From 03fb02c604d68156c0828e3950094f18ce529385 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Thu, 1 Jan 2009 17:14:58 -0300
Subject: V4L/DVB (10171): Use usb_set_intfdata

This code had calls to both usb_set_intfdata and dev_set_drvdata, doing the
same thing.

The semantic patch that lead to finding this problem is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@header@
@@

@same depends on header@
position p;
@@

usb_set_intfdata@p(...) { ... }

@depends on header@
position _p!=same.p;
identifier _f;
struct usb_interface *intf;
expression data;
@@

_f@_p(...) { <+...
- dev_set_drvdata(&intf->dev, data);
+ usb_set_intfdata(intf, data);
...+> }

// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/zr364xx.c b/drivers/media/video/zr364xx.c
index bf68ed9..9302356 100644
--- a/drivers/media/video/zr364xx.c
+++ b/drivers/media/video/zr364xx.c
@@ -893,7 +893,6 @@ static void zr364xx_disconnect(struct usb_interface *intf)
 {
 	struct zr364xx_camera *cam = usb_get_intfdata(intf);
 	usb_set_intfdata(intf, NULL);
-	dev_set_drvdata(&intf->dev, NULL);
 	dev_info(&intf->dev, DRIVER_DESC " webcam unplugged\n");
 	if (cam->vdev)
 		video_unregister_device(cam->vdev);
-- 
cgit v0.10.2


From 763d19bb90a005a339b7d5ba70a710bb17db2bab Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Wed, 31 Dec 2008 23:35:24 -0300
Subject: V4L/DVB (10172): add DVB_DEVICE_TYPE= to uevent

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c
index 6c571d9..65d6966 100644
--- a/drivers/media/dvb/dvb-core/dvbdev.c
+++ b/drivers/media/dvb/dvb-core/dvbdev.c
@@ -436,8 +436,9 @@ static int dvb_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	struct dvb_device *dvbdev = dev_get_drvdata(dev);
 
-	add_uevent_var(env, "DVB_DEVICE_NUM=%d", dvbdev->id);
 	add_uevent_var(env, "DVB_ADAPTER_NUM=%d", dvbdev->adapter->num);
+	add_uevent_var(env, "DVB_DEVICE_TYPE=%s", dnames[dvbdev->type]);
+	add_uevent_var(env, "DVB_DEVICE_NUM=%d", dvbdev->id);
 	return 0;
 }
 
-- 
cgit v0.10.2


From b15dd79ea06b04a7ecee95f62ce7b6a3547dbb0a Mon Sep 17 00:00:00 2001
From: Udo Steinberg <udo@hypervisor.org>
Date: Fri, 2 Jan 2009 17:34:28 -0300
Subject: V4L/DVB (10173): Missing v4l2_prio_close in radio_release

The radio_release function of the BTTV driver is missing a call to
v4l2_prio_close. As a result, after the radio device has been opened at
least once (e.g., by HAL during bootup), v4l2_priority will never drop below
V4L2_PRIORITY_INTERACTIVE again. With the following patch against 2.6.28,
applications that run with V4L2_PRIORITY_BACKGROUND are able to open devices
again. Previous Linux versions are affected as well.

Signed-off-by: Udo Steinberg <udo@hypervisor.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index d2f43bd..c71f394 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c
@@ -3472,6 +3472,7 @@ static int radio_release(struct file *file)
 	struct bttv *btv = fh->btv;
 	struct rds_command cmd;
 
+	v4l2_prio_close(&btv->prio,&fh->prio);
 	file->private_data = NULL;
 	kfree(fh);
 
-- 
cgit v0.10.2


From d71a2f33ac466a437f316e7bb024d0175a7f3cd9 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Sun, 7 Dec 2008 21:13:41 +0800
Subject: Initialize domain flags to 0

It's random number after the domain is allocated by kmem_cache_alloc

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 213a5c8..65aa1d4 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1180,6 +1180,7 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
 	set_bit(num, iommu->domain_ids);
 	domain->id = num;
 	domain->iommu = iommu;
+	domain->flags = 0;
 	iommu->domains[num] = domain;
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-- 
cgit v0.10.2


From 3b5410e735b093060b96664230c6f9f4fe80b251 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 09:17:15 +0800
Subject: change P2P domain flags

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 65aa1d4..22ad885 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -200,6 +200,9 @@ static inline bool dma_pte_present(struct dma_pte *pte)
 	return (pte->val & 3) != 0;
 }
 
+/* devices under the same p2p bridge are owned in one domain */
+#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 < 0)
+
 struct dmar_domain {
 	int	id;			/* domain id */
 	struct intel_iommu *iommu;	/* back pointer to owning iommu */
@@ -214,8 +217,7 @@ struct dmar_domain {
 	/* adjusted guest address width, 0 is level 2 30-bit */
 	int		agaw;
 
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
-	int		flags;
+	int		flags;		/* flags to find out type of domain */
 };
 
 /* PCI domain-device relationship */
@@ -1574,7 +1576,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 		info->dev = NULL;
 		info->domain = domain;
 		/* This domain is shared by devices under p2p bridge */
-		domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
+		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
 
 		/* pcie-to-pci bridge already has a domain, uses it */
 		found = NULL;
-- 
cgit v0.10.2


From d9630fe941769dd050fbc38fbbac20a708ab9461 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 11:06:32 +0800
Subject: Add global iommu list

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 22ad885..d2ffa7a 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -57,6 +57,9 @@
 #define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
 #define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
 
+/* global iommu list, set NULL for ignored DMAR units */
+static struct intel_iommu **g_iommus;
+
 /*
  * 0: Present
  * 1-11: Reserved
@@ -1153,6 +1156,17 @@ void free_dmar_iommu(struct intel_iommu *iommu)
 	kfree(iommu->domains);
 	kfree(iommu->domain_ids);
 
+	g_iommus[iommu->seq_id] = NULL;
+
+	/* if all iommus are freed, free g_iommus */
+	for (i = 0; i < g_num_of_iommus; i++) {
+		if (g_iommus[i])
+			break;
+	}
+
+	if (i == g_num_of_iommus)
+		kfree(g_iommus);
+
 	/* free context mapping */
 	free_context_table(iommu);
 }
@@ -1794,9 +1808,18 @@ static int __init init_dmars(void)
 		 */
 	}
 
+	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
+			GFP_KERNEL);
+	if (!g_iommus) {
+		printk(KERN_ERR "Allocating global iommu array failed\n");
+		ret = -ENOMEM;
+		goto error;
+	}
+
 	deferred_flush = kzalloc(g_num_of_iommus *
 		sizeof(struct deferred_flush_tables), GFP_KERNEL);
 	if (!deferred_flush) {
+		kfree(g_iommus);
 		ret = -ENOMEM;
 		goto error;
 	}
@@ -1806,6 +1829,7 @@ static int __init init_dmars(void)
 			continue;
 
 		iommu = drhd->iommu;
+		g_iommus[iommu->seq_id] = iommu;
 
 		ret = iommu_init_domains(iommu);
 		if (ret)
@@ -1918,6 +1942,7 @@ error:
 		iommu = drhd->iommu;
 		free_iommu(iommu);
 	}
+	kfree(g_iommus);
 	return ret;
 }
 
-- 
cgit v0.10.2


From a2bb8459fe46e5aaad6637b31b5593d740097cba Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 11:24:12 +0800
Subject: Get iommu from g_iommus for deferred flush

deferred_flush[] uses the iommu seq_id to index, so its iommu is fixed and can get it from g_iommus.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index d2ffa7a..86b9f58 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2101,10 +2101,11 @@ static void flush_unmaps(void)
 
 	/* just flush them all */
 	for (i = 0; i < g_num_of_iommus; i++) {
-		if (deferred_flush[i].next) {
-			struct intel_iommu *iommu =
-				deferred_flush[i].domain[0]->iommu;
+		struct intel_iommu *iommu = g_iommus[i];
+		if (!iommu)
+			continue;
 
+		if (deferred_flush[i].next) {
 			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
 						 DMA_TLB_GLOBAL_FLUSH, 0);
 			for (j = 0; j < deferred_flush[i].next; j++) {
-- 
cgit v0.10.2


From 8c11e798eee2ce4475134eaf61302b28ea4f205d Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 15:29:22 +0800
Subject: iommu bitmap instead of iommu pointer in dmar_domain

In order to support assigning multiple devices from different iommus to a domain, iommu bitmap is used to keep all iommus the domain are related to.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 86b9f58..9dca689 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -208,7 +208,7 @@ static inline bool dma_pte_present(struct dma_pte *pte)
 
 struct dmar_domain {
 	int	id;			/* domain id */
-	struct intel_iommu *iommu;	/* back pointer to owning iommu */
+	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses*/
 
 	struct list_head devices; 	/* all devices' list */
 	struct iova_domain iovad;	/* iova's that belong to this domain */
@@ -362,6 +362,18 @@ void free_iova_mem(struct iova *iova)
 	kmem_cache_free(iommu_iova_cache, iova);
 }
 
+/* in native case, each domain is related to only one iommu */
+static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
+{
+	int iommu_id;
+
+	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
+	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
+		return NULL;
+
+	return g_iommus[iommu_id];
+}
+
 /* Gets context entry for a given bus and devfn */
 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
 		u8 bus, u8 devfn)
@@ -502,6 +514,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
 	int level = agaw_to_level(domain->agaw);
 	int offset;
 	unsigned long flags;
+	struct intel_iommu *iommu = domain_get_iommu(domain);
 
 	BUG_ON(!domain->pgd);
 
@@ -525,7 +538,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
 					flags);
 				return NULL;
 			}
-			__iommu_flush_cache(domain->iommu, tmp_page,
+			__iommu_flush_cache(iommu, tmp_page,
 					PAGE_SIZE);
 			dma_set_pte_addr(pte, virt_to_phys(tmp_page));
 			/*
@@ -534,7 +547,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
 			 */
 			dma_set_pte_readable(pte);
 			dma_set_pte_writable(pte);
-			__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+			__iommu_flush_cache(iommu, pte, sizeof(*pte));
 		}
 		parent = phys_to_virt(dma_pte_addr(pte));
 		level--;
@@ -571,13 +584,14 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
 {
 	struct dma_pte *pte = NULL;
+	struct intel_iommu *iommu = domain_get_iommu(domain);
 
 	/* get last level pte */
 	pte = dma_addr_level_pte(domain, addr, 1);
 
 	if (pte) {
 		dma_clear_pte(pte);
-		__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+		__iommu_flush_cache(iommu, pte, sizeof(*pte));
 	}
 }
 
@@ -608,6 +622,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 	int total = agaw_to_level(domain->agaw);
 	int level;
 	u64 tmp;
+	struct intel_iommu *iommu = domain_get_iommu(domain);
 
 	start &= (((u64)1) << addr_width) - 1;
 	end &= (((u64)1) << addr_width) - 1;
@@ -625,7 +640,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 				free_pgtable_page(
 					phys_to_virt(dma_pte_addr(pte)));
 				dma_clear_pte(pte);
-				__iommu_flush_cache(domain->iommu,
+				__iommu_flush_cache(iommu,
 						pte, sizeof(*pte));
 			}
 			tmp += level_size(level);
@@ -1195,7 +1210,8 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
 
 	set_bit(num, iommu->domain_ids);
 	domain->id = num;
-	domain->iommu = iommu;
+	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+	set_bit(iommu->seq_id, &domain->iommu_bmp);
 	domain->flags = 0;
 	iommu->domains[num] = domain;
 	spin_unlock_irqrestore(&iommu->lock, flags);
@@ -1206,10 +1222,13 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
 static void iommu_free_domain(struct dmar_domain *domain)
 {
 	unsigned long flags;
+	struct intel_iommu *iommu;
+
+	iommu = domain_get_iommu(domain);
 
-	spin_lock_irqsave(&domain->iommu->lock, flags);
-	clear_bit(domain->id, domain->iommu->domain_ids);
-	spin_unlock_irqrestore(&domain->iommu->lock, flags);
+	spin_lock_irqsave(&iommu->lock, flags);
+	clear_bit(domain->id, iommu->domain_ids);
+	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
 static struct iova_domain reserved_iova_list;
@@ -1288,7 +1307,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 	domain_reserve_special_ranges(domain);
 
 	/* calculate AGAW */
-	iommu = domain->iommu;
+	iommu = domain_get_iommu(domain);
 	if (guest_width > cap_mgaw(iommu->cap))
 		guest_width = cap_mgaw(iommu->cap);
 	domain->gaw = guest_width;
@@ -1341,7 +1360,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 		u8 bus, u8 devfn)
 {
 	struct context_entry *context;
-	struct intel_iommu *iommu = domain->iommu;
+	struct intel_iommu *iommu = domain_get_iommu(domain);
 	unsigned long flags;
 
 	pr_debug("Set context mapping for %02x:%02x.%d\n",
@@ -1413,8 +1432,9 @@ static int domain_context_mapped(struct dmar_domain *domain,
 {
 	int ret;
 	struct pci_dev *tmp, *parent;
+	struct intel_iommu *iommu = domain_get_iommu(domain);
 
-	ret = device_context_mapped(domain->iommu,
+	ret = device_context_mapped(iommu,
 		pdev->bus->number, pdev->devfn);
 	if (!ret)
 		return ret;
@@ -1425,17 +1445,17 @@ static int domain_context_mapped(struct dmar_domain *domain,
 	/* Secondary interface's bus number and devfn 0 */
 	parent = pdev->bus->self;
 	while (parent != tmp) {
-		ret = device_context_mapped(domain->iommu, parent->bus->number,
+		ret = device_context_mapped(iommu, parent->bus->number,
 			parent->devfn);
 		if (!ret)
 			return ret;
 		parent = parent->bus->self;
 	}
 	if (tmp->is_pcie)
-		return device_context_mapped(domain->iommu,
+		return device_context_mapped(iommu,
 			tmp->subordinate->number, 0);
 	else
-		return device_context_mapped(domain->iommu,
+		return device_context_mapped(iommu,
 			tmp->bus->number, tmp->devfn);
 }
 
@@ -1447,6 +1467,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
 	struct dma_pte *pte;
 	int index;
 	int addr_width = agaw_to_width(domain->agaw);
+	struct intel_iommu *iommu = domain_get_iommu(domain);
 
 	hpa &= (((u64)1) << addr_width) - 1;
 
@@ -1466,7 +1487,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
 		BUG_ON(dma_pte_addr(pte));
 		dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
 		dma_set_pte_prot(pte, prot);
-		__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+		__iommu_flush_cache(iommu, pte, sizeof(*pte));
 		start_pfn++;
 		index++;
 	}
@@ -1475,10 +1496,12 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
 
 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
 {
-	clear_context_table(domain->iommu, bus, devfn);
-	domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
+	struct intel_iommu *iommu = domain_get_iommu(domain);
+
+	clear_context_table(iommu, bus, devfn);
+	iommu->flush.flush_context(iommu, 0, 0, 0,
 					   DMA_CCMD_GLOBAL_INVL, 0);
-	domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
+	iommu->flush.flush_iotlb(iommu, 0, 0, 0,
 					 DMA_TLB_GLOBAL_FLUSH, 0);
 }
 
@@ -2033,6 +2056,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 	struct iova *iova;
 	int prot = 0;
 	int ret;
+	struct intel_iommu *iommu;
 
 	BUG_ON(dir == DMA_NONE);
 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
@@ -2042,6 +2066,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 	if (!domain)
 		return 0;
 
+	iommu = domain_get_iommu(domain);
 	size = aligned_size((u64)paddr, size);
 
 	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
@@ -2055,7 +2080,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 	 * mappings..
 	 */
 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
-			!cap_zlr(domain->iommu->cap))
+			!cap_zlr(iommu->cap))
 		prot |= DMA_PTE_READ;
 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
 		prot |= DMA_PTE_WRITE;
@@ -2071,10 +2096,10 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 		goto error;
 
 	/* it's a non-present to present mapping */
-	ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
+	ret = iommu_flush_iotlb_psi(iommu, domain->id,
 			start_paddr, size >> VTD_PAGE_SHIFT, 1);
 	if (ret)
-		iommu_flush_write_buffer(domain->iommu);
+		iommu_flush_write_buffer(iommu);
 
 	return start_paddr + ((u64)paddr & (~PAGE_MASK));
 
@@ -2132,12 +2157,14 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
 {
 	unsigned long flags;
 	int next, iommu_id;
+	struct intel_iommu *iommu;
 
 	spin_lock_irqsave(&async_umap_flush_lock, flags);
 	if (list_size == HIGH_WATER_MARK)
 		flush_unmaps();
 
-	iommu_id = dom->iommu->seq_id;
+	iommu = domain_get_iommu(dom);
+	iommu_id = iommu->seq_id;
 
 	next = deferred_flush[iommu_id].next;
 	deferred_flush[iommu_id].domain[next] = dom;
@@ -2159,12 +2186,15 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
 	struct dmar_domain *domain;
 	unsigned long start_addr;
 	struct iova *iova;
+	struct intel_iommu *iommu;
 
 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
 		return;
 	domain = find_domain(pdev);
 	BUG_ON(!domain);
 
+	iommu = domain_get_iommu(domain);
+
 	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
 	if (!iova)
 		return;
@@ -2180,9 +2210,9 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
 	/* free page tables */
 	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
 	if (intel_iommu_strict) {
-		if (iommu_flush_iotlb_psi(domain->iommu,
+		if (iommu_flush_iotlb_psi(iommu,
 			domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
-			iommu_flush_write_buffer(domain->iommu);
+			iommu_flush_write_buffer(iommu);
 		/* free iova */
 		__free_iova(&domain->iovad, iova);
 	} else {
@@ -2243,11 +2273,15 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 	size_t size = 0;
 	void *addr;
 	struct scatterlist *sg;
+	struct intel_iommu *iommu;
 
 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
 		return;
 
 	domain = find_domain(pdev);
+	BUG_ON(!domain);
+
+	iommu = domain_get_iommu(domain);
 
 	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
 	if (!iova)
@@ -2264,9 +2298,9 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 	/* free page tables */
 	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
 
-	if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
+	if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
 			size >> VTD_PAGE_SHIFT, 0))
-		iommu_flush_write_buffer(domain->iommu);
+		iommu_flush_write_buffer(iommu);
 
 	/* free iova */
 	__free_iova(&domain->iovad, iova);
@@ -2300,6 +2334,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
 	int ret;
 	struct scatterlist *sg;
 	unsigned long start_addr;
+	struct intel_iommu *iommu;
 
 	BUG_ON(dir == DMA_NONE);
 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
@@ -2309,6 +2344,8 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
 	if (!domain)
 		return 0;
 
+	iommu = domain_get_iommu(domain);
+
 	for_each_sg(sglist, sg, nelems, i) {
 		addr = SG_ENT_VIRT_ADDRESS(sg);
 		addr = (void *)virt_to_phys(addr);
@@ -2326,7 +2363,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
 	 * mappings..
 	 */
 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
-			!cap_zlr(domain->iommu->cap))
+			!cap_zlr(iommu->cap))
 		prot |= DMA_PTE_READ;
 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
 		prot |= DMA_PTE_WRITE;
@@ -2358,9 +2395,9 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
 	}
 
 	/* it's a non-present to present mapping */
-	if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
+	if (iommu_flush_iotlb_psi(iommu, domain->id,
 			start_addr, offset >> VTD_PAGE_SHIFT, 1))
-		iommu_flush_write_buffer(domain->iommu);
+		iommu_flush_write_buffer(iommu);
 	return nelems;
 }
 
-- 
cgit v0.10.2


From 1b5736839ae13dadc5947940144f95dd0f4a4a8c Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 15:34:06 +0800
Subject: calculate agaw for each iommu

"SAGAW" capability may be different across iommus. Use a default agaw, but if default agaw is not supported in some iommus, choose a less supported agaw.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 5f164ff..f5a662a 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -491,6 +491,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	int map_size;
 	u32 ver;
 	static int iommu_allocated = 0;
+	int agaw;
 
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
@@ -506,6 +507,15 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
 	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
 
+	agaw = iommu_calculate_agaw(iommu);
+	if (agaw < 0) {
+		printk(KERN_ERR
+			"Cannot get a valid agaw for iommu (seq_id = %d)\n",
+			iommu->seq_id);
+		goto error;
+	}
+	iommu->agaw = agaw;
+
 	/* the registers might be more than one page */
 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
 		cap_max_fault_reg_offset(iommu->cap));
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 9dca689..3ecfa23 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -362,6 +362,28 @@ void free_iova_mem(struct iova *iova)
 	kmem_cache_free(iommu_iova_cache, iova);
 }
 
+
+static inline int width_to_agaw(int width);
+
+/* calculate agaw for each iommu.
+ * "SAGAW" may be different across iommus, use a default agaw, and
+ * get a supported less agaw for iommus that don't support the default agaw.
+ */
+int iommu_calculate_agaw(struct intel_iommu *iommu)
+{
+	unsigned long sagaw;
+	int agaw = -1;
+
+	sagaw = cap_sagaw(iommu->cap);
+	for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+	     agaw >= 0; agaw--) {
+		if (test_bit(agaw, &sagaw))
+			break;
+	}
+
+	return agaw;
+}
+
 /* in native case, each domain is related to only one iommu */
 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 {
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 7799a85..136f170 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -17,6 +17,7 @@ struct dmar_domain;
 struct root_entry;
 
 extern void free_dmar_iommu(struct intel_iommu *iommu);
+extern int iommu_calculate_agaw(struct intel_iommu *iommu);
 
 extern int dmar_disabled;
 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 1bff7bf..06349fd 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -290,6 +290,7 @@ struct intel_iommu {
 	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
 	spinlock_t	register_lock; /* protect register handling */
 	int		seq_id;	/* sequence id of the iommu */
+	int		agaw; /* agaw of this iommu */
 
 #ifdef CONFIG_DMAR
 	unsigned long 	*domain_ids; /* bitmap of domains */
-- 
cgit v0.10.2


From 8e604097ddc483eb1e6e99564953e4e937fe439a Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 15:49:06 +0800
Subject: iommu coherency

In dmar_domain, more than one iommus may be included in iommu_bmp. Due to "Coherency" capability may be different across iommus, set this variable to indicate iommu access is coherent or not. Only when all related iommus in a dmar_domain are all coherent, iommu access of this domain is coherent.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 3ecfa23..104e99d 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -221,6 +221,8 @@ struct dmar_domain {
 	int		agaw;
 
 	int		flags;		/* flags to find out type of domain */
+
+	int		iommu_coherency;/* indicate coherency of iommu access */
 };
 
 /* PCI domain-device relationship */
@@ -396,6 +398,23 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 	return g_iommus[iommu_id];
 }
 
+/* "Coherency" capability may be different across iommus */
+static void domain_update_iommu_coherency(struct dmar_domain *domain)
+{
+	int i;
+
+	domain->iommu_coherency = 1;
+
+	i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
+	for (; i < g_num_of_iommus; ) {
+		if (!ecap_coherent(g_iommus[i]->ecap)) {
+			domain->iommu_coherency = 0;
+			break;
+		}
+		i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
+	}
+}
+
 /* Gets context entry for a given bus and devfn */
 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
 		u8 bus, u8 devfn)
@@ -1346,6 +1365,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 	domain->agaw = agaw;
 	INIT_LIST_HEAD(&domain->devices);
 
+	if (ecap_coherent(iommu->ecap))
+		domain->iommu_coherency = 1;
+	else
+		domain->iommu_coherency = 0;
+
 	/* always allocate the top pgd */
 	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
 	if (!domain->pgd)
-- 
cgit v0.10.2


From 1ce28feb22833645a0f3843cd873a0b56ed19ef0 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 16:35:39 +0800
Subject: Add domain flag DOMAIN_FLAG_VIRTUAL_MACHINE

Add this flag for VT-d used in virtual machine, like KVM.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 104e99d..ffbe4c5 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -206,6 +206,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
 /* devices under the same p2p bridge are owned in one domain */
 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 < 0)
 
+/* domain represents a virtual machine, more than one devices
+ * across iommus may be owned in one domain, e.g. kvm guest.
+ */
+#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)
+
 struct dmar_domain {
 	int	id;			/* domain id */
 	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses*/
@@ -391,6 +396,8 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 {
 	int iommu_id;
 
+	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
+
 	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
 		return NULL;
-- 
cgit v0.10.2


From c7151a8dfefd11108de5b4293af2390962bcff71 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 22:51:37 +0800
Subject: Add/remove domain device info for virtual machine domain

Add iommu reference count in domain, and add a lock to protect iommu setting including iommu_bmp, iommu_count and iommu_coherency.

virtual machine domain may have multiple devices from different iommus, so it needs to do more things when add/remove domain device info. Thus implement separate these functions for virtual machine domain.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index ffbe4c5..6ed18fa 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -228,6 +228,8 @@ struct dmar_domain {
 	int		flags;		/* flags to find out type of domain */
 
 	int		iommu_coherency;/* indicate coherency of iommu access */
+	int		iommu_count;	/* reference count of iommu */
+	spinlock_t	iommu_lock;	/* protect iommu set in domain */
 };
 
 /* PCI domain-device relationship */
@@ -422,6 +424,27 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
 	}
 }
 
+static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
+{
+	struct dmar_drhd_unit *drhd = NULL;
+	int i;
+
+	for_each_drhd_unit(drhd) {
+		if (drhd->ignored)
+			continue;
+
+		for (i = 0; i < drhd->devices_cnt; i++)
+			if (drhd->devices[i]->bus->number == bus &&
+			    drhd->devices[i]->devfn == devfn)
+				return drhd->iommu;
+
+		if (drhd->include_all)
+			return drhd->iommu;
+	}
+
+	return NULL;
+}
+
 /* Gets context entry for a given bus and devfn */
 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
 		u8 bus, u8 devfn)
@@ -1196,12 +1219,18 @@ void free_dmar_iommu(struct intel_iommu *iommu)
 {
 	struct dmar_domain *domain;
 	int i;
+	unsigned long flags;
 
 	i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
 	for (; i < cap_ndoms(iommu->cap); ) {
 		domain = iommu->domains[i];
 		clear_bit(i, iommu->domain_ids);
-		domain_exit(domain);
+
+		spin_lock_irqsave(&domain->iommu_lock, flags);
+		if (--domain->iommu_count == 0)
+			domain_exit(domain);
+		spin_unlock_irqrestore(&domain->iommu_lock, flags);
+
 		i = find_next_bit(iommu->domain_ids,
 			cap_ndoms(iommu->cap), i+1);
 	}
@@ -1351,6 +1380,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 
 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
 	spin_lock_init(&domain->mapping_lock);
+	spin_lock_init(&domain->iommu_lock);
 
 	domain_reserve_special_ranges(domain);
 
@@ -1377,6 +1407,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 	else
 		domain->iommu_coherency = 0;
 
+	domain->iommu_count = 1;
+
 	/* always allocate the top pgd */
 	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
 	if (!domain->pgd)
@@ -1445,6 +1477,13 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	spin_lock_irqsave(&domain->iommu_lock, flags);
+	if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
+		domain->iommu_count++;
+		domain_update_iommu_coherency(domain);
+	}
+	spin_unlock_irqrestore(&domain->iommu_lock, flags);
 	return 0;
 }
 
@@ -1547,9 +1586,10 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
 	return 0;
 }
 
-static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
 {
-	struct intel_iommu *iommu = domain_get_iommu(domain);
+	if (!iommu)
+		return;
 
 	clear_context_table(iommu, bus, devfn);
 	iommu->flush.flush_context(iommu, 0, 0, 0,
@@ -1562,6 +1602,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
 {
 	struct device_domain_info *info;
 	unsigned long flags;
+	struct intel_iommu *iommu;
 
 	spin_lock_irqsave(&device_domain_lock, flags);
 	while (!list_empty(&domain->devices)) {
@@ -1573,7 +1614,8 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
 			info->dev->dev.archdata.iommu = NULL;
 		spin_unlock_irqrestore(&device_domain_lock, flags);
 
-		detach_domain_for_dev(info->domain, info->bus, info->devfn);
+		iommu = device_to_iommu(info->bus, info->devfn);
+		iommu_detach_dev(iommu, info->bus, info->devfn);
 		free_devinfo_mem(info);
 
 		spin_lock_irqsave(&device_domain_lock, flags);
@@ -2625,6 +2667,122 @@ int __init intel_iommu_init(void)
 	return 0;
 }
 
+static int vm_domain_add_dev_info(struct dmar_domain *domain,
+				  struct pci_dev *pdev)
+{
+	struct device_domain_info *info;
+	unsigned long flags;
+
+	info = alloc_devinfo_mem();
+	if (!info)
+		return -ENOMEM;
+
+	info->bus = pdev->bus->number;
+	info->devfn = pdev->devfn;
+	info->dev = pdev;
+	info->domain = domain;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	list_add(&info->link, &domain->devices);
+	list_add(&info->global, &device_domain_list);
+	pdev->dev.archdata.iommu = info;
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+
+	return 0;
+}
+
+static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
+					  struct pci_dev *pdev)
+{
+	struct device_domain_info *info;
+	struct intel_iommu *iommu;
+	unsigned long flags;
+	int found = 0;
+	struct list_head *entry, *tmp;
+
+	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+	if (!iommu)
+		return;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	list_for_each_safe(entry, tmp, &domain->devices) {
+		info = list_entry(entry, struct device_domain_info, link);
+		if (info->bus == pdev->bus->number &&
+		    info->devfn == pdev->devfn) {
+			list_del(&info->link);
+			list_del(&info->global);
+			if (info->dev)
+				info->dev->dev.archdata.iommu = NULL;
+			spin_unlock_irqrestore(&device_domain_lock, flags);
+
+			iommu_detach_dev(iommu, info->bus, info->devfn);
+			free_devinfo_mem(info);
+
+			spin_lock_irqsave(&device_domain_lock, flags);
+
+			if (found)
+				break;
+			else
+				continue;
+		}
+
+		/* if there is no other devices under the same iommu
+		 * owned by this domain, clear this iommu in iommu_bmp
+		 * update iommu count and coherency
+		 */
+		if (device_to_iommu(info->bus, info->devfn) == iommu)
+			found = 1;
+	}
+
+	if (found == 0) {
+		unsigned long tmp_flags;
+		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
+		clear_bit(iommu->seq_id, &domain->iommu_bmp);
+		domain->iommu_count--;
+		domain_update_iommu_coherency(domain);
+		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
+	}
+
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+}
+
+static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
+{
+	struct device_domain_info *info;
+	struct intel_iommu *iommu;
+	unsigned long flags1, flags2;
+
+	spin_lock_irqsave(&device_domain_lock, flags1);
+	while (!list_empty(&domain->devices)) {
+		info = list_entry(domain->devices.next,
+			struct device_domain_info, link);
+		list_del(&info->link);
+		list_del(&info->global);
+		if (info->dev)
+			info->dev->dev.archdata.iommu = NULL;
+
+		spin_unlock_irqrestore(&device_domain_lock, flags1);
+
+		iommu = device_to_iommu(info->bus, info->devfn);
+		iommu_detach_dev(iommu, info->bus, info->devfn);
+
+		/* clear this iommu in iommu_bmp, update iommu count
+		 * and coherency
+		 */
+		spin_lock_irqsave(&domain->iommu_lock, flags2);
+		if (test_and_clear_bit(iommu->seq_id,
+				       &domain->iommu_bmp)) {
+			domain->iommu_count--;
+			domain_update_iommu_coherency(domain);
+		}
+		spin_unlock_irqrestore(&domain->iommu_lock, flags2);
+
+		free_devinfo_mem(info);
+		spin_lock_irqsave(&device_domain_lock, flags1);
+	}
+	spin_unlock_irqrestore(&device_domain_lock, flags1);
+}
+
 void intel_iommu_domain_exit(struct dmar_domain *domain)
 {
 	u64 end;
@@ -2702,7 +2860,10 @@ EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
 
 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
 {
-	detach_domain_for_dev(domain, bus, devfn);
+	struct intel_iommu *iommu;
+
+	iommu = device_to_iommu(bus, devfn);
+	iommu_detach_dev(iommu, bus, devfn);
 }
 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
 
-- 
cgit v0.10.2


From 5331fe6f5627e06eec7d0dc154a0a3a9c27813c5 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 23:00:00 +0800
Subject: Add domain_flush_cache

Because virtual machine domain may have multiple devices from different iommus, it cannot use __iommu_flush_cache.

In some common low level functions, use domain_flush_cache instead of __iommu_flush_cache. On the other hand, in some functions, iommu can is specified or domain cannot be got, still use __iommu_flush_cache

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6ed18fa..f0a2199 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -445,6 +445,13 @@ static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
 	return NULL;
 }
 
+static void domain_flush_cache(struct dmar_domain *domain,
+			       void *addr, int size)
+{
+	if (!domain->iommu_coherency)
+		clflush_cache_range(addr, size);
+}
+
 /* Gets context entry for a given bus and devfn */
 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
 		u8 bus, u8 devfn)
@@ -585,7 +592,6 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
 	int level = agaw_to_level(domain->agaw);
 	int offset;
 	unsigned long flags;
-	struct intel_iommu *iommu = domain_get_iommu(domain);
 
 	BUG_ON(!domain->pgd);
 
@@ -609,8 +615,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
 					flags);
 				return NULL;
 			}
-			__iommu_flush_cache(iommu, tmp_page,
-					PAGE_SIZE);
+			domain_flush_cache(domain, tmp_page, PAGE_SIZE);
 			dma_set_pte_addr(pte, virt_to_phys(tmp_page));
 			/*
 			 * high level table always sets r/w, last level page
@@ -618,7 +623,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
 			 */
 			dma_set_pte_readable(pte);
 			dma_set_pte_writable(pte);
-			__iommu_flush_cache(iommu, pte, sizeof(*pte));
+			domain_flush_cache(domain, pte, sizeof(*pte));
 		}
 		parent = phys_to_virt(dma_pte_addr(pte));
 		level--;
@@ -655,14 +660,13 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
 {
 	struct dma_pte *pte = NULL;
-	struct intel_iommu *iommu = domain_get_iommu(domain);
 
 	/* get last level pte */
 	pte = dma_addr_level_pte(domain, addr, 1);
 
 	if (pte) {
 		dma_clear_pte(pte);
-		__iommu_flush_cache(iommu, pte, sizeof(*pte));
+		domain_flush_cache(domain, pte, sizeof(*pte));
 	}
 }
 
@@ -693,7 +697,6 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 	int total = agaw_to_level(domain->agaw);
 	int level;
 	u64 tmp;
-	struct intel_iommu *iommu = domain_get_iommu(domain);
 
 	start &= (((u64)1) << addr_width) - 1;
 	end &= (((u64)1) << addr_width) - 1;
@@ -711,8 +714,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 				free_pgtable_page(
 					phys_to_virt(dma_pte_addr(pte)));
 				dma_clear_pte(pte);
-				__iommu_flush_cache(iommu,
-						pte, sizeof(*pte));
+				domain_flush_cache(domain, pte, sizeof(*pte));
 			}
 			tmp += level_size(level);
 		}
@@ -1445,12 +1447,17 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 		u8 bus, u8 devfn)
 {
 	struct context_entry *context;
-	struct intel_iommu *iommu = domain_get_iommu(domain);
 	unsigned long flags;
+	struct intel_iommu *iommu;
 
 	pr_debug("Set context mapping for %02x:%02x.%d\n",
 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 	BUG_ON(!domain->pgd);
+
+	iommu = device_to_iommu(bus, devfn);
+	if (!iommu)
+		return -ENODEV;
+
 	context = device_to_context_entry(iommu, bus, devfn);
 	if (!context)
 		return -ENOMEM;
@@ -1466,7 +1473,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 	context_set_fault_enable(context);
 	context_set_present(context);
-	__iommu_flush_cache(iommu, context, sizeof(*context));
+	domain_flush_cache(domain, context, sizeof(*context));
 
 	/* it's a non-present to present mapping */
 	if (iommu->flush.flush_context(iommu, domain->id,
@@ -1519,12 +1526,15 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
 			tmp->bus->number, tmp->devfn);
 }
 
-static int domain_context_mapped(struct dmar_domain *domain,
-	struct pci_dev *pdev)
+static int domain_context_mapped(struct pci_dev *pdev)
 {
 	int ret;
 	struct pci_dev *tmp, *parent;
-	struct intel_iommu *iommu = domain_get_iommu(domain);
+	struct intel_iommu *iommu;
+
+	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+	if (!iommu)
+		return -ENODEV;
 
 	ret = device_context_mapped(iommu,
 		pdev->bus->number, pdev->devfn);
@@ -1559,7 +1569,6 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
 	struct dma_pte *pte;
 	int index;
 	int addr_width = agaw_to_width(domain->agaw);
-	struct intel_iommu *iommu = domain_get_iommu(domain);
 
 	hpa &= (((u64)1) << addr_width) - 1;
 
@@ -1579,7 +1588,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
 		BUG_ON(dma_pte_addr(pte));
 		dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
 		dma_set_pte_prot(pte, prot);
-		__iommu_flush_cache(iommu, pte, sizeof(*pte));
+		domain_flush_cache(domain, pte, sizeof(*pte));
 		start_pfn++;
 		index++;
 	}
@@ -2129,7 +2138,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
 	}
 
 	/* make sure context mapping is ok */
-	if (unlikely(!domain_context_mapped(domain, pdev))) {
+	if (unlikely(!domain_context_mapped(pdev))) {
 		ret = domain_context_mapping(domain, pdev);
 		if (ret) {
 			printk(KERN_ERR
-- 
cgit v0.10.2


From 5e98c4b1d6e89676193c355e430eddf369bcf195 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 23:03:27 +0800
Subject: Allocation and free functions of virtual machine domain

virtual machine domain is different from native DMA-API domain, implement separate allocation and free functions for virtual machine domain.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index f0a2199..171f6c6 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1216,6 +1216,7 @@ static int iommu_init_domains(struct intel_iommu *iommu)
 
 
 static void domain_exit(struct dmar_domain *domain);
+static void vm_domain_exit(struct dmar_domain *domain);
 
 void free_dmar_iommu(struct intel_iommu *iommu)
 {
@@ -1229,8 +1230,12 @@ void free_dmar_iommu(struct intel_iommu *iommu)
 		clear_bit(i, iommu->domain_ids);
 
 		spin_lock_irqsave(&domain->iommu_lock, flags);
-		if (--domain->iommu_count == 0)
-			domain_exit(domain);
+		if (--domain->iommu_count == 0) {
+			if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+				vm_domain_exit(domain);
+			else
+				domain_exit(domain);
+		}
 		spin_unlock_irqrestore(&domain->iommu_lock, flags);
 
 		i = find_next_bit(iommu->domain_ids,
@@ -2792,6 +2797,104 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
 	spin_unlock_irqrestore(&device_domain_lock, flags1);
 }
 
+/* domain id for virtual machine, it won't be set in context */
+static unsigned long vm_domid;
+
+static struct dmar_domain *iommu_alloc_vm_domain(void)
+{
+	struct dmar_domain *domain;
+
+	domain = alloc_domain_mem();
+	if (!domain)
+		return NULL;
+
+	domain->id = vm_domid++;
+	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
+
+	return domain;
+}
+
+static int vm_domain_init(struct dmar_domain *domain, int guest_width)
+{
+	int adjust_width;
+
+	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
+	spin_lock_init(&domain->mapping_lock);
+	spin_lock_init(&domain->iommu_lock);
+
+	domain_reserve_special_ranges(domain);
+
+	/* calculate AGAW */
+	domain->gaw = guest_width;
+	adjust_width = guestwidth_to_adjustwidth(guest_width);
+	domain->agaw = width_to_agaw(adjust_width);
+
+	INIT_LIST_HEAD(&domain->devices);
+
+	domain->iommu_count = 0;
+	domain->iommu_coherency = 0;
+
+	/* always allocate the top pgd */
+	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
+	if (!domain->pgd)
+		return -ENOMEM;
+	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
+	return 0;
+}
+
+static void iommu_free_vm_domain(struct dmar_domain *domain)
+{
+	unsigned long flags;
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
+	unsigned long i;
+	unsigned long ndomains;
+
+	for_each_drhd_unit(drhd) {
+		if (drhd->ignored)
+			continue;
+		iommu = drhd->iommu;
+
+		ndomains = cap_ndoms(iommu->cap);
+		i = find_first_bit(iommu->domain_ids, ndomains);
+		for (; i < ndomains; ) {
+			if (iommu->domains[i] == domain) {
+				spin_lock_irqsave(&iommu->lock, flags);
+				clear_bit(i, iommu->domain_ids);
+				iommu->domains[i] = NULL;
+				spin_unlock_irqrestore(&iommu->lock, flags);
+				break;
+			}
+			i = find_next_bit(iommu->domain_ids, ndomains, i+1);
+		}
+	}
+}
+
+static void vm_domain_exit(struct dmar_domain *domain)
+{
+	u64 end;
+
+	/* Domain 0 is reserved, so dont process it */
+	if (!domain)
+		return;
+
+	vm_domain_remove_all_dev_info(domain);
+	/* destroy iovas */
+	put_iova_domain(&domain->iovad);
+	end = DOMAIN_MAX_ADDR(domain->gaw);
+	end = end & (~VTD_PAGE_MASK);
+
+	/* clear ptes */
+	dma_pte_clear_range(domain, 0, end);
+
+	/* free page tables */
+	dma_pte_free_pagetable(domain, 0, end);
+
+	iommu_free_vm_domain(domain);
+	free_domain_mem(domain);
+}
+
 void intel_iommu_domain_exit(struct dmar_domain *domain)
 {
 	u64 end;
-- 
cgit v0.10.2


From ea6606b02fc3192f2edab2db669fa0b9756b4e67 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 23:08:15 +0800
Subject: Change domain_context_mapping_one for virtual machine domain

vm_domid won't be set in context, find available domain id for a device from its iommu.

For a virtual machine domain, a default agaw will be set, and skip top levels of page tables for iommu which has less agaw than default.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 171f6c6..8a204d5 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1454,6 +1454,11 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	struct context_entry *context;
 	unsigned long flags;
 	struct intel_iommu *iommu;
+	struct dma_pte *pgd;
+	unsigned long num;
+	unsigned long ndomains;
+	int id;
+	int agaw;
 
 	pr_debug("Set context mapping for %02x:%02x.%d\n",
 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1472,9 +1477,53 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 		return 0;
 	}
 
-	context_set_domain_id(context, domain->id);
-	context_set_address_width(context, domain->agaw);
-	context_set_address_root(context, virt_to_phys(domain->pgd));
+	id = domain->id;
+	pgd = domain->pgd;
+
+	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+		int found = 0;
+
+		/* find an available domain id for this device in iommu */
+		ndomains = cap_ndoms(iommu->cap);
+		num = find_first_bit(iommu->domain_ids, ndomains);
+		for (; num < ndomains; ) {
+			if (iommu->domains[num] == domain) {
+				id = num;
+				found = 1;
+				break;
+			}
+			num = find_next_bit(iommu->domain_ids,
+					    cap_ndoms(iommu->cap), num+1);
+		}
+
+		if (found == 0) {
+			num = find_first_zero_bit(iommu->domain_ids, ndomains);
+			if (num >= ndomains) {
+				spin_unlock_irqrestore(&iommu->lock, flags);
+				printk(KERN_ERR "IOMMU: no free domain ids\n");
+				return -EFAULT;
+			}
+
+			set_bit(num, iommu->domain_ids);
+			iommu->domains[num] = domain;
+			id = num;
+		}
+
+		/* Skip top levels of page tables for
+		 * iommu which has less agaw than default.
+		 */
+		for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+			pgd = phys_to_virt(dma_pte_addr(pgd));
+			if (!dma_pte_present(pgd)) {
+				spin_unlock_irqrestore(&iommu->lock, flags);
+				return -ENOMEM;
+			}
+		}
+	}
+
+	context_set_domain_id(context, id);
+	context_set_address_width(context, iommu->agaw);
+	context_set_address_root(context, virt_to_phys(pgd));
 	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 	context_set_fault_enable(context);
 	context_set_present(context);
-- 
cgit v0.10.2


From faa3d6f5ffe7bf60ebfd0d36513fbcda0eb0ea1a Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 23:09:29 +0800
Subject: Change intel iommu APIs of virtual machine domain

These APIs are used by KVM to use VT-d

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8a204d5..f138026 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2944,96 +2944,87 @@ static void vm_domain_exit(struct dmar_domain *domain)
 	free_domain_mem(domain);
 }
 
-void intel_iommu_domain_exit(struct dmar_domain *domain)
+struct dmar_domain *intel_iommu_alloc_domain(void)
 {
-	u64 end;
-
-	/* Domain 0 is reserved, so dont process it */
-	if (!domain)
-		return;
-
-	end = DOMAIN_MAX_ADDR(domain->gaw);
-	end = end & (~VTD_PAGE_MASK);
-
-	/* clear ptes */
-	dma_pte_clear_range(domain, 0, end);
-
-	/* free page tables */
-	dma_pte_free_pagetable(domain, 0, end);
-
-	iommu_free_domain(domain);
-	free_domain_mem(domain);
-}
-EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
-
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
-{
-	struct dmar_drhd_unit *drhd;
 	struct dmar_domain *domain;
-	struct intel_iommu *iommu;
 
-	drhd = dmar_find_matched_drhd_unit(pdev);
-	if (!drhd) {
-		printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
-		return NULL;
-	}
-
-	iommu = drhd->iommu;
-	if (!iommu) {
-		printk(KERN_ERR
-			"intel_iommu_domain_alloc: iommu == NULL\n");
-		return NULL;
-	}
-	domain = iommu_alloc_domain(iommu);
+	domain = iommu_alloc_vm_domain();
 	if (!domain) {
 		printk(KERN_ERR
 			"intel_iommu_domain_alloc: domain == NULL\n");
 		return NULL;
 	}
-	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+	if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
 		printk(KERN_ERR
 			"intel_iommu_domain_alloc: domain_init() failed\n");
-		intel_iommu_domain_exit(domain);
+		vm_domain_exit(domain);
 		return NULL;
 	}
+
 	return domain;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain);
 
-int intel_iommu_context_mapping(
-	struct dmar_domain *domain, struct pci_dev *pdev)
+void intel_iommu_free_domain(struct dmar_domain *domain)
 {
-	int rc;
-	rc = domain_context_mapping(domain, pdev);
-	return rc;
+	vm_domain_exit(domain);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
 
-int intel_iommu_page_mapping(
-	struct dmar_domain *domain, dma_addr_t iova,
-	u64 hpa, size_t size, int prot)
+int intel_iommu_attach_device(struct dmar_domain *domain,
+			      struct pci_dev *pdev)
 {
-	int rc;
-	rc = domain_page_mapping(domain, iova, hpa, size, prot);
-	return rc;
+	int ret;
+
+	/* normally pdev is not mapped */
+	if (unlikely(domain_context_mapped(pdev))) {
+		struct dmar_domain *old_domain;
+
+		old_domain = find_domain(pdev);
+		if (old_domain) {
+			if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+				vm_domain_remove_one_dev_info(old_domain, pdev);
+			else
+				domain_remove_dev_info(old_domain);
+		}
+	}
+
+	ret = domain_context_mapping(domain, pdev);
+	if (ret)
+		return ret;
+
+	ret = vm_domain_add_dev_info(domain, pdev);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_attach_device);
 
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+void intel_iommu_detach_device(struct dmar_domain *domain,
+			       struct pci_dev *pdev)
 {
-	struct intel_iommu *iommu;
+	vm_domain_remove_one_dev_info(domain, pdev);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_detach_device);
 
-	iommu = device_to_iommu(bus, devfn);
-	iommu_detach_dev(iommu, bus, devfn);
+int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
+			    u64 hpa, size_t size, int prot)
+{
+	int ret;
+	ret = domain_page_mapping(domain, iova, hpa, size, prot);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+EXPORT_SYMBOL_GPL(intel_iommu_map_address);
 
-struct dmar_domain *
-intel_iommu_find_domain(struct pci_dev *pdev)
+void intel_iommu_unmap_address(struct dmar_domain *domain,
+			       dma_addr_t iova, size_t size)
 {
-	return find_domain(pdev);
+	dma_addr_t base;
+
+	/* The address might not be aligned */
+	base = iova & VTD_PAGE_MASK;
+	size = VTD_PAGE_ALIGN(size);
+	dma_pte_clear_range(domain, base, base + size);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+EXPORT_SYMBOL_GPL(intel_iommu_unmap_address);
 
 int intel_iommu_found(void)
 {
@@ -3041,17 +3032,15 @@ int intel_iommu_found(void)
 }
 EXPORT_SYMBOL_GPL(intel_iommu_found);
 
-u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova)
 {
 	struct dma_pte *pte;
-	u64 pfn;
+	u64 phys = 0;
 
-	pfn = 0;
 	pte = addr_to_dma_pte(domain, iova);
-
 	if (pte)
-		pfn = dma_pte_addr(pte);
+		phys = dma_pte_addr(pte);
 
-	return pfn >> VTD_PAGE_SHIFT;
+	return phys;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
+EXPORT_SYMBOL_GPL(intel_iommu_iova_to_phys);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 06349fd..07973c4 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,15 +330,17 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-void intel_iommu_domain_exit(struct dmar_domain *domain);
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
-int intel_iommu_context_mapping(struct dmar_domain *domain,
-				struct pci_dev *pdev);
-int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
-			     u64 hpa, size_t size, int prot);
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
-struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
-u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
+struct dmar_domain *intel_iommu_alloc_domain(void);
+void intel_iommu_free_domain(struct dmar_domain *domain);
+int intel_iommu_attach_device(struct dmar_domain *domain,
+			      struct pci_dev *pdev);
+void intel_iommu_detach_device(struct dmar_domain *domain,
+			       struct pci_dev *pdev);
+int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
+			    u64 hpa, size_t size, int prot);
+void intel_iommu_unmap_address(struct dmar_domain *domain,
+			       dma_addr_t iova, size_t size);
+u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova);
 
 #ifdef CONFIG_DMAR
 int intel_iommu_found(void);
-- 
cgit v0.10.2


From fe40f1e020d0923f5f35ca15f02a206c75a28053 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 23:10:23 +0800
Subject: Check agaw is sufficient for mapped memory

When domain is related to multiple iommus, need to check if the minimum agaw is sufficient for the mapped memory

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index f138026..772fb22 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -230,6 +230,7 @@ struct dmar_domain {
 	int		iommu_coherency;/* indicate coherency of iommu access */
 	int		iommu_count;	/* reference count of iommu */
 	spinlock_t	iommu_lock;	/* protect iommu set in domain */
+	u64		max_addr;	/* maximum mapped address */
 };
 
 /* PCI domain-device relationship */
@@ -2849,6 +2850,22 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
 /* domain id for virtual machine, it won't be set in context */
 static unsigned long vm_domid;
 
+static int vm_domain_min_agaw(struct dmar_domain *domain)
+{
+	int i;
+	int min_agaw = domain->agaw;
+
+	i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
+	for (; i < g_num_of_iommus; ) {
+		if (min_agaw > g_iommus[i]->agaw)
+			min_agaw = g_iommus[i]->agaw;
+
+		i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
+	}
+
+	return min_agaw;
+}
+
 static struct dmar_domain *iommu_alloc_vm_domain(void)
 {
 	struct dmar_domain *domain;
@@ -2883,6 +2900,7 @@ static int vm_domain_init(struct dmar_domain *domain, int guest_width)
 
 	domain->iommu_count = 0;
 	domain->iommu_coherency = 0;
+	domain->max_addr = 0;
 
 	/* always allocate the top pgd */
 	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
@@ -2974,6 +2992,9 @@ EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
 int intel_iommu_attach_device(struct dmar_domain *domain,
 			      struct pci_dev *pdev)
 {
+	struct intel_iommu *iommu;
+	int addr_width;
+	u64 end;
 	int ret;
 
 	/* normally pdev is not mapped */
@@ -2989,6 +3010,21 @@ int intel_iommu_attach_device(struct dmar_domain *domain,
 		}
 	}
 
+	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+	if (!iommu)
+		return -ENODEV;
+
+	/* check if this iommu agaw is sufficient for max mapped address */
+	addr_width = agaw_to_width(iommu->agaw);
+	end = DOMAIN_MAX_ADDR(addr_width);
+	end = end & VTD_PAGE_MASK;
+	if (end < domain->max_addr) {
+		printk(KERN_ERR "%s: iommu agaw (%d) is not "
+		       "sufficient for the mapped address (%llx)\n",
+		       __func__, iommu->agaw, domain->max_addr);
+		return -EFAULT;
+	}
+
 	ret = domain_context_mapping(domain, pdev);
 	if (ret)
 		return ret;
@@ -3008,7 +3044,29 @@ EXPORT_SYMBOL_GPL(intel_iommu_detach_device);
 int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
 			    u64 hpa, size_t size, int prot)
 {
+	u64 max_addr;
+	int addr_width;
 	int ret;
+
+	max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
+	if (domain->max_addr < max_addr) {
+		int min_agaw;
+		u64 end;
+
+		/* check if minimum agaw is sufficient for mapped address */
+		min_agaw = vm_domain_min_agaw(domain);
+		addr_width = agaw_to_width(min_agaw);
+		end = DOMAIN_MAX_ADDR(addr_width);
+		end = end & VTD_PAGE_MASK;
+		if (end < max_addr) {
+			printk(KERN_ERR "%s: iommu agaw (%d) is not "
+			       "sufficient for the mapped address (%llx)\n",
+			       __func__, min_agaw, max_addr);
+			return -EFAULT;
+		}
+		domain->max_addr = max_addr;
+	}
+
 	ret = domain_page_mapping(domain, iova, hpa, size, prot);
 	return ret;
 }
@@ -3023,6 +3081,9 @@ void intel_iommu_unmap_address(struct dmar_domain *domain,
 	base = iova & VTD_PAGE_MASK;
 	size = VTD_PAGE_ALIGN(size);
 	dma_pte_clear_range(domain, base, base + size);
+
+	if (domain->max_addr == base + size)
+		domain->max_addr = base;
 }
 EXPORT_SYMBOL_GPL(intel_iommu_unmap_address);
 
-- 
cgit v0.10.2


From 260782bcfdaaa7850f29d6bb2ec6603019168c57 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Tue, 2 Dec 2008 21:03:39 +0800
Subject: KVM: use the new intel iommu APIs

intel iommu APIs are updated, use the new APIs.

In addition, change kvm_iommu_map_guest() to just create the domain, let kvm_iommu_assign_device() assign device.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index eafabd5..c96739b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -330,9 +330,10 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 #ifdef CONFIG_DMAR
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
 			unsigned long npages);
-int kvm_iommu_map_guest(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_map_guest(struct kvm *kvm);
 int kvm_iommu_unmap_guest(struct kvm *kvm);
+int kvm_assign_device(struct kvm *kvm,
+		      struct kvm_assigned_dev_kernel *assigned_dev);
 #else /* CONFIG_DMAR */
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
 				      gfn_t base_gfn,
@@ -341,9 +342,7 @@ static inline int kvm_iommu_map_pages(struct kvm *kvm,
 	return 0;
 }
 
-static inline int kvm_iommu_map_guest(struct kvm *kvm,
-				      struct kvm_assigned_dev_kernel
-				      *assigned_dev)
+static inline int kvm_iommu_map_guest(struct kvm *kvm)
 {
 	return -ENODEV;
 }
@@ -352,6 +351,12 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
 {
 	return 0;
 }
+
+static inline int kvm_assign_device(struct kvm *kvm,
+		struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	return 0;
+}
 #endif /* CONFIG_DMAR */
 
 static inline void kvm_guest_enter(void)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fc6127c..c92b634 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -503,7 +503,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
 	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
-		r = kvm_iommu_map_guest(kvm, match);
+		if (!kvm->arch.intel_iommu_domain) {
+			r = kvm_iommu_map_guest(kvm);
+			if (r)
+				goto out_list_del;
+		}
+		r = kvm_assign_device(kvm, match);
 		if (r)
 			goto out_list_del;
 	}
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
index a770874..44bb58a 100644
--- a/virt/kvm/vtd.c
+++ b/virt/kvm/vtd.c
@@ -45,20 +45,18 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 
 	for (i = 0; i < npages; i++) {
 		/* check if already mapped */
-		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
-						     gfn_to_gpa(gfn));
-		if (pfn)
+		if (intel_iommu_iova_to_phys(domain,
+					     gfn_to_gpa(gfn)))
 			continue;
 
 		pfn = gfn_to_pfn(kvm, gfn);
-		r = intel_iommu_page_mapping(domain,
-					     gfn_to_gpa(gfn),
-					     pfn_to_hpa(pfn),
-					     PAGE_SIZE,
-					     DMA_PTE_READ |
-					     DMA_PTE_WRITE);
+		r = intel_iommu_map_address(domain,
+					    gfn_to_gpa(gfn),
+					    pfn_to_hpa(pfn),
+					    PAGE_SIZE,
+					    DMA_PTE_READ | DMA_PTE_WRITE);
 		if (r) {
-			printk(KERN_ERR "kvm_iommu_map_pages:"
+			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%lx\n", pfn);
 			goto unmap_pages;
 		}
@@ -86,50 +84,55 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
 	return r;
 }
 
-int kvm_iommu_map_guest(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *assigned_dev)
+int kvm_assign_device(struct kvm *kvm,
+		      struct kvm_assigned_dev_kernel *assigned_dev)
 {
 	struct pci_dev *pdev = NULL;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
 	int r;
 
-	if (!intel_iommu_found()) {
-		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	pdev = assigned_dev->dev;
+	if (pdev == NULL)
 		return -ENODEV;
+
+	r = intel_iommu_attach_device(domain, pdev);
+	if (r) {
+		printk(KERN_ERR "assign device %x:%x.%x failed",
+			pdev->bus->number,
+			PCI_SLOT(pdev->devfn),
+			PCI_FUNC(pdev->devfn));
+		return r;
 	}
 
-	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
-	       assigned_dev->host_busnr,
-	       PCI_SLOT(assigned_dev->host_devfn),
-	       PCI_FUNC(assigned_dev->host_devfn));
+	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
+		assigned_dev->host_busnr,
+		PCI_SLOT(assigned_dev->host_devfn),
+		PCI_FUNC(assigned_dev->host_devfn));
 
-	pdev = assigned_dev->dev;
+	return 0;
+}
 
-	if (pdev == NULL) {
-		if (kvm->arch.intel_iommu_domain) {
-			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
-			kvm->arch.intel_iommu_domain = NULL;
-		}
+int kvm_iommu_map_guest(struct kvm *kvm)
+{
+	int r;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
 		return -ENODEV;
 	}
 
-	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	kvm->arch.intel_iommu_domain = intel_iommu_alloc_domain();
 	if (!kvm->arch.intel_iommu_domain)
-		return -ENODEV;
+		return -ENOMEM;
 
 	r = kvm_iommu_map_memslots(kvm);
 	if (r)
 		goto out_unmap;
 
-	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
-			       pdev->bus->number, pdev->devfn);
-
-	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
-					pdev);
-	if (r) {
-		printk(KERN_ERR "Domain context map for %s failed",
-		       pci_name(pdev));
-		goto out_unmap;
-	}
 	return 0;
 
 out_unmap:
@@ -138,19 +141,29 @@ out_unmap:
 }
 
 static void kvm_iommu_put_pages(struct kvm *kvm,
-			       gfn_t base_gfn, unsigned long npages)
+				gfn_t base_gfn, unsigned long npages)
 {
 	gfn_t gfn = base_gfn;
 	pfn_t pfn;
 	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
-	int i;
+	unsigned long i;
+	u64 phys;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return;
 
 	for (i = 0; i < npages; i++) {
-		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
-						     gfn_to_gpa(gfn));
+		phys = intel_iommu_iova_to_phys(domain,
+						gfn_to_gpa(gfn));
+		pfn = phys >> PAGE_SHIFT;
 		kvm_release_pfn_clean(pfn);
 		gfn++;
 	}
+
+	intel_iommu_unmap_address(domain,
+				  gfn_to_gpa(base_gfn),
+				  PAGE_SIZE * npages);
 }
 
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
@@ -182,10 +195,9 @@ int kvm_iommu_unmap_guest(struct kvm *kvm)
 		       PCI_FUNC(entry->host_devfn));
 
 		/* detach kvm dmar domain */
-		intel_iommu_detach_dev(domain, entry->host_busnr,
-				       entry->host_devfn);
+		intel_iommu_detach_device(domain, entry->dev);
 	}
 	kvm_iommu_unmap_memslots(kvm);
-	intel_iommu_domain_exit(domain);
+	intel_iommu_free_domain(domain);
 	return 0;
 }
-- 
cgit v0.10.2


From 0a920356748df4fb06e86c21c23d2ed6d31d37ad Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Tue, 2 Dec 2008 21:24:23 +0800
Subject: KVM: support device deassignment

Support device deassignment, it can be used in device hotplug.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c96739b..ce5d1c1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -334,6 +334,8 @@ int kvm_iommu_map_guest(struct kvm *kvm);
 int kvm_iommu_unmap_guest(struct kvm *kvm);
 int kvm_assign_device(struct kvm *kvm,
 		      struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_deassign_device(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
 #else /* CONFIG_DMAR */
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
 				      gfn_t base_gfn,
@@ -357,6 +359,12 @@ static inline int kvm_assign_device(struct kvm *kvm,
 {
 	return 0;
 }
+
+static inline int kvm_deassign_device(struct kvm *kvm,
+		struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	return 0;
+}
 #endif /* CONFIG_DMAR */
 
 static inline void kvm_guest_enter(void)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c92b634..3238e08 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -530,6 +530,35 @@ out_free:
 }
 #endif
 
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
+		struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (!match) {
+		printk(KERN_INFO "%s: device hasn't been assigned before, "
+		  "so cannot be deassigned\n", __func__);
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
+		kvm_deassign_device(kvm, match);
+
+	kvm_free_assigned_device(kvm, match);
+
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+#endif
+
 static inline int valid_vcpu(int n)
 {
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
@@ -1863,6 +1892,19 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 #endif
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+	case KVM_DEASSIGN_PCI_DEVICE: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
index 44bb58a..174ea1f 100644
--- a/virt/kvm/vtd.c
+++ b/virt/kvm/vtd.c
@@ -116,6 +116,30 @@ int kvm_assign_device(struct kvm *kvm,
 	return 0;
 }
 
+int kvm_deassign_device(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct pci_dev *pdev = NULL;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	pdev = assigned_dev->dev;
+	if (pdev == NULL)
+		return -ENODEV;
+
+	intel_iommu_detach_device(domain, pdev);
+
+	printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
+		assigned_dev->host_busnr,
+		PCI_SLOT(assigned_dev->host_devfn),
+		PCI_FUNC(assigned_dev->host_devfn));
+
+	return 0;
+}
+
 int kvm_iommu_map_guest(struct kvm *kvm)
 {
 	int r;
-- 
cgit v0.10.2


From b653574a7d14b663cc812cb20be6a114939ba186 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 23:29:53 +0800
Subject: Deassign device in kvm_free_assgined_device

In kvm_iommu_unmap_memslots(), assigned_dev_head is already empty.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ce5d1c1..e62a462 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -316,6 +316,7 @@ struct kvm_assigned_dev_kernel {
 #define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
 	unsigned long irq_requested_type;
 	int irq_source_id;
+	int flags;
 	struct pci_dev *dev;
 	struct kvm *kvm;
 };
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3238e08..4ef0fb4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -496,6 +496,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	match->assigned_dev_id = assigned_dev->assigned_dev_id;
 	match->host_busnr = assigned_dev->busnr;
 	match->host_devfn = assigned_dev->devfn;
+	match->flags = assigned_dev->flags;
 	match->dev = dev;
 	match->irq_source_id = -1;
 	match->kvm = kvm;
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
index 174ea1f..d46de9a 100644
--- a/virt/kvm/vtd.c
+++ b/virt/kvm/vtd.c
@@ -205,22 +205,12 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 
 int kvm_iommu_unmap_guest(struct kvm *kvm)
 {
-	struct kvm_assigned_dev_kernel *entry;
 	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
 
 	/* check if iommu exists and in use */
 	if (!domain)
 		return 0;
 
-	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
-		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
-		       entry->host_busnr,
-		       PCI_SLOT(entry->host_devfn),
-		       PCI_FUNC(entry->host_devfn));
-
-		/* detach kvm dmar domain */
-		intel_iommu_detach_device(domain, entry->dev);
-	}
 	kvm_iommu_unmap_memslots(kvm);
 	intel_iommu_free_domain(domain);
 	return 0;
-- 
cgit v0.10.2


From c4fa3864281c7d88b7262cbc6cbd5c90bb59860e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 26 Nov 2008 15:51:19 +0100
Subject: KVM: rename vtd.c to iommu.c

Impact: file renamed

The code in the vtd.c file can be reused for other IOMMUs as well. So
rename it to make it clear that it handle more than VT-d.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 76464dc..cb69dfc 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -52,7 +52,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 		coalesced_mmio.o irq_comm.o)
 
 ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c023435..00f46c2 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -8,7 +8,7 @@ ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
 ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
new file mode 100644
index 0000000..d46de9a
--- /dev/null
+++ b/virt/kvm/iommu.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, r = 0;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		if (intel_iommu_iova_to_phys(domain,
+					     gfn_to_gpa(gfn)))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		r = intel_iommu_map_address(domain,
+					    gfn_to_gpa(gfn),
+					    pfn_to_hpa(pfn),
+					    PAGE_SIZE,
+					    DMA_PTE_READ | DMA_PTE_WRITE);
+		if (r) {
+			printk(KERN_ERR "kvm_iommu_map_address:"
+			       "iommu failed to map pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, r;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					kvm->memslots[i].npages);
+		if (r)
+			break;
+	}
+	up_read(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_assign_device(struct kvm *kvm,
+		      struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int r;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	pdev = assigned_dev->dev;
+	if (pdev == NULL)
+		return -ENODEV;
+
+	r = intel_iommu_attach_device(domain, pdev);
+	if (r) {
+		printk(KERN_ERR "assign device %x:%x.%x failed",
+			pdev->bus->number,
+			PCI_SLOT(pdev->devfn),
+			PCI_FUNC(pdev->devfn));
+		return r;
+	}
+
+	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
+		assigned_dev->host_busnr,
+		PCI_SLOT(assigned_dev->host_devfn),
+		PCI_FUNC(assigned_dev->host_devfn));
+
+	return 0;
+}
+
+int kvm_deassign_device(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct pci_dev *pdev = NULL;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	pdev = assigned_dev->dev;
+	if (pdev == NULL)
+		return -ENODEV;
+
+	intel_iommu_detach_device(domain, pdev);
+
+	printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
+		assigned_dev->host_busnr,
+		PCI_SLOT(assigned_dev->host_devfn),
+		PCI_FUNC(assigned_dev->host_devfn));
+
+	return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm)
+{
+	int r;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_alloc_domain();
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENOMEM;
+
+	r = kvm_iommu_map_memslots(kvm);
+	if (r)
+		goto out_unmap;
+
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	unsigned long i;
+	u64 phys;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return;
+
+	for (i = 0; i < npages; i++) {
+		phys = intel_iommu_iova_to_phys(domain,
+						gfn_to_gpa(gfn));
+		pfn = phys >> PAGE_SHIFT;
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+
+	intel_iommu_unmap_address(domain,
+				  gfn_to_gpa(base_gfn),
+				  PAGE_SIZE * npages);
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_free_domain(domain);
+	return 0;
+}
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
deleted file mode 100644
index d46de9a..0000000
--- a/virt/kvm/vtd.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Copyright IBM Corporation, 2008
- * Author: Allen M. Kay <allen.m.kay@intel.com>
- * Author: Weidong Han <weidong.han@intel.com>
- * Author: Ben-Ami Yassour <benami@il.ibm.com>
- */
-
-#include <linux/list.h>
-#include <linux/kvm_host.h>
-#include <linux/pci.h>
-#include <linux/dmar.h>
-#include <linux/intel-iommu.h>
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm);
-static void kvm_iommu_put_pages(struct kvm *kvm,
-				gfn_t base_gfn, unsigned long npages);
-
-int kvm_iommu_map_pages(struct kvm *kvm,
-			gfn_t base_gfn, unsigned long npages)
-{
-	gfn_t gfn = base_gfn;
-	pfn_t pfn;
-	int i, r = 0;
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
-
-	/* check if iommu exists and in use */
-	if (!domain)
-		return 0;
-
-	for (i = 0; i < npages; i++) {
-		/* check if already mapped */
-		if (intel_iommu_iova_to_phys(domain,
-					     gfn_to_gpa(gfn)))
-			continue;
-
-		pfn = gfn_to_pfn(kvm, gfn);
-		r = intel_iommu_map_address(domain,
-					    gfn_to_gpa(gfn),
-					    pfn_to_hpa(pfn),
-					    PAGE_SIZE,
-					    DMA_PTE_READ | DMA_PTE_WRITE);
-		if (r) {
-			printk(KERN_ERR "kvm_iommu_map_address:"
-			       "iommu failed to map pfn=%lx\n", pfn);
-			goto unmap_pages;
-		}
-		gfn++;
-	}
-	return 0;
-
-unmap_pages:
-	kvm_iommu_put_pages(kvm, base_gfn, i);
-	return r;
-}
-
-static int kvm_iommu_map_memslots(struct kvm *kvm)
-{
-	int i, r;
-
-	down_read(&kvm->slots_lock);
-	for (i = 0; i < kvm->nmemslots; i++) {
-		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
-					kvm->memslots[i].npages);
-		if (r)
-			break;
-	}
-	up_read(&kvm->slots_lock);
-	return r;
-}
-
-int kvm_assign_device(struct kvm *kvm,
-		      struct kvm_assigned_dev_kernel *assigned_dev)
-{
-	struct pci_dev *pdev = NULL;
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
-	int r;
-
-	/* check if iommu exists and in use */
-	if (!domain)
-		return 0;
-
-	pdev = assigned_dev->dev;
-	if (pdev == NULL)
-		return -ENODEV;
-
-	r = intel_iommu_attach_device(domain, pdev);
-	if (r) {
-		printk(KERN_ERR "assign device %x:%x.%x failed",
-			pdev->bus->number,
-			PCI_SLOT(pdev->devfn),
-			PCI_FUNC(pdev->devfn));
-		return r;
-	}
-
-	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
-		assigned_dev->host_busnr,
-		PCI_SLOT(assigned_dev->host_devfn),
-		PCI_FUNC(assigned_dev->host_devfn));
-
-	return 0;
-}
-
-int kvm_deassign_device(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *assigned_dev)
-{
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
-	struct pci_dev *pdev = NULL;
-
-	/* check if iommu exists and in use */
-	if (!domain)
-		return 0;
-
-	pdev = assigned_dev->dev;
-	if (pdev == NULL)
-		return -ENODEV;
-
-	intel_iommu_detach_device(domain, pdev);
-
-	printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
-		assigned_dev->host_busnr,
-		PCI_SLOT(assigned_dev->host_devfn),
-		PCI_FUNC(assigned_dev->host_devfn));
-
-	return 0;
-}
-
-int kvm_iommu_map_guest(struct kvm *kvm)
-{
-	int r;
-
-	if (!intel_iommu_found()) {
-		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
-		return -ENODEV;
-	}
-
-	kvm->arch.intel_iommu_domain = intel_iommu_alloc_domain();
-	if (!kvm->arch.intel_iommu_domain)
-		return -ENOMEM;
-
-	r = kvm_iommu_map_memslots(kvm);
-	if (r)
-		goto out_unmap;
-
-	return 0;
-
-out_unmap:
-	kvm_iommu_unmap_memslots(kvm);
-	return r;
-}
-
-static void kvm_iommu_put_pages(struct kvm *kvm,
-				gfn_t base_gfn, unsigned long npages)
-{
-	gfn_t gfn = base_gfn;
-	pfn_t pfn;
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
-	unsigned long i;
-	u64 phys;
-
-	/* check if iommu exists and in use */
-	if (!domain)
-		return;
-
-	for (i = 0; i < npages; i++) {
-		phys = intel_iommu_iova_to_phys(domain,
-						gfn_to_gpa(gfn));
-		pfn = phys >> PAGE_SHIFT;
-		kvm_release_pfn_clean(pfn);
-		gfn++;
-	}
-
-	intel_iommu_unmap_address(domain,
-				  gfn_to_gpa(base_gfn),
-				  PAGE_SIZE * npages);
-}
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm)
-{
-	int i;
-	down_read(&kvm->slots_lock);
-	for (i = 0; i < kvm->nmemslots; i++) {
-		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
-				    kvm->memslots[i].npages);
-	}
-	up_read(&kvm->slots_lock);
-
-	return 0;
-}
-
-int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
-
-	/* check if iommu exists and in use */
-	if (!domain)
-		return 0;
-
-	kvm_iommu_unmap_memslots(kvm);
-	intel_iommu_free_domain(domain);
-	return 0;
-}
-- 
cgit v0.10.2


From 4a77a6cf6d9bf9f5c74b27f62bd2bfe6dcc88392 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 26 Nov 2008 17:02:33 +0100
Subject: introcude linux/iommu.h for an iommu api

This patch introduces the API to abstract the exported VT-d functions
for KVM into a generic API. This way the AMD IOMMU implementation can
plug into this API later.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
new file mode 100644
index 0000000..8a7bfb1
--- /dev/null
+++ b/include/linux/iommu.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __LINUX_IOMMU_H
+#define __LINUX_IOMMU_H
+
+#define IOMMU_READ	(1)
+#define IOMMU_WRITE	(2)
+
+struct device;
+
+struct iommu_domain {
+	void *priv;
+};
+
+struct iommu_ops {
+	int (*domain_init)(struct iommu_domain *domain);
+	void (*domain_destroy)(struct iommu_domain *domain);
+	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
+	void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+	int (*map)(struct iommu_domain *domain, unsigned long iova,
+		   phys_addr_t paddr, size_t size, int prot);
+	void (*unmap)(struct iommu_domain *domain, unsigned long iova,
+		      size_t size);
+	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
+				    unsigned long iova);
+};
+
+#ifdef CONFIG_IOMMU_API
+
+extern void register_iommu(struct iommu_ops *ops);
+extern bool iommu_found(void);
+extern struct iommu_domain *iommu_domain_alloc(void);
+extern void iommu_domain_free(struct iommu_domain *domain);
+extern int iommu_attach_device(struct iommu_domain *domain,
+			       struct device *dev);
+extern void iommu_detach_device(struct iommu_domain *domain,
+				struct device *dev);
+extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
+			   phys_addr_t paddr, size_t size, int prot);
+extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
+			      size_t size);
+extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+				      unsigned long iova);
+
+#else /* CONFIG_IOMMU_API */
+
+static inline void register_iommu(struct iommu_ops *ops)
+{
+}
+
+static inline bool iommu_found(void)
+{
+	return false;
+}
+
+static inline struct iommu_domain *iommu_domain_alloc(void)
+{
+	return NULL;
+}
+
+static inline void iommu_domain_free(struct iommu_domain *domain)
+{
+}
+
+static inline int iommu_attach_device(struct iommu_domain *domain,
+				      struct device *dev)
+{
+	return -ENODEV;
+}
+
+static inline void iommu_detach_device(struct iommu_domain *domain,
+				       struct device *dev)
+{
+}
+
+static inline int iommu_map_range(struct iommu_domain *domain,
+				  unsigned long iova, phys_addr_t paddr,
+				  size_t size, int prot)
+{
+	return -ENODEV;
+}
+
+static inline void iommu_unmap_range(struct iommu_domain *domain,
+				     unsigned long iova, size_t size)
+{
+}
+
+static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+					     unsigned long iova)
+{
+	return 0;
+}
+
+#endif /* CONFIG_IOMMU_API */
+
+#endif /* __LINUX_IOMMU_H */
-- 
cgit v0.10.2


From fc2100eb4d0960b56c2c705a97941c08fb1c0fd4 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 26 Nov 2008 17:21:24 +0100
Subject: add frontend implementation for the IOMMU API

This API can be used by KVM for accessing different types of IOMMUs to
do device passthrough to guests. Beside that this API can also be used
by device drivers to map non-linear host memory into dma-linear
addresses to prevent sgather-gather DMA. UIO may be another user for
this API.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
new file mode 100644
index 0000000..5e039d4
--- /dev/null
+++ b/drivers/base/iommu.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/bug.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/iommu.h>
+
+static struct iommu_ops *iommu_ops;
+
+void register_iommu(struct iommu_ops *ops)
+{
+	if (iommu_ops)
+		BUG();
+
+	iommu_ops = ops;
+}
+
+bool iommu_found()
+{
+	return iommu_ops != NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_found);
+
+struct iommu_domain *iommu_domain_alloc(void)
+{
+	struct iommu_domain *domain;
+	int ret;
+
+	domain = kmalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return NULL;
+
+	ret = iommu_ops->domain_init(domain);
+	if (ret)
+		goto out_free;
+
+	return domain;
+
+out_free:
+	kfree(domain);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_domain_alloc);
+
+void iommu_domain_free(struct iommu_domain *domain)
+{
+	iommu_ops->domain_destroy(domain);
+	kfree(domain);
+}
+EXPORT_SYMBOL_GPL(iommu_domain_free);
+
+int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
+{
+	return iommu_ops->attach_dev(domain, dev);
+}
+EXPORT_SYMBOL_GPL(iommu_attach_device);
+
+void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
+{
+	iommu_ops->detach_dev(domain, dev);
+}
+EXPORT_SYMBOL_GPL(iommu_detach_device);
+
+int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
+		    phys_addr_t paddr, size_t size, int prot)
+{
+	return iommu_ops->map(domain, iova, paddr, size, prot);
+}
+EXPORT_SYMBOL_GPL(iommu_map_range);
+
+void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
+		      size_t size)
+{
+	iommu_ops->unmap(domain, iova, size);
+}
+EXPORT_SYMBOL_GPL(iommu_unmap_range);
+
+phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+			       unsigned long iova)
+{
+	return iommu_ops->iova_to_phys(domain, iova);
+}
+EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
-- 
cgit v0.10.2


From 1aaf118352b85bb359ce28070bcc478f659a7031 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 26 Nov 2008 17:25:13 +0100
Subject: select IOMMU_API when DMAR and/or AMD_IOMMU is selected

These two IOMMUs can implement the current version of this API. So
select the API if one or both of these IOMMU drivers is selected.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 7fa8f61..3d31636 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -687,3 +687,6 @@ config IRQ_PER_CPU
 
 config IOMMU_HELPER
 	def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
+
+config IOMMU_API
+	def_bool (DMAR)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 249d1e0..4737435 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -599,6 +599,9 @@ config SWIOTLB
 config IOMMU_HELPER
 	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
 
+config IOMMU_API
+	def_bool (AMD_IOMMU || DMAR)
+
 config MAXSMP
 	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
 	depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index c666373..b5b8ba5 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_FW_LOADER)	+= firmware_class.o
 obj-$(CONFIG_NUMA)	+= node.o
 obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
 obj-$(CONFIG_SMP)	+= topology.o
+obj-$(CONFIG_IOMMU_API) += iommu.o
 ifeq ($(CONFIG_SYSFS),y)
 obj-$(CONFIG_MODULES)	+= module.o
 endif
-- 
cgit v0.10.2


From 19de40a8472fa64693eab844911eec277d489f6c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 14:43:34 +0100
Subject: KVM: change KVM to use IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 0560f3f..3486636 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -467,7 +467,7 @@ struct kvm_arch {
 	struct kvm_sal_data rdv_sal_data;
 
 	struct list_head assigned_dev_head;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index cb69dfc..0bb99b7 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -51,7 +51,7 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 		coalesced_mmio.o irq_comm.o)
 
-ifeq ($(CONFIG_DMAR),y)
+ifeq ($(CONFIG_IOMMU_API),y)
 common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0f5ebd9..4e586f6 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -31,6 +31,7 @@
 #include <linux/bitops.h>
 #include <linux/hrtimer.h>
 #include <linux/uaccess.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/pgtable.h>
@@ -188,7 +189,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 97215a4..730843d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -360,7 +360,7 @@ struct kvm_arch{
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
 	struct list_head oos_global_pages;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 00f46c2..d3ec292 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,7 +7,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
-ifeq ($(CONFIG_DMAR),y)
+ifeq ($(CONFIG_IOMMU_API),y)
 common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0e6aa81..cc17546 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
@@ -989,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = !tdp_enabled;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e62a462..ec49d0b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -328,7 +328,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
-#ifdef CONFIG_DMAR
+#ifdef CONFIG_IOMMU_API
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
 			unsigned long npages);
 int kvm_iommu_map_guest(struct kvm *kvm);
@@ -337,7 +337,7 @@ int kvm_assign_device(struct kvm *kvm,
 		      struct kvm_assigned_dev_kernel *assigned_dev);
 int kvm_deassign_device(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *assigned_dev);
-#else /* CONFIG_DMAR */
+#else /* CONFIG_IOMMU_API */
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
 				      gfn_t base_gfn,
 				      unsigned long npages)
@@ -366,7 +366,7 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 {
 	return 0;
 }
-#endif /* CONFIG_DMAR */
+#endif /* CONFIG_IOMMU_API */
 
 static inline void kvm_guest_enter(void)
 {
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index d46de9a..d0bebaa 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -25,6 +25,7 @@
 #include <linux/kvm_host.h>
 #include <linux/pci.h>
 #include <linux/dmar.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 static int kvm_iommu_unmap_memslots(struct kvm *kvm);
@@ -37,7 +38,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 	gfn_t gfn = base_gfn;
 	pfn_t pfn;
 	int i, r = 0;
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
 
 	/* check if iommu exists and in use */
 	if (!domain)
@@ -45,16 +46,15 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 
 	for (i = 0; i < npages; i++) {
 		/* check if already mapped */
-		if (intel_iommu_iova_to_phys(domain,
-					     gfn_to_gpa(gfn)))
+		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
 			continue;
 
 		pfn = gfn_to_pfn(kvm, gfn);
-		r = intel_iommu_map_address(domain,
-					    gfn_to_gpa(gfn),
-					    pfn_to_hpa(pfn),
-					    PAGE_SIZE,
-					    DMA_PTE_READ | DMA_PTE_WRITE);
+		r = iommu_map_range(domain,
+				    gfn_to_gpa(gfn),
+				    pfn_to_hpa(pfn),
+				    PAGE_SIZE,
+				    IOMMU_READ | IOMMU_WRITE);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +88,7 @@ int kvm_assign_device(struct kvm *kvm,
 		      struct kvm_assigned_dev_kernel *assigned_dev)
 {
 	struct pci_dev *pdev = NULL;
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
 	int r;
 
 	/* check if iommu exists and in use */
@@ -99,7 +99,7 @@ int kvm_assign_device(struct kvm *kvm,
 	if (pdev == NULL)
 		return -ENODEV;
 
-	r = intel_iommu_attach_device(domain, pdev);
+	r = iommu_attach_device(domain, &pdev->dev);
 	if (r) {
 		printk(KERN_ERR "assign device %x:%x.%x failed",
 			pdev->bus->number,
@@ -119,7 +119,7 @@ int kvm_assign_device(struct kvm *kvm,
 int kvm_deassign_device(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
 	struct pci_dev *pdev = NULL;
 
 	/* check if iommu exists and in use */
@@ -130,7 +130,7 @@ int kvm_deassign_device(struct kvm *kvm,
 	if (pdev == NULL)
 		return -ENODEV;
 
-	intel_iommu_detach_device(domain, pdev);
+	iommu_detach_device(domain, &pdev->dev);
 
 	printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
 		assigned_dev->host_busnr,
@@ -144,13 +144,13 @@ int kvm_iommu_map_guest(struct kvm *kvm)
 {
 	int r;
 
-	if (!intel_iommu_found()) {
-		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+	if (!iommu_found()) {
+		printk(KERN_ERR "%s: iommu not found\n", __func__);
 		return -ENODEV;
 	}
 
-	kvm->arch.intel_iommu_domain = intel_iommu_alloc_domain();
-	if (!kvm->arch.intel_iommu_domain)
+	kvm->arch.iommu_domain = iommu_domain_alloc();
+	if (!kvm->arch.iommu_domain)
 		return -ENOMEM;
 
 	r = kvm_iommu_map_memslots(kvm);
@@ -169,7 +169,7 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 {
 	gfn_t gfn = base_gfn;
 	pfn_t pfn;
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
 	unsigned long i;
 	u64 phys;
 
@@ -178,16 +178,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 		return;
 
 	for (i = 0; i < npages; i++) {
-		phys = intel_iommu_iova_to_phys(domain,
-						gfn_to_gpa(gfn));
+		phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
 		pfn = phys >> PAGE_SHIFT;
 		kvm_release_pfn_clean(pfn);
 		gfn++;
 	}
 
-	intel_iommu_unmap_address(domain,
-				  gfn_to_gpa(base_gfn),
-				  PAGE_SIZE * npages);
+	iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages);
 }
 
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
@@ -205,13 +202,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 
 int kvm_iommu_unmap_guest(struct kvm *kvm)
 {
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
 
 	/* check if iommu exists and in use */
 	if (!domain)
 		return 0;
 
 	kvm_iommu_unmap_memslots(kvm);
-	intel_iommu_free_domain(domain);
+	iommu_domain_free(domain);
 	return 0;
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4ef0fb4..3a5a082 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -504,7 +504,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
 	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
-		if (!kvm->arch.intel_iommu_domain) {
+		if (!kvm->arch.iommu_domain) {
 			r = kvm_iommu_map_guest(kvm);
 			if (r)
 				goto out_list_del;
-- 
cgit v0.10.2


From 5d450806eb0e569c5846a5825e7f535980b0da32 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 14:52:32 +0100
Subject: VT-d: adapt domain init and destroy functions for IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 772fb22..5c95a5a 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -34,6 +34,7 @@
 #include <linux/mempool.h>
 #include <linux/timer.h>
 #include <linux/iova.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -2962,32 +2963,34 @@ static void vm_domain_exit(struct dmar_domain *domain)
 	free_domain_mem(domain);
 }
 
-struct dmar_domain *intel_iommu_alloc_domain(void)
+static int intel_iommu_domain_init(struct iommu_domain *domain)
 {
-	struct dmar_domain *domain;
+	struct dmar_domain *dmar_domain;
 
-	domain = iommu_alloc_vm_domain();
-	if (!domain) {
+	dmar_domain = iommu_alloc_vm_domain();
+	if (!dmar_domain) {
 		printk(KERN_ERR
-			"intel_iommu_domain_alloc: domain == NULL\n");
-		return NULL;
+			"intel_iommu_domain_init: dmar_domain == NULL\n");
+		return -ENOMEM;
 	}
-	if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+	if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
 		printk(KERN_ERR
-			"intel_iommu_domain_alloc: domain_init() failed\n");
-		vm_domain_exit(domain);
-		return NULL;
+			"intel_iommu_domain_init() failed\n");
+		vm_domain_exit(dmar_domain);
+		return -ENOMEM;
 	}
+	domain->priv = dmar_domain;
 
-	return domain;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain);
 
-void intel_iommu_free_domain(struct dmar_domain *domain)
+static void intel_iommu_domain_destroy(struct iommu_domain *domain)
 {
-	vm_domain_exit(domain);
+	struct dmar_domain *dmar_domain = domain->priv;
+
+	domain->priv = NULL;
+	vm_domain_exit(dmar_domain);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
 
 int intel_iommu_attach_device(struct dmar_domain *domain,
 			      struct pci_dev *pdev)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 07973c4..0a7ba0c 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,8 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-struct dmar_domain *intel_iommu_alloc_domain(void);
-void intel_iommu_free_domain(struct dmar_domain *domain);
 int intel_iommu_attach_device(struct dmar_domain *domain,
 			      struct pci_dev *pdev);
 void intel_iommu_detach_device(struct dmar_domain *domain,
-- 
cgit v0.10.2


From 4c5478c94eb29e6101f1f13175f7455bc8b5d953 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 14:58:24 +0100
Subject: VT-d: adapt device attach and detach functions for IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5c95a5a..db9a26c 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2992,9 +2992,11 @@ static void intel_iommu_domain_destroy(struct iommu_domain *domain)
 	vm_domain_exit(dmar_domain);
 }
 
-int intel_iommu_attach_device(struct dmar_domain *domain,
-			      struct pci_dev *pdev)
+static int intel_iommu_attach_device(struct iommu_domain *domain,
+				     struct device *dev)
 {
+	struct dmar_domain *dmar_domain = domain->priv;
+	struct pci_dev *pdev = to_pci_dev(dev);
 	struct intel_iommu *iommu;
 	int addr_width;
 	u64 end;
@@ -3006,7 +3008,7 @@ int intel_iommu_attach_device(struct dmar_domain *domain,
 
 		old_domain = find_domain(pdev);
 		if (old_domain) {
-			if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
 				vm_domain_remove_one_dev_info(old_domain, pdev);
 			else
 				domain_remove_dev_info(old_domain);
@@ -3021,28 +3023,29 @@ int intel_iommu_attach_device(struct dmar_domain *domain,
 	addr_width = agaw_to_width(iommu->agaw);
 	end = DOMAIN_MAX_ADDR(addr_width);
 	end = end & VTD_PAGE_MASK;
-	if (end < domain->max_addr) {
+	if (end < dmar_domain->max_addr) {
 		printk(KERN_ERR "%s: iommu agaw (%d) is not "
 		       "sufficient for the mapped address (%llx)\n",
-		       __func__, iommu->agaw, domain->max_addr);
+		       __func__, iommu->agaw, dmar_domain->max_addr);
 		return -EFAULT;
 	}
 
-	ret = domain_context_mapping(domain, pdev);
+	ret = domain_context_mapping(dmar_domain, pdev);
 	if (ret)
 		return ret;
 
-	ret = vm_domain_add_dev_info(domain, pdev);
+	ret = vm_domain_add_dev_info(dmar_domain, pdev);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_attach_device);
 
-void intel_iommu_detach_device(struct dmar_domain *domain,
-			       struct pci_dev *pdev)
+static void intel_iommu_detach_device(struct iommu_domain *domain,
+				      struct device *dev)
 {
-	vm_domain_remove_one_dev_info(domain, pdev);
+	struct dmar_domain *dmar_domain = domain->priv;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	vm_domain_remove_one_dev_info(dmar_domain, pdev);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_detach_device);
 
 int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
 			    u64 hpa, size_t size, int prot)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 0a7ba0c..9909c5a 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,10 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-int intel_iommu_attach_device(struct dmar_domain *domain,
-			      struct pci_dev *pdev);
-void intel_iommu_detach_device(struct dmar_domain *domain,
-			       struct pci_dev *pdev);
 int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
 			    u64 hpa, size_t size, int prot);
 void intel_iommu_unmap_address(struct dmar_domain *domain,
-- 
cgit v0.10.2


From dde57a210dcdce85e2813bab8f88687761d9f6a6 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 15:04:09 +0100
Subject: VT-d: adapt domain map and unmap functions for IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index db9a26c..8af6c96 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3047,20 +3047,28 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
 	vm_domain_remove_one_dev_info(dmar_domain, pdev);
 }
 
-int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
-			    u64 hpa, size_t size, int prot)
+static int intel_iommu_map_range(struct iommu_domain *domain,
+				 unsigned long iova, phys_addr_t hpa,
+				 size_t size, int iommu_prot)
 {
+	struct dmar_domain *dmar_domain = domain->priv;
 	u64 max_addr;
 	int addr_width;
+	int prot = 0;
 	int ret;
 
+	if (iommu_prot & IOMMU_READ)
+		prot |= DMA_PTE_READ;
+	if (iommu_prot & IOMMU_WRITE)
+		prot |= DMA_PTE_WRITE;
+
 	max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
-	if (domain->max_addr < max_addr) {
+	if (dmar_domain->max_addr < max_addr) {
 		int min_agaw;
 		u64 end;
 
 		/* check if minimum agaw is sufficient for mapped address */
-		min_agaw = vm_domain_min_agaw(domain);
+		min_agaw = vm_domain_min_agaw(dmar_domain);
 		addr_width = agaw_to_width(min_agaw);
 		end = DOMAIN_MAX_ADDR(addr_width);
 		end = end & VTD_PAGE_MASK;
@@ -3070,28 +3078,27 @@ int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
 			       __func__, min_agaw, max_addr);
 			return -EFAULT;
 		}
-		domain->max_addr = max_addr;
+		dmar_domain->max_addr = max_addr;
 	}
 
-	ret = domain_page_mapping(domain, iova, hpa, size, prot);
+	ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_map_address);
 
-void intel_iommu_unmap_address(struct dmar_domain *domain,
-			       dma_addr_t iova, size_t size)
+static void intel_iommu_unmap_range(struct iommu_domain *domain,
+				    unsigned long iova, size_t size)
 {
+	struct dmar_domain *dmar_domain = domain->priv;
 	dma_addr_t base;
 
 	/* The address might not be aligned */
 	base = iova & VTD_PAGE_MASK;
 	size = VTD_PAGE_ALIGN(size);
-	dma_pte_clear_range(domain, base, base + size);
+	dma_pte_clear_range(dmar_domain, base, base + size);
 
-	if (domain->max_addr == base + size)
-		domain->max_addr = base;
+	if (dmar_domain->max_addr == base + size)
+		dmar_domain->max_addr = base;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_unmap_address);
 
 int intel_iommu_found(void)
 {
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 9909c5a..6bc26e0 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,10 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
-			    u64 hpa, size_t size, int prot);
-void intel_iommu_unmap_address(struct dmar_domain *domain,
-			       dma_addr_t iova, size_t size);
 u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova);
 
 #ifdef CONFIG_DMAR
-- 
cgit v0.10.2


From d14d65777c2491dd5baf1e17f444b8f653f3cbb1 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 15:06:57 +0100
Subject: VT-d: adapt domain iova_to_phys function for IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8af6c96..7128105 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3106,15 +3106,16 @@ int intel_iommu_found(void)
 }
 EXPORT_SYMBOL_GPL(intel_iommu_found);
 
-u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova)
+static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
+					    unsigned long iova)
 {
+	struct dmar_domain *dmar_domain = domain->priv;
 	struct dma_pte *pte;
 	u64 phys = 0;
 
-	pte = addr_to_dma_pte(domain, iova);
+	pte = addr_to_dma_pte(dmar_domain, iova);
 	if (pte)
 		phys = dma_pte_addr(pte);
 
 	return phys;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_iova_to_phys);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 6bc26e0..26ccc02 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,8 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova);
-
 #ifdef CONFIG_DMAR
 int intel_iommu_found(void);
 #else /* CONFIG_DMAR */
-- 
cgit v0.10.2


From a8bcbb0de4a52f07fef7412ddc877348311ebf2a Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 15:14:02 +0100
Subject: VT-d: register functions for the IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 7128105..81e04ec 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -277,6 +277,8 @@ static int intel_iommu_strict;
 static DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
 
+static struct iommu_ops intel_iommu_ops;
+
 static int __init intel_iommu_setup(char *str)
 {
 	if (!str)
@@ -2729,6 +2731,9 @@ int __init intel_iommu_init(void)
 	init_timer(&unmap_timer);
 	force_iommu = 1;
 	dma_ops = &intel_dma_ops;
+
+	register_iommu(&intel_iommu_ops);
+
 	return 0;
 }
 
@@ -3119,3 +3124,13 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 
 	return phys;
 }
+
+static struct iommu_ops intel_iommu_ops = {
+	.domain_init	= intel_iommu_domain_init,
+	.domain_destroy = intel_iommu_domain_destroy,
+	.attach_dev	= intel_iommu_attach_device,
+	.detach_dev	= intel_iommu_detach_device,
+	.map		= intel_iommu_map_range,
+	.unmap		= intel_iommu_unmap_range,
+	.iova_to_phys	= intel_iommu_iova_to_phys,
+};
-- 
cgit v0.10.2


From e4754c96cf8b82a754dc5ba791d6c0bf1fbe8e8e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 15:26:42 +0100
Subject: VT-d: remove now unused intel_iommu_found function

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 81e04ec..ecb5fd3 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3105,12 +3105,6 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
 		dmar_domain->max_addr = base;
 }
 
-int intel_iommu_found(void)
-{
-	return g_num_of_iommus;
-}
-EXPORT_SYMBOL_GPL(intel_iommu_found);
-
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 					    unsigned long iova)
 {
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 26ccc02..c4f6c10 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,15 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-#ifdef CONFIG_DMAR
-int intel_iommu_found(void);
-#else /* CONFIG_DMAR */
-static inline int intel_iommu_found(void)
-{
-	return 0;
-}
-#endif /* CONFIG_DMAR */
-
 extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t);
 extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t);
 extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int);
-- 
cgit v0.10.2


From cdc7b83726297b43deed0455d8732163cc59802a Mon Sep 17 00:00:00 2001
From: Mike Day <ncmike@ncultra.org>
Date: Fri, 12 Dec 2008 17:16:30 +0100
Subject: intel-iommu: fix bit shift at DOMAIN_FLAG_P2P_MULTIPLE_DEVICES

Signed-off-by: Mike Day <ncmike@ncultra.org>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index ecb5fd3..235fb7a 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -205,7 +205,7 @@ static inline bool dma_pte_present(struct dma_pte *pte)
 }
 
 /* devices under the same p2p bridge are owned in one domain */
-#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 < 0)
+#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
 
 /* domain represents a virtual machine, more than one devices
  * across iommus may be owned in one domain, e.g. kvm guest.
-- 
cgit v0.10.2


From 38e817febe2f12bd2fbf92a1df36f41946d0c223 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 17:27:52 +0100
Subject: AMD IOMMU: rename iommu_map to iommu_map_page

Impact: function rename

The iommu_map function maps only one page. Make this clear in the
function name.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2e2da71..b11c855 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -338,10 +338,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
  * supporting all features of AMD IOMMU page tables like level skipping
  * and full 64 bit address spaces.
  */
-static int iommu_map(struct protection_domain *dom,
-		     unsigned long bus_addr,
-		     unsigned long phys_addr,
-		     int prot)
+static int iommu_map_page(struct protection_domain *dom,
+			  unsigned long bus_addr,
+			  unsigned long phys_addr,
+			  int prot)
 {
 	u64 __pte, *pte, *page;
 
@@ -440,7 +440,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 
 	for (addr = e->address_start; addr < e->address_end;
 	     addr += PAGE_SIZE) {
-		ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
+		ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
 		if (ret)
 			return ret;
 		/*
-- 
cgit v0.10.2


From 86db2e5d47bfa61a151d6ac83263f4bde4d52290 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 18:20:21 +0100
Subject: AMD IOMMU: make dma_ops_free_pagetable generic

Impact: change code to free pagetables from protection domains

The dma_ops_free_pagetable function can only free pagetables from
dma_ops domains. Change that to free pagetables of pure protection
domains.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index b11c855..8a0fd3d 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -587,12 +587,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
 	iommu_area_reserve(dom->bitmap, start_page, pages);
 }
 
-static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
+static void free_pagetable(struct protection_domain *domain)
 {
 	int i, j;
 	u64 *p1, *p2, *p3;
 
-	p1 = dma_dom->domain.pt_root;
+	p1 = domain->pt_root;
 
 	if (!p1)
 		return;
@@ -613,6 +613,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
 	}
 
 	free_page((unsigned long)p1);
+
+	domain->pt_root = NULL;
 }
 
 /*
@@ -624,7 +626,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 	if (!dom)
 		return;
 
-	dma_ops_free_pagetable(dom);
+	free_pagetable(&dom->domain);
 
 	kfree(dom->pte_pages);
 
-- 
cgit v0.10.2


From a2acfb75792511a35586db80a38b8e4701a97730 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 18:28:53 +0100
Subject: AMD IOMMU: add domain id free function

Impact: add code to release a domain id

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 8a0fd3d..0922d5f 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -571,6 +571,18 @@ static u16 domain_id_alloc(void)
 	return id;
 }
 
+#ifdef CONFIG_IOMMU_API
+static void domain_id_free(int id)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	if (id > 0 && id < MAX_DOMAIN_ID)
+		__clear_bit(id, amd_iommu_pd_alloc_bitmap);
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+#endif
+
 /*
  * Used to reserve address ranges in the aperture (e.g. for exclusion
  * ranges.
-- 
cgit v0.10.2


From 8d201968e15f56ae2837b0d0b64d3fff098857b0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 20:34:41 +0100
Subject: AMD IOMMU: refactor completion wait handling into separate functions

Impact: split one function into three

The separate functions are required synchronize commands across all
hardware IOMMUs in the system.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0922d5f..2280ef8 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -196,6 +196,46 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 }
 
 /*
+ * This function waits until an IOMMU has completed a completion
+ * wait command
+ */
+static void __iommu_wait_for_completion(struct amd_iommu *iommu)
+{
+	int ready = 0;
+	unsigned status = 0;
+	unsigned long i = 0;
+
+	while (!ready && (i < EXIT_LOOP_COUNT)) {
+		++i;
+		/* wait for the bit to become one */
+		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
+	}
+
+	/* set bit back to zero */
+	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
+	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+	if (unlikely(i == EXIT_LOOP_COUNT))
+		panic("AMD IOMMU: Completion wait loop failed\n");
+}
+
+/*
+ * This function queues a completion wait command into the command
+ * buffer of an IOMMU
+ */
+static int __iommu_completion_wait(struct amd_iommu *iommu)
+{
+	struct iommu_cmd cmd;
+
+	 memset(&cmd, 0, sizeof(cmd));
+	 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
+	 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+
+	 return __iommu_queue_command(iommu, &cmd);
+}
+
+/*
  * This function is called whenever we need to ensure that the IOMMU has
  * completed execution of all commands we sent. It sends a
  * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
@@ -204,40 +244,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
  */
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
-	int ret = 0, ready = 0;
-	unsigned status = 0;
-	struct iommu_cmd cmd;
-	unsigned long flags, i = 0;
-
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
-	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+	int ret = 0;
+	unsigned long flags;
 
 	spin_lock_irqsave(&iommu->lock, flags);
 
 	if (!iommu->need_sync)
 		goto out;
 
-	iommu->need_sync = 0;
+	ret = __iommu_completion_wait(iommu);
 
-	ret = __iommu_queue_command(iommu, &cmd);
+	iommu->need_sync = 0;
 
 	if (ret)
 		goto out;
 
-	while (!ready && (i < EXIT_LOOP_COUNT)) {
-		++i;
-		/* wait for the bit to become one */
-		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
-		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
-	}
-
-	/* set bit back to zero */
-	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
-	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
-	if (unlikely(i == EXIT_LOOP_COUNT))
-		panic("AMD IOMMU: Completion wait loop failed\n");
+	__iommu_wait_for_completion(iommu);
 
 out:
 	spin_unlock_irqrestore(&iommu->lock, flags);
-- 
cgit v0.10.2


From 237b6f33291394c337ae84e2d3782d5605803af2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 20:54:37 +0100
Subject: AMD IOMMU: move invalidation command building to a separate function

Impact: refactoring of iommu_queue_inv_iommu_pages

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2280ef8..fee16fb 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -286,6 +286,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 	return ret;
 }
 
+static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
+					  u16 domid, int pde, int s)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	address &= PAGE_MASK;
+	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+	cmd->data[1] |= domid;
+	cmd->data[2] = lower_32_bits(address);
+	cmd->data[3] = upper_32_bits(address);
+	if (s) /* size bit - we flush more than one 4kb page */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+}
+
 /*
  * Generic command send function for invalidaing TLB entries
  */
@@ -295,16 +310,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 	struct iommu_cmd cmd;
 	int ret;
 
-	memset(&cmd, 0, sizeof(cmd));
-	address &= PAGE_MASK;
-	CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
-	cmd.data[1] |= domid;
-	cmd.data[2] = lower_32_bits(address);
-	cmd.data[3] = upper_32_bits(address);
-	if (s) /* size bit - we flush more than one 4kb page */
-		cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
-	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
-		cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+	__iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
 
 	ret = iommu_queue_command(iommu, &cmd);
 
-- 
cgit v0.10.2


From 9e919012e33c481991e46aa4cb13d807cd47b798 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 20:05:52 +0100
Subject: AMD IOMMU: don't remove protection domain from iommu_pd_list

Impact: save unneeded logic to add and remove domains to the list

The removal of a protection domain from the iommu_pd_list is not
necessary. Another benefit is that we save complexity because we don't
have to readd it later when the device no longer uses the domain.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index fee16fb..b7b3067 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -844,7 +844,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
 	list_for_each_entry(entry, &iommu_pd_list, list) {
 		if (entry->target_dev == devid) {
 			ret = entry;
-			list_del(&ret->list);
 			break;
 		}
 	}
-- 
cgit v0.10.2


From 43f4960983a309568a6c4375f081e63fb2ff24a3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 21:01:12 +0100
Subject: AMD IOMMU: add iommu_flush_domain function

Impact: add a function to flush a domain id on every IOMMU

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index b7b3067..2b6b8e0 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -352,6 +352,30 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
 	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 }
 
+#ifdef CONFIG_IOMMU_API
+/*
+ * This function is used to flush the IO/TLB for a given protection domain
+ * on every IOMMU in the system
+ */
+static void iommu_flush_domain(u16 domid)
+{
+	unsigned long flags;
+	struct amd_iommu *iommu;
+	struct iommu_cmd cmd;
+
+	__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+				      domid, 1, 1);
+
+	list_for_each_entry(iommu, &amd_iommu_list, list) {
+		spin_lock_irqsave(&iommu->lock, flags);
+		__iommu_queue_command(iommu, &cmd);
+		__iommu_completion_wait(iommu);
+		__iommu_wait_for_completion(iommu);
+		spin_unlock_irqrestore(&iommu->lock, flags);
+	}
+}
+#endif
+
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
-- 
cgit v0.10.2


From 9fdb19d64c0247f23343b51fc85f438f8e7a2f3c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 17:46:25 +0100
Subject: AMD IOMMU: add protection domain flags

Imapct: add a new struct member to 'struct protection_domain'

When using protection domains for dma_ops and KVM its better to know for
which subsystem it was allocated. Add a flags member to struct
protection domain for that purpose.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ac302a2..4862a5b 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -190,16 +190,20 @@
 /* FIXME: move this macro to <linux/pci.h> */
 #define PCI_BUS(x) (((x) >> 8) & 0xff)
 
+/* Protection domain flags */
+#define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
  */
 struct protection_domain {
-	spinlock_t lock; /* mostly used to lock the page table*/
-	u16 id;		 /* the domain id written to the device table */
-	int mode;	 /* paging mode (0-6 levels) */
-	u64 *pt_root;	 /* page table root pointer */
-	void *priv;	 /* private data */
+	spinlock_t lock;	/* mostly used to lock the page table*/
+	u16 id;			/* the domain id written to the device table */
+	int mode;		/* paging mode (0-6 levels) */
+	u64 *pt_root;		/* page table root pointer */
+	unsigned long flags;	/* flags to find out type of domain */
+	void *priv;		/* private data */
 };
 
 /*
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2b6b8e0..bb28e2c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -729,6 +729,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 		goto free_dma_dom;
 	dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
 	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+	dma_dom->domain.flags = PD_DMA_OPS_MASK;
 	dma_dom->domain.priv = dma_dom;
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
-- 
cgit v0.10.2


From 5b28df6f43ac9878f310ad0cb7f11ddb262a7ac6 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 17:49:42 +0100
Subject: AMD IOMMU: add checks for dma_ops domain to dma_ops functions

Impact: detect when a driver uses a device assigned otherwise

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index bb28e2c..5c465c9 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -792,6 +792,15 @@ free_dma_dom:
 }
 
 /*
+ * little helper function to check whether a given protection domain is a
+ * dma_ops domain
+ */
+static bool dma_ops_domain(struct protection_domain *domain)
+{
+	return domain->flags & PD_DMA_OPS_MASK;
+}
+
+/*
  * Find out the protection domain structure for a given PCI device. This
  * will give us the pointer to the page table root for example.
  */
@@ -1096,6 +1105,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 		/* device not handled by any AMD IOMMU */
 		return (dma_addr_t)paddr;
 
+	if (!dma_ops_domain(domain))
+		return bad_dma_address;
+
 	spin_lock_irqsave(&domain->lock, flags);
 	addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
 			    dma_mask);
@@ -1126,6 +1138,9 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 		/* device not handled by any AMD IOMMU */
 		return;
 
+	if (!dma_ops_domain(domain))
+		return;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, dir);
@@ -1180,6 +1195,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 	if (!iommu || !domain)
 		return map_sg_no_iommu(dev, sglist, nelems, dir);
 
+	if (!dma_ops_domain(domain))
+		return 0;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	for_each_sg(sglist, s, nelems, i) {
@@ -1233,6 +1251,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 	    !get_device_resources(dev, &iommu, &domain, &devid))
 		return;
 
+	if (!dma_ops_domain(domain))
+		return;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	for_each_sg(sglist, s, nelems, i) {
@@ -1278,6 +1299,9 @@ static void *alloc_coherent(struct device *dev, size_t size,
 		return virt_addr;
 	}
 
+	if (!dma_ops_domain(domain))
+		goto out_free;
+
 	if (!dma_mask)
 		dma_mask = *dev->dma_mask;
 
@@ -1286,18 +1310,20 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
 				 size, DMA_BIDIRECTIONAL, true, dma_mask);
 
-	if (*dma_addr == bad_dma_address) {
-		free_pages((unsigned long)virt_addr, get_order(size));
-		virt_addr = NULL;
-		goto out;
-	}
+	if (*dma_addr == bad_dma_address)
+		goto out_free;
 
 	iommu_completion_wait(iommu);
 
-out:
 	spin_unlock_irqrestore(&domain->lock, flags);
 
 	return virt_addr;
+
+out_free:
+
+	free_pages((unsigned long)virt_addr, get_order(size));
+
+	return NULL;
 }
 
 /*
@@ -1319,6 +1345,9 @@ static void free_coherent(struct device *dev, size_t size,
 	if (!iommu || !domain)
 		goto free_mem;
 
+	if (!dma_ops_domain(domain))
+		goto free_mem;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
-- 
cgit v0.10.2


From 863c74ebd0152b21bc4b11c1447b5d1429287d37 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 17:56:36 +0100
Subject: AMD IOMMU: add device reference counting for protection domains

Impact: know how many devices are assigned to a domain

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 4862a5b..1c769f4 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -203,6 +203,7 @@ struct protection_domain {
 	int mode;		/* paging mode (0-6 levels) */
 	u64 *pt_root;		/* page table root pointer */
 	unsigned long flags;	/* flags to find out type of domain */
+	unsigned dev_cnt;	/* devices assigned to this domain */
 	void *priv;		/* private data */
 };
 
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 5c465c9..8b45bc4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -825,9 +825,10 @@ static void set_device_domain(struct amd_iommu *iommu,
 			      u16 devid)
 {
 	unsigned long flags;
-
 	u64 pte_root = virt_to_phys(domain->pt_root);
 
+	domain->dev_cnt += 1;
+
 	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
 		    << DEV_ENTRY_MODE_SHIFT;
 	pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
-- 
cgit v0.10.2


From f1179dc005ee2b0e55c3f74f3552c3e9ef852265 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 14:39:51 +0100
Subject: AMD IOMMU: rename set_device_domain function

Impact: rename set_device_domain() to attach_device()

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 8b45bc4..12e8b67 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -820,9 +820,9 @@ static struct protection_domain *domain_for_device(u16 devid)
  * If a device is not yet associated with a domain, this function does
  * assigns it visible for the hardware
  */
-static void set_device_domain(struct amd_iommu *iommu,
-			      struct protection_domain *domain,
-			      u16 devid)
+static void attach_device(struct amd_iommu *iommu,
+			  struct protection_domain *domain,
+			  u16 devid)
 {
 	unsigned long flags;
 	u64 pte_root = virt_to_phys(domain->pt_root);
@@ -929,14 +929,14 @@ static int get_device_resources(struct device *dev,
 		if (!dma_dom)
 			dma_dom = (*iommu)->default_dom;
 		*domain = &dma_dom->domain;
-		set_device_domain(*iommu, *domain, *bdf);
+		attach_device(*iommu, *domain, *bdf);
 		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
 				"device ", (*domain)->id);
 		print_devid(_bdf, 1);
 	}
 
 	if (domain_for_device(_bdf) == NULL)
-		set_device_domain(*iommu, *domain, _bdf);
+		attach_device(*iommu, *domain, _bdf);
 
 	return 1;
 }
-- 
cgit v0.10.2


From 355bf553edb7fe21ada51f62c849180bec6da877 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Dec 2008 12:02:41 +0100
Subject: AMD IOMMU: add device detach helper functions

Impact: add helper functions to detach a device from a domain

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 12e8b67..15456a3 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -844,6 +844,45 @@ static void attach_device(struct amd_iommu *iommu,
 	iommu_queue_inv_dev_entry(iommu, devid);
 }
 
+#ifdef CONFIG_IOMMU_API
+/*
+ * Removes a device from a protection domain (unlocked)
+ */
+static void __detach_device(struct protection_domain *domain, u16 devid)
+{
+
+	/* lock domain */
+	spin_lock(&domain->lock);
+
+	/* remove domain from the lookup table */
+	amd_iommu_pd_table[devid] = NULL;
+
+	/* remove entry from the device table seen by the hardware */
+	amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
+	amd_iommu_dev_table[devid].data[1] = 0;
+	amd_iommu_dev_table[devid].data[2] = 0;
+
+	/* decrease reference counter */
+	domain->dev_cnt -= 1;
+
+	/* ready */
+	spin_unlock(&domain->lock);
+}
+
+/*
+ * Removes a device from a protection domain (with devtable_lock held)
+ */
+static void detach_device(struct protection_domain *domain, u16 devid)
+{
+	unsigned long flags;
+
+	/* lock device table */
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	__detach_device(domain, devid);
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+#endif
+
 /*****************************************************************************
  *
  * The next functions belong to the dma_ops mapping/unmapping code.
-- 
cgit v0.10.2


From e275a2a0fc9e2168b15f6c7814e30b7ad58b1c7c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 18:27:25 +0100
Subject: AMD IOMMU: add device notifier callback

Impact: inform IOMMU about state change of a device in the driver core

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 15456a3..140875b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -47,6 +47,8 @@ struct iommu_cmd {
 
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e);
+static struct dma_ops_domain *find_protection_domain(u16 devid);
+
 
 /* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
@@ -844,7 +846,6 @@ static void attach_device(struct amd_iommu *iommu,
 	iommu_queue_inv_dev_entry(iommu, devid);
 }
 
-#ifdef CONFIG_IOMMU_API
 /*
  * Removes a device from a protection domain (unlocked)
  */
@@ -881,7 +882,62 @@ static void detach_device(struct protection_domain *domain, u16 devid)
 	__detach_device(domain, devid);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }
-#endif
+
+static int device_change_notifier(struct notifier_block *nb,
+				  unsigned long action, void *data)
+{
+	struct device *dev = data;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
+	struct protection_domain *domain;
+	struct dma_ops_domain *dma_domain;
+	struct amd_iommu *iommu;
+
+	if (devid > amd_iommu_last_bdf)
+		goto out;
+
+	devid = amd_iommu_alias_table[devid];
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (iommu == NULL)
+		goto out;
+
+	domain = domain_for_device(devid);
+
+	if (domain && !dma_ops_domain(domain))
+		WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
+			  "to a non-dma-ops domain\n", dev_name(dev));
+
+	switch (action) {
+	case BUS_NOTIFY_BOUND_DRIVER:
+		if (domain)
+			goto out;
+		dma_domain = find_protection_domain(devid);
+		if (!dma_domain)
+			dma_domain = iommu->default_dom;
+		attach_device(iommu, &dma_domain->domain, devid);
+		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
+		       "device %s\n", dma_domain->domain.id, dev_name(dev));
+		break;
+	case BUS_NOTIFY_UNBIND_DRIVER:
+		if (!domain)
+			goto out;
+		detach_device(domain, devid);
+		break;
+	default:
+		goto out;
+	}
+
+	iommu_queue_inv_dev_entry(iommu, devid);
+	iommu_completion_wait(iommu);
+
+out:
+	return 0;
+}
+
+struct notifier_block device_nb = {
+	.notifier_call = device_change_notifier,
+};
 
 /*****************************************************************************
  *
@@ -1510,6 +1566,8 @@ int __init amd_iommu_init_dma_ops(void)
 	/* Make the driver finally visible to the drivers */
 	dma_ops = &amd_iommu_dma_ops;
 
+	bus_register_notifier(&pci_bus_type, &device_nb);
+
 	return 0;
 
 free_domains:
-- 
cgit v0.10.2


From 6d98cd8043c13438e4ca8a9464893f0224198a30 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Dec 2008 12:05:55 +0100
Subject: AMD IOMMU: add domain cleanup helper function

Impact: add a function to remove all devices from a domain

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 140875b..5d3f085 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1579,3 +1579,31 @@ free_domains:
 
 	return ret;
 }
+
+/*****************************************************************************
+ *
+ * The following functions belong to the exported interface of AMD IOMMU
+ *
+ * This interface allows access to lower level functions of the IOMMU
+ * like protection domain handling and assignement of devices to domains
+ * which is not possible with the dma_ops interface.
+ *
+ *****************************************************************************/
+
+#ifdef CONFIG_IOMMU_API
+
+static void cleanup_domain(struct protection_domain *domain)
+{
+	unsigned long flags;
+	u16 devid;
+
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+
+	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
+		if (amd_iommu_pd_table[devid] == domain)
+			__detach_device(domain, devid);
+
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
+#endif
-- 
cgit v0.10.2


From c156e347d6d3c36b6843c3b168eda61b9a02c827 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 18:13:27 +0100
Subject: AMD IOMMU: add domain init function for IOMMU API

Impact: add a generic function for allocation protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 5d3f085..6c0bd49 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -22,6 +22,9 @@
 #include <linux/bitops.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
+#ifdef CONFIG_IOMMU_API
+#include <linux/iommu.h>
+#endif
 #include <asm/proto.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -1606,4 +1609,31 @@ static void cleanup_domain(struct protection_domain *domain)
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }
 
+static int amd_iommu_domain_init(struct iommu_domain *dom)
+{
+	struct protection_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return -ENOMEM;
+
+	spin_lock_init(&domain->lock);
+	domain->mode = PAGE_MODE_3_LEVEL;
+	domain->id = domain_id_alloc();
+	if (!domain->id)
+		goto out_free;
+	domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!domain->pt_root)
+		goto out_free;
+
+	dom->priv = domain;
+
+	return 0;
+
+out_free:
+	kfree(domain);
+
+	return -ENOMEM;
+}
+
 #endif
-- 
cgit v0.10.2


From 98383fc301c6546af0f3a8a1d3cb8bf218f7e940 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 18:34:12 +0100
Subject: AMD IOMMU: add domain destroy function for IOMMU API

Impact: add a generic function for releasing protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 6c0bd49..891d713 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1636,4 +1636,25 @@ out_free:
 	return -ENOMEM;
 }
 
+static void amd_iommu_domain_destroy(struct iommu_domain *dom)
+{
+	struct protection_domain *domain = dom->priv;
+
+	if (!domain)
+		return;
+
+	if (domain->dev_cnt > 0)
+		cleanup_domain(domain);
+
+	BUG_ON(domain->dev_cnt != 0);
+
+	free_pagetable(domain);
+
+	domain_id_free(domain->id);
+
+	kfree(domain);
+
+	dom->priv = NULL;
+}
+
 #endif
-- 
cgit v0.10.2


From 684f2888847b896faafed87ce4733501d2cc283c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Dec 2008 12:07:44 +0100
Subject: AMD IOMMU: add device detach function for IOMMU API

Impact: add a generic function to detach devices from protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 891d713..ef9b309 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1657,4 +1657,30 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
 	dom->priv = NULL;
 }
 
+static void amd_iommu_detach_device(struct iommu_domain *dom,
+				    struct device *dev)
+{
+	struct protection_domain *domain = dom->priv;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
+
+	if (dev->bus != &pci_bus_type)
+		return;
+
+	pdev = to_pci_dev(dev);
+
+	devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+	if (devid > 0)
+		detach_device(domain, devid);
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return;
+
+	iommu_queue_inv_dev_entry(iommu, devid);
+	iommu_completion_wait(iommu);
+}
+
 #endif
-- 
cgit v0.10.2


From 01106066a6900b518debe990ddaadf376d433bd6 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 19:34:11 +0100
Subject: AMD IOMMU: add device attach function for IOMMU API

Impact: add a generic function to attach devices to protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index ef9b309..2f7c0b3 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1683,4 +1683,39 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
 	iommu_completion_wait(iommu);
 }
 
+static int amd_iommu_attach_device(struct iommu_domain *dom,
+				   struct device *dev)
+{
+	struct protection_domain *domain = dom->priv;
+	struct protection_domain *old_domain;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
+
+	if (dev->bus != &pci_bus_type)
+		return -EINVAL;
+
+	pdev = to_pci_dev(dev);
+
+	devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+	if (devid >= amd_iommu_last_bdf ||
+			devid != amd_iommu_alias_table[devid])
+		return -EINVAL;
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return -EINVAL;
+
+	old_domain = domain_for_device(devid);
+	if (old_domain)
+		return -EBUSY;
+
+	attach_device(iommu, domain, devid);
+
+	iommu_completion_wait(iommu);
+
+	return 0;
+}
+
 #endif
-- 
cgit v0.10.2


From c6229ca649aa9b312d1f1de20af8d2603b14eead Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 19:48:43 +0100
Subject: AMD IOMMU: add domain map function for IOMMU API

Impact: add a generic function to map pages into protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2f7c0b3..1fcedbe 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1718,4 +1718,33 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 	return 0;
 }
 
+static int amd_iommu_map_range(struct iommu_domain *dom,
+			       unsigned long iova, phys_addr_t paddr,
+			       size_t size, int iommu_prot)
+{
+	struct protection_domain *domain = dom->priv;
+	unsigned long i,  npages = iommu_num_pages(paddr, size, PAGE_SIZE);
+	int prot = 0;
+	int ret;
+
+	if (iommu_prot & IOMMU_READ)
+		prot |= IOMMU_PROT_IR;
+	if (iommu_prot & IOMMU_WRITE)
+		prot |= IOMMU_PROT_IW;
+
+	iova  &= PAGE_MASK;
+	paddr &= PAGE_MASK;
+
+	for (i = 0; i < npages; ++i) {
+		ret = iommu_map_page(domain, iova, paddr, prot);
+		if (ret)
+			return ret;
+
+		iova  += PAGE_SIZE;
+		paddr += PAGE_SIZE;
+	}
+
+	return 0;
+}
+
 #endif
-- 
cgit v0.10.2


From eb74ff6cc0080c7f6270fdfffba65c4eff23d3ad Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 19:59:10 +0100
Subject: AMD IOMMU: add domain unmap function for IOMMU API

Impact: add a generic function to unmap pages into protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 1fcedbe..d8a0abf 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -445,6 +445,30 @@ static int iommu_map_page(struct protection_domain *dom,
 	return 0;
 }
 
+#ifdef CONFIG_IOMMU_API
+static void iommu_unmap_page(struct protection_domain *dom,
+			     unsigned long bus_addr)
+{
+	u64 *pte;
+
+	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+	*pte = 0;
+}
+#endif
+
 /*
  * This function checks if a specific unity mapping entry is needed for
  * this specific IOMMU.
@@ -1747,4 +1771,21 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
 	return 0;
 }
 
+static void amd_iommu_unmap_range(struct iommu_domain *dom,
+				  unsigned long iova, size_t size)
+{
+
+	struct protection_domain *domain = dom->priv;
+	unsigned long i,  npages = iommu_num_pages(iova, size, PAGE_SIZE);
+
+	iova  &= PAGE_MASK;
+
+	for (i = 0; i < npages; ++i) {
+		iommu_unmap_page(domain, iova);
+		iova  += PAGE_SIZE;
+	}
+
+	iommu_flush_domain(domain->id);
+}
+
 #endif
-- 
cgit v0.10.2


From 645c4c8d7289a718c9c828ec217f2b94e3b3e6ff Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 20:05:50 +0100
Subject: AMD IOMMU: add domain address lookup function for IOMMU API

Impact: add a generic function to lockup addresses in protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index d8a0abf..b599e80 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1788,4 +1788,35 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
 	iommu_flush_domain(domain->id);
 }
 
+static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
+					  unsigned long iova)
+{
+	struct protection_domain *domain = dom->priv;
+	unsigned long offset = iova & ~PAGE_MASK;
+	phys_addr_t paddr;
+	u64 *pte;
+
+	pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	paddr  = *pte & IOMMU_PAGE_MASK;
+	paddr |= offset;
+
+	return paddr;
+}
+
 #endif
-- 
cgit v0.10.2


From 26961efe0dab9ca73f8fc3b6137b814252e04972 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 17:00:17 +0100
Subject: AMD IOMMU: register functions for the IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index b599e80..d9b651c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -41,6 +41,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 static LIST_HEAD(iommu_pd_list);
 static DEFINE_SPINLOCK(iommu_pd_list_lock);
 
+#ifdef CONFIG_IOMMU_API
+static struct iommu_ops amd_iommu_ops;
+#endif
+
 /*
  * general struct to manage commands send to an IOMMU
  */
@@ -1593,6 +1597,10 @@ int __init amd_iommu_init_dma_ops(void)
 	/* Make the driver finally visible to the drivers */
 	dma_ops = &amd_iommu_dma_ops;
 
+#ifdef CONFIG_IOMMU_API
+	register_iommu(&amd_iommu_ops);
+#endif
+
 	bus_register_notifier(&pci_bus_type, &device_nb);
 
 	return 0;
@@ -1819,4 +1827,14 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
 	return paddr;
 }
 
+static struct iommu_ops amd_iommu_ops = {
+	.domain_init = amd_iommu_domain_init,
+	.domain_destroy = amd_iommu_domain_destroy,
+	.attach_dev = amd_iommu_attach_device,
+	.detach_dev = amd_iommu_detach_device,
+	.map = amd_iommu_map_range,
+	.unmap = amd_iommu_unmap_range,
+	.iova_to_phys = amd_iommu_iova_to_phys,
+};
+
 #endif
-- 
cgit v0.10.2


From e2dc14a2a6c9a83baaafc51f06b7e73cec2167be Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 18:48:59 +0100
Subject: AMD IOMMU: add a domain flag for default domains

Impact: adds a new protection domain flag

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 1c769f4..6adc702 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -192,6 +192,8 @@
 
 /* Protection domain flags */
 #define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
+#define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
+					      domain for an IOMMU */
 
 /*
  * This structure contains generic data for  IOMMU protection domains
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index d9b651c..f295654 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1574,6 +1574,7 @@ int __init amd_iommu_init_dma_ops(void)
 		iommu->default_dom = dma_ops_domain_alloc(iommu, order);
 		if (iommu->default_dom == NULL)
 			return -ENOMEM;
+		iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
 		ret = iommu_init_unity_mappings(iommu);
 		if (ret)
 			goto free_domains;
-- 
cgit v0.10.2


From 1ac4cbbc5eb56de96d264d10f464ba5222815b1b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 19:33:26 +0100
Subject: AMD IOMMU: allocate a new protection for hotplugged devices

Impact: also hotplug devices benefit from device isolation

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f295654..f226060 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -923,6 +923,8 @@ static int device_change_notifier(struct notifier_block *nb,
 	struct protection_domain *domain;
 	struct dma_ops_domain *dma_domain;
 	struct amd_iommu *iommu;
+	int order = amd_iommu_aperture_order;
+	unsigned long flags;
 
 	if (devid > amd_iommu_last_bdf)
 		goto out;
@@ -955,6 +957,21 @@ static int device_change_notifier(struct notifier_block *nb,
 			goto out;
 		detach_device(domain, devid);
 		break;
+	case BUS_NOTIFY_ADD_DEVICE:
+		/* allocate a protection domain if a device is added */
+		dma_domain = find_protection_domain(devid);
+		if (dma_domain)
+			goto out;
+		dma_domain = dma_ops_domain_alloc(iommu, order);
+		if (!dma_domain)
+			goto out;
+		dma_domain->target_dev = devid;
+
+		spin_lock_irqsave(&iommu_pd_list_lock, flags);
+		list_add_tail(&dma_domain->list, &iommu_pd_list);
+		spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
+
+		break;
 	default:
 		goto out;
 	}
-- 
cgit v0.10.2


From ab896722867602eb0e836717e8b857ad513800d8 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 19:43:07 +0100
Subject: AMD IOMMU: use dev_name instead of self-build print_devid

Impact: use generic dev_name instead of own function

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 6adc702..ee8cfa0 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -389,18 +389,6 @@ extern int amd_iommu_isolate;
  */
 extern bool amd_iommu_unmap_flush;
 
-/* takes a PCI device id and prints it out in a readable form */
-static inline void print_devid(u16 devid, int nl)
-{
-	int bus = devid >> 8;
-	int dev = devid >> 3 & 0x1f;
-	int fn  = devid & 0x07;
-
-	printk("%02x:%02x.%x", bus, dev, fn);
-	if (nl)
-		printk("\n");
-}
-
 /* takes bus and device/function and returns the device id
  * FIXME: should that be in generic PCI code? */
 static inline u16 calc_devid(u8 bus, u8 devfn)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f226060..a53cf48 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1074,8 +1074,7 @@ static int get_device_resources(struct device *dev,
 		*domain = &dma_dom->domain;
 		attach_device(*iommu, *domain, *bdf);
 		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-				"device ", (*domain)->id);
-		print_devid(_bdf, 1);
+				"device %s\n", (*domain)->id, dev_name(dev));
 	}
 
 	if (domain_for_device(_bdf) == NULL)
-- 
cgit v0.10.2


From 0cfd7aa90be83a4d278810d231f9ef03f189b4f0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 19:58:00 +0100
Subject: AMD IOMMU: convert iommu->need_sync to bool

Impact: use bool instead of int for iommu->need_sync

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ee8cfa0..c4b144e 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -302,7 +302,7 @@ struct amd_iommu {
 	bool int_enabled;
 
 	/* if one, we need to send a completion wait command */
-	int need_sync;
+	bool need_sync;
 
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a53cf48..e410e97 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -198,7 +198,7 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 	spin_lock_irqsave(&iommu->lock, flags);
 	ret = __iommu_queue_command(iommu, cmd);
 	if (!ret)
-		iommu->need_sync = 1;
+		iommu->need_sync = true;
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return ret;
@@ -263,7 +263,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
 	ret = __iommu_completion_wait(iommu);
 
-	iommu->need_sync = 0;
+	iommu->need_sync = false;
 
 	if (ret)
 		goto out;
-- 
cgit v0.10.2


From c226f853091577e665ebc02c064af4834d8d4f28 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 13:53:54 +0100
Subject: AMD IOMMU: convert amd_iommu_isolate to bool

Impact: cleanup

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index c4b144e..7abf9cf 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -381,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table;
 extern unsigned long *amd_iommu_pd_alloc_bitmap;
 
 /* will be 1 if device isolation is enabled */
-extern int amd_iommu_isolate;
+extern bool amd_iommu_isolate;
 
 /*
  * If true, the addresses will be flushed on unmap time, not when
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c625800..47e163b 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf;			/* largest PCI device id we have
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate = 1;		/* if 1, device isolation is enabled */
+bool amd_iommu_isolate = true;		/* if true, device isolation is
+					   enabled */
 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
@@ -1218,9 +1219,9 @@ static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
 		if (strncmp(str, "isolate", 7) == 0)
-			amd_iommu_isolate = 1;
+			amd_iommu_isolate = true;
 		if (strncmp(str, "share", 5) == 0)
-			amd_iommu_isolate = 0;
+			amd_iommu_isolate = false;
 		if (strncmp(str, "fullflush", 9) == 0)
 			amd_iommu_unmap_flush = true;
 	}
-- 
cgit v0.10.2


From edcb34da259c503a2ffd37e51a658672ba3bc7a2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 20:01:45 +0100
Subject: AMD IOMMU: use calc_devid in prealloc_protection_domains

Impact: cleanup

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index e410e97..3011ea7 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1543,7 +1543,7 @@ void prealloc_protection_domains(void)
 	u16 devid;
 
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
-		devid = (dev->bus->number << 8) | dev->devfn;
+		devid = calc_devid(dev->bus->number, dev->devfn);
 		if (devid > amd_iommu_last_bdf)
 			continue;
 		devid = amd_iommu_alias_table[devid];
-- 
cgit v0.10.2


From a4e267c88b4acfc87ff2ab0cc8e9509878e9aaba Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 20:04:18 +0100
Subject: AMD IOMMU: use dev_name in iommu_enable function

Impact: cleanup

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 47e163b..be81f61 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -246,12 +246,8 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 /* Function to enable the hardware */
 void __init iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
-	       "at %02x:%02x.%x cap 0x%hx\n",
-	       iommu->dev->bus->number,
-	       PCI_SLOT(iommu->dev->devfn),
-	       PCI_FUNC(iommu->dev->devfn),
-	       iommu->cap_ptr);
+	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
+	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
 
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
-- 
cgit v0.10.2


From 2e117604a4e8f3f9cee4aec3373b0382159e152a Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 11 Dec 2008 19:00:12 +0100
Subject: AMD IOMMU: add Kconfig entry for statistic collection code

Impact: adds new Kconfig entry

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 249d1e0..f9998d2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -586,6 +586,16 @@ config AMD_IOMMU
 	  your BIOS for an option to enable it or if you have an IVRS ACPI
 	  table.
 
+config AMD_IOMMU_STATS
+	bool "Export AMD IOMMU statistics to debugfs"
+	depends on AMD_IOMMU
+	select DEBUG_FS
+	help
+	  This option enables code in the AMD IOMMU driver to collect various
+	  statistics about whats happening in the driver and exports that
+	  information to userspace via debugfs.
+	  If unsure, say N.
+
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	def_bool y if X86_64
-- 
cgit v0.10.2


From a9dddbe0497ab0df7ee729e8d4cb0ee2dec3e4ba Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 12:33:06 +0100
Subject: AMD IOMMU: add necessary header defines for stats counting

Impact: add defines to make iommu stats collection configurable

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 7abf9cf..1379c5f 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -396,4 +396,30 @@ static inline u16 calc_devid(u8 bus, u8 devfn)
 	return (((u16)bus) << 8) | devfn;
 }
 
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+struct __iommu_counter {
+	char *name;
+	struct dentry *dent;
+	u64 value;
+};
+
+#define DECLARE_STATS_COUNTER(nm) \
+	static struct __iommu_counter nm = {	\
+		.name = #nm,			\
+	}
+
+#define INC_STATS_COUNTER(name)		name.value += 1
+#define ADD_STATS_COUNTER(name, x)	name.value += (x)
+#define SUB_STATS_COUNTER(name, x)	name.value -= (x)
+
+#else /* CONFIG_AMD_IOMMU_STATS */
+
+#define DECLARE_STATS_COUNTER(name)
+#define INC_STATS_COUNTER(name)
+#define ADD_STATS_COUNTER(name, x)
+#define SUB_STATS_COUNTER(name, x)
+
+#endif /* CONFIG_AMD_IOMMU_STATS */
+
 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
-- 
cgit v0.10.2


From 7f26508bbb76ce86aad1130ef6b7f1a4bb7de0c2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 13:50:21 +0100
Subject: AMD IOMMU: add init code for statistic collection

Impact: create a new debugfs directory

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 1379c5f..95c8cd9 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -420,6 +420,8 @@ struct __iommu_counter {
 #define ADD_STATS_COUNTER(name, x)
 #define SUB_STATS_COUNTER(name, x)
 
+static inline void amd_iommu_stats_init(void) { }
+
 #endif /* CONFIG_AMD_IOMMU_STATS */
 
 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 3011ea7..f98f706 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -20,6 +20,7 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/bitops.h>
+#include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
 #ifdef CONFIG_IOMMU_API
@@ -57,6 +58,40 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 static struct dma_ops_domain *find_protection_domain(u16 devid);
 
 
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+/*
+ * Initialization code for statistics collection
+ */
+
+static struct dentry *stats_dir;
+static struct dentry *de_isolate;
+static struct dentry *de_fflush;
+
+static void amd_iommu_stats_add(struct __iommu_counter *cnt)
+{
+	if (stats_dir == NULL)
+		return;
+
+	cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
+				       &cnt->value);
+}
+
+static void amd_iommu_stats_init(void)
+{
+	stats_dir = debugfs_create_dir("amd-iommu", NULL);
+	if (stats_dir == NULL)
+		return;
+
+	de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
+					 (u32 *)&amd_iommu_isolate);
+
+	de_fflush  = debugfs_create_bool("fullflush", 0444, stats_dir,
+					 (u32 *)&amd_iommu_unmap_flush);
+}
+
+#endif
+
 /* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
 {
@@ -1620,6 +1655,8 @@ int __init amd_iommu_init_dma_ops(void)
 
 	bus_register_notifier(&pci_bus_type, &device_nb);
 
+	amd_iommu_stats_init();
+
 	return 0;
 
 free_domains:
-- 
cgit v0.10.2


From da49f6df726ecaaee87757e8b65a560679d32f99 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 14:59:58 +0100
Subject: AMD IOMMU: add stats counter for completion wait events

Impact: see number of completion wait events in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f98f706..b214357 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -64,6 +64,8 @@ static struct dma_ops_domain *find_protection_domain(u16 devid);
  * Initialization code for statistics collection
  */
 
+DECLARE_STATS_COUNTER(compl_wait);
+
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
 static struct dentry *de_fflush;
@@ -88,6 +90,8 @@ static void amd_iommu_stats_init(void)
 
 	de_fflush  = debugfs_create_bool("fullflush", 0444, stats_dir,
 					 (u32 *)&amd_iommu_unmap_flush);
+
+	amd_iommu_stats_add(&compl_wait);
 }
 
 #endif
@@ -249,6 +253,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu)
 	unsigned status = 0;
 	unsigned long i = 0;
 
+	INC_STATS_COUNTER(compl_wait);
+
 	while (!ready && (i < EXIT_LOOP_COUNT)) {
 		++i;
 		/* wait for the bit to become one */
-- 
cgit v0.10.2


From 0f2a86f200bc97ae6cefc5d3ac879094b3fcde48 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:05:16 +0100
Subject: AMD IOMMU: add stats counter for map_single requests

Impact: see number of map_single requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index b214357..ef37786 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -65,6 +65,7 @@ static struct dma_ops_domain *find_protection_domain(u16 devid);
  */
 
 DECLARE_STATS_COUNTER(compl_wait);
+DECLARE_STATS_COUNTER(cnt_map_single);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -92,6 +93,7 @@ static void amd_iommu_stats_init(void)
 					 (u32 *)&amd_iommu_unmap_flush);
 
 	amd_iommu_stats_add(&compl_wait);
+	amd_iommu_stats_add(&cnt_map_single);
 }
 
 #endif
@@ -1278,6 +1280,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 	dma_addr_t addr;
 	u64 dma_mask;
 
+	INC_STATS_COUNTER(cnt_map_single);
+
 	if (!check_device(dev))
 		return bad_dma_address;
 
-- 
cgit v0.10.2


From 146a6917fc30616401a090f55cff2b855ee5b2ab Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:07:12 +0100
Subject: AMD IOMMU: add stats counter for unmap_single requests

Impact: see number of unmap_single requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index ef37786..71646c8 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -66,6 +66,7 @@ static struct dma_ops_domain *find_protection_domain(u16 devid);
 
 DECLARE_STATS_COUNTER(compl_wait);
 DECLARE_STATS_COUNTER(cnt_map_single);
+DECLARE_STATS_COUNTER(cnt_unmap_single);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -94,6 +95,7 @@ static void amd_iommu_stats_init(void)
 
 	amd_iommu_stats_add(&compl_wait);
 	amd_iommu_stats_add(&cnt_map_single);
+	amd_iommu_stats_add(&cnt_unmap_single);
 }
 
 #endif
@@ -1321,6 +1323,8 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 	struct protection_domain *domain;
 	u16 devid;
 
+	INC_STATS_COUNTER(cnt_unmap_single);
+
 	if (!check_device(dev) ||
 	    !get_device_resources(dev, &iommu, &domain, &devid))
 		/* device not handled by any AMD IOMMU */
-- 
cgit v0.10.2


From d03f067a9d0a1cc09529427af9a15e15d32ba1de Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:09:48 +0100
Subject: AMD IOMMU: add stats counter for map_sg requests

Impact: see number of map_sg requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 71646c8..859ca74 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -67,6 +67,7 @@ static struct dma_ops_domain *find_protection_domain(u16 devid);
 DECLARE_STATS_COUNTER(compl_wait);
 DECLARE_STATS_COUNTER(cnt_map_single);
 DECLARE_STATS_COUNTER(cnt_unmap_single);
+DECLARE_STATS_COUNTER(cnt_map_sg);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -96,6 +97,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&compl_wait);
 	amd_iommu_stats_add(&cnt_map_single);
 	amd_iommu_stats_add(&cnt_unmap_single);
+	amd_iommu_stats_add(&cnt_map_sg);
 }
 
 #endif
@@ -1377,6 +1379,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 	int mapped_elems = 0;
 	u64 dma_mask;
 
+	INC_STATS_COUNTER(cnt_map_sg);
+
 	if (!check_device(dev))
 		return 0;
 
-- 
cgit v0.10.2


From 55877a6bcdf0843414eecc658550c6551f5b5e1d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:12:14 +0100
Subject: AMD IOMMU: add stats counter for unmap_sg requests

Impact: see number of unmap_sg requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 859ca74..49f2c85 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -68,6 +68,7 @@ DECLARE_STATS_COUNTER(compl_wait);
 DECLARE_STATS_COUNTER(cnt_map_single);
 DECLARE_STATS_COUNTER(cnt_unmap_single);
 DECLARE_STATS_COUNTER(cnt_map_sg);
+DECLARE_STATS_COUNTER(cnt_unmap_sg);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -98,6 +99,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_map_single);
 	amd_iommu_stats_add(&cnt_unmap_single);
 	amd_iommu_stats_add(&cnt_map_sg);
+	amd_iommu_stats_add(&cnt_unmap_sg);
 }
 
 #endif
@@ -1443,6 +1445,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 	u16 devid;
 	int i;
 
+	INC_STATS_COUNTER(cnt_unmap_sg);
+
 	if (!check_device(dev) ||
 	    !get_device_resources(dev, &iommu, &domain, &devid))
 		return;
-- 
cgit v0.10.2


From c8f0fb36bffa9e21d214a2910b825567d52bfc2c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:14:21 +0100
Subject: AMD IOMMU: add stats counter for alloc_coherent requests

Impact: see number of alloc_coherent requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 49f2c85..ecc89f8 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -69,6 +69,7 @@ DECLARE_STATS_COUNTER(cnt_map_single);
 DECLARE_STATS_COUNTER(cnt_unmap_single);
 DECLARE_STATS_COUNTER(cnt_map_sg);
 DECLARE_STATS_COUNTER(cnt_unmap_sg);
+DECLARE_STATS_COUNTER(cnt_alloc_coherent);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -100,6 +101,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_unmap_single);
 	amd_iommu_stats_add(&cnt_map_sg);
 	amd_iommu_stats_add(&cnt_unmap_sg);
+	amd_iommu_stats_add(&cnt_alloc_coherent);
 }
 
 #endif
@@ -1481,6 +1483,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	phys_addr_t paddr;
 	u64 dma_mask = dev->coherent_dma_mask;
 
+	INC_STATS_COUNTER(cnt_alloc_coherent);
+
 	if (!check_device(dev))
 		return NULL;
 
-- 
cgit v0.10.2


From 5d31ee7e08b7713596b999a42e67491bdf3665b3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:16:38 +0100
Subject: AMD IOMMU: add stats counter for free_coherent requests

Impact: see number of free_coherent requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index ecc89f8..112412d 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -70,6 +70,7 @@ DECLARE_STATS_COUNTER(cnt_unmap_single);
 DECLARE_STATS_COUNTER(cnt_map_sg);
 DECLARE_STATS_COUNTER(cnt_unmap_sg);
 DECLARE_STATS_COUNTER(cnt_alloc_coherent);
+DECLARE_STATS_COUNTER(cnt_free_coherent);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -102,6 +103,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_map_sg);
 	amd_iommu_stats_add(&cnt_unmap_sg);
 	amd_iommu_stats_add(&cnt_alloc_coherent);
+	amd_iommu_stats_add(&cnt_free_coherent);
 }
 
 #endif
@@ -1541,6 +1543,8 @@ static void free_coherent(struct device *dev, size_t size,
 	struct protection_domain *domain;
 	u16 devid;
 
+	INC_STATS_COUNTER(cnt_free_coherent);
+
 	if (!check_device(dev))
 		return;
 
-- 
cgit v0.10.2


From c1858976f5ef05122bb671f678beaf7e1fe1dd74 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:42:39 +0100
Subject: AMD IOMMU: add stats counter for cross-page request

Impact: see number of requests for more than one page in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 112412d..f545503 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -71,6 +71,7 @@ DECLARE_STATS_COUNTER(cnt_map_sg);
 DECLARE_STATS_COUNTER(cnt_unmap_sg);
 DECLARE_STATS_COUNTER(cnt_alloc_coherent);
 DECLARE_STATS_COUNTER(cnt_free_coherent);
+DECLARE_STATS_COUNTER(cross_page);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -104,6 +105,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_unmap_sg);
 	amd_iommu_stats_add(&cnt_alloc_coherent);
 	amd_iommu_stats_add(&cnt_free_coherent);
+	amd_iommu_stats_add(&cross_page);
 }
 
 #endif
@@ -1217,6 +1219,9 @@ static dma_addr_t __map_single(struct device *dev,
 	pages = iommu_num_pages(paddr, size, PAGE_SIZE);
 	paddr &= PAGE_MASK;
 
+	if (pages > 1)
+		INC_STATS_COUNTER(cross_page);
+
 	if (align)
 		align_mask = (1UL << get_order(size)) - 1;
 
-- 
cgit v0.10.2


From f57d98ae6979f7bcbf758023b4716f485385f903 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:46:29 +0100
Subject: AMD IOMMU: add stats counter for single iommu domain tlb flushes

Impact: see number of single iommu domain tlb flushes in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f545503..e99022d 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -72,6 +72,7 @@ DECLARE_STATS_COUNTER(cnt_unmap_sg);
 DECLARE_STATS_COUNTER(cnt_alloc_coherent);
 DECLARE_STATS_COUNTER(cnt_free_coherent);
 DECLARE_STATS_COUNTER(cross_page);
+DECLARE_STATS_COUNTER(domain_flush_single);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -106,6 +107,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_alloc_coherent);
 	amd_iommu_stats_add(&cnt_free_coherent);
 	amd_iommu_stats_add(&cross_page);
+	amd_iommu_stats_add(&domain_flush_single);
 }
 
 #endif
@@ -413,6 +415,8 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
 {
 	u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
 
+	INC_STATS_COUNTER(domain_flush_single);
+
 	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 }
 
-- 
cgit v0.10.2


From 18811f55d48e5f3ee70c4744c592f940022fa592 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:48:28 +0100
Subject: AMD IOMMU: add stats counter for domain tlb flushes

Impact: see number of domain tlb flushes in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index e99022d..a897c72 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -73,6 +73,7 @@ DECLARE_STATS_COUNTER(cnt_alloc_coherent);
 DECLARE_STATS_COUNTER(cnt_free_coherent);
 DECLARE_STATS_COUNTER(cross_page);
 DECLARE_STATS_COUNTER(domain_flush_single);
+DECLARE_STATS_COUNTER(domain_flush_all);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -108,6 +109,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_free_coherent);
 	amd_iommu_stats_add(&cross_page);
 	amd_iommu_stats_add(&domain_flush_single);
+	amd_iommu_stats_add(&domain_flush_all);
 }
 
 #endif
@@ -431,6 +433,8 @@ static void iommu_flush_domain(u16 domid)
 	struct amd_iommu *iommu;
 	struct iommu_cmd cmd;
 
+	INC_STATS_COUNTER(domain_flush_all);
+
 	__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
 				      domid, 1, 1);
 
-- 
cgit v0.10.2


From 5774f7c5fef2526bfa58eab628fbe91dce5e07b1 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:57:30 +0100
Subject: AMD IOMMU: add statistics about allocated io memory

Impact: see amount of allocated io memory in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a897c72..69f367b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -74,6 +74,7 @@ DECLARE_STATS_COUNTER(cnt_free_coherent);
 DECLARE_STATS_COUNTER(cross_page);
 DECLARE_STATS_COUNTER(domain_flush_single);
 DECLARE_STATS_COUNTER(domain_flush_all);
+DECLARE_STATS_COUNTER(alloced_io_mem);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -110,6 +111,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cross_page);
 	amd_iommu_stats_add(&domain_flush_single);
 	amd_iommu_stats_add(&domain_flush_all);
+	amd_iommu_stats_add(&alloced_io_mem);
 }
 
 #endif
@@ -1246,6 +1248,8 @@ static dma_addr_t __map_single(struct device *dev,
 	}
 	address += offset;
 
+	ADD_STATS_COUNTER(alloced_io_mem, size);
+
 	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
 		iommu_flush_tlb(iommu, dma_dom->domain.id);
 		dma_dom->need_flush = false;
@@ -1282,6 +1286,8 @@ static void __unmap_single(struct amd_iommu *iommu,
 		start += PAGE_SIZE;
 	}
 
+	SUB_STATS_COUNTER(alloced_io_mem, size);
+
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
 	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
-- 
cgit v0.10.2


From 8ecaf8f19f0f0627d6ac6d69ed9472e7d307f35b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 16:13:04 +0100
Subject: AMD IOMMU: add statistics about total number of map requests

Impact: see total number of map requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 69f367b..0c504b2 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -75,6 +75,7 @@ DECLARE_STATS_COUNTER(cross_page);
 DECLARE_STATS_COUNTER(domain_flush_single);
 DECLARE_STATS_COUNTER(domain_flush_all);
 DECLARE_STATS_COUNTER(alloced_io_mem);
+DECLARE_STATS_COUNTER(total_map_requests);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -112,6 +113,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&domain_flush_single);
 	amd_iommu_stats_add(&domain_flush_all);
 	amd_iommu_stats_add(&alloced_io_mem);
+	amd_iommu_stats_add(&total_map_requests);
 }
 
 #endif
@@ -1229,6 +1231,8 @@ static dma_addr_t __map_single(struct device *dev,
 	pages = iommu_num_pages(paddr, size, PAGE_SIZE);
 	paddr &= PAGE_MASK;
 
+	INC_STATS_COUNTER(total_map_requests);
+
 	if (pages > 1)
 		INC_STATS_COUNTER(cross_page);
 
-- 
cgit v0.10.2


From 7398ca79d227f7cd7f2ce23f08624e30081dcb4e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Sat, 3 Jan 2009 16:37:53 +0100
Subject: kvm/iommu: fix compile warning

This fixes a compile warning about a variable thats maybe used
uninitialized in the function.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index d0bebaa..e9693a2 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -71,7 +71,7 @@ unmap_pages:
 
 static int kvm_iommu_map_memslots(struct kvm *kvm)
 {
-	int i, r;
+	int i, r = 0;
 
 	down_read(&kvm->slots_lock);
 	for (i = 0; i < kvm->nmemslots; i++) {
-- 
cgit v0.10.2


From 0e93dd883537e628b809a2120854cd591c8935f1 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 21:45:22 +0530
Subject: AMD IOMMU: prealloc_protection_domains should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/amd_iommu.c:1299:6: warning: symbol 'prealloc_protection_domains' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0c504b2..881c68f 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1622,7 +1622,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
  * we don't need to preallocate the protection domains anymore.
  * For now we have to.
  */
-void prealloc_protection_domains(void)
+static void prealloc_protection_domains(void)
 {
 	struct pci_dev *dev = NULL;
 	struct dma_ops_domain *dma_dom;
-- 
cgit v0.10.2


From 065a6d68c71af2a3bdd080fa5aa353b76eede8f5 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Sat, 3 Jan 2009 14:16:35 +0100
Subject: AMD IOMMU: remove now unnecessary #ifdefs

The #ifdef's are no longer necessary when the iommu-api and the amd
iommu updates are merged together.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 881c68f..5113c08 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -426,7 +426,6 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
 	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 }
 
-#ifdef CONFIG_IOMMU_API
 /*
  * This function is used to flush the IO/TLB for a given protection domain
  * on every IOMMU in the system
@@ -450,7 +449,6 @@ static void iommu_flush_domain(u16 domid)
 		spin_unlock_irqrestore(&iommu->lock, flags);
 	}
 }
-#endif
 
 /****************************************************************************
  *
@@ -516,7 +514,6 @@ static int iommu_map_page(struct protection_domain *dom,
 	return 0;
 }
 
-#ifdef CONFIG_IOMMU_API
 static void iommu_unmap_page(struct protection_domain *dom,
 			     unsigned long bus_addr)
 {
@@ -538,7 +535,6 @@ static void iommu_unmap_page(struct protection_domain *dom,
 
 	*pte = 0;
 }
-#endif
 
 /*
  * This function checks if a specific unity mapping entry is needed for
@@ -723,7 +719,6 @@ static u16 domain_id_alloc(void)
 	return id;
 }
 
-#ifdef CONFIG_IOMMU_API
 static void domain_id_free(int id)
 {
 	unsigned long flags;
@@ -733,7 +728,6 @@ static void domain_id_free(int id)
 		__clear_bit(id, amd_iommu_pd_alloc_bitmap);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }
-#endif
 
 /*
  * Used to reserve address ranges in the aperture (e.g. for exclusion
@@ -1702,9 +1696,7 @@ int __init amd_iommu_init_dma_ops(void)
 	/* Make the driver finally visible to the drivers */
 	dma_ops = &amd_iommu_dma_ops;
 
-#ifdef CONFIG_IOMMU_API
 	register_iommu(&amd_iommu_ops);
-#endif
 
 	bus_register_notifier(&pci_bus_type, &device_nb);
 
@@ -1732,8 +1724,6 @@ free_domains:
  *
  *****************************************************************************/
 
-#ifdef CONFIG_IOMMU_API
-
 static void cleanup_domain(struct protection_domain *domain)
 {
 	unsigned long flags;
@@ -1944,4 +1934,3 @@ static struct iommu_ops amd_iommu_ops = {
 	.iova_to_phys = amd_iommu_iova_to_phys,
 };
 
-#endif
-- 
cgit v0.10.2


From 263ec6457bb23d57b575ede18ff6c3d11e0b4e96 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 3 Jan 2009 13:16:09 +0100
Subject: cpumask: convert RCU implementations, fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: cleanup

This warning:

 kernel/rcuclassic.c: In function ‘rcu_start_batch’:
 kernel/rcuclassic.c:397: warning: passing argument 1 of ‘cpumask_andnot’ from incompatible pointer type

triggers because one usage site of rcp->cpumask was not converted
to to_cpumask(rcp->cpumask). There's no ill effects of this bug.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 6ec495f6..490934f 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -394,7 +394,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 		 * unnecessarily.
 		 */
 		smp_mb();
-		cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask);
+		cpumask_andnot(to_cpumask(rcp->cpumask),
+			       cpu_online_mask, nohz_cpu_mask);
 
 		rcp->signaled = 0;
 	}
-- 
cgit v0.10.2


From 6bdf197b04b3ae7c85785bc5a9576f1bcb0ac7c0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 3 Jan 2009 12:50:46 +0100
Subject: ia64: cpumask fix for is_affinity_mask_valid()

Impact: build fix on ia64

ia64's default_affinity_write() still had old cpumask_t usage:

 /home/mingo/tip/kernel/irq/proc.c: In function `default_affinity_write':
 /home/mingo/tip/kernel/irq/proc.c:114: error: incompatible type for argument 1 of `is_affinity_mask_valid'
 make[3]: *** [kernel/irq/proc.o] Error 1
 make[3]: *** Waiting for unfinished jobs....

update it to cpumask_var_t.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/ia64/include/asm/irq.h b/arch/ia64/include/asm/irq.h
index 3627116..36429a5 100644
--- a/arch/ia64/include/asm/irq.h
+++ b/arch/ia64/include/asm/irq.h
@@ -27,7 +27,7 @@ irq_canonicalize (int irq)
 }
 
 extern void set_irq_affinity_info (unsigned int irq, int dest, int redir);
-bool is_affinity_mask_valid(cpumask_t cpumask);
+bool is_affinity_mask_valid(cpumask_var_t cpumask);
 
 #define is_affinity_mask_valid is_affinity_mask_valid
 
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 0b6db53..95ff16c 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -112,11 +112,11 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
 	}
 }
 
-bool is_affinity_mask_valid(cpumask_t cpumask)
+bool is_affinity_mask_valid(cpumask_var_t cpumask)
 {
 	if (ia64_platform_is("sn2")) {
 		/* Only allow one CPU to be specified in the smp_affinity mask */
-		if (cpus_weight(cpumask) != 1)
+		if (cpumask_weight(cpumask) != 1)
 			return false;
 	}
 	return true;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 2abd3a7..aae3f74 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -54,7 +54,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
 	if (err)
 		goto free_cpumask;
 
-	if (!is_affinity_mask_valid(*new_value)) {
+	if (!is_affinity_mask_valid(new_value)) {
 		err = -EINVAL;
 		goto free_cpumask;
 	}
-- 
cgit v0.10.2


From 730cf27246225d56ca1603b2f3c4fdbf882d4e51 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 31 Dec 2008 18:08:45 -0800
Subject: x86: enable cpus display of kernel_max and offlined cpus

Impact: enables /sys/devices/system/cpu/{kernel_max,offline} user interface

By setting total_cpus, the drivers/base/cpu.c will display the
values of kernel_max (NR_CPUS-1) and the offlined cpu map.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9e177a4..f49c26b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1298,6 +1298,8 @@ __init void prefill_possible_map(void)
 	else
 		possible = setup_possible_cpus;
 
+	total_cpus = max_t(int, possible, num_processors + disabled_cpus);
+
 	if (possible > CONFIG_NR_CPUS) {
 		printk(KERN_WARNING
 			"%d Processors exceeds NR_CPUS limit of %d\n",
-- 
cgit v0.10.2


From 6ca09dfc9f180d038dcef93c167a833f43a8246f Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 31 Dec 2008 18:08:45 -0800
Subject: sched: put back some stack hog changes that were undone in
 kernel/sched.c

Impact: prevents panic from stack overflow on numa-capable machines.

Some of the "removal of stack hogs" changes in kernel/sched.c by using
node_to_cpumask_ptr were undone by the early cpumask API updates, and
causes a panic due to stack overflow.  This patch undoes those changes
by using cpumask_of_node() which returns a 'const struct cpumask *'.

In addition, cpu_coregoup_map is replaced with cpu_coregroup_mask further
reducing stack usage.  (Both of these updates removed 9 FIXME's!)

Also:
   Pick up some remaining changes from the old 'cpumask_t' functions to
   the new 'struct cpumask *' functions.

   Optimize memory traffic by allocating each percpu local_cpu_mask on the
   same node as the referring cpu.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 27ba1d6..dd862d7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3715,7 +3715,7 @@ redo:
 		 * don't kick the migration_thread, if the curr
 		 * task on busiest cpu can't be moved to this_cpu
 		 */
-		if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+		if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) {
 			double_unlock_balance(this_rq, busiest);
 			all_pinned = 1;
 			return ld_moved;
@@ -6220,9 +6220,7 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	int dest_cpu;
-	/* FIXME: Use cpumask_of_node here. */
-	cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
-	const struct cpumask *nodemask = &_nodemask;
+	const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
 
 again:
 	/* Look for allowed, online CPU in same node. */
@@ -7133,21 +7131,18 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
 static void sched_domain_node_span(int node, struct cpumask *span)
 {
 	nodemask_t used_nodes;
-	/* FIXME: use cpumask_of_node() */
-	node_to_cpumask_ptr(nodemask, node);
 	int i;
 
-	cpus_clear(*span);
+	cpumask_clear(span);
 	nodes_clear(used_nodes);
 
-	cpus_or(*span, *span, *nodemask);
+	cpumask_or(span, span, cpumask_of_node(node));
 	node_set(node, used_nodes);
 
 	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
 		int next_node = find_next_best_node(node, &used_nodes);
 
-		node_to_cpumask_ptr_next(nodemask, next_node);
-		cpus_or(*span, *span, *nodemask);
+		cpumask_or(span, span, cpumask_of_node(next_node));
 	}
 }
 #endif /* CONFIG_NUMA */
@@ -7227,9 +7222,7 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
 {
 	int group;
 #ifdef CONFIG_SCHED_MC
-	/* FIXME: Use cpu_coregroup_mask. */
-	*mask = cpu_coregroup_map(cpu);
-	cpus_and(*mask, *mask, *cpu_map);
+	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
 	group = cpumask_first(mask);
 #elif defined(CONFIG_SCHED_SMT)
 	cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
@@ -7259,10 +7252,8 @@ static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
 				 struct cpumask *nodemask)
 {
 	int group;
-	/* FIXME: use cpumask_of_node */
-	node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
 
-	cpumask_and(nodemask, pnodemask, cpu_map);
+	cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
 	group = cpumask_first(nodemask);
 
 	if (sg)
@@ -7313,10 +7304,8 @@ static void free_sched_groups(const struct cpumask *cpu_map,
 
 		for (i = 0; i < nr_node_ids; i++) {
 			struct sched_group *oldsg, *sg = sched_group_nodes[i];
-			/* FIXME: Use cpumask_of_node */
-			node_to_cpumask_ptr(pnodemask, i);
 
-			cpus_and(*nodemask, *pnodemask, *cpu_map);
+			cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
 			if (cpumask_empty(nodemask))
 				continue;
 
@@ -7525,9 +7514,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd = NULL, *p;
 
-		/* FIXME: use cpumask_of_node */
-		*nodemask = node_to_cpumask(cpu_to_node(i));
-		cpus_and(*nodemask, *nodemask, *cpu_map);
+		cpumask_and(nodemask, cpumask_of_node(cpu_to_node(i)), cpu_map);
 
 #ifdef CONFIG_NUMA
 		if (cpumask_weight(cpu_map) >
@@ -7568,9 +7555,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 		sd = &per_cpu(core_domains, i).sd;
 		SD_INIT(sd, MC);
 		set_domain_attribute(sd, attr);
-		*sched_domain_span(sd) = cpu_coregroup_map(i);
-		cpumask_and(sched_domain_span(sd),
-			    sched_domain_span(sd), cpu_map);
+		cpumask_and(sched_domain_span(sd), cpu_map,
+						   cpu_coregroup_mask(i));
 		sd->parent = p;
 		p->child = sd;
 		cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7606,9 +7592,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 #ifdef CONFIG_SCHED_MC
 	/* Set up multi-core groups */
 	for_each_cpu(i, cpu_map) {
-		/* FIXME: Use cpu_coregroup_mask */
-		*this_core_map = cpu_coregroup_map(i);
-		cpus_and(*this_core_map, *this_core_map, *cpu_map);
+		cpumask_and(this_core_map, cpu_coregroup_mask(i), cpu_map);
 		if (i != cpumask_first(this_core_map))
 			continue;
 
@@ -7620,9 +7604,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 
 	/* Set up physical groups */
 	for (i = 0; i < nr_node_ids; i++) {
-		/* FIXME: Use cpumask_of_node */
-		*nodemask = node_to_cpumask(i);
-		cpus_and(*nodemask, *nodemask, *cpu_map);
+		cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
 		if (cpumask_empty(nodemask))
 			continue;
 
@@ -7644,11 +7626,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 		struct sched_group *sg, *prev;
 		int j;
 
-		/* FIXME: Use cpumask_of_node */
-		*nodemask = node_to_cpumask(i);
 		cpumask_clear(covered);
-
-		cpus_and(*nodemask, *nodemask, *cpu_map);
+		cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
 		if (cpumask_empty(nodemask)) {
 			sched_group_nodes[i] = NULL;
 			continue;
@@ -7679,8 +7658,6 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 
 		for (j = 0; j < nr_node_ids; j++) {
 			int n = (i + j) % nr_node_ids;
-			/* FIXME: Use cpumask_of_node */
-			node_to_cpumask_ptr(pnodemask, n);
 
 			cpumask_complement(notcovered, covered);
 			cpumask_and(tmpmask, notcovered, cpu_map);
@@ -7688,7 +7665,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 			if (cpumask_empty(tmpmask))
 				break;
 
-			cpumask_and(tmpmask, tmpmask, pnodemask);
+			cpumask_and(tmpmask, tmpmask, cpumask_of_node(n));
 			if (cpumask_empty(tmpmask))
 				continue;
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 833b6d4..954e1a8 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1383,7 +1383,8 @@ static inline void init_sched_rt_class(void)
 	unsigned int i;
 
 	for_each_possible_cpu(i)
-		alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL);
+		alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
+					GFP_KERNEL, cpu_to_node(i));
 }
 #endif /* CONFIG_SMP */
 
-- 
cgit v0.10.2


From 9628937d5b37169151c5f6bbd40919c6ac958a46 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 31 Dec 2008 18:08:46 -0800
Subject: x86: cleanup some remaining usages of NR_CPUS where s/b nr_cpu_ids

Impact: Reduce future system panics due to cpumask operations using NR_CPUS

Insure that code does not look at bits >= nr_cpu_ids as when cpumasks are
allocated based on nr_cpu_ids, these extra bits will not be defined.

Also some other minor updates:

   * change in to use cpu accessor function set_cpu_present() instead of
     directly accessing cpu_present_map w/cpu_clear() [arch/x86/kernel/reboot.c]

   * use cpumask_of() instead of &cpumask_of_cpu() [arch/x86/kernel/reboot.c]

   * optimize some cpu_mask_to_apicid_and functions.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 51ac123..bc53d5e 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -157,7 +157,7 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
 
 	num_bits_set = cpumask_weight(cpumask);
 	/* Return id to all */
-	if (num_bits_set == NR_CPUS)
+	if (num_bits_set == nr_cpu_ids)
 		return 0xFF;
 	/*
 	 * The cpus in the mask must all be on the apic cluster.  If are not
@@ -190,7 +190,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 
 	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
-	if (num_bits_set == NR_CPUS)
+	if (num_bits_set == nr_cpu_ids)
 		return cpu_to_logical_apicid(0);
 	/*
 	 * The cpus in the mask must all be on the apic cluster.  If are not
@@ -218,9 +218,6 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
 						  const struct cpumask *andmask)
 {
-	int num_bits_set;
-	int cpus_found = 0;
-	int cpu;
 	int apicid = cpu_to_logical_apicid(0);
 	cpumask_var_t cpumask;
 
@@ -229,31 +226,8 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
 
 	cpumask_and(cpumask, inmask, andmask);
 	cpumask_and(cpumask, cpumask, cpu_online_mask);
+	apicid = cpu_mask_to_apicid(cpumask);
 
-	num_bits_set = cpumask_weight(cpumask);
-	/* Return id to all */
-	if (num_bits_set == NR_CPUS)
-		goto exit;
-	/*
-	 * The cpus in the mask must all be on the apic cluster.  If are not
-	 * on the same apicid cluster return default value of TARGET_CPUS.
-	 */
-	cpu = cpumask_first(cpumask);
-	apicid = cpu_to_logical_apicid(cpu);
-	while (cpus_found < num_bits_set) {
-		if (cpumask_test_cpu(cpu, cpumask)) {
-			int new_apicid = cpu_to_logical_apicid(cpu);
-			if (apicid_cluster(apicid) !=
-					apicid_cluster(new_apicid)){
-				printk ("%s: Not a valid mask!\n", __func__);
-				return cpu_to_logical_apicid(0);
-			}
-			apicid = new_apicid;
-			cpus_found++;
-		}
-		cpu++;
-	}
-exit:
 	free_cpumask_var(cpumask);
 	return apicid;
 }
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
index d28a507..1caf576 100644
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -15,7 +15,7 @@
 #define SHARED_SWITCHER_PAGES \
 	DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE)
 /* Pages for switcher itself, then two pages per cpu */
-#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS)
+#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)
 
 /* We map at -4M for ease of mapping into the guest (one PTE page). */
 #define SWITCHER_ADDR 0xFFC00000
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index c80f00d..bf37bc4 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -63,8 +63,8 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
 extern u8 cpu_2_logical_apicid[];
 static inline int cpu_to_logical_apicid(int cpu)
 {
-       if (cpu >= NR_CPUS)
-	       return BAD_APICID;
+	if (cpu >= nr_cpu_ids)
+		return BAD_APICID;
 	return (int)cpu_2_logical_apicid[cpu];
 }
 
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 99327d1..4bb5fb3 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -52,7 +52,7 @@ static inline void init_apic_ldr(void)
 	int i;
 
 	/* Create logical APIC IDs by counting CPUs already in cluster. */
-	for (count = 0, i = NR_CPUS; --i >= 0; ) {
+	for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
 		lid = cpu_2_logical_apicid[i];
 		if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster)
 			++count;
@@ -97,8 +97,8 @@ static inline int apicid_to_node(int logical_apicid)
 static inline int cpu_to_logical_apicid(int cpu)
 {
 #ifdef CONFIG_SMP
-       if (cpu >= NR_CPUS)
-	       return BAD_APICID;
+	if (cpu >= nr_cpu_ids)
+		return BAD_APICID;
 	return (int)cpu_2_logical_apicid[cpu];
 #else
 	return logical_smp_processor_id();
@@ -107,7 +107,7 @@ static inline int cpu_to_logical_apicid(int cpu)
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-	if (mps_cpu < NR_CPUS)
+	if (mps_cpu < nr_cpu_ids)
 		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
 	else
 		return BAD_APICID;
@@ -146,7 +146,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 
 	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
-	if (num_bits_set == NR_CPUS)
+	if (num_bits_set >= nr_cpu_ids)
 		return (int) 0xFF;
 	/*
 	 * The cpus in the mask must all be on the apic cluster.  If are not
@@ -173,42 +173,16 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
 						  const struct cpumask *andmask)
 {
-	int num_bits_set;
-	int cpus_found = 0;
-	int cpu;
-	int apicid = 0xFF;
+	int apicid = cpu_to_logical_apicid(0);
 	cpumask_var_t cpumask;
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
-		return (int) 0xFF;
+		return apicid;
 
 	cpumask_and(cpumask, inmask, andmask);
 	cpumask_and(cpumask, cpumask, cpu_online_mask);
+	apicid = cpu_mask_to_apicid(cpumask);
 
-	num_bits_set = cpumask_weight(cpumask);
-	/* Return id to all */
-	if (num_bits_set == nr_cpu_ids)
-		goto exit;
-	/*
-	 * The cpus in the mask must all be on the apic cluster.  If are not
-	 * on the same apicid cluster return default value of TARGET_CPUS.
-	 */
-	cpu = cpumask_first(cpumask);
-	apicid = cpu_to_logical_apicid(cpu);
-	while (cpus_found < num_bits_set) {
-		if (cpumask_test_cpu(cpu, cpumask)) {
-			int new_apicid = cpu_to_logical_apicid(cpu);
-			if (apicid_cluster(apicid) !=
-					apicid_cluster(new_apicid)){
-				printk ("%s: Not a valid mask!\n", __func__);
-				return 0xFF;
-			}
-			apicid = apicid | new_apicid;
-			cpus_found++;
-		}
-		cpu++;
-	}
-exit:
 	free_cpumask_var(cpumask);
 	return apicid;
 }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 65d0b72..fd24c55 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -598,7 +598,7 @@ EXPORT_SYMBOL(acpi_map_lsapic);
 int acpi_unmap_lsapic(int cpu)
 {
 	per_cpu(x86_cpu_to_apicid, cpu) = -1;
-	cpu_clear(cpu, cpu_present_map);
+	set_cpu_present(cpu, false);
 	num_processors--;
 
 	return (0);
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 6b7f824..9958924 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -140,7 +140,7 @@ static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(const cpumask_t *mask);
+static void lapic_timer_broadcast(const struct cpumask *mask);
 static void apic_pm_activate(void);
 
 /*
@@ -453,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 /*
  * Local APIC timer broadcast function
  */
-static void lapic_timer_broadcast(const cpumask_t *mask)
+static void lapic_timer_broadcast(const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
 	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 42e0853..3f95a40 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -355,7 +355,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
 	} else if (smp_num_siblings > 1) {
 
-		if (smp_num_siblings > NR_CPUS) {
+		if (smp_num_siblings > nr_cpu_ids) {
 			printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
 					smp_num_siblings);
 			smp_num_siblings = 1;
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 72cefd1..62a3c23 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -121,7 +121,7 @@ static int cpuid_open(struct inode *inode, struct file *file)
 	lock_kernel();
 
 	cpu = iminor(file->f_path.dentry->d_inode);
-	if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+	if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 		ret = -ENXIO;	/* No such CPU */
 		goto out;
 	}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 82a7c7e..7262666 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -136,7 +136,7 @@ static int msr_open(struct inode *inode, struct file *file)
 	lock_kernel();
 	cpu = iminor(file->f_path.dentry->d_inode);
 
-	if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+	if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 		ret = -ENXIO;	/* No such CPU */
 		goto out;
 	}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index ba7b9a0..de4a9d6 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -449,7 +449,7 @@ void native_machine_shutdown(void)
 
 #ifdef CONFIG_X86_32
 	/* See if there has been given a command line override */
-	if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
+	if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
 		cpu_online(reboot_cpu))
 		reboot_cpu_id = reboot_cpu;
 #endif
@@ -459,7 +459,7 @@ void native_machine_shutdown(void)
 		reboot_cpu_id = smp_processor_id();
 
 	/* Make certain I only run on the appropriate processor */
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
+	set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
 
 	/* O.K Now that I'm on the appropriate processor,
 	 * stop all of the others.
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f49c26b..6bd4d9b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1154,7 +1154,7 @@ static void __init smp_cpu_index_default(void)
 	for_each_possible_cpu(i) {
 		c = &cpu_data(i);
 		/* mark all to hotplug */
-		c->cpu_index = NR_CPUS;
+		c->cpu_index = nr_cpu_ids;
 	}
 }
 
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index a5bc054..9840b7e 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -357,9 +357,8 @@ void __init find_smp_config(void)
 	printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id);
 
 	/* initialize the CPU structures (moved from smp_boot_cpus) */
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++)
 		cpu_irq_affinity[i] = ~0;
-	}
 	cpu_online_map = cpumask_of_cpu(boot_cpu_id);
 
 	/* The boot CPU must be extended */
@@ -1227,7 +1226,7 @@ int setup_profiling_timer(unsigned int multiplier)
 	 * new values until the next timer interrupt in which they do process
 	 * accounting.
 	 */
-	for (i = 0; i < NR_CPUS; ++i)
+	for (i = 0; i < nr_cpu_ids; ++i)
 		per_cpu(prof_multiplier, i) = multiplier;
 
 	return 0;
@@ -1257,7 +1256,7 @@ void __init voyager_smp_intr_init(void)
 	int i;
 
 	/* initialize the per cpu irq mask to all disabled */
-	for (i = 0; i < NR_CPUS; i++)
+	for (i = 0; i < nr_cpu_ids; i++)
 		vic_irq_mask[i] = 0xFFFF;
 
 	VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt);
-- 
cgit v0.10.2


From ee943a82b697456f9d2ac46f1e6d230beedb4b6c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 31 Dec 2008 18:08:47 -0800
Subject: x86: use cpumask_var_t in acpi/boot.c

Impact: reduce stack size, use new API.

Replace cpumask_t with cpumask_var_t.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fd24c55..29dc0c8 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -538,9 +538,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
 	struct acpi_madt_local_apic *lapic;
-	cpumask_t tmp_map, new_map;
+	cpumask_var_t tmp_map, new_map;
 	u8 physid;
 	int cpu;
+	int retval = -ENOMEM;
 
 	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
 		return -EINVAL;
@@ -569,23 +570,37 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	buffer.length = ACPI_ALLOCATE_BUFFER;
 	buffer.pointer = NULL;
 
-	tmp_map = cpu_present_map;
+	if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
+		goto out;
+
+	if (!alloc_cpumask_var(&new_map, GFP_KERNEL))
+		goto free_tmp_map;
+
+	cpumask_copy(tmp_map, cpu_present_mask);
 	acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
 
 	/*
 	 * If mp_register_lapic successfully generates a new logical cpu
 	 * number, then the following will get us exactly what was mapped
 	 */
-	cpus_andnot(new_map, cpu_present_map, tmp_map);
-	if (cpus_empty(new_map)) {
+	cpumask_andnot(new_map, cpu_present_mask, tmp_map);
+	if (cpumask_empty(new_map)) {
 		printk ("Unable to map lapic to logical cpu number\n");
-		return -EINVAL;
+		retval = -EINVAL;
+		goto free_new_map;
 	}
 
-	cpu = first_cpu(new_map);
+	cpu = cpumask_first(new_map);
 
 	*pcpu = cpu;
-	return 0;
+	retval = 0;
+
+free_new_map:
+	free_cpumask_var(new_map);
+free_tmp_map:
+	free_cpumask_var(tmp_map);
+out:
+	return retval;
 }
 
 /* wrapper to silence section mismatch warning */
-- 
cgit v0.10.2


From 2fdf66b491ac706657946442789ec644cc317e1a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 31 Dec 2008 18:08:47 -0800
Subject: cpumask: convert shared_cpu_map in acpi_processor* structs to
 cpumask_var_t

Impact: Reduce memory usage, use new API.

This is part of an effort to reduce structure sizes for machines
configured with large NR_CPUS.  cpumask_t gets replaced by
cpumask_var_t, which is either struct cpumask[1] (small NR_CPUS) or
struct cpumask * (large NR_CPUS).

(Changes to powernow-k* by <travis>.)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 88ea02d..d0a0010 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -517,6 +517,17 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
 	}
 }
 
+static void free_acpi_perf_data(void)
+{
+	unsigned int i;
+
+	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
+	for_each_possible_cpu(i)
+		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
+				 ->shared_cpu_map);
+	free_percpu(acpi_perf_data);
+}
+
 /*
  * acpi_cpufreq_early_init - initialize ACPI P-States library
  *
@@ -527,6 +538,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
  */
 static int __init acpi_cpufreq_early_init(void)
 {
+	unsigned int i;
 	dprintk("acpi_cpufreq_early_init\n");
 
 	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
@@ -534,6 +546,15 @@ static int __init acpi_cpufreq_early_init(void)
 		dprintk("Memory allocation error for acpi_perf_data.\n");
 		return -ENOMEM;
 	}
+	for_each_possible_cpu(i) {
+		if (!alloc_cpumask_var(&per_cpu_ptr(acpi_perf_data, i)
+				       ->shared_cpu_map, GFP_KERNEL)) {
+
+			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
+			free_acpi_perf_data();
+			return -ENOMEM;
+		}
+	}
 
 	/* Do initialization in ACPI core */
 	acpi_processor_preregister_performance(acpi_perf_data);
@@ -604,9 +625,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	 */
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
 	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
-		policy->cpus = perf->shared_cpu_map;
+		cpumask_copy(&policy->cpus, perf->shared_cpu_map);
 	}
-	policy->related_cpus = perf->shared_cpu_map;
+	cpumask_copy(&policy->related_cpus, perf->shared_cpu_map);
 
 #ifdef CONFIG_SMP
 	dmi_check_system(sw_any_bug_dmi_table);
@@ -795,7 +816,7 @@ static int __init acpi_cpufreq_init(void)
 
 	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
 	if (ret)
-		free_percpu(acpi_perf_data);
+		free_acpi_perf_data();
 
 	return ret;
 }
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 7c7d56b..1b446d7 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -310,6 +310,12 @@ static int powernow_acpi_init(void)
 		goto err0;
 	}
 
+	if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
+								GFP_KERNEL)) {
+		retval = -ENOMEM;
+		goto err05;
+	}
+
 	if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
 		retval = -EIO;
 		goto err1;
@@ -412,6 +418,8 @@ static int powernow_acpi_init(void)
 err2:
 	acpi_processor_unregister_performance(acpi_processor_perf, 0);
 err1:
+	free_cpumask_var(acpi_processor_perf->shared_cpu_map);
+err05:
 	kfree(acpi_processor_perf);
 err0:
 	printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n");
@@ -652,6 +660,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
 #ifdef CONFIG_X86_POWERNOW_K7_ACPI
 	if (acpi_processor_perf) {
 		acpi_processor_unregister_performance(acpi_processor_perf, 0);
+		free_cpumask_var(acpi_processor_perf->shared_cpu_map);
 		kfree(acpi_processor_perf);
 	}
 #endif
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 7f05f44..c3c9adb 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -766,7 +766,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
 	struct cpufreq_frequency_table *powernow_table;
-	int ret_val;
+	int ret_val = -ENODEV;
 
 	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
 		dprintk("register performance failed: bad ACPI data\n");
@@ -815,6 +815,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
+	if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
+		printk(KERN_ERR PFX
+				"unable to alloc powernow_k8_data cpumask\n");
+		ret_val = -ENOMEM;
+		goto err_out_mem;
+	}
+
 	return 0;
 
 err_out_mem:
@@ -826,7 +833,7 @@ err_out:
 	/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
 	data->acpi_data.state_count = 0;
 
-	return -ENODEV;
+	return ret_val;
 }
 
 static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
@@ -929,6 +936,7 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
 {
 	if (data->acpi_data.state_count)
 		acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+	free_cpumask_var(data->acpi_data.shared_cpu_map);
 }
 
 #else
@@ -1134,7 +1142,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	data->cpu = pol->cpu;
 	data->currpstate = HW_PSTATE_INVALID;
 
-	if (powernow_k8_cpu_init_acpi(data)) {
+	rc = powernow_k8_cpu_init_acpi(data);
+	if (rc) {
 		/*
 		 * Use the PSB BIOS structure. This is only availabe on
 		 * an UP version, and is deprecated by AMD.
@@ -1152,20 +1161,17 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 			       "ACPI maintainers and complain to your BIOS "
 			       "vendor.\n");
 #endif
-			kfree(data);
-			return -ENODEV;
+			goto err_out;
 		}
 		if (pol->cpu != 0) {
 			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
 			       "CPU other than CPU0. Complain to your BIOS "
 			       "vendor.\n");
-			kfree(data);
-			return -ENODEV;
+			goto err_out;
 		}
 		rc = find_psb_table(data);
 		if (rc) {
-			kfree(data);
-			return -ENODEV;
+			goto err_out;
 		}
 	}
 
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 3494836..0cc2fd3 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -826,6 +826,11 @@ static int acpi_processor_add(struct acpi_device *device)
 	if (!pr)
 		return -ENOMEM;
 
+	if (!alloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
+		kfree(pr);
+		return -ENOMEM;
+	}
+
 	pr->handle = device->handle;
 	strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
@@ -845,10 +850,8 @@ static int acpi_processor_remove(struct acpi_device *device, int type)
 
 	pr = acpi_driver_data(device);
 
-	if (pr->id >= nr_cpu_ids) {
-		kfree(pr);
-		return 0;
-	}
+	if (pr->id >= nr_cpu_ids)
+		goto free;
 
 	if (type == ACPI_BUS_REMOVAL_EJECT) {
 		if (acpi_processor_handle_eject(pr))
@@ -873,6 +876,9 @@ static int acpi_processor_remove(struct acpi_device *device, int type)
 
 	per_cpu(processors, pr->id) = NULL;
 	per_cpu(processor_device_array, pr->id) = NULL;
+
+free:
+	free_cpumask_var(pr->throttling.shared_cpu_map);
 	kfree(pr);
 
 	return 0;
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 0d7b772..846e227 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -588,12 +588,15 @@ int acpi_processor_preregister_performance(
 	int count, count_target;
 	int retval = 0;
 	unsigned int i, j;
-	cpumask_t covered_cpus;
+	cpumask_var_t covered_cpus;
 	struct acpi_processor *pr;
 	struct acpi_psd_package *pdomain;
 	struct acpi_processor *match_pr;
 	struct acpi_psd_package *match_pdomain;
 
+	if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))
+		return -ENOMEM;
+
 	mutex_lock(&performance_mutex);
 
 	retval = 0;
@@ -617,7 +620,7 @@ int acpi_processor_preregister_performance(
 		}
 
 		pr->performance = percpu_ptr(performance, i);
-		cpu_set(i, pr->performance->shared_cpu_map);
+		cpumask_set_cpu(i, pr->performance->shared_cpu_map);
 		if (acpi_processor_get_psd(pr)) {
 			retval = -EINVAL;
 			continue;
@@ -650,18 +653,18 @@ int acpi_processor_preregister_performance(
 		}
 	}
 
-	cpus_clear(covered_cpus);
+	cpumask_clear(covered_cpus);
 	for_each_possible_cpu(i) {
 		pr = per_cpu(processors, i);
 		if (!pr)
 			continue;
 
-		if (cpu_isset(i, covered_cpus))
+		if (cpumask_test_cpu(i, covered_cpus))
 			continue;
 
 		pdomain = &(pr->performance->domain_info);
-		cpu_set(i, pr->performance->shared_cpu_map);
-		cpu_set(i, covered_cpus);
+		cpumask_set_cpu(i, pr->performance->shared_cpu_map);
+		cpumask_set_cpu(i, covered_cpus);
 		if (pdomain->num_processors <= 1)
 			continue;
 
@@ -699,8 +702,8 @@ int acpi_processor_preregister_performance(
 				goto err_ret;
 			}
 
-			cpu_set(j, covered_cpus);
-			cpu_set(j, pr->performance->shared_cpu_map);
+			cpumask_set_cpu(j, covered_cpus);
+			cpumask_set_cpu(j, pr->performance->shared_cpu_map);
 			count++;
 		}
 
@@ -718,8 +721,8 @@ int acpi_processor_preregister_performance(
 
 			match_pr->performance->shared_type = 
 					pr->performance->shared_type;
-			match_pr->performance->shared_cpu_map =
-				pr->performance->shared_cpu_map;
+			cpumask_copy(match_pr->performance->shared_cpu_map,
+				     pr->performance->shared_cpu_map);
 		}
 	}
 
@@ -731,14 +734,15 @@ err_ret:
 
 		/* Assume no coordination on any error parsing domain info */
 		if (retval) {
-			cpus_clear(pr->performance->shared_cpu_map);
-			cpu_set(i, pr->performance->shared_cpu_map);
+			cpumask_clear(pr->performance->shared_cpu_map);
+			cpumask_set_cpu(i, pr->performance->shared_cpu_map);
 			pr->performance->shared_type = CPUFREQ_SHARED_TYPE_ALL;
 		}
 		pr->performance = NULL; /* Will be set for real in register */
 	}
 
 	mutex_unlock(&performance_mutex);
+	free_cpumask_var(covered_cpus);
 	return retval;
 }
 EXPORT_SYMBOL(acpi_processor_preregister_performance);
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index a0c38c9..d278381 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -61,11 +61,14 @@ static int acpi_processor_update_tsd_coord(void)
 	int count, count_target;
 	int retval = 0;
 	unsigned int i, j;
-	cpumask_t covered_cpus;
+	cpumask_var_t covered_cpus;
 	struct acpi_processor *pr, *match_pr;
 	struct acpi_tsd_package *pdomain, *match_pdomain;
 	struct acpi_processor_throttling *pthrottling, *match_pthrottling;
 
+	if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))
+		return -ENOMEM;
+
 	/*
 	 * Now that we have _TSD data from all CPUs, lets setup T-state
 	 * coordination between all CPUs.
@@ -91,19 +94,19 @@ static int acpi_processor_update_tsd_coord(void)
 	if (retval)
 		goto err_ret;
 
-	cpus_clear(covered_cpus);
+	cpumask_clear(covered_cpus);
 	for_each_possible_cpu(i) {
 		pr = per_cpu(processors, i);
 		if (!pr)
 			continue;
 
-		if (cpu_isset(i, covered_cpus))
+		if (cpumask_test_cpu(i, covered_cpus))
 			continue;
 		pthrottling = &pr->throttling;
 
 		pdomain = &(pthrottling->domain_info);
-		cpu_set(i, pthrottling->shared_cpu_map);
-		cpu_set(i, covered_cpus);
+		cpumask_set_cpu(i, pthrottling->shared_cpu_map);
+		cpumask_set_cpu(i, covered_cpus);
 		/*
 		 * If the number of processor in the TSD domain is 1, it is
 		 * unnecessary to parse the coordination for this CPU.
@@ -144,8 +147,8 @@ static int acpi_processor_update_tsd_coord(void)
 				goto err_ret;
 			}
 
-			cpu_set(j, covered_cpus);
-			cpu_set(j, pthrottling->shared_cpu_map);
+			cpumask_set_cpu(j, covered_cpus);
+			cpumask_set_cpu(j, pthrottling->shared_cpu_map);
 			count++;
 		}
 		for_each_possible_cpu(j) {
@@ -165,12 +168,14 @@ static int acpi_processor_update_tsd_coord(void)
 			 * If some CPUS have the same domain, they
 			 * will have the same shared_cpu_map.
 			 */
-			match_pthrottling->shared_cpu_map =
-				pthrottling->shared_cpu_map;
+			cpumask_copy(match_pthrottling->shared_cpu_map,
+				     pthrottling->shared_cpu_map);
 		}
 	}
 
 err_ret:
+	free_cpumask_var(covered_cpus);
+
 	for_each_possible_cpu(i) {
 		pr = per_cpu(processors, i);
 		if (!pr)
@@ -182,8 +187,8 @@ err_ret:
 		 */
 		if (retval) {
 			pthrottling = &(pr->throttling);
-			cpus_clear(pthrottling->shared_cpu_map);
-			cpu_set(i, pthrottling->shared_cpu_map);
+			cpumask_clear(pthrottling->shared_cpu_map);
+			cpumask_set_cpu(i, pthrottling->shared_cpu_map);
 			pthrottling->shared_type = DOMAIN_COORD_TYPE_SW_ALL;
 		}
 	}
@@ -567,7 +572,7 @@ static int acpi_processor_get_tsd(struct acpi_processor *pr)
 	pthrottling = &pr->throttling;
 	pthrottling->tsd_valid_flag = 1;
 	pthrottling->shared_type = pdomain->coord_type;
-	cpu_set(pr->id, pthrottling->shared_cpu_map);
+	cpumask_set_cpu(pr->id, pthrottling->shared_cpu_map);
 	/*
 	 * If the coordination type is not defined in ACPI spec,
 	 * the tsd_valid_flag will be clear and coordination type
@@ -826,7 +831,7 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr)
 
 static int acpi_processor_get_throttling(struct acpi_processor *pr)
 {
-	cpumask_t saved_mask;
+	cpumask_var_t saved_mask;
 	int ret;
 
 	if (!pr)
@@ -834,14 +839,20 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr)
 
 	if (!pr->flags.throttling)
 		return -ENODEV;
+
+	if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL))
+		return -ENOMEM;
+
 	/*
 	 * Migrate task to the cpu pointed by pr.
 	 */
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id));
+	cpumask_copy(saved_mask, &current->cpus_allowed);
+	/* FIXME: use work_on_cpu() */
+	set_cpus_allowed_ptr(current, cpumask_of(pr->id));
 	ret = pr->throttling.acpi_processor_get_throttling(pr);
 	/* restore the previous state */
-	set_cpus_allowed_ptr(current, &saved_mask);
+	set_cpus_allowed_ptr(current, saved_mask);
+	free_cpumask_var(saved_mask);
 
 	return ret;
 }
@@ -986,13 +997,13 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr,
 
 int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 {
-	cpumask_t saved_mask;
+	cpumask_var_t saved_mask;
 	int ret = 0;
 	unsigned int i;
 	struct acpi_processor *match_pr;
 	struct acpi_processor_throttling *p_throttling;
 	struct throttling_tstate t_state;
-	cpumask_t online_throttling_cpus;
+	cpumask_var_t online_throttling_cpus;
 
 	if (!pr)
 		return -EINVAL;
@@ -1003,17 +1014,25 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 	if ((state < 0) || (state > (pr->throttling.state_count - 1)))
 		return -EINVAL;
 
-	saved_mask = current->cpus_allowed;
+	if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (!alloc_cpumask_var(&online_throttling_cpus, GFP_KERNEL)) {
+		free_cpumask_var(saved_mask);
+		return -ENOMEM;
+	}
+
+	cpumask_copy(saved_mask, &current->cpus_allowed);
 	t_state.target_state = state;
 	p_throttling = &(pr->throttling);
-	cpus_and(online_throttling_cpus, cpu_online_map,
-			p_throttling->shared_cpu_map);
+	cpumask_and(online_throttling_cpus, cpu_online_mask,
+		    p_throttling->shared_cpu_map);
 	/*
 	 * The throttling notifier will be called for every
 	 * affected cpu in order to get one proper T-state.
 	 * The notifier event is THROTTLING_PRECHANGE.
 	 */
-	for_each_cpu_mask_nr(i, online_throttling_cpus) {
+	for_each_cpu(i, online_throttling_cpus) {
 		t_state.cpu = i;
 		acpi_processor_throttling_notifier(THROTTLING_PRECHANGE,
 							&t_state);
@@ -1025,7 +1044,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 	 * it can be called only for the cpu pointed by pr.
 	 */
 	if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) {
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id));
+		/* FIXME: use work_on_cpu() */
+		set_cpus_allowed_ptr(current, cpumask_of(pr->id));
 		ret = p_throttling->acpi_processor_set_throttling(pr,
 						t_state.target_state);
 	} else {
@@ -1034,7 +1054,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 		 * it is necessary to set T-state for every affected
 		 * cpus.
 		 */
-		for_each_cpu_mask_nr(i, online_throttling_cpus) {
+		for_each_cpu(i, online_throttling_cpus) {
 			match_pr = per_cpu(processors, i);
 			/*
 			 * If the pointer is invalid, we will report the
@@ -1056,7 +1076,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 				continue;
 			}
 			t_state.cpu = i;
-			set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
+			/* FIXME: use work_on_cpu() */
+			set_cpus_allowed_ptr(current, cpumask_of(i));
 			ret = match_pr->throttling.
 				acpi_processor_set_throttling(
 				match_pr, t_state.target_state);
@@ -1068,13 +1089,16 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 	 * affected cpu to update the T-states.
 	 * The notifier event is THROTTLING_POSTCHANGE
 	 */
-	for_each_cpu_mask_nr(i, online_throttling_cpus) {
+	for_each_cpu(i, online_throttling_cpus) {
 		t_state.cpu = i;
 		acpi_processor_throttling_notifier(THROTTLING_POSTCHANGE,
 							&t_state);
 	}
 	/* restore the previous state */
-	set_cpus_allowed_ptr(current, &saved_mask);
+	/* FIXME: use work_on_cpu() */
+	set_cpus_allowed_ptr(current, saved_mask);
+	free_cpumask_var(online_throttling_cpus);
+	free_cpumask_var(saved_mask);
 	return ret;
 }
 
@@ -1120,7 +1144,7 @@ int acpi_processor_get_throttling_info(struct acpi_processor *pr)
 	if (acpi_processor_get_tsd(pr)) {
 		pthrottling = &pr->throttling;
 		pthrottling->tsd_valid_flag = 0;
-		cpu_set(pr->id, pthrottling->shared_cpu_map);
+		cpumask_set_cpu(pr->id, pthrottling->shared_cpu_map);
 		pthrottling->shared_type = DOMAIN_COORD_TYPE_SW_ALL;
 	}
 
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 3795590..0574add 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -127,7 +127,7 @@ struct acpi_processor_performance {
 	unsigned int state_count;
 	struct acpi_processor_px *states;
 	struct acpi_psd_package domain_info;
-	cpumask_t shared_cpu_map;
+	cpumask_var_t shared_cpu_map;
 	unsigned int shared_type;
 };
 
@@ -172,7 +172,7 @@ struct acpi_processor_throttling {
 	unsigned int state_count;
 	struct acpi_processor_tx_tss *states_tss;
 	struct acpi_tsd_package domain_info;
-	cpumask_t shared_cpu_map;
+	cpumask_var_t shared_cpu_map;
 	int (*acpi_processor_get_throttling) (struct acpi_processor * pr);
 	int (*acpi_processor_set_throttling) (struct acpi_processor * pr,
 					      int state);
-- 
cgit v0.10.2


From 80855f7361eb68205e6bc1981928629d9b02d5c9 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 31 Dec 2008 18:08:47 -0800
Subject: cpumask: use alloc_cpumask_var_node where appropriate

Impact: Reduce inter-node memory traffic.

Reduces inter-node memory traffic (offloading the global system bus)
by allocating referenced struct cpumasks on the same node as the
referring struct.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index d0a0010..28102ad 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -547,8 +547,9 @@ static int __init acpi_cpufreq_early_init(void)
 		return -ENOMEM;
 	}
 	for_each_possible_cpu(i) {
-		if (!alloc_cpumask_var(&per_cpu_ptr(acpi_perf_data, i)
-				       ->shared_cpu_map, GFP_KERNEL)) {
+		if (!alloc_cpumask_var_node(
+			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
+			GFP_KERNEL, cpu_to_node(i))) {
 
 			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
 			free_acpi_perf_data();
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 3e070bb..a25c3f7 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -212,11 +212,11 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
 	if (cfg) {
-		/* FIXME: needs alloc_cpumask_var_node() */
-		if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
+		if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
 			kfree(cfg);
 			cfg = NULL;
-		} else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
+		} else if (!alloc_cpumask_var_node(&cfg->old_domain,
+							  GFP_ATOMIC, node)) {
 			free_cpumask_var(cfg->domain);
 			kfree(cfg);
 			cfg = NULL;
-- 
cgit v0.10.2


From 8fd2d2d5aaf086cfa3b2e2e58cab96b7afdc9e51 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 31 Dec 2008 18:08:48 -0800
Subject: cpumask: fix compile error when CONFIG_NR_CPUS is not defined

CONFIG_NR_CPUS will be defined for all arch's whether SMP or not, but
it may not have made it into all arches yet.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 2aef96f..719ee5c 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -133,7 +133,7 @@ print_cpus_func(present);
  */
 static ssize_t print_cpus_kernel_max(struct sysdev_class *class, char *buf)
 {
-	int n = snprintf(buf, PAGE_SIZE-2, "%d\n", CONFIG_NR_CPUS - 1);
+	int n = snprintf(buf, PAGE_SIZE-2, "%d\n", NR_CPUS - 1);
 	return n;
 }
 static SYSDEV_CLASS_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL);
-- 
cgit v0.10.2


From ab14398abd195af91a744c320a52a1bce814dd1e Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Fri, 2 Jan 2009 21:51:32 +0300
Subject: x86: setup_per_cpu_areas() cleanup

Impact: cleanup

__alloc_bootmem and __alloc_bootmem_node do panic
for us in case of fail so no need for additional
checks here.

Also lets use pr_*() macros for printing.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 49f3f70..a4b619c 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -153,12 +153,10 @@ void __init setup_per_cpu_areas(void)
 	align = max_t(unsigned long, PAGE_SIZE, align);
 	size = roundup(old_size, align);
 
-	printk(KERN_INFO
-		"NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
+	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
 		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
 
-	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
-			  size);
+	pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);
 
 	for_each_possible_cpu(cpu) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -169,22 +167,15 @@ void __init setup_per_cpu_areas(void)
 		if (!node_online(node) || !NODE_DATA(node)) {
 			ptr = __alloc_bootmem(size, align,
 					 __pa(MAX_DMA_ADDRESS));
-			printk(KERN_INFO
-			       "cpu %d has no node %d or node-local memory\n",
+			pr_info("cpu %d has no node %d or node-local memory\n",
 				cpu, node);
-			if (ptr)
-				printk(KERN_DEBUG
-					"per cpu data for cpu%d at %016lx\n",
-					 cpu, __pa(ptr));
-		}
-		else {
+			pr_debug("per cpu data for cpu%d at %016lx\n",
+				 cpu, __pa(ptr));
+		} else {
 			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
 							__pa(MAX_DMA_ADDRESS));
-			if (ptr)
-				printk(KERN_DEBUG
-					"per cpu data for cpu%d on node%d "
-					"at %016lx\n",
-					cpu, node, __pa(ptr));
+			pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
+				cpu, node, __pa(ptr));
 		}
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
-- 
cgit v0.10.2


From e0b685d39a0404e7f87fb7b7808c3b37a115fe11 Mon Sep 17 00:00:00 2001
From: Hugh Blemings <hugh@blemings.org>
Date: Sat, 3 Jan 2009 16:48:44 +1100
Subject: Updated contact info for CREDITS file

This updates some personal info in the CREDITS file.

I'm no longer actively involved in Keyspan driver work so shouldn't
really be listed as a Maintainer here.

I do however field the occasional question on them and as I'm dropping
the misc.nu domain, want to ensure people can find me should they need
to.

Signed-off-by: Hugh Blemings <hugh@blemings.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/CREDITS b/CREDITS
index b50db17..abe05a0 100644
--- a/CREDITS
+++ b/CREDITS
@@ -369,10 +369,10 @@ P: 1024/8462A731 4C 55 86 34 44 59 A7 99  2B 97 88 4A 88 9A 0D 97
 D: sun4 port, Sparc hacker
 
 N: Hugh Blemings
-E: hugh@misc.nu
-W: http://misc.nu/hugh/
-D: Author and maintainer of the Keyspan USB to Serial drivers
-S: Po Box 234
+E: hugh@blemings.org
+W: http://blemings.org/hugh
+D: Original author of the Keyspan USB to serial drivers, random PowerPC hacker
+S: PO Box 234
 S: Belconnen ACT 2616
 S: Australia
 
-- 
cgit v0.10.2


From fe30af971d896c144ef4708f97cf9d3186303c42 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 3 Jan 2009 07:16:13 +0000
Subject: remove the rudiment of a.out for sparc

it's been used only in sunos compat

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index f1f3f41..8a3b32f 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -99,88 +99,53 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
 #       define START_DATA(u)	(u.start_data)
 #elif defined(__arm__)
 #	define START_DATA(u)	((u.u_tsize << PAGE_SHIFT) + u.start_code)
-#elif defined(__sparc__)
-#       define START_DATA(u)    (u.u_tsize)
 #elif defined(__i386__) || defined(__mc68000__) || defined(__arch_um__)
 #       define START_DATA(u)	(u.u_tsize << PAGE_SHIFT)
 #endif
-#ifdef __sparc__
-#       define START_STACK(u)   ((regs->u_regs[UREG_FP]) & ~(PAGE_SIZE - 1))
-#else
 #       define START_STACK(u)   (u.start_stack)
-#endif
 
 	fs = get_fs();
 	set_fs(KERNEL_DS);
 	has_dumped = 1;
 	current->flags |= PF_DUMPCORE;
        	strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
-#ifndef __sparc__
 	dump.u_ar0 = offsetof(struct user, regs);
-#endif
 	dump.signal = signr;
 	aout_dump_thread(regs, &dump);
 
 /* If the size of the dump file exceeds the rlimit, then see what would happen
    if we wrote the stack, but not the data area.  */
-#ifdef __sparc__
-	if ((dump.u_dsize + dump.u_ssize) > limit)
-		dump.u_dsize = 0;
-#else
 	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
 		dump.u_dsize = 0;
-#endif
 
 /* Make sure we have enough room to write the stack and data areas. */
-#ifdef __sparc__
-	if (dump.u_ssize > limit)
-		dump.u_ssize = 0;
-#else
 	if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
 		dump.u_ssize = 0;
-#endif
 
 /* make sure we actually have a data and stack area to dump */
 	set_fs(USER_DS);
-#ifdef __sparc__
-	if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize))
-		dump.u_dsize = 0;
-	if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize))
-		dump.u_ssize = 0;
-#else
 	if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
 		dump.u_dsize = 0;
 	if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
 		dump.u_ssize = 0;
-#endif
 
 	set_fs(KERNEL_DS);
 /* struct user */
 	DUMP_WRITE(&dump,sizeof(dump));
 /* Now dump all of the user data.  Include malloced stuff as well */
-#ifndef __sparc__
 	DUMP_SEEK(PAGE_SIZE);
-#endif
 /* now we start writing out the user space info */
 	set_fs(USER_DS);
 /* Dump the data area */
 	if (dump.u_dsize != 0) {
 		dump_start = START_DATA(dump);
-#ifdef __sparc__
-		dump_size = dump.u_dsize;
-#else
 		dump_size = dump.u_dsize << PAGE_SHIFT;
-#endif
 		DUMP_WRITE(dump_start,dump_size);
 	}
 /* Now prepare to dump the stack area */
 	if (dump.u_ssize != 0) {
 		dump_start = START_STACK(dump);
-#ifdef __sparc__
-		dump_size = dump.u_ssize;
-#else
 		dump_size = dump.u_ssize << PAGE_SHIFT;
-#endif
 		DUMP_WRITE(dump_start,dump_size);
 	}
 /* Finally dump the task struct.  Not be used by gdb, but could be useful */
@@ -205,11 +170,6 @@ static unsigned long __user *create_aout_tables(char __user *p, struct linux_bin
 	int envc = bprm->envc;
 
 	sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p);
-#ifdef __sparc__
-	/* This imposes the proper stack alignment for a new process. */
-	sp = (void __user *) (((unsigned long) sp) & ~7);
-	if ((envc+argc+3)&1) --sp;
-#endif
 #ifdef __alpha__
 /* whee.. test-programs are so much fun. */
 	put_user(0, --sp);
@@ -302,11 +262,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	/* OK, This is the point of no return */
 #if defined(__alpha__)
 	SET_AOUT_PERSONALITY(bprm, ex);
-#elif defined(__sparc__)
-	set_personality(PER_SUNOS);
-#if !defined(__sparc_v9__)
-	memcpy(&current->thread.core_exec, &ex, sizeof(struct exec));
-#endif
 #else
 	set_personality(PER_LINUX);
 #endif
@@ -322,24 +277,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 	install_exec_creds(bprm);
  	current->flags &= ~PF_FORKNOEXEC;
-#ifdef __sparc__
-	if (N_MAGIC(ex) == NMAGIC) {
-		loff_t pos = fd_offset;
-		/* Fuck me plenty... */
-		/* <AOL></AOL> */
-		down_write(&current->mm->mmap_sem);	
-		error = do_brk(N_TXTADDR(ex), ex.a_text);
-		up_write(&current->mm->mmap_sem);
-		bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
-			  ex.a_text, &pos);
-		down_write(&current->mm->mmap_sem);
-		error = do_brk(N_DATADDR(ex), ex.a_data);
-		up_write(&current->mm->mmap_sem);
-		bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
-			  ex.a_data, &pos);
-		goto beyond_if;
-	}
-#endif
 
 	if (N_MAGIC(ex) == OMAGIC) {
 		unsigned long text_addr, map_size;
@@ -347,7 +284,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 		text_addr = N_TXTADDR(ex);
 
-#if defined(__alpha__) || defined(__sparc__)
+#ifdef __alpha__
 		pos = fd_offset;
 		map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1;
 #else
-- 
cgit v0.10.2


From 17580d7f2f632ff8c9786d609508c35c9f56e1f3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 3 Jan 2009 07:16:23 +0000
Subject: sanitize ifdefs in binfmt_aout

They are actually alpha vs.  i386/arm/m68k i.e. ecoff vs. aout.

In the only place where we actually tried to handle arm and i386/m68k in
different ways (START_DATA() in coredump handling), the arm variant
works for all of them (i386 and m68k have u.start_code set to 0).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 8a3b32f..b639dcf 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -95,12 +95,10 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
 	int has_dumped = 0;
 	unsigned long dump_start, dump_size;
 	struct user dump;
-#if defined(__alpha__)
+#ifdef __alpha__
 #       define START_DATA(u)	(u.start_data)
-#elif defined(__arm__)
+#else
 #	define START_DATA(u)	((u.u_tsize << PAGE_SHIFT) + u.start_code)
-#elif defined(__i386__) || defined(__mc68000__) || defined(__arch_um__)
-#       define START_DATA(u)	(u.u_tsize << PAGE_SHIFT)
 #endif
 #       define START_STACK(u)   (u.start_stack)
 
@@ -176,18 +174,18 @@ static unsigned long __user *create_aout_tables(char __user *p, struct linux_bin
 	put_user(0, --sp);
 	if (bprm->loader) {
 		put_user(0, --sp);
-		put_user(0x3eb, --sp);
+		put_user(1003, --sp);
 		put_user(bprm->loader, --sp);
-		put_user(0x3ea, --sp);
+		put_user(1002, --sp);
 	}
 	put_user(bprm->exec, --sp);
-	put_user(0x3e9, --sp);
+	put_user(1001, --sp);
 #endif
 	sp -= envc+1;
 	envp = (char __user * __user *) sp;
 	sp -= argc+1;
 	argv = (char __user * __user *) sp;
-#if defined(__i386__) || defined(__mc68000__) || defined(__arm__) || defined(__arch_um__)
+#ifndef __alpha__
 	put_user((unsigned long) envp,--sp);
 	put_user((unsigned long) argv,--sp);
 #endif
@@ -260,7 +258,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 		return retval;
 
 	/* OK, This is the point of no return */
-#if defined(__alpha__)
+#ifdef __alpha__
 	SET_AOUT_PERSONALITY(bprm, ex);
 #else
 	set_personality(PER_LINUX);
-- 
cgit v0.10.2


From 3bfacef412b4bc993a8992217e50f1245f2fd3a6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 3 Jan 2009 07:16:33 +0000
Subject: get rid of special-casing the /sbin/loader on alpha

... just make it a binfmt handler like #! one.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index ac706c1..b469775 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -8,7 +8,7 @@ EXTRA_CFLAGS	:= -Werror -Wno-sign-compare
 
 obj-y    := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
 	    irq_alpha.o signal.o setup.o ptrace.o time.o \
-	    alpha_ksyms.o systbls.o err_common.o io.o
+	    alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o
 
 obj-$(CONFIG_VGA_HOSE)	+= console.o
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c
new file mode 100644
index 0000000..4a0af90
--- /dev/null
+++ b/arch/alpha/kernel/binfmt_loader.c
@@ -0,0 +1,51 @@
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm_types.h>
+#include <linux/binfmts.h>
+#include <linux/a.out.h>
+
+static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+{
+	struct exec *eh = (struct exec *)bprm->buf;
+	unsigned long loader;
+	struct file *file;
+	int retval;
+
+	if (eh->fh.f_magic != 0x183 || (eh->fh.f_flags & 0x3000) != 0x3000)
+		return -ENOEXEC;
+
+	if (bprm->loader)
+		return -ENOEXEC;
+
+	allow_write_access(bprm->file);
+	fput(bprm->file);
+	bprm->file = NULL;
+
+	loader = bprm->vma->vm_end - sizeof(void *);
+
+	file = open_exec("/sbin/loader");
+	retval = PTR_ERR(file);
+	if (IS_ERR(file))
+		return retval;
+
+	/* Remember if the application is TASO.  */
+	bprm->taso = eh->ah.entry < 0x100000000UL;
+
+	bprm->file = file;
+	bprm->loader = loader;
+	retval = prepare_binprm(bprm);
+	if (retval < 0)
+		return retval;
+	return search_binary_handler(bprm,regs);
+}
+
+static struct linux_binfmt loader_format = {
+	.load_binary	= load_binary,
+};
+
+static int __init init_loader_binfmt(void)
+{
+	return register_binfmt(&loader_format);
+}
+arch_initcall(init_loader_binfmt);
diff --git a/fs/exec.c b/fs/exec.c
index dfbf700..3ef9cf9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -57,11 +57,6 @@
 #include <asm/tlb.h>
 #include "internal.h"
 
-#ifdef __alpha__
-/* for /sbin/loader handling in search_binary_handler() */
-#include <linux/a.out.h>
-#endif
-
 int core_uses_pid;
 char core_pattern[CORENAME_MAX_SIZE] = "core";
 int suid_dumpable = 0;
@@ -1172,41 +1167,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 	unsigned int depth = bprm->recursion_depth;
 	int try,retval;
 	struct linux_binfmt *fmt;
-#ifdef __alpha__
-	/* handle /sbin/loader.. */
-	{
-	    struct exec * eh = (struct exec *) bprm->buf;
 
-	    if (!bprm->loader && eh->fh.f_magic == 0x183 &&
-		(eh->fh.f_flags & 0x3000) == 0x3000)
-	    {
-		struct file * file;
-		unsigned long loader;
-
-		allow_write_access(bprm->file);
-		fput(bprm->file);
-		bprm->file = NULL;
-
-		loader = bprm->vma->vm_end - sizeof(void *);
-
-		file = open_exec("/sbin/loader");
-		retval = PTR_ERR(file);
-		if (IS_ERR(file))
-			return retval;
-
-		/* Remember if the application is TASO.  */
-		bprm->taso = eh->ah.entry < 0x100000000UL;
-
-		bprm->file = file;
-		bprm->loader = loader;
-		retval = prepare_binprm(bprm);
-		if (retval<0)
-			return retval;
-		/* should call search_binary_handler recursively here,
-		   but it does not matter */
-	    }
-	}
-#endif
 	retval = security_bprm_check(bprm);
 	if (retval)
 		return retval;
-- 
cgit v0.10.2


From d97106ab53f812910a62d18afb9dbe882819c1ba Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 3 Jan 2009 11:46:17 -0800
Subject: Make %p print '(null)' for NULL pointers

Before, when we only ever printed out the pointer value itself, a NULL
pointer would never cause issues and might as well be printed out as
just its numeric value.

However, with the extended %p formats, especially %pR, we might validly
want to print out resources for debugging.  And sometimes they don't
even exist, and the resource pointer is just NULL.  Print it out as
such, rather than oopsing.

This is a more generic version of a patch done by Trent Piepho (catching
all %p cases rather than just %pR, and using "(null)" instead of
"[NULL]" to match glibc).

Requested-by: Trent Piepho <xyzzy@speakeasy.org>
Acked-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3b77702..98d6322 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -661,6 +661,9 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width,
  */
 static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags)
 {
+	if (!ptr)
+		return string(buf, end, "(null)", field_width, precision, flags);
+
 	switch (*fmt) {
 	case 'F':
 		ptr = dereference_function_descriptor(ptr);
-- 
cgit v0.10.2


From 2f983570010a0dcb26d988da02d7ccfad00c807c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 3 Jan 2009 00:06:34 -0800
Subject: sparseirq: move set/get_timer_rand_state back to .c

those two functions only used in that C file

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d26891b..c7afc06 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -559,7 +559,40 @@ struct timer_rand_state {
 };
 
 #ifndef CONFIG_SPARSE_IRQ
-struct timer_rand_state *irq_timer_state[NR_IRQS];
+
+static struct timer_rand_state *irq_timer_state[NR_IRQS];
+
+static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	return irq_timer_state[irq];
+}
+
+static void set_timer_rand_state(unsigned int irq,
+				 struct timer_rand_state *state)
+{
+	irq_timer_state[irq] = state;
+}
+
+#else
+
+static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	return desc->timer_rand_state;
+}
+
+static void set_timer_rand_state(unsigned int irq,
+				 struct timer_rand_state *state)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	desc->timer_rand_state = state;
+}
 #endif
 
 static struct timer_rand_state input_timer_state;
@@ -919,11 +952,6 @@ void rand_initialize_irq(int irq)
 {
 	struct timer_rand_state *state;
 
-#ifndef CONFIG_SPARSE_IRQ
-	if (irq >= nr_irqs)
-		return;
-#endif
-
 	state = get_timer_rand_state(irq);
 
 	if (state)
diff --git a/include/linux/random.h b/include/linux/random.h
index adbf3bd..407ea36 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -45,56 +45,6 @@ struct rand_pool_info {
 
 extern void rand_initialize_irq(int irq);
 
-struct timer_rand_state;
-#ifndef CONFIG_SPARSE_IRQ
-
-extern struct timer_rand_state *irq_timer_state[];
-
-static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
-{
-	if (irq >= nr_irqs)
-		return NULL;
-
-	return irq_timer_state[irq];
-}
-
-static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
-{
-	if (irq >= nr_irqs)
-		return;
-
-	irq_timer_state[irq] = state;
-}
-
-#else
-
-#include <linux/irq.h>
-static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	if (!desc)
-		return NULL;
-
-	return desc->timer_rand_state;
-}
-
-static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	if (!desc)
-		return;
-
-	desc->timer_rand_state = state;
-}
-#endif
-
-
 extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value);
 extern void add_interrupt_randomness(int irq);
-- 
cgit v0.10.2


From ac26fca3e14c8882e382daa7e96ab73e0186cf03 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 20 Nov 2008 11:27:02 +0100
Subject: HID: ignore mouse interface for unibody macbooks

The mouse interface on unibody macbooks is going to be handled by
bcm59743 driver in 2.6.29.

Reported-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 40df3e1..839de38 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1577,6 +1577,9 @@ static const struct hid_device_id hid_mouse_ignore_list[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ISO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_JIS) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
 	{ }
-- 
cgit v0.10.2


From efc7ce18d9037aa947c1aad5eb712ecc47520126 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 17 Oct 2008 15:01:15 +0200
Subject: HID: non-input reports can also be numbered

When computing the maximal buffer size needed, we must take into
account that not only input reports can be numbered.

Pointed out in bugzilla #10467

Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 606369e..2afc861 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -4,7 +4,7 @@
  *  Copyright (c) 1999 Andreas Gal
  *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
  *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
- *  Copyright (c) 2006-2007 Jiri Kosina
+ *  Copyright (c) 2006-2008 Jiri Kosina
  */
 
 /*
@@ -641,9 +641,7 @@ static void hid_find_max_report(struct hid_device *hid, unsigned int type,
 	unsigned int size;
 
 	list_for_each_entry(report, &hid->report_enum[type].report_list, list) {
-		size = ((report->size - 1) >> 3) + 1;
-		if (type == HID_INPUT_REPORT && hid->report_enum[type].numbered)
-			size++;
+		size = ((report->size - 1) >> 3) + 1 + hid->report_enum[type].numbered;
 		if (*max < size)
 			*max = size;
 	}
-- 
cgit v0.10.2


From 08ef08ee8c5a8d538ca9a3c433d4213c128af863 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 30 Oct 2008 23:58:51 +0100
Subject: HID: automatically call usbhid_set_leds in usbhid driver

This patch (as1146c) makes usbhid automatically call usbhid_set_leds()
for any device that supports the keyboard boot protocol.

In theory this should be perfectly safe.  BIOSes send the LED output
report as part of their normal device initialization, so any keyboard
device supporting the boot protocol has to be able to handle it.

As a side effect, the hid-dell and hid-bright drivers are no longer
needed, and the Logitech keyboard driver can be removed from hid-lg.

CC: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index b4fd8ca..65b577e 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -107,13 +107,6 @@ config HID_BELKIN
 	---help---
 	Support for Belkin Flip KVM and Wireless keyboard.
 
-config HID_BRIGHT
-	tristate "Bright" if EMBEDDED
-	depends on USB_HID
-	default y
-	---help---
-	Support for Bright ABNT-2 keyboard.
-
 config HID_CHERRY
 	tristate "Cherry" if EMBEDDED
 	depends on USB_HID
@@ -135,14 +128,6 @@ config HID_CYPRESS
 	---help---
 	Support for cypress mouse and barcode readers.
 
-config HID_DELL
-	tristate "Dell" if EMBEDDED
-	depends on USB_HID
-	default y
-	---help---
-	Support for quirky Dell HID hardware that require
-	special LED handling (W7658 and SK8115 models)
-
 config HID_EZKEY
 	tristate "Ezkey" if EMBEDDED
 	depends on USB_HID
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index b09e43e..e2294a8 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -23,11 +23,9 @@ endif
 obj-$(CONFIG_HID_A4TECH)	+= hid-a4tech.o
 obj-$(CONFIG_HID_APPLE)		+= hid-apple.o
 obj-$(CONFIG_HID_BELKIN)	+= hid-belkin.o
-obj-$(CONFIG_HID_BRIGHT)	+= hid-bright.o
 obj-$(CONFIG_HID_CHERRY)	+= hid-cherry.o
 obj-$(CONFIG_HID_CHICONY)	+= hid-chicony.o
 obj-$(CONFIG_HID_CYPRESS)	+= hid-cypress.o
-obj-$(CONFIG_HID_DELL)		+= hid-dell.o
 obj-$(CONFIG_HID_EZKEY)		+= hid-ezkey.o
 obj-$(CONFIG_HID_GYRATION)	+= hid-gyration.o
 obj-$(CONFIG_HID_LOGITECH)	+= hid-logitech.o
diff --git a/drivers/hid/hid-bright.c b/drivers/hid/hid-bright.c
deleted file mode 100644
index 38517a1..0000000
--- a/drivers/hid/hid-bright.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- *  HID driver for some bright "special" devices
- *
- *  Copyright (c) 2008 Mauro Carvalho Chehab <mchehab@redhat.com>
- *
- * Based on hid-dell driver
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- */
-
-#include <linux/device.h>
-#include <linux/hid.h>
-#include <linux/module.h>
-
-#include "hid-ids.h"
-
-static int bright_probe(struct hid_device *hdev, const struct hid_device_id *id)
-{
-	int ret;
-
-	ret = hid_parse(hdev);
-	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
-		goto err_free;
-	}
-
-	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
-	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
-		goto err_free;
-	}
-
-	usbhid_set_leds(hdev);
-
-	return 0;
-err_free:
-	return ret;
-}
-
-static const struct hid_device_id bright_devices[] = {
-	{ HID_USB_DEVICE(USB_VENDOR_ID_BRIGHT, USB_DEVICE_ID_BRIGHT_ABNT2) },
-	{ }
-};
-MODULE_DEVICE_TABLE(hid, bright_devices);
-
-static struct hid_driver bright_driver = {
-	.name = "bright",
-	.id_table = bright_devices,
-	.probe = bright_probe,
-};
-
-static int bright_init(void)
-{
-	return hid_register_driver(&bright_driver);
-}
-
-static void bright_exit(void)
-{
-	hid_unregister_driver(&bright_driver);
-}
-
-module_init(bright_init);
-module_exit(bright_exit);
-MODULE_LICENSE("GPL");
-
-HID_COMPAT_LOAD_DRIVER(bright);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 839de38..8be3003 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1256,16 +1256,12 @@ static const struct hid_device_id hid_blacklist[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_BELKIN, USB_DEVICE_ID_FLIP_KVM) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_BRIGHT, USB_DEVICE_ID_BRIGHT_ABNT2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_W7658) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_SK8115) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_EZKEY, USB_DEVICE_ID_BTC_8193) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_GENERIC_13BA, USB_DEVICE_ID_GENERIC_13BA_KBD_MOUSE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
@@ -1279,7 +1275,6 @@ static const struct hid_device_id hid_blacklist[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_DESKTOP) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_EDGE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_MINI) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KBD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_ELITE_KBD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_CORDLESS_DESKTOP_LX500) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_EXTREME_3D) },
diff --git a/drivers/hid/hid-dell.c b/drivers/hid/hid-dell.c
deleted file mode 100644
index f5474300..0000000
--- a/drivers/hid/hid-dell.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  HID driver for some dell "special" devices
- *
- *  Copyright (c) 1999 Andreas Gal
- *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
- *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
- *  Copyright (c) 2006-2007 Jiri Kosina
- *  Copyright (c) 2007 Paul Walmsley
- *  Copyright (c) 2008 Jiri Slaby
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- */
-
-#include <linux/device.h>
-#include <linux/hid.h>
-#include <linux/module.h>
-
-#include "hid-ids.h"
-
-static int dell_probe(struct hid_device *hdev, const struct hid_device_id *id)
-{
-	int ret;
-
-	ret = hid_parse(hdev);
-	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
-		goto err_free;
-	}
-
-	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
-	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
-		goto err_free;
-	}
-
-	usbhid_set_leds(hdev);
-
-	return 0;
-err_free:
-	return ret;
-}
-
-static const struct hid_device_id dell_devices[] = {
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_W7658) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_SK8115) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_GENERIC_13BA, USB_DEVICE_ID_GENERIC_13BA_KBD_MOUSE) },
-	{ }
-};
-MODULE_DEVICE_TABLE(hid, dell_devices);
-
-static struct hid_driver dell_driver = {
-	.name = "dell",
-	.id_table = dell_devices,
-	.probe = dell_probe,
-};
-
-static int dell_init(void)
-{
-	return hid_register_driver(&dell_driver);
-}
-
-static void dell_exit(void)
-{
-	hid_unregister_driver(&dell_driver);
-}
-
-module_init(dell_init);
-module_exit(dell_exit);
-MODULE_LICENSE("GPL");
-
-HID_COMPAT_LOAD_DRIVER(dell);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 3928969..aae2cec 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -107,9 +107,6 @@
 #define USB_VENDOR_ID_BELKIN           0x050d
 #define USB_DEVICE_ID_FLIP_KVM         0x3201
 
-#define USB_VENDOR_ID_BRIGHT		0x1241
-#define USB_DEVICE_ID_BRIGHT_ABNT2	0x1503
-
 #define USB_VENDOR_ID_BERKSHIRE		0x0c98
 #define USB_DEVICE_ID_BERKSHIRE_PCWD	0x1140
 
@@ -141,10 +138,6 @@
 #define USB_DEVICE_ID_CYPRESS_BARCODE_1	0xde61
 #define USB_DEVICE_ID_CYPRESS_BARCODE_2	0xde64
 
-#define USB_VENDOR_ID_DELL		0x413c
-#define USB_DEVICE_ID_DELL_W7658	0x2005
-#define USB_DEVICE_ID_DELL_SK8115	0x2105
-
 #define USB_VENDOR_ID_DELORME		0x1163
 #define USB_DEVICE_ID_DELORME_EARTHMATE 0x0100
 #define USB_DEVICE_ID_DELORME_EM_LT20	0x0200
@@ -167,9 +160,6 @@
 
 #define USB_VENDOR_ID_GENERAL_TOUCH	0x0dfc
 
-#define USB_VENDOR_ID_GENERIC_13BA	0x13ba
-#define USB_DEVICE_ID_GENERIC_13BA_KBD_MOUSE	0x0017
-
 #define USB_VENDOR_ID_GLAB		0x06c2
 #define USB_DEVICE_ID_4_PHIDGETSERVO_30	0x0038
 #define USB_DEVICE_ID_1_PHIDGETSERVO_30	0x0039
@@ -292,7 +282,6 @@
 #define USB_DEVICE_ID_LOGITECH_WHEEL	0xc294
 #define USB_DEVICE_ID_LOGITECH_MOMO_WHEEL	0xc295
 #define USB_DEVICE_ID_LOGITECH_ELITE_KBD	0xc30a
-#define USB_DEVICE_ID_LOGITECH_KBD	0xc311
 #define USB_DEVICE_ID_S510_RECEIVER	0xc50c
 #define USB_DEVICE_ID_S510_RECEIVER_2	0xc517
 #define USB_DEVICE_ID_LOGITECH_CORDLESS_DESKTOP_LX500	0xc512
diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c
index 2bae340..83e07c9 100644
--- a/drivers/hid/hid-lg.c
+++ b/drivers/hid/hid-lg.c
@@ -26,7 +26,6 @@
 #define LG_RDESC		0x001
 #define LG_BAD_RELATIVE_KEYS	0x002
 #define LG_DUPLICATE_USAGES	0x004
-#define LG_RESET_LEDS		0x008
 #define LG_EXPANDED_KEYMAP	0x010
 #define LG_IGNORE_DOUBLED_WHEEL	0x020
 #define LG_WIRELESS		0x040
@@ -248,9 +247,6 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		goto err_free;
 	}
 
-	if (quirks & LG_RESET_LEDS)
-		usbhid_set_leds(hdev);
-
 	if (quirks & LG_FF)
 		lgff_init(hdev);
 	if (quirks & LG_FF2)
@@ -279,9 +275,6 @@ static const struct hid_device_id lg_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_MINI),
 		.driver_data = LG_DUPLICATE_USAGES },
 
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KBD),
-		.driver_data = LG_RESET_LEDS },
-
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_ELITE_KBD),
 		.driver_data = LG_IGNORE_DOUBLED_WHEEL | LG_EXPANDED_KEYMAP },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_CORDLESS_DESKTOP_LX500),
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 2afc861..6383145 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -874,6 +874,15 @@ static int usbhid_start(struct hid_device *hid)
 
 	set_bit(HID_STARTED, &usbhid->iofl);
 
+	/* Some keyboards don't work until their LEDs have been set.
+	 * Since BIOSes do set the LEDs, it must be safe for any device
+	 * that supports the keyboard boot protocol.
+	 */
+	if (interface->desc.bInterfaceSubClass == USB_INTERFACE_SUBCLASS_BOOT &&
+			interface->desc.bInterfaceProtocol ==
+				USB_INTERFACE_PROTOCOL_KEYBOARD)
+		usbhid_set_leds(hid);
+
 	return 0;
 
 fail:
-- 
cgit v0.10.2


From 6bbe586fd4d94439f3960e200056ff057f7db5c6 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Fri, 31 Oct 2008 00:12:32 +0100
Subject: HID: struct device - replace bus_id with dev_name(), dev_set_name()

This patch is part of a larger patch series which will remove
the "char bus_id[20]" name string from struct device. The device
name is managed in the kobject anyway, and without any size
limitation, and just needlessly copied into "struct device".

To set and read the device name dev_name(dev) and dev_set_name(dev)
must be used. If your code uses static kobjects, which it shouldn't
do, "const char *init_name" can be used to statically provide the
name the registered device should have. At registration time, the
init_name field is cleared, to enforce the use of dev_name(dev) to
access the device name at a later time.

We need to get rid of all occurrences of bus_id in the entire tree
to be able to enable the new interface. Please apply this patch,
and possibly convert any remaining remaining occurrences of bus_id.

We want to submit a patch to -next, which will remove bus_id from
"struct device", to find the remaining pieces to convert, and finally
switch over to the new api, which will remove the 20 bytes array
and does no longer have a size limitation.

CC: Jiri Kosina <jkosina@suse.cz>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 8be3003..8624a8f 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1616,9 +1616,10 @@ int hid_add_device(struct hid_device *hdev)
 	if (hid_ignore(hdev))
 		return -ENODEV;
 
-	/* XXX hack, any other cleaner solution < 20 bus_id bytes? */
-	sprintf(hdev->dev.bus_id, "%04X:%04X:%04X.%04X", hdev->bus,
-			hdev->vendor, hdev->product, atomic_inc_return(&id));
+	/* XXX hack, any other cleaner solution after the driver core
+	 * is converted to allow more than 20 bytes as the device name? */
+	dev_set_name(&hdev->dev, "%04X:%04X:%04X.%04X", hdev->bus,
+		     hdev->vendor, hdev->product, atomic_inc_return(&id));
 
 	ret = device_add(&hdev->dev);
 	if (!ret)
-- 
cgit v0.10.2


From 9188e79ec3fd43a0a605274324aecfb731baa88b Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Wed, 12 Nov 2008 16:14:08 +0100
Subject: HID: add phys and name ioctls to hidraw

The hiddev interface provides ioctl() calls which can be used
to obtain phys and raw name of the underlying device.

Add the corresponding support also into hidraw.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 7685ae6..975edd8 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -265,6 +265,34 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd,
 				break;
 			}
 		default:
+			{
+				struct hid_device *hid = dev->hid;
+				if (_IOC_TYPE(cmd) != 'H' || _IOC_DIR(cmd) != _IOC_READ)
+					return -EINVAL;
+
+				if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWNAME(0))) {
+					int len;
+					if (!hid->name)
+						return 0;
+					len = strlen(hid->name) + 1;
+					if (len > _IOC_SIZE(cmd))
+						len = _IOC_SIZE(cmd);
+					return copy_to_user(user_arg, hid->name, len) ?
+						-EFAULT : len;
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWPHYS(0))) {
+					int len;
+					if (!hid->phys)
+						return 0;
+					len = strlen(hid->phys) + 1;
+					if (len > _IOC_SIZE(cmd))
+						len = _IOC_SIZE(cmd);
+					return copy_to_user(user_arg, hid->phys, len) ?
+						-EFAULT : len;
+				}
+                }
+
 			ret = -ENOTTY;
 	}
 	unlock_kernel();
diff --git a/include/linux/hidraw.h b/include/linux/hidraw.h
index dbb5c8c..dd8d692 100644
--- a/include/linux/hidraw.h
+++ b/include/linux/hidraw.h
@@ -33,6 +33,8 @@ struct hidraw_devinfo {
 #define HIDIOCGRDESCSIZE	_IOR('H', 0x01, int)
 #define HIDIOCGRDESC		_IOR('H', 0x02, struct hidraw_report_descriptor)
 #define HIDIOCGRAWINFO		_IOR('H', 0x03, struct hidraw_devinfo)
+#define HIDIOCGRAWNAME(len)     _IOC(_IOC_READ, 'H', 0x04, len)
+#define HIDIOCGRAWPHYS(len)     _IOC(_IOC_READ, 'H', 0x05, len)
 
 #define HIDRAW_FIRST_MINOR 0
 #define HIDRAW_MAX_DEVICES 64
-- 
cgit v0.10.2


From 94011f93f2cd7410401e22390cf7a14fe5495a22 Mon Sep 17 00:00:00 2001
From: Rafi Rubin <rafi@seas.upenn.edu>
Date: Wed, 19 Nov 2008 15:54:46 +0100
Subject: HID: add n-trig digitizer support

Added quirks for the N-Trig digitizer.

Signed-off-by: Rafi Rubin <rafi@seas.upenn.edu>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 65b577e..aadef9a 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -187,6 +187,13 @@ config HID_MONTEREY
 	---help---
 	Support for Monterey Genius KB29E.
 
+config HID_NTRIG
+	tristate "NTrig" if EMBEDDED
+	depends on USB_HID
+	default y
+	---help---
+	Support for N-Trig touch screen.
+
 config HID_PANTHERLORD
 	tristate "Pantherlord devices support" if EMBEDDED
 	depends on USB_HID
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index e2294a8..7d34e8b 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_HID_GYRATION)	+= hid-gyration.o
 obj-$(CONFIG_HID_LOGITECH)	+= hid-logitech.o
 obj-$(CONFIG_HID_MICROSOFT)	+= hid-microsoft.o
 obj-$(CONFIG_HID_MONTEREY)	+= hid-monterey.o
+obj-$(CONFIG_HID_NTRIG)		+= hid-ntrig.o
 obj-$(CONFIG_HID_PANTHERLORD)	+= hid-pl.o
 obj-$(CONFIG_HID_PETALYNX)	+= hid-petalynx.o
 obj-$(CONFIG_HID_SAMSUNG)	+= hid-samsung.o
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 8624a8f..344f8fd 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1292,6 +1292,7 @@ static const struct hid_device_id hid_blacklist[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_USB) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MONTEREY, USB_DEVICE_ID_GENIUS_KB29E) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
diff --git a/drivers/hid/hid-dummy.c b/drivers/hid/hid-dummy.c
index e148f86..4a6af3c 100644
--- a/drivers/hid/hid-dummy.c
+++ b/drivers/hid/hid-dummy.c
@@ -43,6 +43,9 @@ static int __init hid_dummy_init(void)
 #ifdef CONFIG_HID_MONTEREY_MODULE
 	HID_COMPAT_CALL_DRIVER(monterey);
 #endif
+#ifdef CONFIG_HID_NTRIG_MODULE
+	HID_COMPAT_CALL_DRIVER(ntrig);
+#endif
 #ifdef CONFIG_HID_PANTHERLORD_MODULE
 	HID_COMPAT_CALL_DRIVER(pantherlord);
 #endif
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index aae2cec..2b7b6ee 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -328,6 +328,9 @@
 #define USB_VENDOR_ID_NEC		0x073e
 #define USB_DEVICE_ID_NEC_USB_GAME_PAD	0x0301
 
+#define USB_VENDOR_ID_NTRIG                0x1b96
+#define USB_DEVICE_ID_NTRIG_TOUCH_SCREEN   0x0001
+
 #define USB_VENDOR_ID_ONTRAK		0x0a07
 #define USB_DEVICE_ID_ONTRAK_ADU100	0x0064
 
diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c
new file mode 100644
index 0000000..db44fbd
--- /dev/null
+++ b/drivers/hid/hid-ntrig.c
@@ -0,0 +1,82 @@
+/*
+ *  HID driver for some ntrig "special" devices
+ *
+ *  Copyright (c) 1999 Andreas Gal
+ *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
+ *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
+ *  Copyright (c) 2006-2007 Jiri Kosina
+ *  Copyright (c) 2007 Paul Walmsley
+ *  Copyright (c) 2008 Jiri Slaby
+ *  Copyright (c) 2008 Rafi Rubin
+ *
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/hid.h>
+#include <linux/module.h>
+
+#include "hid-ids.h"
+
+#define NTRIG_DUPLICATE_USAGES	0x001
+
+#define nt_map_key_clear(c)	hid_map_usage_clear(hi, usage, bit, max, \
+					EV_KEY, (c))
+
+static int ntrig_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+		struct hid_field *field, struct hid_usage *usage,
+		unsigned long **bit, int *max)
+{
+	if ((usage->hid & HID_USAGE_PAGE) == HID_UP_DIGITIZER &&
+			(usage->hid & 0xff) == 0x47) {
+		nt_map_key_clear(BTN_TOOL_DOUBLETAP);
+		return 1;
+	}
+	return 0;
+}
+
+static int ntrig_input_mapped(struct hid_device *hdev, struct hid_input *hi,
+		struct hid_field *field, struct hid_usage *usage,
+		unsigned long **bit, int *max)
+{
+	if (usage->type == EV_KEY || usage->type == EV_REL
+			|| usage->type == EV_ABS)
+		clear_bit(usage->code, *bit);
+
+	return 0;
+}
+static const struct hid_device_id ntrig_devices[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN),
+		.driver_data = NTRIG_DUPLICATE_USAGES },
+	{ }
+};
+MODULE_DEVICE_TABLE(hid, ntrig_devices);
+
+static struct hid_driver ntrig_driver = {
+	.name = "ntrig",
+	.id_table = ntrig_devices,
+	.input_mapping = ntrig_input_mapping,
+	.input_mapped = ntrig_input_mapped,
+};
+
+static int ntrig_init(void)
+{
+	return hid_register_driver(&ntrig_driver);
+}
+
+static void ntrig_exit(void)
+{
+	hid_unregister_driver(&ntrig_driver);
+}
+
+module_init(ntrig_init);
+module_exit(ntrig_exit);
+MODULE_LICENSE("GPL");
+
+HID_COMPAT_LOAD_DRIVER(ntrig);
-- 
cgit v0.10.2


From 0ed94b334265b6ee3e3336b4fedacfa9cb2ccaba Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 24 Nov 2008 16:20:07 +0100
Subject: HID: move usbhid flags to usbhid.h

Move usbhid specific flags from global hid.h into local usbhid.h.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h
index 332abcd..9eb3056 100644
--- a/drivers/hid/usbhid/usbhid.h
+++ b/drivers/hid/usbhid/usbhid.h
@@ -40,6 +40,16 @@ int usbhid_open(struct hid_device *hid);
 void usbhid_init_reports(struct hid_device *hid);
 void usbhid_submit_report(struct hid_device *hid, struct hid_report *report, unsigned char dir);
 
+/* iofl flags */
+#define HID_CTRL_RUNNING	1
+#define HID_OUT_RUNNING		2
+#define HID_IN_RUNNING		3
+#define HID_RESET_PENDING	4
+#define HID_SUSPENDED		5
+#define HID_CLEAR_HALT		6
+#define HID_DISCONNECTED	7
+#define HID_STARTED		8
+
 /*
  * USB-specific HID struct, to be pointed to
  * from struct hid_device->driver_data
diff --git a/include/linux/hid.h b/include/linux/hid.h
index e5780f8..2c20f20 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -403,15 +403,6 @@ struct hid_output_fifo {
 #define HID_STAT_ADDED		1
 #define HID_STAT_PARSED		2
 
-#define HID_CTRL_RUNNING	1
-#define HID_OUT_RUNNING		2
-#define HID_IN_RUNNING		3
-#define HID_RESET_PENDING	4
-#define HID_SUSPENDED		5
-#define HID_CLEAR_HALT		6
-#define HID_DISCONNECTED	7
-#define HID_STARTED		8
-
 struct hid_input {
 	struct list_head list;
 	struct hid_report *report;
-- 
cgit v0.10.2


From 581a2739607b5fdfb6b22d6083fc7f83c441077f Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 24 Nov 2008 16:20:08 +0100
Subject: HID: usbhid, use usb_endpoint_xfer_int

Use usb_endpoint_xfer_int() instead of direct use of constants.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 6383145..832e469 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -805,7 +805,7 @@ static int usbhid_start(struct hid_device *hid)
 		int interval;
 
 		endpoint = &interface->endpoint[n].desc;
-		if ((endpoint->bmAttributes & 3) != 3)		/* Not an interrupt endpoint */
+		if (!usb_endpoint_xfer_int(endpoint))
 			continue;
 
 		interval = endpoint->bInterval;
-- 
cgit v0.10.2


From 898089d08f983ef0fdb176267620543a7929826a Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 24 Nov 2008 16:20:06 +0100
Subject: HID: use GFP_KERNEL in hid_alloc_buffers

We might sleep, so no problem to use GFP_KERNEL.

While at it bring the function to coding style.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 832e469..03cb494 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -651,13 +651,16 @@ static int hid_alloc_buffers(struct usb_device *dev, struct hid_device *hid)
 {
 	struct usbhid_device *usbhid = hid->driver_data;
 
-	if (!(usbhid->inbuf = usb_buffer_alloc(dev, usbhid->bufsize, GFP_ATOMIC, &usbhid->inbuf_dma)))
-		return -1;
-	if (!(usbhid->outbuf = usb_buffer_alloc(dev, usbhid->bufsize, GFP_ATOMIC, &usbhid->outbuf_dma)))
-		return -1;
-	if (!(usbhid->cr = usb_buffer_alloc(dev, sizeof(*(usbhid->cr)), GFP_ATOMIC, &usbhid->cr_dma)))
-		return -1;
-	if (!(usbhid->ctrlbuf = usb_buffer_alloc(dev, usbhid->bufsize, GFP_ATOMIC, &usbhid->ctrlbuf_dma)))
+	usbhid->inbuf = usb_buffer_alloc(dev, usbhid->bufsize, GFP_KERNEL,
+			&usbhid->inbuf_dma);
+	usbhid->outbuf = usb_buffer_alloc(dev, usbhid->bufsize, GFP_KERNEL,
+			&usbhid->outbuf_dma);
+	usbhid->cr = usb_buffer_alloc(dev, sizeof(*usbhid->cr), GFP_KERNEL,
+			&usbhid->cr_dma);
+	usbhid->ctrlbuf = usb_buffer_alloc(dev, usbhid->bufsize, GFP_KERNEL,
+			&usbhid->ctrlbuf_dma);
+	if (!usbhid->inbuf || !usbhid->outbuf || !usbhid->cr ||
+			!usbhid->ctrlbuf)
 		return -1;
 
 	return 0;
-- 
cgit v0.10.2


From 3a6f82f7a22cf19687f556997c6978b31c109360 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 24 Nov 2008 16:20:09 +0100
Subject: HID: add dynids facility

Allow adding new devices to the hid drivers on the fly without
a need of kernel recompilation.

Now, one can test a driver e.g. by:
echo 0003:045E:00F0.0003 > ../generic-usb/unbind
echo 0003 045E 00F0 > new_id
from some driver subdir.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 344f8fd..34cc3b0 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1304,12 +1304,92 @@ static const struct hid_device_id hid_blacklist[] = {
 	{ }
 };
 
+struct hid_dynid {
+	struct list_head list;
+	struct hid_device_id id;
+};
+
+/**
+ * store_new_id - add a new HID device ID to this driver and re-probe devices
+ * @driver: target device driver
+ * @buf: buffer for scanning device ID data
+ * @count: input size
+ *
+ * Adds a new dynamic hid device ID to this driver,
+ * and causes the driver to probe for all devices again.
+ */
+static ssize_t store_new_id(struct device_driver *drv, const char *buf,
+		size_t count)
+{
+	struct hid_driver *hdrv = container_of(drv, struct hid_driver, driver);
+	struct hid_dynid *dynid;
+	__u32 bus, vendor, product;
+	unsigned long driver_data = 0;
+	int ret;
+
+	ret = sscanf(buf, "%x %x %x %lx",
+			&bus, &vendor, &product, &driver_data);
+	if (ret < 3)
+		return -EINVAL;
+
+	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
+	if (!dynid)
+		return -ENOMEM;
+
+	dynid->id.bus = bus;
+	dynid->id.vendor = vendor;
+	dynid->id.product = product;
+	dynid->id.driver_data = driver_data;
+
+	spin_lock(&hdrv->dyn_lock);
+	list_add_tail(&dynid->list, &hdrv->dyn_list);
+	spin_unlock(&hdrv->dyn_lock);
+
+	ret = 0;
+	if (get_driver(&hdrv->driver)) {
+		ret = driver_attach(&hdrv->driver);
+		put_driver(&hdrv->driver);
+	}
+
+	return ret ? : count;
+}
+static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
+
+static void hid_free_dynids(struct hid_driver *hdrv)
+{
+	struct hid_dynid *dynid, *n;
+
+	spin_lock(&hdrv->dyn_lock);
+	list_for_each_entry_safe(dynid, n, &hdrv->dyn_list, list) {
+		list_del(&dynid->list);
+		kfree(dynid);
+	}
+	spin_unlock(&hdrv->dyn_lock);
+}
+
+static const struct hid_device_id *hid_match_device(struct hid_device *hdev,
+		struct hid_driver *hdrv)
+{
+	struct hid_dynid *dynid;
+
+	spin_lock(&hdrv->dyn_lock);
+	list_for_each_entry(dynid, &hdrv->dyn_list, list) {
+		if (hid_match_one_id(hdev, &dynid->id)) {
+			spin_unlock(&hdrv->dyn_lock);
+			return &dynid->id;
+		}
+	}
+	spin_unlock(&hdrv->dyn_lock);
+
+	return hid_match_id(hdev, hdrv->id_table);
+}
+
 static int hid_bus_match(struct device *dev, struct device_driver *drv)
 {
 	struct hid_driver *hdrv = container_of(drv, struct hid_driver, driver);
 	struct hid_device *hdev = container_of(dev, struct hid_device, dev);
 
-	if (!hid_match_id(hdev, hdrv->id_table))
+	if (!hid_match_device(hdev, hdrv))
 		return 0;
 
 	/* generic wants all non-blacklisted */
@@ -1328,7 +1408,7 @@ static int hid_device_probe(struct device *dev)
 	int ret = 0;
 
 	if (!hdev->driver) {
-		id = hid_match_id(hdev, hdrv->id_table);
+		id = hid_match_device(hdev, hdrv);
 		if (id == NULL)
 			return -ENODEV;
 
@@ -1695,18 +1775,33 @@ EXPORT_SYMBOL_GPL(hid_destroy_device);
 int __hid_register_driver(struct hid_driver *hdrv, struct module *owner,
 		const char *mod_name)
 {
+	int ret;
+
 	hdrv->driver.name = hdrv->name;
 	hdrv->driver.bus = &hid_bus_type;
 	hdrv->driver.owner = owner;
 	hdrv->driver.mod_name = mod_name;
 
-	return driver_register(&hdrv->driver);
+	INIT_LIST_HEAD(&hdrv->dyn_list);
+	spin_lock_init(&hdrv->dyn_lock);
+
+	ret = driver_register(&hdrv->driver);
+	if (ret)
+		return ret;
+
+	ret = driver_create_file(&hdrv->driver, &driver_attr_new_id);
+	if (ret)
+		driver_unregister(&hdrv->driver);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(__hid_register_driver);
 
 void hid_unregister_driver(struct hid_driver *hdrv)
 {
+	driver_remove_file(&hdrv->driver, &driver_attr_new_id);
 	driver_unregister(&hdrv->driver);
+	hid_free_dynids(hdrv);
 }
 EXPORT_SYMBOL_GPL(hid_unregister_driver);
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 2c20f20..215035b 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -531,6 +531,8 @@ struct hid_usage_id {
  * @name: driver name (e.g. "Footech_bar-wheel")
  * @id_table: which devices is this driver for (must be non-NULL for probe
  * 	      to be called)
+ * @dyn_list: list of dynamically added device ids
+ * @dyn_lock: lock protecting @dyn_list
  * @probe: new device inserted
  * @remove: device removed (NULL if not a hot-plug capable driver)
  * @report_table: on which reports to call raw_event (NULL means all)
@@ -558,6 +560,9 @@ struct hid_driver {
 	char *name;
 	const struct hid_device_id *id_table;
 
+	struct list_head dyn_list;
+	spinlock_t dyn_lock;
+
 	int (*probe)(struct hid_device *dev, const struct hid_device_id *id);
 	void (*remove)(struct hid_device *dev);
 
-- 
cgit v0.10.2


From aae6c286dad33c7f2c6992b9e310a371f2ae377e Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 4 Dec 2008 16:16:46 +0100
Subject: HID: set proper dev.parent in hidraw

We need to properly set parent of the hidraw device (which is the
corresponding physical device itself) in order to hidraw devices not
end up under virtual device tree.

Reported-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 975edd8..aab5911 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -357,7 +357,7 @@ int hidraw_connect(struct hid_device *hid)
 		goto out;
 	}
 
-	dev->dev = device_create(hidraw_class, NULL, MKDEV(hidraw_major, minor),
+	dev->dev = device_create(hidraw_class, &hid->dev, MKDEV(hidraw_major, minor),
 				 NULL, "%s%d", "hidraw", minor);
 
 	if (IS_ERR(dev->dev)) {
-- 
cgit v0.10.2


From d04b431e3d769fbbf26c4f4072002375c8cc4ed9 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Thu, 11 Dec 2008 14:54:07 +0100
Subject: HID: switch specialized drivers from "default y" to !EMBEDDED

Fix the obnoxious "default y" for all the "special" HID code, which forces folk
with EMBEDDED defined to manually override that inappropriate default for
almost 20 choices.  The general policy is against "default y"; it should apply
here too.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index aadef9a..1033129 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -85,14 +85,14 @@ config HID_COMPAT
 config HID_A4TECH
 	tristate "A4 tech" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for A4 tech X5 and WOP-35 / Trust 450L mice.
 
 config HID_APPLE
 	tristate "Apple" if EMBEDDED
 	depends on (USB_HID || BT_HIDP)
-	default y
+	default !EMBEDDED
 	---help---
 	Support for some Apple devices which less or more break
 	HID specification.
@@ -103,49 +103,49 @@ config HID_APPLE
 config HID_BELKIN
 	tristate "Belkin" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Belkin Flip KVM and Wireless keyboard.
 
 config HID_CHERRY
 	tristate "Cherry" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Cherry Cymotion keyboard.
 
 config HID_CHICONY
 	tristate "Chicony" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Chicony Tactical pad.
 
 config HID_CYPRESS
 	tristate "Cypress" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for cypress mouse and barcode readers.
 
 config HID_EZKEY
 	tristate "Ezkey" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Ezkey BTC 8193 keyboard.
 
 config HID_GYRATION
 	tristate "Gyration" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Gyration remote control.
 
 config HID_LOGITECH
 	tristate "Logitech" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Logitech devices that are not fully compliant with HID standard.
 
@@ -176,28 +176,28 @@ config LOGIRUMBLEPAD2_FF
 config HID_MICROSOFT
 	tristate "Microsoft" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Microsoft devices that are not fully compliant with HID standard.
 
 config HID_MONTEREY
 	tristate "Monterey" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Monterey Genius KB29E.
 
 config HID_NTRIG
 	tristate "NTrig" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for N-Trig touch screen.
 
 config HID_PANTHERLORD
 	tristate "Pantherlord devices support" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for PantherLord/GreenAsia based device support.
 
@@ -212,28 +212,28 @@ config PANTHERLORD_FF
 config HID_PETALYNX
 	tristate "Petalynx" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Petalynx Maxter remote control.
 
 config HID_SAMSUNG
 	tristate "Samsung" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Samsung InfraRed remote control.
 
 config HID_SONY
 	tristate "Sony" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Sony PS3 controller.
 
 config HID_SUNPLUS
 	tristate "Sunplus" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Support for Sunplus wireless desktop.
 
-- 
cgit v0.10.2


From 42859e0bd21daba9974757fcfe4a4dde265fe28d Mon Sep 17 00:00:00 2001
From: Lukasz Lubojanski <lukasz@lubojanski.info>
Date: Thu, 11 Dec 2008 22:07:59 +0100
Subject: HID: force feedback driver for GreenAsia 0x12 PID

I have implemented Force Feedback driver for another "GreeAsia" based device
(0e8f:0012 "GreenAsia Inc. USB Joystick"). The functionality was tested with
MANTA Warior MM816 and SpeedLink Strike2 SL-6635 and fftest software -
everything seems to work right.

Signed-off-by: Lukasz Lubojanski <lukasz@lubojanski.info>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 1033129..81dd9b8 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -237,6 +237,15 @@ config HID_SUNPLUS
 	---help---
 	Support for Sunplus wireless desktop.
 
+config GREENASIA_FF
+	tristate "GreenAsia (Product ID 0x12) force feedback support"
+	depends on USB_HID
+	select INPUT_FF_MEMLESS
+	---help---
+	Say Y here if you have a GreenAsia (Product ID 0x12) based game controller
+	(like MANTA Warior MM816 and SpeedLink Strike2 SL-6635) or adapter
+	and want to enable force feedback support for it.
+
 config THRUSTMASTER_FF
 	tristate "ThrustMaster devices support"
 	depends on USB_HID
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index 7d34e8b..3354eac 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_HID_PETALYNX)	+= hid-petalynx.o
 obj-$(CONFIG_HID_SAMSUNG)	+= hid-samsung.o
 obj-$(CONFIG_HID_SONY)		+= hid-sony.o
 obj-$(CONFIG_HID_SUNPLUS)	+= hid-sunplus.o
+obj-$(CONFIG_GREENASIA_FF)	+= hid-gaff.o
 obj-$(CONFIG_THRUSTMASTER_FF)	+= hid-tmff.o
 obj-$(CONFIG_ZEROPLUS_FF)	+= hid-zpff.o
 
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 34cc3b0..8fd35a6 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1265,6 +1265,7 @@ static const struct hid_device_id hid_blacklist[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0012) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
diff --git a/drivers/hid/hid-dummy.c b/drivers/hid/hid-dummy.c
index 4a6af3c..b4cc0f7 100644
--- a/drivers/hid/hid-dummy.c
+++ b/drivers/hid/hid-dummy.c
@@ -61,6 +61,9 @@ static int __init hid_dummy_init(void)
 #ifdef CONFIG_HID_SUNPLUS_MODULE
 	HID_COMPAT_CALL_DRIVER(sunplus);
 #endif
+#ifdef CONFIG_GREENASIA_FF_MODULE
+	HID_COMPAT_CALL_DRIVER(greenasia);
+#endif
 #ifdef CONFIG_THRUSTMASTER_FF_MODULE
 	HID_COMPAT_CALL_DRIVER(thrustmaster);
 #endif
diff --git a/drivers/hid/hid-gaff.c b/drivers/hid/hid-gaff.c
new file mode 100644
index 0000000..71211f6
--- /dev/null
+++ b/drivers/hid/hid-gaff.c
@@ -0,0 +1,185 @@
+/*
+ *  Force feedback support for GreenAsia (Product ID 0x12) based devices
+ *
+ *  The devices are distributed under various names and the same USB device ID
+ *  can be used in many game controllers.
+ *
+ *
+ *  0e8f:0012 "GreenAsia Inc.    USB Joystick     "
+ *   - tested with MANTA Warior MM816 and SpeedLink Strike2 SL-6635.
+ *
+ *  Copyright (c) 2008 Lukasz Lubojanski <lukasz@lubojanski.info>
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/input.h>
+#include <linux/usb.h>
+#include <linux/hid.h>
+#include "hid-ids.h"
+#include "usbhid/usbhid.h"
+
+struct gaff_device {
+	struct hid_report *report;
+};
+
+static int hid_gaff_play(struct input_dev *dev, void *data,
+			 struct ff_effect *effect)
+{
+	struct hid_device *hid = input_get_drvdata(dev);
+	struct gaff_device *gaff = data;
+	int left, right;
+
+	left = effect->u.rumble.strong_magnitude;
+	right = effect->u.rumble.weak_magnitude;
+
+	dbg_hid("called with 0x%04x 0x%04x", left, right);
+
+	left = left * 0xfe / 0xffff;
+	right = right * 0xfe / 0xffff;
+
+	gaff->report->field[0]->value[0] = 0x51;
+	gaff->report->field[0]->value[1] = 0x0;
+	gaff->report->field[0]->value[2] = right;
+	gaff->report->field[0]->value[3] = 0;
+	gaff->report->field[0]->value[4] = left;
+	gaff->report->field[0]->value[5] = 0;
+	dbg_hid("running with 0x%02x 0x%02x", left, right);
+	usbhid_submit_report(hid, gaff->report, USB_DIR_OUT);
+
+	gaff->report->field[0]->value[0] = 0xfa;
+	gaff->report->field[0]->value[1] = 0xfe;
+	gaff->report->field[0]->value[2] = 0x0;
+	gaff->report->field[0]->value[4] = 0x0;
+
+	usbhid_submit_report(hid, gaff->report, USB_DIR_OUT);
+
+	return 0;
+}
+
+static int gaff_init(struct hid_device *hid)
+{
+	struct gaff_device *gaff;
+	struct hid_report *report;
+	struct hid_input *hidinput = list_entry(hid->inputs.next,
+						struct hid_input, list);
+	struct list_head *report_list =
+			&hid->report_enum[HID_OUTPUT_REPORT].report_list;
+	struct list_head *report_ptr = report_list;
+	struct input_dev *dev = hidinput->input;
+	int error;
+
+	if (list_empty(report_list)) {
+		dev_err(&hid->dev, "no output reports found\n");
+		return -ENODEV;
+	}
+
+	report_ptr = report_ptr->next;
+
+	report = list_entry(report_ptr, struct hid_report, list);
+	if (report->maxfield < 1) {
+		dev_err(&hid->dev, "no fields in the report\n");
+		return -ENODEV;
+	}
+
+	if (report->field[0]->report_count < 6) {
+		dev_err(&hid->dev, "not enough values in the field\n");
+		return -ENODEV;
+	}
+
+	gaff = kzalloc(sizeof(struct gaff_device), GFP_KERNEL);
+	if (!gaff)
+		return -ENOMEM;
+
+	set_bit(FF_RUMBLE, dev->ffbit);
+
+	error = input_ff_create_memless(dev, gaff, hid_gaff_play);
+	if (error) {
+		kfree(gaff);
+		return error;
+	}
+
+	gaff->report = report;
+	gaff->report->field[0]->value[0] = 0x51;
+	gaff->report->field[0]->value[1] = 0x00;
+	gaff->report->field[0]->value[2] = 0x00;
+	gaff->report->field[0]->value[3] = 0x00;
+	usbhid_submit_report(hid, gaff->report, USB_DIR_OUT);
+
+	gaff->report->field[0]->value[0] = 0xfa;
+	gaff->report->field[0]->value[1] = 0xfe;
+
+	usbhid_submit_report(hid, gaff->report, USB_DIR_OUT);
+
+	dev_info(&hid->dev, "Force Feedback for GreenAsia 0x12"
+	       " devices by Lukasz Lubojanski <lukasz@lubojanski.info>\n");
+
+	return 0;
+}
+
+static int ga_probe(struct hid_device *hdev, const struct hid_device_id *id)
+{
+	int ret;
+
+	dev_dbg(&hdev->dev, "Greenasia HID hardware probe...");
+
+	ret = hid_parse(hdev);
+	if (ret) {
+		dev_err(&hdev->dev, "parse failed\n");
+		goto err;
+	}
+
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
+	if (ret) {
+		dev_err(&hdev->dev, "hw start failed\n");
+		goto err;
+	}
+
+	gaff_init(hdev);
+
+	return 0;
+err:
+	return ret;
+}
+
+static const struct hid_device_id ga_devices[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0012),  },
+	{ }
+};
+MODULE_DEVICE_TABLE(hid, ga_devices);
+
+static struct hid_driver ga_driver = {
+	.name = "greenasia",
+	.id_table = ga_devices,
+	.probe = ga_probe,
+};
+
+static int __init ga_init(void)
+{
+	return hid_register_driver(&ga_driver);
+}
+
+static void __exit ga_exit(void)
+{
+	hid_unregister_driver(&ga_driver);
+}
+
+module_init(ga_init);
+module_exit(ga_exit);
+MODULE_LICENSE("GPL");
+
+HID_COMPAT_LOAD_DRIVER(greenasia);
-- 
cgit v0.10.2


From 079034073faf974973baa0256b029451f6e768ad Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Tue, 16 Dec 2008 10:55:15 +0100
Subject: HID: hiddev cleanup -- handle all error conditions properly

This is a cleanup of hiddev and fixes the following issues:

- thread safety by locking in read & ioctl, introducing a per device mutex
- race between ioctl and disconnect, introducing a flag and locking
  in form of a per low level device mutex
- race between open and other methods, making sure only successfully
  opened devices are put on the list, changing order of events
- range checking both upper and lower limits of the minor range
- make sure further calls to open fail for unplugged devices even if
  the device still has opened files
- error checking for low level open
- possible loss of wakeup events, using standard waiting macros
- race in initialisation by moving registration after full initialisation

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 83e851a..6a98f9f 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -49,6 +49,7 @@
 struct hiddev {
 	int exist;
 	int open;
+	struct mutex existancelock;
 	wait_queue_head_t wait;
 	struct hid_device *hid;
 	struct list_head list;
@@ -63,6 +64,7 @@ struct hiddev_list {
 	struct fasync_struct *fasync;
 	struct hiddev *hiddev;
 	struct list_head node;
+	struct mutex thread_lock;
 };
 
 static struct hiddev *hiddev_table[HIDDEV_MINORS];
@@ -264,29 +266,48 @@ static int hiddev_release(struct inode * inode, struct file * file)
 static int hiddev_open(struct inode *inode, struct file *file)
 {
 	struct hiddev_list *list;
-	unsigned long flags;
+	int res;
 
 	int i = iminor(inode) - HIDDEV_MINOR_BASE;
 
-	if (i >= HIDDEV_MINORS || !hiddev_table[i])
+	if (i >= HIDDEV_MINORS || i < 0 || !hiddev_table[i])
 		return -ENODEV;
 
 	if (!(list = kzalloc(sizeof(struct hiddev_list), GFP_KERNEL)))
 		return -ENOMEM;
+	mutex_init(&list->thread_lock);
 
 	list->hiddev = hiddev_table[i];
 
-	spin_lock_irqsave(&list->hiddev->list_lock, flags);
-	list_add_tail(&list->node, &hiddev_table[i]->list);
-	spin_unlock_irqrestore(&list->hiddev->list_lock, flags);
 
 	file->private_data = list;
 
-	if (!list->hiddev->open++)
-		if (list->hiddev->exist)
-			usbhid_open(hiddev_table[i]->hid);
+	/*
+	 * no need for locking because the USB major number
+	 * is shared which usbcore guards against disconnect
+	 */
+	if (list->hiddev->exist) {
+		if (!list->hiddev->open++) {
+			res = usbhid_open(hiddev_table[i]->hid);
+			if (res < 0) {
+				res = -EIO;
+				goto bail;
+			}
+		}
+	} else {
+		res = -ENODEV;
+		goto bail;
+	}
+
+	spin_lock_irq(&list->hiddev->list_lock);
+	list_add_tail(&list->node, &hiddev_table[i]->list);
+	spin_unlock_irq(&list->hiddev->list_lock);
 
 	return 0;
+bail:
+	file->private_data = NULL;
+	kfree(list->hiddev);
+	return res;
 }
 
 /*
@@ -305,7 +326,7 @@ static ssize_t hiddev_read(struct file * file, char __user * buffer, size_t coun
 	DECLARE_WAITQUEUE(wait, current);
 	struct hiddev_list *list = file->private_data;
 	int event_size;
-	int retval = 0;
+	int retval;
 
 	event_size = ((list->flags & HIDDEV_FLAG_UREF) != 0) ?
 		sizeof(struct hiddev_usage_ref) : sizeof(struct hiddev_event);
@@ -313,10 +334,14 @@ static ssize_t hiddev_read(struct file * file, char __user * buffer, size_t coun
 	if (count < event_size)
 		return 0;
 
+	/* lock against other threads */
+	retval = mutex_lock_interruptible(&list->thread_lock);
+	if (retval)
+		return -ERESTARTSYS;
+
 	while (retval == 0) {
 		if (list->head == list->tail) {
-			add_wait_queue(&list->hiddev->wait, &wait);
-			set_current_state(TASK_INTERRUPTIBLE);
+			prepare_to_wait(&list->hiddev->wait, &wait, TASK_INTERRUPTIBLE);
 
 			while (list->head == list->tail) {
 				if (file->f_flags & O_NONBLOCK) {
@@ -332,35 +357,45 @@ static ssize_t hiddev_read(struct file * file, char __user * buffer, size_t coun
 					break;
 				}
 
+				/* let O_NONBLOCK tasks run */
+				mutex_unlock(&list->thread_lock);
 				schedule();
+				if (mutex_lock_interruptible(&list->thread_lock))
+					return -EINTR;
 				set_current_state(TASK_INTERRUPTIBLE);
 			}
+			finish_wait(&list->hiddev->wait, &wait);
 
-			set_current_state(TASK_RUNNING);
-			remove_wait_queue(&list->hiddev->wait, &wait);
 		}
 
-		if (retval)
+		if (retval) {
+			mutex_unlock(&list->thread_lock);
 			return retval;
+		}
 
 
 		while (list->head != list->tail &&
 		       retval + event_size <= count) {
 			if ((list->flags & HIDDEV_FLAG_UREF) == 0) {
-				if (list->buffer[list->tail].field_index !=
-				    HID_FIELD_INDEX_NONE) {
+				if (list->buffer[list->tail].field_index != HID_FIELD_INDEX_NONE) {
 					struct hiddev_event event;
+
 					event.hid = list->buffer[list->tail].usage_code;
 					event.value = list->buffer[list->tail].value;
-					if (copy_to_user(buffer + retval, &event, sizeof(struct hiddev_event)))
+					if (copy_to_user(buffer + retval, &event, sizeof(struct hiddev_event))) {
+						mutex_unlock(&list->thread_lock);
 						return -EFAULT;
+					}
 					retval += sizeof(struct hiddev_event);
 				}
 			} else {
 				if (list->buffer[list->tail].field_index != HID_FIELD_INDEX_NONE ||
 				    (list->flags & HIDDEV_FLAG_REPORT) != 0) {
-					if (copy_to_user(buffer + retval, list->buffer + list->tail, sizeof(struct hiddev_usage_ref)))
+
+					if (copy_to_user(buffer + retval, list->buffer + list->tail, sizeof(struct hiddev_usage_ref))) {
+						mutex_unlock(&list->thread_lock);
 						return -EFAULT;
+					}
 					retval += sizeof(struct hiddev_usage_ref);
 				}
 			}
@@ -368,6 +403,7 @@ static ssize_t hiddev_read(struct file * file, char __user * buffer, size_t coun
 		}
 
 	}
+	mutex_unlock(&list->thread_lock);
 
 	return retval;
 }
@@ -555,7 +591,7 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct hid_field *field;
 	struct usbhid_device *usbhid = hid->driver_data;
 	void __user *user_arg = (void __user *)arg;
-	int i;
+	int i, r;
 	
 	/* Called without BKL by compat methods so no BKL taken */
 
@@ -619,10 +655,22 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		}
 
 	case HIDIOCGSTRING:
-		return hiddev_ioctl_string(hiddev, cmd, user_arg);
+		mutex_lock(&hiddev->existancelock);
+		if (!hiddev->exist)
+			r = hiddev_ioctl_string(hiddev, cmd, user_arg);
+		else
+			r = -ENODEV;
+		mutex_unlock(&hiddev->existancelock);
+		return r;
 
 	case HIDIOCINITREPORT:
+		mutex_lock(&hiddev->existancelock);
+		if (!hiddev->exist) {
+			mutex_unlock(&hiddev->existancelock);
+			return -ENODEV;
+		}
 		usbhid_init_reports(hid);
+		mutex_unlock(&hiddev->existancelock);
 
 		return 0;
 
@@ -636,8 +684,12 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		if ((report = hiddev_lookup_report(hid, &rinfo)) == NULL)
 			return -EINVAL;
 
-		usbhid_submit_report(hid, report, USB_DIR_IN);
-		usbhid_wait_io(hid);
+		mutex_lock(&hiddev->existancelock);
+		if (hiddev->exist) {
+			usbhid_submit_report(hid, report, USB_DIR_IN);
+			usbhid_wait_io(hid);
+		}
+		mutex_unlock(&hiddev->existancelock);
 
 		return 0;
 
@@ -651,8 +703,12 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		if ((report = hiddev_lookup_report(hid, &rinfo)) == NULL)
 			return -EINVAL;
 
-		usbhid_submit_report(hid, report, USB_DIR_OUT);
-		usbhid_wait_io(hid);
+		mutex_lock(&hiddev->existancelock);
+		if (hiddev->exist) {
+			usbhid_submit_report(hid, report, USB_DIR_OUT);
+			usbhid_wait_io(hid);
+		}
+		mutex_unlock(&hiddev->existancelock);
 
 		return 0;
 
@@ -710,7 +766,13 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case HIDIOCGUSAGES:
 	case HIDIOCSUSAGES:
 	case HIDIOCGCOLLECTIONINDEX:
-		return hiddev_ioctl_usage(hiddev, cmd, user_arg);
+		mutex_lock(&hiddev->existancelock);
+		if (hiddev->exist)
+			r = hiddev_ioctl_usage(hiddev, cmd, user_arg);
+		else
+			r = -ENODEV;
+		mutex_unlock(&hiddev->existancelock);
+		return r;
 
 	case HIDIOCGCOLLECTIONINFO:
 		if (copy_from_user(&cinfo, user_arg, sizeof(cinfo)))
@@ -808,23 +870,22 @@ int hiddev_connect(struct hid_device *hid, unsigned int force)
 	if (!(hiddev = kzalloc(sizeof(struct hiddev), GFP_KERNEL)))
 		return -1;
 
-	retval = usb_register_dev(usbhid->intf, &hiddev_class);
-	if (retval) {
-		err_hid("Not able to get a minor for this device.");
-		kfree(hiddev);
-		return -1;
-	}
-
 	init_waitqueue_head(&hiddev->wait);
 	INIT_LIST_HEAD(&hiddev->list);
 	spin_lock_init(&hiddev->list_lock);
+	mutex_init(&hiddev->existancelock);
 	hiddev->hid = hid;
 	hiddev->exist = 1;
 
-	hid->minor = usbhid->intf->minor;
-	hid->hiddev = hiddev;
-
-	hiddev_table[usbhid->intf->minor - HIDDEV_MINOR_BASE] = hiddev;
+	retval = usb_register_dev(usbhid->intf, &hiddev_class);
+	if (retval) {
+		err_hid("Not able to get a minor for this device.");
+		kfree(hiddev);
+		return -1;
+	} else {
+		hid->minor = usbhid->intf->minor;
+		hiddev_table[usbhid->intf->minor - HIDDEV_MINOR_BASE] = hiddev;
+	}
 
 	return 0;
 }
@@ -839,7 +900,9 @@ void hiddev_disconnect(struct hid_device *hid)
 	struct hiddev *hiddev = hid->hiddev;
 	struct usbhid_device *usbhid = hid->driver_data;
 
+	mutex_lock(&hiddev->existancelock);
 	hiddev->exist = 0;
+	mutex_unlock(&hiddev->existancelock);
 
 	hiddev_table[hiddev->hid->minor - HIDDEV_MINOR_BASE] = NULL;
 	usb_deregister_dev(usbhid->intf, &hiddev_class);
-- 
cgit v0.10.2


From 725cf0f47dbb02e0482f081828cff73f55479b79 Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Tue, 16 Dec 2008 14:20:23 +0100
Subject: HID: avoid sparse warning in HID_COMPAT_LOAD_DRIVER

Impact: include a prototype for the exported function in the macro

Fix about 20 of this warnings:

  drivers/hid/hid-a4tech.c:162:1: warning: symbol 'hid_compat_a4tech' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 215035b..81aa84d 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -793,6 +793,8 @@ dbg_hid(const char *fmt, ...)
 
 #ifdef CONFIG_HID_COMPAT
 #define HID_COMPAT_LOAD_DRIVER(name)	\
+/* prototype to avoid sparse warning */	\
+extern void hid_compat_##name(void);	\
 void hid_compat_##name(void) { }	\
 EXPORT_SYMBOL(hid_compat_##name)
 #else
-- 
cgit v0.10.2


From ac09952babed8e2ac6999127b7f95d7a2bbfd7af Mon Sep 17 00:00:00 2001
From: Parag Warudkar <parag.lkml@gmail.com>
Date: Mon, 22 Dec 2008 22:50:52 +0100
Subject: HID: make boot protocol drivers depend on EMBEDDED

The usbmouse and usbkbd modules are not supposed to be used with regular USB
mice and keyboards. Make them depend on EMBEDDED to prevent them from being
built and loaded on non-EMBEDDED configs.

Signed-off-by: Parag Warudkar <parag.lkml@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/usbhid/Kconfig b/drivers/hid/usbhid/Kconfig
index 5d9aa95..4edb3be 100644
--- a/drivers/hid/usbhid/Kconfig
+++ b/drivers/hid/usbhid/Kconfig
@@ -45,7 +45,7 @@ config USB_HIDDEV
 	  If unsure, say Y.
 
 menu "USB HID Boot Protocol drivers"
-	depends on USB!=n && USB_HID!=y
+	depends on USB!=n && USB_HID!=y && EMBEDDED
 
 config USB_KBD
 	tristate "USB HIDBP Keyboard (simple Boot) support"
-- 
cgit v0.10.2


From f14f526d02b14fd0b8c1ac4ec413e4577ad5f62e Mon Sep 17 00:00:00 2001
From: Lev Babiev <harley@hosers.org>
Date: Sun, 4 Jan 2009 00:36:56 +0100
Subject: HID: driver for TopSeed Cyberlink quirky remote

I recently picked up a Cyberlink branded remote control produced
by TopSeed Tech Corp. Alas, it appears that this device is using
non-standard mappings for some of it's keys (Usage page 0xffbc).

Signed-off-by: Lev Babiev <harley@hosers.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 81dd9b8..4c65e75 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -246,6 +246,13 @@ config GREENASIA_FF
 	(like MANTA Warior MM816 and SpeedLink Strike2 SL-6635) or adapter
 	and want to enable force feedback support for it.
 
+config HID_TOPSEED
+	tristate "TopSeed Cyberlink remote control support" if EMBEDDED
+	depends on USB_HID
+	default y
+	---help---
+	Say Y if you have a TopSeed Cyberlink remote control.
+
 config THRUSTMASTER_FF
 	tristate "ThrustMaster devices support"
 	depends on USB_HID
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index 3354eac..fbd021f 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_HID_SONY)		+= hid-sony.o
 obj-$(CONFIG_HID_SUNPLUS)	+= hid-sunplus.o
 obj-$(CONFIG_GREENASIA_FF)	+= hid-gaff.o
 obj-$(CONFIG_THRUSTMASTER_FF)	+= hid-tmff.o
+obj-$(CONFIG_HID_TOPSEED)	+= hid-topseed.o
 obj-$(CONFIG_ZEROPLUS_FF)	+= hid-zpff.o
 
 obj-$(CONFIG_USB_HID)		+= usbhid/
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 8fd35a6..58a706d 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1299,6 +1299,7 @@ static const struct hid_device_id hid_blacklist[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
 
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, 0x030c) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 2b7b6ee..daced0b 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -375,6 +375,9 @@
 #define USB_VENDOR_ID_TOPMAX		0x0663
 #define USB_DEVICE_ID_TOPMAX_COBRAPAD	0x0103
 
+#define USB_VENDOR_ID_TOPSEED		0x0766
+#define USB_DEVICE_ID_TOPSEED_CYBERLINK	0x0204
+
 #define USB_VENDOR_ID_TURBOX		0x062a
 #define USB_DEVICE_ID_TURBOX_KEYBOARD	0x0201
 
diff --git a/drivers/hid/hid-topseed.c b/drivers/hid/hid-topseed.c
new file mode 100644
index 0000000..cca64a0
--- /dev/null
+++ b/drivers/hid/hid-topseed.c
@@ -0,0 +1,77 @@
+/*
+ *  HID driver for TopSeed Cyberlink remote
+ *
+ *  Copyright (c) 2008 Lev Babiev
+ *  based on hid-cherry driver
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/hid.h>
+#include <linux/module.h>
+
+#include "hid-ids.h"
+
+#define ts_map_key_clear(c)	hid_map_usage_clear(hi, usage, bit, max, \
+					EV_KEY, (c))
+static int ts_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+		struct hid_field *field, struct hid_usage *usage,
+		unsigned long **bit, int *max)
+{
+	if ((usage->hid & HID_USAGE_PAGE) != 0x0ffbc0000)
+		return 0;
+
+	switch (usage->hid & HID_USAGE) {
+        case 0x00d: ts_map_key_clear(KEY_HOME);           break;
+        case 0x024: ts_map_key_clear(KEY_MENU);           break;
+        case 0x025: ts_map_key_clear(KEY_TV);             break;
+        case 0x048: ts_map_key_clear(KEY_RED);            break;
+        case 0x047: ts_map_key_clear(KEY_GREEN);          break;
+        case 0x049: ts_map_key_clear(KEY_YELLOW);         break;
+        case 0x04a: ts_map_key_clear(KEY_BLUE);           break;
+        case 0x04b: ts_map_key_clear(KEY_ANGLE);          break;
+        case 0x04c: ts_map_key_clear(KEY_LANGUAGE);       break;
+        case 0x04d: ts_map_key_clear(KEY_SUBTITLE);       break;
+        case 0x031: ts_map_key_clear(KEY_AUDIO);          break;
+        case 0x032: ts_map_key_clear(KEY_TEXT);           break;
+        case 0x033: ts_map_key_clear(KEY_CHANNEL);        break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+static const struct hid_device_id ts_devices[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
+	{ }
+};
+MODULE_DEVICE_TABLE(hid, ts_devices);
+
+static struct hid_driver ts_driver = {
+	.name = "topseed",
+	.id_table = ts_devices,
+	.input_mapping = ts_input_mapping,
+};
+
+static int ts_init(void)
+{
+	return hid_register_driver(&ts_driver);
+}
+
+static void ts_exit(void)
+{
+	hid_unregister_driver(&ts_driver);
+}
+
+module_init(ts_init);
+module_exit(ts_exit);
+MODULE_LICENSE("GPL");
+
+HID_COMPAT_LOAD_DRIVER(topseed);
-- 
cgit v0.10.2


From 1db489b2953799d41098a891c85dea02e3c4721a Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Sun, 4 Jan 2009 00:39:08 +0100
Subject: HID: fix default Kconfig setting for TopSpeed driver

Make default setting for TopSpeed driver compliant with the defaults
of the other specialized HID drivers.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 4c65e75..e85c8fe 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -249,7 +249,7 @@ config GREENASIA_FF
 config HID_TOPSEED
 	tristate "TopSeed Cyberlink remote control support" if EMBEDDED
 	depends on USB_HID
-	default y
+	default !EMBEDDED
 	---help---
 	Say Y if you have a TopSeed Cyberlink remote control.
 
-- 
cgit v0.10.2


From 5f6108cf9be4a77d6bee96750aa4fe18b6b97dee Mon Sep 17 00:00:00 2001
From: Alexey Klimov <klimov.linux@gmail.com>
Date: Mon, 8 Dec 2008 12:40:14 +0100
Subject: HID: don't allow DealExtreme usb-radio be handled by usb hid driver

This device is already handled by radio-si470x driver, and we therefore
want usbhid to ignore it.  Patch places usb ids of that device in
ignore section of hid-core.c

Signed-off-by: Alexey Klimov <klimov.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 40df3e1..0ac2b66 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1420,6 +1420,7 @@ static const struct hid_device_id hid_ignore_list[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM109) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_HIDCOM) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_ULTRAMOUSE) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DEALEXTREAME, USB_DEVICE_ID_DEALEXTREAME_RADIO_SI4701) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 3928969..1fe0b8b 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -141,6 +141,9 @@
 #define USB_DEVICE_ID_CYPRESS_BARCODE_1	0xde61
 #define USB_DEVICE_ID_CYPRESS_BARCODE_2	0xde64
 
+#define USB_VENDOR_ID_DEALEXTREAME	0x10c5
+#define USB_DEVICE_ID_DEALEXTREAME_RADIO_SI4701	0x819a
+
 #define USB_VENDOR_ID_DELL		0x413c
 #define USB_DEVICE_ID_DELL_W7658	0x2005
 #define USB_DEVICE_ID_DELL_SK8115	0x2105
-- 
cgit v0.10.2


From 25e61613cf3ca7f6d5f89a707b20c9eed6b74455 Mon Sep 17 00:00:00 2001
From: Matt Helsley <matt.helsley@gmail.com>
Date: Sat, 13 Dec 2008 14:28:54 +0100
Subject: HID: add proper support for pensketch 12x9 tablet

The Genius PenSketch 12x9 tablet has a puck (labeled a
"Tablet Mouse") in addition to a pen. Without registering a quirk
the tablet appears to be a single input device that reports the
wrong axis information in /proc/bus/input/devices, and sends
incorrect events (e.g. ABS_Z instead of ABS_Y). This information
confuses the X evdev driver and makes the device impossible to
use.

The quirk fixes events and splits the device into multiple input
event devices so that at least the puck is useful.

Signed-off-by: Matt Helsley <matt.helsley@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 1fe0b8b..63aaa0f 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -389,6 +389,9 @@
 #define USB_VENDOR_ID_TURBOX		0x062a
 #define USB_DEVICE_ID_TURBOX_KEYBOARD	0x0201
 
+#define USB_VENDOR_ID_UCLOGIC		0x5543
+#define USB_DEVICE_ID_UCLOGIC_TABLET_PF1209	0x0042
+
 #define USB_VENDOR_ID_VERNIER		0x08f7
 #define USB_DEVICE_ID_VERNIER_LABPRO	0x0001
 #define USB_DEVICE_ID_VERNIER_GOTEMP	0x0002
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 47ebe04..4391717 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -54,6 +54,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_KEYBOARD, HID_QUIRK_NOGET },
+	{ USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD, HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS },
 	{ USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_QUAD_USB_JOYPAD, HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT },
 
-- 
cgit v0.10.2


From b8a832b1c0a70531b4bd69a67aa0bf72f6f2dc34 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Mon, 15 Dec 2008 13:12:08 +0100
Subject: HID: fix reference count leak hidraw

The hidraw subsystem has a bug that prevents the close syscall from ever
reaching the low level driver, leading to a resource leak. Fix by replacing
postdecrement with predecrement.

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 7685ae6..96ec1ba 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -208,7 +208,7 @@ static int hidraw_release(struct inode * inode, struct file * file)
 
 	list_del(&list->node);
 	dev = hidraw_table[minor];
-	if (!dev->open--) {
+	if (!--dev->open) {
 		if (list->hidraw->exist)
 			dev->hid->ll_driver->close(dev->hid);
 		else
-- 
cgit v0.10.2


From 4dfdc46468a142216b284eea66040f49df3f7191 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 30 Dec 2008 00:49:59 +0100
Subject: HID: fix error condition propagation in hid-sony driver

sony_set_operational() only propagates return value from
usb_control_msg(), which returns negative on error and number
of transferred bytes otherwise.

Reported-by: Marcin Tolysz <tolysz@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 86e563b..dd5a397 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -102,7 +102,7 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	}
 
 	ret = sony_set_operational(hdev);
-	if (ret)
+	if (ret < 0)
 		goto err_stop;
 
 	return 0;
-- 
cgit v0.10.2


From c66b9906f863696159e05890bb7123269bb9a9de Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 4 Jan 2009 10:55:02 +0100
Subject: intel-iommu: fix build error with INTR_REMAP=y and DMAR=n

dmar.o can be built in the CONFIG_INTR_REMAP=y case but
iommu_calculate_agaw() is only available if VT-d is built as well.

So create an inline version of iommu_calculate_agaw() for the
!CONFIG_DMAR case. The iommu->agaw value wont be used in this
case, but the code is cleaner (has less #ifdefs) if we have it around
unconditionally.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 136f170..af1dab4 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -17,7 +17,15 @@ struct dmar_domain;
 struct root_entry;
 
 extern void free_dmar_iommu(struct intel_iommu *iommu);
+
+#ifdef CONFIG_DMAR
 extern int iommu_calculate_agaw(struct intel_iommu *iommu);
+#else
+static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
+{
+	return 0;
+}
+#endif
 
 extern int dmar_disabled;
 
-- 
cgit v0.10.2


From 913ae5a24efd27deef4fc154953871b62d0d99cd Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 3 Jan 2009 17:54:53 +0100
Subject: ALSA: sound/usb: Use negated usb_endpoint_xfer_control, etc

This patch extends 42a6e66f1e40a930d093c33ba0bb9d8d8e4555ed by using
usb_endpoint_xfer_control, usb_endpoint_xfer_isoc, usb_endpoint_xfer_bulk,
and usb_endpoint_xfer_int in the negated case as well.

This patch also rewrites some calls to usb_endpoint_dir_in as negated calls
to !usb_endpoint_dir_out, and vice versa, to better correspond to the
intent of the original code.

The semantic patch that makes this change is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@ struct usb_endpoint_descriptor *epd; @@

- (usb_endpoint_type(epd) != \(USB_ENDPOINT_XFER_CONTROL\|0\))
+ !usb_endpoint_xfer_control(epd)

@@ struct usb_endpoint_descriptor *epd; @@

- (usb_endpoint_type(epd) != \(USB_ENDPOINT_XFER_ISOC\|1\))
+ !usb_endpoint_xfer_isoc(epd)

@@ struct usb_endpoint_descriptor *epd; @@

- (usb_endpoint_type(epd) != \(USB_ENDPOINT_XFER_BULK\|2\))
+ !usb_endpoint_xfer_bulk(epd)

@@ struct usb_endpoint_descriptor *epd; @@

- (usb_endpoint_type(epd) != \(USB_ENDPOINT_XFER_INT\|3\))
+ !usb_endpoint_xfer_int(epd)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c
index 3a9a9fe..320641a 100644
--- a/sound/usb/usbmidi.c
+++ b/sound/usb/usbmidi.c
@@ -1392,8 +1392,7 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi* umidi,
 	for (i = 0; i < intfd->bNumEndpoints; ++i) {
 		hostep = &hostif->endpoint[i];
 		ep = get_ep_desc(hostep);
-		if (usb_endpoint_type(ep) != USB_ENDPOINT_XFER_BULK &&
-		    usb_endpoint_type(ep) != USB_ENDPOINT_XFER_INT)
+		if (!usb_endpoint_xfer_bulk(ep) && !usb_endpoint_xfer_int(ep))
 			continue;
 		ms_ep = (struct usb_ms_endpoint_descriptor*)hostep->extra;
 		if (hostep->extralen < 4 ||
@@ -1495,8 +1494,8 @@ static int snd_usbmidi_detect_endpoints(struct snd_usb_midi* umidi,
 
 	for (i = 0; i < intfd->bNumEndpoints; ++i) {
 		epd = get_endpoint(hostif, i);
-		if (usb_endpoint_type(epd) != USB_ENDPOINT_XFER_BULK &&
-		    usb_endpoint_type(epd) != USB_ENDPOINT_XFER_INT)
+		if (!usb_endpoint_xfer_bulk(epd) &&
+		    !usb_endpoint_xfer_int(epd))
 			continue;
 		if (out_eps < max_endpoints &&
 		    usb_endpoint_dir_out(epd)) {
@@ -1607,21 +1606,19 @@ static int snd_usbmidi_create_endpoints_midiman(struct snd_usb_midi* umidi,
 	}
 
 	epd = get_endpoint(hostif, 0);
-	if (usb_endpoint_dir_out(epd) ||
-	    usb_endpoint_type(epd) != USB_ENDPOINT_XFER_INT) {
+	if (!usb_endpoint_dir_in(epd) || !usb_endpoint_xfer_int(epd)) {
 		snd_printdd(KERN_ERR "endpoint[0] isn't interrupt\n");
 		return -ENXIO;
 	}
 	epd = get_endpoint(hostif, 2);
-	if (usb_endpoint_dir_in(epd) ||
-	    usb_endpoint_type(epd) != USB_ENDPOINT_XFER_BULK) {
+	if (!usb_endpoint_dir_out(epd) || !usb_endpoint_xfer_bulk(epd)) {
 		snd_printdd(KERN_ERR "endpoint[2] isn't bulk output\n");
 		return -ENXIO;
 	}
 	if (endpoint->out_cables > 0x0001) {
 		epd = get_endpoint(hostif, 4);
-		if (usb_endpoint_dir_in(epd) ||
-		    usb_endpoint_type(epd) != USB_ENDPOINT_XFER_BULK) {
+		if (!usb_endpoint_dir_out(epd) ||
+		    !usb_endpoint_xfer_bulk(epd)) {
 			snd_printdd(KERN_ERR "endpoint[4] isn't bulk output\n");
 			return -ENXIO;
 		}
diff --git a/sound/usb/usbmixer.c b/sound/usb/usbmixer.c
index 9ce626f..00397c8 100644
--- a/sound/usb/usbmixer.c
+++ b/sound/usb/usbmixer.c
@@ -1755,8 +1755,7 @@ static int snd_usb_mixer_status_create(struct usb_mixer_interface *mixer)
 	if (get_iface_desc(hostif)->bNumEndpoints < 1)
 		return 0;
 	ep = get_endpoint(hostif, 0);
-	if (usb_endpoint_dir_out(ep) ||
-	    usb_endpoint_type(ep) != USB_ENDPOINT_XFER_INT)
+	if (!usb_endpoint_dir_in(ep) || !usb_endpoint_xfer_int(ep))
 		return 0;
 
 	epnum = usb_endpoint_num(ep);
-- 
cgit v0.10.2


From 6cd99b7828445dc18e9004c81067c36e8d9caa01 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 24 Oct 2008 19:25:27 +0200
Subject: mfd: Don't mark WM8350 security register as volatile

There's no need to read this back from the chip each time.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/wm8350-regmap.c b/drivers/mfd/wm8350-regmap.c
index 974678d..aaf394a 100644
--- a/drivers/mfd/wm8350-regmap.c
+++ b/drivers/mfd/wm8350-regmap.c
@@ -1307,7 +1307,7 @@ const struct wm8350_reg_access wm8350_reg_io_map[] = {
 	{ 0xFF3F, 0xE03F, 0x0000 }, /* R216 - Main Bandgap Control */
 	{ 0xEF2F, 0xE02F, 0x0000 }, /* R217 - OSC Control */
 	{ 0xF3FF, 0xB3FF, 0xc000 }, /* R218 - RTC Tick Control */
-	{ 0xFFFF, 0xFFFF, 0xFFFF }, /* R219 */
+	{ 0xFFFF, 0xFFFF, 0x0000 }, /* R219 - Security */
 	{ 0x09FF, 0x01FF, 0x0000 }, /* R220 - RAM BIST 1 */
 	{ 0x0000, 0x0000, 0x0000 }, /* R221 */
 	{ 0xFFFF, 0xFFFF, 0xFFFF }, /* R222 */
-- 
cgit v0.10.2


From 2c5212279a89224512e421fa9f8bd0fabbab77d8 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 8 Nov 2008 00:52:54 +0100
Subject: mfd: Remove i.MX31ism from WM8350 i2c driver

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/wm8350-i2c.c b/drivers/mfd/wm8350-i2c.c
index 3e0ce0e..876e693 100644
--- a/drivers/mfd/wm8350-i2c.c
+++ b/drivers/mfd/wm8350-i2c.c
@@ -1,8 +1,6 @@
 /*
  * wm8350-i2c.c  --  Generic I2C driver for Wolfson WM8350 PMIC
  *
- * This driver defines and configures the WM8350 for the Freescale i.MX32ADS.
- *
  * Copyright 2007, 2008 Wolfson Microelectronics PLC.
  *
  * Author: Liam Girdwood
-- 
cgit v0.10.2


From 858e674466427b1236eb5ef9568999a7df286b1e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 8 Nov 2008 00:57:33 +0100
Subject: mfd: Add some documentation for WM8350 register lock

Hopefully this will make the purpose of these functions a bit clearer,
it's not immediately obvious that the lock is a hardware feature.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 0d47fb9..d63a530 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -299,6 +299,13 @@ int wm8350_block_write(struct wm8350 *wm8350, int start_reg, int regs,
 }
 EXPORT_SYMBOL_GPL(wm8350_block_write);
 
+/**
+ * wm8350_reg_lock()
+ *
+ * The WM8350 has a hardware lock which can be used to prevent writes to
+ * some registers (generally those which can cause particularly serious
+ * problems if misused).  This function enables that lock.
+ */
 int wm8350_reg_lock(struct wm8350 *wm8350)
 {
 	u16 key = WM8350_LOCK_KEY;
@@ -314,6 +321,15 @@ int wm8350_reg_lock(struct wm8350 *wm8350)
 }
 EXPORT_SYMBOL_GPL(wm8350_reg_lock);
 
+/**
+ * wm8350_reg_unlock()
+ *
+ * The WM8350 has a hardware lock which can be used to prevent writes to
+ * some registers (generally those which can cause particularly serious
+ * problems if misused).  This function disables that lock so updates
+ * can be performed.  For maximum safety this should be done only when
+ * required.
+ */
 int wm8350_reg_unlock(struct wm8350 *wm8350)
 {
 	u16 key = WM8350_UNLOCK_KEY;
-- 
cgit v0.10.2


From 0c8a601678960fbcc1c1185a283d6d107575810b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 8 Nov 2008 01:10:16 +0100
Subject: mfd: Add WM8350 revision H support

No other software changes are required.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index d63a530..c013afd 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1202,9 +1202,14 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 			dev_info(wm8350->dev, "Found Rev G device\n");
 			wm8350->rev = WM8350_REV_G;
 			break;
+		case WM8350_REV_H:
+			dev_info(wm8350->dev, "Found Rev H device\n");
+			wm8350->rev = WM8350_REV_H;
+			break;
 		default:
 			/* For safety we refuse to run on unknown hardware */
-			dev_info(wm8350->dev, "Found unknown rev\n");
+			dev_info(wm8350->dev, "Found unknown rev %x\n",
+				 (id2 & WM8350_CHIP_REV_MASK) >> 12);
 			ret = -ENODEV;
 			goto err;
 		}
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 6ebf97f..9490ec1 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -536,6 +536,7 @@
 #define WM8350_REV_E				0x4
 #define WM8350_REV_F				0x5
 #define WM8350_REV_G				0x6
+#define WM8350_REV_H				0x7
 
 #define WM8350_NUM_IRQ				63
 
-- 
cgit v0.10.2


From 67488526349d043372d141c054f4dc6313780b3c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 8 Nov 2008 01:10:21 +0100
Subject: mfd: Add AUXADC support for WM8350

The auxiliary ADC in the WM8350 is shared between several subdevices
so access to it needs to be arbitrated by the core driver.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index c013afd..60439bd 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -63,7 +63,6 @@
  */
 static DEFINE_MUTEX(io_mutex);
 static DEFINE_MUTEX(reg_lock_mutex);
-static DEFINE_MUTEX(auxadc_mutex);
 
 /* Perform a physical read from the device.
  */
@@ -1082,6 +1081,55 @@ int wm8350_unmask_irq(struct wm8350 *wm8350, int irq)
 }
 EXPORT_SYMBOL_GPL(wm8350_unmask_irq);
 
+int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref)
+{
+	u16 reg, result = 0;
+	int tries = 5;
+
+	if (channel < WM8350_AUXADC_AUX1 || channel > WM8350_AUXADC_TEMP)
+		return -EINVAL;
+	if (channel >= WM8350_AUXADC_USB && channel <= WM8350_AUXADC_TEMP
+	    && (scale != 0 || vref != 0))
+		return -EINVAL;
+
+	mutex_lock(&wm8350->auxadc_mutex);
+
+	/* Turn on the ADC */
+	reg = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_5);
+	wm8350_reg_write(wm8350, WM8350_POWER_MGMT_5, reg | WM8350_AUXADC_ENA);
+
+	if (scale || vref) {
+		reg = scale << 13;
+		reg |= vref << 12;
+		wm8350_reg_write(wm8350, WM8350_AUX1_READBACK + channel, reg);
+	}
+
+	reg = wm8350_reg_read(wm8350, WM8350_DIGITISER_CONTROL_1);
+	reg |= 1 << channel | WM8350_AUXADC_POLL;
+	wm8350_reg_write(wm8350, WM8350_DIGITISER_CONTROL_1, reg);
+
+	do {
+		schedule_timeout_interruptible(1);
+		reg = wm8350_reg_read(wm8350, WM8350_DIGITISER_CONTROL_1);
+	} while (tries-- && (reg & WM8350_AUXADC_POLL));
+
+	if (!tries)
+		dev_err(wm8350->dev, "adc chn %d read timeout\n", channel);
+	else
+		result = wm8350_reg_read(wm8350,
+					 WM8350_AUX1_READBACK + channel);
+
+	/* Turn off the ADC */
+	reg = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_5);
+	wm8350_reg_write(wm8350, WM8350_POWER_MGMT_5,
+			 reg & ~WM8350_AUXADC_ENA);
+
+	mutex_unlock(&wm8350->auxadc_mutex);
+
+	return result & WM8350_AUXADC_DATA1_MASK;
+}
+EXPORT_SYMBOL_GPL(wm8350_read_auxadc);
+
 /*
  * Cache is always host endian.
  */
@@ -1239,6 +1287,7 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		}
 	}
 
+	mutex_init(&wm8350->auxadc_mutex);
 	mutex_init(&wm8350->irq_mutex);
 	INIT_WORK(&wm8350->irq_work, wm8350_irq_worker);
 	if (irq) {
diff --git a/include/linux/mfd/wm8350/comparator.h b/include/linux/mfd/wm8350/comparator.h
index 0537886..54bc5d0 100644
--- a/include/linux/mfd/wm8350/comparator.h
+++ b/include/linux/mfd/wm8350/comparator.h
@@ -164,4 +164,12 @@
 #define WM8350_AUXADC_BATT			6
 #define WM8350_AUXADC_TEMP			7
 
+struct wm8350;
+
+/*
+ * AUX ADC Readback
+ */
+int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale,
+		       int vref);
+
 #endif
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 9490ec1..cc19005 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -573,6 +573,8 @@ struct wm8350 {
 			 void *src);
 	u16 *reg_cache;
 
+	struct mutex auxadc_mutex;
+
 	/* Interrupt handling */
 	struct work_struct irq_work;
 	struct mutex irq_mutex; /* IRQ table mutex */
-- 
cgit v0.10.2


From 3fba19ec1ae5b460c73a7f32efed8d3b3300b246 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Sat, 8 Nov 2008 01:13:16 +0100
Subject: mfd: allow reading entire register banks on twl4030

Minor change to the TWL4030 utility interface:  support reads
of all 256 bytes in each register bank (vs just 255).  This
can help when debugging, but is otherwise a NOP.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index dd843c4..ef9a971 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -225,7 +225,7 @@ static struct twl4030mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
  *
  * Returns the result of operation - 0 is success
  */
-int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, u8 num_bytes)
+int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 {
 	int ret;
 	int sid;
@@ -274,7 +274,7 @@ EXPORT_SYMBOL(twl4030_i2c_write);
  *
  * Returns result of operation - num_bytes is success else failure.
  */
-int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, u8 num_bytes)
+int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 {
 	int ret;
 	u8 val;
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index fb604dc..ae25c90 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -78,8 +78,8 @@ int twl4030_i2c_read_u8(u8 mod_no, u8 *val, u8 reg);
  * IMPORTANT:  For twl4030_i2c_write(), allocate num_bytes + 1
  * for the value, and populate your data starting at offset 1.
  */
-int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, u8 num_bytes);
-int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, u8 num_bytes);
+int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
+int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
 
 /*----------------------------------------------------------------------*/
 
-- 
cgit v0.10.2


From 14431aa0c5a443d13d24e6f865a8838f97dab973 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 16 Nov 2008 20:16:47 +0100
Subject: power_supply: Add support for WM8350 PMU

This patch adds support for the PMU provided by the WM8350 which
implements battery, line and USB supplies including a battery charger.
The hardware functions largely autonomously, with minimal software
control required to initiate fast charging.

Support for configuration of the USB supply is not yet implemented.
This means that the hardware will remain in the mode configured at
startup, by default limiting the current drawn from USB to 100mA.

This driver was originally written by Liam Girdwood with subsequent
updates for submission by Mark Brown.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Anton Vorontsov <cbouatmailru@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 8e0c2b4..52f8676 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -29,6 +29,13 @@ config APM_POWER
 	  Say Y here to enable support APM status emulation using
 	  battery class devices.
 
+config WM8350_POWER
+        tristate "WM8350 PMU support"
+        depends on MFD_WM8350
+        help
+          Say Y here to enable support for the power management unit
+	  provided by the Wolfson Microelectronics WM8350 PMIC.
+
 config BATTERY_DS2760
 	tristate "DS2760 battery driver (HP iPAQ & others)"
 	select W1
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index e8f1ece..e6f6865 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_POWER_SUPPLY)	+= power_supply.o
 
 obj-$(CONFIG_PDA_POWER)		+= pda_power.o
 obj-$(CONFIG_APM_POWER)		+= apm_power.o
+obj-$(CONFIG_WM8350_POWER)	+= wm8350_power.o
 
 obj-$(CONFIG_BATTERY_DS2760)	+= ds2760_battery.o
 obj-$(CONFIG_BATTERY_PMU)	+= pmu_battery.o
diff --git a/drivers/power/wm8350_power.c b/drivers/power/wm8350_power.c
new file mode 100644
index 0000000..9c0a847
--- /dev/null
+++ b/drivers/power/wm8350_power.c
@@ -0,0 +1,515 @@
+/*
+ * Battery driver for wm8350 PMIC
+ *
+ * Copyright 2007, 2008 Wolfson Microelectronics PLC.
+ *
+ * Based on OLPC Battery Driver
+ *
+ * Copyright 2006  David Woodhouse <dwmw2@infradead.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/mfd/wm8350/supply.h>
+#include <linux/mfd/wm8350/core.h>
+#include <linux/mfd/wm8350/comparator.h>
+
+static int wm8350_read_battery_uvolts(struct wm8350 *wm8350)
+{
+	return wm8350_read_auxadc(wm8350, WM8350_AUXADC_BATT, 0, 0)
+		* WM8350_AUX_COEFF;
+}
+
+static int wm8350_read_line_uvolts(struct wm8350 *wm8350)
+{
+	return wm8350_read_auxadc(wm8350, WM8350_AUXADC_LINE, 0, 0)
+		* WM8350_AUX_COEFF;
+}
+
+static int wm8350_read_usb_uvolts(struct wm8350 *wm8350)
+{
+	return wm8350_read_auxadc(wm8350, WM8350_AUXADC_USB, 0, 0)
+		* WM8350_AUX_COEFF;
+}
+
+#define WM8350_BATT_SUPPLY	1
+#define WM8350_USB_SUPPLY	2
+#define WM8350_LINE_SUPPLY	4
+
+static inline int wm8350_charge_time_min(struct wm8350 *wm8350, int min)
+{
+	if (wm8350->rev < WM8350_REV_G)
+		return (((min - 30) / 15) & 0xf) << 8;
+	else
+		return (((min - 30) / 30) & 0xf) << 8;
+}
+
+static int wm8350_get_supplies(struct wm8350 *wm8350)
+{
+	u16 sm, ov, co, chrg;
+	int supplies = 0;
+
+	sm = wm8350_reg_read(wm8350, WM8350_STATE_MACHINE_STATUS);
+	ov = wm8350_reg_read(wm8350, WM8350_MISC_OVERRIDES);
+	co = wm8350_reg_read(wm8350, WM8350_COMPARATOR_OVERRIDES);
+	chrg = wm8350_reg_read(wm8350, WM8350_BATTERY_CHARGER_CONTROL_2);
+
+	/* USB_SM */
+	sm = (sm & WM8350_USB_SM_MASK) >> WM8350_USB_SM_SHIFT;
+
+	/* CHG_ISEL */
+	chrg &= WM8350_CHG_ISEL_MASK;
+
+	/* If the USB state machine is active then we're using that with or
+	 * without battery, otherwise check for wall supply */
+	if (((sm == WM8350_USB_SM_100_SLV) ||
+	     (sm == WM8350_USB_SM_500_SLV) ||
+	     (sm == WM8350_USB_SM_STDBY_SLV))
+	    && !(ov & WM8350_USB_LIMIT_OVRDE))
+		supplies = WM8350_USB_SUPPLY;
+	else if (((sm == WM8350_USB_SM_100_SLV) ||
+		  (sm == WM8350_USB_SM_500_SLV) ||
+		  (sm == WM8350_USB_SM_STDBY_SLV))
+		 && (ov & WM8350_USB_LIMIT_OVRDE) && (chrg == 0))
+		supplies = WM8350_USB_SUPPLY | WM8350_BATT_SUPPLY;
+	else if (co & WM8350_WALL_FB_OVRDE)
+		supplies = WM8350_LINE_SUPPLY;
+	else
+		supplies = WM8350_BATT_SUPPLY;
+
+	return supplies;
+}
+
+static int wm8350_charger_config(struct wm8350 *wm8350,
+				 struct wm8350_charger_policy *policy)
+{
+	u16 reg, eoc_mA, fast_limit_mA;
+
+	if (!policy) {
+		dev_warn(wm8350->dev,
+			 "No charger policy, charger not configured.\n");
+		return -EINVAL;
+	}
+
+	/* make sure USB fast charge current is not > 500mA */
+	if (policy->fast_limit_USB_mA > 500) {
+		dev_err(wm8350->dev, "USB fast charge > 500mA\n");
+		return -EINVAL;
+	}
+
+	eoc_mA = WM8350_CHG_EOC_mA(policy->eoc_mA);
+
+	wm8350_reg_unlock(wm8350);
+
+	reg = wm8350_reg_read(wm8350, WM8350_BATTERY_CHARGER_CONTROL_1)
+		& WM8350_CHG_ENA_R168;
+	wm8350_reg_write(wm8350, WM8350_BATTERY_CHARGER_CONTROL_1,
+			 reg | eoc_mA | policy->trickle_start_mV |
+			 WM8350_CHG_TRICKLE_TEMP_CHOKE |
+			 WM8350_CHG_TRICKLE_USB_CHOKE |
+			 WM8350_CHG_FAST_USB_THROTTLE);
+
+	if (wm8350_get_supplies(wm8350) & WM8350_USB_SUPPLY) {
+		fast_limit_mA =
+			WM8350_CHG_FAST_LIMIT_mA(policy->fast_limit_USB_mA);
+		wm8350_reg_write(wm8350, WM8350_BATTERY_CHARGER_CONTROL_2,
+			    policy->charge_mV | policy->trickle_charge_USB_mA |
+			    fast_limit_mA | wm8350_charge_time_min(wm8350,
+						policy->charge_timeout));
+
+	} else {
+		fast_limit_mA =
+			WM8350_CHG_FAST_LIMIT_mA(policy->fast_limit_mA);
+		wm8350_reg_write(wm8350, WM8350_BATTERY_CHARGER_CONTROL_2,
+			    policy->charge_mV | policy->trickle_charge_mA |
+			    fast_limit_mA | wm8350_charge_time_min(wm8350,
+						policy->charge_timeout));
+	}
+
+	wm8350_reg_lock(wm8350);
+	return 0;
+}
+
+static int wm8350_batt_status(struct wm8350 *wm8350)
+{
+	u16 state;
+
+	state = wm8350_reg_read(wm8350, WM8350_BATTERY_CHARGER_CONTROL_2);
+	state &= WM8350_CHG_STS_MASK;
+
+	switch (state) {
+	case WM8350_CHG_STS_OFF:
+		return POWER_SUPPLY_STATUS_DISCHARGING;
+
+	case WM8350_CHG_STS_TRICKLE:
+	case WM8350_CHG_STS_FAST:
+		return POWER_SUPPLY_STATUS_CHARGING;
+
+	default:
+		return POWER_SUPPLY_STATUS_UNKNOWN;
+	}
+}
+
+static ssize_t charger_state_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct wm8350 *wm8350 = dev_get_drvdata(dev);
+	char *charge;
+	int state;
+
+	state = wm8350_reg_read(wm8350, WM8350_BATTERY_CHARGER_CONTROL_2) &
+	    WM8350_CHG_STS_MASK;
+	switch (state) {
+	case WM8350_CHG_STS_OFF:
+		charge = "Charger Off";
+		break;
+	case WM8350_CHG_STS_TRICKLE:
+		charge = "Trickle Charging";
+		break;
+	case WM8350_CHG_STS_FAST:
+		charge = "Fast Charging";
+		break;
+	default:
+		return 0;
+	}
+
+	return sprintf(buf, "%s\n", charge);
+}
+
+static DEVICE_ATTR(charger_state, 0444, charger_state_show, NULL);
+
+static void wm8350_charger_handler(struct wm8350 *wm8350, int irq, void *data)
+{
+	struct wm8350_power *power = &wm8350->power;
+	struct wm8350_charger_policy *policy = power->policy;
+
+	switch (irq) {
+	case WM8350_IRQ_CHG_BAT_HOT:
+		dev_err(wm8350->dev, "battery too hot\n");
+		break;
+	case WM8350_IRQ_CHG_BAT_COLD:
+		dev_err(wm8350->dev, "battery too cold\n");
+		break;
+	case WM8350_IRQ_CHG_BAT_FAIL:
+		dev_err(wm8350->dev, "battery failed\n");
+		break;
+	case WM8350_IRQ_CHG_TO:
+		dev_err(wm8350->dev, "charger timeout\n");
+		break;
+	case WM8350_IRQ_CHG_END:
+		power_supply_changed(&power->battery);
+		break;
+	case WM8350_IRQ_CHG_START:
+		power_supply_changed(&power->battery);
+		break;
+
+	case WM8350_IRQ_CHG_FAST_RDY:
+		dev_dbg(wm8350->dev, "fast charger ready\n");
+		wm8350_charger_config(wm8350, policy);
+		wm8350_reg_unlock(wm8350);
+		wm8350_set_bits(wm8350, WM8350_BATTERY_CHARGER_CONTROL_1,
+				WM8350_CHG_FAST);
+		wm8350_reg_lock(wm8350);
+		break;
+
+	case WM8350_IRQ_CHG_VBATT_LT_3P9:
+		dev_warn(wm8350->dev, "battery < 3.9V\n");
+		break;
+	case WM8350_IRQ_CHG_VBATT_LT_3P1:
+		dev_warn(wm8350->dev, "battery < 3.1V\n");
+		break;
+	case WM8350_IRQ_CHG_VBATT_LT_2P85:
+		dev_warn(wm8350->dev, "battery < 2.85V\n");
+		break;
+
+		/* Supply change.  We will overnotify but it should do
+		 * no harm. */
+	case WM8350_IRQ_EXT_USB_FB:
+	case WM8350_IRQ_EXT_WALL_FB:
+		wm8350_charger_config(wm8350, policy);
+	case WM8350_IRQ_EXT_BAT_FB:   /* Fall through */
+		power_supply_changed(&power->battery);
+		power_supply_changed(&power->usb);
+		power_supply_changed(&power->ac);
+		break;
+
+	default:
+		dev_err(wm8350->dev, "Unknown interrupt %d\n", irq);
+	}
+}
+
+/*********************************************************************
+ *		AC Power
+ *********************************************************************/
+static int wm8350_ac_get_prop(struct power_supply *psy,
+			      enum power_supply_property psp,
+			      union power_supply_propval *val)
+{
+	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev->parent);
+	int ret = 0;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = !!(wm8350_get_supplies(wm8350) &
+				 WM8350_LINE_SUPPLY);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = wm8350_read_line_uvolts(wm8350);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+static enum power_supply_property wm8350_ac_props[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+};
+
+/*********************************************************************
+ *		USB Power
+ *********************************************************************/
+static int wm8350_usb_get_prop(struct power_supply *psy,
+			       enum power_supply_property psp,
+			       union power_supply_propval *val)
+{
+	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev->parent);
+	int ret = 0;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = !!(wm8350_get_supplies(wm8350) &
+				 WM8350_USB_SUPPLY);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = wm8350_read_usb_uvolts(wm8350);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+static enum power_supply_property wm8350_usb_props[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+};
+
+/*********************************************************************
+ *		Battery properties
+ *********************************************************************/
+
+static int wm8350_bat_get_property(struct power_supply *psy,
+				   enum power_supply_property psp,
+				   union power_supply_propval *val)
+{
+	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev->parent);
+	int ret = 0;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		val->intval = wm8350_batt_status(wm8350);
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = !!(wm8350_get_supplies(wm8350) &
+				 WM8350_BATT_SUPPLY);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = wm8350_read_battery_uvolts(wm8350);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static enum power_supply_property wm8350_bat_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+};
+
+/*********************************************************************
+ *		Initialisation
+ *********************************************************************/
+
+static void wm8350_init_charger(struct wm8350 *wm8350)
+{
+	/* register our interest in charger events */
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_TO,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_TO);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_END,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_END);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_START,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_START);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85);
+
+	/* and supply change events */
+	wm8350_register_irq(wm8350, WM8350_IRQ_EXT_USB_FB,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_EXT_USB_FB);
+	wm8350_register_irq(wm8350, WM8350_IRQ_EXT_WALL_FB,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_EXT_WALL_FB);
+	wm8350_register_irq(wm8350, WM8350_IRQ_EXT_BAT_FB,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_EXT_BAT_FB);
+}
+
+static void free_charger_irq(struct wm8350 *wm8350)
+{
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_TO);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_END);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_START);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_EXT_USB_FB);
+	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_USB_FB);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_EXT_WALL_FB);
+	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_WALL_FB);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_EXT_BAT_FB);
+	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_BAT_FB);
+}
+
+static __devinit int wm8350_power_probe(struct platform_device *pdev)
+{
+	struct wm8350 *wm8350 = platform_get_drvdata(pdev);
+	struct wm8350_power *power = &wm8350->power;
+	struct wm8350_charger_policy *policy = power->policy;
+	struct power_supply *usb = &power->usb;
+	struct power_supply *battery = &power->battery;
+	struct power_supply *ac = &power->ac;
+	int ret;
+
+	ac->name = "wm8350-ac";
+	ac->type = POWER_SUPPLY_TYPE_MAINS;
+	ac->properties = wm8350_ac_props;
+	ac->num_properties = ARRAY_SIZE(wm8350_ac_props);
+	ac->get_property = wm8350_ac_get_prop;
+	ret = power_supply_register(&pdev->dev, ac);
+	if (ret)
+		return ret;
+
+	battery->name = "wm8350-battery";
+	battery->properties = wm8350_bat_props;
+	battery->num_properties = ARRAY_SIZE(wm8350_bat_props);
+	battery->get_property = wm8350_bat_get_property;
+	battery->use_for_apm = 1;
+	ret = power_supply_register(&pdev->dev, battery);
+	if (ret)
+		goto battery_failed;
+
+	usb->name = "wm8350-usb",
+	usb->type = POWER_SUPPLY_TYPE_USB;
+	usb->properties = wm8350_usb_props;
+	usb->num_properties = ARRAY_SIZE(wm8350_usb_props);
+	usb->get_property = wm8350_usb_get_prop;
+	ret = power_supply_register(&pdev->dev, usb);
+	if (ret)
+		goto usb_failed;
+
+	ret = device_create_file(&pdev->dev, &dev_attr_charger_state);
+	if (ret < 0)
+		dev_warn(wm8350->dev, "failed to add charge sysfs: %d\n", ret);
+	ret = 0;
+
+	wm8350_init_charger(wm8350);
+	if (wm8350_charger_config(wm8350, policy) == 0) {
+		wm8350_reg_unlock(wm8350);
+		wm8350_set_bits(wm8350, WM8350_POWER_MGMT_5, WM8350_CHG_ENA);
+		wm8350_reg_lock(wm8350);
+	}
+
+	return ret;
+
+usb_failed:
+	power_supply_unregister(battery);
+battery_failed:
+	power_supply_unregister(ac);
+
+	return ret;
+}
+
+static __devexit int wm8350_power_remove(struct platform_device *pdev)
+{
+	struct wm8350 *wm8350 = platform_get_drvdata(pdev);
+	struct wm8350_power *power = &wm8350->power;
+
+	free_charger_irq(wm8350);
+	device_remove_file(&pdev->dev, &dev_attr_charger_state);
+	power_supply_unregister(&power->battery);
+	power_supply_unregister(&power->ac);
+	power_supply_unregister(&power->usb);
+	return 0;
+}
+
+static struct platform_driver wm8350_power_driver = {
+	.probe = wm8350_power_probe,
+	.remove = __devexit_p(wm8350_power_remove),
+	.driver = {
+		.name = "wm8350-power",
+	},
+};
+
+static int __init wm8350_power_init(void)
+{
+	return platform_driver_register(&wm8350_power_driver);
+}
+module_init(wm8350_power_init);
+
+static void __exit wm8350_power_exit(void)
+{
+	platform_driver_unregister(&wm8350_power_driver);
+}
+module_exit(wm8350_power_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Power supply driver for WM8350");
+MODULE_ALIAS("platform:wm8350-power");
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index cc19005..d2614df 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -57,6 +57,9 @@
 #define WM8350_OVER_CURRENT_INT_STATUS_MASK     0x25
 #define WM8350_GPIO_INT_STATUS_MASK             0x26
 #define WM8350_COMPARATOR_INT_STATUS_MASK       0x27
+#define WM8350_MISC_OVERRIDES			0xE3
+#define WM8350_COMPARATOR_OVERRIDES		0xE7
+#define WM8350_STATE_MACHINE_STATUS		0xE9
 
 #define WM8350_MAX_REGISTER                     0xFF
 
@@ -523,6 +526,29 @@
 #define WM8350_DC2_STS                          0x0002
 #define WM8350_DC1_STS                          0x0001
 
+/*
+ * R227 (0xE3) - Misc Overrides
+ */
+#define WM8350_USB_LIMIT_OVRDE			0x0400
+
+/*
+ * R227 (0xE7) - Comparator Overrides
+ */
+#define WM8350_USB_FB_OVRDE			0x8000
+#define WM8350_WALL_FB_OVRDE			0x4000
+#define WM8350_BATT_FB_OVRDE			0x2000
+
+
+/*
+ * R233 (0xE9) - State Machinine Status
+ */
+#define WM8350_USB_SM_MASK			0x0700
+#define WM8350_USB_SM_SHIFT			8
+
+#define WM8350_USB_SM_100_SLV   1
+#define WM8350_USB_SM_500_SLV   5
+#define WM8350_USB_SM_STDBY_SLV 7
+
 /* WM8350 wake up conditions */
 #define WM8350_IRQ_WKUP_OFF_STATE		43
 #define WM8350_IRQ_WKUP_HIB_STATE		44
diff --git a/include/linux/mfd/wm8350/supply.h b/include/linux/mfd/wm8350/supply.h
index 1c8f3cd..7972151 100644
--- a/include/linux/mfd/wm8350/supply.h
+++ b/include/linux/mfd/wm8350/supply.h
@@ -13,7 +13,8 @@
 #ifndef __LINUX_MFD_WM8350_SUPPLY_H_
 #define __LINUX_MFD_WM8350_SUPPLY_H_
 
-#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/power_supply.h>
 
 /*
  * Charger registers
@@ -104,8 +105,28 @@
 #define WM8350_IRQ_EXT_WALL_FB			37
 #define WM8350_IRQ_EXT_BAT_FB			38
 
+/*
+ * Policy to control charger state machine.
+ */
+struct wm8350_charger_policy {
+
+	/* charger state machine policy  - set in machine driver */
+	int eoc_mA;		/* end of charge current (mA)  */
+	int charge_mV;		/* charge voltage */
+	int fast_limit_mA;	/* fast charge current limit */
+	int fast_limit_USB_mA;	/* USB fast charge current limit */
+	int charge_timeout;	/* charge timeout (mins) */
+	int trickle_start_mV;	/* trickle charge starts at mV */
+	int trickle_charge_mA;	/* trickle charge current */
+	int trickle_charge_USB_mA;	/* USB trickle charge current */
+};
+
 struct wm8350_power {
 	struct platform_device *pdev;
+	struct power_supply battery;
+	struct power_supply usb;
+	struct power_supply ac;
+	struct wm8350_charger_policy *policy;
 };
 
 #endif
-- 
cgit v0.10.2


From d756f4a4446227ca9626087939a6769ca55ab036 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 24 Nov 2008 20:20:30 +0100
Subject: mfd: Switch WM8350 revision detection to a feature based model

Rather than check for chip revisions in the WM8350 drivers have the core
code set flags for relevant differences.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 60439bd..764bf15 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1240,19 +1240,17 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		switch ((id2 & WM8350_CHIP_REV_MASK) >> 12) {
 		case WM8350_REV_E:
 			dev_info(wm8350->dev, "Found Rev E device\n");
-			wm8350->rev = WM8350_REV_E;
 			break;
 		case WM8350_REV_F:
 			dev_info(wm8350->dev, "Found Rev F device\n");
-			wm8350->rev = WM8350_REV_F;
 			break;
 		case WM8350_REV_G:
 			dev_info(wm8350->dev, "Found Rev G device\n");
-			wm8350->rev = WM8350_REV_G;
+			wm8350->power.rev_g_coeff = 1;
 			break;
 		case WM8350_REV_H:
 			dev_info(wm8350->dev, "Found Rev H device\n");
-			wm8350->rev = WM8350_REV_H;
+			wm8350->power.rev_g_coeff = 1;
 			break;
 		default:
 			/* For safety we refuse to run on unknown hardware */
diff --git a/drivers/power/wm8350_power.c b/drivers/power/wm8350_power.c
index 9c0a847..74e7593 100644
--- a/drivers/power/wm8350_power.c
+++ b/drivers/power/wm8350_power.c
@@ -44,7 +44,7 @@ static int wm8350_read_usb_uvolts(struct wm8350 *wm8350)
 
 static inline int wm8350_charge_time_min(struct wm8350 *wm8350, int min)
 {
-	if (wm8350->rev < WM8350_REV_G)
+	if (!wm8350->power.rev_g_coeff)
 		return (((min - 30) / 15) & 0xf) << 8;
 	else
 		return (((min - 30) / 30) & 0xf) << 8;
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index d2614df..3c97356 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -585,8 +585,6 @@ struct wm8350_irq {
 };
 
 struct wm8350 {
-	int rev;		/* chip revision */
-
 	struct device *dev;
 
 	/* device IO */
diff --git a/include/linux/mfd/wm8350/supply.h b/include/linux/mfd/wm8350/supply.h
index 7972151..2b94793 100644
--- a/include/linux/mfd/wm8350/supply.h
+++ b/include/linux/mfd/wm8350/supply.h
@@ -127,6 +127,8 @@ struct wm8350_power {
 	struct power_supply usb;
 	struct power_supply ac;
 	struct wm8350_charger_policy *policy;
+
+	int rev_g_coeff;
 };
 
 #endif
-- 
cgit v0.10.2


From b797a5551979da22b0a35632198ffc8a330d9537 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 24 Nov 2008 20:22:58 +0100
Subject: mfd: Refactor WM8350 chip identification

Since the WM8350 driver was originally written the semantics for the
identification registers of the chip have been clarified, allowing
us to do an exact match on all the fields. This avoids mistakenly
running on unsupported hardware.

Also change to using the datasheet names more consistently for
legibility and fix a printk() that should be dev_err().

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 764bf15..2188d75 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1227,52 +1227,72 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		       struct wm8350_platform_data *pdata)
 {
 	int ret = -EINVAL;
-	u16 id1, id2, mask, mode;
+	u16 id1, id2, mask_rev;
+	u16 cust_id, mode, chip_rev;
 
 	/* get WM8350 revision and config mode */
 	wm8350->read_dev(wm8350, WM8350_RESET_ID, sizeof(id1), &id1);
 	wm8350->read_dev(wm8350, WM8350_ID, sizeof(id2), &id2);
+	wm8350->read_dev(wm8350, WM8350_REVISION, sizeof(mask_rev), &mask_rev);
 
 	id1 = be16_to_cpu(id1);
 	id2 = be16_to_cpu(id2);
+	mask_rev = be16_to_cpu(mask_rev);
 
-	if (id1 == 0x6143) {
-		switch ((id2 & WM8350_CHIP_REV_MASK) >> 12) {
+	if (id1 != 0x6143) {
+		dev_err(wm8350->dev,
+			"Device with ID %x is not a WM8350\n", id1);
+		ret = -ENODEV;
+		goto err;
+	}
+
+	mode = id2 & WM8350_CONF_STS_MASK >> 10;
+	cust_id = id2 & WM8350_CUST_ID_MASK;
+	chip_rev = (id2 & WM8350_CHIP_REV_MASK) >> 12;
+	dev_info(wm8350->dev,
+		 "CONF_STS %d, CUST_ID %d, MASK_REV %d, CHIP_REV %d\n",
+		 mode, cust_id, mask_rev, chip_rev);
+
+	if (cust_id != 0) {
+		dev_err(wm8350->dev, "Unsupported CUST_ID\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	switch (mask_rev) {
+	case 0:
+		switch (chip_rev) {
 		case WM8350_REV_E:
-			dev_info(wm8350->dev, "Found Rev E device\n");
+			dev_info(wm8350->dev, "WM8350 Rev E\n");
 			break;
 		case WM8350_REV_F:
-			dev_info(wm8350->dev, "Found Rev F device\n");
+			dev_info(wm8350->dev, "WM8350 Rev F\n");
 			break;
 		case WM8350_REV_G:
-			dev_info(wm8350->dev, "Found Rev G device\n");
+			dev_info(wm8350->dev, "WM8350 Rev G\n");
 			wm8350->power.rev_g_coeff = 1;
 			break;
 		case WM8350_REV_H:
-			dev_info(wm8350->dev, "Found Rev H device\n");
+			dev_info(wm8350->dev, "WM8350 Rev H\n");
 			wm8350->power.rev_g_coeff = 1;
 			break;
 		default:
 			/* For safety we refuse to run on unknown hardware */
-			dev_info(wm8350->dev, "Found unknown rev %x\n",
-				 (id2 & WM8350_CHIP_REV_MASK) >> 12);
+			dev_err(wm8350->dev, "Unknown WM8350 CHIP_REV\n");
 			ret = -ENODEV;
 			goto err;
 		}
-	} else {
-		dev_info(wm8350->dev, "Device with ID %x is not a WM8350\n",
-			 id1);
+		break;
+
+	default:
+		dev_err(wm8350->dev, "Unknown MASK_REV\n");
 		ret = -ENODEV;
 		goto err;
 	}
 
-	mode = id2 & WM8350_CONF_STS_MASK >> 10;
-	mask = id2 & WM8350_CUST_ID_MASK;
-	dev_info(wm8350->dev, "Config mode %d, ROM mask %d\n", mode, mask);
-
 	ret = wm8350_create_cache(wm8350, mode);
 	if (ret < 0) {
-		printk(KERN_ERR "wm8350: failed to create register cache\n");
+		dev_err(wm8350->dev, "Failed to create register cache\n");
 		return ret;
 	}
 
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 3c97356..2a7abee 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -29,6 +29,7 @@
  */
 #define WM8350_RESET_ID                         0x00
 #define WM8350_ID                               0x01
+#define WM8350_REVISION				0x02
 #define WM8350_SYSTEM_CONTROL_1                 0x03
 #define WM8350_SYSTEM_CONTROL_2                 0x04
 #define WM8350_SYSTEM_HIBERNATE                 0x05
@@ -80,6 +81,11 @@
 #define WM8350_CUST_ID_MASK                     0x00FF
 
 /*
+ * R2 (0x02) - Revision
+ */
+#define WM8350_MASK_REV_MASK			0x00FF
+
+/*
  * R3 (0x03) - System Control 1
  */
 #define WM8350_CHIP_ON                          0x8000
-- 
cgit v0.10.2


From 7e386e6e0e4f34f0545e8923e22fe4dd61ef9d48 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 30 Nov 2008 22:43:21 +0100
Subject: power_supply: Add cold to the POWER_SUPPLY_HEALTH report values

Some systems are able to report problems with batteries being under
temperature.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Anton Vorontsov <cbouatmailru@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 23ae846..ac01e06 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -45,7 +45,7 @@ static ssize_t power_supply_show_property(struct device *dev,
 	};
 	static char *health_text[] = {
 		"Unknown", "Good", "Overheat", "Dead", "Over voltage",
-		"Unspecified failure"
+		"Unspecified failure", "Cold",
 	};
 	static char *technology_text[] = {
 		"Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe", "NiCd",
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index f9348cb..8ff25e0 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -45,6 +45,7 @@ enum {
 	POWER_SUPPLY_HEALTH_DEAD,
 	POWER_SUPPLY_HEALTH_OVERVOLTAGE,
 	POWER_SUPPLY_HEALTH_UNSPEC_FAILURE,
+	POWER_SUPPLY_HEALTH_COLD,
 };
 
 enum {
-- 
cgit v0.10.2


From 4008e879e1325c29362aa2c3fa4b527273ae15a8 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 30 Nov 2008 22:45:14 +0100
Subject: power_supply: Add battery health reporting for WM8350

Implement support for reporting battery health in the WM8350 battery
interface. Since we are now able to report this via the classs remove
the diagnostics from the interrupt handler.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Anton Vorontsov <cbouatmailru@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/wm8350-regmap.c b/drivers/mfd/wm8350-regmap.c
index aaf394a..b43d64c 100644
--- a/drivers/mfd/wm8350-regmap.c
+++ b/drivers/mfd/wm8350-regmap.c
@@ -1314,7 +1314,7 @@ const struct wm8350_reg_access wm8350_reg_io_map[] = {
 	{ 0xFFFF, 0xFFFF, 0xFFFF }, /* R223 */
 	{ 0x0000, 0x0000, 0x0000 }, /* R224 */
 	{ 0x8F3F, 0x0000, 0xFFFF }, /* R225 - DCDC/LDO status */
-	{ 0x0000, 0x0000, 0x0000 }, /* R226 */
+	{ 0x0000, 0x0000, 0xFFFF }, /* R226 - Charger status */
 	{ 0x0000, 0x0000, 0xFFFF }, /* R227 */
 	{ 0x0000, 0x0000, 0x0000 }, /* R228 */
 	{ 0x0000, 0x0000, 0x0000 }, /* R229 */
diff --git a/drivers/power/wm8350_power.c b/drivers/power/wm8350_power.c
index 74e7593..1b16bf3 100644
--- a/drivers/power/wm8350_power.c
+++ b/drivers/power/wm8350_power.c
@@ -190,22 +190,18 @@ static void wm8350_charger_handler(struct wm8350 *wm8350, int irq, void *data)
 	struct wm8350_charger_policy *policy = power->policy;
 
 	switch (irq) {
-	case WM8350_IRQ_CHG_BAT_HOT:
-		dev_err(wm8350->dev, "battery too hot\n");
-		break;
-	case WM8350_IRQ_CHG_BAT_COLD:
-		dev_err(wm8350->dev, "battery too cold\n");
-		break;
 	case WM8350_IRQ_CHG_BAT_FAIL:
 		dev_err(wm8350->dev, "battery failed\n");
 		break;
 	case WM8350_IRQ_CHG_TO:
 		dev_err(wm8350->dev, "charger timeout\n");
-		break;
-	case WM8350_IRQ_CHG_END:
 		power_supply_changed(&power->battery);
 		break;
+
+	case WM8350_IRQ_CHG_BAT_HOT:
+	case WM8350_IRQ_CHG_BAT_COLD:
 	case WM8350_IRQ_CHG_START:
+	case WM8350_IRQ_CHG_END:
 		power_supply_changed(&power->battery);
 		break;
 
@@ -308,6 +304,23 @@ static enum power_supply_property wm8350_usb_props[] = {
  *		Battery properties
  *********************************************************************/
 
+static int wm8350_bat_check_health(struct wm8350 *wm8350)
+{
+	u16 reg;
+
+	if (wm8350_read_battery_uvolts(wm8350) < 2850000)
+		return POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+
+	reg = wm8350_reg_read(wm8350, WM8350_CHARGER_OVERRIDES);
+	if (reg & WM8350_CHG_BATT_HOT_OVRDE)
+		return POWER_SUPPLY_HEALTH_OVERHEAT;
+
+	if (reg & WM8350_CHG_BATT_COLD_OVRDE)
+		return POWER_SUPPLY_HEALTH_COLD;
+
+	return POWER_SUPPLY_HEALTH_GOOD;
+}
+
 static int wm8350_bat_get_property(struct power_supply *psy,
 				   enum power_supply_property psp,
 				   union power_supply_propval *val)
@@ -326,6 +339,9 @@ static int wm8350_bat_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
 		val->intval = wm8350_read_battery_uvolts(wm8350);
 		break;
+	case POWER_SUPPLY_PROP_HEALTH:
+		val->intval = wm8350_bat_check_health(wm8350);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -338,6 +354,7 @@ static enum power_supply_property wm8350_bat_props[] = {
 	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_ONLINE,
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_HEALTH,
 };
 
 /*********************************************************************
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 2a7abee..afeff6f 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -58,6 +58,7 @@
 #define WM8350_OVER_CURRENT_INT_STATUS_MASK     0x25
 #define WM8350_GPIO_INT_STATUS_MASK             0x26
 #define WM8350_COMPARATOR_INT_STATUS_MASK       0x27
+#define WM8350_CHARGER_OVERRIDES		0xE2
 #define WM8350_MISC_OVERRIDES			0xE3
 #define WM8350_COMPARATOR_OVERRIDES		0xE7
 #define WM8350_STATE_MACHINE_STATUS		0xE9
@@ -533,6 +534,12 @@
 #define WM8350_DC1_STS                          0x0001
 
 /*
+ * R226 (0xE2) - Charger status
+ */
+#define WM8350_CHG_BATT_HOT_OVRDE		0x8000
+#define WM8350_CHG_BATT_COLD_OVRDE		0x4000
+
+/*
  * R227 (0xE3) - Misc Overrides
  */
 #define WM8350_USB_LIMIT_OVRDE			0x0400
-- 
cgit v0.10.2


From 5725d66b9d18e630bb63e3b76bedf25fd1027265 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Mon, 1 Dec 2008 00:31:04 +0100
Subject: mfd: twl4030: simplified child creation code

Minor cleanup to twl4030-core: define a helper function to populate
a single child node, and use it to replace six inconsistent versions
of the same logic.  Both object and source code shrink.

As part of this, some devices now have more IRQ resources:  battery
charger, keypad, ADC, and USB transceiver.  That helps to remove some
irq #defines that block the children's drivers code from compiling on
non-OMAP platforms.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index ef9a971..f5486cc 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -352,258 +352,126 @@ EXPORT_SYMBOL(twl4030_i2c_read_u8);
 
 /*----------------------------------------------------------------------*/
 
-/*
- * NOTE:  We know the first 8 IRQs after pdata->base_irq are
- * for the PIH, and the next are for the PWR_INT SIH, since
- * that's how twl_init_irq() sets things up.
- */
-
-static int add_children(struct twl4030_platform_data *pdata)
+static struct device *add_child(unsigned chip, const char *name,
+		void *pdata, unsigned pdata_len,
+		bool can_wakeup, int irq0, int irq1)
 {
-	struct platform_device	*pdev = NULL;
-	struct twl4030_client	*twl = NULL;
-	int			status = 0;
+	struct platform_device	*pdev;
+	struct twl4030_client	*twl = &twl4030_modules[chip];
+	int			status;
+
+	pdev = platform_device_alloc(name, -1);
+	if (!pdev) {
+		dev_dbg(&twl->client->dev, "can't alloc dev\n");
+		status = -ENOMEM;
+		goto err;
+	}
 
-	if (twl_has_bci() && pdata->bci) {
-		twl = &twl4030_modules[3];
+	device_init_wakeup(&pdev->dev, can_wakeup);
+	pdev->dev.parent = &twl->client->dev;
 
-		pdev = platform_device_alloc("twl4030_bci", -1);
-		if (!pdev) {
-			pr_debug("%s: can't alloc bci dev\n", DRIVER_NAME);
-			status = -ENOMEM;
+	if (pdata) {
+		status = platform_device_add_data(pdev, pdata, pdata_len);
+		if (status < 0) {
+			dev_dbg(&pdev->dev, "can't add platform_data\n");
 			goto err;
 		}
+	}
 
-		if (status == 0) {
-			pdev->dev.parent = &twl->client->dev;
-			status = platform_device_add_data(pdev, pdata->bci,
-					sizeof(*pdata->bci));
-			if (status < 0) {
-				dev_dbg(&twl->client->dev,
-					"can't add bci data, %d\n",
-					status);
-				goto err;
-			}
-		}
-
-		if (status == 0) {
-			struct resource r = {
-				.start = pdata->irq_base + 8 + 1,
-				.flags = IORESOURCE_IRQ,
-			};
-
-			status = platform_device_add_resources(pdev, &r, 1);
-		}
-
-		if (status == 0)
-			status = platform_device_add(pdev);
+	if (irq0) {
+		struct resource r[2] = {
+			{ .start = irq0, .flags = IORESOURCE_IRQ, },
+			{ .start = irq1, .flags = IORESOURCE_IRQ, },
+		};
 
+		status = platform_device_add_resources(pdev, r, irq1 ? 2 : 1);
 		if (status < 0) {
-			platform_device_put(pdev);
-			dev_dbg(&twl->client->dev,
-					"can't create bci dev, %d\n",
-					status);
+			dev_dbg(&pdev->dev, "can't add irqs\n");
 			goto err;
 		}
 	}
 
-	if (twl_has_gpio() && pdata->gpio) {
-		twl = &twl4030_modules[1];
+	status = platform_device_add(pdev);
 
-		pdev = platform_device_alloc("twl4030_gpio", -1);
-		if (!pdev) {
-			pr_debug("%s: can't alloc gpio dev\n", DRIVER_NAME);
-			status = -ENOMEM;
-			goto err;
-		}
-
-		/* more driver model init */
-		if (status == 0) {
-			pdev->dev.parent = &twl->client->dev;
-			/* device_init_wakeup(&pdev->dev, 1); */
-
-			status = platform_device_add_data(pdev, pdata->gpio,
-					sizeof(*pdata->gpio));
-			if (status < 0) {
-				dev_dbg(&twl->client->dev,
-					"can't add gpio data, %d\n",
-					status);
-				goto err;
-			}
-		}
+err:
+	if (status < 0) {
+		platform_device_put(pdev);
+		dev_err(&twl->client->dev, "can't add %s dev\n", name);
+		return ERR_PTR(status);
+	}
+	return &pdev->dev;
+}
 
-		/* GPIO module IRQ */
-		if (status == 0) {
-			struct resource	r = {
-				.start = pdata->irq_base + 0,
-				.flags = IORESOURCE_IRQ,
-			};
+/*
+ * NOTE:  We know the first 8 IRQs after pdata->base_irq are
+ * for the PIH, and the next are for the PWR_INT SIH, since
+ * that's how twl_init_irq() sets things up.
+ */
 
-			status = platform_device_add_resources(pdev, &r, 1);
-		}
+static int add_children(struct twl4030_platform_data *pdata)
+{
+	struct device	*child;
 
-		if (status == 0)
-			status = platform_device_add(pdev);
+	if (twl_has_bci() && pdata->bci) {
+		child = add_child(3, "twl4030_bci",
+				pdata->bci, sizeof(*pdata->bci),
+				false,
+				/* irq0 = CHG_PRES, irq1 = BCI */
+				pdata->irq_base + 8 + 1, pdata->irq_base + 2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
 
-		if (status < 0) {
-			platform_device_put(pdev);
-			dev_dbg(&twl->client->dev,
-					"can't create gpio dev, %d\n",
-					status);
-			goto err;
-		}
+	if (twl_has_gpio() && pdata->gpio) {
+		child = add_child(1, "twl4030_gpio",
+				pdata->gpio, sizeof(*pdata->gpio),
+				false, pdata->irq_base + 0, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
 	}
 
 	if (twl_has_keypad() && pdata->keypad) {
-		pdev = platform_device_alloc("twl4030_keypad", -1);
-		if (pdev) {
-			twl = &twl4030_modules[2];
-			pdev->dev.parent = &twl->client->dev;
-			device_init_wakeup(&pdev->dev, 1);
-			status = platform_device_add_data(pdev, pdata->keypad,
-					sizeof(*pdata->keypad));
-			if (status < 0) {
-				dev_dbg(&twl->client->dev,
-					"can't add keypad data, %d\n",
-					status);
-				platform_device_put(pdev);
-				goto err;
-			}
-			status = platform_device_add(pdev);
-			if (status < 0) {
-				platform_device_put(pdev);
-				dev_dbg(&twl->client->dev,
-						"can't create keypad dev, %d\n",
-						status);
-				goto err;
-			}
-		} else {
-			pr_debug("%s: can't alloc keypad dev\n", DRIVER_NAME);
-			status = -ENOMEM;
-			goto err;
-		}
+		child = add_child(2, "twl4030_keypad",
+				pdata->keypad, sizeof(*pdata->keypad),
+				true, pdata->irq_base + 1, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
 	}
 
 	if (twl_has_madc() && pdata->madc) {
-		pdev = platform_device_alloc("twl4030_madc", -1);
-		if (pdev) {
-			twl = &twl4030_modules[2];
-			pdev->dev.parent = &twl->client->dev;
-			device_init_wakeup(&pdev->dev, 1);
-			status = platform_device_add_data(pdev, pdata->madc,
-					sizeof(*pdata->madc));
-			if (status < 0) {
-				platform_device_put(pdev);
-				dev_dbg(&twl->client->dev,
-					"can't add madc data, %d\n",
-					status);
-				goto err;
-			}
-			status = platform_device_add(pdev);
-			if (status < 0) {
-				platform_device_put(pdev);
-				dev_dbg(&twl->client->dev,
-						"can't create madc dev, %d\n",
-						status);
-				goto err;
-			}
-		} else {
-			pr_debug("%s: can't alloc madc dev\n", DRIVER_NAME);
-			status = -ENOMEM;
-			goto err;
-		}
+		child = add_child(2, "twl4030_madc",
+				pdata->madc, sizeof(*pdata->madc),
+				true, pdata->irq_base + 3, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
 	}
 
 	if (twl_has_rtc()) {
-		twl = &twl4030_modules[3];
-
-		pdev = platform_device_alloc("twl4030_rtc", -1);
-		if (!pdev) {
-			pr_debug("%s: can't alloc rtc dev\n", DRIVER_NAME);
-			status = -ENOMEM;
-		} else {
-			pdev->dev.parent = &twl->client->dev;
-			device_init_wakeup(&pdev->dev, 1);
-		}
-
 		/*
-		 * REVISIT platform_data here currently might use of
+		 * REVISIT platform_data here currently might expose the
 		 * "msecure" line ... but for now we just expect board
-		 * setup to tell the chip "we are secure" at all times.
+		 * setup to tell the chip "it's always ok to SET_TIME".
 		 * Eventually, Linux might become more aware of such
 		 * HW security concerns, and "least privilege".
 		 */
-
-		/* RTC module IRQ */
-		if (status == 0) {
-			struct resource	r = {
-				.start = pdata->irq_base + 8 + 3,
-				.flags = IORESOURCE_IRQ,
-			};
-
-			status = platform_device_add_resources(pdev, &r, 1);
-		}
-
-		if (status == 0)
-			status = platform_device_add(pdev);
-
-		if (status < 0) {
-			platform_device_put(pdev);
-			dev_dbg(&twl->client->dev,
-					"can't create rtc dev, %d\n",
-					status);
-			goto err;
-		}
+		child = add_child(3, "twl4030_rtc",
+				NULL, 0,
+				true, pdata->irq_base + 8 + 3, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
 	}
 
 	if (twl_has_usb() && pdata->usb) {
-		twl = &twl4030_modules[0];
-
-		pdev = platform_device_alloc("twl4030_usb", -1);
-		if (!pdev) {
-			pr_debug("%s: can't alloc usb dev\n", DRIVER_NAME);
-			status = -ENOMEM;
-			goto err;
-		}
-
-		if (status == 0) {
-			pdev->dev.parent = &twl->client->dev;
-			device_init_wakeup(&pdev->dev, 1);
-			status = platform_device_add_data(pdev, pdata->usb,
-					sizeof(*pdata->usb));
-			if (status < 0) {
-				platform_device_put(pdev);
-				dev_dbg(&twl->client->dev,
-					"can't add usb data, %d\n",
-					status);
-				goto err;
-			}
-		}
-
-		if (status == 0) {
-			struct resource r = {
-				.start = pdata->irq_base + 8 + 2,
-				.flags = IORESOURCE_IRQ,
-			};
-
-			status = platform_device_add_resources(pdev, &r, 1);
-		}
-
-		if (status == 0)
-			status = platform_device_add(pdev);
-
-		if (status < 0) {
-			platform_device_put(pdev);
-			dev_dbg(&twl->client->dev,
-					"can't create usb dev, %d\n",
-					status);
-		}
+		child = add_child(0, "twl4030_usb",
+				pdata->usb, sizeof(*pdata->usb),
+				true,
+				/* irq0 = USB_PRES, irq1 = USB */
+				pdata->irq_base + 8 + 2, pdata->irq_base + 4);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
 	}
 
-err:
-	if (status)
-		pr_err("failed to add twl4030's children (status %d)\n", status);
-	return status;
+	return 0;
 }
 
 /*----------------------------------------------------------------------*/
-- 
cgit v0.10.2


From 67460a7c26271fd7a32e5d51b2c806a84ce78a62 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Mon, 1 Dec 2008 00:35:33 +0100
Subject: mfd: twl4030: cleanup symbols and OMAP dependency

Finish removing dependency of TWL driver stack on platform-specific
IRQ definitions ... and remove the build dependency on OMAP.

This lets the TWL4030 code be included in test builds for most
platforms, and will make it easier for non-OMAP folk to update
most of this code for new APIs etc.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 2572773..8cd3dd9 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -63,7 +63,7 @@ config UCB1400_CORE
 
 config TWL4030_CORE
 	bool "Texas Instruments TWL4030/TPS659x0 Support"
-	depends on I2C=y && GENERIC_HARDIRQS && (ARCH_OMAP2 || ARCH_OMAP3)
+	depends on I2C=y && GENERIC_HARDIRQS
 	help
 	  Say yes here if you have TWL4030 family chip on your board.
 	  This core driver provides register access and IRQ handling
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index ae25c90..d484669 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -285,33 +285,6 @@ struct twl4030_platform_data {
 
 int twl4030_sih_setup(int module);
 
-/*
- * FIXME completely stop using TWL4030_IRQ_BASE ... instead, pass the
- * IRQ data to subsidiary devices using platform device resources.
- */
-
-/* IRQ information-need base */
-#include <mach/irqs.h>
-/* TWL4030 interrupts */
-
-/* #define TWL4030_MODIRQ_GPIO		(TWL4030_IRQ_BASE + 0) */
-#define TWL4030_MODIRQ_KEYPAD		(TWL4030_IRQ_BASE + 1)
-#define TWL4030_MODIRQ_BCI		(TWL4030_IRQ_BASE + 2)
-#define TWL4030_MODIRQ_MADC		(TWL4030_IRQ_BASE + 3)
-/* #define TWL4030_MODIRQ_USB		(TWL4030_IRQ_BASE + 4) */
-/* #define TWL4030_MODIRQ_PWR		(TWL4030_IRQ_BASE + 5) */
-
-#define TWL4030_PWRIRQ_PWRBTN		(TWL4030_PWR_IRQ_BASE + 0)
-/* #define TWL4030_PWRIRQ_CHG_PRES		(TWL4030_PWR_IRQ_BASE + 1) */
-/* #define TWL4030_PWRIRQ_USB_PRES		(TWL4030_PWR_IRQ_BASE + 2) */
-/* #define TWL4030_PWRIRQ_RTC		(TWL4030_PWR_IRQ_BASE + 3) */
-/* #define TWL4030_PWRIRQ_HOT_DIE		(TWL4030_PWR_IRQ_BASE + 4) */
-/* #define TWL4030_PWRIRQ_PWROK_TIMEOUT	(TWL4030_PWR_IRQ_BASE + 5) */
-/* #define TWL4030_PWRIRQ_MBCHG		(TWL4030_PWR_IRQ_BASE + 6) */
-/* #define TWL4030_PWRIRQ_SC_DETECT	(TWL4030_PWR_IRQ_BASE + 7) */
-
-/* Rest are unsued currently*/
-
 /* Offsets to Power Registers */
 #define TWL4030_VDAC_DEV_GRP		0x3B
 #define TWL4030_VDAC_DEDICATED		0x3E
@@ -322,10 +295,6 @@ int twl4030_sih_setup(int module);
 #define TWL4030_VAUX3_DEV_GRP		0x1F
 #define TWL4030_VAUX3_DEDICATED		0x22
 
-/* TWL4030 GPIO interrupt definitions */
-
-#define TWL4030_GPIO_IRQ_NO(n)		(TWL4030_GPIO_IRQ_BASE + (n))
-
 /*
  * Exported TWL4030 GPIO APIs
  *
-- 
cgit v0.10.2


From dad759ff8ba79927766e3f0159bfc5fb6de0f982 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Mon, 1 Dec 2008 00:43:58 +0100
Subject: mfd: twl4030: create some regulator devices

Initial code to create twl4030 voltage regulator devices, using
the new regulator framework.  Note that this now starts to care
what name is used to declare the TWL chip:

 - TWL4030 is the "old" chip; newer ones have a bigger variety
   of VAUX2 voltages.

 - TWL5030 is the core "new" chip; TPS65950 is its catalog version.

 - The TPS65930 and TPS65920 are cost-reduced catalog versions of
   TWL5030 parts ... fewer regulators, no battery charger, etc.

Board-specific regulator configuration should be provided, listing
which regulators are used and their constraints (e.g. 1.8V only).

Code that could ("should"?) leverage the regulator stuff includes
TWL4030 USB transceiver support and MMC glue, LCD support for the
3430SDP and Labrador boards, and S-Video output.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index f5486cc..8ab9ee8 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -33,6 +33,8 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 
+#include <linux/regulator/machine.h>
+
 #include <linux/i2c.h>
 #include <linux/i2c/twl4030.h>
 
@@ -71,6 +73,13 @@
 #define twl_has_gpio()	false
 #endif
 
+#if defined(CONFIG_REGULATOR_TWL4030) \
+	|| defined(CONFIG_REGULATOR_TWL4030_MODULE)
+#define twl_has_regulator()	true
+#else
+#define twl_has_regulator()	false
+#endif
+
 #if defined(CONFIG_TWL4030_MADC) || defined(CONFIG_TWL4030_MADC_MODULE)
 #define twl_has_madc()	true
 #else
@@ -149,6 +158,10 @@
 #define HIGH_PERF_SQ			(1 << 3)
 
 
+/* chip-specific feature flags, for i2c_device_id.driver_data */
+#define TWL4030_VAUX2		BIT(0)	/* pre-5030 voltage ranges */
+#define TPS_SUBSET		BIT(1)	/* tps659[23]0 have fewer LDOs */
+
 /*----------------------------------------------------------------------*/
 
 /* is driver active, bound to a chip? */
@@ -352,7 +365,8 @@ EXPORT_SYMBOL(twl4030_i2c_read_u8);
 
 /*----------------------------------------------------------------------*/
 
-static struct device *add_child(unsigned chip, const char *name,
+static struct device *
+add_numbered_child(unsigned chip, const char *name, int num,
 		void *pdata, unsigned pdata_len,
 		bool can_wakeup, int irq0, int irq1)
 {
@@ -360,7 +374,7 @@ static struct device *add_child(unsigned chip, const char *name,
 	struct twl4030_client	*twl = &twl4030_modules[chip];
 	int			status;
 
-	pdev = platform_device_alloc(name, -1);
+	pdev = platform_device_alloc(name, num);
 	if (!pdev) {
 		dev_dbg(&twl->client->dev, "can't alloc dev\n");
 		status = -ENOMEM;
@@ -402,17 +416,52 @@ err:
 	return &pdev->dev;
 }
 
+static inline struct device *add_child(unsigned chip, const char *name,
+		void *pdata, unsigned pdata_len,
+		bool can_wakeup, int irq0, int irq1)
+{
+	return add_numbered_child(chip, name, -1, pdata, pdata_len,
+		can_wakeup, irq0, irq1);
+}
+
+static struct device *
+add_regulator_linked(int num, struct regulator_init_data *pdata,
+		struct regulator_consumer_supply *consumers,
+		unsigned num_consumers)
+{
+	/* regulator framework demands init_data ... */
+	if (!pdata)
+		return NULL;
+
+	if (consumers && !pdata->consumer_supplies) {
+		pdata->consumer_supplies = consumers;
+		pdata->num_consumer_supplies = num_consumers;
+	}
+
+	/* NOTE:  we currently ignore regulator IRQs, e.g. for short circuits */
+	return add_numbered_child(3, "twl4030_reg", num,
+		pdata, sizeof(*pdata), false, 0, 0);
+}
+
+static struct device *
+add_regulator(int num, struct regulator_init_data *pdata)
+{
+	return add_regulator_linked(num, pdata, NULL, 0);
+}
+
 /*
  * NOTE:  We know the first 8 IRQs after pdata->base_irq are
  * for the PIH, and the next are for the PWR_INT SIH, since
  * that's how twl_init_irq() sets things up.
  */
 
-static int add_children(struct twl4030_platform_data *pdata)
+static int
+add_children(struct twl4030_platform_data *pdata, unsigned long features)
 {
 	struct device	*child;
+	struct device	*usb_transceiver = NULL;
 
-	if (twl_has_bci() && pdata->bci) {
+	if (twl_has_bci() && pdata->bci && !(features & TPS_SUBSET)) {
 		child = add_child(3, "twl4030_bci",
 				pdata->bci, sizeof(*pdata->bci),
 				false,
@@ -469,6 +518,111 @@ static int add_children(struct twl4030_platform_data *pdata)
 				pdata->irq_base + 8 + 2, pdata->irq_base + 4);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
+
+		/* we need to connect regulators to this transceiver */
+		usb_transceiver = child;
+	}
+
+	if (twl_has_regulator()) {
+		/*
+		child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+		*/
+
+		child = add_regulator(TWL4030_REG_VMMC1, pdata->vmmc1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VDAC, pdata->vdac);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator((features & TWL4030_VAUX2)
+					? TWL4030_REG_VAUX2_4030
+					: TWL4030_REG_VAUX2,
+				pdata->vaux2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	if (twl_has_regulator() && usb_transceiver) {
+		static struct regulator_consumer_supply usb1v5 = {
+			.supply =	"usb1v5",
+		};
+		static struct regulator_consumer_supply usb1v8 = {
+			.supply =	"usb1v8",
+		};
+		static struct regulator_consumer_supply usb3v1 = {
+			.supply =	"usb3v1",
+		};
+		static struct regulator_consumer_supply usbcp = {
+			.supply =	"usbcp",
+		};
+
+		/* this is a template that gets copied */
+		struct regulator_init_data usb_fixed = {
+			.constraints.valid_modes_mask =
+				  REGULATOR_MODE_NORMAL
+				| REGULATOR_MODE_STANDBY,
+			.constraints.valid_ops_mask =
+				  REGULATOR_CHANGE_MODE
+				| REGULATOR_CHANGE_STATUS,
+		};
+
+		usb1v5.dev = usb_transceiver;
+		usb1v8.dev = usb_transceiver;
+		usb3v1.dev = usb_transceiver;
+		usbcp.dev = usb_transceiver;
+
+		child = add_regulator_linked(TWL4030_REG_VUSB1V5, &usb_fixed,
+				&usb1v5, 1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator_linked(TWL4030_REG_VUSB1V8, &usb_fixed,
+				&usb1v8, 1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator_linked(TWL4030_REG_VUSB3V1, &usb_fixed,
+				&usb3v1, 1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator_linked(TWL4030_REG_VUSBCP, &usb_fixed,
+				&usbcp, 1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	/* maybe add LDOs that are omitted on cost-reduced parts */
+	if (twl_has_regulator() && !(features & TPS_SUBSET)) {
+		/*
+		child = add_regulator(TWL4030_REG_VPLL2, pdata->vpll2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+		*/
+
+		child = add_regulator(TWL4030_REG_VMMC2, pdata->vmmc2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VSIM, pdata->vsim);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VAUX1, pdata->vaux1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VAUX3, pdata->vaux3);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VAUX4, pdata->vaux4);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
 	}
 
 	return 0;
@@ -632,7 +786,7 @@ twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
 			goto fail;
 	}
 
-	status = add_children(pdata);
+	status = add_children(pdata, id->driver_data);
 fail:
 	if (status < 0)
 		twl4030_remove(client);
@@ -640,11 +794,11 @@ fail:
 }
 
 static const struct i2c_device_id twl4030_ids[] = {
-	{ "twl4030", 0 },	/* "Triton 2" */
-	{ "tps65950", 0 },	/* catalog version of twl4030 */
-	{ "tps65930", 0 },	/* fewer LDOs and DACs; no charger */
-	{ "tps65920", 0 },	/* fewer LDOs; no codec or charger */
-	{ "twl5030", 0 },	/* T2 updated */
+	{ "twl4030", TWL4030_VAUX2 },	/* "Triton 2" */
+	{ "twl5030", 0 },		/* T2 updated */
+	{ "tps65950", 0 },		/* catalog version of twl5030 */
+	{ "tps65930", TPS_SUBSET },	/* fewer LDOs and DACs; no charger */
+	{ "tps65920", TPS_SUBSET },	/* fewer LDOs; no codec or charger */
 	{ /* end of list */ },
 };
 MODULE_DEVICE_TABLE(i2c, twl4030_ids);
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index d484669..e06555d 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -278,6 +278,18 @@ struct twl4030_platform_data {
 	struct twl4030_keypad_data		*keypad;
 	struct twl4030_usb_data			*usb;
 
+	/* LDO regulators */
+	struct regulator_init_data		*vdac;
+	struct regulator_init_data		*vpll1;
+	struct regulator_init_data		*vpll2;
+	struct regulator_init_data		*vmmc1;
+	struct regulator_init_data		*vmmc2;
+	struct regulator_init_data		*vsim;
+	struct regulator_init_data		*vaux1;
+	struct regulator_init_data		*vaux2;
+	struct regulator_init_data		*vaux3;
+	struct regulator_init_data		*vaux4;
+
 	/* REVISIT more to come ... _nothing_ should be hard-wired */
 };
 
@@ -309,4 +321,39 @@ int twl4030_set_gpio_debounce(int gpio, int enable);
 	static inline int twl4030charger_usb_en(int enable) { return 0; }
 #endif
 
+/*----------------------------------------------------------------------*/
+
+/* Linux-specific regulator identifiers ... for now, we only support
+ * the LDOs, and leave the three buck converters alone.  VDD1 and VDD2
+ * need to tie into hardware based voltage scaling (cpufreq etc), while
+ * VIO is generally fixed.
+ */
+
+/* EXTERNAL dc-to-dc buck converters */
+#define TWL4030_REG_VDD1	0
+#define TWL4030_REG_VDD2	1
+#define TWL4030_REG_VIO		2
+
+/* EXTERNAL LDOs */
+#define TWL4030_REG_VDAC	3
+#define TWL4030_REG_VPLL1	4
+#define TWL4030_REG_VPLL2	5	/* not on all chips */
+#define TWL4030_REG_VMMC1	6
+#define TWL4030_REG_VMMC2	7	/* not on all chips */
+#define TWL4030_REG_VSIM	8	/* not on all chips */
+#define TWL4030_REG_VAUX1	9	/* not on all chips */
+#define TWL4030_REG_VAUX2_4030	10	/* (twl4030-specific) */
+#define TWL4030_REG_VAUX2	11	/* (twl5030 and newer) */
+#define TWL4030_REG_VAUX3	12	/* not on all chips */
+#define TWL4030_REG_VAUX4	13	/* not on all chips */
+
+/* INTERNAL LDOs */
+#define TWL4030_REG_VINTANA1	14
+#define TWL4030_REG_VINTANA2	15
+#define TWL4030_REG_VINTDIG	16
+#define TWL4030_REG_VUSB1V5	17
+#define TWL4030_REG_VUSB1V8	18
+#define TWL4030_REG_VUSB3V1	19
+#define TWL4030_REG_VUSBCP	20
+
 #endif /* End of __TWL4030_H */
-- 
cgit v0.10.2


From b73eac7871d002835be17d4602cced2c15c0db4b Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Sun, 7 Dec 2008 19:10:58 +0100
Subject: mfd: twl4030 regulator bug fixes

This contains two bugfixes to the initial twl4030 regulator
support patch related to USB:

 (a) always overwrite the old list of consumers ... else
     the regulator handles all use the same "usb1v5" name;
 (b) don't set up the "usbcp" regulator, which turns out
     to be managed through separate controls, usually ULPI
     directly from the OTG controller.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index 8ab9ee8..fdfbd31 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -433,7 +433,7 @@ add_regulator_linked(int num, struct regulator_init_data *pdata,
 	if (!pdata)
 		return NULL;
 
-	if (consumers && !pdata->consumer_supplies) {
+	if (consumers) {
 		pdata->consumer_supplies = consumers;
 		pdata->num_consumer_supplies = num_consumers;
 	}
@@ -556,9 +556,6 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 		static struct regulator_consumer_supply usb3v1 = {
 			.supply =	"usb3v1",
 		};
-		static struct regulator_consumer_supply usbcp = {
-			.supply =	"usbcp",
-		};
 
 		/* this is a template that gets copied */
 		struct regulator_init_data usb_fixed = {
@@ -573,7 +570,6 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 		usb1v5.dev = usb_transceiver;
 		usb1v8.dev = usb_transceiver;
 		usb3v1.dev = usb_transceiver;
-		usbcp.dev = usb_transceiver;
 
 		child = add_regulator_linked(TWL4030_REG_VUSB1V5, &usb_fixed,
 				&usb1v5, 1);
@@ -589,11 +585,6 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 				&usb3v1, 1);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
-
-		child = add_regulator_linked(TWL4030_REG_VUSBCP, &usb_fixed,
-				&usbcp, 1);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
 	}
 
 	/* maybe add LDOs that are omitted on cost-reduced parts */
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index e06555d..a8f84c0 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -354,6 +354,5 @@ int twl4030_set_gpio_debounce(int gpio, int enable);
 #define TWL4030_REG_VUSB1V5	17
 #define TWL4030_REG_VUSB1V8	18
 #define TWL4030_REG_VUSB3V1	19
-#define TWL4030_REG_VUSBCP	20
 
 #endif /* End of __TWL4030_H */
-- 
cgit v0.10.2


From 6354ab5c63bc986bf539026a1b289cc142f6e87c Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@openedhand.com>
Date: Sun, 7 Dec 2008 20:50:25 +0100
Subject: mfd: Fix twl4030-core.c build error

This is a fix for:

twl4030-core.c:(.text+0x16a797): undefined reference to `clk_get_rate'
twl4030-core.c:(.text+0x16a797): undefined reference to `clk_put'

on x86 and x86_64, as the clock API is not defined on those platforms.

Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index fdfbd31..b59c385 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -658,12 +658,7 @@ static void __init clocks_init(void)
 		osc = clk_get(NULL, "osc_ck");
 	else
 		osc = clk_get(NULL, "osc_sys_ck");
-#else
-	/* REVISIT for non-OMAP systems, pass the clock rate from
-	 * board init code, using platform_data.
-	 */
-	osc = ERR_PTR(-EIO);
-#endif
+
 	if (IS_ERR(osc)) {
 		printk(KERN_WARNING "Skipping twl4030 internal clock init and "
 				"using bootloader value (unknown osc rate)\n");
@@ -673,6 +668,18 @@ static void __init clocks_init(void)
 	rate = clk_get_rate(osc);
 	clk_put(osc);
 
+#else
+	/* REVISIT for non-OMAP systems, pass the clock rate from
+	 * board init code, using platform_data.
+	 */
+	osc = ERR_PTR(-EIO);
+
+	printk(KERN_WARNING "Skipping twl4030 internal clock init and "
+	       "using bootloader value (unknown osc rate)\n");
+
+	return;
+#endif
+
 	switch (rate) {
 	case 19200000:
 		ctrl = HFCLK_FREQ_19p2_MHZ;
-- 
cgit v0.10.2


From 44faac3155247d9cb9aec5a53832014e1f807c78 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 18 Dec 2008 10:54:12 +0100
Subject: mfd: Pass driver_data onto child devices

The MFD cell structure provides a driver_data field but doesn't pass it
on to the child devices when instantiating them - do that.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index 6c0d1be..54ddf37 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -34,6 +34,7 @@ static int mfd_add_device(struct device *parent, int id,
 		goto fail_device;
 
 	pdev->dev.parent = parent;
+	platform_set_drvdata(pdev, cell->driver_data);
 
 	ret = platform_device_add_data(pdev,
 			cell->platform_data, cell->data_size);
-- 
cgit v0.10.2


From b8380c1a661f1f853418ff2eb798f27a11cade57 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 18 Dec 2008 10:54:22 +0100
Subject: mfd: Register WM8400 codec device

Register a child device for the codec in the WM8400.

Also switch the unregistration of the MFD devices to use the MFD core
since the current code is hand rolling the same thing.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 8cd3dd9..ddfb12b 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -116,6 +116,7 @@ config PMIC_DA903X
 
 config MFD_WM8400
 	tristate "Support Wolfson Microelectronics WM8400"
+	select MFD_CORE
 	depends on I2C
 	help
 	  Support for the Wolfson Microelecronics WM8400 PMIC and audio
diff --git a/drivers/mfd/wm8400-core.c b/drivers/mfd/wm8400-core.c
index 6a0cedb..cf30d06 100644
--- a/drivers/mfd/wm8400-core.c
+++ b/drivers/mfd/wm8400-core.c
@@ -15,6 +15,7 @@
 #include <linux/bug.h>
 #include <linux/i2c.h>
 #include <linux/kernel.h>
+#include <linux/mfd/core.h>
 #include <linux/mfd/wm8400-private.h>
 #include <linux/mfd/wm8400-audio.h>
 
@@ -239,6 +240,16 @@ void wm8400_reset_codec_reg_cache(struct wm8400 *wm8400)
 }
 EXPORT_SYMBOL_GPL(wm8400_reset_codec_reg_cache);
 
+static int wm8400_register_codec(struct wm8400 *wm8400)
+{
+	struct mfd_cell cell = {
+		.name = "wm8400-codec",
+		.driver_data = wm8400,
+	};
+
+	return mfd_add_devices(wm8400->dev, -1, &cell, 1, NULL, 0);
+}
+
 /*
  * wm8400_init - Generic initialisation
  *
@@ -296,24 +307,32 @@ static int wm8400_init(struct wm8400 *wm8400,
 	reg = (reg & WM8400_CHIP_REV_MASK) >> WM8400_CHIP_REV_SHIFT;
 	dev_info(wm8400->dev, "WM8400 revision %x\n", reg);
 
+	ret = wm8400_register_codec(wm8400);
+	if (ret != 0) {
+		dev_err(wm8400->dev, "Failed to register codec\n");
+		goto err_children;
+	}
+
 	if (pdata && pdata->platform_init) {
 		ret = pdata->platform_init(wm8400->dev);
-		if (ret != 0)
+		if (ret != 0) {
 			dev_err(wm8400->dev, "Platform init failed: %d\n",
 				ret);
+			goto err_children;
+		}
 	} else
 		dev_warn(wm8400->dev, "No platform initialisation supplied\n");
 
+	return 0;
+
+err_children:
+	mfd_remove_devices(wm8400->dev);
 	return ret;
 }
 
 static void wm8400_release(struct wm8400 *wm8400)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(wm8400->regulators); i++)
-		if (wm8400->regulators[i].name)
-			platform_device_unregister(&wm8400->regulators[i]);
+	mfd_remove_devices(wm8400->dev);
 }
 
 #if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
-- 
cgit v0.10.2


From 856f6fd119411d5701d5db96e1aae1dd69923887 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Thu, 18 Dec 2008 10:54:27 +0100
Subject: mfd: Dialog DA9030 battery charger MFD driver

This patch amends DA903x MFD driver with definitions and methods
needed for battery charger driver.

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/da903x.c b/drivers/mfd/da903x.c
index 0b5bd85..fcaf1f6 100644
--- a/drivers/mfd/da903x.c
+++ b/drivers/mfd/da903x.c
@@ -151,12 +151,24 @@ int da903x_write(struct device *dev, int reg, uint8_t val)
 }
 EXPORT_SYMBOL_GPL(da903x_write);
 
+int da903x_writes(struct device *dev, int reg, int len, uint8_t *val)
+{
+	return __da903x_writes(to_i2c_client(dev), reg, len, val);
+}
+EXPORT_SYMBOL_GPL(da903x_writes);
+
 int da903x_read(struct device *dev, int reg, uint8_t *val)
 {
 	return __da903x_read(to_i2c_client(dev), reg, val);
 }
 EXPORT_SYMBOL_GPL(da903x_read);
 
+int da903x_reads(struct device *dev, int reg, int len, uint8_t *val)
+{
+	return __da903x_reads(to_i2c_client(dev), reg, len, val);
+}
+EXPORT_SYMBOL_GPL(da903x_reads);
+
 int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask)
 {
 	struct da903x_chip *chip = dev_get_drvdata(dev);
diff --git a/include/linux/mfd/da903x.h b/include/linux/mfd/da903x.h
index cad314c..115dbe9 100644
--- a/include/linux/mfd/da903x.h
+++ b/include/linux/mfd/da903x.h
@@ -32,6 +32,7 @@ enum {
 	DA9030_ID_LDO18,
 	DA9030_ID_LDO19,
 	DA9030_ID_LDO_INT,	/* LDO Internal */
+	DA9030_ID_BAT,		/* battery charger */
 
 	DA9034_ID_LED_1,
 	DA9034_ID_LED_2,
@@ -93,6 +94,43 @@ struct da9034_touch_pdata {
 	int	y_inverted;
 };
 
+/* DA9030 battery charger data */
+struct power_supply_info;
+
+struct da9030_battery_info {
+	/* battery parameters */
+	struct power_supply_info *battery_info;
+
+	/* current and voltage to use for battery charging */
+	unsigned int charge_milliamp;
+	unsigned int charge_millivolt;
+
+	/* voltage thresholds (in millivolts) */
+	int vbat_low;
+	int vbat_crit;
+	int vbat_charge_start;
+	int vbat_charge_stop;
+	int vbat_charge_restart;
+
+	/* battery nominal minimal and maximal voltages in millivolts */
+	int vcharge_min;
+	int vcharge_max;
+
+	/* Temperature thresholds. These are DA9030 register values
+	   "as is" and should be measured for each battery type */
+	int tbat_low;
+	int tbat_high;
+	int tbat_restart;
+
+
+	/* battery monitor interval (seconds) */
+	unsigned int batmon_interval;
+
+	/* platform callbacks for battery low and critical events */
+	void (*battery_low)(void);
+	void (*battery_critical)(void);
+};
+
 struct da903x_subdev_info {
 	int		id;
 	const char	*name;
@@ -190,11 +228,13 @@ extern int da903x_unregister_notifier(struct device *dev,
 extern int da903x_query_status(struct device *dev, unsigned int status);
 
 
-/* NOTE: the two functions below are not intended for use outside
- * of the DA9034 sub-device drivers
+/* NOTE: the functions below are not intended for use outside
+ * of the DA903x sub-device drivers
  */
 extern int da903x_write(struct device *dev, int reg, uint8_t val);
+extern int da903x_writes(struct device *dev, int reg, int len, uint8_t *val);
 extern int da903x_read(struct device *dev, int reg, uint8_t *val);
+extern int da903x_reads(struct device *dev, int reg, int len, uint8_t *val);
 extern int da903x_update(struct device *dev, int reg, uint8_t val, uint8_t mask);
 extern int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask);
 extern int da903x_clr_bits(struct device *dev, int reg, uint8_t bit_mask);
-- 
cgit v0.10.2


From 342d765e011f9cbe4292119a9164f76ccf0b922a Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Tue, 30 Dec 2008 22:44:53 +0100
Subject: power_supply: Add Dialog DA9030 battery charger driver

Driver for battery charger integrated into Dialog Semiconductor DA9030 PMIC

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Acked-by: Anton Vorontsov <cbouatmailru@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 52f8676..6684724 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -75,4 +75,11 @@ config BATTERY_BQ27x00
 	help
 	  Say Y here to enable support for batteries with BQ27200(I2C) chip.
 
+config BATTERY_DA9030
+	tristate "DA9030 battery driver"
+	depends on PMIC_DA903X
+	help
+	  Say Y here to enable support for batteries charger integrated into
+	  DA9030 PMIC.
+
 endif # POWER_SUPPLY
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index e6f6865..eebb155 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_BATTERY_OLPC)	+= olpc_battery.o
 obj-$(CONFIG_BATTERY_TOSA)	+= tosa_battery.o
 obj-$(CONFIG_BATTERY_WM97XX)	+= wm97xx_battery.o
 obj-$(CONFIG_BATTERY_BQ27x00)	+= bq27x00_battery.o
+obj-$(CONFIG_BATTERY_DA9030)	+= da9030_battery.o
diff --git a/drivers/power/da9030_battery.c b/drivers/power/da9030_battery.c
new file mode 100644
index 0000000..1662bb0
--- /dev/null
+++ b/drivers/power/da9030_battery.c
@@ -0,0 +1,600 @@
+/*
+ * Battery charger driver for Dialog Semiconductor DA9030
+ *
+ * Copyright (C) 2008 Compulab, Ltd.
+ * 	Mike Rapoport <mike@compulab.co.il>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/workqueue.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/mfd/da903x.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define DA9030_STATUS_CHDET	(1 << 3)
+
+#define DA9030_FAULT_LOG		0x0a
+#define DA9030_FAULT_LOG_OVER_TEMP	(1 << 7)
+#define DA9030_FAULT_LOG_VBAT_OVER	(1 << 4)
+
+#define DA9030_CHARGE_CONTROL		0x28
+#define DA9030_CHRG_CHARGER_ENABLE	(1 << 7)
+
+#define DA9030_ADC_MAN_CONTROL		0x30
+#define DA9030_ADC_TBATREF_ENABLE	(1 << 5)
+#define DA9030_ADC_LDO_INT_ENABLE	(1 << 4)
+
+#define DA9030_ADC_AUTO_CONTROL		0x31
+#define DA9030_ADC_TBAT_ENABLE		(1 << 5)
+#define DA9030_ADC_VBAT_IN_TXON		(1 << 4)
+#define DA9030_ADC_VCH_ENABLE		(1 << 3)
+#define DA9030_ADC_ICH_ENABLE		(1 << 2)
+#define DA9030_ADC_VBAT_ENABLE		(1 << 1)
+#define DA9030_ADC_AUTO_SLEEP_ENABLE	(1 << 0)
+
+#define DA9030_VBATMON		0x32
+#define DA9030_VBATMONTXON	0x33
+#define DA9030_TBATHIGHP	0x34
+#define DA9030_TBATHIGHN	0x35
+#define DA9030_TBATLOW		0x36
+
+#define DA9030_VBAT_RES		0x41
+#define DA9030_VBATMIN_RES	0x42
+#define DA9030_VBATMINTXON_RES	0x43
+#define DA9030_ICHMAX_RES	0x44
+#define DA9030_ICHMIN_RES	0x45
+#define DA9030_ICHAVERAGE_RES	0x46
+#define DA9030_VCHMAX_RES	0x47
+#define DA9030_VCHMIN_RES	0x48
+#define DA9030_TBAT_RES		0x49
+
+struct da9030_adc_res {
+	uint8_t vbat_res;
+	uint8_t vbatmin_res;
+	uint8_t vbatmintxon;
+	uint8_t ichmax_res;
+	uint8_t ichmin_res;
+	uint8_t ichaverage_res;
+	uint8_t vchmax_res;
+	uint8_t vchmin_res;
+	uint8_t tbat_res;
+	uint8_t adc_in4_res;
+	uint8_t adc_in5_res;
+};
+
+struct da9030_battery_thresholds {
+	int tbat_low;
+	int tbat_high;
+	int tbat_restart;
+
+	int vbat_low;
+	int vbat_crit;
+	int vbat_charge_start;
+	int vbat_charge_stop;
+	int vbat_charge_restart;
+
+	int vcharge_min;
+	int vcharge_max;
+};
+
+struct da9030_charger {
+	struct power_supply psy;
+
+	struct device *master;
+
+	struct da9030_adc_res adc;
+	struct delayed_work work;
+	unsigned int interval;
+
+	struct power_supply_info *battery_info;
+
+	struct da9030_battery_thresholds thresholds;
+
+	unsigned int charge_milliamp;
+	unsigned int charge_millivolt;
+
+	/* charger status */
+	bool chdet;
+	uint8_t fault;
+	int mA;
+	int mV;
+	bool is_on;
+
+	struct notifier_block nb;
+
+	/* platform callbacks for battery low and critical events */
+	void (*battery_low)(void);
+	void (*battery_critical)(void);
+
+	struct dentry *debug_file;
+};
+
+static inline int da9030_reg_to_mV(int reg)
+{
+	return ((reg * 2650) >> 8) + 2650;
+}
+
+static inline int da9030_millivolt_to_reg(int mV)
+{
+	return ((mV - 2650) << 8) / 2650;
+}
+
+static inline int da9030_reg_to_mA(int reg)
+{
+	return ((reg * 24000) >> 8) / 15;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int bat_debug_show(struct seq_file *s, void *data)
+{
+	struct da9030_charger *charger = s->private;
+
+	seq_printf(s, "charger is %s\n", charger->is_on ? "on" : "off");
+	if (charger->chdet) {
+		seq_printf(s, "iset = %dmA, vset = %dmV\n",
+			   charger->mA, charger->mV);
+	}
+
+	seq_printf(s, "vbat_res = %d (%dmV)\n",
+		   charger->adc.vbat_res,
+		   da9030_reg_to_mV(charger->adc.vbat_res));
+	seq_printf(s, "vbatmin_res = %d (%dmV)\n",
+		   charger->adc.vbatmin_res,
+		   da9030_reg_to_mV(charger->adc.vbatmin_res));
+	seq_printf(s, "vbatmintxon = %d (%dmV)\n",
+		   charger->adc.vbatmintxon,
+		   da9030_reg_to_mV(charger->adc.vbatmintxon));
+	seq_printf(s, "ichmax_res = %d (%dmA)\n",
+		   charger->adc.ichmax_res,
+		   da9030_reg_to_mV(charger->adc.ichmax_res));
+	seq_printf(s, "ichmin_res = %d (%dmA)\n",
+		   charger->adc.ichmin_res,
+		   da9030_reg_to_mA(charger->adc.ichmin_res));
+	seq_printf(s, "ichaverage_res = %d (%dmA)\n",
+		   charger->adc.ichaverage_res,
+		   da9030_reg_to_mA(charger->adc.ichaverage_res));
+	seq_printf(s, "vchmax_res = %d (%dmV)\n",
+		   charger->adc.vchmax_res,
+		   da9030_reg_to_mA(charger->adc.vchmax_res));
+	seq_printf(s, "vchmin_res = %d (%dmV)\n",
+		   charger->adc.vchmin_res,
+		   da9030_reg_to_mV(charger->adc.vchmin_res));
+
+	return 0;
+}
+
+static int debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, bat_debug_show, inode->i_private);
+}
+
+static const struct file_operations bat_debug_fops = {
+	.open		= debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static struct dentry *da9030_bat_create_debugfs(struct da9030_charger *charger)
+{
+	charger->debug_file = debugfs_create_file("charger", 0666, 0, charger,
+						 &bat_debug_fops);
+	return charger->debug_file;
+}
+
+static void da9030_bat_remove_debugfs(struct da9030_charger *charger)
+{
+	debugfs_remove(charger->debug_file);
+}
+#else
+static inline struct dentry *da9030_bat_create_debugfs(struct da9030_charger *charger)
+{
+	return NULL;
+}
+static inline void da9030_bat_remove_debugfs(struct da9030_charger *charger)
+{
+}
+#endif
+
+static inline void da9030_read_adc(struct da9030_charger *charger,
+				   struct da9030_adc_res *adc)
+{
+	da903x_reads(charger->master, DA9030_VBAT_RES,
+		     sizeof(*adc), (uint8_t *)adc);
+}
+
+static void da9030_charger_update_state(struct da9030_charger *charger)
+{
+	uint8_t val;
+
+	da903x_read(charger->master, DA9030_CHARGE_CONTROL, &val);
+	charger->is_on = (val & DA9030_CHRG_CHARGER_ENABLE) ? 1 : 0;
+	charger->mA = ((val >> 3) & 0xf) * 100;
+	charger->mV = (val & 0x7) * 50 + 4000;
+
+	da9030_read_adc(charger, &charger->adc);
+	da903x_read(charger->master, DA9030_FAULT_LOG, &charger->fault);
+	charger->chdet = da903x_query_status(charger->master,
+						     DA9030_STATUS_CHDET);
+}
+
+static void da9030_set_charge(struct da9030_charger *charger, int on)
+{
+	uint8_t val;
+
+	if (on) {
+		val = DA9030_CHRG_CHARGER_ENABLE;
+		val |= (charger->charge_milliamp / 100) << 3;
+		val |= (charger->charge_millivolt - 4000) / 50;
+		charger->is_on = 1;
+	} else {
+		val = 0;
+		charger->is_on = 0;
+	}
+
+	da903x_write(charger->master, DA9030_CHARGE_CONTROL, val);
+}
+
+static void da9030_charger_check_state(struct da9030_charger *charger)
+{
+	da9030_charger_update_state(charger);
+
+	/* we wake or boot with external power on */
+	if (!charger->is_on) {
+		if ((charger->chdet) &&
+		    (charger->adc.vbat_res <
+		     charger->thresholds.vbat_charge_start)) {
+			da9030_set_charge(charger, 1);
+		}
+	} else {
+		if (charger->adc.vbat_res >=
+		    charger->thresholds.vbat_charge_stop) {
+			da9030_set_charge(charger, 0);
+			da903x_write(charger->master, DA9030_VBATMON,
+				       charger->thresholds.vbat_charge_restart);
+		} else if (charger->adc.vbat_res >
+			   charger->thresholds.vbat_low) {
+			/* we are charging and passed LOW_THRESH,
+			   so upate DA9030 VBAT threshold
+			 */
+			da903x_write(charger->master, DA9030_VBATMON,
+				     charger->thresholds.vbat_low);
+		}
+		if (charger->adc.vchmax_res > charger->thresholds.vcharge_max ||
+		    charger->adc.vchmin_res < charger->thresholds.vcharge_min ||
+		    /* Tempreture readings are negative */
+		    charger->adc.tbat_res < charger->thresholds.tbat_high ||
+		    charger->adc.tbat_res > charger->thresholds.tbat_low) {
+			/* disable charger */
+			da9030_set_charge(charger, 0);
+		}
+	}
+}
+
+static void da9030_charging_monitor(struct work_struct *work)
+{
+	struct da9030_charger *charger;
+
+	charger = container_of(work, struct da9030_charger, work.work);
+
+	da9030_charger_check_state(charger);
+
+	/* reschedule for the next time */
+	schedule_delayed_work(&charger->work, charger->interval);
+}
+
+static enum power_supply_property da9030_battery_props[] = {
+	POWER_SUPPLY_PROP_MODEL_NAME,
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_AVG,
+};
+
+static void da9030_battery_check_status(struct da9030_charger *charger,
+				    union power_supply_propval *val)
+{
+	if (charger->chdet) {
+		if (charger->is_on)
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		else
+			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+	} else {
+		val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+	}
+}
+
+static void da9030_battery_check_health(struct da9030_charger *charger,
+				    union power_supply_propval *val)
+{
+	if (charger->fault & DA9030_FAULT_LOG_OVER_TEMP)
+		val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
+	else if (charger->fault & DA9030_FAULT_LOG_VBAT_OVER)
+		val->intval = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+	else
+		val->intval = POWER_SUPPLY_HEALTH_GOOD;
+}
+
+static int da9030_battery_get_property(struct power_supply *psy,
+				   enum power_supply_property psp,
+				   union power_supply_propval *val)
+{
+	struct da9030_charger *charger;
+	charger = container_of(psy, struct da9030_charger, psy);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		da9030_battery_check_status(charger, val);
+		break;
+	case POWER_SUPPLY_PROP_HEALTH:
+		da9030_battery_check_health(charger, val);
+		break;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = charger->battery_info->technology;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		val->intval = charger->battery_info->voltage_max_design;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		val->intval = charger->battery_info->voltage_min_design;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = da9030_reg_to_mV(charger->adc.vbat_res) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_AVG:
+		val->intval =
+			da9030_reg_to_mA(charger->adc.ichaverage_res) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_MODEL_NAME:
+		val->strval = charger->battery_info->name;
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static void da9030_battery_vbat_event(struct da9030_charger *charger)
+{
+	da9030_read_adc(charger, &charger->adc);
+
+	if (charger->is_on)
+		return;
+
+	if (charger->adc.vbat_res < charger->thresholds.vbat_low) {
+		/* set VBAT threshold for critical */
+		da903x_write(charger->master, DA9030_VBATMON,
+			     charger->thresholds.vbat_crit);
+		if (charger->battery_low)
+			charger->battery_low();
+	} else if (charger->adc.vbat_res <
+		   charger->thresholds.vbat_crit) {
+		/* notify the system of battery critical */
+		if (charger->battery_critical)
+			charger->battery_critical();
+	}
+}
+
+static int da9030_battery_event(struct notifier_block *nb, unsigned long event,
+				void *data)
+{
+	struct da9030_charger *charger =
+		container_of(nb, struct da9030_charger, nb);
+	int status;
+
+	switch (event) {
+	case DA9030_EVENT_CHDET:
+		status = da903x_query_status(charger->master,
+					     DA9030_STATUS_CHDET);
+		da9030_set_charge(charger, status);
+		break;
+	case DA9030_EVENT_VBATMON:
+		da9030_battery_vbat_event(charger);
+		break;
+	case DA9030_EVENT_CHIOVER:
+	case DA9030_EVENT_TBAT:
+		da9030_set_charge(charger, 0);
+		break;
+	}
+
+	return 0;
+}
+
+static void da9030_battery_convert_thresholds(struct da9030_charger *charger,
+					      struct da9030_battery_info *pdata)
+{
+	charger->thresholds.tbat_low = pdata->tbat_low;
+	charger->thresholds.tbat_high = pdata->tbat_high;
+	charger->thresholds.tbat_restart  = pdata->tbat_restart;
+
+	charger->thresholds.vbat_low =
+		da9030_millivolt_to_reg(pdata->vbat_low);
+	charger->thresholds.vbat_crit =
+		da9030_millivolt_to_reg(pdata->vbat_crit);
+	charger->thresholds.vbat_charge_start =
+		da9030_millivolt_to_reg(pdata->vbat_charge_start);
+	charger->thresholds.vbat_charge_stop =
+		da9030_millivolt_to_reg(pdata->vbat_charge_stop);
+	charger->thresholds.vbat_charge_restart =
+		da9030_millivolt_to_reg(pdata->vbat_charge_restart);
+
+	charger->thresholds.vcharge_min =
+		da9030_millivolt_to_reg(pdata->vcharge_min);
+	charger->thresholds.vcharge_max =
+		da9030_millivolt_to_reg(pdata->vcharge_max);
+}
+
+static void da9030_battery_setup_psy(struct da9030_charger *charger)
+{
+	struct power_supply *psy = &charger->psy;
+	struct power_supply_info *info = charger->battery_info;
+
+	psy->name = info->name;
+	psy->use_for_apm = info->use_for_apm;
+	psy->type = POWER_SUPPLY_TYPE_BATTERY;
+	psy->get_property = da9030_battery_get_property;
+
+	psy->properties = da9030_battery_props;
+	psy->num_properties = ARRAY_SIZE(da9030_battery_props);
+};
+
+static int da9030_battery_charger_init(struct da9030_charger *charger)
+{
+	char v[5];
+	int ret;
+
+	v[0] = v[1] = charger->thresholds.vbat_low;
+	v[2] = charger->thresholds.tbat_high;
+	v[3] = charger->thresholds.tbat_restart;
+	v[4] = charger->thresholds.tbat_low;
+
+	ret = da903x_writes(charger->master, DA9030_VBATMON, 5, v);
+	if (ret)
+		return ret;
+
+	/*
+	 * Enable reference voltage supply for ADC from the LDO_INTERNAL
+	 * regulator. Must be set before ADC measurements can be made.
+	 */
+	ret = da903x_write(charger->master, DA9030_ADC_MAN_CONTROL,
+			   DA9030_ADC_LDO_INT_ENABLE |
+			   DA9030_ADC_TBATREF_ENABLE);
+	if (ret)
+		return ret;
+
+	/* enable auto ADC measuremnts */
+	return da903x_write(charger->master, DA9030_ADC_AUTO_CONTROL,
+			    DA9030_ADC_TBAT_ENABLE | DA9030_ADC_VBAT_IN_TXON |
+			    DA9030_ADC_VCH_ENABLE | DA9030_ADC_ICH_ENABLE |
+			    DA9030_ADC_VBAT_ENABLE |
+			    DA9030_ADC_AUTO_SLEEP_ENABLE);
+}
+
+static int da9030_battery_probe(struct platform_device *pdev)
+{
+	struct da9030_charger *charger;
+	struct da9030_battery_info *pdata = pdev->dev.platform_data;
+	int ret;
+
+	if (pdata == NULL)
+		return -EINVAL;
+
+	if (pdata->charge_milliamp >= 1500 ||
+	    pdata->charge_millivolt < 4000 ||
+	    pdata->charge_millivolt > 4350)
+		return -EINVAL;
+
+	charger = kzalloc(sizeof(*charger), GFP_KERNEL);
+	if (charger == NULL)
+		return -ENOMEM;
+
+	charger->master = pdev->dev.parent;
+
+	/* 10 seconds between monotor runs unless platfrom defines other
+	   interval */
+	charger->interval = msecs_to_jiffies(
+		(pdata->batmon_interval ? : 10) * 1000);
+
+	charger->charge_milliamp = pdata->charge_milliamp;
+	charger->charge_millivolt = pdata->charge_millivolt;
+	charger->battery_info = pdata->battery_info;
+	charger->battery_low = pdata->battery_low;
+	charger->battery_critical = pdata->battery_critical;
+
+	da9030_battery_convert_thresholds(charger, pdata);
+
+	ret = da9030_battery_charger_init(charger);
+	if (ret)
+		goto err_charger_init;
+
+	INIT_DELAYED_WORK(&charger->work, da9030_charging_monitor);
+	schedule_delayed_work(&charger->work, charger->interval);
+
+	charger->nb.notifier_call = da9030_battery_event;
+	ret = da903x_register_notifier(charger->master, &charger->nb,
+				       DA9030_EVENT_CHDET |
+				       DA9030_EVENT_VBATMON |
+				       DA9030_EVENT_CHIOVER |
+				       DA9030_EVENT_TBAT);
+	if (ret)
+		goto err_notifier;
+
+	da9030_battery_setup_psy(charger);
+	ret = power_supply_register(&pdev->dev, &charger->psy);
+	if (ret)
+		goto err_ps_register;
+
+	charger->debug_file = da9030_bat_create_debugfs(charger);
+	platform_set_drvdata(pdev, charger);
+	return 0;
+
+err_ps_register:
+	da903x_unregister_notifier(charger->master, &charger->nb,
+				   DA9030_EVENT_CHDET | DA9030_EVENT_VBATMON |
+				   DA9030_EVENT_CHIOVER | DA9030_EVENT_TBAT);
+err_notifier:
+	cancel_delayed_work(&charger->work);
+
+err_charger_init:
+	kfree(charger);
+
+	return ret;
+}
+
+static int da9030_battery_remove(struct platform_device *dev)
+{
+	struct da9030_charger *charger = platform_get_drvdata(dev);
+
+	da9030_bat_remove_debugfs(charger);
+
+	da903x_unregister_notifier(charger->master, &charger->nb,
+				   DA9030_EVENT_CHDET | DA9030_EVENT_VBATMON |
+				   DA9030_EVENT_CHIOVER | DA9030_EVENT_TBAT);
+	cancel_delayed_work(&charger->work);
+	power_supply_unregister(&charger->psy);
+
+	kfree(charger);
+
+	return 0;
+}
+
+static struct platform_driver da903x_battery_driver = {
+	.driver	= {
+		.name	= "da903x-battery",
+		.owner	= THIS_MODULE,
+	},
+	.probe = da9030_battery_probe,
+	.remove = da9030_battery_remove,
+};
+
+static int da903x_battery_init(void)
+{
+	return platform_driver_register(&da903x_battery_driver);
+}
+
+static void da903x_battery_exit(void)
+{
+	platform_driver_unregister(&da903x_battery_driver);
+}
+
+module_init(da903x_battery_init);
+module_exit(da903x_battery_exit);
+
+MODULE_DESCRIPTION("DA9030 battery charger driver");
+MODULE_AUTHOR("Mike Rapoport, CompuLab");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 94964f96a6b7018d68b7386cd8c0b8505d3cf69f Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@openedhand.com>
Date: Thu, 18 Dec 2008 11:38:02 +0100
Subject: mfd: Use irq_to_desc in twl4030 code

The global irq_desc array is soon going to be accessible only with
!CONFIG_SPARSE_IRQ. We should start using the generic irq_to_desc()
routines instead.

Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index fae868a..b108760 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -180,10 +180,15 @@ static struct completion irq_event;
 static int twl4030_irq_thread(void *data)
 {
 	long irq = (long)data;
-	irq_desc_t *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	static unsigned i2c_errors;
 	const static unsigned max_i2c_errors = 100;
 
+	if (!desc) {
+		pr_err("twl4030: Invalid IRQ: %ld\n", irq);
+		return -EINVAL;
+	}
+
 	current->flags |= PF_NOFREEZE;
 
 	while (!kthread_should_stop()) {
@@ -215,7 +220,13 @@ static int twl4030_irq_thread(void *data)
 				pih_isr;
 				pih_isr >>= 1, module_irq++) {
 			if (pih_isr & 0x1) {
-				irq_desc_t *d = irq_desc + module_irq;
+				struct irq_desc *d = irq_to_desc(module_irq);
+
+				if (!d) {
+					pr_err("twl4030: Invalid SIH IRQ: %d\n",
+					       module_irq);
+					return -EINVAL;
+				}
 
 				/* These can't be masked ... always warn
 				 * if we get any surprises.
@@ -452,10 +463,16 @@ static void twl4030_sih_do_edge(struct work_struct *work)
 	/* Modify only the bits we know must change */
 	while (edge_change) {
 		int		i = fls(edge_change) - 1;
-		struct irq_desc	*d = irq_desc + i + agent->irq_base;
+		struct irq_desc	*d = irq_to_desc(i + agent->irq_base);
 		int		byte = 1 + (i >> 2);
 		int		off = (i & 0x3) * 2;
 
+		if (!d) {
+			pr_err("twl4030: Invalid IRQ: %d\n",
+			       i + agent->irq_base);
+			return;
+		}
+
 		bytes[byte] &= ~(0x03 << off);
 
 		spin_lock_irq(&d->lock);
@@ -512,9 +529,14 @@ static void twl4030_sih_unmask(unsigned irq)
 static int twl4030_sih_set_type(unsigned irq, unsigned trigger)
 {
 	struct sih_agent *sih = get_irq_chip_data(irq);
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
+	if (!desc) {
+		pr_err("twl4030: Invalid IRQ: %d\n", irq);
+		return -EINVAL;
+	}
+
 	if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
 		return -EINVAL;
 
-- 
cgit v0.10.2


From 96920630624868add3f63f596523e70dbb64549a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 18 Dec 2008 23:09:50 +0100
Subject: mfd: Add WM8352 support

The WM8352 is a variant of the WM8350. Aside from the register defaults
there are no software visible differences to the WM8350.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index ddfb12b..76a482d 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -143,6 +143,22 @@ config MFD_WM8350_CONFIG_MODE_3
 	bool
 	depends on MFD_WM8350
 
+config MFD_WM8352_CONFIG_MODE_0
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8352_CONFIG_MODE_1
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8352_CONFIG_MODE_2
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8352_CONFIG_MODE_3
+	bool
+	depends on MFD_WM8350
+
 config MFD_WM8350_I2C
 	tristate "Support Wolfson Microelectronics WM8350 with I2C"
 	select MFD_WM8350
diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 2188d75..fa505ac 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1133,35 +1133,75 @@ EXPORT_SYMBOL_GPL(wm8350_read_auxadc);
 /*
  * Cache is always host endian.
  */
-static int wm8350_create_cache(struct wm8350 *wm8350, int mode)
+static int wm8350_create_cache(struct wm8350 *wm8350, int type, int mode)
 {
 	int i, ret = 0;
 	u16 value;
 	const u16 *reg_map;
 
-	switch (mode) {
-#ifdef CONFIG_MFD_WM8350_CONFIG_MODE_0
+	switch (type) {
 	case 0:
-		reg_map = wm8350_mode0_defaults;
-		break;
+		switch (mode) {
+#ifdef CONFIG_MFD_WM8350_CONFIG_MODE_0
+		case 0:
+			reg_map = wm8350_mode0_defaults;
+			break;
 #endif
 #ifdef CONFIG_MFD_WM8350_CONFIG_MODE_1
-	case 1:
-		reg_map = wm8350_mode1_defaults;
-		break;
+		case 1:
+			reg_map = wm8350_mode1_defaults;
+			break;
 #endif
 #ifdef CONFIG_MFD_WM8350_CONFIG_MODE_2
-	case 2:
-		reg_map = wm8350_mode2_defaults;
-		break;
+		case 2:
+			reg_map = wm8350_mode2_defaults;
+			break;
 #endif
 #ifdef CONFIG_MFD_WM8350_CONFIG_MODE_3
-	case 3:
-		reg_map = wm8350_mode3_defaults;
-		break;
+		case 3:
+			reg_map = wm8350_mode3_defaults;
+			break;
 #endif
+		default:
+			dev_err(wm8350->dev,
+				"WM8350 configuration mode %d not supported\n",
+				mode);
+			return -EINVAL;
+		}
+
+	case 2:
+		switch (mode) {
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_0
+		case 0:
+			reg_map = wm8352_mode0_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_1
+		case 1:
+			reg_map = wm8352_mode1_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_2
+		case 2:
+			reg_map = wm8352_mode2_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_3
+		case 3:
+			reg_map = wm8352_mode3_defaults;
+			break;
+#endif
+		default:
+			dev_err(wm8350->dev,
+				"WM8352 configuration mode %d not supported\n",
+				mode);
+			return -EINVAL;
+		}
+		break;
+
 	default:
-		dev_err(wm8350->dev, "Configuration mode %d not supported\n",
+		dev_err(wm8350->dev,
+			"WM835x configuration mode %d not supported\n",
 			mode);
 		return -EINVAL;
 	}
@@ -1284,13 +1324,27 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		}
 		break;
 
+	case 2:
+		switch (chip_rev) {
+		case 0:
+			dev_info(wm8350->dev, "WM8352 Rev A\n");
+			wm8350->power.rev_g_coeff = 1;
+			break;
+
+		default:
+			dev_err(wm8350->dev, "Unknown WM8352 CHIP_REV\n");
+			ret = -ENODEV;
+			goto err;
+		}
+		break;
+
 	default:
 		dev_err(wm8350->dev, "Unknown MASK_REV\n");
 		ret = -ENODEV;
 		goto err;
 	}
 
-	ret = wm8350_create_cache(wm8350, mode);
+	ret = wm8350_create_cache(wm8350, mask_rev, mode);
 	if (ret < 0) {
 		dev_err(wm8350->dev, "Failed to create register cache\n");
 		return ret;
diff --git a/drivers/mfd/wm8350-i2c.c b/drivers/mfd/wm8350-i2c.c
index 876e693..8780512 100644
--- a/drivers/mfd/wm8350-i2c.c
+++ b/drivers/mfd/wm8350-i2c.c
@@ -97,6 +97,7 @@ static int wm8350_i2c_remove(struct i2c_client *i2c)
 
 static const struct i2c_device_id wm8350_i2c_id[] = {
        { "wm8350", 0 },
+       { "wm8352", 0 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, wm8350_i2c_id);
diff --git a/drivers/mfd/wm8350-regmap.c b/drivers/mfd/wm8350-regmap.c
index b43d64c..3e2cc37 100644
--- a/drivers/mfd/wm8350-regmap.c
+++ b/drivers/mfd/wm8350-regmap.c
@@ -1074,6 +1074,1058 @@ const u16 wm8350_mode3_defaults[] = {
 };
 #endif
 
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_0
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8352_mode0_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0002,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0004,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0FFC,     /* R134 - GPIO Configuration (i/o) */
+	0x0FFC,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0013,     /* R140 - GPIO Function Select 1 */
+	0x0000,     /* R141 - GPIO Function Select 2 */
+	0x0000,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 - Current Sink Driver B */
+	0x0000,     /* R175 - CSB Flash control */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x000E,     /* R180 - DCDC1 Control */
+	0x0000,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0000,     /* R186 - DCDC3 Control */
+	0x0000,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0000,     /* R189 - DCDC4 Control */
+	0x0000,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 - DCDC5 Control */
+	0x0000,     /* R193 - DCDC5 Timeouts */
+	0x0000,     /* R194 */
+	0x0000,     /* R195 - DCDC6 Control */
+	0x0000,     /* R196 - DCDC6 Timeouts */
+	0x0006,     /* R197 - DCDC6 Low Power */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x001C,     /* R200 - LDO1 Control */
+	0x0000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x001B,     /* R203 - LDO2 Control */
+	0x0000,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x001B,     /* R206 - LDO3 Control */
+	0x0000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001B,     /* R209 - LDO4 Control */
+	0x0000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x5000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+	0x5100,     /* R252 */
+	0x1000,     /* R253 - DCDC6 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_1
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8352_mode1_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0002,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0204,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0BFB,     /* R134 - GPIO Configuration (i/o) */
+	0x0FFF,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0300,     /* R140 - GPIO Function Select 1 */
+	0x0000,     /* R141 - GPIO Function Select 2 */
+	0x2300,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 - Current Sink Driver B */
+	0x0000,     /* R175 - CSB Flash control */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x0062,     /* R180 - DCDC1 Control */
+	0x0400,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0006,     /* R186 - DCDC3 Control */
+	0x0800,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0006,     /* R189 - DCDC4 Control */
+	0x0C00,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 - DCDC5 Control */
+	0x0000,     /* R193 - DCDC5 Timeouts */
+	0x0000,     /* R194 */
+	0x0026,     /* R195 - DCDC6 Control */
+	0x1000,     /* R196 - DCDC6 Timeouts */
+	0x0006,     /* R197 - DCDC6 Low Power */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x0002,     /* R200 - LDO1 Control */
+	0x0000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x001A,     /* R203 - LDO2 Control */
+	0x0000,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x001F,     /* R206 - LDO3 Control */
+	0x0000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001F,     /* R209 - LDO4 Control */
+	0x0000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x5000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+	0x5100,     /* R252 */
+	0x1000,     /* R253 - DCDC6 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_2
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8352_mode2_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0002,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0204,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0110,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x09DA,     /* R134 - GPIO Configuration (i/o) */
+	0x0DD6,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x1310,     /* R140 - GPIO Function Select 1 */
+	0x0033,     /* R141 - GPIO Function Select 2 */
+	0x2000,     /* R142 - GPIO Function Select 3 */
+	0x0000,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 - Current Sink Driver B */
+	0x0000,     /* R175 - CSB Flash control */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x000E,     /* R180 - DCDC1 Control */
+	0x0800,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0056,     /* R186 - DCDC3 Control */
+	0x1800,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x000E,     /* R189 - DCDC4 Control */
+	0x1000,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 - DCDC5 Control */
+	0x0000,     /* R193 - DCDC5 Timeouts */
+	0x0000,     /* R194 */
+	0x0026,     /* R195 - DCDC6 Control */
+	0x0C00,     /* R196 - DCDC6 Timeouts */
+	0x0006,     /* R197 - DCDC6 Low Power */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x001C,     /* R200 - LDO1 Control */
+	0x0000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x0006,     /* R203 - LDO2 Control */
+	0x0400,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x001C,     /* R206 - LDO3 Control */
+	0x1400,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001A,     /* R209 - LDO4 Control */
+	0x0000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x5000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+	0x5100,     /* R252 */
+	0x1000,     /* R253 - DCDC6 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_3
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8352_mode3_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0002,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0204,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0010,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0BFB,     /* R134 - GPIO Configuration (i/o) */
+	0x0FFD,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0310,     /* R140 - GPIO Function Select 1 */
+	0x0001,     /* R141 - GPIO Function Select 2 */
+	0x2300,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 - Current Sink Driver B */
+	0x0000,     /* R175 - CSB Flash control */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x0006,     /* R180 - DCDC1 Control */
+	0x0400,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0050,     /* R186 - DCDC3 Control */
+	0x0C00,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x000E,     /* R189 - DCDC4 Control */
+	0x0400,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 - DCDC5 Control */
+	0x0000,     /* R193 - DCDC5 Timeouts */
+	0x0000,     /* R194 */
+	0x0029,     /* R195 - DCDC6 Control */
+	0x0800,     /* R196 - DCDC6 Timeouts */
+	0x0006,     /* R197 - DCDC6 Low Power */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x001D,     /* R200 - LDO1 Control */
+	0x1000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x0017,     /* R203 - LDO2 Control */
+	0x1000,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x0006,     /* R206 - LDO3 Control */
+	0x1000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x0010,     /* R209 - LDO4 Control */
+	0x1000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x5000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+	0x5100,     /* R252 */
+	0x1000,     /* R253 - DCDC6 Test Controls */
+};
+#endif
+
 /* The register defaults for the config mode used must be compiled in but
  * due to the impact on kernel size it is possible to disable
  */
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index afeff6f..7375790 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -589,6 +589,10 @@ extern const u16 wm8350_mode0_defaults[];
 extern const u16 wm8350_mode1_defaults[];
 extern const u16 wm8350_mode2_defaults[];
 extern const u16 wm8350_mode3_defaults[];
+extern const u16 wm8352_mode0_defaults[];
+extern const u16 wm8352_mode1_defaults[];
+extern const u16 wm8352_mode2_defaults[];
+extern const u16 wm8352_mode3_defaults[];
 
 struct wm8350;
 
-- 
cgit v0.10.2


From 53a0d99b1ef14f56baec06eec1e3dad031672b3a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 18 Dec 2008 23:12:08 +0100
Subject: mfd: Handle missing WM8350 platform data

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index fa505ac..03af3b1 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1350,7 +1350,7 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		return ret;
 	}
 
-	if (pdata->init) {
+	if (pdata && pdata->init) {
 		ret = pdata->init(wm8350);
 		if (ret != 0) {
 			dev_err(wm8350->dev, "Platform init() failed: %d\n",
-- 
cgit v0.10.2


From 645524a9c6e1e42dc4fe03217befb20e2fc4d43e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 18 Dec 2008 23:12:16 +0100
Subject: mfd: Support configurable numbers of DCDCs and ISINKs on WM8350

Some WM8350 variants have fewer DCDCs and ISINKs. Identify these at
probe and refuse to use the absent DCDCs when running on these chips.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 03af3b1..56c363c 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1301,6 +1301,9 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 
 	switch (mask_rev) {
 	case 0:
+		wm8350->pmic.max_dcdc = WM8350_DCDC_6;
+		wm8350->pmic.max_isink = WM8350_ISINK_B;
+
 		switch (chip_rev) {
 		case WM8350_REV_E:
 			dev_info(wm8350->dev, "WM8350 Rev E\n");
@@ -1325,6 +1328,9 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		break;
 
 	case 2:
+		wm8350->pmic.max_dcdc = WM8350_DCDC_6;
+		wm8350->pmic.max_isink = WM8350_ISINK_B;
+
 		switch (chip_rev) {
 		case 0:
 			dev_info(wm8350->dev, "WM8352 Rev A\n");
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 1f44b17..c68c496 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -1380,6 +1380,13 @@ int wm8350_register_regulator(struct wm8350 *wm8350, int reg,
 	if (wm8350->pmic.pdev[reg])
 		return -EBUSY;
 
+	if (reg >= WM8350_DCDC_1 && reg <= WM8350_DCDC_6 &&
+	    reg > wm8350->pmic.max_dcdc)
+		return -ENODEV;
+	if (reg >= WM8350_ISINK_A && reg <= WM8350_ISINK_B &&
+	    reg > wm8350->pmic.max_isink)
+		return -ENODEV;
+
 	pdev = platform_device_alloc("wm8350-regulator", reg);
 	if (!pdev)
 		return -ENOMEM;
diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h
index 69b69e0..96acbfc 100644
--- a/include/linux/mfd/wm8350/pmic.h
+++ b/include/linux/mfd/wm8350/pmic.h
@@ -701,6 +701,10 @@ struct platform_device;
 struct regulator_init_data;
 
 struct wm8350_pmic {
+	/* Number of regulators of each type on this device */
+	int max_dcdc;
+	int max_isink;
+
 	/* ISINK to DCDC mapping */
 	int isink_A_dcdc;
 	int isink_B_dcdc;
-- 
cgit v0.10.2


From ca23f8c1b0aa15dc69565244fc5dffa67a72dd02 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 18 Dec 2008 23:12:28 +0100
Subject: mfd: Add WM8351 support

The WM8351 is a WM8350 variant. As well as register default changes the
WM8351 has fewer voltage and current regulators than the WM8350.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 76a482d..781a279 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -143,6 +143,22 @@ config MFD_WM8350_CONFIG_MODE_3
 	bool
 	depends on MFD_WM8350
 
+config MFD_WM8351_CONFIG_MODE_0
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8351_CONFIG_MODE_1
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8351_CONFIG_MODE_2
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8351_CONFIG_MODE_3
+	bool
+	depends on MFD_WM8350
+
 config MFD_WM8352_CONFIG_MODE_0
 	bool
 	depends on MFD_WM8350
diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 56c363c..e03fe60 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1169,6 +1169,36 @@ static int wm8350_create_cache(struct wm8350 *wm8350, int type, int mode)
 			return -EINVAL;
 		}
 
+	case 1:
+		switch (mode) {
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_0
+		case 0:
+			reg_map = wm8351_mode0_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_1
+		case 1:
+			reg_map = wm8351_mode1_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_2
+		case 2:
+			reg_map = wm8351_mode2_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_3
+		case 3:
+			reg_map = wm8351_mode3_defaults;
+			break;
+#endif
+		default:
+			dev_err(wm8350->dev,
+				"WM8351 configuration mode %d not supported\n",
+				mode);
+			return -EINVAL;
+		}
+		break;
+
 	case 2:
 		switch (mode) {
 #ifdef CONFIG_MFD_WM8352_CONFIG_MODE_0
@@ -1327,6 +1357,23 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		}
 		break;
 
+	case 1:
+		wm8350->pmic.max_dcdc = WM8350_DCDC_4;
+		wm8350->pmic.max_isink = WM8350_ISINK_A;
+
+		switch (chip_rev) {
+		case 0:
+			dev_info(wm8350->dev, "WM8351 Rev A\n");
+			wm8350->power.rev_g_coeff = 1;
+			break;
+
+		default:
+			dev_err(wm8350->dev, "Unknown WM8351 CHIP_REV\n");
+			ret = -ENODEV;
+			goto err;
+		}
+		break;
+
 	case 2:
 		wm8350->pmic.max_dcdc = WM8350_DCDC_6;
 		wm8350->pmic.max_isink = WM8350_ISINK_B;
diff --git a/drivers/mfd/wm8350-i2c.c b/drivers/mfd/wm8350-i2c.c
index 8780512..8d8c932 100644
--- a/drivers/mfd/wm8350-i2c.c
+++ b/drivers/mfd/wm8350-i2c.c
@@ -97,6 +97,7 @@ static int wm8350_i2c_remove(struct i2c_client *i2c)
 
 static const struct i2c_device_id wm8350_i2c_id[] = {
        { "wm8350", 0 },
+       { "wm8351", 0 },
        { "wm8352", 0 },
        { }
 };
diff --git a/drivers/mfd/wm8350-regmap.c b/drivers/mfd/wm8350-regmap.c
index 3e2cc37..68887b8 100644
--- a/drivers/mfd/wm8350-regmap.c
+++ b/drivers/mfd/wm8350-regmap.c
@@ -1074,6 +1074,1050 @@ const u16 wm8350_mode3_defaults[] = {
 };
 #endif
 
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_0
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8351_mode0_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0001,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0004,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0FFC,     /* R134 - GPIO Configuration (i/o) */
+	0x0FFC,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0013,     /* R140 - GPIO Function Select 1 */
+	0x0000,     /* R141 - GPIO Function Select 2 */
+	0x0000,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 */
+	0x0000,     /* R175 */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x000E,     /* R180 - DCDC1 Control */
+	0x0000,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0000,     /* R186 - DCDC3 Control */
+	0x0000,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0000,     /* R189 - DCDC4 Control */
+	0x0000,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 */
+	0x0000,     /* R193 */
+	0x0000,     /* R194 */
+	0x0000,     /* R195 */
+	0x0000,     /* R196 */
+	0x0006,     /* R197 */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x001C,     /* R200 - LDO1 Control */
+	0x0000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x001B,     /* R203 - LDO2 Control */
+	0x0000,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x001B,     /* R206 - LDO3 Control */
+	0x0000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001B,     /* R209 - LDO4 Control */
+	0x0000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 - FLL Test 1 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x1000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_1
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8351_mode1_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0001,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0204,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0CFB,     /* R134 - GPIO Configuration (i/o) */
+	0x0C1F,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0300,     /* R140 - GPIO Function Select 1 */
+	0x1110,     /* R141 - GPIO Function Select 2 */
+	0x0013,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 */
+	0x0000,     /* R175 */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x000E,     /* R180 - DCDC1 Control */
+	0x0C00,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0026,     /* R186 - DCDC3 Control */
+	0x0400,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0062,     /* R189 - DCDC4 Control */
+	0x0800,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 */
+	0x0000,     /* R193 */
+	0x0000,     /* R194 */
+	0x000A,     /* R195 */
+	0x1000,     /* R196 */
+	0x0006,     /* R197 */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x0006,     /* R200 - LDO1 Control */
+	0x0000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x0010,     /* R203 - LDO2 Control */
+	0x0C00,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x001F,     /* R206 - LDO3 Control */
+	0x0800,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x000A,     /* R209 - LDO4 Control */
+	0x0800,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 - FLL Test 1 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x1000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x1000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_2
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8351_mode2_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0001,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0214,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0110,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x09FA,     /* R134 - GPIO Configuration (i/o) */
+	0x0DF6,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x1310,     /* R140 - GPIO Function Select 1 */
+	0x0003,     /* R141 - GPIO Function Select 2 */
+	0x2000,     /* R142 - GPIO Function Select 3 */
+	0x0000,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 */
+	0x0000,     /* R175 */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x001A,     /* R180 - DCDC1 Control */
+	0x0800,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0056,     /* R186 - DCDC3 Control */
+	0x0400,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0026,     /* R189 - DCDC4 Control */
+	0x0C00,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 */
+	0x0000,     /* R193 */
+	0x0000,     /* R194 */
+	0x0026,     /* R195 */
+	0x0C00,     /* R196 */
+	0x0006,     /* R197 */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x001C,     /* R200 - LDO1 Control */
+	0x0400,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x0010,     /* R203 - LDO2 Control */
+	0x0C00,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x0015,     /* R206 - LDO3 Control */
+	0x0000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001A,     /* R209 - LDO4 Control */
+	0x0000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 - FLL Test 1 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x1000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_3
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8351_mode3_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0001,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0204,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0010,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0BFB,     /* R134 - GPIO Configuration (i/o) */
+	0x0FFD,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0310,     /* R140 - GPIO Function Select 1 */
+	0x0001,     /* R141 - GPIO Function Select 2 */
+	0x2300,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 */
+	0x0000,     /* R175 */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x000E,     /* R180 - DCDC1 Control */
+	0x0400,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0026,     /* R186 - DCDC3 Control */
+	0x0800,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0062,     /* R189 - DCDC4 Control */
+	0x1400,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 */
+	0x0000,     /* R193 */
+	0x0000,     /* R194 */
+	0x0026,     /* R195 */
+	0x0400,     /* R196 */
+	0x0006,     /* R197 */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x0006,     /* R200 - LDO1 Control */
+	0x0C00,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x0016,     /* R203 - LDO2 Control */
+	0x0000,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x0019,     /* R206 - LDO3 Control */
+	0x0000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001A,     /* R209 - LDO4 Control */
+	0x1000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 - FLL Test 1 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x1000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+};
+#endif
+
 #ifdef CONFIG_MFD_WM8352_CONFIG_MODE_0
 
 #undef WM8350_HAVE_CONFIG_MODE
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 7375790..980669d 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -589,6 +589,10 @@ extern const u16 wm8350_mode0_defaults[];
 extern const u16 wm8350_mode1_defaults[];
 extern const u16 wm8350_mode2_defaults[];
 extern const u16 wm8350_mode3_defaults[];
+extern const u16 wm8351_mode0_defaults[];
+extern const u16 wm8351_mode1_defaults[];
+extern const u16 wm8351_mode2_defaults[];
+extern const u16 wm8351_mode3_defaults[];
 extern const u16 wm8352_mode0_defaults[];
 extern const u16 wm8352_mode1_defaults[];
 extern const u16 wm8352_mode2_defaults[];
-- 
cgit v0.10.2


From 4331bb32339a55fd88fbfb0581ed5132207bf9a2 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 18 Dec 2008 23:20:14 +0100
Subject: mfd: Add missing break from wm3850-core

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index e03fe60..3a273cc 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1168,6 +1168,7 @@ static int wm8350_create_cache(struct wm8350 *wm8350, int type, int mode)
 				mode);
 			return -EINVAL;
 		}
+		break;
 
 	case 1:
 		switch (mode) {
-- 
cgit v0.10.2


From 0931a4c6dbfab03f2bfd22a9170130f7b155d53a Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Mon, 22 Dec 2008 12:05:27 +0100
Subject: mfd: dm355evm msp430 driver

Basic MFD framework for the MSP430 microcontroller firmware used
on the dm355evm board:

 - Provides an interface for other drivers: register read/write
   utilities, and register declarations.

 - Directly exports:
     * Many signals through the GPIO framework
         + LEDs
         + SW6 through gpio sysfs
	 + NTSC/nPAL jumper through gpio sysfs
	 + ... more could be added later, e.g. MMC signals
     * Child devices:
	+ LEDs, via leds-gpio child (and default triggers)
	+ RTC, via rtc-dm355evm child device
	+ Buttons and IR control, via dm355evm_keys

 - Supports power-off system call.  Use the reset button to power
   the board back up; the power supply LED will be on, but the
   MSP430 waits to re-activate the regulators.

 - On probe() this:
     * Announces firmware revision
     * Turns off the banked LEDs
     * Exports the resources noted above
     * Hooks the power-off support
     * Muxes tvp5146 -or- imager for video input

Unless the new tvp514x driver (tracked for mainline) is configured,
this assumes that some custom imager driver handles video-in.

This completely ignores the registers reporting the output voltages
on the various power supplies.  Someone could add a hwmon interface
if that seems useful.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 781a279..02e9146 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -34,6 +34,14 @@ config MFD_ASIC3
 	  This driver supports the ASIC3 multifunction chip found on many
 	  PDAs (mainly iPAQ and HTC based ones)
 
+config MFD_DM355EVM_MSP
+	bool "DaVinci DM355 EVM microcontroller"
+	depends on I2C && MACH_DAVINCI_DM355_EVM
+	help
+	  This driver supports the MSP430 microcontroller used on these
+	  boards.  MSP430 firmware manages resets and power sequencing,
+	  inputs from buttons and the IR remote, LEDs, an RTC, and more.
+
 config HTC_EGPIO
 	bool "HTC EGPIO support"
 	depends on GENERIC_HARDIRQS && GPIOLIB && ARM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 9a5ad8a..8f6cd5c 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -8,6 +8,8 @@ obj-$(CONFIG_MFD_ASIC3)		+= asic3.o
 obj-$(CONFIG_HTC_EGPIO)		+= htc-egpio.o
 obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
 
+obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
+
 obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o
 obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o
 obj-$(CONFIG_MFD_TC6393XB)	+= tc6393xb.o
diff --git a/drivers/mfd/dm355evm_msp.c b/drivers/mfd/dm355evm_msp.c
new file mode 100644
index 0000000..4214b3f
--- /dev/null
+++ b/drivers/mfd/dm355evm_msp.c
@@ -0,0 +1,420 @@
+/*
+ * dm355evm_msp.c - driver for MSP430 firmware on DM355EVM board
+ *
+ * Copyright (C) 2008 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/leds.h>
+#include <linux/i2c.h>
+#include <linux/i2c/dm355evm_msp.h>
+
+
+/*
+ * The DM355 is a DaVinci chip with video support but no C64+ DSP.  Its
+ * EVM board has an MSP430 programmed with firmware for various board
+ * support functions.  This driver exposes some of them directly, and
+ * supports other drivers (e.g. RTC, input) for more complex access.
+ *
+ * Because this firmware is entirely board-specific, this file embeds
+ * knowledge that would be passed as platform_data in a generic driver.
+ *
+ * This driver was tested with firmware revision A4.
+ */
+
+#if defined(CONFIG_KEYBOARD_DM355EVM) \
+		|| defined(CONFIG_KEYBOARD_DM355EVM_MODULE)
+#define msp_has_keyboard()	true
+#else
+#define msp_has_keyboard()	false
+#endif
+
+#if defined(CONFIG_LEDS_GPIO) || defined(CONFIG_LEDS_GPIO_MODULE)
+#define msp_has_leds()		true
+#else
+#define msp_has_leds()		false
+#endif
+
+#if defined(CONFIG_RTC_DRV_DM355EVM) || defined(CONFIG_RTC_DRV_DM355EVM_MODULE)
+#define msp_has_rtc()		true
+#else
+#define msp_has_rtc()		false
+#endif
+
+#if defined(CONFIG_VIDEO_TVP514X) || defined(CONFIG_VIDEO_TVP514X_MODULE)
+#define msp_has_tvp()		true
+#else
+#define msp_has_tvp()		false
+#endif
+
+
+/*----------------------------------------------------------------------*/
+
+/* REVISIT for paranoia's sake, retry reads/writes on error */
+
+static struct i2c_client *msp430;
+
+/**
+ * dm355evm_msp_write - Writes a register in dm355evm_msp
+ * @value: the value to be written
+ * @reg: register address
+ *
+ * Returns result of operation - 0 is success, else negative errno
+ */
+int dm355evm_msp_write(u8 value, u8 reg)
+{
+	return i2c_smbus_write_byte_data(msp430, reg, value);
+}
+EXPORT_SYMBOL(dm355evm_msp_write);
+
+/**
+ * dm355evm_msp_read - Reads a register from dm355evm_msp
+ * @reg: register address
+ *
+ * Returns result of operation - value, or negative errno
+ */
+int dm355evm_msp_read(u8 reg)
+{
+	return i2c_smbus_read_byte_data(msp430, reg);
+}
+EXPORT_SYMBOL(dm355evm_msp_read);
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Many of the msp430 pins are just used as fixed-direction GPIOs.
+ * We could export a few more of them this way, if we wanted.
+ */
+#define MSP_GPIO(bit,reg)	((DM355EVM_MSP_ ## reg) << 3 | (bit))
+
+static const u8 msp_gpios[] = {
+	/* eight leds */
+	MSP_GPIO(0, LED), MSP_GPIO(1, LED),
+	MSP_GPIO(2, LED), MSP_GPIO(3, LED),
+	MSP_GPIO(4, LED), MSP_GPIO(5, LED),
+	MSP_GPIO(6, LED), MSP_GPIO(7, LED),
+	/* SW6 and the NTSC/nPAL jumper */
+	MSP_GPIO(0, SWITCH1), MSP_GPIO(1, SWITCH1),
+	MSP_GPIO(2, SWITCH1), MSP_GPIO(3, SWITCH1),
+	MSP_GPIO(4, SWITCH1),
+};
+
+#define MSP_GPIO_REG(offset)	(msp_gpios[(offset)] >> 3)
+#define MSP_GPIO_MASK(offset)	BIT(msp_gpios[(offset)] & 0x07)
+
+static int msp_gpio_in(struct gpio_chip *chip, unsigned offset)
+{
+	switch (MSP_GPIO_REG(offset)) {
+	case DM355EVM_MSP_SWITCH1:
+	case DM355EVM_MSP_SWITCH2:
+	case DM355EVM_MSP_SDMMC:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static u8 msp_led_cache;
+
+static int msp_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	int reg, status;
+
+	reg = MSP_GPIO_REG(offset);
+	status = dm355evm_msp_read(reg);
+	if (status < 0)
+		return status;
+	if (reg == DM355EVM_MSP_LED)
+		msp_led_cache = status;
+	return status & MSP_GPIO_MASK(offset);
+}
+
+static int msp_gpio_out(struct gpio_chip *chip, unsigned offset, int value)
+{
+	int mask, bits;
+
+	/* NOTE:  there are some other signals that could be
+	 * packaged as output GPIOs, but they aren't as useful
+	 * as the LEDs ... so for now we don't.
+	 */
+	if (MSP_GPIO_REG(offset) != DM355EVM_MSP_LED)
+		return -EINVAL;
+
+	mask = MSP_GPIO_MASK(offset);
+	bits = msp_led_cache;
+
+	bits &= ~mask;
+	if (value)
+		bits |= mask;
+	msp_led_cache = bits;
+
+	return dm355evm_msp_write(bits, DM355EVM_MSP_LED);
+}
+
+static void msp_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	msp_gpio_out(chip, offset, value);
+}
+
+static struct gpio_chip dm355evm_msp_gpio = {
+	.label			= "dm355evm_msp",
+	.owner			= THIS_MODULE,
+	.direction_input	= msp_gpio_in,
+	.get			= msp_gpio_get,
+	.direction_output	= msp_gpio_out,
+	.set			= msp_gpio_set,
+	.base			= -EINVAL,		/* dynamic assignment */
+	.ngpio			= ARRAY_SIZE(msp_gpios),
+	.can_sleep		= true,
+};
+
+/*----------------------------------------------------------------------*/
+
+static struct device *add_child(struct i2c_client *client, const char *name,
+		void *pdata, unsigned pdata_len,
+		bool can_wakeup, int irq)
+{
+	struct platform_device	*pdev;
+	int			status;
+
+	pdev = platform_device_alloc(name, -1);
+	if (!pdev) {
+		dev_dbg(&client->dev, "can't alloc dev\n");
+		status = -ENOMEM;
+		goto err;
+	}
+
+	device_init_wakeup(&pdev->dev, can_wakeup);
+	pdev->dev.parent = &client->dev;
+
+	if (pdata) {
+		status = platform_device_add_data(pdev, pdata, pdata_len);
+		if (status < 0) {
+			dev_dbg(&pdev->dev, "can't add platform_data\n");
+			goto err;
+		}
+	}
+
+	if (irq) {
+		struct resource r = {
+			.start = irq,
+			.flags = IORESOURCE_IRQ,
+		};
+
+		status = platform_device_add_resources(pdev, &r, 1);
+		if (status < 0) {
+			dev_dbg(&pdev->dev, "can't add irq\n");
+			goto err;
+		}
+	}
+
+	status = platform_device_add(pdev);
+
+err:
+	if (status < 0) {
+		platform_device_put(pdev);
+		dev_err(&client->dev, "can't add %s dev\n", name);
+		return ERR_PTR(status);
+	}
+	return &pdev->dev;
+}
+
+static int add_children(struct i2c_client *client)
+{
+	static const struct {
+		int offset;
+		char *label;
+	} config_inputs[] = {
+		/* 8 == right after the LEDs */
+		{ 8 + 0, "sw6_1", },
+		{ 8 + 1, "sw6_2", },
+		{ 8 + 2, "sw6_3", },
+		{ 8 + 3, "sw6_4", },
+		{ 8 + 4, "NTSC/nPAL", },
+	};
+
+	struct device	*child;
+	int		status;
+	int		i;
+
+	/* GPIO-ish stuff */
+	dm355evm_msp_gpio.dev = &client->dev;
+	status = gpiochip_add(&dm355evm_msp_gpio);
+	if (status < 0)
+		return status;
+
+	/* LED output */
+	if (msp_has_leds()) {
+#define GPIO_LED(l)	.name = l, .active_low = true
+		static struct gpio_led evm_leds[] = {
+			{ GPIO_LED("dm355evm::ds14"),
+				.default_trigger = "heartbeat", },
+			{ GPIO_LED("dm355evm::ds15"),
+				.default_trigger = "mmc0", },
+			{ GPIO_LED("dm355evm::ds16"),
+				/* could also be a CE-ATA drive */
+				.default_trigger = "mmc1", },
+			{ GPIO_LED("dm355evm::ds17"),
+				.default_trigger = "nand-disk", },
+			{ GPIO_LED("dm355evm::ds18"), },
+			{ GPIO_LED("dm355evm::ds19"), },
+			{ GPIO_LED("dm355evm::ds20"), },
+			{ GPIO_LED("dm355evm::ds21"), },
+		};
+#undef GPIO_LED
+
+		struct gpio_led_platform_data evm_led_data = {
+			.num_leds	= ARRAY_SIZE(evm_leds),
+			.leds		= evm_leds,
+		};
+
+		for (i = 0; i < ARRAY_SIZE(evm_leds); i++)
+			evm_leds[i].gpio = i + dm355evm_msp_gpio.base;
+
+		/* NOTE:  these are the only fully programmable LEDs
+		 * on the board, since GPIO-61/ds22 (and many signals
+		 * going to DC7) must be used for AEMIF address lines
+		 * unless the top 1 GB of NAND is unused...
+		 */
+		child = add_child(client, "leds-gpio",
+				&evm_led_data, sizeof(evm_led_data),
+				false, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	/* configuration inputs */
+	for (i = 0; i < ARRAY_SIZE(config_inputs); i++) {
+		int gpio = dm355evm_msp_gpio.base + config_inputs[i].offset;
+
+		gpio_request(gpio, config_inputs[i].label);
+		gpio_direction_input(gpio);
+
+		/* make it easy for userspace to see these */
+		gpio_export(gpio, false);
+	}
+
+	/* RTC is a 32 bit counter, no alarm */
+	if (msp_has_rtc()) {
+		child = add_child(client, "rtc-dm355evm",
+				NULL, 0, false, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	/* input from buttons and IR remote (uses the IRQ) */
+	if (msp_has_keyboard()) {
+		child = add_child(client, "dm355evm_keys",
+				NULL, 0, true, client->irq);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	return 0;
+}
+
+/*----------------------------------------------------------------------*/
+
+static void dm355evm_command(unsigned command)
+{
+	int status;
+
+	status = dm355evm_msp_write(command, DM355EVM_MSP_COMMAND);
+	if (status < 0)
+		dev_err(&msp430->dev, "command %d failure %d\n",
+				command, status);
+}
+
+static void dm355evm_power_off(void)
+{
+	dm355evm_command(MSP_COMMAND_POWEROFF);
+}
+
+static int dm355evm_msp_remove(struct i2c_client *client)
+{
+	pm_power_off = NULL;
+	msp430 = NULL;
+	return 0;
+}
+
+static int
+dm355evm_msp_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	int		status;
+	const char	*video = msp_has_tvp() ? "TVP5146" : "imager";
+
+	if (msp430)
+		return -EBUSY;
+	msp430 = client;
+
+	/* display revision status; doubles as sanity check */
+	status = dm355evm_msp_read(DM355EVM_MSP_FIRMREV);
+	if (status < 0)
+		goto fail;
+	dev_info(&client->dev, "firmware v.%02X, %s as video-in\n",
+			status, video);
+
+	/* mux video input:  either tvp5146 or some external imager */
+	status = dm355evm_msp_write(msp_has_tvp() ? 0 : MSP_VIDEO_IMAGER,
+			DM355EVM_MSP_VIDEO_IN);
+	if (status < 0)
+		dev_warn(&client->dev, "error %d muxing %s as video-in\n",
+			status, video);
+
+	/* init LED cache, and turn off the LEDs */
+	msp_led_cache = 0xff;
+	dm355evm_msp_write(msp_led_cache, DM355EVM_MSP_LED);
+
+	/* export capabilities we support */
+	status = add_children(client);
+	if (status < 0)
+		goto fail;
+
+	/* PM hookup */
+	pm_power_off = dm355evm_power_off;
+
+	return 0;
+
+fail:
+	/* FIXME remove children ... */
+	dm355evm_msp_remove(client);
+	return status;
+}
+
+static const struct i2c_device_id dm355evm_msp_ids[] = {
+	{ "dm355evm_msp", 0 },
+	{ /* end of list */ },
+};
+MODULE_DEVICE_TABLE(i2c, dm355evm_msp_ids);
+
+static struct i2c_driver dm355evm_msp_driver = {
+	.driver.name	= "dm355evm_msp",
+	.id_table	= dm355evm_msp_ids,
+	.probe		= dm355evm_msp_probe,
+	.remove		= dm355evm_msp_remove,
+};
+
+static int __init dm355evm_msp_init(void)
+{
+	return i2c_add_driver(&dm355evm_msp_driver);
+}
+subsys_initcall(dm355evm_msp_init);
+
+static void __exit dm355evm_msp_exit(void)
+{
+	i2c_del_driver(&dm355evm_msp_driver);
+}
+module_exit(dm355evm_msp_exit);
+
+MODULE_DESCRIPTION("Interface to MSP430 firmware on DM355EVM");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/dm355evm_msp.h b/include/linux/i2c/dm355evm_msp.h
new file mode 100644
index 0000000..3724703
--- /dev/null
+++ b/include/linux/i2c/dm355evm_msp.h
@@ -0,0 +1,79 @@
+/*
+ * dm355evm_msp.h - support MSP430 microcontroller on DM355EVM board
+ */
+#ifndef __LINUX_I2C_DM355EVM_MSP
+#define __LINUX_I2C_DM355EVM_MSP
+
+/*
+ * Written against Spectrum's writeup for the A4 firmware revision,
+ * and tweaked to match source and rev D2 schematics by removing CPLD
+ * and NOR flash hooks (which were last appropriate in rev B boards).
+ *
+ * Note that the firmware supports a flavor of write posting ... to be
+ * sure a write completes, issue another read or write.
+ */
+
+/* utilities to access "registers" emulated by msp430 firmware */
+extern int dm355evm_msp_write(u8 value, u8 reg);
+extern int dm355evm_msp_read(u8 reg);
+
+
+/* command/control registers */
+#define DM355EVM_MSP_COMMAND		0x00
+#	define MSP_COMMAND_NULL		0
+#	define MSP_COMMAND_RESET_COLD	1
+#	define MSP_COMMAND_RESET_WARM	2
+#	define MSP_COMMAND_RESET_WARM_I	3
+#	define MSP_COMMAND_POWEROFF	4
+#	define MSP_COMMAND_IR_REINIT	5
+#define DM355EVM_MSP_STATUS		0x01
+#	define MSP_STATUS_BAD_OFFSET	BIT(0)
+#	define MSP_STATUS_BAD_COMMAND	BIT(1)
+#	define MSP_STATUS_POWER_ERROR	BIT(2)
+#	define MSP_STATUS_RXBUF_OVERRUN	BIT(3)
+#define DM355EVM_MSP_RESET		0x02	/* 0 bits == in reset */
+#	define MSP_RESET_DC5		BIT(0)
+#	define MSP_RESET_TVP5154	BIT(2)
+#	define MSP_RESET_IMAGER		BIT(3)
+#	define MSP_RESET_ETHERNET	BIT(4)
+#	define MSP_RESET_SYS		BIT(5)
+#	define MSP_RESET_AIC33		BIT(7)
+
+/* GPIO registers ... bit patterns mostly match the source MSP ports */
+#define DM355EVM_MSP_LED		0x03	/* active low (MSP P4) */
+#define DM355EVM_MSP_SWITCH1		0x04	/* (MSP P5, masked) */
+#	define MSP_SWITCH1_SW6_1	BIT(0)
+#	define MSP_SWITCH1_SW6_2	BIT(1)
+#	define MSP_SWITCH1_SW6_3	BIT(2)
+#	define MSP_SWITCH1_SW6_4	BIT(3)
+#	define MSP_SWITCH1_J1		BIT(4)	/* NTSC/PAL */
+#	define MSP_SWITCH1_MSP_INT	BIT(5)	/* active low */
+#define DM355EVM_MSP_SWITCH2		0x05	/* (MSP P6, masked) */
+#	define MSP_SWITCH2_SW10		BIT(3)
+#	define MSP_SWITCH2_SW11		BIT(4)
+#	define MSP_SWITCH2_SW12		BIT(5)
+#	define MSP_SWITCH2_SW13		BIT(6)
+#	define MSP_SWITCH2_SW14		BIT(7)
+#define DM355EVM_MSP_SDMMC		0x06	/* (MSP P2, masked) */
+#	define MSP_SDMMC_0_WP		BIT(1)
+#	define MSP_SDMMC_0_CD		BIT(2)	/* active low */
+#	define MSP_SDMMC_1_WP		BIT(3)
+#	define MSP_SDMMC_1_CD		BIT(4)	/* active low */
+#define DM355EVM_MSP_FIRMREV		0x07	/* not a GPIO (out of order) */
+#define DM355EVM_MSP_VIDEO_IN		0x08	/* (MSP P3, masked) */
+#	define MSP_VIDEO_IMAGER		BIT(7)	/* low == tvp5146 */
+
+/* power supply registers are currently omitted */
+
+/* RTC registers */
+#define DM355EVM_MSP_RTC_0		0x12	/* LSB */
+#define DM355EVM_MSP_RTC_1		0x13
+#define DM355EVM_MSP_RTC_2		0x14
+#define DM355EVM_MSP_RTC_3		0x15	/* MSB */
+
+/* input event queue registers; code == ((HIGH << 8) | LOW) */
+#define DM355EVM_MSP_INPUT_COUNT	0x16	/* decrement by reading LOW */
+#define DM355EVM_MSP_INPUT_HIGH		0x17
+#define DM355EVM_MSP_INPUT_LOW		0x18
+
+#endif /* __LINUX_I2C_DM355EVM_MSP */
-- 
cgit v0.10.2


From 87c13493e6a59c0da55c2824f0205f9ef941b760 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Mon, 22 Dec 2008 12:16:27 +0100
Subject: mfd: move drivers/i2c/chips/tps65010.c to drivers/mfd

Move the tps65010 driver from drivers/i2c/chips to drivers/mfd
since it's more of a multi-function device than anything else,
and since Jean is trying to vanish drivers/i2c/chips ASAP.

One way to think of these chips are as the PMIC family most
used with OMAP1 generation chips.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index 4c35702..fa69c99 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -126,19 +126,6 @@ config ISP1301_OMAP
 	  This driver can also be built as a module.  If so, the module
 	  will be called isp1301_omap.
 
-config TPS65010
-	tristate "TPS6501x Power Management chips"
-	depends on GPIOLIB
-	default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK
-	help
-	  If you say yes here you get support for the TPS6501x series of
-	  Power Management chips.  These include voltage regulators,
-	  lithium ion/polymer battery charging, and other features that
-	  are often used in portable devices like cell phones and cameras.
-
-	  This driver can also be built as a module.  If so, the module
-	  will be called tps65010.
-
 config SENSORS_MAX6875
 	tristate "Maxim MAX6875 Power supply supervisor"
 	depends on EXPERIMENTAL
diff --git a/drivers/i2c/chips/Makefile b/drivers/i2c/chips/Makefile
index 23d2a31..0c7e2f1 100644
--- a/drivers/i2c/chips/Makefile
+++ b/drivers/i2c/chips/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_SENSORS_PCF8574)	+= pcf8574.o
 obj-$(CONFIG_PCF8575)		+= pcf8575.o
 obj-$(CONFIG_SENSORS_PCF8591)	+= pcf8591.o
 obj-$(CONFIG_ISP1301_OMAP)	+= isp1301_omap.o
-obj-$(CONFIG_TPS65010)		+= tps65010.o
 obj-$(CONFIG_MENELAUS)		+= menelaus.o
 obj-$(CONFIG_SENSORS_TSL2550)	+= tsl2550.o
 obj-$(CONFIG_MCU_MPC8349EMITX)	+= mcu_mpc8349emitx.o
diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c
deleted file mode 100644
index acf8b9d..0000000
--- a/drivers/i2c/chips/tps65010.c
+++ /dev/null
@@ -1,1072 +0,0 @@
-/*
- * tps65010 - driver for tps6501x power management chips
- *
- * Copyright (C) 2004 Texas Instruments
- * Copyright (C) 2004-2005 David Brownell
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/i2c.h>
-#include <linux/delay.h>
-#include <linux/workqueue.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <linux/mutex.h>
-#include <linux/platform_device.h>
-
-#include <linux/i2c/tps65010.h>
-
-#include <asm/gpio.h>
-
-
-/*-------------------------------------------------------------------------*/
-
-#define	DRIVER_VERSION	"2 May 2005"
-#define	DRIVER_NAME	(tps65010_driver.driver.name)
-
-MODULE_DESCRIPTION("TPS6501x Power Management Driver");
-MODULE_LICENSE("GPL");
-
-static struct i2c_driver tps65010_driver;
-
-/*-------------------------------------------------------------------------*/
-
-/* This driver handles a family of multipurpose chips, which incorporate
- * voltage regulators, lithium ion/polymer battery charging, GPIOs, LEDs,
- * and other features often needed in portable devices like cell phones
- * or digital cameras.
- *
- * The tps65011 and tps65013 have different voltage settings compared
- * to tps65010 and tps65012.  The tps65013 has a NO_CHG status/irq.
- * All except tps65010 have "wait" mode, possibly defaulted so that
- * battery-insert != device-on.
- *
- * We could distinguish between some models by checking VDCDC1.UVLO or
- * other registers, unless they've been changed already after powerup
- * as part of board setup by a bootloader.
- */
-enum tps_model {
-	TPS65010,
-	TPS65011,
-	TPS65012,
-	TPS65013,
-};
-
-struct tps65010 {
-	struct i2c_client	*client;
-	struct mutex		lock;
-	struct delayed_work	work;
-	struct dentry		*file;
-	unsigned		charging:1;
-	unsigned		por:1;
-	unsigned		model:8;
-	u16			vbus;
-	unsigned long		flags;
-#define	FLAG_VBUS_CHANGED	0
-#define	FLAG_IRQ_ENABLE		1
-
-	/* copies of last register state */
-	u8			chgstatus, regstatus, chgconf;
-	u8			nmask1, nmask2;
-
-	u8			outmask;
-	struct gpio_chip	chip;
-	struct platform_device	*leds;
-};
-
-#define	POWER_POLL_DELAY	msecs_to_jiffies(5000)
-
-/*-------------------------------------------------------------------------*/
-
-#if	defined(DEBUG) || defined(CONFIG_DEBUG_FS)
-
-static void dbg_chgstat(char *buf, size_t len, u8 chgstatus)
-{
-	snprintf(buf, len, "%02x%s%s%s%s%s%s%s%s\n",
-		chgstatus,
-		(chgstatus & TPS_CHG_USB) ? " USB" : "",
-		(chgstatus & TPS_CHG_AC) ? " AC" : "",
-		(chgstatus & TPS_CHG_THERM) ? " therm" : "",
-		(chgstatus & TPS_CHG_TERM) ? " done" :
-			((chgstatus & (TPS_CHG_USB|TPS_CHG_AC))
-				? " (charging)" : ""),
-		(chgstatus & TPS_CHG_TAPER_TMO) ? " taper_tmo" : "",
-		(chgstatus & TPS_CHG_CHG_TMO) ? " charge_tmo" : "",
-		(chgstatus & TPS_CHG_PRECHG_TMO) ? " prechg_tmo" : "",
-		(chgstatus & TPS_CHG_TEMP_ERR) ? " temp_err" : "");
-}
-
-static void dbg_regstat(char *buf, size_t len, u8 regstatus)
-{
-	snprintf(buf, len, "%02x %s%s%s%s%s%s%s%s\n",
-		regstatus,
-		(regstatus & TPS_REG_ONOFF) ? "off" : "(on)",
-		(regstatus & TPS_REG_COVER) ? " uncover" : "",
-		(regstatus & TPS_REG_UVLO) ? " UVLO" : "",
-		(regstatus & TPS_REG_NO_CHG) ? " NO_CHG" : "",
-		(regstatus & TPS_REG_PG_LD02) ? " ld02_bad" : "",
-		(regstatus & TPS_REG_PG_LD01) ? " ld01_bad" : "",
-		(regstatus & TPS_REG_PG_MAIN) ? " main_bad" : "",
-		(regstatus & TPS_REG_PG_CORE) ? " core_bad" : "");
-}
-
-static void dbg_chgconf(int por, char *buf, size_t len, u8 chgconfig)
-{
-	const char *hibit;
-
-	if (por)
-		hibit = (chgconfig & TPS_CHARGE_POR)
-				? "POR=69ms" : "POR=1sec";
-	else
-		hibit = (chgconfig & TPS65013_AUA) ? "AUA" : "";
-
-	snprintf(buf, len, "%02x %s%s%s AC=%d%% USB=%dmA %sCharge\n",
-		chgconfig, hibit,
-		(chgconfig & TPS_CHARGE_RESET) ? " reset" : "",
-		(chgconfig & TPS_CHARGE_FAST) ? " fast" : "",
-		({int p; switch ((chgconfig >> 3) & 3) {
-		case 3:		p = 100; break;
-		case 2:		p = 75; break;
-		case 1:		p = 50; break;
-		default:	p = 25; break;
-		}; p; }),
-		(chgconfig & TPS_VBUS_CHARGING)
-			? ((chgconfig & TPS_VBUS_500MA) ? 500 : 100)
-			: 0,
-		(chgconfig & TPS_CHARGE_ENABLE) ? "" : "No");
-}
-
-#endif
-
-#ifdef	DEBUG
-
-static void show_chgstatus(const char *label, u8 chgstatus)
-{
-	char buf [100];
-
-	dbg_chgstat(buf, sizeof buf, chgstatus);
-	pr_debug("%s: %s %s", DRIVER_NAME, label, buf);
-}
-
-static void show_regstatus(const char *label, u8 regstatus)
-{
-	char buf [100];
-
-	dbg_regstat(buf, sizeof buf, regstatus);
-	pr_debug("%s: %s %s", DRIVER_NAME, label, buf);
-}
-
-static void show_chgconfig(int por, const char *label, u8 chgconfig)
-{
-	char buf [100];
-
-	dbg_chgconf(por, buf, sizeof buf, chgconfig);
-	pr_debug("%s: %s %s", DRIVER_NAME, label, buf);
-}
-
-#else
-
-static inline void show_chgstatus(const char *label, u8 chgstatus) { }
-static inline void show_regstatus(const char *label, u8 chgstatus) { }
-static inline void show_chgconfig(int por, const char *label, u8 chgconfig) { }
-
-#endif
-
-#ifdef	CONFIG_DEBUG_FS
-
-static int dbg_show(struct seq_file *s, void *_)
-{
-	struct tps65010	*tps = s->private;
-	u8		value, v2;
-	unsigned	i;
-	char		buf[100];
-	const char	*chip;
-
-	switch (tps->model) {
-	case TPS65010:	chip = "tps65010"; break;
-	case TPS65011:	chip = "tps65011"; break;
-	case TPS65012:	chip = "tps65012"; break;
-	case TPS65013:	chip = "tps65013"; break;
-	default:	chip = NULL; break;
-	}
-	seq_printf(s, "driver  %s\nversion %s\nchip    %s\n\n",
-			DRIVER_NAME, DRIVER_VERSION, chip);
-
-	mutex_lock(&tps->lock);
-
-	/* FIXME how can we tell whether a battery is present?
-	 * likely involves a charge gauging chip (like BQ26501).
-	 */
-
-	seq_printf(s, "%scharging\n\n", tps->charging ? "" : "(not) ");
-
-
-	/* registers for monitoring battery charging and status; note
-	 * that reading chgstat and regstat may ack IRQs...
-	 */
-	value = i2c_smbus_read_byte_data(tps->client, TPS_CHGCONFIG);
-	dbg_chgconf(tps->por, buf, sizeof buf, value);
-	seq_printf(s, "chgconfig %s", buf);
-
-	value = i2c_smbus_read_byte_data(tps->client, TPS_CHGSTATUS);
-	dbg_chgstat(buf, sizeof buf, value);
-	seq_printf(s, "chgstat   %s", buf);
-	value = i2c_smbus_read_byte_data(tps->client, TPS_MASK1);
-	dbg_chgstat(buf, sizeof buf, value);
-	seq_printf(s, "mask1     %s", buf);
-	/* ignore ackint1 */
-
-	value = i2c_smbus_read_byte_data(tps->client, TPS_REGSTATUS);
-	dbg_regstat(buf, sizeof buf, value);
-	seq_printf(s, "regstat   %s", buf);
-	value = i2c_smbus_read_byte_data(tps->client, TPS_MASK2);
-	dbg_regstat(buf, sizeof buf, value);
-	seq_printf(s, "mask2     %s\n", buf);
-	/* ignore ackint2 */
-
-	(void) schedule_delayed_work(&tps->work, POWER_POLL_DELAY);
-
-
-	/* VMAIN voltage, enable lowpower, etc */
-	value = i2c_smbus_read_byte_data(tps->client, TPS_VDCDC1);
-	seq_printf(s, "vdcdc1    %02x\n", value);
-
-	/* VCORE voltage, vibrator on/off */
-	value = i2c_smbus_read_byte_data(tps->client, TPS_VDCDC2);
-	seq_printf(s, "vdcdc2    %02x\n", value);
-
-	/* both LD0s, and their lowpower behavior */
-	value = i2c_smbus_read_byte_data(tps->client, TPS_VREGS1);
-	seq_printf(s, "vregs1    %02x\n\n", value);
-
-
-	/* LEDs and GPIOs */
-	value = i2c_smbus_read_byte_data(tps->client, TPS_LED1_ON);
-	v2 = i2c_smbus_read_byte_data(tps->client, TPS_LED1_PER);
-	seq_printf(s, "led1 %s, on=%02x, per=%02x, %d/%d msec\n",
-		(value & 0x80)
-			? ((v2 & 0x80) ? "on" : "off")
-			: ((v2 & 0x80) ? "blink" : "(nPG)"),
-		value, v2,
-		(value & 0x7f) * 10, (v2 & 0x7f) * 100);
-
-	value = i2c_smbus_read_byte_data(tps->client, TPS_LED2_ON);
-	v2 = i2c_smbus_read_byte_data(tps->client, TPS_LED2_PER);
-	seq_printf(s, "led2 %s, on=%02x, per=%02x, %d/%d msec\n",
-		(value & 0x80)
-			? ((v2 & 0x80) ? "on" : "off")
-			: ((v2 & 0x80) ? "blink" : "off"),
-		value, v2,
-		(value & 0x7f) * 10, (v2 & 0x7f) * 100);
-
-	value = i2c_smbus_read_byte_data(tps->client, TPS_DEFGPIO);
-	v2 = i2c_smbus_read_byte_data(tps->client, TPS_MASK3);
-	seq_printf(s, "defgpio %02x mask3 %02x\n", value, v2);
-
-	for (i = 0; i < 4; i++) {
-		if (value & (1 << (4 + i)))
-			seq_printf(s, "  gpio%d-out %s\n", i + 1,
-				(value & (1 << i)) ? "low" : "hi ");
-		else
-			seq_printf(s, "  gpio%d-in  %s %s %s\n", i + 1,
-				(value & (1 << i)) ? "hi " : "low",
-				(v2 & (1 << i)) ? "no-irq" : "irq",
-				(v2 & (1 << (4 + i))) ? "rising" : "falling");
-	}
-
-	mutex_unlock(&tps->lock);
-	return 0;
-}
-
-static int dbg_tps_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, dbg_show, inode->i_private);
-}
-
-static const struct file_operations debug_fops = {
-	.open		= dbg_tps_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-#define	DEBUG_FOPS	&debug_fops
-
-#else
-#define	DEBUG_FOPS	NULL
-#endif
-
-/*-------------------------------------------------------------------------*/
-
-/* handle IRQS in a task context, so we can use I2C calls */
-static void tps65010_interrupt(struct tps65010 *tps)
-{
-	u8 tmp = 0, mask, poll;
-
-	/* IRQs won't trigger for certain events, but we can get
-	 * others by polling (normally, with external power applied).
-	 */
-	poll = 0;
-
-	/* regstatus irqs */
-	if (tps->nmask2) {
-		tmp = i2c_smbus_read_byte_data(tps->client, TPS_REGSTATUS);
-		mask = tmp ^ tps->regstatus;
-		tps->regstatus = tmp;
-		mask &= tps->nmask2;
-	} else
-		mask = 0;
-	if (mask) {
-		tps->regstatus =  tmp;
-		/* may need to shut something down ... */
-
-		/* "off" usually means deep sleep */
-		if (tmp & TPS_REG_ONOFF) {
-			pr_info("%s: power off button\n", DRIVER_NAME);
-#if 0
-			/* REVISIT:  this might need its own workqueue
-			 * plus tweaks including deadlock avoidance ...
-			 * also needs to get error handling and probably
-			 * an #ifdef CONFIG_HIBERNATION
-			 */
-			hibernate();
-#endif
-			poll = 1;
-		}
-	}
-
-	/* chgstatus irqs */
-	if (tps->nmask1) {
-		tmp = i2c_smbus_read_byte_data(tps->client, TPS_CHGSTATUS);
-		mask = tmp ^ tps->chgstatus;
-		tps->chgstatus = tmp;
-		mask &= tps->nmask1;
-	} else
-		mask = 0;
-	if (mask) {
-		unsigned	charging = 0;
-
-		show_chgstatus("chg/irq", tmp);
-		if (tmp & (TPS_CHG_USB|TPS_CHG_AC))
-			show_chgconfig(tps->por, "conf", tps->chgconf);
-
-		/* Unless it was turned off or disabled, we charge any
-		 * battery whenever there's power available for it
-		 * and the charger hasn't been disabled.
-		 */
-		if (!(tps->chgstatus & ~(TPS_CHG_USB|TPS_CHG_AC))
-				&& (tps->chgstatus & (TPS_CHG_USB|TPS_CHG_AC))
-				&& (tps->chgconf & TPS_CHARGE_ENABLE)
-				) {
-			if (tps->chgstatus & TPS_CHG_USB) {
-				/* VBUS options are readonly until reconnect */
-				if (mask & TPS_CHG_USB)
-					set_bit(FLAG_VBUS_CHANGED, &tps->flags);
-				charging = 1;
-			} else if (tps->chgstatus & TPS_CHG_AC)
-				charging = 1;
-		}
-		if (charging != tps->charging) {
-			tps->charging = charging;
-			pr_info("%s: battery %scharging\n",
-				DRIVER_NAME, charging ? "" :
-				((tps->chgstatus & (TPS_CHG_USB|TPS_CHG_AC))
-					? "NOT " : "dis"));
-		}
-	}
-
-	/* always poll to detect (a) power removal, without tps65013
-	 * NO_CHG IRQ; or (b) restart of charging after stop.
-	 */
-	if ((tps->model != TPS65013 || !tps->charging)
-			&& (tps->chgstatus & (TPS_CHG_USB|TPS_CHG_AC)))
-		poll = 1;
-	if (poll)
-		(void) schedule_delayed_work(&tps->work, POWER_POLL_DELAY);
-
-	/* also potentially gpio-in rise or fall */
-}
-
-/* handle IRQs and polling using keventd for now */
-static void tps65010_work(struct work_struct *work)
-{
-	struct tps65010		*tps;
-
-	tps = container_of(work, struct tps65010, work.work);
-	mutex_lock(&tps->lock);
-
-	tps65010_interrupt(tps);
-
-	if (test_and_clear_bit(FLAG_VBUS_CHANGED, &tps->flags)) {
-		int	status;
-		u8	chgconfig, tmp;
-
-		chgconfig = i2c_smbus_read_byte_data(tps->client,
-					TPS_CHGCONFIG);
-		chgconfig &= ~(TPS_VBUS_500MA | TPS_VBUS_CHARGING);
-		if (tps->vbus == 500)
-			chgconfig |= TPS_VBUS_500MA | TPS_VBUS_CHARGING;
-		else if (tps->vbus >= 100)
-			chgconfig |= TPS_VBUS_CHARGING;
-
-		status = i2c_smbus_write_byte_data(tps->client,
-				TPS_CHGCONFIG, chgconfig);
-
-		/* vbus update fails unless VBUS is connected! */
-		tmp = i2c_smbus_read_byte_data(tps->client, TPS_CHGCONFIG);
-		tps->chgconf = tmp;
-		show_chgconfig(tps->por, "update vbus", tmp);
-	}
-
-	if (test_and_clear_bit(FLAG_IRQ_ENABLE, &tps->flags))
-		enable_irq(tps->client->irq);
-
-	mutex_unlock(&tps->lock);
-}
-
-static irqreturn_t tps65010_irq(int irq, void *_tps)
-{
-	struct tps65010		*tps = _tps;
-
-	disable_irq_nosync(irq);
-	set_bit(FLAG_IRQ_ENABLE, &tps->flags);
-	(void) schedule_work(&tps->work.work);
-	return IRQ_HANDLED;
-}
-
-/*-------------------------------------------------------------------------*/
-
-/* offsets 0..3 == GPIO1..GPIO4
- * offsets 4..5 == LED1/nPG, LED2 (we set one of the non-BLINK modes)
- * offset 6 == vibrator motor driver
- */
-static void
-tps65010_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
-{
-	if (offset < 4)
-		tps65010_set_gpio_out_value(offset + 1, value);
-	else if (offset < 6)
-		tps65010_set_led(offset - 3, value ? ON : OFF);
-	else
-		tps65010_set_vib(value);
-}
-
-static int
-tps65010_output(struct gpio_chip *chip, unsigned offset, int value)
-{
-	/* GPIOs may be input-only */
-	if (offset < 4) {
-		struct tps65010		*tps;
-
-		tps = container_of(chip, struct tps65010, chip);
-		if (!(tps->outmask & (1 << offset)))
-			return -EINVAL;
-		tps65010_set_gpio_out_value(offset + 1, value);
-	} else if (offset < 6)
-		tps65010_set_led(offset - 3, value ? ON : OFF);
-	else
-		tps65010_set_vib(value);
-
-	return 0;
-}
-
-static int tps65010_gpio_get(struct gpio_chip *chip, unsigned offset)
-{
-	int			value;
-	struct tps65010		*tps;
-
-	tps = container_of(chip, struct tps65010, chip);
-
-	if (offset < 4) {
-		value = i2c_smbus_read_byte_data(tps->client, TPS_DEFGPIO);
-		if (value < 0)
-			return 0;
-		if (value & (1 << (offset + 4)))	/* output */
-			return !(value & (1 << offset));
-		else					/* input */
-			return (value & (1 << offset));
-	}
-
-	/* REVISIT we *could* report LED1/nPG and LED2 state ... */
-	return 0;
-}
-
-
-/*-------------------------------------------------------------------------*/
-
-static struct tps65010 *the_tps;
-
-static int __exit tps65010_remove(struct i2c_client *client)
-{
-	struct tps65010		*tps = i2c_get_clientdata(client);
-	struct tps65010_board	*board = client->dev.platform_data;
-
-	if (board && board->teardown) {
-		int status = board->teardown(client, board->context);
-		if (status < 0)
-			dev_dbg(&client->dev, "board %s %s err %d\n",
-				"teardown", client->name, status);
-	}
-	if (client->irq > 0)
-		free_irq(client->irq, tps);
-	cancel_delayed_work(&tps->work);
-	flush_scheduled_work();
-	debugfs_remove(tps->file);
-	kfree(tps);
-	i2c_set_clientdata(client, NULL);
-	the_tps = NULL;
-	return 0;
-}
-
-static int tps65010_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
-{
-	struct tps65010		*tps;
-	int			status;
-	struct tps65010_board	*board = client->dev.platform_data;
-
-	if (the_tps) {
-		dev_dbg(&client->dev, "only one tps6501x chip allowed\n");
-		return -ENODEV;
-	}
-
-	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
-		return -EINVAL;
-
-	tps = kzalloc(sizeof *tps, GFP_KERNEL);
-	if (!tps)
-		return -ENOMEM;
-
-	mutex_init(&tps->lock);
-	INIT_DELAYED_WORK(&tps->work, tps65010_work);
-	tps->client = client;
-	tps->model = id->driver_data;
-
-	/* the IRQ is active low, but many gpio lines can't support that
-	 * so this driver uses falling-edge triggers instead.
-	 */
-	if (client->irq > 0) {
-		status = request_irq(client->irq, tps65010_irq,
-			IRQF_SAMPLE_RANDOM | IRQF_TRIGGER_FALLING,
-			DRIVER_NAME, tps);
-		if (status < 0) {
-			dev_dbg(&client->dev, "can't get IRQ %d, err %d\n",
-					client->irq, status);
-			goto fail1;
-		}
-		/* annoying race here, ideally we'd have an option
-		 * to claim the irq now and enable it later.
-		 * FIXME genirq IRQF_NOAUTOEN now solves that ...
-		 */
-		disable_irq(client->irq);
-		set_bit(FLAG_IRQ_ENABLE, &tps->flags);
-	} else
-		dev_warn(&client->dev, "IRQ not configured!\n");
-
-
-	switch (tps->model) {
-	case TPS65010:
-	case TPS65012:
-		tps->por = 1;
-		break;
-	/* else CHGCONFIG.POR is replaced by AUA, enabling a WAIT mode */
-	}
-	tps->chgconf = i2c_smbus_read_byte_data(client, TPS_CHGCONFIG);
-	show_chgconfig(tps->por, "conf/init", tps->chgconf);
-
-	show_chgstatus("chg/init",
-		i2c_smbus_read_byte_data(client, TPS_CHGSTATUS));
-	show_regstatus("reg/init",
-		i2c_smbus_read_byte_data(client, TPS_REGSTATUS));
-
-	pr_debug("%s: vdcdc1 0x%02x, vdcdc2 %02x, vregs1 %02x\n", DRIVER_NAME,
-		i2c_smbus_read_byte_data(client, TPS_VDCDC1),
-		i2c_smbus_read_byte_data(client, TPS_VDCDC2),
-		i2c_smbus_read_byte_data(client, TPS_VREGS1));
-	pr_debug("%s: defgpio 0x%02x, mask3 0x%02x\n", DRIVER_NAME,
-		i2c_smbus_read_byte_data(client, TPS_DEFGPIO),
-		i2c_smbus_read_byte_data(client, TPS_MASK3));
-
-	i2c_set_clientdata(client, tps);
-	the_tps = tps;
-
-#if	defined(CONFIG_USB_GADGET) && !defined(CONFIG_USB_OTG)
-	/* USB hosts can't draw VBUS.  OTG devices could, later
-	 * when OTG infrastructure enables it.  USB peripherals
-	 * could be relying on VBUS while booting, though.
-	 */
-	tps->vbus = 100;
-#endif
-
-	/* unmask the "interesting" irqs, then poll once to
-	 * kickstart monitoring, initialize shadowed status
-	 * registers, and maybe disable VBUS draw.
-	 */
-	tps->nmask1 = ~0;
-	(void) i2c_smbus_write_byte_data(client, TPS_MASK1, ~tps->nmask1);
-
-	tps->nmask2 = TPS_REG_ONOFF;
-	if (tps->model == TPS65013)
-		tps->nmask2 |= TPS_REG_NO_CHG;
-	(void) i2c_smbus_write_byte_data(client, TPS_MASK2, ~tps->nmask2);
-
-	(void) i2c_smbus_write_byte_data(client, TPS_MASK3, 0x0f
-		| i2c_smbus_read_byte_data(client, TPS_MASK3));
-
-	tps65010_work(&tps->work.work);
-
-	tps->file = debugfs_create_file(DRIVER_NAME, S_IRUGO, NULL,
-				tps, DEBUG_FOPS);
-
-	/* optionally register GPIOs */
-	if (board && board->base > 0) {
-		tps->outmask = board->outmask;
-
-		tps->chip.label = client->name;
-		tps->chip.dev = &client->dev;
-		tps->chip.owner = THIS_MODULE;
-
-		tps->chip.set = tps65010_gpio_set;
-		tps->chip.direction_output = tps65010_output;
-
-		/* NOTE:  only partial support for inputs; nyet IRQs */
-		tps->chip.get = tps65010_gpio_get;
-
-		tps->chip.base = board->base;
-		tps->chip.ngpio = 7;
-		tps->chip.can_sleep = 1;
-
-		status = gpiochip_add(&tps->chip);
-		if (status < 0)
-			dev_err(&client->dev, "can't add gpiochip, err %d\n",
-					status);
-		else if (board->setup) {
-			status = board->setup(client, board->context);
-			if (status < 0) {
-				dev_dbg(&client->dev,
-					"board %s %s err %d\n",
-					"setup", client->name, status);
-				status = 0;
-			}
-		}
-	}
-
-	return 0;
-fail1:
-	kfree(tps);
-	return status;
-}
-
-static const struct i2c_device_id tps65010_id[] = {
-	{ "tps65010", TPS65010 },
-	{ "tps65011", TPS65011 },
-	{ "tps65012", TPS65012 },
-	{ "tps65013", TPS65013 },
-	{ "tps65014", TPS65011 },	/* tps65011 charging at 6.5V max */
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, tps65010_id);
-
-static struct i2c_driver tps65010_driver = {
-	.driver = {
-		.name	= "tps65010",
-	},
-	.probe	= tps65010_probe,
-	.remove	= __exit_p(tps65010_remove),
-	.id_table = tps65010_id,
-};
-
-/*-------------------------------------------------------------------------*/
-
-/* Draw from VBUS:
- *   0 mA -- DON'T DRAW (might supply power instead)
- * 100 mA -- usb unit load (slowest charge rate)
- * 500 mA -- usb high power (fast battery charge)
- */
-int tps65010_set_vbus_draw(unsigned mA)
-{
-	unsigned long	flags;
-
-	if (!the_tps)
-		return -ENODEV;
-
-	/* assumes non-SMP */
-	local_irq_save(flags);
-	if (mA >= 500)
-		mA = 500;
-	else if (mA >= 100)
-		mA = 100;
-	else
-		mA = 0;
-	the_tps->vbus = mA;
-	if ((the_tps->chgstatus & TPS_CHG_USB)
-			&& test_and_set_bit(
-				FLAG_VBUS_CHANGED, &the_tps->flags)) {
-		/* gadget drivers call this in_irq() */
-		(void) schedule_work(&the_tps->work.work);
-	}
-	local_irq_restore(flags);
-
-	return 0;
-}
-EXPORT_SYMBOL(tps65010_set_vbus_draw);
-
-/*-------------------------------------------------------------------------*/
-/* tps65010_set_gpio_out_value parameter:
- * gpio:  GPIO1, GPIO2, GPIO3 or GPIO4
- * value: LOW or HIGH
- */
-int tps65010_set_gpio_out_value(unsigned gpio, unsigned value)
-{
-	int	 status;
-	unsigned defgpio;
-
-	if (!the_tps)
-		return -ENODEV;
-	if ((gpio < GPIO1) || (gpio > GPIO4))
-		return -EINVAL;
-
-	mutex_lock(&the_tps->lock);
-
-	defgpio = i2c_smbus_read_byte_data(the_tps->client, TPS_DEFGPIO);
-
-	/* Configure GPIO for output */
-	defgpio |= 1 << (gpio + 3);
-
-	/* Writing 1 forces a logic 0 on that GPIO and vice versa */
-	switch (value) {
-	case LOW:
-		defgpio |= 1 << (gpio - 1);    /* set GPIO low by writing 1 */
-		break;
-	/* case HIGH: */
-	default:
-		defgpio &= ~(1 << (gpio - 1)); /* set GPIO high by writing 0 */
-		break;
-	}
-
-	status = i2c_smbus_write_byte_data(the_tps->client,
-		TPS_DEFGPIO, defgpio);
-
-	pr_debug("%s: gpio%dout = %s, defgpio 0x%02x\n", DRIVER_NAME,
-		gpio, value ? "high" : "low",
-		i2c_smbus_read_byte_data(the_tps->client, TPS_DEFGPIO));
-
-	mutex_unlock(&the_tps->lock);
-	return status;
-}
-EXPORT_SYMBOL(tps65010_set_gpio_out_value);
-
-/*-------------------------------------------------------------------------*/
-/* tps65010_set_led parameter:
- * led:  LED1 or LED2
- * mode: ON, OFF or BLINK
- */
-int tps65010_set_led(unsigned led, unsigned mode)
-{
-	int	 status;
-	unsigned led_on, led_per, offs;
-
-	if (!the_tps)
-		return -ENODEV;
-
-	if (led == LED1)
-		offs = 0;
-	else {
-		offs = 2;
-		led = LED2;
-	}
-
-	mutex_lock(&the_tps->lock);
-
-	pr_debug("%s: led%i_on   0x%02x\n", DRIVER_NAME, led,
-		i2c_smbus_read_byte_data(the_tps->client,
-				TPS_LED1_ON + offs));
-
-	pr_debug("%s: led%i_per  0x%02x\n", DRIVER_NAME, led,
-		i2c_smbus_read_byte_data(the_tps->client,
-				TPS_LED1_PER + offs));
-
-	switch (mode) {
-	case OFF:
-		led_on  = 1 << 7;
-		led_per = 0 << 7;
-		break;
-	case ON:
-		led_on  = 1 << 7;
-		led_per = 1 << 7;
-		break;
-	case BLINK:
-		led_on  = 0x30 | (0 << 7);
-		led_per = 0x08 | (1 << 7);
-		break;
-	default:
-		printk(KERN_ERR "%s: Wrong mode parameter for set_led()\n",
-		       DRIVER_NAME);
-		mutex_unlock(&the_tps->lock);
-		return -EINVAL;
-	}
-
-	status = i2c_smbus_write_byte_data(the_tps->client,
-			TPS_LED1_ON + offs, led_on);
-
-	if (status != 0) {
-		printk(KERN_ERR "%s: Failed to write led%i_on register\n",
-		       DRIVER_NAME, led);
-		mutex_unlock(&the_tps->lock);
-		return status;
-	}
-
-	pr_debug("%s: led%i_on   0x%02x\n", DRIVER_NAME, led,
-		i2c_smbus_read_byte_data(the_tps->client, TPS_LED1_ON + offs));
-
-	status = i2c_smbus_write_byte_data(the_tps->client,
-			TPS_LED1_PER + offs, led_per);
-
-	if (status != 0) {
-		printk(KERN_ERR "%s: Failed to write led%i_per register\n",
-		       DRIVER_NAME, led);
-		mutex_unlock(&the_tps->lock);
-		return status;
-	}
-
-	pr_debug("%s: led%i_per  0x%02x\n", DRIVER_NAME, led,
-		i2c_smbus_read_byte_data(the_tps->client,
-				TPS_LED1_PER + offs));
-
-	mutex_unlock(&the_tps->lock);
-
-	return status;
-}
-EXPORT_SYMBOL(tps65010_set_led);
-
-/*-------------------------------------------------------------------------*/
-/* tps65010_set_vib parameter:
- * value: ON or OFF
- */
-int tps65010_set_vib(unsigned value)
-{
-	int	 status;
-	unsigned vdcdc2;
-
-	if (!the_tps)
-		return -ENODEV;
-
-	mutex_lock(&the_tps->lock);
-
-	vdcdc2 = i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC2);
-	vdcdc2 &= ~(1 << 1);
-	if (value)
-		vdcdc2 |= (1 << 1);
-	status = i2c_smbus_write_byte_data(the_tps->client,
-		TPS_VDCDC2, vdcdc2);
-
-	pr_debug("%s: vibrator %s\n", DRIVER_NAME, value ? "on" : "off");
-
-	mutex_unlock(&the_tps->lock);
-	return status;
-}
-EXPORT_SYMBOL(tps65010_set_vib);
-
-/*-------------------------------------------------------------------------*/
-/* tps65010_set_low_pwr parameter:
- * mode: ON or OFF
- */
-int tps65010_set_low_pwr(unsigned mode)
-{
-	int	 status;
-	unsigned vdcdc1;
-
-	if (!the_tps)
-		return -ENODEV;
-
-	mutex_lock(&the_tps->lock);
-
-	pr_debug("%s: %s low_pwr, vdcdc1 0x%02x\n", DRIVER_NAME,
-		mode ? "enable" : "disable",
-		i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1));
-
-	vdcdc1 = i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1);
-
-	switch (mode) {
-	case OFF:
-		vdcdc1 &= ~TPS_ENABLE_LP; /* disable ENABLE_LP bit */
-		break;
-	/* case ON: */
-	default:
-		vdcdc1 |= TPS_ENABLE_LP;  /* enable ENABLE_LP bit */
-		break;
-	}
-
-	status = i2c_smbus_write_byte_data(the_tps->client,
-			TPS_VDCDC1, vdcdc1);
-
-	if (status != 0)
-		printk(KERN_ERR "%s: Failed to write vdcdc1 register\n",
-			DRIVER_NAME);
-	else
-		pr_debug("%s: vdcdc1 0x%02x\n", DRIVER_NAME,
-			i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1));
-
-	mutex_unlock(&the_tps->lock);
-
-	return status;
-}
-EXPORT_SYMBOL(tps65010_set_low_pwr);
-
-/*-------------------------------------------------------------------------*/
-/* tps65010_config_vregs1 parameter:
- * value to be written to VREGS1 register
- * Note: The complete register is written, set all bits you need
- */
-int tps65010_config_vregs1(unsigned value)
-{
-	int	 status;
-
-	if (!the_tps)
-		return -ENODEV;
-
-	mutex_lock(&the_tps->lock);
-
-	pr_debug("%s: vregs1 0x%02x\n", DRIVER_NAME,
-			i2c_smbus_read_byte_data(the_tps->client, TPS_VREGS1));
-
-	status = i2c_smbus_write_byte_data(the_tps->client,
-			TPS_VREGS1, value);
-
-	if (status != 0)
-		printk(KERN_ERR "%s: Failed to write vregs1 register\n",
-			DRIVER_NAME);
-	else
-		pr_debug("%s: vregs1 0x%02x\n", DRIVER_NAME,
-			i2c_smbus_read_byte_data(the_tps->client, TPS_VREGS1));
-
-	mutex_unlock(&the_tps->lock);
-
-	return status;
-}
-EXPORT_SYMBOL(tps65010_config_vregs1);
-
-/*-------------------------------------------------------------------------*/
-/* tps65013_set_low_pwr parameter:
- * mode: ON or OFF
- */
-
-/* FIXME: Assumes AC or USB power is present. Setting AUA bit is not
-	required if power supply is through a battery */
-
-int tps65013_set_low_pwr(unsigned mode)
-{
-	int	 status;
-	unsigned vdcdc1, chgconfig;
-
-	if (!the_tps || the_tps->por)
-		return -ENODEV;
-
-	mutex_lock(&the_tps->lock);
-
-	pr_debug("%s: %s low_pwr, chgconfig 0x%02x vdcdc1 0x%02x\n",
-		DRIVER_NAME,
-		mode ? "enable" : "disable",
-		i2c_smbus_read_byte_data(the_tps->client, TPS_CHGCONFIG),
-		i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1));
-
-	chgconfig = i2c_smbus_read_byte_data(the_tps->client, TPS_CHGCONFIG);
-	vdcdc1 = i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1);
-
-	switch (mode) {
-	case OFF:
-		chgconfig &= ~TPS65013_AUA; /* disable AUA bit */
-		vdcdc1 &= ~TPS_ENABLE_LP; /* disable ENABLE_LP bit */
-		break;
-	/* case ON: */
-	default:
-		chgconfig |= TPS65013_AUA;  /* enable AUA bit */
-		vdcdc1 |= TPS_ENABLE_LP;  /* enable ENABLE_LP bit */
-		break;
-	}
-
-	status = i2c_smbus_write_byte_data(the_tps->client,
-			TPS_CHGCONFIG, chgconfig);
-	if (status != 0) {
-		printk(KERN_ERR "%s: Failed to write chconfig register\n",
-	 DRIVER_NAME);
-		mutex_unlock(&the_tps->lock);
-		return status;
-	}
-
-	chgconfig = i2c_smbus_read_byte_data(the_tps->client, TPS_CHGCONFIG);
-	the_tps->chgconf = chgconfig;
-	show_chgconfig(0, "chgconf", chgconfig);
-
-	status = i2c_smbus_write_byte_data(the_tps->client,
-			TPS_VDCDC1, vdcdc1);
-
-	if (status != 0)
-		printk(KERN_ERR "%s: Failed to write vdcdc1 register\n",
-	 DRIVER_NAME);
-	else
-		pr_debug("%s: vdcdc1 0x%02x\n", DRIVER_NAME,
-			i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1));
-
-	mutex_unlock(&the_tps->lock);
-
-	return status;
-}
-EXPORT_SYMBOL(tps65013_set_low_pwr);
-
-/*-------------------------------------------------------------------------*/
-
-static int __init tps_init(void)
-{
-	u32	tries = 3;
-	int	status = -ENODEV;
-
-	printk(KERN_INFO "%s: version %s\n", DRIVER_NAME, DRIVER_VERSION);
-
-	/* some boards have startup glitches */
-	while (tries--) {
-		status = i2c_add_driver(&tps65010_driver);
-		if (the_tps)
-			break;
-		i2c_del_driver(&tps65010_driver);
-		if (!tries) {
-			printk(KERN_ERR "%s: no chip?\n", DRIVER_NAME);
-			return -ENODEV;
-		}
-		pr_debug("%s: re-probe ...\n", DRIVER_NAME);
-		msleep(10);
-	}
-
-	return status;
-}
-/* NOTE:  this MUST be initialized before the other parts of the system
- * that rely on it ... but after the i2c bus on which this relies.
- * That is, much earlier than on PC-type systems, which don't often use
- * I2C as a core system bus.
- */
-subsys_initcall(tps_init);
-
-static void __exit tps_exit(void)
-{
-	i2c_del_driver(&tps65010_driver);
-}
-module_exit(tps_exit);
-
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 02e9146..182e148 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -69,6 +69,19 @@ config UCB1400_CORE
 	  To compile this driver as a module, choose M here: the
 	  module will be called ucb1400_core.
 
+config TPS65010
+	tristate "TPS6501x Power Management chips"
+	depends on I2C && GPIOLIB
+	default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK
+	help
+	  If you say yes here you get support for the TPS6501x series of
+	  Power Management chips.  These include voltage regulators,
+	  lithium ion/polymer battery charging, and other features that
+	  are often used in portable devices like cell phones and cameras.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called tps65010.
+
 config TWL4030_CORE
 	bool "Texas Instruments TWL4030/TPS659x0 Support"
 	depends on I2C=y && GENERIC_HARDIRQS
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 8f6cd5c..3989e30 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -19,6 +19,8 @@ wm8350-objs			:= wm8350-core.o wm8350-regmap.o wm8350-gpio.o
 obj-$(CONFIG_MFD_WM8350)	+= wm8350.o
 obj-$(CONFIG_MFD_WM8350_I2C)	+= wm8350-i2c.o
 
+obj-$(CONFIG_TPS65010)		+= tps65010.o
+
 obj-$(CONFIG_TWL4030_CORE)	+= twl4030-core.o twl4030-irq.o
 
 obj-$(CONFIG_MFD_CORE)		+= mfd-core.o
@@ -33,4 +35,4 @@ obj-$(CONFIG_MCP_UCB1200)	+= ucb1x00-assabet.o
 endif
 obj-$(CONFIG_UCB1400_CORE)	+= ucb1400_core.o
 
-obj-$(CONFIG_PMIC_DA903X)	+= da903x.o
\ No newline at end of file
+obj-$(CONFIG_PMIC_DA903X)	+= da903x.o
diff --git a/drivers/mfd/tps65010.c b/drivers/mfd/tps65010.c
new file mode 100644
index 0000000..acf8b9d
--- /dev/null
+++ b/drivers/mfd/tps65010.c
@@ -0,0 +1,1072 @@
+/*
+ * tps65010 - driver for tps6501x power management chips
+ *
+ * Copyright (C) 2004 Texas Instruments
+ * Copyright (C) 2004-2005 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/i2c.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#include <linux/i2c/tps65010.h>
+
+#include <asm/gpio.h>
+
+
+/*-------------------------------------------------------------------------*/
+
+#define	DRIVER_VERSION	"2 May 2005"
+#define	DRIVER_NAME	(tps65010_driver.driver.name)
+
+MODULE_DESCRIPTION("TPS6501x Power Management Driver");
+MODULE_LICENSE("GPL");
+
+static struct i2c_driver tps65010_driver;
+
+/*-------------------------------------------------------------------------*/
+
+/* This driver handles a family of multipurpose chips, which incorporate
+ * voltage regulators, lithium ion/polymer battery charging, GPIOs, LEDs,
+ * and other features often needed in portable devices like cell phones
+ * or digital cameras.
+ *
+ * The tps65011 and tps65013 have different voltage settings compared
+ * to tps65010 and tps65012.  The tps65013 has a NO_CHG status/irq.
+ * All except tps65010 have "wait" mode, possibly defaulted so that
+ * battery-insert != device-on.
+ *
+ * We could distinguish between some models by checking VDCDC1.UVLO or
+ * other registers, unless they've been changed already after powerup
+ * as part of board setup by a bootloader.
+ */
+enum tps_model {
+	TPS65010,
+	TPS65011,
+	TPS65012,
+	TPS65013,
+};
+
+struct tps65010 {
+	struct i2c_client	*client;
+	struct mutex		lock;
+	struct delayed_work	work;
+	struct dentry		*file;
+	unsigned		charging:1;
+	unsigned		por:1;
+	unsigned		model:8;
+	u16			vbus;
+	unsigned long		flags;
+#define	FLAG_VBUS_CHANGED	0
+#define	FLAG_IRQ_ENABLE		1
+
+	/* copies of last register state */
+	u8			chgstatus, regstatus, chgconf;
+	u8			nmask1, nmask2;
+
+	u8			outmask;
+	struct gpio_chip	chip;
+	struct platform_device	*leds;
+};
+
+#define	POWER_POLL_DELAY	msecs_to_jiffies(5000)
+
+/*-------------------------------------------------------------------------*/
+
+#if	defined(DEBUG) || defined(CONFIG_DEBUG_FS)
+
+static void dbg_chgstat(char *buf, size_t len, u8 chgstatus)
+{
+	snprintf(buf, len, "%02x%s%s%s%s%s%s%s%s\n",
+		chgstatus,
+		(chgstatus & TPS_CHG_USB) ? " USB" : "",
+		(chgstatus & TPS_CHG_AC) ? " AC" : "",
+		(chgstatus & TPS_CHG_THERM) ? " therm" : "",
+		(chgstatus & TPS_CHG_TERM) ? " done" :
+			((chgstatus & (TPS_CHG_USB|TPS_CHG_AC))
+				? " (charging)" : ""),
+		(chgstatus & TPS_CHG_TAPER_TMO) ? " taper_tmo" : "",
+		(chgstatus & TPS_CHG_CHG_TMO) ? " charge_tmo" : "",
+		(chgstatus & TPS_CHG_PRECHG_TMO) ? " prechg_tmo" : "",
+		(chgstatus & TPS_CHG_TEMP_ERR) ? " temp_err" : "");
+}
+
+static void dbg_regstat(char *buf, size_t len, u8 regstatus)
+{
+	snprintf(buf, len, "%02x %s%s%s%s%s%s%s%s\n",
+		regstatus,
+		(regstatus & TPS_REG_ONOFF) ? "off" : "(on)",
+		(regstatus & TPS_REG_COVER) ? " uncover" : "",
+		(regstatus & TPS_REG_UVLO) ? " UVLO" : "",
+		(regstatus & TPS_REG_NO_CHG) ? " NO_CHG" : "",
+		(regstatus & TPS_REG_PG_LD02) ? " ld02_bad" : "",
+		(regstatus & TPS_REG_PG_LD01) ? " ld01_bad" : "",
+		(regstatus & TPS_REG_PG_MAIN) ? " main_bad" : "",
+		(regstatus & TPS_REG_PG_CORE) ? " core_bad" : "");
+}
+
+static void dbg_chgconf(int por, char *buf, size_t len, u8 chgconfig)
+{
+	const char *hibit;
+
+	if (por)
+		hibit = (chgconfig & TPS_CHARGE_POR)
+				? "POR=69ms" : "POR=1sec";
+	else
+		hibit = (chgconfig & TPS65013_AUA) ? "AUA" : "";
+
+	snprintf(buf, len, "%02x %s%s%s AC=%d%% USB=%dmA %sCharge\n",
+		chgconfig, hibit,
+		(chgconfig & TPS_CHARGE_RESET) ? " reset" : "",
+		(chgconfig & TPS_CHARGE_FAST) ? " fast" : "",
+		({int p; switch ((chgconfig >> 3) & 3) {
+		case 3:		p = 100; break;
+		case 2:		p = 75; break;
+		case 1:		p = 50; break;
+		default:	p = 25; break;
+		}; p; }),
+		(chgconfig & TPS_VBUS_CHARGING)
+			? ((chgconfig & TPS_VBUS_500MA) ? 500 : 100)
+			: 0,
+		(chgconfig & TPS_CHARGE_ENABLE) ? "" : "No");
+}
+
+#endif
+
+#ifdef	DEBUG
+
+static void show_chgstatus(const char *label, u8 chgstatus)
+{
+	char buf [100];
+
+	dbg_chgstat(buf, sizeof buf, chgstatus);
+	pr_debug("%s: %s %s", DRIVER_NAME, label, buf);
+}
+
+static void show_regstatus(const char *label, u8 regstatus)
+{
+	char buf [100];
+
+	dbg_regstat(buf, sizeof buf, regstatus);
+	pr_debug("%s: %s %s", DRIVER_NAME, label, buf);
+}
+
+static void show_chgconfig(int por, const char *label, u8 chgconfig)
+{
+	char buf [100];
+
+	dbg_chgconf(por, buf, sizeof buf, chgconfig);
+	pr_debug("%s: %s %s", DRIVER_NAME, label, buf);
+}
+
+#else
+
+static inline void show_chgstatus(const char *label, u8 chgstatus) { }
+static inline void show_regstatus(const char *label, u8 chgstatus) { }
+static inline void show_chgconfig(int por, const char *label, u8 chgconfig) { }
+
+#endif
+
+#ifdef	CONFIG_DEBUG_FS
+
+static int dbg_show(struct seq_file *s, void *_)
+{
+	struct tps65010	*tps = s->private;
+	u8		value, v2;
+	unsigned	i;
+	char		buf[100];
+	const char	*chip;
+
+	switch (tps->model) {
+	case TPS65010:	chip = "tps65010"; break;
+	case TPS65011:	chip = "tps65011"; break;
+	case TPS65012:	chip = "tps65012"; break;
+	case TPS65013:	chip = "tps65013"; break;
+	default:	chip = NULL; break;
+	}
+	seq_printf(s, "driver  %s\nversion %s\nchip    %s\n\n",
+			DRIVER_NAME, DRIVER_VERSION, chip);
+
+	mutex_lock(&tps->lock);
+
+	/* FIXME how can we tell whether a battery is present?
+	 * likely involves a charge gauging chip (like BQ26501).
+	 */
+
+	seq_printf(s, "%scharging\n\n", tps->charging ? "" : "(not) ");
+
+
+	/* registers for monitoring battery charging and status; note
+	 * that reading chgstat and regstat may ack IRQs...
+	 */
+	value = i2c_smbus_read_byte_data(tps->client, TPS_CHGCONFIG);
+	dbg_chgconf(tps->por, buf, sizeof buf, value);
+	seq_printf(s, "chgconfig %s", buf);
+
+	value = i2c_smbus_read_byte_data(tps->client, TPS_CHGSTATUS);
+	dbg_chgstat(buf, sizeof buf, value);
+	seq_printf(s, "chgstat   %s", buf);
+	value = i2c_smbus_read_byte_data(tps->client, TPS_MASK1);
+	dbg_chgstat(buf, sizeof buf, value);
+	seq_printf(s, "mask1     %s", buf);
+	/* ignore ackint1 */
+
+	value = i2c_smbus_read_byte_data(tps->client, TPS_REGSTATUS);
+	dbg_regstat(buf, sizeof buf, value);
+	seq_printf(s, "regstat   %s", buf);
+	value = i2c_smbus_read_byte_data(tps->client, TPS_MASK2);
+	dbg_regstat(buf, sizeof buf, value);
+	seq_printf(s, "mask2     %s\n", buf);
+	/* ignore ackint2 */
+
+	(void) schedule_delayed_work(&tps->work, POWER_POLL_DELAY);
+
+
+	/* VMAIN voltage, enable lowpower, etc */
+	value = i2c_smbus_read_byte_data(tps->client, TPS_VDCDC1);
+	seq_printf(s, "vdcdc1    %02x\n", value);
+
+	/* VCORE voltage, vibrator on/off */
+	value = i2c_smbus_read_byte_data(tps->client, TPS_VDCDC2);
+	seq_printf(s, "vdcdc2    %02x\n", value);
+
+	/* both LD0s, and their lowpower behavior */
+	value = i2c_smbus_read_byte_data(tps->client, TPS_VREGS1);
+	seq_printf(s, "vregs1    %02x\n\n", value);
+
+
+	/* LEDs and GPIOs */
+	value = i2c_smbus_read_byte_data(tps->client, TPS_LED1_ON);
+	v2 = i2c_smbus_read_byte_data(tps->client, TPS_LED1_PER);
+	seq_printf(s, "led1 %s, on=%02x, per=%02x, %d/%d msec\n",
+		(value & 0x80)
+			? ((v2 & 0x80) ? "on" : "off")
+			: ((v2 & 0x80) ? "blink" : "(nPG)"),
+		value, v2,
+		(value & 0x7f) * 10, (v2 & 0x7f) * 100);
+
+	value = i2c_smbus_read_byte_data(tps->client, TPS_LED2_ON);
+	v2 = i2c_smbus_read_byte_data(tps->client, TPS_LED2_PER);
+	seq_printf(s, "led2 %s, on=%02x, per=%02x, %d/%d msec\n",
+		(value & 0x80)
+			? ((v2 & 0x80) ? "on" : "off")
+			: ((v2 & 0x80) ? "blink" : "off"),
+		value, v2,
+		(value & 0x7f) * 10, (v2 & 0x7f) * 100);
+
+	value = i2c_smbus_read_byte_data(tps->client, TPS_DEFGPIO);
+	v2 = i2c_smbus_read_byte_data(tps->client, TPS_MASK3);
+	seq_printf(s, "defgpio %02x mask3 %02x\n", value, v2);
+
+	for (i = 0; i < 4; i++) {
+		if (value & (1 << (4 + i)))
+			seq_printf(s, "  gpio%d-out %s\n", i + 1,
+				(value & (1 << i)) ? "low" : "hi ");
+		else
+			seq_printf(s, "  gpio%d-in  %s %s %s\n", i + 1,
+				(value & (1 << i)) ? "hi " : "low",
+				(v2 & (1 << i)) ? "no-irq" : "irq",
+				(v2 & (1 << (4 + i))) ? "rising" : "falling");
+	}
+
+	mutex_unlock(&tps->lock);
+	return 0;
+}
+
+static int dbg_tps_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dbg_show, inode->i_private);
+}
+
+static const struct file_operations debug_fops = {
+	.open		= dbg_tps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+#define	DEBUG_FOPS	&debug_fops
+
+#else
+#define	DEBUG_FOPS	NULL
+#endif
+
+/*-------------------------------------------------------------------------*/
+
+/* handle IRQS in a task context, so we can use I2C calls */
+static void tps65010_interrupt(struct tps65010 *tps)
+{
+	u8 tmp = 0, mask, poll;
+
+	/* IRQs won't trigger for certain events, but we can get
+	 * others by polling (normally, with external power applied).
+	 */
+	poll = 0;
+
+	/* regstatus irqs */
+	if (tps->nmask2) {
+		tmp = i2c_smbus_read_byte_data(tps->client, TPS_REGSTATUS);
+		mask = tmp ^ tps->regstatus;
+		tps->regstatus = tmp;
+		mask &= tps->nmask2;
+	} else
+		mask = 0;
+	if (mask) {
+		tps->regstatus =  tmp;
+		/* may need to shut something down ... */
+
+		/* "off" usually means deep sleep */
+		if (tmp & TPS_REG_ONOFF) {
+			pr_info("%s: power off button\n", DRIVER_NAME);
+#if 0
+			/* REVISIT:  this might need its own workqueue
+			 * plus tweaks including deadlock avoidance ...
+			 * also needs to get error handling and probably
+			 * an #ifdef CONFIG_HIBERNATION
+			 */
+			hibernate();
+#endif
+			poll = 1;
+		}
+	}
+
+	/* chgstatus irqs */
+	if (tps->nmask1) {
+		tmp = i2c_smbus_read_byte_data(tps->client, TPS_CHGSTATUS);
+		mask = tmp ^ tps->chgstatus;
+		tps->chgstatus = tmp;
+		mask &= tps->nmask1;
+	} else
+		mask = 0;
+	if (mask) {
+		unsigned	charging = 0;
+
+		show_chgstatus("chg/irq", tmp);
+		if (tmp & (TPS_CHG_USB|TPS_CHG_AC))
+			show_chgconfig(tps->por, "conf", tps->chgconf);
+
+		/* Unless it was turned off or disabled, we charge any
+		 * battery whenever there's power available for it
+		 * and the charger hasn't been disabled.
+		 */
+		if (!(tps->chgstatus & ~(TPS_CHG_USB|TPS_CHG_AC))
+				&& (tps->chgstatus & (TPS_CHG_USB|TPS_CHG_AC))
+				&& (tps->chgconf & TPS_CHARGE_ENABLE)
+				) {
+			if (tps->chgstatus & TPS_CHG_USB) {
+				/* VBUS options are readonly until reconnect */
+				if (mask & TPS_CHG_USB)
+					set_bit(FLAG_VBUS_CHANGED, &tps->flags);
+				charging = 1;
+			} else if (tps->chgstatus & TPS_CHG_AC)
+				charging = 1;
+		}
+		if (charging != tps->charging) {
+			tps->charging = charging;
+			pr_info("%s: battery %scharging\n",
+				DRIVER_NAME, charging ? "" :
+				((tps->chgstatus & (TPS_CHG_USB|TPS_CHG_AC))
+					? "NOT " : "dis"));
+		}
+	}
+
+	/* always poll to detect (a) power removal, without tps65013
+	 * NO_CHG IRQ; or (b) restart of charging after stop.
+	 */
+	if ((tps->model != TPS65013 || !tps->charging)
+			&& (tps->chgstatus & (TPS_CHG_USB|TPS_CHG_AC)))
+		poll = 1;
+	if (poll)
+		(void) schedule_delayed_work(&tps->work, POWER_POLL_DELAY);
+
+	/* also potentially gpio-in rise or fall */
+}
+
+/* handle IRQs and polling using keventd for now */
+static void tps65010_work(struct work_struct *work)
+{
+	struct tps65010		*tps;
+
+	tps = container_of(work, struct tps65010, work.work);
+	mutex_lock(&tps->lock);
+
+	tps65010_interrupt(tps);
+
+	if (test_and_clear_bit(FLAG_VBUS_CHANGED, &tps->flags)) {
+		int	status;
+		u8	chgconfig, tmp;
+
+		chgconfig = i2c_smbus_read_byte_data(tps->client,
+					TPS_CHGCONFIG);
+		chgconfig &= ~(TPS_VBUS_500MA | TPS_VBUS_CHARGING);
+		if (tps->vbus == 500)
+			chgconfig |= TPS_VBUS_500MA | TPS_VBUS_CHARGING;
+		else if (tps->vbus >= 100)
+			chgconfig |= TPS_VBUS_CHARGING;
+
+		status = i2c_smbus_write_byte_data(tps->client,
+				TPS_CHGCONFIG, chgconfig);
+
+		/* vbus update fails unless VBUS is connected! */
+		tmp = i2c_smbus_read_byte_data(tps->client, TPS_CHGCONFIG);
+		tps->chgconf = tmp;
+		show_chgconfig(tps->por, "update vbus", tmp);
+	}
+
+	if (test_and_clear_bit(FLAG_IRQ_ENABLE, &tps->flags))
+		enable_irq(tps->client->irq);
+
+	mutex_unlock(&tps->lock);
+}
+
+static irqreturn_t tps65010_irq(int irq, void *_tps)
+{
+	struct tps65010		*tps = _tps;
+
+	disable_irq_nosync(irq);
+	set_bit(FLAG_IRQ_ENABLE, &tps->flags);
+	(void) schedule_work(&tps->work.work);
+	return IRQ_HANDLED;
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* offsets 0..3 == GPIO1..GPIO4
+ * offsets 4..5 == LED1/nPG, LED2 (we set one of the non-BLINK modes)
+ * offset 6 == vibrator motor driver
+ */
+static void
+tps65010_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	if (offset < 4)
+		tps65010_set_gpio_out_value(offset + 1, value);
+	else if (offset < 6)
+		tps65010_set_led(offset - 3, value ? ON : OFF);
+	else
+		tps65010_set_vib(value);
+}
+
+static int
+tps65010_output(struct gpio_chip *chip, unsigned offset, int value)
+{
+	/* GPIOs may be input-only */
+	if (offset < 4) {
+		struct tps65010		*tps;
+
+		tps = container_of(chip, struct tps65010, chip);
+		if (!(tps->outmask & (1 << offset)))
+			return -EINVAL;
+		tps65010_set_gpio_out_value(offset + 1, value);
+	} else if (offset < 6)
+		tps65010_set_led(offset - 3, value ? ON : OFF);
+	else
+		tps65010_set_vib(value);
+
+	return 0;
+}
+
+static int tps65010_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	int			value;
+	struct tps65010		*tps;
+
+	tps = container_of(chip, struct tps65010, chip);
+
+	if (offset < 4) {
+		value = i2c_smbus_read_byte_data(tps->client, TPS_DEFGPIO);
+		if (value < 0)
+			return 0;
+		if (value & (1 << (offset + 4)))	/* output */
+			return !(value & (1 << offset));
+		else					/* input */
+			return (value & (1 << offset));
+	}
+
+	/* REVISIT we *could* report LED1/nPG and LED2 state ... */
+	return 0;
+}
+
+
+/*-------------------------------------------------------------------------*/
+
+static struct tps65010 *the_tps;
+
+static int __exit tps65010_remove(struct i2c_client *client)
+{
+	struct tps65010		*tps = i2c_get_clientdata(client);
+	struct tps65010_board	*board = client->dev.platform_data;
+
+	if (board && board->teardown) {
+		int status = board->teardown(client, board->context);
+		if (status < 0)
+			dev_dbg(&client->dev, "board %s %s err %d\n",
+				"teardown", client->name, status);
+	}
+	if (client->irq > 0)
+		free_irq(client->irq, tps);
+	cancel_delayed_work(&tps->work);
+	flush_scheduled_work();
+	debugfs_remove(tps->file);
+	kfree(tps);
+	i2c_set_clientdata(client, NULL);
+	the_tps = NULL;
+	return 0;
+}
+
+static int tps65010_probe(struct i2c_client *client,
+			  const struct i2c_device_id *id)
+{
+	struct tps65010		*tps;
+	int			status;
+	struct tps65010_board	*board = client->dev.platform_data;
+
+	if (the_tps) {
+		dev_dbg(&client->dev, "only one tps6501x chip allowed\n");
+		return -ENODEV;
+	}
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+		return -EINVAL;
+
+	tps = kzalloc(sizeof *tps, GFP_KERNEL);
+	if (!tps)
+		return -ENOMEM;
+
+	mutex_init(&tps->lock);
+	INIT_DELAYED_WORK(&tps->work, tps65010_work);
+	tps->client = client;
+	tps->model = id->driver_data;
+
+	/* the IRQ is active low, but many gpio lines can't support that
+	 * so this driver uses falling-edge triggers instead.
+	 */
+	if (client->irq > 0) {
+		status = request_irq(client->irq, tps65010_irq,
+			IRQF_SAMPLE_RANDOM | IRQF_TRIGGER_FALLING,
+			DRIVER_NAME, tps);
+		if (status < 0) {
+			dev_dbg(&client->dev, "can't get IRQ %d, err %d\n",
+					client->irq, status);
+			goto fail1;
+		}
+		/* annoying race here, ideally we'd have an option
+		 * to claim the irq now and enable it later.
+		 * FIXME genirq IRQF_NOAUTOEN now solves that ...
+		 */
+		disable_irq(client->irq);
+		set_bit(FLAG_IRQ_ENABLE, &tps->flags);
+	} else
+		dev_warn(&client->dev, "IRQ not configured!\n");
+
+
+	switch (tps->model) {
+	case TPS65010:
+	case TPS65012:
+		tps->por = 1;
+		break;
+	/* else CHGCONFIG.POR is replaced by AUA, enabling a WAIT mode */
+	}
+	tps->chgconf = i2c_smbus_read_byte_data(client, TPS_CHGCONFIG);
+	show_chgconfig(tps->por, "conf/init", tps->chgconf);
+
+	show_chgstatus("chg/init",
+		i2c_smbus_read_byte_data(client, TPS_CHGSTATUS));
+	show_regstatus("reg/init",
+		i2c_smbus_read_byte_data(client, TPS_REGSTATUS));
+
+	pr_debug("%s: vdcdc1 0x%02x, vdcdc2 %02x, vregs1 %02x\n", DRIVER_NAME,
+		i2c_smbus_read_byte_data(client, TPS_VDCDC1),
+		i2c_smbus_read_byte_data(client, TPS_VDCDC2),
+		i2c_smbus_read_byte_data(client, TPS_VREGS1));
+	pr_debug("%s: defgpio 0x%02x, mask3 0x%02x\n", DRIVER_NAME,
+		i2c_smbus_read_byte_data(client, TPS_DEFGPIO),
+		i2c_smbus_read_byte_data(client, TPS_MASK3));
+
+	i2c_set_clientdata(client, tps);
+	the_tps = tps;
+
+#if	defined(CONFIG_USB_GADGET) && !defined(CONFIG_USB_OTG)
+	/* USB hosts can't draw VBUS.  OTG devices could, later
+	 * when OTG infrastructure enables it.  USB peripherals
+	 * could be relying on VBUS while booting, though.
+	 */
+	tps->vbus = 100;
+#endif
+
+	/* unmask the "interesting" irqs, then poll once to
+	 * kickstart monitoring, initialize shadowed status
+	 * registers, and maybe disable VBUS draw.
+	 */
+	tps->nmask1 = ~0;
+	(void) i2c_smbus_write_byte_data(client, TPS_MASK1, ~tps->nmask1);
+
+	tps->nmask2 = TPS_REG_ONOFF;
+	if (tps->model == TPS65013)
+		tps->nmask2 |= TPS_REG_NO_CHG;
+	(void) i2c_smbus_write_byte_data(client, TPS_MASK2, ~tps->nmask2);
+
+	(void) i2c_smbus_write_byte_data(client, TPS_MASK3, 0x0f
+		| i2c_smbus_read_byte_data(client, TPS_MASK3));
+
+	tps65010_work(&tps->work.work);
+
+	tps->file = debugfs_create_file(DRIVER_NAME, S_IRUGO, NULL,
+				tps, DEBUG_FOPS);
+
+	/* optionally register GPIOs */
+	if (board && board->base > 0) {
+		tps->outmask = board->outmask;
+
+		tps->chip.label = client->name;
+		tps->chip.dev = &client->dev;
+		tps->chip.owner = THIS_MODULE;
+
+		tps->chip.set = tps65010_gpio_set;
+		tps->chip.direction_output = tps65010_output;
+
+		/* NOTE:  only partial support for inputs; nyet IRQs */
+		tps->chip.get = tps65010_gpio_get;
+
+		tps->chip.base = board->base;
+		tps->chip.ngpio = 7;
+		tps->chip.can_sleep = 1;
+
+		status = gpiochip_add(&tps->chip);
+		if (status < 0)
+			dev_err(&client->dev, "can't add gpiochip, err %d\n",
+					status);
+		else if (board->setup) {
+			status = board->setup(client, board->context);
+			if (status < 0) {
+				dev_dbg(&client->dev,
+					"board %s %s err %d\n",
+					"setup", client->name, status);
+				status = 0;
+			}
+		}
+	}
+
+	return 0;
+fail1:
+	kfree(tps);
+	return status;
+}
+
+static const struct i2c_device_id tps65010_id[] = {
+	{ "tps65010", TPS65010 },
+	{ "tps65011", TPS65011 },
+	{ "tps65012", TPS65012 },
+	{ "tps65013", TPS65013 },
+	{ "tps65014", TPS65011 },	/* tps65011 charging at 6.5V max */
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tps65010_id);
+
+static struct i2c_driver tps65010_driver = {
+	.driver = {
+		.name	= "tps65010",
+	},
+	.probe	= tps65010_probe,
+	.remove	= __exit_p(tps65010_remove),
+	.id_table = tps65010_id,
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* Draw from VBUS:
+ *   0 mA -- DON'T DRAW (might supply power instead)
+ * 100 mA -- usb unit load (slowest charge rate)
+ * 500 mA -- usb high power (fast battery charge)
+ */
+int tps65010_set_vbus_draw(unsigned mA)
+{
+	unsigned long	flags;
+
+	if (!the_tps)
+		return -ENODEV;
+
+	/* assumes non-SMP */
+	local_irq_save(flags);
+	if (mA >= 500)
+		mA = 500;
+	else if (mA >= 100)
+		mA = 100;
+	else
+		mA = 0;
+	the_tps->vbus = mA;
+	if ((the_tps->chgstatus & TPS_CHG_USB)
+			&& test_and_set_bit(
+				FLAG_VBUS_CHANGED, &the_tps->flags)) {
+		/* gadget drivers call this in_irq() */
+		(void) schedule_work(&the_tps->work.work);
+	}
+	local_irq_restore(flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(tps65010_set_vbus_draw);
+
+/*-------------------------------------------------------------------------*/
+/* tps65010_set_gpio_out_value parameter:
+ * gpio:  GPIO1, GPIO2, GPIO3 or GPIO4
+ * value: LOW or HIGH
+ */
+int tps65010_set_gpio_out_value(unsigned gpio, unsigned value)
+{
+	int	 status;
+	unsigned defgpio;
+
+	if (!the_tps)
+		return -ENODEV;
+	if ((gpio < GPIO1) || (gpio > GPIO4))
+		return -EINVAL;
+
+	mutex_lock(&the_tps->lock);
+
+	defgpio = i2c_smbus_read_byte_data(the_tps->client, TPS_DEFGPIO);
+
+	/* Configure GPIO for output */
+	defgpio |= 1 << (gpio + 3);
+
+	/* Writing 1 forces a logic 0 on that GPIO and vice versa */
+	switch (value) {
+	case LOW:
+		defgpio |= 1 << (gpio - 1);    /* set GPIO low by writing 1 */
+		break;
+	/* case HIGH: */
+	default:
+		defgpio &= ~(1 << (gpio - 1)); /* set GPIO high by writing 0 */
+		break;
+	}
+
+	status = i2c_smbus_write_byte_data(the_tps->client,
+		TPS_DEFGPIO, defgpio);
+
+	pr_debug("%s: gpio%dout = %s, defgpio 0x%02x\n", DRIVER_NAME,
+		gpio, value ? "high" : "low",
+		i2c_smbus_read_byte_data(the_tps->client, TPS_DEFGPIO));
+
+	mutex_unlock(&the_tps->lock);
+	return status;
+}
+EXPORT_SYMBOL(tps65010_set_gpio_out_value);
+
+/*-------------------------------------------------------------------------*/
+/* tps65010_set_led parameter:
+ * led:  LED1 or LED2
+ * mode: ON, OFF or BLINK
+ */
+int tps65010_set_led(unsigned led, unsigned mode)
+{
+	int	 status;
+	unsigned led_on, led_per, offs;
+
+	if (!the_tps)
+		return -ENODEV;
+
+	if (led == LED1)
+		offs = 0;
+	else {
+		offs = 2;
+		led = LED2;
+	}
+
+	mutex_lock(&the_tps->lock);
+
+	pr_debug("%s: led%i_on   0x%02x\n", DRIVER_NAME, led,
+		i2c_smbus_read_byte_data(the_tps->client,
+				TPS_LED1_ON + offs));
+
+	pr_debug("%s: led%i_per  0x%02x\n", DRIVER_NAME, led,
+		i2c_smbus_read_byte_data(the_tps->client,
+				TPS_LED1_PER + offs));
+
+	switch (mode) {
+	case OFF:
+		led_on  = 1 << 7;
+		led_per = 0 << 7;
+		break;
+	case ON:
+		led_on  = 1 << 7;
+		led_per = 1 << 7;
+		break;
+	case BLINK:
+		led_on  = 0x30 | (0 << 7);
+		led_per = 0x08 | (1 << 7);
+		break;
+	default:
+		printk(KERN_ERR "%s: Wrong mode parameter for set_led()\n",
+		       DRIVER_NAME);
+		mutex_unlock(&the_tps->lock);
+		return -EINVAL;
+	}
+
+	status = i2c_smbus_write_byte_data(the_tps->client,
+			TPS_LED1_ON + offs, led_on);
+
+	if (status != 0) {
+		printk(KERN_ERR "%s: Failed to write led%i_on register\n",
+		       DRIVER_NAME, led);
+		mutex_unlock(&the_tps->lock);
+		return status;
+	}
+
+	pr_debug("%s: led%i_on   0x%02x\n", DRIVER_NAME, led,
+		i2c_smbus_read_byte_data(the_tps->client, TPS_LED1_ON + offs));
+
+	status = i2c_smbus_write_byte_data(the_tps->client,
+			TPS_LED1_PER + offs, led_per);
+
+	if (status != 0) {
+		printk(KERN_ERR "%s: Failed to write led%i_per register\n",
+		       DRIVER_NAME, led);
+		mutex_unlock(&the_tps->lock);
+		return status;
+	}
+
+	pr_debug("%s: led%i_per  0x%02x\n", DRIVER_NAME, led,
+		i2c_smbus_read_byte_data(the_tps->client,
+				TPS_LED1_PER + offs));
+
+	mutex_unlock(&the_tps->lock);
+
+	return status;
+}
+EXPORT_SYMBOL(tps65010_set_led);
+
+/*-------------------------------------------------------------------------*/
+/* tps65010_set_vib parameter:
+ * value: ON or OFF
+ */
+int tps65010_set_vib(unsigned value)
+{
+	int	 status;
+	unsigned vdcdc2;
+
+	if (!the_tps)
+		return -ENODEV;
+
+	mutex_lock(&the_tps->lock);
+
+	vdcdc2 = i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC2);
+	vdcdc2 &= ~(1 << 1);
+	if (value)
+		vdcdc2 |= (1 << 1);
+	status = i2c_smbus_write_byte_data(the_tps->client,
+		TPS_VDCDC2, vdcdc2);
+
+	pr_debug("%s: vibrator %s\n", DRIVER_NAME, value ? "on" : "off");
+
+	mutex_unlock(&the_tps->lock);
+	return status;
+}
+EXPORT_SYMBOL(tps65010_set_vib);
+
+/*-------------------------------------------------------------------------*/
+/* tps65010_set_low_pwr parameter:
+ * mode: ON or OFF
+ */
+int tps65010_set_low_pwr(unsigned mode)
+{
+	int	 status;
+	unsigned vdcdc1;
+
+	if (!the_tps)
+		return -ENODEV;
+
+	mutex_lock(&the_tps->lock);
+
+	pr_debug("%s: %s low_pwr, vdcdc1 0x%02x\n", DRIVER_NAME,
+		mode ? "enable" : "disable",
+		i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1));
+
+	vdcdc1 = i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1);
+
+	switch (mode) {
+	case OFF:
+		vdcdc1 &= ~TPS_ENABLE_LP; /* disable ENABLE_LP bit */
+		break;
+	/* case ON: */
+	default:
+		vdcdc1 |= TPS_ENABLE_LP;  /* enable ENABLE_LP bit */
+		break;
+	}
+
+	status = i2c_smbus_write_byte_data(the_tps->client,
+			TPS_VDCDC1, vdcdc1);
+
+	if (status != 0)
+		printk(KERN_ERR "%s: Failed to write vdcdc1 register\n",
+			DRIVER_NAME);
+	else
+		pr_debug("%s: vdcdc1 0x%02x\n", DRIVER_NAME,
+			i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1));
+
+	mutex_unlock(&the_tps->lock);
+
+	return status;
+}
+EXPORT_SYMBOL(tps65010_set_low_pwr);
+
+/*-------------------------------------------------------------------------*/
+/* tps65010_config_vregs1 parameter:
+ * value to be written to VREGS1 register
+ * Note: The complete register is written, set all bits you need
+ */
+int tps65010_config_vregs1(unsigned value)
+{
+	int	 status;
+
+	if (!the_tps)
+		return -ENODEV;
+
+	mutex_lock(&the_tps->lock);
+
+	pr_debug("%s: vregs1 0x%02x\n", DRIVER_NAME,
+			i2c_smbus_read_byte_data(the_tps->client, TPS_VREGS1));
+
+	status = i2c_smbus_write_byte_data(the_tps->client,
+			TPS_VREGS1, value);
+
+	if (status != 0)
+		printk(KERN_ERR "%s: Failed to write vregs1 register\n",
+			DRIVER_NAME);
+	else
+		pr_debug("%s: vregs1 0x%02x\n", DRIVER_NAME,
+			i2c_smbus_read_byte_data(the_tps->client, TPS_VREGS1));
+
+	mutex_unlock(&the_tps->lock);
+
+	return status;
+}
+EXPORT_SYMBOL(tps65010_config_vregs1);
+
+/*-------------------------------------------------------------------------*/
+/* tps65013_set_low_pwr parameter:
+ * mode: ON or OFF
+ */
+
+/* FIXME: Assumes AC or USB power is present. Setting AUA bit is not
+	required if power supply is through a battery */
+
+int tps65013_set_low_pwr(unsigned mode)
+{
+	int	 status;
+	unsigned vdcdc1, chgconfig;
+
+	if (!the_tps || the_tps->por)
+		return -ENODEV;
+
+	mutex_lock(&the_tps->lock);
+
+	pr_debug("%s: %s low_pwr, chgconfig 0x%02x vdcdc1 0x%02x\n",
+		DRIVER_NAME,
+		mode ? "enable" : "disable",
+		i2c_smbus_read_byte_data(the_tps->client, TPS_CHGCONFIG),
+		i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1));
+
+	chgconfig = i2c_smbus_read_byte_data(the_tps->client, TPS_CHGCONFIG);
+	vdcdc1 = i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1);
+
+	switch (mode) {
+	case OFF:
+		chgconfig &= ~TPS65013_AUA; /* disable AUA bit */
+		vdcdc1 &= ~TPS_ENABLE_LP; /* disable ENABLE_LP bit */
+		break;
+	/* case ON: */
+	default:
+		chgconfig |= TPS65013_AUA;  /* enable AUA bit */
+		vdcdc1 |= TPS_ENABLE_LP;  /* enable ENABLE_LP bit */
+		break;
+	}
+
+	status = i2c_smbus_write_byte_data(the_tps->client,
+			TPS_CHGCONFIG, chgconfig);
+	if (status != 0) {
+		printk(KERN_ERR "%s: Failed to write chconfig register\n",
+	 DRIVER_NAME);
+		mutex_unlock(&the_tps->lock);
+		return status;
+	}
+
+	chgconfig = i2c_smbus_read_byte_data(the_tps->client, TPS_CHGCONFIG);
+	the_tps->chgconf = chgconfig;
+	show_chgconfig(0, "chgconf", chgconfig);
+
+	status = i2c_smbus_write_byte_data(the_tps->client,
+			TPS_VDCDC1, vdcdc1);
+
+	if (status != 0)
+		printk(KERN_ERR "%s: Failed to write vdcdc1 register\n",
+	 DRIVER_NAME);
+	else
+		pr_debug("%s: vdcdc1 0x%02x\n", DRIVER_NAME,
+			i2c_smbus_read_byte_data(the_tps->client, TPS_VDCDC1));
+
+	mutex_unlock(&the_tps->lock);
+
+	return status;
+}
+EXPORT_SYMBOL(tps65013_set_low_pwr);
+
+/*-------------------------------------------------------------------------*/
+
+static int __init tps_init(void)
+{
+	u32	tries = 3;
+	int	status = -ENODEV;
+
+	printk(KERN_INFO "%s: version %s\n", DRIVER_NAME, DRIVER_VERSION);
+
+	/* some boards have startup glitches */
+	while (tries--) {
+		status = i2c_add_driver(&tps65010_driver);
+		if (the_tps)
+			break;
+		i2c_del_driver(&tps65010_driver);
+		if (!tries) {
+			printk(KERN_ERR "%s: no chip?\n", DRIVER_NAME);
+			return -ENODEV;
+		}
+		pr_debug("%s: re-probe ...\n", DRIVER_NAME);
+		msleep(10);
+	}
+
+	return status;
+}
+/* NOTE:  this MUST be initialized before the other parts of the system
+ * that rely on it ... but after the i2c bus on which this relies.
+ * That is, much earlier than on PC-type systems, which don't often use
+ * I2C as a core system bus.
+ */
+subsys_initcall(tps_init);
+
+static void __exit tps_exit(void)
+{
+	i2c_del_driver(&tps65010_driver);
+}
+module_exit(tps_exit);
+
-- 
cgit v0.10.2


From 88e75cc347f66bc20e3c2b920431fc07253d69be Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Mon, 22 Dec 2008 12:18:02 +0100
Subject: mfd: move drivers/i2c/chips/menelaus.c to drivers/mfd

ove the menelaus driver from drivers/i2c/chips to drivers/mfd
since it's more of a multi-function device than anything else,
and since Jean is trying to vanish drivers/i2c/chips ASAP.

One way to think of these chips are as the PMIC family most
used with OMAP2 generation chips.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index fa69c99..864ac56 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -151,16 +151,6 @@ config SENSORS_TSL2550
 	  This driver can also be built as a module.  If so, the module
 	  will be called tsl2550.
 
-config MENELAUS
-	bool "TWL92330/Menelaus PM chip"
-	depends on I2C=y && ARCH_OMAP24XX
-	help
-	  If you say yes here you get support for the Texas Instruments
-	  TWL92330/Menelaus Power Management chip. This include voltage
-	  regulators, Dual slot memory card tranceivers, real-time clock
-	  and other features that are often used in portable devices like
-	  cell phones and PDAs.
-
 config MCU_MPC8349EMITX
 	tristate "MPC8349E-mITX MCU driver"
 	depends on I2C && PPC_83xx
diff --git a/drivers/i2c/chips/Makefile b/drivers/i2c/chips/Makefile
index 0c7e2f1..8b95f41 100644
--- a/drivers/i2c/chips/Makefile
+++ b/drivers/i2c/chips/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_SENSORS_PCF8574)	+= pcf8574.o
 obj-$(CONFIG_PCF8575)		+= pcf8575.o
 obj-$(CONFIG_SENSORS_PCF8591)	+= pcf8591.o
 obj-$(CONFIG_ISP1301_OMAP)	+= isp1301_omap.o
-obj-$(CONFIG_MENELAUS)		+= menelaus.o
 obj-$(CONFIG_SENSORS_TSL2550)	+= tsl2550.o
 obj-$(CONFIG_MCU_MPC8349EMITX)	+= mcu_mpc8349emitx.o
 
diff --git a/drivers/i2c/chips/menelaus.c b/drivers/i2c/chips/menelaus.c
deleted file mode 100644
index 4b364ba..0000000
--- a/drivers/i2c/chips/menelaus.c
+++ /dev/null
@@ -1,1285 +0,0 @@
-/*
- * Copyright (C) 2004 Texas Instruments, Inc.
- *
- * Some parts based tps65010.c:
- * Copyright (C) 2004 Texas Instruments and
- * Copyright (C) 2004-2005 David Brownell
- *
- * Some parts based on tlv320aic24.c:
- * Copyright (C) by Kai Svahn <kai.svahn@nokia.com>
- *
- * Changes for interrupt handling and clean-up by
- * Tony Lindgren <tony@atomide.com> and Imre Deak <imre.deak@nokia.com>
- * Cleanup and generalized support for voltage setting by
- * Juha Yrjola
- * Added support for controlling VCORE and regulator sleep states,
- * Amit Kucheria <amit.kucheria@nokia.com>
- * Copyright (C) 2005, 2006 Nokia Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/module.h>
-#include <linux/i2c.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
-#include <linux/mutex.h>
-#include <linux/workqueue.h>
-#include <linux/delay.h>
-#include <linux/rtc.h>
-#include <linux/bcd.h>
-
-#include <asm/mach/irq.h>
-
-#include <mach/gpio.h>
-#include <mach/menelaus.h>
-
-#define DRIVER_NAME			"menelaus"
-
-#define MENELAUS_I2C_ADDRESS		0x72
-
-#define MENELAUS_REV			0x01
-#define MENELAUS_VCORE_CTRL1		0x02
-#define MENELAUS_VCORE_CTRL2		0x03
-#define MENELAUS_VCORE_CTRL3		0x04
-#define MENELAUS_VCORE_CTRL4		0x05
-#define MENELAUS_VCORE_CTRL5		0x06
-#define MENELAUS_DCDC_CTRL1		0x07
-#define MENELAUS_DCDC_CTRL2		0x08
-#define MENELAUS_DCDC_CTRL3		0x09
-#define MENELAUS_LDO_CTRL1		0x0A
-#define MENELAUS_LDO_CTRL2		0x0B
-#define MENELAUS_LDO_CTRL3		0x0C
-#define MENELAUS_LDO_CTRL4		0x0D
-#define MENELAUS_LDO_CTRL5		0x0E
-#define MENELAUS_LDO_CTRL6		0x0F
-#define MENELAUS_LDO_CTRL7		0x10
-#define MENELAUS_LDO_CTRL8		0x11
-#define MENELAUS_SLEEP_CTRL1		0x12
-#define MENELAUS_SLEEP_CTRL2		0x13
-#define MENELAUS_DEVICE_OFF		0x14
-#define MENELAUS_OSC_CTRL		0x15
-#define MENELAUS_DETECT_CTRL		0x16
-#define MENELAUS_INT_MASK1		0x17
-#define MENELAUS_INT_MASK2		0x18
-#define MENELAUS_INT_STATUS1		0x19
-#define MENELAUS_INT_STATUS2		0x1A
-#define MENELAUS_INT_ACK1		0x1B
-#define MENELAUS_INT_ACK2		0x1C
-#define MENELAUS_GPIO_CTRL		0x1D
-#define MENELAUS_GPIO_IN		0x1E
-#define MENELAUS_GPIO_OUT		0x1F
-#define MENELAUS_BBSMS			0x20
-#define MENELAUS_RTC_CTRL		0x21
-#define MENELAUS_RTC_UPDATE		0x22
-#define MENELAUS_RTC_SEC		0x23
-#define MENELAUS_RTC_MIN		0x24
-#define MENELAUS_RTC_HR			0x25
-#define MENELAUS_RTC_DAY		0x26
-#define MENELAUS_RTC_MON		0x27
-#define MENELAUS_RTC_YR			0x28
-#define MENELAUS_RTC_WKDAY		0x29
-#define MENELAUS_RTC_AL_SEC		0x2A
-#define MENELAUS_RTC_AL_MIN		0x2B
-#define MENELAUS_RTC_AL_HR		0x2C
-#define MENELAUS_RTC_AL_DAY		0x2D
-#define MENELAUS_RTC_AL_MON		0x2E
-#define MENELAUS_RTC_AL_YR		0x2F
-#define MENELAUS_RTC_COMP_MSB		0x30
-#define MENELAUS_RTC_COMP_LSB		0x31
-#define MENELAUS_S1_PULL_EN		0x32
-#define MENELAUS_S1_PULL_DIR		0x33
-#define MENELAUS_S2_PULL_EN		0x34
-#define MENELAUS_S2_PULL_DIR		0x35
-#define MENELAUS_MCT_CTRL1		0x36
-#define MENELAUS_MCT_CTRL2		0x37
-#define MENELAUS_MCT_CTRL3		0x38
-#define MENELAUS_MCT_PIN_ST		0x39
-#define MENELAUS_DEBOUNCE1		0x3A
-
-#define IH_MENELAUS_IRQS		12
-#define MENELAUS_MMC_S1CD_IRQ		0	/* MMC slot 1 card change */
-#define MENELAUS_MMC_S2CD_IRQ		1	/* MMC slot 2 card change */
-#define MENELAUS_MMC_S1D1_IRQ		2	/* MMC DAT1 low in slot 1 */
-#define MENELAUS_MMC_S2D1_IRQ		3	/* MMC DAT1 low in slot 2 */
-#define MENELAUS_LOWBAT_IRQ		4	/* Low battery */
-#define MENELAUS_HOTDIE_IRQ		5	/* Hot die detect */
-#define MENELAUS_UVLO_IRQ		6	/* UVLO detect */
-#define MENELAUS_TSHUT_IRQ		7	/* Thermal shutdown */
-#define MENELAUS_RTCTMR_IRQ		8	/* RTC timer */
-#define MENELAUS_RTCALM_IRQ		9	/* RTC alarm */
-#define MENELAUS_RTCERR_IRQ		10	/* RTC error */
-#define MENELAUS_PSHBTN_IRQ		11	/* Push button */
-#define MENELAUS_RESERVED12_IRQ		12	/* Reserved */
-#define MENELAUS_RESERVED13_IRQ		13	/* Reserved */
-#define MENELAUS_RESERVED14_IRQ		14	/* Reserved */
-#define MENELAUS_RESERVED15_IRQ		15	/* Reserved */
-
-static void menelaus_work(struct work_struct *_menelaus);
-
-struct menelaus_chip {
-	struct mutex		lock;
-	struct i2c_client	*client;
-	struct work_struct	work;
-#ifdef CONFIG_RTC_DRV_TWL92330
-	struct rtc_device	*rtc;
-	u8			rtc_control;
-	unsigned		uie:1;
-#endif
-	unsigned		vcore_hw_mode:1;
-	u8			mask1, mask2;
-	void			(*handlers[16])(struct menelaus_chip *);
-	void			(*mmc_callback)(void *data, u8 mask);
-	void			*mmc_callback_data;
-};
-
-static struct menelaus_chip *the_menelaus;
-
-static int menelaus_write_reg(int reg, u8 value)
-{
-	int val = i2c_smbus_write_byte_data(the_menelaus->client, reg, value);
-
-	if (val < 0) {
-		pr_err(DRIVER_NAME ": write error");
-		return val;
-	}
-
-	return 0;
-}
-
-static int menelaus_read_reg(int reg)
-{
-	int val = i2c_smbus_read_byte_data(the_menelaus->client, reg);
-
-	if (val < 0)
-		pr_err(DRIVER_NAME ": read error");
-
-	return val;
-}
-
-static int menelaus_enable_irq(int irq)
-{
-	if (irq > 7) {
-		irq -= 8;
-		the_menelaus->mask2 &= ~(1 << irq);
-		return menelaus_write_reg(MENELAUS_INT_MASK2,
-				the_menelaus->mask2);
-	} else {
-		the_menelaus->mask1 &= ~(1 << irq);
-		return menelaus_write_reg(MENELAUS_INT_MASK1,
-				the_menelaus->mask1);
-	}
-}
-
-static int menelaus_disable_irq(int irq)
-{
-	if (irq > 7) {
-		irq -= 8;
-		the_menelaus->mask2 |= (1 << irq);
-		return menelaus_write_reg(MENELAUS_INT_MASK2,
-				the_menelaus->mask2);
-	} else {
-		the_menelaus->mask1 |= (1 << irq);
-		return menelaus_write_reg(MENELAUS_INT_MASK1,
-				the_menelaus->mask1);
-	}
-}
-
-static int menelaus_ack_irq(int irq)
-{
-	if (irq > 7)
-		return menelaus_write_reg(MENELAUS_INT_ACK2, 1 << (irq - 8));
-	else
-		return menelaus_write_reg(MENELAUS_INT_ACK1, 1 << irq);
-}
-
-/* Adds a handler for an interrupt. Does not run in interrupt context */
-static int menelaus_add_irq_work(int irq,
-		void (*handler)(struct menelaus_chip *))
-{
-	int ret = 0;
-
-	mutex_lock(&the_menelaus->lock);
-	the_menelaus->handlers[irq] = handler;
-	ret = menelaus_enable_irq(irq);
-	mutex_unlock(&the_menelaus->lock);
-
-	return ret;
-}
-
-/* Removes handler for an interrupt */
-static int menelaus_remove_irq_work(int irq)
-{
-	int ret = 0;
-
-	mutex_lock(&the_menelaus->lock);
-	ret = menelaus_disable_irq(irq);
-	the_menelaus->handlers[irq] = NULL;
-	mutex_unlock(&the_menelaus->lock);
-
-	return ret;
-}
-
-/*
- * Gets scheduled when a card detect interrupt happens. Note that in some cases
- * this line is wired to card cover switch rather than the card detect switch
- * in each slot. In this case the cards are not seen by menelaus.
- * FIXME: Add handling for D1 too
- */
-static void menelaus_mmc_cd_work(struct menelaus_chip *menelaus_hw)
-{
-	int reg;
-	unsigned char card_mask = 0;
-
-	reg = menelaus_read_reg(MENELAUS_MCT_PIN_ST);
-	if (reg < 0)
-		return;
-
-	if (!(reg & 0x1))
-		card_mask |= (1 << 0);
-
-	if (!(reg & 0x2))
-		card_mask |= (1 << 1);
-
-	if (menelaus_hw->mmc_callback)
-		menelaus_hw->mmc_callback(menelaus_hw->mmc_callback_data,
-					  card_mask);
-}
-
-/*
- * Toggles the MMC slots between open-drain and push-pull mode.
- */
-int menelaus_set_mmc_opendrain(int slot, int enable)
-{
-	int ret, val;
-
-	if (slot != 1 && slot != 2)
-		return -EINVAL;
-	mutex_lock(&the_menelaus->lock);
-	ret = menelaus_read_reg(MENELAUS_MCT_CTRL1);
-	if (ret < 0) {
-		mutex_unlock(&the_menelaus->lock);
-		return ret;
-	}
-	val = ret;
-	if (slot == 1) {
-		if (enable)
-			val |= 1 << 2;
-		else
-			val &= ~(1 << 2);
-	} else {
-		if (enable)
-			val |= 1 << 3;
-		else
-			val &= ~(1 << 3);
-	}
-	ret = menelaus_write_reg(MENELAUS_MCT_CTRL1, val);
-	mutex_unlock(&the_menelaus->lock);
-
-	return ret;
-}
-EXPORT_SYMBOL(menelaus_set_mmc_opendrain);
-
-int menelaus_set_slot_sel(int enable)
-{
-	int ret;
-
-	mutex_lock(&the_menelaus->lock);
-	ret = menelaus_read_reg(MENELAUS_GPIO_CTRL);
-	if (ret < 0)
-		goto out;
-	ret |= 0x02;
-	if (enable)
-		ret |= 1 << 5;
-	else
-		ret &= ~(1 << 5);
-	ret = menelaus_write_reg(MENELAUS_GPIO_CTRL, ret);
-out:
-	mutex_unlock(&the_menelaus->lock);
-	return ret;
-}
-EXPORT_SYMBOL(menelaus_set_slot_sel);
-
-int menelaus_set_mmc_slot(int slot, int enable, int power, int cd_en)
-{
-	int ret, val;
-
-	if (slot != 1 && slot != 2)
-		return -EINVAL;
-	if (power >= 3)
-		return -EINVAL;
-
-	mutex_lock(&the_menelaus->lock);
-
-	ret = menelaus_read_reg(MENELAUS_MCT_CTRL2);
-	if (ret < 0)
-		goto out;
-	val = ret;
-	if (slot == 1) {
-		if (cd_en)
-			val |= (1 << 4) | (1 << 6);
-		else
-			val &= ~((1 << 4) | (1 << 6));
-	} else {
-		if (cd_en)
-			val |= (1 << 5) | (1 << 7);
-		else
-			val &= ~((1 << 5) | (1 << 7));
-	}
-	ret = menelaus_write_reg(MENELAUS_MCT_CTRL2, val);
-	if (ret < 0)
-		goto out;
-
-	ret = menelaus_read_reg(MENELAUS_MCT_CTRL3);
-	if (ret < 0)
-		goto out;
-	val = ret;
-	if (slot == 1) {
-		if (enable)
-			val |= 1 << 0;
-		else
-			val &= ~(1 << 0);
-	} else {
-		int b;
-
-		if (enable)
-			ret |= 1 << 1;
-		else
-			ret &= ~(1 << 1);
-		b = menelaus_read_reg(MENELAUS_MCT_CTRL2);
-		b &= ~0x03;
-		b |= power;
-		ret = menelaus_write_reg(MENELAUS_MCT_CTRL2, b);
-		if (ret < 0)
-			goto out;
-	}
-	/* Disable autonomous shutdown */
-	val &= ~(0x03 << 2);
-	ret = menelaus_write_reg(MENELAUS_MCT_CTRL3, val);
-out:
-	mutex_unlock(&the_menelaus->lock);
-	return ret;
-}
-EXPORT_SYMBOL(menelaus_set_mmc_slot);
-
-int menelaus_register_mmc_callback(void (*callback)(void *data, u8 card_mask),
-				   void *data)
-{
-	int ret = 0;
-
-	the_menelaus->mmc_callback_data = data;
-	the_menelaus->mmc_callback = callback;
-	ret = menelaus_add_irq_work(MENELAUS_MMC_S1CD_IRQ,
-				    menelaus_mmc_cd_work);
-	if (ret < 0)
-		return ret;
-	ret = menelaus_add_irq_work(MENELAUS_MMC_S2CD_IRQ,
-				    menelaus_mmc_cd_work);
-	if (ret < 0)
-		return ret;
-	ret = menelaus_add_irq_work(MENELAUS_MMC_S1D1_IRQ,
-				    menelaus_mmc_cd_work);
-	if (ret < 0)
-		return ret;
-	ret = menelaus_add_irq_work(MENELAUS_MMC_S2D1_IRQ,
-				    menelaus_mmc_cd_work);
-
-	return ret;
-}
-EXPORT_SYMBOL(menelaus_register_mmc_callback);
-
-void menelaus_unregister_mmc_callback(void)
-{
-	menelaus_remove_irq_work(MENELAUS_MMC_S1CD_IRQ);
-	menelaus_remove_irq_work(MENELAUS_MMC_S2CD_IRQ);
-	menelaus_remove_irq_work(MENELAUS_MMC_S1D1_IRQ);
-	menelaus_remove_irq_work(MENELAUS_MMC_S2D1_IRQ);
-
-	the_menelaus->mmc_callback = NULL;
-	the_menelaus->mmc_callback_data = 0;
-}
-EXPORT_SYMBOL(menelaus_unregister_mmc_callback);
-
-struct menelaus_vtg {
-	const char *name;
-	u8 vtg_reg;
-	u8 vtg_shift;
-	u8 vtg_bits;
-	u8 mode_reg;
-};
-
-struct menelaus_vtg_value {
-	u16 vtg;
-	u16 val;
-};
-
-static int menelaus_set_voltage(const struct menelaus_vtg *vtg, int mV,
-				int vtg_val, int mode)
-{
-	int val, ret;
-	struct i2c_client *c = the_menelaus->client;
-
-	mutex_lock(&the_menelaus->lock);
-	if (vtg == 0)
-		goto set_voltage;
-
-	ret = menelaus_read_reg(vtg->vtg_reg);
-	if (ret < 0)
-		goto out;
-	val = ret & ~(((1 << vtg->vtg_bits) - 1) << vtg->vtg_shift);
-	val |= vtg_val << vtg->vtg_shift;
-
-	dev_dbg(&c->dev, "Setting voltage '%s'"
-			 "to %d mV (reg 0x%02x, val 0x%02x)\n",
-			vtg->name, mV, vtg->vtg_reg, val);
-
-	ret = menelaus_write_reg(vtg->vtg_reg, val);
-	if (ret < 0)
-		goto out;
-set_voltage:
-	ret = menelaus_write_reg(vtg->mode_reg, mode);
-out:
-	mutex_unlock(&the_menelaus->lock);
-	if (ret == 0) {
-		/* Wait for voltage to stabilize */
-		msleep(1);
-	}
-	return ret;
-}
-
-static int menelaus_get_vtg_value(int vtg, const struct menelaus_vtg_value *tbl,
-				  int n)
-{
-	int i;
-
-	for (i = 0; i < n; i++, tbl++)
-		if (tbl->vtg == vtg)
-			return tbl->val;
-	return -EINVAL;
-}
-
-/*
- * Vcore can be programmed in two ways:
- * SW-controlled: Required voltage is programmed into VCORE_CTRL1
- * HW-controlled: Required range (roof-floor) is programmed into VCORE_CTRL3
- * and VCORE_CTRL4
- *
- * Call correct 'set' function accordingly
- */
-
-static const struct menelaus_vtg_value vcore_values[] = {
-	{ 1000, 0 },
-	{ 1025, 1 },
-	{ 1050, 2 },
-	{ 1075, 3 },
-	{ 1100, 4 },
-	{ 1125, 5 },
-	{ 1150, 6 },
-	{ 1175, 7 },
-	{ 1200, 8 },
-	{ 1225, 9 },
-	{ 1250, 10 },
-	{ 1275, 11 },
-	{ 1300, 12 },
-	{ 1325, 13 },
-	{ 1350, 14 },
-	{ 1375, 15 },
-	{ 1400, 16 },
-	{ 1425, 17 },
-	{ 1450, 18 },
-};
-
-int menelaus_set_vcore_sw(unsigned int mV)
-{
-	int val, ret;
-	struct i2c_client *c = the_menelaus->client;
-
-	val = menelaus_get_vtg_value(mV, vcore_values,
-				     ARRAY_SIZE(vcore_values));
-	if (val < 0)
-		return -EINVAL;
-
-	dev_dbg(&c->dev, "Setting VCORE to %d mV (val 0x%02x)\n", mV, val);
-
-	/* Set SW mode and the voltage in one go. */
-	mutex_lock(&the_menelaus->lock);
-	ret = menelaus_write_reg(MENELAUS_VCORE_CTRL1, val);
-	if (ret == 0)
-		the_menelaus->vcore_hw_mode = 0;
-	mutex_unlock(&the_menelaus->lock);
-	msleep(1);
-
-	return ret;
-}
-
-int menelaus_set_vcore_hw(unsigned int roof_mV, unsigned int floor_mV)
-{
-	int fval, rval, val, ret;
-	struct i2c_client *c = the_menelaus->client;
-
-	rval = menelaus_get_vtg_value(roof_mV, vcore_values,
-				      ARRAY_SIZE(vcore_values));
-	if (rval < 0)
-		return -EINVAL;
-	fval = menelaus_get_vtg_value(floor_mV, vcore_values,
-				      ARRAY_SIZE(vcore_values));
-	if (fval < 0)
-		return -EINVAL;
-
-	dev_dbg(&c->dev, "Setting VCORE FLOOR to %d mV and ROOF to %d mV\n",
-	       floor_mV, roof_mV);
-
-	mutex_lock(&the_menelaus->lock);
-	ret = menelaus_write_reg(MENELAUS_VCORE_CTRL3, fval);
-	if (ret < 0)
-		goto out;
-	ret = menelaus_write_reg(MENELAUS_VCORE_CTRL4, rval);
-	if (ret < 0)
-		goto out;
-	if (!the_menelaus->vcore_hw_mode) {
-		val = menelaus_read_reg(MENELAUS_VCORE_CTRL1);
-		/* HW mode, turn OFF byte comparator */
-		val |= ((1 << 7) | (1 << 5));
-		ret = menelaus_write_reg(MENELAUS_VCORE_CTRL1, val);
-		the_menelaus->vcore_hw_mode = 1;
-	}
-	msleep(1);
-out:
-	mutex_unlock(&the_menelaus->lock);
-	return ret;
-}
-
-static const struct menelaus_vtg vmem_vtg = {
-	.name = "VMEM",
-	.vtg_reg = MENELAUS_LDO_CTRL1,
-	.vtg_shift = 0,
-	.vtg_bits = 2,
-	.mode_reg = MENELAUS_LDO_CTRL3,
-};
-
-static const struct menelaus_vtg_value vmem_values[] = {
-	{ 1500, 0 },
-	{ 1800, 1 },
-	{ 1900, 2 },
-	{ 2500, 3 },
-};
-
-int menelaus_set_vmem(unsigned int mV)
-{
-	int val;
-
-	if (mV == 0)
-		return menelaus_set_voltage(&vmem_vtg, 0, 0, 0);
-
-	val = menelaus_get_vtg_value(mV, vmem_values, ARRAY_SIZE(vmem_values));
-	if (val < 0)
-		return -EINVAL;
-	return menelaus_set_voltage(&vmem_vtg, mV, val, 0x02);
-}
-EXPORT_SYMBOL(menelaus_set_vmem);
-
-static const struct menelaus_vtg vio_vtg = {
-	.name = "VIO",
-	.vtg_reg = MENELAUS_LDO_CTRL1,
-	.vtg_shift = 2,
-	.vtg_bits = 2,
-	.mode_reg = MENELAUS_LDO_CTRL4,
-};
-
-static const struct menelaus_vtg_value vio_values[] = {
-	{ 1500, 0 },
-	{ 1800, 1 },
-	{ 2500, 2 },
-	{ 2800, 3 },
-};
-
-int menelaus_set_vio(unsigned int mV)
-{
-	int val;
-
-	if (mV == 0)
-		return menelaus_set_voltage(&vio_vtg, 0, 0, 0);
-
-	val = menelaus_get_vtg_value(mV, vio_values, ARRAY_SIZE(vio_values));
-	if (val < 0)
-		return -EINVAL;
-	return menelaus_set_voltage(&vio_vtg, mV, val, 0x02);
-}
-EXPORT_SYMBOL(menelaus_set_vio);
-
-static const struct menelaus_vtg_value vdcdc_values[] = {
-	{ 1500, 0 },
-	{ 1800, 1 },
-	{ 2000, 2 },
-	{ 2200, 3 },
-	{ 2400, 4 },
-	{ 2800, 5 },
-	{ 3000, 6 },
-	{ 3300, 7 },
-};
-
-static const struct menelaus_vtg vdcdc2_vtg = {
-	.name = "VDCDC2",
-	.vtg_reg = MENELAUS_DCDC_CTRL1,
-	.vtg_shift = 0,
-	.vtg_bits = 3,
-	.mode_reg = MENELAUS_DCDC_CTRL2,
-};
-
-static const struct menelaus_vtg vdcdc3_vtg = {
-	.name = "VDCDC3",
-	.vtg_reg = MENELAUS_DCDC_CTRL1,
-	.vtg_shift = 3,
-	.vtg_bits = 3,
-	.mode_reg = MENELAUS_DCDC_CTRL3,
-};
-
-int menelaus_set_vdcdc(int dcdc, unsigned int mV)
-{
-	const struct menelaus_vtg *vtg;
-	int val;
-
-	if (dcdc != 2 && dcdc != 3)
-		return -EINVAL;
-	if (dcdc == 2)
-		vtg = &vdcdc2_vtg;
-	else
-		vtg = &vdcdc3_vtg;
-
-	if (mV == 0)
-		return menelaus_set_voltage(vtg, 0, 0, 0);
-
-	val = menelaus_get_vtg_value(mV, vdcdc_values,
-				     ARRAY_SIZE(vdcdc_values));
-	if (val < 0)
-		return -EINVAL;
-	return menelaus_set_voltage(vtg, mV, val, 0x03);
-}
-
-static const struct menelaus_vtg_value vmmc_values[] = {
-	{ 1850, 0 },
-	{ 2800, 1 },
-	{ 3000, 2 },
-	{ 3100, 3 },
-};
-
-static const struct menelaus_vtg vmmc_vtg = {
-	.name = "VMMC",
-	.vtg_reg = MENELAUS_LDO_CTRL1,
-	.vtg_shift = 6,
-	.vtg_bits = 2,
-	.mode_reg = MENELAUS_LDO_CTRL7,
-};
-
-int menelaus_set_vmmc(unsigned int mV)
-{
-	int val;
-
-	if (mV == 0)
-		return menelaus_set_voltage(&vmmc_vtg, 0, 0, 0);
-
-	val = menelaus_get_vtg_value(mV, vmmc_values, ARRAY_SIZE(vmmc_values));
-	if (val < 0)
-		return -EINVAL;
-	return menelaus_set_voltage(&vmmc_vtg, mV, val, 0x02);
-}
-EXPORT_SYMBOL(menelaus_set_vmmc);
-
-
-static const struct menelaus_vtg_value vaux_values[] = {
-	{ 1500, 0 },
-	{ 1800, 1 },
-	{ 2500, 2 },
-	{ 2800, 3 },
-};
-
-static const struct menelaus_vtg vaux_vtg = {
-	.name = "VAUX",
-	.vtg_reg = MENELAUS_LDO_CTRL1,
-	.vtg_shift = 4,
-	.vtg_bits = 2,
-	.mode_reg = MENELAUS_LDO_CTRL6,
-};
-
-int menelaus_set_vaux(unsigned int mV)
-{
-	int val;
-
-	if (mV == 0)
-		return menelaus_set_voltage(&vaux_vtg, 0, 0, 0);
-
-	val = menelaus_get_vtg_value(mV, vaux_values, ARRAY_SIZE(vaux_values));
-	if (val < 0)
-		return -EINVAL;
-	return menelaus_set_voltage(&vaux_vtg, mV, val, 0x02);
-}
-EXPORT_SYMBOL(menelaus_set_vaux);
-
-int menelaus_get_slot_pin_states(void)
-{
-	return menelaus_read_reg(MENELAUS_MCT_PIN_ST);
-}
-EXPORT_SYMBOL(menelaus_get_slot_pin_states);
-
-int menelaus_set_regulator_sleep(int enable, u32 val)
-{
-	int t, ret;
-	struct i2c_client *c = the_menelaus->client;
-
-	mutex_lock(&the_menelaus->lock);
-	ret = menelaus_write_reg(MENELAUS_SLEEP_CTRL2, val);
-	if (ret < 0)
-		goto out;
-
-	dev_dbg(&c->dev, "regulator sleep configuration: %02x\n", val);
-
-	ret = menelaus_read_reg(MENELAUS_GPIO_CTRL);
-	if (ret < 0)
-		goto out;
-	t = ((1 << 6) | 0x04);
-	if (enable)
-		ret |= t;
-	else
-		ret &= ~t;
-	ret = menelaus_write_reg(MENELAUS_GPIO_CTRL, ret);
-out:
-	mutex_unlock(&the_menelaus->lock);
-	return ret;
-}
-
-/*-----------------------------------------------------------------------*/
-
-/* Handles Menelaus interrupts. Does not run in interrupt context */
-static void menelaus_work(struct work_struct *_menelaus)
-{
-	struct menelaus_chip *menelaus =
-			container_of(_menelaus, struct menelaus_chip, work);
-	void (*handler)(struct menelaus_chip *menelaus);
-
-	while (1) {
-		unsigned isr;
-
-		isr = (menelaus_read_reg(MENELAUS_INT_STATUS2)
-				& ~menelaus->mask2) << 8;
-		isr |= menelaus_read_reg(MENELAUS_INT_STATUS1)
-				& ~menelaus->mask1;
-		if (!isr)
-			break;
-
-		while (isr) {
-			int irq = fls(isr) - 1;
-			isr &= ~(1 << irq);
-
-			mutex_lock(&menelaus->lock);
-			menelaus_disable_irq(irq);
-			menelaus_ack_irq(irq);
-			handler = menelaus->handlers[irq];
-			if (handler)
-				handler(menelaus);
-			menelaus_enable_irq(irq);
-			mutex_unlock(&menelaus->lock);
-		}
-	}
-	enable_irq(menelaus->client->irq);
-}
-
-/*
- * We cannot use I2C in interrupt context, so we just schedule work.
- */
-static irqreturn_t menelaus_irq(int irq, void *_menelaus)
-{
-	struct menelaus_chip *menelaus = _menelaus;
-
-	disable_irq_nosync(irq);
-	(void)schedule_work(&menelaus->work);
-
-	return IRQ_HANDLED;
-}
-
-/*-----------------------------------------------------------------------*/
-
-/*
- * The RTC needs to be set once, then it runs on backup battery power.
- * It supports alarms, including system wake alarms (from some modes);
- * and 1/second IRQs if requested.
- */
-#ifdef CONFIG_RTC_DRV_TWL92330
-
-#define RTC_CTRL_RTC_EN		(1 << 0)
-#define RTC_CTRL_AL_EN		(1 << 1)
-#define RTC_CTRL_MODE12		(1 << 2)
-#define RTC_CTRL_EVERY_MASK	(3 << 3)
-#define RTC_CTRL_EVERY_SEC	(0 << 3)
-#define RTC_CTRL_EVERY_MIN	(1 << 3)
-#define RTC_CTRL_EVERY_HR	(2 << 3)
-#define RTC_CTRL_EVERY_DAY	(3 << 3)
-
-#define RTC_UPDATE_EVERY	0x08
-
-#define RTC_HR_PM		(1 << 7)
-
-static void menelaus_to_time(char *regs, struct rtc_time *t)
-{
-	t->tm_sec = bcd2bin(regs[0]);
-	t->tm_min = bcd2bin(regs[1]);
-	if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
-		t->tm_hour = bcd2bin(regs[2] & 0x1f) - 1;
-		if (regs[2] & RTC_HR_PM)
-			t->tm_hour += 12;
-	} else
-		t->tm_hour = bcd2bin(regs[2] & 0x3f);
-	t->tm_mday = bcd2bin(regs[3]);
-	t->tm_mon = bcd2bin(regs[4]) - 1;
-	t->tm_year = bcd2bin(regs[5]) + 100;
-}
-
-static int time_to_menelaus(struct rtc_time *t, int regnum)
-{
-	int	hour, status;
-
-	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_sec));
-	if (status < 0)
-		goto fail;
-
-	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_min));
-	if (status < 0)
-		goto fail;
-
-	if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
-		hour = t->tm_hour + 1;
-		if (hour > 12)
-			hour = RTC_HR_PM | bin2bcd(hour - 12);
-		else
-			hour = bin2bcd(hour);
-	} else
-		hour = bin2bcd(t->tm_hour);
-	status = menelaus_write_reg(regnum++, hour);
-	if (status < 0)
-		goto fail;
-
-	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mday));
-	if (status < 0)
-		goto fail;
-
-	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mon + 1));
-	if (status < 0)
-		goto fail;
-
-	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_year - 100));
-	if (status < 0)
-		goto fail;
-
-	return 0;
-fail:
-	dev_err(&the_menelaus->client->dev, "rtc write reg %02x, err %d\n",
-			--regnum, status);
-	return status;
-}
-
-static int menelaus_read_time(struct device *dev, struct rtc_time *t)
-{
-	struct i2c_msg	msg[2];
-	char		regs[7];
-	int		status;
-
-	/* block read date and time registers */
-	regs[0] = MENELAUS_RTC_SEC;
-
-	msg[0].addr = MENELAUS_I2C_ADDRESS;
-	msg[0].flags = 0;
-	msg[0].len = 1;
-	msg[0].buf = regs;
-
-	msg[1].addr = MENELAUS_I2C_ADDRESS;
-	msg[1].flags = I2C_M_RD;
-	msg[1].len = sizeof(regs);
-	msg[1].buf = regs;
-
-	status = i2c_transfer(the_menelaus->client->adapter, msg, 2);
-	if (status != 2) {
-		dev_err(dev, "%s error %d\n", "read", status);
-		return -EIO;
-	}
-
-	menelaus_to_time(regs, t);
-	t->tm_wday = bcd2bin(regs[6]);
-
-	return 0;
-}
-
-static int menelaus_set_time(struct device *dev, struct rtc_time *t)
-{
-	int		status;
-
-	/* write date and time registers */
-	status = time_to_menelaus(t, MENELAUS_RTC_SEC);
-	if (status < 0)
-		return status;
-	status = menelaus_write_reg(MENELAUS_RTC_WKDAY, bin2bcd(t->tm_wday));
-	if (status < 0) {
-		dev_err(&the_menelaus->client->dev, "rtc write reg %02x "
-				"err %d\n", MENELAUS_RTC_WKDAY, status);
-		return status;
-	}
-
-	/* now commit the write */
-	status = menelaus_write_reg(MENELAUS_RTC_UPDATE, RTC_UPDATE_EVERY);
-	if (status < 0)
-		dev_err(&the_menelaus->client->dev, "rtc commit time, err %d\n",
-				status);
-
-	return 0;
-}
-
-static int menelaus_read_alarm(struct device *dev, struct rtc_wkalrm *w)
-{
-	struct i2c_msg	msg[2];
-	char		regs[6];
-	int		status;
-
-	/* block read alarm registers */
-	regs[0] = MENELAUS_RTC_AL_SEC;
-
-	msg[0].addr = MENELAUS_I2C_ADDRESS;
-	msg[0].flags = 0;
-	msg[0].len = 1;
-	msg[0].buf = regs;
-
-	msg[1].addr = MENELAUS_I2C_ADDRESS;
-	msg[1].flags = I2C_M_RD;
-	msg[1].len = sizeof(regs);
-	msg[1].buf = regs;
-
-	status = i2c_transfer(the_menelaus->client->adapter, msg, 2);
-	if (status != 2) {
-		dev_err(dev, "%s error %d\n", "alarm read", status);
-		return -EIO;
-	}
-
-	menelaus_to_time(regs, &w->time);
-
-	w->enabled = !!(the_menelaus->rtc_control & RTC_CTRL_AL_EN);
-
-	/* NOTE we *could* check if actually pending... */
-	w->pending = 0;
-
-	return 0;
-}
-
-static int menelaus_set_alarm(struct device *dev, struct rtc_wkalrm *w)
-{
-	int		status;
-
-	if (the_menelaus->client->irq <= 0 && w->enabled)
-		return -ENODEV;
-
-	/* clear previous alarm enable */
-	if (the_menelaus->rtc_control & RTC_CTRL_AL_EN) {
-		the_menelaus->rtc_control &= ~RTC_CTRL_AL_EN;
-		status = menelaus_write_reg(MENELAUS_RTC_CTRL,
-				the_menelaus->rtc_control);
-		if (status < 0)
-			return status;
-	}
-
-	/* write alarm registers */
-	status = time_to_menelaus(&w->time, MENELAUS_RTC_AL_SEC);
-	if (status < 0)
-		return status;
-
-	/* enable alarm if requested */
-	if (w->enabled) {
-		the_menelaus->rtc_control |= RTC_CTRL_AL_EN;
-		status = menelaus_write_reg(MENELAUS_RTC_CTRL,
-				the_menelaus->rtc_control);
-	}
-
-	return status;
-}
-
-#ifdef CONFIG_RTC_INTF_DEV
-
-static void menelaus_rtc_update_work(struct menelaus_chip *m)
-{
-	/* report 1/sec update */
-	local_irq_disable();
-	rtc_update_irq(m->rtc, 1, RTC_IRQF | RTC_UF);
-	local_irq_enable();
-}
-
-static int menelaus_ioctl(struct device *dev, unsigned cmd, unsigned long arg)
-{
-	int	status;
-
-	if (the_menelaus->client->irq <= 0)
-		return -ENOIOCTLCMD;
-
-	switch (cmd) {
-	/* alarm IRQ */
-	case RTC_AIE_ON:
-		if (the_menelaus->rtc_control & RTC_CTRL_AL_EN)
-			return 0;
-		the_menelaus->rtc_control |= RTC_CTRL_AL_EN;
-		break;
-	case RTC_AIE_OFF:
-		if (!(the_menelaus->rtc_control & RTC_CTRL_AL_EN))
-			return 0;
-		the_menelaus->rtc_control &= ~RTC_CTRL_AL_EN;
-		break;
-	/* 1/second "update" IRQ */
-	case RTC_UIE_ON:
-		if (the_menelaus->uie)
-			return 0;
-		status = menelaus_remove_irq_work(MENELAUS_RTCTMR_IRQ);
-		status = menelaus_add_irq_work(MENELAUS_RTCTMR_IRQ,
-				menelaus_rtc_update_work);
-		if (status == 0)
-			the_menelaus->uie = 1;
-		return status;
-	case RTC_UIE_OFF:
-		if (!the_menelaus->uie)
-			return 0;
-		status = menelaus_remove_irq_work(MENELAUS_RTCTMR_IRQ);
-		if (status == 0)
-			the_menelaus->uie = 0;
-		return status;
-	default:
-		return -ENOIOCTLCMD;
-	}
-	return menelaus_write_reg(MENELAUS_RTC_CTRL, the_menelaus->rtc_control);
-}
-
-#else
-#define menelaus_ioctl	NULL
-#endif
-
-/* REVISIT no compensation register support ... */
-
-static const struct rtc_class_ops menelaus_rtc_ops = {
-	.ioctl			= menelaus_ioctl,
-	.read_time		= menelaus_read_time,
-	.set_time		= menelaus_set_time,
-	.read_alarm		= menelaus_read_alarm,
-	.set_alarm		= menelaus_set_alarm,
-};
-
-static void menelaus_rtc_alarm_work(struct menelaus_chip *m)
-{
-	/* report alarm */
-	local_irq_disable();
-	rtc_update_irq(m->rtc, 1, RTC_IRQF | RTC_AF);
-	local_irq_enable();
-
-	/* then disable it; alarms are oneshot */
-	the_menelaus->rtc_control &= ~RTC_CTRL_AL_EN;
-	menelaus_write_reg(MENELAUS_RTC_CTRL, the_menelaus->rtc_control);
-}
-
-static inline void menelaus_rtc_init(struct menelaus_chip *m)
-{
-	int	alarm = (m->client->irq > 0);
-
-	/* assume 32KDETEN pin is pulled high */
-	if (!(menelaus_read_reg(MENELAUS_OSC_CTRL) & 0x80)) {
-		dev_dbg(&m->client->dev, "no 32k oscillator\n");
-		return;
-	}
-
-	/* support RTC alarm; it can issue wakeups */
-	if (alarm) {
-		if (menelaus_add_irq_work(MENELAUS_RTCALM_IRQ,
-				menelaus_rtc_alarm_work) < 0) {
-			dev_err(&m->client->dev, "can't handle RTC alarm\n");
-			return;
-		}
-		device_init_wakeup(&m->client->dev, 1);
-	}
-
-	/* be sure RTC is enabled; allow 1/sec irqs; leave 12hr mode alone */
-	m->rtc_control = menelaus_read_reg(MENELAUS_RTC_CTRL);
-	if (!(m->rtc_control & RTC_CTRL_RTC_EN)
-			|| (m->rtc_control & RTC_CTRL_AL_EN)
-			|| (m->rtc_control & RTC_CTRL_EVERY_MASK)) {
-		if (!(m->rtc_control & RTC_CTRL_RTC_EN)) {
-			dev_warn(&m->client->dev, "rtc clock needs setting\n");
-			m->rtc_control |= RTC_CTRL_RTC_EN;
-		}
-		m->rtc_control &= ~RTC_CTRL_EVERY_MASK;
-		m->rtc_control &= ~RTC_CTRL_AL_EN;
-		menelaus_write_reg(MENELAUS_RTC_CTRL, m->rtc_control);
-	}
-
-	m->rtc = rtc_device_register(DRIVER_NAME,
-			&m->client->dev,
-			&menelaus_rtc_ops, THIS_MODULE);
-	if (IS_ERR(m->rtc)) {
-		if (alarm) {
-			menelaus_remove_irq_work(MENELAUS_RTCALM_IRQ);
-			device_init_wakeup(&m->client->dev, 0);
-		}
-		dev_err(&m->client->dev, "can't register RTC: %d\n",
-				(int) PTR_ERR(m->rtc));
-		the_menelaus->rtc = NULL;
-	}
-}
-
-#else
-
-static inline void menelaus_rtc_init(struct menelaus_chip *m)
-{
-	/* nothing */
-}
-
-#endif
-
-/*-----------------------------------------------------------------------*/
-
-static struct i2c_driver menelaus_i2c_driver;
-
-static int menelaus_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
-{
-	struct menelaus_chip	*menelaus;
-	int			rev = 0, val;
-	int			err = 0;
-	struct menelaus_platform_data *menelaus_pdata =
-					client->dev.platform_data;
-
-	if (the_menelaus) {
-		dev_dbg(&client->dev, "only one %s for now\n",
-				DRIVER_NAME);
-		return -ENODEV;
-	}
-
-	menelaus = kzalloc(sizeof *menelaus, GFP_KERNEL);
-	if (!menelaus)
-		return -ENOMEM;
-
-	i2c_set_clientdata(client, menelaus);
-
-	the_menelaus = menelaus;
-	menelaus->client = client;
-
-	/* If a true probe check the device */
-	rev = menelaus_read_reg(MENELAUS_REV);
-	if (rev < 0) {
-		pr_err(DRIVER_NAME ": device not found");
-		err = -ENODEV;
-		goto fail1;
-	}
-
-	/* Ack and disable all Menelaus interrupts */
-	menelaus_write_reg(MENELAUS_INT_ACK1, 0xff);
-	menelaus_write_reg(MENELAUS_INT_ACK2, 0xff);
-	menelaus_write_reg(MENELAUS_INT_MASK1, 0xff);
-	menelaus_write_reg(MENELAUS_INT_MASK2, 0xff);
-	menelaus->mask1 = 0xff;
-	menelaus->mask2 = 0xff;
-
-	/* Set output buffer strengths */
-	menelaus_write_reg(MENELAUS_MCT_CTRL1, 0x73);
-
-	if (client->irq > 0) {
-		err = request_irq(client->irq, menelaus_irq, IRQF_DISABLED,
-				  DRIVER_NAME, menelaus);
-		if (err) {
-			dev_dbg(&client->dev,  "can't get IRQ %d, err %d\n",
-					client->irq, err);
-			goto fail1;
-		}
-	}
-
-	mutex_init(&menelaus->lock);
-	INIT_WORK(&menelaus->work, menelaus_work);
-
-	pr_info("Menelaus rev %d.%d\n", rev >> 4, rev & 0x0f);
-
-	val = menelaus_read_reg(MENELAUS_VCORE_CTRL1);
-	if (val < 0)
-		goto fail2;
-	if (val & (1 << 7))
-		menelaus->vcore_hw_mode = 1;
-	else
-		menelaus->vcore_hw_mode = 0;
-
-	if (menelaus_pdata != NULL && menelaus_pdata->late_init != NULL) {
-		err = menelaus_pdata->late_init(&client->dev);
-		if (err < 0)
-			goto fail2;
-	}
-
-	menelaus_rtc_init(menelaus);
-
-	return 0;
-fail2:
-	free_irq(client->irq, menelaus);
-	flush_scheduled_work();
-fail1:
-	kfree(menelaus);
-	return err;
-}
-
-static int __exit menelaus_remove(struct i2c_client *client)
-{
-	struct menelaus_chip	*menelaus = i2c_get_clientdata(client);
-
-	free_irq(client->irq, menelaus);
-	kfree(menelaus);
-	i2c_set_clientdata(client, NULL);
-	the_menelaus = NULL;
-	return 0;
-}
-
-static const struct i2c_device_id menelaus_id[] = {
-	{ "menelaus", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, menelaus_id);
-
-static struct i2c_driver menelaus_i2c_driver = {
-	.driver = {
-		.name		= DRIVER_NAME,
-	},
-	.probe		= menelaus_probe,
-	.remove		= __exit_p(menelaus_remove),
-	.id_table	= menelaus_id,
-};
-
-static int __init menelaus_init(void)
-{
-	int res;
-
-	res = i2c_add_driver(&menelaus_i2c_driver);
-	if (res < 0) {
-		pr_err(DRIVER_NAME ": driver registration failed\n");
-		return res;
-	}
-
-	return 0;
-}
-
-static void __exit menelaus_exit(void)
-{
-	i2c_del_driver(&menelaus_i2c_driver);
-
-	/* FIXME: Shutdown menelaus parts that can be shut down */
-}
-
-MODULE_AUTHOR("Texas Instruments, Inc. (and others)");
-MODULE_DESCRIPTION("I2C interface for Menelaus.");
-MODULE_LICENSE("GPL");
-
-module_init(menelaus_init);
-module_exit(menelaus_exit);
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 182e148..416f9e7 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -82,6 +82,16 @@ config TPS65010
 	  This driver can also be built as a module.  If so, the module
 	  will be called tps65010.
 
+config MENELAUS
+	bool "Texas Instruments TWL92330/Menelaus PM chip"
+	depends on I2C=y && ARCH_OMAP24XX
+	help
+	  If you say yes here you get support for the Texas Instruments
+	  TWL92330/Menelaus Power Management chip. This include voltage
+	  regulators, Dual slot memory card tranceivers, real-time clock
+	  and other features that are often used in portable devices like
+	  cell phones and PDAs.
+
 config TWL4030_CORE
 	bool "Texas Instruments TWL4030/TPS659x0 Support"
 	depends on I2C=y && GENERIC_HARDIRQS
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 3989e30..0c9418b 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_MFD_WM8350)	+= wm8350.o
 obj-$(CONFIG_MFD_WM8350_I2C)	+= wm8350-i2c.o
 
 obj-$(CONFIG_TPS65010)		+= tps65010.o
+obj-$(CONFIG_MENELAUS)		+= menelaus.o
 
 obj-$(CONFIG_TWL4030_CORE)	+= twl4030-core.o twl4030-irq.o
 
diff --git a/drivers/mfd/menelaus.c b/drivers/mfd/menelaus.c
new file mode 100644
index 0000000..4b364ba
--- /dev/null
+++ b/drivers/mfd/menelaus.c
@@ -0,0 +1,1285 @@
+/*
+ * Copyright (C) 2004 Texas Instruments, Inc.
+ *
+ * Some parts based tps65010.c:
+ * Copyright (C) 2004 Texas Instruments and
+ * Copyright (C) 2004-2005 David Brownell
+ *
+ * Some parts based on tlv320aic24.c:
+ * Copyright (C) by Kai Svahn <kai.svahn@nokia.com>
+ *
+ * Changes for interrupt handling and clean-up by
+ * Tony Lindgren <tony@atomide.com> and Imre Deak <imre.deak@nokia.com>
+ * Cleanup and generalized support for voltage setting by
+ * Juha Yrjola
+ * Added support for controlling VCORE and regulator sleep states,
+ * Amit Kucheria <amit.kucheria@nokia.com>
+ * Copyright (C) 2005, 2006 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/rtc.h>
+#include <linux/bcd.h>
+
+#include <asm/mach/irq.h>
+
+#include <mach/gpio.h>
+#include <mach/menelaus.h>
+
+#define DRIVER_NAME			"menelaus"
+
+#define MENELAUS_I2C_ADDRESS		0x72
+
+#define MENELAUS_REV			0x01
+#define MENELAUS_VCORE_CTRL1		0x02
+#define MENELAUS_VCORE_CTRL2		0x03
+#define MENELAUS_VCORE_CTRL3		0x04
+#define MENELAUS_VCORE_CTRL4		0x05
+#define MENELAUS_VCORE_CTRL5		0x06
+#define MENELAUS_DCDC_CTRL1		0x07
+#define MENELAUS_DCDC_CTRL2		0x08
+#define MENELAUS_DCDC_CTRL3		0x09
+#define MENELAUS_LDO_CTRL1		0x0A
+#define MENELAUS_LDO_CTRL2		0x0B
+#define MENELAUS_LDO_CTRL3		0x0C
+#define MENELAUS_LDO_CTRL4		0x0D
+#define MENELAUS_LDO_CTRL5		0x0E
+#define MENELAUS_LDO_CTRL6		0x0F
+#define MENELAUS_LDO_CTRL7		0x10
+#define MENELAUS_LDO_CTRL8		0x11
+#define MENELAUS_SLEEP_CTRL1		0x12
+#define MENELAUS_SLEEP_CTRL2		0x13
+#define MENELAUS_DEVICE_OFF		0x14
+#define MENELAUS_OSC_CTRL		0x15
+#define MENELAUS_DETECT_CTRL		0x16
+#define MENELAUS_INT_MASK1		0x17
+#define MENELAUS_INT_MASK2		0x18
+#define MENELAUS_INT_STATUS1		0x19
+#define MENELAUS_INT_STATUS2		0x1A
+#define MENELAUS_INT_ACK1		0x1B
+#define MENELAUS_INT_ACK2		0x1C
+#define MENELAUS_GPIO_CTRL		0x1D
+#define MENELAUS_GPIO_IN		0x1E
+#define MENELAUS_GPIO_OUT		0x1F
+#define MENELAUS_BBSMS			0x20
+#define MENELAUS_RTC_CTRL		0x21
+#define MENELAUS_RTC_UPDATE		0x22
+#define MENELAUS_RTC_SEC		0x23
+#define MENELAUS_RTC_MIN		0x24
+#define MENELAUS_RTC_HR			0x25
+#define MENELAUS_RTC_DAY		0x26
+#define MENELAUS_RTC_MON		0x27
+#define MENELAUS_RTC_YR			0x28
+#define MENELAUS_RTC_WKDAY		0x29
+#define MENELAUS_RTC_AL_SEC		0x2A
+#define MENELAUS_RTC_AL_MIN		0x2B
+#define MENELAUS_RTC_AL_HR		0x2C
+#define MENELAUS_RTC_AL_DAY		0x2D
+#define MENELAUS_RTC_AL_MON		0x2E
+#define MENELAUS_RTC_AL_YR		0x2F
+#define MENELAUS_RTC_COMP_MSB		0x30
+#define MENELAUS_RTC_COMP_LSB		0x31
+#define MENELAUS_S1_PULL_EN		0x32
+#define MENELAUS_S1_PULL_DIR		0x33
+#define MENELAUS_S2_PULL_EN		0x34
+#define MENELAUS_S2_PULL_DIR		0x35
+#define MENELAUS_MCT_CTRL1		0x36
+#define MENELAUS_MCT_CTRL2		0x37
+#define MENELAUS_MCT_CTRL3		0x38
+#define MENELAUS_MCT_PIN_ST		0x39
+#define MENELAUS_DEBOUNCE1		0x3A
+
+#define IH_MENELAUS_IRQS		12
+#define MENELAUS_MMC_S1CD_IRQ		0	/* MMC slot 1 card change */
+#define MENELAUS_MMC_S2CD_IRQ		1	/* MMC slot 2 card change */
+#define MENELAUS_MMC_S1D1_IRQ		2	/* MMC DAT1 low in slot 1 */
+#define MENELAUS_MMC_S2D1_IRQ		3	/* MMC DAT1 low in slot 2 */
+#define MENELAUS_LOWBAT_IRQ		4	/* Low battery */
+#define MENELAUS_HOTDIE_IRQ		5	/* Hot die detect */
+#define MENELAUS_UVLO_IRQ		6	/* UVLO detect */
+#define MENELAUS_TSHUT_IRQ		7	/* Thermal shutdown */
+#define MENELAUS_RTCTMR_IRQ		8	/* RTC timer */
+#define MENELAUS_RTCALM_IRQ		9	/* RTC alarm */
+#define MENELAUS_RTCERR_IRQ		10	/* RTC error */
+#define MENELAUS_PSHBTN_IRQ		11	/* Push button */
+#define MENELAUS_RESERVED12_IRQ		12	/* Reserved */
+#define MENELAUS_RESERVED13_IRQ		13	/* Reserved */
+#define MENELAUS_RESERVED14_IRQ		14	/* Reserved */
+#define MENELAUS_RESERVED15_IRQ		15	/* Reserved */
+
+static void menelaus_work(struct work_struct *_menelaus);
+
+struct menelaus_chip {
+	struct mutex		lock;
+	struct i2c_client	*client;
+	struct work_struct	work;
+#ifdef CONFIG_RTC_DRV_TWL92330
+	struct rtc_device	*rtc;
+	u8			rtc_control;
+	unsigned		uie:1;
+#endif
+	unsigned		vcore_hw_mode:1;
+	u8			mask1, mask2;
+	void			(*handlers[16])(struct menelaus_chip *);
+	void			(*mmc_callback)(void *data, u8 mask);
+	void			*mmc_callback_data;
+};
+
+static struct menelaus_chip *the_menelaus;
+
+static int menelaus_write_reg(int reg, u8 value)
+{
+	int val = i2c_smbus_write_byte_data(the_menelaus->client, reg, value);
+
+	if (val < 0) {
+		pr_err(DRIVER_NAME ": write error");
+		return val;
+	}
+
+	return 0;
+}
+
+static int menelaus_read_reg(int reg)
+{
+	int val = i2c_smbus_read_byte_data(the_menelaus->client, reg);
+
+	if (val < 0)
+		pr_err(DRIVER_NAME ": read error");
+
+	return val;
+}
+
+static int menelaus_enable_irq(int irq)
+{
+	if (irq > 7) {
+		irq -= 8;
+		the_menelaus->mask2 &= ~(1 << irq);
+		return menelaus_write_reg(MENELAUS_INT_MASK2,
+				the_menelaus->mask2);
+	} else {
+		the_menelaus->mask1 &= ~(1 << irq);
+		return menelaus_write_reg(MENELAUS_INT_MASK1,
+				the_menelaus->mask1);
+	}
+}
+
+static int menelaus_disable_irq(int irq)
+{
+	if (irq > 7) {
+		irq -= 8;
+		the_menelaus->mask2 |= (1 << irq);
+		return menelaus_write_reg(MENELAUS_INT_MASK2,
+				the_menelaus->mask2);
+	} else {
+		the_menelaus->mask1 |= (1 << irq);
+		return menelaus_write_reg(MENELAUS_INT_MASK1,
+				the_menelaus->mask1);
+	}
+}
+
+static int menelaus_ack_irq(int irq)
+{
+	if (irq > 7)
+		return menelaus_write_reg(MENELAUS_INT_ACK2, 1 << (irq - 8));
+	else
+		return menelaus_write_reg(MENELAUS_INT_ACK1, 1 << irq);
+}
+
+/* Adds a handler for an interrupt. Does not run in interrupt context */
+static int menelaus_add_irq_work(int irq,
+		void (*handler)(struct menelaus_chip *))
+{
+	int ret = 0;
+
+	mutex_lock(&the_menelaus->lock);
+	the_menelaus->handlers[irq] = handler;
+	ret = menelaus_enable_irq(irq);
+	mutex_unlock(&the_menelaus->lock);
+
+	return ret;
+}
+
+/* Removes handler for an interrupt */
+static int menelaus_remove_irq_work(int irq)
+{
+	int ret = 0;
+
+	mutex_lock(&the_menelaus->lock);
+	ret = menelaus_disable_irq(irq);
+	the_menelaus->handlers[irq] = NULL;
+	mutex_unlock(&the_menelaus->lock);
+
+	return ret;
+}
+
+/*
+ * Gets scheduled when a card detect interrupt happens. Note that in some cases
+ * this line is wired to card cover switch rather than the card detect switch
+ * in each slot. In this case the cards are not seen by menelaus.
+ * FIXME: Add handling for D1 too
+ */
+static void menelaus_mmc_cd_work(struct menelaus_chip *menelaus_hw)
+{
+	int reg;
+	unsigned char card_mask = 0;
+
+	reg = menelaus_read_reg(MENELAUS_MCT_PIN_ST);
+	if (reg < 0)
+		return;
+
+	if (!(reg & 0x1))
+		card_mask |= (1 << 0);
+
+	if (!(reg & 0x2))
+		card_mask |= (1 << 1);
+
+	if (menelaus_hw->mmc_callback)
+		menelaus_hw->mmc_callback(menelaus_hw->mmc_callback_data,
+					  card_mask);
+}
+
+/*
+ * Toggles the MMC slots between open-drain and push-pull mode.
+ */
+int menelaus_set_mmc_opendrain(int slot, int enable)
+{
+	int ret, val;
+
+	if (slot != 1 && slot != 2)
+		return -EINVAL;
+	mutex_lock(&the_menelaus->lock);
+	ret = menelaus_read_reg(MENELAUS_MCT_CTRL1);
+	if (ret < 0) {
+		mutex_unlock(&the_menelaus->lock);
+		return ret;
+	}
+	val = ret;
+	if (slot == 1) {
+		if (enable)
+			val |= 1 << 2;
+		else
+			val &= ~(1 << 2);
+	} else {
+		if (enable)
+			val |= 1 << 3;
+		else
+			val &= ~(1 << 3);
+	}
+	ret = menelaus_write_reg(MENELAUS_MCT_CTRL1, val);
+	mutex_unlock(&the_menelaus->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(menelaus_set_mmc_opendrain);
+
+int menelaus_set_slot_sel(int enable)
+{
+	int ret;
+
+	mutex_lock(&the_menelaus->lock);
+	ret = menelaus_read_reg(MENELAUS_GPIO_CTRL);
+	if (ret < 0)
+		goto out;
+	ret |= 0x02;
+	if (enable)
+		ret |= 1 << 5;
+	else
+		ret &= ~(1 << 5);
+	ret = menelaus_write_reg(MENELAUS_GPIO_CTRL, ret);
+out:
+	mutex_unlock(&the_menelaus->lock);
+	return ret;
+}
+EXPORT_SYMBOL(menelaus_set_slot_sel);
+
+int menelaus_set_mmc_slot(int slot, int enable, int power, int cd_en)
+{
+	int ret, val;
+
+	if (slot != 1 && slot != 2)
+		return -EINVAL;
+	if (power >= 3)
+		return -EINVAL;
+
+	mutex_lock(&the_menelaus->lock);
+
+	ret = menelaus_read_reg(MENELAUS_MCT_CTRL2);
+	if (ret < 0)
+		goto out;
+	val = ret;
+	if (slot == 1) {
+		if (cd_en)
+			val |= (1 << 4) | (1 << 6);
+		else
+			val &= ~((1 << 4) | (1 << 6));
+	} else {
+		if (cd_en)
+			val |= (1 << 5) | (1 << 7);
+		else
+			val &= ~((1 << 5) | (1 << 7));
+	}
+	ret = menelaus_write_reg(MENELAUS_MCT_CTRL2, val);
+	if (ret < 0)
+		goto out;
+
+	ret = menelaus_read_reg(MENELAUS_MCT_CTRL3);
+	if (ret < 0)
+		goto out;
+	val = ret;
+	if (slot == 1) {
+		if (enable)
+			val |= 1 << 0;
+		else
+			val &= ~(1 << 0);
+	} else {
+		int b;
+
+		if (enable)
+			ret |= 1 << 1;
+		else
+			ret &= ~(1 << 1);
+		b = menelaus_read_reg(MENELAUS_MCT_CTRL2);
+		b &= ~0x03;
+		b |= power;
+		ret = menelaus_write_reg(MENELAUS_MCT_CTRL2, b);
+		if (ret < 0)
+			goto out;
+	}
+	/* Disable autonomous shutdown */
+	val &= ~(0x03 << 2);
+	ret = menelaus_write_reg(MENELAUS_MCT_CTRL3, val);
+out:
+	mutex_unlock(&the_menelaus->lock);
+	return ret;
+}
+EXPORT_SYMBOL(menelaus_set_mmc_slot);
+
+int menelaus_register_mmc_callback(void (*callback)(void *data, u8 card_mask),
+				   void *data)
+{
+	int ret = 0;
+
+	the_menelaus->mmc_callback_data = data;
+	the_menelaus->mmc_callback = callback;
+	ret = menelaus_add_irq_work(MENELAUS_MMC_S1CD_IRQ,
+				    menelaus_mmc_cd_work);
+	if (ret < 0)
+		return ret;
+	ret = menelaus_add_irq_work(MENELAUS_MMC_S2CD_IRQ,
+				    menelaus_mmc_cd_work);
+	if (ret < 0)
+		return ret;
+	ret = menelaus_add_irq_work(MENELAUS_MMC_S1D1_IRQ,
+				    menelaus_mmc_cd_work);
+	if (ret < 0)
+		return ret;
+	ret = menelaus_add_irq_work(MENELAUS_MMC_S2D1_IRQ,
+				    menelaus_mmc_cd_work);
+
+	return ret;
+}
+EXPORT_SYMBOL(menelaus_register_mmc_callback);
+
+void menelaus_unregister_mmc_callback(void)
+{
+	menelaus_remove_irq_work(MENELAUS_MMC_S1CD_IRQ);
+	menelaus_remove_irq_work(MENELAUS_MMC_S2CD_IRQ);
+	menelaus_remove_irq_work(MENELAUS_MMC_S1D1_IRQ);
+	menelaus_remove_irq_work(MENELAUS_MMC_S2D1_IRQ);
+
+	the_menelaus->mmc_callback = NULL;
+	the_menelaus->mmc_callback_data = 0;
+}
+EXPORT_SYMBOL(menelaus_unregister_mmc_callback);
+
+struct menelaus_vtg {
+	const char *name;
+	u8 vtg_reg;
+	u8 vtg_shift;
+	u8 vtg_bits;
+	u8 mode_reg;
+};
+
+struct menelaus_vtg_value {
+	u16 vtg;
+	u16 val;
+};
+
+static int menelaus_set_voltage(const struct menelaus_vtg *vtg, int mV,
+				int vtg_val, int mode)
+{
+	int val, ret;
+	struct i2c_client *c = the_menelaus->client;
+
+	mutex_lock(&the_menelaus->lock);
+	if (vtg == 0)
+		goto set_voltage;
+
+	ret = menelaus_read_reg(vtg->vtg_reg);
+	if (ret < 0)
+		goto out;
+	val = ret & ~(((1 << vtg->vtg_bits) - 1) << vtg->vtg_shift);
+	val |= vtg_val << vtg->vtg_shift;
+
+	dev_dbg(&c->dev, "Setting voltage '%s'"
+			 "to %d mV (reg 0x%02x, val 0x%02x)\n",
+			vtg->name, mV, vtg->vtg_reg, val);
+
+	ret = menelaus_write_reg(vtg->vtg_reg, val);
+	if (ret < 0)
+		goto out;
+set_voltage:
+	ret = menelaus_write_reg(vtg->mode_reg, mode);
+out:
+	mutex_unlock(&the_menelaus->lock);
+	if (ret == 0) {
+		/* Wait for voltage to stabilize */
+		msleep(1);
+	}
+	return ret;
+}
+
+static int menelaus_get_vtg_value(int vtg, const struct menelaus_vtg_value *tbl,
+				  int n)
+{
+	int i;
+
+	for (i = 0; i < n; i++, tbl++)
+		if (tbl->vtg == vtg)
+			return tbl->val;
+	return -EINVAL;
+}
+
+/*
+ * Vcore can be programmed in two ways:
+ * SW-controlled: Required voltage is programmed into VCORE_CTRL1
+ * HW-controlled: Required range (roof-floor) is programmed into VCORE_CTRL3
+ * and VCORE_CTRL4
+ *
+ * Call correct 'set' function accordingly
+ */
+
+static const struct menelaus_vtg_value vcore_values[] = {
+	{ 1000, 0 },
+	{ 1025, 1 },
+	{ 1050, 2 },
+	{ 1075, 3 },
+	{ 1100, 4 },
+	{ 1125, 5 },
+	{ 1150, 6 },
+	{ 1175, 7 },
+	{ 1200, 8 },
+	{ 1225, 9 },
+	{ 1250, 10 },
+	{ 1275, 11 },
+	{ 1300, 12 },
+	{ 1325, 13 },
+	{ 1350, 14 },
+	{ 1375, 15 },
+	{ 1400, 16 },
+	{ 1425, 17 },
+	{ 1450, 18 },
+};
+
+int menelaus_set_vcore_sw(unsigned int mV)
+{
+	int val, ret;
+	struct i2c_client *c = the_menelaus->client;
+
+	val = menelaus_get_vtg_value(mV, vcore_values,
+				     ARRAY_SIZE(vcore_values));
+	if (val < 0)
+		return -EINVAL;
+
+	dev_dbg(&c->dev, "Setting VCORE to %d mV (val 0x%02x)\n", mV, val);
+
+	/* Set SW mode and the voltage in one go. */
+	mutex_lock(&the_menelaus->lock);
+	ret = menelaus_write_reg(MENELAUS_VCORE_CTRL1, val);
+	if (ret == 0)
+		the_menelaus->vcore_hw_mode = 0;
+	mutex_unlock(&the_menelaus->lock);
+	msleep(1);
+
+	return ret;
+}
+
+int menelaus_set_vcore_hw(unsigned int roof_mV, unsigned int floor_mV)
+{
+	int fval, rval, val, ret;
+	struct i2c_client *c = the_menelaus->client;
+
+	rval = menelaus_get_vtg_value(roof_mV, vcore_values,
+				      ARRAY_SIZE(vcore_values));
+	if (rval < 0)
+		return -EINVAL;
+	fval = menelaus_get_vtg_value(floor_mV, vcore_values,
+				      ARRAY_SIZE(vcore_values));
+	if (fval < 0)
+		return -EINVAL;
+
+	dev_dbg(&c->dev, "Setting VCORE FLOOR to %d mV and ROOF to %d mV\n",
+	       floor_mV, roof_mV);
+
+	mutex_lock(&the_menelaus->lock);
+	ret = menelaus_write_reg(MENELAUS_VCORE_CTRL3, fval);
+	if (ret < 0)
+		goto out;
+	ret = menelaus_write_reg(MENELAUS_VCORE_CTRL4, rval);
+	if (ret < 0)
+		goto out;
+	if (!the_menelaus->vcore_hw_mode) {
+		val = menelaus_read_reg(MENELAUS_VCORE_CTRL1);
+		/* HW mode, turn OFF byte comparator */
+		val |= ((1 << 7) | (1 << 5));
+		ret = menelaus_write_reg(MENELAUS_VCORE_CTRL1, val);
+		the_menelaus->vcore_hw_mode = 1;
+	}
+	msleep(1);
+out:
+	mutex_unlock(&the_menelaus->lock);
+	return ret;
+}
+
+static const struct menelaus_vtg vmem_vtg = {
+	.name = "VMEM",
+	.vtg_reg = MENELAUS_LDO_CTRL1,
+	.vtg_shift = 0,
+	.vtg_bits = 2,
+	.mode_reg = MENELAUS_LDO_CTRL3,
+};
+
+static const struct menelaus_vtg_value vmem_values[] = {
+	{ 1500, 0 },
+	{ 1800, 1 },
+	{ 1900, 2 },
+	{ 2500, 3 },
+};
+
+int menelaus_set_vmem(unsigned int mV)
+{
+	int val;
+
+	if (mV == 0)
+		return menelaus_set_voltage(&vmem_vtg, 0, 0, 0);
+
+	val = menelaus_get_vtg_value(mV, vmem_values, ARRAY_SIZE(vmem_values));
+	if (val < 0)
+		return -EINVAL;
+	return menelaus_set_voltage(&vmem_vtg, mV, val, 0x02);
+}
+EXPORT_SYMBOL(menelaus_set_vmem);
+
+static const struct menelaus_vtg vio_vtg = {
+	.name = "VIO",
+	.vtg_reg = MENELAUS_LDO_CTRL1,
+	.vtg_shift = 2,
+	.vtg_bits = 2,
+	.mode_reg = MENELAUS_LDO_CTRL4,
+};
+
+static const struct menelaus_vtg_value vio_values[] = {
+	{ 1500, 0 },
+	{ 1800, 1 },
+	{ 2500, 2 },
+	{ 2800, 3 },
+};
+
+int menelaus_set_vio(unsigned int mV)
+{
+	int val;
+
+	if (mV == 0)
+		return menelaus_set_voltage(&vio_vtg, 0, 0, 0);
+
+	val = menelaus_get_vtg_value(mV, vio_values, ARRAY_SIZE(vio_values));
+	if (val < 0)
+		return -EINVAL;
+	return menelaus_set_voltage(&vio_vtg, mV, val, 0x02);
+}
+EXPORT_SYMBOL(menelaus_set_vio);
+
+static const struct menelaus_vtg_value vdcdc_values[] = {
+	{ 1500, 0 },
+	{ 1800, 1 },
+	{ 2000, 2 },
+	{ 2200, 3 },
+	{ 2400, 4 },
+	{ 2800, 5 },
+	{ 3000, 6 },
+	{ 3300, 7 },
+};
+
+static const struct menelaus_vtg vdcdc2_vtg = {
+	.name = "VDCDC2",
+	.vtg_reg = MENELAUS_DCDC_CTRL1,
+	.vtg_shift = 0,
+	.vtg_bits = 3,
+	.mode_reg = MENELAUS_DCDC_CTRL2,
+};
+
+static const struct menelaus_vtg vdcdc3_vtg = {
+	.name = "VDCDC3",
+	.vtg_reg = MENELAUS_DCDC_CTRL1,
+	.vtg_shift = 3,
+	.vtg_bits = 3,
+	.mode_reg = MENELAUS_DCDC_CTRL3,
+};
+
+int menelaus_set_vdcdc(int dcdc, unsigned int mV)
+{
+	const struct menelaus_vtg *vtg;
+	int val;
+
+	if (dcdc != 2 && dcdc != 3)
+		return -EINVAL;
+	if (dcdc == 2)
+		vtg = &vdcdc2_vtg;
+	else
+		vtg = &vdcdc3_vtg;
+
+	if (mV == 0)
+		return menelaus_set_voltage(vtg, 0, 0, 0);
+
+	val = menelaus_get_vtg_value(mV, vdcdc_values,
+				     ARRAY_SIZE(vdcdc_values));
+	if (val < 0)
+		return -EINVAL;
+	return menelaus_set_voltage(vtg, mV, val, 0x03);
+}
+
+static const struct menelaus_vtg_value vmmc_values[] = {
+	{ 1850, 0 },
+	{ 2800, 1 },
+	{ 3000, 2 },
+	{ 3100, 3 },
+};
+
+static const struct menelaus_vtg vmmc_vtg = {
+	.name = "VMMC",
+	.vtg_reg = MENELAUS_LDO_CTRL1,
+	.vtg_shift = 6,
+	.vtg_bits = 2,
+	.mode_reg = MENELAUS_LDO_CTRL7,
+};
+
+int menelaus_set_vmmc(unsigned int mV)
+{
+	int val;
+
+	if (mV == 0)
+		return menelaus_set_voltage(&vmmc_vtg, 0, 0, 0);
+
+	val = menelaus_get_vtg_value(mV, vmmc_values, ARRAY_SIZE(vmmc_values));
+	if (val < 0)
+		return -EINVAL;
+	return menelaus_set_voltage(&vmmc_vtg, mV, val, 0x02);
+}
+EXPORT_SYMBOL(menelaus_set_vmmc);
+
+
+static const struct menelaus_vtg_value vaux_values[] = {
+	{ 1500, 0 },
+	{ 1800, 1 },
+	{ 2500, 2 },
+	{ 2800, 3 },
+};
+
+static const struct menelaus_vtg vaux_vtg = {
+	.name = "VAUX",
+	.vtg_reg = MENELAUS_LDO_CTRL1,
+	.vtg_shift = 4,
+	.vtg_bits = 2,
+	.mode_reg = MENELAUS_LDO_CTRL6,
+};
+
+int menelaus_set_vaux(unsigned int mV)
+{
+	int val;
+
+	if (mV == 0)
+		return menelaus_set_voltage(&vaux_vtg, 0, 0, 0);
+
+	val = menelaus_get_vtg_value(mV, vaux_values, ARRAY_SIZE(vaux_values));
+	if (val < 0)
+		return -EINVAL;
+	return menelaus_set_voltage(&vaux_vtg, mV, val, 0x02);
+}
+EXPORT_SYMBOL(menelaus_set_vaux);
+
+int menelaus_get_slot_pin_states(void)
+{
+	return menelaus_read_reg(MENELAUS_MCT_PIN_ST);
+}
+EXPORT_SYMBOL(menelaus_get_slot_pin_states);
+
+int menelaus_set_regulator_sleep(int enable, u32 val)
+{
+	int t, ret;
+	struct i2c_client *c = the_menelaus->client;
+
+	mutex_lock(&the_menelaus->lock);
+	ret = menelaus_write_reg(MENELAUS_SLEEP_CTRL2, val);
+	if (ret < 0)
+		goto out;
+
+	dev_dbg(&c->dev, "regulator sleep configuration: %02x\n", val);
+
+	ret = menelaus_read_reg(MENELAUS_GPIO_CTRL);
+	if (ret < 0)
+		goto out;
+	t = ((1 << 6) | 0x04);
+	if (enable)
+		ret |= t;
+	else
+		ret &= ~t;
+	ret = menelaus_write_reg(MENELAUS_GPIO_CTRL, ret);
+out:
+	mutex_unlock(&the_menelaus->lock);
+	return ret;
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* Handles Menelaus interrupts. Does not run in interrupt context */
+static void menelaus_work(struct work_struct *_menelaus)
+{
+	struct menelaus_chip *menelaus =
+			container_of(_menelaus, struct menelaus_chip, work);
+	void (*handler)(struct menelaus_chip *menelaus);
+
+	while (1) {
+		unsigned isr;
+
+		isr = (menelaus_read_reg(MENELAUS_INT_STATUS2)
+				& ~menelaus->mask2) << 8;
+		isr |= menelaus_read_reg(MENELAUS_INT_STATUS1)
+				& ~menelaus->mask1;
+		if (!isr)
+			break;
+
+		while (isr) {
+			int irq = fls(isr) - 1;
+			isr &= ~(1 << irq);
+
+			mutex_lock(&menelaus->lock);
+			menelaus_disable_irq(irq);
+			menelaus_ack_irq(irq);
+			handler = menelaus->handlers[irq];
+			if (handler)
+				handler(menelaus);
+			menelaus_enable_irq(irq);
+			mutex_unlock(&menelaus->lock);
+		}
+	}
+	enable_irq(menelaus->client->irq);
+}
+
+/*
+ * We cannot use I2C in interrupt context, so we just schedule work.
+ */
+static irqreturn_t menelaus_irq(int irq, void *_menelaus)
+{
+	struct menelaus_chip *menelaus = _menelaus;
+
+	disable_irq_nosync(irq);
+	(void)schedule_work(&menelaus->work);
+
+	return IRQ_HANDLED;
+}
+
+/*-----------------------------------------------------------------------*/
+
+/*
+ * The RTC needs to be set once, then it runs on backup battery power.
+ * It supports alarms, including system wake alarms (from some modes);
+ * and 1/second IRQs if requested.
+ */
+#ifdef CONFIG_RTC_DRV_TWL92330
+
+#define RTC_CTRL_RTC_EN		(1 << 0)
+#define RTC_CTRL_AL_EN		(1 << 1)
+#define RTC_CTRL_MODE12		(1 << 2)
+#define RTC_CTRL_EVERY_MASK	(3 << 3)
+#define RTC_CTRL_EVERY_SEC	(0 << 3)
+#define RTC_CTRL_EVERY_MIN	(1 << 3)
+#define RTC_CTRL_EVERY_HR	(2 << 3)
+#define RTC_CTRL_EVERY_DAY	(3 << 3)
+
+#define RTC_UPDATE_EVERY	0x08
+
+#define RTC_HR_PM		(1 << 7)
+
+static void menelaus_to_time(char *regs, struct rtc_time *t)
+{
+	t->tm_sec = bcd2bin(regs[0]);
+	t->tm_min = bcd2bin(regs[1]);
+	if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
+		t->tm_hour = bcd2bin(regs[2] & 0x1f) - 1;
+		if (regs[2] & RTC_HR_PM)
+			t->tm_hour += 12;
+	} else
+		t->tm_hour = bcd2bin(regs[2] & 0x3f);
+	t->tm_mday = bcd2bin(regs[3]);
+	t->tm_mon = bcd2bin(regs[4]) - 1;
+	t->tm_year = bcd2bin(regs[5]) + 100;
+}
+
+static int time_to_menelaus(struct rtc_time *t, int regnum)
+{
+	int	hour, status;
+
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_sec));
+	if (status < 0)
+		goto fail;
+
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_min));
+	if (status < 0)
+		goto fail;
+
+	if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
+		hour = t->tm_hour + 1;
+		if (hour > 12)
+			hour = RTC_HR_PM | bin2bcd(hour - 12);
+		else
+			hour = bin2bcd(hour);
+	} else
+		hour = bin2bcd(t->tm_hour);
+	status = menelaus_write_reg(regnum++, hour);
+	if (status < 0)
+		goto fail;
+
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mday));
+	if (status < 0)
+		goto fail;
+
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mon + 1));
+	if (status < 0)
+		goto fail;
+
+	status = menelaus_write_reg(regnum++, bin2bcd(t->tm_year - 100));
+	if (status < 0)
+		goto fail;
+
+	return 0;
+fail:
+	dev_err(&the_menelaus->client->dev, "rtc write reg %02x, err %d\n",
+			--regnum, status);
+	return status;
+}
+
+static int menelaus_read_time(struct device *dev, struct rtc_time *t)
+{
+	struct i2c_msg	msg[2];
+	char		regs[7];
+	int		status;
+
+	/* block read date and time registers */
+	regs[0] = MENELAUS_RTC_SEC;
+
+	msg[0].addr = MENELAUS_I2C_ADDRESS;
+	msg[0].flags = 0;
+	msg[0].len = 1;
+	msg[0].buf = regs;
+
+	msg[1].addr = MENELAUS_I2C_ADDRESS;
+	msg[1].flags = I2C_M_RD;
+	msg[1].len = sizeof(regs);
+	msg[1].buf = regs;
+
+	status = i2c_transfer(the_menelaus->client->adapter, msg, 2);
+	if (status != 2) {
+		dev_err(dev, "%s error %d\n", "read", status);
+		return -EIO;
+	}
+
+	menelaus_to_time(regs, t);
+	t->tm_wday = bcd2bin(regs[6]);
+
+	return 0;
+}
+
+static int menelaus_set_time(struct device *dev, struct rtc_time *t)
+{
+	int		status;
+
+	/* write date and time registers */
+	status = time_to_menelaus(t, MENELAUS_RTC_SEC);
+	if (status < 0)
+		return status;
+	status = menelaus_write_reg(MENELAUS_RTC_WKDAY, bin2bcd(t->tm_wday));
+	if (status < 0) {
+		dev_err(&the_menelaus->client->dev, "rtc write reg %02x "
+				"err %d\n", MENELAUS_RTC_WKDAY, status);
+		return status;
+	}
+
+	/* now commit the write */
+	status = menelaus_write_reg(MENELAUS_RTC_UPDATE, RTC_UPDATE_EVERY);
+	if (status < 0)
+		dev_err(&the_menelaus->client->dev, "rtc commit time, err %d\n",
+				status);
+
+	return 0;
+}
+
+static int menelaus_read_alarm(struct device *dev, struct rtc_wkalrm *w)
+{
+	struct i2c_msg	msg[2];
+	char		regs[6];
+	int		status;
+
+	/* block read alarm registers */
+	regs[0] = MENELAUS_RTC_AL_SEC;
+
+	msg[0].addr = MENELAUS_I2C_ADDRESS;
+	msg[0].flags = 0;
+	msg[0].len = 1;
+	msg[0].buf = regs;
+
+	msg[1].addr = MENELAUS_I2C_ADDRESS;
+	msg[1].flags = I2C_M_RD;
+	msg[1].len = sizeof(regs);
+	msg[1].buf = regs;
+
+	status = i2c_transfer(the_menelaus->client->adapter, msg, 2);
+	if (status != 2) {
+		dev_err(dev, "%s error %d\n", "alarm read", status);
+		return -EIO;
+	}
+
+	menelaus_to_time(regs, &w->time);
+
+	w->enabled = !!(the_menelaus->rtc_control & RTC_CTRL_AL_EN);
+
+	/* NOTE we *could* check if actually pending... */
+	w->pending = 0;
+
+	return 0;
+}
+
+static int menelaus_set_alarm(struct device *dev, struct rtc_wkalrm *w)
+{
+	int		status;
+
+	if (the_menelaus->client->irq <= 0 && w->enabled)
+		return -ENODEV;
+
+	/* clear previous alarm enable */
+	if (the_menelaus->rtc_control & RTC_CTRL_AL_EN) {
+		the_menelaus->rtc_control &= ~RTC_CTRL_AL_EN;
+		status = menelaus_write_reg(MENELAUS_RTC_CTRL,
+				the_menelaus->rtc_control);
+		if (status < 0)
+			return status;
+	}
+
+	/* write alarm registers */
+	status = time_to_menelaus(&w->time, MENELAUS_RTC_AL_SEC);
+	if (status < 0)
+		return status;
+
+	/* enable alarm if requested */
+	if (w->enabled) {
+		the_menelaus->rtc_control |= RTC_CTRL_AL_EN;
+		status = menelaus_write_reg(MENELAUS_RTC_CTRL,
+				the_menelaus->rtc_control);
+	}
+
+	return status;
+}
+
+#ifdef CONFIG_RTC_INTF_DEV
+
+static void menelaus_rtc_update_work(struct menelaus_chip *m)
+{
+	/* report 1/sec update */
+	local_irq_disable();
+	rtc_update_irq(m->rtc, 1, RTC_IRQF | RTC_UF);
+	local_irq_enable();
+}
+
+static int menelaus_ioctl(struct device *dev, unsigned cmd, unsigned long arg)
+{
+	int	status;
+
+	if (the_menelaus->client->irq <= 0)
+		return -ENOIOCTLCMD;
+
+	switch (cmd) {
+	/* alarm IRQ */
+	case RTC_AIE_ON:
+		if (the_menelaus->rtc_control & RTC_CTRL_AL_EN)
+			return 0;
+		the_menelaus->rtc_control |= RTC_CTRL_AL_EN;
+		break;
+	case RTC_AIE_OFF:
+		if (!(the_menelaus->rtc_control & RTC_CTRL_AL_EN))
+			return 0;
+		the_menelaus->rtc_control &= ~RTC_CTRL_AL_EN;
+		break;
+	/* 1/second "update" IRQ */
+	case RTC_UIE_ON:
+		if (the_menelaus->uie)
+			return 0;
+		status = menelaus_remove_irq_work(MENELAUS_RTCTMR_IRQ);
+		status = menelaus_add_irq_work(MENELAUS_RTCTMR_IRQ,
+				menelaus_rtc_update_work);
+		if (status == 0)
+			the_menelaus->uie = 1;
+		return status;
+	case RTC_UIE_OFF:
+		if (!the_menelaus->uie)
+			return 0;
+		status = menelaus_remove_irq_work(MENELAUS_RTCTMR_IRQ);
+		if (status == 0)
+			the_menelaus->uie = 0;
+		return status;
+	default:
+		return -ENOIOCTLCMD;
+	}
+	return menelaus_write_reg(MENELAUS_RTC_CTRL, the_menelaus->rtc_control);
+}
+
+#else
+#define menelaus_ioctl	NULL
+#endif
+
+/* REVISIT no compensation register support ... */
+
+static const struct rtc_class_ops menelaus_rtc_ops = {
+	.ioctl			= menelaus_ioctl,
+	.read_time		= menelaus_read_time,
+	.set_time		= menelaus_set_time,
+	.read_alarm		= menelaus_read_alarm,
+	.set_alarm		= menelaus_set_alarm,
+};
+
+static void menelaus_rtc_alarm_work(struct menelaus_chip *m)
+{
+	/* report alarm */
+	local_irq_disable();
+	rtc_update_irq(m->rtc, 1, RTC_IRQF | RTC_AF);
+	local_irq_enable();
+
+	/* then disable it; alarms are oneshot */
+	the_menelaus->rtc_control &= ~RTC_CTRL_AL_EN;
+	menelaus_write_reg(MENELAUS_RTC_CTRL, the_menelaus->rtc_control);
+}
+
+static inline void menelaus_rtc_init(struct menelaus_chip *m)
+{
+	int	alarm = (m->client->irq > 0);
+
+	/* assume 32KDETEN pin is pulled high */
+	if (!(menelaus_read_reg(MENELAUS_OSC_CTRL) & 0x80)) {
+		dev_dbg(&m->client->dev, "no 32k oscillator\n");
+		return;
+	}
+
+	/* support RTC alarm; it can issue wakeups */
+	if (alarm) {
+		if (menelaus_add_irq_work(MENELAUS_RTCALM_IRQ,
+				menelaus_rtc_alarm_work) < 0) {
+			dev_err(&m->client->dev, "can't handle RTC alarm\n");
+			return;
+		}
+		device_init_wakeup(&m->client->dev, 1);
+	}
+
+	/* be sure RTC is enabled; allow 1/sec irqs; leave 12hr mode alone */
+	m->rtc_control = menelaus_read_reg(MENELAUS_RTC_CTRL);
+	if (!(m->rtc_control & RTC_CTRL_RTC_EN)
+			|| (m->rtc_control & RTC_CTRL_AL_EN)
+			|| (m->rtc_control & RTC_CTRL_EVERY_MASK)) {
+		if (!(m->rtc_control & RTC_CTRL_RTC_EN)) {
+			dev_warn(&m->client->dev, "rtc clock needs setting\n");
+			m->rtc_control |= RTC_CTRL_RTC_EN;
+		}
+		m->rtc_control &= ~RTC_CTRL_EVERY_MASK;
+		m->rtc_control &= ~RTC_CTRL_AL_EN;
+		menelaus_write_reg(MENELAUS_RTC_CTRL, m->rtc_control);
+	}
+
+	m->rtc = rtc_device_register(DRIVER_NAME,
+			&m->client->dev,
+			&menelaus_rtc_ops, THIS_MODULE);
+	if (IS_ERR(m->rtc)) {
+		if (alarm) {
+			menelaus_remove_irq_work(MENELAUS_RTCALM_IRQ);
+			device_init_wakeup(&m->client->dev, 0);
+		}
+		dev_err(&m->client->dev, "can't register RTC: %d\n",
+				(int) PTR_ERR(m->rtc));
+		the_menelaus->rtc = NULL;
+	}
+}
+
+#else
+
+static inline void menelaus_rtc_init(struct menelaus_chip *m)
+{
+	/* nothing */
+}
+
+#endif
+
+/*-----------------------------------------------------------------------*/
+
+static struct i2c_driver menelaus_i2c_driver;
+
+static int menelaus_probe(struct i2c_client *client,
+			  const struct i2c_device_id *id)
+{
+	struct menelaus_chip	*menelaus;
+	int			rev = 0, val;
+	int			err = 0;
+	struct menelaus_platform_data *menelaus_pdata =
+					client->dev.platform_data;
+
+	if (the_menelaus) {
+		dev_dbg(&client->dev, "only one %s for now\n",
+				DRIVER_NAME);
+		return -ENODEV;
+	}
+
+	menelaus = kzalloc(sizeof *menelaus, GFP_KERNEL);
+	if (!menelaus)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, menelaus);
+
+	the_menelaus = menelaus;
+	menelaus->client = client;
+
+	/* If a true probe check the device */
+	rev = menelaus_read_reg(MENELAUS_REV);
+	if (rev < 0) {
+		pr_err(DRIVER_NAME ": device not found");
+		err = -ENODEV;
+		goto fail1;
+	}
+
+	/* Ack and disable all Menelaus interrupts */
+	menelaus_write_reg(MENELAUS_INT_ACK1, 0xff);
+	menelaus_write_reg(MENELAUS_INT_ACK2, 0xff);
+	menelaus_write_reg(MENELAUS_INT_MASK1, 0xff);
+	menelaus_write_reg(MENELAUS_INT_MASK2, 0xff);
+	menelaus->mask1 = 0xff;
+	menelaus->mask2 = 0xff;
+
+	/* Set output buffer strengths */
+	menelaus_write_reg(MENELAUS_MCT_CTRL1, 0x73);
+
+	if (client->irq > 0) {
+		err = request_irq(client->irq, menelaus_irq, IRQF_DISABLED,
+				  DRIVER_NAME, menelaus);
+		if (err) {
+			dev_dbg(&client->dev,  "can't get IRQ %d, err %d\n",
+					client->irq, err);
+			goto fail1;
+		}
+	}
+
+	mutex_init(&menelaus->lock);
+	INIT_WORK(&menelaus->work, menelaus_work);
+
+	pr_info("Menelaus rev %d.%d\n", rev >> 4, rev & 0x0f);
+
+	val = menelaus_read_reg(MENELAUS_VCORE_CTRL1);
+	if (val < 0)
+		goto fail2;
+	if (val & (1 << 7))
+		menelaus->vcore_hw_mode = 1;
+	else
+		menelaus->vcore_hw_mode = 0;
+
+	if (menelaus_pdata != NULL && menelaus_pdata->late_init != NULL) {
+		err = menelaus_pdata->late_init(&client->dev);
+		if (err < 0)
+			goto fail2;
+	}
+
+	menelaus_rtc_init(menelaus);
+
+	return 0;
+fail2:
+	free_irq(client->irq, menelaus);
+	flush_scheduled_work();
+fail1:
+	kfree(menelaus);
+	return err;
+}
+
+static int __exit menelaus_remove(struct i2c_client *client)
+{
+	struct menelaus_chip	*menelaus = i2c_get_clientdata(client);
+
+	free_irq(client->irq, menelaus);
+	kfree(menelaus);
+	i2c_set_clientdata(client, NULL);
+	the_menelaus = NULL;
+	return 0;
+}
+
+static const struct i2c_device_id menelaus_id[] = {
+	{ "menelaus", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, menelaus_id);
+
+static struct i2c_driver menelaus_i2c_driver = {
+	.driver = {
+		.name		= DRIVER_NAME,
+	},
+	.probe		= menelaus_probe,
+	.remove		= __exit_p(menelaus_remove),
+	.id_table	= menelaus_id,
+};
+
+static int __init menelaus_init(void)
+{
+	int res;
+
+	res = i2c_add_driver(&menelaus_i2c_driver);
+	if (res < 0) {
+		pr_err(DRIVER_NAME ": driver registration failed\n");
+		return res;
+	}
+
+	return 0;
+}
+
+static void __exit menelaus_exit(void)
+{
+	i2c_del_driver(&menelaus_i2c_driver);
+
+	/* FIXME: Shutdown menelaus parts that can be shut down */
+}
+
+MODULE_AUTHOR("Texas Instruments, Inc. (and others)");
+MODULE_DESCRIPTION("I2C interface for Menelaus.");
+MODULE_LICENSE("GPL");
+
+module_init(menelaus_init);
+module_exit(menelaus_exit);
-- 
cgit v0.10.2


From 52942b6b16c6ebb25f4dd4df0208d840ba0cbc5c Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Sat, 3 Jan 2009 00:16:03 +0100
Subject: swiotlb: Don't include linux/swiotlb.h twice in lib/swiotlb.c

There's no point in including the linux/swiotlb.h header twice in
lib/swiotlb.c - this patch gets rid of the unneeded include.

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index b6d0aae..7f5e21b 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -23,7 +23,6 @@
 #include <linux/spinlock.h>
 #include <linux/swiotlb.h>
 #include <linux/string.h>
-#include <linux/swiotlb.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
 #include <linux/highmem.h>
-- 
cgit v0.10.2


From 3f874b6643e189d3d07618928ceed0013d71593e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@sirena.org.uk>
Date: Sun, 4 Jan 2009 15:31:49 +0100
Subject: mfd: Fix section mismatch in da903x

The subdevice removal functions are marked __devexit but are referenced
from the error handling path when probing so are needed even when
__devexit functions are removed.

Signed-off-by: Mark Brown <broonie@sirena.org.uk>
Acked-by: Eric Miao <eric.miao@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>

diff --git a/drivers/mfd/da903x.c b/drivers/mfd/da903x.c
index fcaf1f6..99f8dcf 100644
--- a/drivers/mfd/da903x.c
+++ b/drivers/mfd/da903x.c
@@ -447,13 +447,13 @@ static const struct i2c_device_id da903x_id_table[] = {
 };
 MODULE_DEVICE_TABLE(i2c, da903x_id_table);
 
-static int __devexit __remove_subdev(struct device *dev, void *unused)
+static int __remove_subdev(struct device *dev, void *unused)
 {
 	platform_device_unregister(to_platform_device(dev));
 	return 0;
 }
 
-static int __devexit da903x_remove_subdevs(struct da903x_chip *chip)
+static int da903x_remove_subdevs(struct da903x_chip *chip)
 {
 	return device_for_each_child(chip->dev, NULL, __remove_subdev);
 }
-- 
cgit v0.10.2


From 4f6b434fee2402b3decdeae9d16eb648725ae426 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 9 Dec 2008 19:50:34 -0500
Subject: don't reallocate buffer in every audit_sockaddr()

No need to do that more than once per process lifetime; allocating/freeing
on each sendto/accept/etc. is bloody pointless.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4819f37..c2e43eb 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -174,12 +174,6 @@ struct audit_aux_data_socketcall {
 	unsigned long		args[0];
 };
 
-struct audit_aux_data_sockaddr {
-	struct audit_aux_data	d;
-	int			len;
-	char			a[0];
-};
-
 struct audit_aux_data_fd_pair {
 	struct	audit_aux_data d;
 	int	fd[2];
@@ -234,7 +228,8 @@ struct audit_context {
 	struct audit_context *previous; /* For nested syscalls */
 	struct audit_aux_data *aux;
 	struct audit_aux_data *aux_pids;
-
+	struct sockaddr_storage *sockaddr;
+	size_t sockaddr_len;
 				/* Save things to print about task_struct */
 	pid_t		    pid, ppid;
 	uid_t		    uid, euid, suid, fsuid;
@@ -921,6 +916,7 @@ static inline void audit_free_context(struct audit_context *context)
 		free_tree_refs(context);
 		audit_free_aux(context);
 		kfree(context->filterkey);
+		kfree(context->sockaddr);
 		kfree(context);
 		context  = previous;
 	} while (context);
@@ -1383,13 +1379,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				audit_log_format(ab, " a%d=%lx", i, axs->args[i]);
 			break; }
 
-		case AUDIT_SOCKADDR: {
-			struct audit_aux_data_sockaddr *axs = (void *)aux;
-
-			audit_log_format(ab, "saddr=");
-			audit_log_n_hex(ab, axs->a, axs->len);
-			break; }
-
 		case AUDIT_FD_PAIR: {
 			struct audit_aux_data_fd_pair *axs = (void *)aux;
 			audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]);
@@ -1421,6 +1410,16 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		audit_log_end(ab);
 	}
 
+	if (context->sockaddr_len) {
+		ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR);
+		if (ab) {
+			audit_log_format(ab, "saddr=");
+			audit_log_n_hex(ab, (void *)context->sockaddr,
+					context->sockaddr_len);
+			audit_log_end(ab);
+		}
+	}
+
 	for (aux = context->aux_pids; aux; aux = aux->next) {
 		struct audit_aux_data_pids *axs = (void *)aux;
 
@@ -1689,6 +1688,7 @@ void audit_syscall_exit(int valid, long return_code)
 		context->aux_pids = NULL;
 		context->target_pid = 0;
 		context->target_sid = 0;
+		context->sockaddr_len = 0;
 		kfree(context->filterkey);
 		context->filterkey = NULL;
 		tsk->audit_context = context;
@@ -2468,22 +2468,20 @@ int __audit_fd_pair(int fd1, int fd2)
  */
 int audit_sockaddr(int len, void *a)
 {
-	struct audit_aux_data_sockaddr *ax;
 	struct audit_context *context = current->audit_context;
 
 	if (likely(!context || context->dummy))
 		return 0;
 
-	ax = kmalloc(sizeof(*ax) + len, GFP_KERNEL);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->len = len;
-	memcpy(ax->a, a, len);
+	if (!context->sockaddr) {
+		void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+		context->sockaddr = p;
+	}
 
-	ax->d.type = AUDIT_SOCKADDR;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
+	context->sockaddr_len = len;
+	memcpy(context->sockaddr, a, len);
 	return 0;
 }
 
-- 
cgit v0.10.2


From f3298dc4f2277874d40cb4fc3a6e277317d6603b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Dec 2008 03:16:51 -0500
Subject: sanitize audit_socketcall

* don't bother with allocations
* now that it can't fail, make it return void

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 26c4f6f..466a953 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -446,7 +446,7 @@ extern void audit_log_task_context(struct audit_buffer *ab);
 extern int __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
 extern int audit_bprm(struct linux_binprm *bprm);
-extern int audit_socketcall(int nargs, unsigned long *args);
+extern void audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
 extern int __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
@@ -549,7 +549,7 @@ extern int audit_signals;
 #define audit_ipc_obj(i) ({ 0; })
 #define audit_ipc_set_perm(q,u,g,m) ({ 0; })
 #define audit_bprm(p) ({ 0; })
-#define audit_socketcall(n,a) ({ 0; })
+#define audit_socketcall(n,a) ((void)0)
 #define audit_fd_pair(n,a) ({ 0; })
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_set_macxattr(n) do { ; } while (0)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c2e43eb..5cda664 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -168,12 +168,6 @@ struct audit_aux_data_execve {
 	struct mm_struct *mm;
 };
 
-struct audit_aux_data_socketcall {
-	struct audit_aux_data	d;
-	int			nargs;
-	unsigned long		args[0];
-};
-
 struct audit_aux_data_fd_pair {
 	struct	audit_aux_data d;
 	int	fd[2];
@@ -247,6 +241,14 @@ struct audit_context {
 	struct audit_tree_refs *trees, *first_trees;
 	int tree_count;
 
+	int type;
+	union {
+		struct {
+			int nargs;
+			long args[6];
+		} socketcall;
+	};
+
 #if AUDIT_DEBUG
 	int		    put_count;
 	int		    ino_count;
@@ -1226,6 +1228,27 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
 		audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver);
 }
 
+static void show_special(struct audit_context *context)
+{
+	struct audit_buffer *ab;
+	int i;
+
+	ab = audit_log_start(context, GFP_KERNEL, context->type);
+	if (!ab)
+		return;
+
+	switch (context->type) {
+	case AUDIT_SOCKETCALL: {
+		int nargs = context->socketcall.nargs;
+		audit_log_format(ab, "nargs=%d", nargs);
+		for (i = 0; i < nargs; i++)
+			audit_log_format(ab, " a%d=%lx", i,
+				context->socketcall.args[i]);
+		break; }
+	}
+	audit_log_end(ab);
+}
+
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
 	const struct cred *cred;
@@ -1372,13 +1395,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			audit_log_execve_info(context, &ab, axi);
 			break; }
 
-		case AUDIT_SOCKETCALL: {
-			struct audit_aux_data_socketcall *axs = (void *)aux;
-			audit_log_format(ab, "nargs=%d", axs->nargs);
-			for (i=0; i<axs->nargs; i++)
-				audit_log_format(ab, " a%d=%lx", i, axs->args[i]);
-			break; }
-
 		case AUDIT_FD_PAIR: {
 			struct audit_aux_data_fd_pair *axs = (void *)aux;
 			audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]);
@@ -1410,6 +1426,9 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		audit_log_end(ab);
 	}
 
+	if (context->type)
+		show_special(context);
+
 	if (context->sockaddr_len) {
 		ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR);
 		if (ab) {
@@ -1689,6 +1708,7 @@ void audit_syscall_exit(int valid, long return_code)
 		context->target_pid = 0;
 		context->target_sid = 0;
 		context->sockaddr_len = 0;
+		context->type = 0;
 		kfree(context->filterkey);
 		context->filterkey = NULL;
 		tsk->audit_context = context;
@@ -2406,27 +2426,17 @@ int audit_bprm(struct linux_binprm *bprm)
  * @nargs: number of args
  * @args: args array
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int audit_socketcall(int nargs, unsigned long *args)
+void audit_socketcall(int nargs, unsigned long *args)
 {
-	struct audit_aux_data_socketcall *ax;
 	struct audit_context *context = current->audit_context;
 
 	if (likely(!context || context->dummy))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax) + nargs * sizeof(unsigned long), GFP_KERNEL);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->nargs = nargs;
-	memcpy(ax->args, args, nargs * sizeof(unsigned long));
+		return;
 
-	ax->d.type = AUDIT_SOCKETCALL;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->type = AUDIT_SOCKETCALL;
+	context->socketcall.nargs = nargs;
+	memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long));
 }
 
 /**
diff --git a/net/socket.c b/net/socket.c
index 2c730fc..b41a920 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2065,9 +2065,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	if (copy_from_user(a, args, nargs[call]))
 		return -EFAULT;
 
-	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
-	if (err)
-		return err;
+	audit_socketcall(nargs[call] / sizeof(unsigned long), a);
 
 	a0 = a[0];
 	a1 = a[1];
-- 
cgit v0.10.2


From a33e6751003c5ade603737d828b1519d980ce392 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Dec 2008 03:40:06 -0500
Subject: sanitize audit_ipc_obj()

* get rid of allocations
* make it return void
* simplify callers

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 466a953..f8578b9 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -443,7 +443,7 @@ extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 #define audit_get_loginuid(t) ((t)->loginuid)
 #define audit_get_sessionid(t) ((t)->sessionid)
 extern void audit_log_task_context(struct audit_buffer *ab);
-extern int __audit_ipc_obj(struct kern_ipc_perm *ipcp);
+extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
 extern int audit_bprm(struct linux_binprm *bprm);
 extern void audit_socketcall(int nargs, unsigned long *args);
@@ -460,11 +460,10 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 				  const struct cred *old);
 extern int __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old);
 
-static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp)
+static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_ipc_obj(ipcp);
-	return 0;
+		__audit_ipc_obj(ipcp);
 }
 static inline int audit_fd_pair(int fd1, int fd2)
 {
@@ -546,7 +545,7 @@ extern int audit_signals;
 #define audit_get_loginuid(t) (-1)
 #define audit_get_sessionid(t) (-1)
 #define audit_log_task_context(b) do { ; } while (0)
-#define audit_ipc_obj(i) ({ 0; })
+#define audit_ipc_obj(i) ((void)0)
 #define audit_ipc_set_perm(q,u,g,m) ({ 0; })
 #define audit_bprm(p) ({ 0; })
 #define audit_socketcall(n,a) ((void)0)
diff --git a/ipc/shm.c b/ipc/shm.c
index 38a0557..57dd500 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -747,9 +747,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
 			goto out;
 		}
 
-		err = audit_ipc_obj(&(shp->shm_perm));
-		if (err)
-			goto out_unlock;
+		audit_ipc_obj(&(shp->shm_perm));
 
 		if (!capable(CAP_IPC_LOCK)) {
 			uid_t euid = current_euid();
diff --git a/ipc/util.c b/ipc/util.c
index 5a1808c..579552a 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -624,10 +624,9 @@ void ipc_rcu_putref(void *ptr)
 int ipcperms (struct kern_ipc_perm *ipcp, short flag)
 {	/* flag will most probably be 0 or S_...UGO from <linux/stat.h> */
 	uid_t euid = current_euid();
-	int requested_mode, granted_mode, err;
+	int requested_mode, granted_mode;
 
-	if (unlikely((err = audit_ipc_obj(ipcp))))
-		return err;
+	audit_ipc_obj(ipcp);
 	requested_mode = (flag >> 6) | (flag >> 3) | flag;
 	granted_mode = ipcp->mode;
 	if (euid == ipcp->cuid ||
@@ -803,9 +802,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
 		goto out_up;
 	}
 
-	err = audit_ipc_obj(ipcp);
-	if (err)
-		goto out_unlock;
+	audit_ipc_obj(ipcp);
 
 	if (cmd == IPC_SET) {
 		err = audit_ipc_set_perm(extra_perm, perm->uid,
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 5cda664..7350431 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -247,6 +247,12 @@ struct audit_context {
 			int nargs;
 			long args[6];
 		} socketcall;
+		struct {
+			uid_t			uid;
+			gid_t			gid;
+			mode_t			mode;
+			u32			osid;
+		} ipc;
 	};
 
 #if AUDIT_DEBUG
@@ -605,19 +611,12 @@ static int audit_filter_rules(struct task_struct *tsk,
 					}
 				}
 				/* Find ipc objects that match */
-				if (ctx) {
-					struct audit_aux_data *aux;
-					for (aux = ctx->aux; aux;
-					     aux = aux->next) {
-						if (aux->type == AUDIT_IPC) {
-							struct audit_aux_data_ipcctl *axi = (void *)aux;
-							if (security_audit_rule_match(axi->osid, f->type, f->op, f->lsm_rule, ctx)) {
-								++result;
-								break;
-							}
-						}
-					}
-				}
+				if (!ctx || ctx->type != AUDIT_IPC)
+					break;
+				if (security_audit_rule_match(ctx->ipc.osid,
+							      f->type, f->op,
+							      f->lsm_rule, ctx))
+					++result;
 			}
 			break;
 		case AUDIT_ARG0:
@@ -1228,7 +1227,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
 		audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver);
 }
 
-static void show_special(struct audit_context *context)
+static void show_special(struct audit_context *context, int *call_panic)
 {
 	struct audit_buffer *ab;
 	int i;
@@ -1245,6 +1244,23 @@ static void show_special(struct audit_context *context)
 			audit_log_format(ab, " a%d=%lx", i,
 				context->socketcall.args[i]);
 		break; }
+	case AUDIT_IPC: {
+		u32 osid = context->ipc.osid;
+
+		audit_log_format(ab, "ouid=%u ogid=%u mode=%#o",
+			 context->ipc.uid, context->ipc.gid, context->ipc.mode);
+		if (osid) {
+			char *ctx = NULL;
+			u32 len;
+			if (security_secid_to_secctx(osid, &ctx, &len)) {
+				audit_log_format(ab, " osid=%u", osid);
+				*call_panic = 1;
+			} else {
+				audit_log_format(ab, " obj=%s", ctx);
+				security_release_secctx(ctx, len);
+			}
+		}
+		break; }
 	}
 	audit_log_end(ab);
 }
@@ -1363,26 +1379,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs);
 			break; }
 
-		case AUDIT_IPC: {
-			struct audit_aux_data_ipcctl *axi = (void *)aux;
-			audit_log_format(ab, 
-				 "ouid=%u ogid=%u mode=%#o",
-				 axi->uid, axi->gid, axi->mode);
-			if (axi->osid != 0) {
-				char *ctx = NULL;
-				u32 len;
-				if (security_secid_to_secctx(
-						axi->osid, &ctx, &len)) {
-					audit_log_format(ab, " osid=%u",
-							axi->osid);
-					call_panic = 1;
-				} else {
-					audit_log_format(ab, " obj=%s", ctx);
-					security_release_secctx(ctx, len);
-				}
-			}
-			break; }
-
 		case AUDIT_IPC_SET_PERM: {
 			struct audit_aux_data_ipcctl *axi = (void *)aux;
 			audit_log_format(ab,
@@ -1427,7 +1423,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 	}
 
 	if (context->type)
-		show_special(context);
+		show_special(context, &call_panic);
 
 	if (context->sockaddr_len) {
 		ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR);
@@ -2349,25 +2345,15 @@ int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
  * audit_ipc_obj - record audit data for ipc object
  * @ipcp: ipc permissions
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int __audit_ipc_obj(struct kern_ipc_perm *ipcp)
+void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
-	struct audit_aux_data_ipcctl *ax;
 	struct audit_context *context = current->audit_context;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->uid = ipcp->uid;
-	ax->gid = ipcp->gid;
-	ax->mode = ipcp->mode;
-	security_ipc_getsecid(ipcp, &ax->osid);
-	ax->d.type = AUDIT_IPC;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->ipc.uid = ipcp->uid;
+	context->ipc.gid = ipcp->gid;
+	context->ipc.mode = ipcp->mode;
+	security_ipc_getsecid(ipcp, &context->ipc.osid);
+	context->type = AUDIT_IPC;
 }
 
 /**
-- 
cgit v0.10.2


From e816f370cbadd2afea9f1a42f232d0636137d563 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Dec 2008 03:47:15 -0500
Subject: sanitize audit_ipc_set_perm()

* get rid of allocations
* make it return void
* simplify callers

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index f8578b9..b7abfe0 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -444,7 +444,7 @@ extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 #define audit_get_sessionid(t) ((t)->sessionid)
 extern void audit_log_task_context(struct audit_buffer *ab);
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
-extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
+extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
 extern int audit_bprm(struct linux_binprm *bprm);
 extern void audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
@@ -471,11 +471,10 @@ static inline int audit_fd_pair(int fd1, int fd2)
 		return __audit_fd_pair(fd1, fd2);
 	return 0;
 }
-static inline int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
+static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_ipc_set_perm(qbytes, uid, gid, mode);
-	return 0;
+		__audit_ipc_set_perm(qbytes, uid, gid, mode);
 }
 static inline int audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
 {
@@ -546,7 +545,7 @@ extern int audit_signals;
 #define audit_get_sessionid(t) (-1)
 #define audit_log_task_context(b) do { ; } while (0)
 #define audit_ipc_obj(i) ((void)0)
-#define audit_ipc_set_perm(q,u,g,m) ({ 0; })
+#define audit_ipc_set_perm(q,u,g,m) ((void)0)
 #define audit_bprm(p) ({ 0; })
 #define audit_socketcall(n,a) ((void)0)
 #define audit_fd_pair(n,a) ({ 0; })
diff --git a/ipc/util.c b/ipc/util.c
index 579552a..7585a72 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -803,13 +803,9 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
 	}
 
 	audit_ipc_obj(ipcp);
-
-	if (cmd == IPC_SET) {
-		err = audit_ipc_set_perm(extra_perm, perm->uid,
+	if (cmd == IPC_SET)
+		audit_ipc_set_perm(extra_perm, perm->uid,
 					 perm->gid, perm->mode);
-		if (err)
-			goto out_unlock;
-	}
 
 	euid = current_euid();
 	if (euid == ipcp->cuid ||
@@ -817,7 +813,6 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
 		return ipcp;
 
 	err = -EPERM;
-out_unlock:
 	ipc_unlock(ipcp);
 out_up:
 	up_write(&ids->rw_mutex);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7350431..fbed62e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -151,16 +151,6 @@ struct audit_aux_data_mq_getsetattr {
 	struct mq_attr 		mqstat;
 };
 
-struct audit_aux_data_ipcctl {
-	struct audit_aux_data	d;
-	struct ipc_perm		p;
-	unsigned long		qbytes;
-	uid_t			uid;
-	gid_t			gid;
-	mode_t			mode;
-	u32			osid;
-};
-
 struct audit_aux_data_execve {
 	struct audit_aux_data	d;
 	int argc;
@@ -252,6 +242,11 @@ struct audit_context {
 			gid_t			gid;
 			mode_t			mode;
 			u32			osid;
+			int			has_perm;
+			uid_t			perm_uid;
+			gid_t			perm_gid;
+			mode_t			perm_mode;
+			unsigned long		qbytes;
 		} ipc;
 	};
 
@@ -1260,6 +1255,19 @@ static void show_special(struct audit_context *context, int *call_panic)
 				security_release_secctx(ctx, len);
 			}
 		}
+		if (context->ipc.has_perm) {
+			audit_log_end(ab);
+			ab = audit_log_start(context, GFP_KERNEL,
+					     AUDIT_IPC_SET_PERM);
+			audit_log_format(ab,
+				"qbytes=%lx ouid=%u ogid=%u mode=%#o",
+				context->ipc.qbytes,
+				context->ipc.perm_uid,
+				context->ipc.perm_gid,
+				context->ipc.perm_mode);
+			if (!ab)
+				return;
+		}
 		break; }
 	}
 	audit_log_end(ab);
@@ -1379,13 +1387,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs);
 			break; }
 
-		case AUDIT_IPC_SET_PERM: {
-			struct audit_aux_data_ipcctl *axi = (void *)aux;
-			audit_log_format(ab,
-				"qbytes=%lx ouid=%u ogid=%u mode=%#o",
-				axi->qbytes, axi->uid, axi->gid, axi->mode);
-			break; }
-
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
 			audit_log_execve_info(context, &ab, axi);
@@ -2352,6 +2353,7 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
 	context->ipc.uid = ipcp->uid;
 	context->ipc.gid = ipcp->gid;
 	context->ipc.mode = ipcp->mode;
+	context->ipc.has_perm = 0;
 	security_ipc_getsecid(ipcp, &context->ipc.osid);
 	context->type = AUDIT_IPC;
 }
@@ -2363,26 +2365,17 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
  * @gid: msgq group id
  * @mode: msgq mode (permissions)
  *
- * Returns 0 for success or NULL context or < 0 on error.
+ * Called only after audit_ipc_obj().
  */
-int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
+void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
 {
-	struct audit_aux_data_ipcctl *ax;
 	struct audit_context *context = current->audit_context;
 
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->qbytes = qbytes;
-	ax->uid = uid;
-	ax->gid = gid;
-	ax->mode = mode;
-
-	ax->d.type = AUDIT_IPC_SET_PERM;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->ipc.qbytes = qbytes;
+	context->ipc.perm_uid = uid;
+	context->ipc.perm_gid = gid;
+	context->ipc.perm_mode = mode;
+	context->ipc.has_perm = 1;
 }
 
 int audit_bprm(struct linux_binprm *bprm)
-- 
cgit v0.10.2


From 7392906ea915b9a2c14dea32b3604b4e178f82f7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Dec 2008 06:58:59 -0500
Subject: sanitize audit_mq_getsetattr()

* get rid of allocations
* make it return void
* don't duplicate parts of audit_dummy_context()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index b7abfe0..b7707e5 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -454,7 +454,7 @@ extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr
 extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout);
 extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout);
 extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification);
-extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
+extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
 extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 				  const struct cred *new,
 				  const struct cred *old);
@@ -500,11 +500,10 @@ static inline int audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_n
 		return __audit_mq_notify(mqdes, u_notification);
 	return 0;
 }
-static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
+static inline void audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_mq_getsetattr(mqdes, mqstat);
-	return 0;
+		__audit_mq_getsetattr(mqdes, mqstat);
 }
 
 static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm,
@@ -555,7 +554,7 @@ extern int audit_signals;
 #define audit_mq_timedsend(d,l,p,t) ({ 0; })
 #define audit_mq_timedreceive(d,l,p,t) ({ 0; })
 #define audit_mq_notify(d,n) ({ 0; })
-#define audit_mq_getsetattr(d,s) ({ 0; })
+#define audit_mq_getsetattr(d,s) ((void)0)
 #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; })
 #define audit_log_capset(pid, ncr, ocr) ({ 0; })
 #define audit_ptrace(t) ((void)0)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index d9393f8..7563611 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1150,11 +1150,7 @@ asmlinkage long sys_mq_getsetattr(mqd_t mqdes,
 	omqstat = info->attr;
 	omqstat.mq_flags = filp->f_flags & O_NONBLOCK;
 	if (u_mqstat) {
-		ret = audit_mq_getsetattr(mqdes, &mqstat);
-		if (ret != 0) {
-			spin_unlock(&info->lock);
-			goto out_fput;
-		}
+		audit_mq_getsetattr(mqdes, &mqstat);
 		if (mqstat.mq_flags & O_NONBLOCK)
 			filp->f_flags |= O_NONBLOCK;
 		else
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index fbed62e..c50178c 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -145,12 +145,6 @@ struct audit_aux_data_mq_notify {
 	struct sigevent 	notification;
 };
 
-struct audit_aux_data_mq_getsetattr {
-	struct audit_aux_data	d;
-	mqd_t			mqdes;
-	struct mq_attr 		mqstat;
-};
-
 struct audit_aux_data_execve {
 	struct audit_aux_data	d;
 	int argc;
@@ -248,6 +242,10 @@ struct audit_context {
 			mode_t			perm_mode;
 			unsigned long		qbytes;
 		} ipc;
+		struct {
+			mqd_t			mqdes;
+			struct mq_attr 		mqstat;
+		} mq_getsetattr;
 	};
 
 #if AUDIT_DEBUG
@@ -1269,6 +1267,15 @@ static void show_special(struct audit_context *context, int *call_panic)
 				return;
 		}
 		break; }
+	case AUDIT_MQ_GETSETATTR: {
+		struct mq_attr *attr = &context->mq_getsetattr.mqstat;
+		audit_log_format(ab,
+			"mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld "
+			"mq_curmsgs=%ld ",
+			context->mq_getsetattr.mqdes,
+			attr->mq_flags, attr->mq_maxmsg,
+			attr->mq_msgsize, attr->mq_curmsgs);
+		break; }
 	}
 	audit_log_end(ab);
 }
@@ -1377,16 +1384,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				axi->notification.sigev_signo);
 			break; }
 
-		case AUDIT_MQ_GETSETATTR: {
-			struct audit_aux_data_mq_getsetattr *axi = (void *)aux;
-			audit_log_format(ab,
-				"mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld "
-				"mq_curmsgs=%ld ",
-				axi->mqdes,
-				axi->mqstat.mq_flags, axi->mqstat.mq_maxmsg,
-				axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs);
-			break; }
-
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
 			audit_log_execve_info(context, &ab, axi);
@@ -2316,30 +2313,13 @@ int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
  * @mqdes: MQ descriptor
  * @mqstat: MQ flags
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
+void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 {
-	struct audit_aux_data_mq_getsetattr *ax;
 	struct audit_context *context = current->audit_context;
-
-	if (!audit_enabled)
-		return 0;
-
-	if (likely(!context))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->mqdes = mqdes;
-	ax->mqstat = *mqstat;
-
-	ax->d.type = AUDIT_MQ_GETSETATTR;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->mq_getsetattr.mqdes = mqdes;
+	context->mq_getsetattr.mqstat = *mqstat;
+	context->type = AUDIT_MQ_GETSETATTR;
 }
 
 /**
-- 
cgit v0.10.2


From 20114f71b27cafeb7c7e41d2b0f0b68c3fbb022b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Dec 2008 07:16:12 -0500
Subject: sanitize audit_mq_notify()

* don't copy_from_user() twice
* don't bother with allocations
* don't duplicate parts of audit_dummy_context()
* make it return void

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index b7707e5..8101d2c 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -453,7 +453,7 @@ extern int audit_set_macxattr(const char *name);
 extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr);
 extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout);
 extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout);
-extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification);
+extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification);
 extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
 extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 				  const struct cred *new,
@@ -494,11 +494,10 @@ static inline int audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned in
 		return __audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout);
 	return 0;
 }
-static inline int audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
+static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_mq_notify(mqdes, u_notification);
-	return 0;
+		__audit_mq_notify(mqdes, notification);
 }
 static inline void audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 {
@@ -553,7 +552,7 @@ extern int audit_signals;
 #define audit_mq_open(o,m,a) ({ 0; })
 #define audit_mq_timedsend(d,l,p,t) ({ 0; })
 #define audit_mq_timedreceive(d,l,p,t) ({ 0; })
-#define audit_mq_notify(d,n) ({ 0; })
+#define audit_mq_notify(d,n) ((void)0)
 #define audit_mq_getsetattr(d,s) ((void)0)
 #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; })
 #define audit_log_capset(pid, ncr, ocr) ({ 0; })
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 7563611..e7b2f68 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1003,17 +1003,17 @@ asmlinkage long sys_mq_notify(mqd_t mqdes,
 	struct mqueue_inode_info *info;
 	struct sk_buff *nc;
 
-	ret = audit_mq_notify(mqdes, u_notification);
-	if (ret != 0)
-		return ret;
-
-	nc = NULL;
-	sock = NULL;
-	if (u_notification != NULL) {
+	if (u_notification) {
 		if (copy_from_user(&notification, u_notification,
 					sizeof(struct sigevent)))
 			return -EFAULT;
+	}
+
+	audit_mq_notify(mqdes, u_notification ? &notification : NULL);
 
+	nc = NULL;
+	sock = NULL;
+	if (u_notification != NULL) {
 		if (unlikely(notification.sigev_notify != SIGEV_NONE &&
 			     notification.sigev_notify != SIGEV_SIGNAL &&
 			     notification.sigev_notify != SIGEV_THREAD))
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c50178c..3ece960 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -139,12 +139,6 @@ struct audit_aux_data_mq_sendrecv {
 	struct timespec		abs_timeout;
 };
 
-struct audit_aux_data_mq_notify {
-	struct audit_aux_data	d;
-	mqd_t			mqdes;
-	struct sigevent 	notification;
-};
-
 struct audit_aux_data_execve {
 	struct audit_aux_data	d;
 	int argc;
@@ -246,6 +240,10 @@ struct audit_context {
 			mqd_t			mqdes;
 			struct mq_attr 		mqstat;
 		} mq_getsetattr;
+		struct {
+			mqd_t			mqdes;
+			int			sigev_signo;
+		} mq_notify;
 	};
 
 #if AUDIT_DEBUG
@@ -1267,6 +1265,11 @@ static void show_special(struct audit_context *context, int *call_panic)
 				return;
 		}
 		break; }
+	case AUDIT_MQ_NOTIFY: {
+		audit_log_format(ab, "mqdes=%d sigev_signo=%d",
+				context->mq_notify.mqdes,
+				context->mq_notify.sigev_signo);
+		break; }
 	case AUDIT_MQ_GETSETATTR: {
 		struct mq_attr *attr = &context->mq_getsetattr.mqstat;
 		audit_log_format(ab,
@@ -1376,14 +1379,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				axi->abs_timeout.tv_sec, axi->abs_timeout.tv_nsec);
 			break; }
 
-		case AUDIT_MQ_NOTIFY: {
-			struct audit_aux_data_mq_notify *axi = (void *)aux;
-			audit_log_format(ab,
-				"mqdes=%d sigev_signo=%d",
-				axi->mqdes,
-				axi->notification.sigev_signo);
-			break; }
-
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
 			audit_log_execve_info(context, &ab, axi);
@@ -2274,38 +2269,19 @@ int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len,
  * @mqdes: MQ descriptor
  * @u_notification: Notification event
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
 
-int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
+void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification)
 {
-	struct audit_aux_data_mq_notify *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (!audit_enabled)
-		return 0;
-
-	if (likely(!context))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	if (u_notification != NULL) {
-		if (copy_from_user(&ax->notification, u_notification, sizeof(ax->notification))) {
-			kfree(ax);
-			return -EFAULT;
-		}
-	} else
-		memset(&ax->notification, 0, sizeof(ax->notification));
-
-	ax->mqdes = mqdes;
+	if (notification)
+		context->mq_notify.sigev_signo = notification->sigev_signo;
+	else
+		context->mq_notify.sigev_signo = 0;
 
-	ax->d.type = AUDIT_MQ_NOTIFY;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->mq_notify.mqdes = mqdes;
+	context->type = AUDIT_MQ_NOTIFY;
 }
 
 /**
-- 
cgit v0.10.2


From c32c8af43b9adde8d6f938d8e6328c13b8de79ac Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Dec 2008 03:46:48 -0500
Subject: sanitize AUDIT_MQ_SENDRECV

* logging the original value of *msg_prio in mq_timedreceive(2)
  is insane - the argument is write-only (i.e. syscall always
  ignores the original value and only overwrites it).
* merge __audit_mq_timed{send,receive}
* don't do copy_from_user() twice
* don't mess with allocations in auditsc part
* ... and don't bother checking !audit_enabled and !context in there -
  we'd already checked for audit_dummy_context().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 8101d2c..67f0cdd 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -451,8 +451,7 @@ extern int audit_sockaddr(int len, void *addr);
 extern int __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
 extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr);
-extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout);
-extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout);
+extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
 extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification);
 extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
 extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
@@ -482,17 +481,10 @@ static inline int audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u
 		return __audit_mq_open(oflag, mode, u_attr);
 	return 0;
 }
-static inline int audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout)
+static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_mq_timedsend(mqdes, msg_len, msg_prio, u_abs_timeout);
-	return 0;
-}
-static inline int audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout)
-{
-	if (unlikely(!audit_dummy_context()))
-		return __audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout);
-	return 0;
+		__audit_mq_sendrecv(mqdes, msg_len, msg_prio, abs_timeout);
 }
 static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification)
 {
@@ -550,8 +542,7 @@ extern int audit_signals;
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_set_macxattr(n) do { ; } while (0)
 #define audit_mq_open(o,m,a) ({ 0; })
-#define audit_mq_timedsend(d,l,p,t) ({ 0; })
-#define audit_mq_timedreceive(d,l,p,t) ({ 0; })
+#define audit_mq_sendrecv(d,l,p,t) ((void)0)
 #define audit_mq_notify(d,n) ((void)0)
 #define audit_mq_getsetattr(d,s) ((void)0)
 #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; })
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index e7b2f68..192da80 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -524,31 +524,27 @@ static void __do_notify(struct mqueue_inode_info *info)
 	wake_up(&info->wait_q);
 }
 
-static long prepare_timeout(const struct timespec __user *u_arg)
+static long prepare_timeout(struct timespec *p)
 {
-	struct timespec ts, nowts;
+	struct timespec nowts;
 	long timeout;
 
-	if (u_arg) {
-		if (unlikely(copy_from_user(&ts, u_arg,
-					sizeof(struct timespec))))
-			return -EFAULT;
-
-		if (unlikely(ts.tv_nsec < 0 || ts.tv_sec < 0
-			|| ts.tv_nsec >= NSEC_PER_SEC))
+	if (p) {
+		if (unlikely(p->tv_nsec < 0 || p->tv_sec < 0
+			|| p->tv_nsec >= NSEC_PER_SEC))
 			return -EINVAL;
 		nowts = CURRENT_TIME;
 		/* first subtract as jiffies can't be too big */
-		ts.tv_sec -= nowts.tv_sec;
-		if (ts.tv_nsec < nowts.tv_nsec) {
-			ts.tv_nsec += NSEC_PER_SEC;
-			ts.tv_sec--;
+		p->tv_sec -= nowts.tv_sec;
+		if (p->tv_nsec < nowts.tv_nsec) {
+			p->tv_nsec += NSEC_PER_SEC;
+			p->tv_sec--;
 		}
-		ts.tv_nsec -= nowts.tv_nsec;
-		if (ts.tv_sec < 0)
+		p->tv_nsec -= nowts.tv_nsec;
+		if (p->tv_sec < 0)
 			return 0;
 
-		timeout = timespec_to_jiffies(&ts) + 1;
+		timeout = timespec_to_jiffies(p) + 1;
 	} else
 		return MAX_SCHEDULE_TIMEOUT;
 
@@ -829,17 +825,22 @@ asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
 	struct ext_wait_queue *receiver;
 	struct msg_msg *msg_ptr;
 	struct mqueue_inode_info *info;
+	struct timespec ts, *p = NULL;
 	long timeout;
 	int ret;
 
-	ret = audit_mq_timedsend(mqdes, msg_len, msg_prio, u_abs_timeout);
-	if (ret != 0)
-		return ret;
+	if (u_abs_timeout) {
+		if (copy_from_user(&ts, u_abs_timeout, 
+					sizeof(struct timespec)))
+			return -EFAULT;
+		p = &ts;
+	}
 
 	if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
 		return -EINVAL;
 
-	timeout = prepare_timeout(u_abs_timeout);
+	audit_mq_sendrecv(mqdes, msg_len, msg_prio, p);
+	timeout = prepare_timeout(p);
 
 	ret = -EBADF;
 	filp = fget(mqdes);
@@ -918,12 +919,17 @@ asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
 	struct inode *inode;
 	struct mqueue_inode_info *info;
 	struct ext_wait_queue wait;
+	struct timespec ts, *p = NULL;
 
-	ret = audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout);
-	if (ret != 0)
-		return ret;
+	if (u_abs_timeout) {
+		if (copy_from_user(&ts, u_abs_timeout, 
+					sizeof(struct timespec)))
+			return -EFAULT;
+		p = &ts;
+	}
 
-	timeout = prepare_timeout(u_abs_timeout);
+	audit_mq_sendrecv(mqdes, msg_len, 0, p);
+	timeout = prepare_timeout(p);
 
 	ret = -EBADF;
 	filp = fget(mqdes);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3ece960..140c474 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -131,14 +131,6 @@ struct audit_aux_data_mq_open {
 	struct mq_attr		attr;
 };
 
-struct audit_aux_data_mq_sendrecv {
-	struct audit_aux_data	d;
-	mqd_t			mqdes;
-	size_t			msg_len;
-	unsigned int		msg_prio;
-	struct timespec		abs_timeout;
-};
-
 struct audit_aux_data_execve {
 	struct audit_aux_data	d;
 	int argc;
@@ -244,6 +236,12 @@ struct audit_context {
 			mqd_t			mqdes;
 			int			sigev_signo;
 		} mq_notify;
+		struct {
+			mqd_t			mqdes;
+			size_t			msg_len;
+			unsigned int		msg_prio;
+			struct timespec		abs_timeout;
+		} mq_sendrecv;
 	};
 
 #if AUDIT_DEBUG
@@ -1265,6 +1263,16 @@ static void show_special(struct audit_context *context, int *call_panic)
 				return;
 		}
 		break; }
+	case AUDIT_MQ_SENDRECV: {
+		audit_log_format(ab,
+			"mqdes=%d msg_len=%zd msg_prio=%u "
+			"abs_timeout_sec=%ld abs_timeout_nsec=%ld",
+			context->mq_sendrecv.mqdes,
+			context->mq_sendrecv.msg_len,
+			context->mq_sendrecv.msg_prio,
+			context->mq_sendrecv.abs_timeout.tv_sec,
+			context->mq_sendrecv.abs_timeout.tv_nsec);
+		break; }
 	case AUDIT_MQ_NOTIFY: {
 		audit_log_format(ab, "mqdes=%d sigev_signo=%d",
 				context->mq_notify.mqdes,
@@ -1370,15 +1378,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				axi->attr.mq_curmsgs);
 			break; }
 
-		case AUDIT_MQ_SENDRECV: {
-			struct audit_aux_data_mq_sendrecv *axi = (void *)aux;
-			audit_log_format(ab,
-				"mqdes=%d msg_len=%zd msg_prio=%u "
-				"abs_timeout_sec=%ld abs_timeout_nsec=%ld",
-				axi->mqdes, axi->msg_len, axi->msg_prio,
-				axi->abs_timeout.tv_sec, axi->abs_timeout.tv_nsec);
-			break; }
-
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
 			audit_log_execve_info(context, &ab, axi);
@@ -2171,97 +2170,29 @@ int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
 }
 
 /**
- * __audit_mq_timedsend - record audit data for a POSIX MQ timed send
+ * __audit_mq_sendrecv - record audit data for a POSIX MQ timed send/receive
  * @mqdes: MQ descriptor
  * @msg_len: Message length
  * @msg_prio: Message priority
- * @u_abs_timeout: Message timeout in absolute time
- *
- * Returns 0 for success or NULL context or < 0 on error.
- */
-int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
-			const struct timespec __user *u_abs_timeout)
-{
-	struct audit_aux_data_mq_sendrecv *ax;
-	struct audit_context *context = current->audit_context;
-
-	if (!audit_enabled)
-		return 0;
-
-	if (likely(!context))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	if (u_abs_timeout != NULL) {
-		if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) {
-			kfree(ax);
-			return -EFAULT;
-		}
-	} else
-		memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout));
-
-	ax->mqdes = mqdes;
-	ax->msg_len = msg_len;
-	ax->msg_prio = msg_prio;
-
-	ax->d.type = AUDIT_MQ_SENDRECV;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
-}
-
-/**
- * __audit_mq_timedreceive - record audit data for a POSIX MQ timed receive
- * @mqdes: MQ descriptor
- * @msg_len: Message length
- * @u_msg_prio: Message priority
- * @u_abs_timeout: Message timeout in absolute time
+ * @abs_timeout: Message timeout in absolute time
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len,
-				unsigned int __user *u_msg_prio,
-				const struct timespec __user *u_abs_timeout)
+void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
+			const struct timespec *abs_timeout)
 {
-	struct audit_aux_data_mq_sendrecv *ax;
 	struct audit_context *context = current->audit_context;
+	struct timespec *p = &context->mq_sendrecv.abs_timeout;
 
-	if (!audit_enabled)
-		return 0;
-
-	if (likely(!context))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	if (u_msg_prio != NULL) {
-		if (get_user(ax->msg_prio, u_msg_prio)) {
-			kfree(ax);
-			return -EFAULT;
-		}
-	} else
-		ax->msg_prio = 0;
-
-	if (u_abs_timeout != NULL) {
-		if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) {
-			kfree(ax);
-			return -EFAULT;
-		}
-	} else
-		memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout));
+	if (abs_timeout)
+		memcpy(p, abs_timeout, sizeof(struct timespec));
+	else
+		memset(p, 0, sizeof(struct timespec));
 
-	ax->mqdes = mqdes;
-	ax->msg_len = msg_len;
+	context->mq_sendrecv.mqdes = mqdes;
+	context->mq_sendrecv.msg_len = msg_len;
+	context->mq_sendrecv.msg_prio = msg_prio;
 
-	ax->d.type = AUDIT_MQ_SENDRECV;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->type = AUDIT_MQ_SENDRECV;
 }
 
 /**
-- 
cgit v0.10.2


From 564f6993ffef656aebaf46cf2f1f6cb4f5c97207 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Dec 2008 04:02:26 -0500
Subject: sanitize audit_mq_open()

* don't bother with allocations
* don't do double copy_from_user()
* don't duplicate parts of check for audit_dummy_context()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 67f0cdd..54978bd 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -450,7 +450,7 @@ extern void audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
 extern int __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
-extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr);
+extern void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr);
 extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
 extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification);
 extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
@@ -475,11 +475,10 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
 	if (unlikely(!audit_dummy_context()))
 		__audit_ipc_set_perm(qbytes, uid, gid, mode);
 }
-static inline int audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
+static inline void audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_mq_open(oflag, mode, u_attr);
-	return 0;
+		__audit_mq_open(oflag, mode, attr);
 }
 static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout)
 {
@@ -541,7 +540,7 @@ extern int audit_signals;
 #define audit_fd_pair(n,a) ({ 0; })
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_set_macxattr(n) do { ; } while (0)
-#define audit_mq_open(o,m,a) ({ 0; })
+#define audit_mq_open(o,m,a) ((void)0)
 #define audit_mq_sendrecv(d,l,p,t) ((void)0)
 #define audit_mq_notify(d,n) ((void)0)
 #define audit_mq_getsetattr(d,s) ((void)0)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 192da80..d448b69 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -588,22 +588,18 @@ static int mq_attr_ok(struct mq_attr *attr)
  * Invoked when creating a new queue via sys_mq_open
  */
 static struct file *do_create(struct dentry *dir, struct dentry *dentry,
-			int oflag, mode_t mode, struct mq_attr __user *u_attr)
+			int oflag, mode_t mode, struct mq_attr *attr)
 {
 	const struct cred *cred = current_cred();
-	struct mq_attr attr;
 	struct file *result;
 	int ret;
 
-	if (u_attr) {
-		ret = -EFAULT;
-		if (copy_from_user(&attr, u_attr, sizeof(attr)))
-			goto out;
+	if (attr) {
 		ret = -EINVAL;
-		if (!mq_attr_ok(&attr))
+		if (!mq_attr_ok(attr))
 			goto out;
 		/* store for use during create */
-		dentry->d_fsdata = &attr;
+		dentry->d_fsdata = attr;
 	}
 
 	mode &= ~current->fs->umask;
@@ -660,11 +656,13 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 	struct dentry *dentry;
 	struct file *filp;
 	char *name;
+	struct mq_attr attr;
 	int fd, error;
 
-	error = audit_mq_open(oflag, mode, u_attr);
-	if (error != 0)
-		return error;
+	if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
+		return -EFAULT;
+
+	audit_mq_open(oflag, mode, u_attr ? &attr : NULL);
 
 	if (IS_ERR(name = getname(u_name)))
 		return PTR_ERR(name);
@@ -690,7 +688,8 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 			filp = do_open(dentry, oflag);
 		} else {
 			filp = do_create(mqueue_mnt->mnt_root, dentry,
-						oflag, mode, u_attr);
+						oflag, mode,
+						u_attr ? &attr : NULL);
 		}
 	} else {
 		error = -ENOENT;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 140c474..83e946f 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -124,13 +124,6 @@ struct audit_aux_data {
 /* Number of target pids per aux struct. */
 #define AUDIT_AUX_PIDS	16
 
-struct audit_aux_data_mq_open {
-	struct audit_aux_data	d;
-	int			oflag;
-	mode_t			mode;
-	struct mq_attr		attr;
-};
-
 struct audit_aux_data_execve {
 	struct audit_aux_data	d;
 	int argc;
@@ -242,6 +235,11 @@ struct audit_context {
 			unsigned int		msg_prio;
 			struct timespec		abs_timeout;
 		} mq_sendrecv;
+		struct {
+			int			oflag;
+			mode_t			mode;
+			struct mq_attr		attr;
+		} mq_open;
 	};
 
 #if AUDIT_DEBUG
@@ -1263,6 +1261,16 @@ static void show_special(struct audit_context *context, int *call_panic)
 				return;
 		}
 		break; }
+	case AUDIT_MQ_OPEN: {
+		audit_log_format(ab,
+			"oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld "
+			"mq_msgsize=%ld mq_curmsgs=%ld",
+			context->mq_open.oflag, context->mq_open.mode,
+			context->mq_open.attr.mq_flags,
+			context->mq_open.attr.mq_maxmsg,
+			context->mq_open.attr.mq_msgsize,
+			context->mq_open.attr.mq_curmsgs);
+		break; }
 	case AUDIT_MQ_SENDRECV: {
 		audit_log_format(ab,
 			"mqdes=%d msg_len=%zd msg_prio=%u "
@@ -1368,15 +1376,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			continue; /* audit_panic has been called */
 
 		switch (aux->type) {
-		case AUDIT_MQ_OPEN: {
-			struct audit_aux_data_mq_open *axi = (void *)aux;
-			audit_log_format(ab,
-				"oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld "
-				"mq_msgsize=%ld mq_curmsgs=%ld",
-				axi->oflag, axi->mode, axi->attr.mq_flags,
-				axi->attr.mq_maxmsg, axi->attr.mq_msgsize,
-				axi->attr.mq_curmsgs);
-			break; }
 
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
@@ -2135,38 +2134,20 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
  * @mode: mode bits
  * @u_attr: queue attributes
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
+void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr)
 {
-	struct audit_aux_data_mq_open *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (!audit_enabled)
-		return 0;
-
-	if (likely(!context))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	if (u_attr != NULL) {
-		if (copy_from_user(&ax->attr, u_attr, sizeof(ax->attr))) {
-			kfree(ax);
-			return -EFAULT;
-		}
-	} else
-		memset(&ax->attr, 0, sizeof(ax->attr));
+	if (attr)
+		memcpy(&context->mq_open.attr, attr, sizeof(struct mq_attr));
+	else
+		memset(&context->mq_open.attr, 0, sizeof(struct mq_attr));
 
-	ax->oflag = oflag;
-	ax->mode = mode;
+	context->mq_open.oflag = oflag;
+	context->mq_open.mode = mode;
 
-	ax->d.type = AUDIT_MQ_OPEN;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->type = AUDIT_MQ_OPEN;
 }
 
 /**
-- 
cgit v0.10.2


From 157cf649a735a2f7e8dba0ed08e6e38b6c30d886 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Dec 2008 04:57:47 -0500
Subject: sanitize audit_fd_pair()

* no allocations
* return void

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/pipe.c b/fs/pipe.c
index aaf797b..8916971 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1016,10 +1016,7 @@ int do_pipe_flags(int *fd, int flags)
 		goto err_fdr;
 	fdw = error;
 
-	error = audit_fd_pair(fdr, fdw);
-	if (error < 0)
-		goto err_fdw;
-
+	audit_fd_pair(fdr, fdw);
 	fd_install(fdr, fr);
 	fd_install(fdw, fw);
 	fd[0] = fdr;
@@ -1027,8 +1024,6 @@ int do_pipe_flags(int *fd, int flags)
 
 	return 0;
 
- err_fdw:
-	put_unused_fd(fdw);
  err_fdr:
 	put_unused_fd(fdr);
  err_read_pipe:
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 54978bd..bd59cd1 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -448,7 +448,7 @@ extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mod
 extern int audit_bprm(struct linux_binprm *bprm);
 extern void audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
-extern int __audit_fd_pair(int fd1, int fd2);
+extern void __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
 extern void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr);
 extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
@@ -464,11 +464,10 @@ static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 	if (unlikely(!audit_dummy_context()))
 		__audit_ipc_obj(ipcp);
 }
-static inline int audit_fd_pair(int fd1, int fd2)
+static inline void audit_fd_pair(int fd1, int fd2)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_fd_pair(fd1, fd2);
-	return 0;
+		__audit_fd_pair(fd1, fd2);
 }
 static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
 {
@@ -537,7 +536,7 @@ extern int audit_signals;
 #define audit_ipc_set_perm(q,u,g,m) ((void)0)
 #define audit_bprm(p) ({ 0; })
 #define audit_socketcall(n,a) ((void)0)
-#define audit_fd_pair(n,a) ({ 0; })
+#define audit_fd_pair(n,a) ((void)0)
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_set_macxattr(n) do { ; } while (0)
 #define audit_mq_open(o,m,a) ((void)0)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 83e946f..327e65d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -131,11 +131,6 @@ struct audit_aux_data_execve {
 	struct mm_struct *mm;
 };
 
-struct audit_aux_data_fd_pair {
-	struct	audit_aux_data d;
-	int	fd[2];
-};
-
 struct audit_aux_data_pids {
 	struct audit_aux_data	d;
 	pid_t			target_pid[AUDIT_AUX_PIDS];
@@ -241,6 +236,7 @@ struct audit_context {
 			struct mq_attr		attr;
 		} mq_open;
 	};
+	int fds[2];
 
 #if AUDIT_DEBUG
 	int		    put_count;
@@ -1382,11 +1378,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			audit_log_execve_info(context, &ab, axi);
 			break; }
 
-		case AUDIT_FD_PAIR: {
-			struct audit_aux_data_fd_pair *axs = (void *)aux;
-			audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]);
-			break; }
-
 		case AUDIT_BPRM_FCAPS: {
 			struct audit_aux_data_bprm_fcaps *axs = (void *)aux;
 			audit_log_format(ab, "fver=%x", axs->fcap_ver);
@@ -1416,6 +1407,15 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 	if (context->type)
 		show_special(context, &call_panic);
 
+	if (context->fds[0] >= 0) {
+		ab = audit_log_start(context, GFP_KERNEL, AUDIT_FD_PAIR);
+		if (ab) {
+			audit_log_format(ab, "fd0=%d fd1=%d",
+					context->fds[0], context->fds[1]);
+			audit_log_end(ab);
+		}
+	}
+
 	if (context->sockaddr_len) {
 		ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR);
 		if (ab) {
@@ -1696,6 +1696,7 @@ void audit_syscall_exit(int valid, long return_code)
 		context->target_sid = 0;
 		context->sockaddr_len = 0;
 		context->type = 0;
+		context->fds[0] = -1;
 		kfree(context->filterkey);
 		context->filterkey = NULL;
 		tsk->audit_context = context;
@@ -2291,29 +2292,12 @@ void audit_socketcall(int nargs, unsigned long *args)
  * @fd1: the first file descriptor
  * @fd2: the second file descriptor
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int __audit_fd_pair(int fd1, int fd2)
+void __audit_fd_pair(int fd1, int fd2)
 {
 	struct audit_context *context = current->audit_context;
-	struct audit_aux_data_fd_pair *ax;
-
-	if (likely(!context)) {
-		return 0;
-	}
-
-	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
-	if (!ax) {
-		return -ENOMEM;
-	}
-
-	ax->fd[0] = fd1;
-	ax->fd[1] = fd2;
-
-	ax->d.type = AUDIT_FD_PAIR;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->fds[0] = fd1;
+	context->fds[1] = fd2;
 }
 
 /**
diff --git a/net/socket.c b/net/socket.c
index b41a920..06603d7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1313,13 +1313,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 		goto out_fd1;
 	}
 
-	err = audit_fd_pair(fd1, fd2);
-	if (err < 0) {
-		fput(newfile1);
-		fput(newfile2);
-		goto out_fd;
-	}
-
+	audit_fd_pair(fd1, fd2);
 	fd_install(fd1, newfile1);
 	fd_install(fd2, newfile2);
 	/* fd1 and fd2 may be already another descriptors.
@@ -1349,7 +1343,6 @@ out_fd2:
 out_fd1:
 	put_filp(newfile2);
 	sock_release(sock2);
-out_fd:
 	put_unused_fd(fd1);
 	put_unused_fd(fd2);
 	goto out;
-- 
cgit v0.10.2


From 57f71a0af4244d9ba3c0bce74b1d2e66e8d520bd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 4 Jan 2009 14:52:57 -0500
Subject: sanitize audit_log_capset()

* no allocations
* return void
* don't duplicate checked for dummy context

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index bd59cd1..7ddcb6a 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -457,7 +457,7 @@ extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
 extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 				  const struct cred *new,
 				  const struct cred *old);
-extern int __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old);
+extern void __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old);
 
 static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
@@ -504,12 +504,11 @@ static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm,
 	return 0;
 }
 
-static inline int audit_log_capset(pid_t pid, const struct cred *new,
+static inline void audit_log_capset(pid_t pid, const struct cred *new,
 				   const struct cred *old)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_log_capset(pid, new, old);
-	return 0;
+		__audit_log_capset(pid, new, old);
 }
 
 extern int audit_n_rules;
@@ -544,7 +543,7 @@ extern int audit_signals;
 #define audit_mq_notify(d,n) ((void)0)
 #define audit_mq_getsetattr(d,s) ((void)0)
 #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; })
-#define audit_log_capset(pid, ncr, ocr) ({ 0; })
+#define audit_log_capset(pid, ncr, ocr) ((void)0)
 #define audit_ptrace(t) ((void)0)
 #define audit_n_rules 0
 #define audit_signals 0
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 327e65d..c76a582 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -235,6 +235,10 @@ struct audit_context {
 			mode_t			mode;
 			struct mq_attr		attr;
 		} mq_open;
+		struct {
+			pid_t			pid;
+			struct audit_cap_data	cap;
+		} capset;
 	};
 	int fds[2];
 
@@ -1291,6 +1295,12 @@ static void show_special(struct audit_context *context, int *call_panic)
 			attr->mq_flags, attr->mq_maxmsg,
 			attr->mq_msgsize, attr->mq_curmsgs);
 		break; }
+	case AUDIT_CAPSET: {
+		audit_log_format(ab, "pid=%d", context->capset.pid);
+		audit_log_cap(ab, "cap_pi", &context->capset.cap.inheritable);
+		audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted);
+		audit_log_cap(ab, "cap_pe", &context->capset.cap.effective);
+		break; }
 	}
 	audit_log_end(ab);
 }
@@ -1392,14 +1402,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			audit_log_cap(ab, "new_pe", &axs->new_pcap.effective);
 			break; }
 
-		case AUDIT_CAPSET: {
-			struct audit_aux_data_capset *axs = (void *)aux;
-			audit_log_format(ab, "pid=%d", axs->pid);
-			audit_log_cap(ab, "cap_pi", &axs->cap.inheritable);
-			audit_log_cap(ab, "cap_pp", &axs->cap.permitted);
-			audit_log_cap(ab, "cap_pe", &axs->cap.effective);
-			break; }
-
 		}
 		audit_log_end(ab);
 	}
@@ -2456,29 +2458,15 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
  * Record the aguments userspace sent to sys_capset for later printing by the
  * audit system if applicable
  */
-int __audit_log_capset(pid_t pid,
+void __audit_log_capset(pid_t pid,
 		       const struct cred *new, const struct cred *old)
 {
-	struct audit_aux_data_capset *ax;
 	struct audit_context *context = current->audit_context;
-
-	if (likely(!audit_enabled || !context || context->dummy))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->d.type = AUDIT_CAPSET;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-
-	ax->pid = pid;
-	ax->cap.effective   = new->cap_effective;
-	ax->cap.inheritable = new->cap_effective;
-	ax->cap.permitted   = new->cap_permitted;
-
-	return 0;
+	context->capset.pid = pid;
+	context->capset.cap.effective   = new->cap_effective;
+	context->capset.cap.inheritable = new->cap_effective;
+	context->capset.cap.permitted   = new->cap_permitted;
+	context->type = AUDIT_CAPSET;
 }
 
 /**
diff --git a/kernel/capability.c b/kernel/capability.c
index 36b4b4d..c598d9d 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -280,9 +280,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 	if (ret < 0)
 		goto error;
 
-	ret = audit_log_capset(pid, new, current_cred());
-	if (ret < 0)
-		return ret;
+	audit_log_capset(pid, new, current_cred());
 
 	return commit_creds(new);
 
-- 
cgit v0.10.2


From 1a9d0797b8977d413435277bf9661efbbd584693 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Dec 2008 12:04:02 -0500
Subject: audit_update_lsm_rules() misses the audit_inode_hash[] ones

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 9fd85a4..0febaa0 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1778,6 +1778,41 @@ unlock_and_return:
 	return result;
 }
 
+static int update_lsm_rule(struct audit_entry *entry)
+{
+	struct audit_entry *nentry;
+	struct audit_watch *watch;
+	struct audit_tree *tree;
+	int err = 0;
+
+	if (!security_audit_rule_known(&entry->rule))
+		return 0;
+
+	watch = entry->rule.watch;
+	tree = entry->rule.tree;
+	nentry = audit_dupe_rule(&entry->rule, watch);
+	if (IS_ERR(nentry)) {
+		/* save the first error encountered for the
+		 * return value */
+		err = PTR_ERR(nentry);
+		audit_panic("error updating LSM filters");
+		if (watch)
+			list_del(&entry->rule.rlist);
+		list_del_rcu(&entry->list);
+	} else {
+		if (watch) {
+			list_add(&nentry->rule.rlist, &watch->rules);
+			list_del(&entry->rule.rlist);
+		} else if (tree)
+			list_replace_init(&entry->rule.rlist,
+				     &nentry->rule.rlist);
+		list_replace_rcu(&entry->list, &nentry->list);
+	}
+	call_rcu(&entry->rcu, audit_free_rule_rcu);
+
+	return err;
+}
+
 /* This function will re-initialize the lsm_rule field of all applicable rules.
  * It will traverse the filter lists serarching for rules that contain LSM
  * specific filter fields.  When such a rule is found, it is copied, the
@@ -1785,42 +1820,24 @@ unlock_and_return:
  * updated rule. */
 int audit_update_lsm_rules(void)
 {
-	struct audit_entry *entry, *n, *nentry;
-	struct audit_watch *watch;
-	struct audit_tree *tree;
+	struct audit_entry *e, *n;
 	int i, err = 0;
 
 	/* audit_filter_mutex synchronizes the writers */
 	mutex_lock(&audit_filter_mutex);
 
 	for (i = 0; i < AUDIT_NR_FILTERS; i++) {
-		list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) {
-			if (!security_audit_rule_known(&entry->rule))
-				continue;
-
-			watch = entry->rule.watch;
-			tree = entry->rule.tree;
-			nentry = audit_dupe_rule(&entry->rule, watch);
-			if (IS_ERR(nentry)) {
-				/* save the first error encountered for the
-				 * return value */
-				if (!err)
-					err = PTR_ERR(nentry);
-				audit_panic("error updating LSM filters");
-				if (watch)
-					list_del(&entry->rule.rlist);
-				list_del_rcu(&entry->list);
-			} else {
-				if (watch) {
-					list_add(&nentry->rule.rlist,
-						 &watch->rules);
-					list_del(&entry->rule.rlist);
-				} else if (tree)
-					list_replace_init(&entry->rule.rlist,
-						     &nentry->rule.rlist);
-				list_replace_rcu(&entry->list, &nentry->list);
-			}
-			call_rcu(&entry->rcu, audit_free_rule_rcu);
+		list_for_each_entry_safe(e, n, &audit_filter_list[i], list) {
+			int res = update_lsm_rule(e);
+			if (!err)
+				err = res;
+		}
+	}
+	for (i=0; i< AUDIT_INODE_BUCKETS; i++) {
+		list_for_each_entry_safe(e, n, &audit_inode_hash[i], list) {
+			int res = update_lsm_rule(e);
+			if (!err)
+				err = res;
 		}
 	}
 
-- 
cgit v0.10.2


From 0590b9335a1c72a3f0defcc6231287f7817e07c8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Dec 2008 23:45:27 -0500
Subject: fixing audit rule ordering mess, part 1

Problem: ordering between the rules on exit chain is currently lost;
all watch and inode rules are listed after everything else _and_
exit,never on one kind doesn't stop exit,always on another from
being matched.

Solution: assign priorities to rules, keep track of the current
highest-priority matching rule and its result (always/never).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 7ddcb6a..5b47eeb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -373,6 +373,7 @@ struct audit_krule {
 	struct audit_watch	*watch;	/* associated watch */
 	struct audit_tree	*tree;	/* associated watched tree */
 	struct list_head	rlist;	/* entry in audit_{watch,tree}.rules list */
+	u64			prio;
 };
 
 struct audit_field {
diff --git a/kernel/audit.h b/kernel/audit.h
index 9d67174..16f18ca 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -159,11 +159,8 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
 		return __audit_signal_info(sig, t);
 	return 0;
 }
-extern enum audit_state audit_filter_inodes(struct task_struct *,
-					    struct audit_context *);
-extern void audit_set_auditable(struct audit_context *);
+extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
 #else
 #define audit_signal_info(s,t) AUDIT_DISABLED
 #define audit_filter_inodes(t,c) AUDIT_DISABLED
-#define audit_set_auditable(c)
 #endif
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 0febaa0..995a2e8 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -919,6 +919,7 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
 	new->action = old->action;
 	for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
 		new->mask[i] = old->mask[i];
+	new->prio = old->prio;
 	new->buflen = old->buflen;
 	new->inode_f = old->inode_f;
 	new->watch = NULL;
@@ -987,9 +988,8 @@ static void audit_update_watch(struct audit_parent *parent,
 
 		/* If the update involves invalidating rules, do the inode-based
 		 * filtering now, so we don't omit records. */
-		if (invalidating && current->audit_context &&
-		    audit_filter_inodes(current, current->audit_context) == AUDIT_RECORD_CONTEXT)
-			audit_set_auditable(current->audit_context);
+		if (invalidating && current->audit_context)
+			audit_filter_inodes(current, current->audit_context);
 
 		nwatch = audit_dupe_watch(owatch);
 		if (IS_ERR(nwatch)) {
@@ -1258,6 +1258,9 @@ static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
 	return ret;
 }
 
+static u64 prio_low = ~0ULL/2;
+static u64 prio_high = ~0ULL/2 - 1;
+
 /* Add rule to given filterlist if not a duplicate. */
 static inline int audit_add_rule(struct audit_entry *entry,
 				 struct list_head *list)
@@ -1319,6 +1322,14 @@ static inline int audit_add_rule(struct audit_entry *entry,
 		}
 	}
 
+	entry->rule.prio = ~0ULL;
+	if (entry->rule.listnr == AUDIT_FILTER_EXIT) {
+		if (entry->rule.flags & AUDIT_FILTER_PREPEND)
+			entry->rule.prio = ++prio_high;
+		else
+			entry->rule.prio = --prio_low;
+	}
+
 	if (entry->rule.flags & AUDIT_FILTER_PREPEND) {
 		list_add_rcu(&entry->list, list);
 		entry->rule.flags &= ~AUDIT_FILTER_PREPEND;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c76a582..19d2c27 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -165,14 +165,14 @@ struct audit_tree_refs {
 struct audit_context {
 	int		    dummy;	/* must be the first element */
 	int		    in_syscall;	/* 1 if task is in a syscall */
-	enum audit_state    state;
+	enum audit_state    state, current_state;
 	unsigned int	    serial;     /* serial number for record */
 	struct timespec	    ctime;      /* time of syscall entry */
 	int		    major;      /* syscall number */
 	unsigned long	    argv[4];    /* syscall arguments */
 	int		    return_valid; /* return code is valid */
 	long		    return_code;/* syscall return code */
-	int		    auditable;  /* 1 if record should be written */
+	u64		    prio;
 	int		    name_count;
 	struct audit_names  names[AUDIT_NAMES];
 	char *		    filterkey;	/* key for rule that triggered record */
@@ -630,8 +630,16 @@ static int audit_filter_rules(struct task_struct *tsk,
 			return 0;
 		}
 	}
-	if (rule->filterkey && ctx)
-		ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC);
+
+	if (ctx) {
+		if (rule->prio <= ctx->prio)
+			return 0;
+		if (rule->filterkey) {
+			kfree(ctx->filterkey);
+			ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC);
+		}
+		ctx->prio = rule->prio;
+	}
 	switch (rule->action) {
 	case AUDIT_NEVER:    *state = AUDIT_DISABLED;	    break;
 	case AUDIT_ALWAYS:   *state = AUDIT_RECORD_CONTEXT; break;
@@ -685,6 +693,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
 			    audit_filter_rules(tsk, &e->rule, ctx, NULL,
 					       &state)) {
 				rcu_read_unlock();
+				ctx->current_state = state;
 				return state;
 			}
 		}
@@ -698,15 +707,14 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
  * buckets applicable to the inode numbers in audit_names[].
  * Regarding audit_state, same rules apply as for audit_filter_syscall().
  */
-enum audit_state audit_filter_inodes(struct task_struct *tsk,
-				     struct audit_context *ctx)
+void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx)
 {
 	int i;
 	struct audit_entry *e;
 	enum audit_state state;
 
 	if (audit_pid && tsk->tgid == audit_pid)
-		return AUDIT_DISABLED;
+		return;
 
 	rcu_read_lock();
 	for (i = 0; i < ctx->name_count; i++) {
@@ -723,17 +731,20 @@ enum audit_state audit_filter_inodes(struct task_struct *tsk,
 			if ((e->rule.mask[word] & bit) == bit &&
 			    audit_filter_rules(tsk, &e->rule, ctx, n, &state)) {
 				rcu_read_unlock();
-				return state;
+				ctx->current_state = state;
+				return;
 			}
 		}
 	}
 	rcu_read_unlock();
-	return AUDIT_BUILD_CONTEXT;
 }
 
-void audit_set_auditable(struct audit_context *ctx)
+static void audit_set_auditable(struct audit_context *ctx)
 {
-	ctx->auditable = 1;
+	if (!ctx->prio) {
+		ctx->prio = 1;
+		ctx->current_state = AUDIT_RECORD_CONTEXT;
+	}
 }
 
 static inline struct audit_context *audit_get_context(struct task_struct *tsk,
@@ -764,23 +775,11 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	else
 		context->return_code  = return_code;
 
-	if (context->in_syscall && !context->dummy && !context->auditable) {
-		enum audit_state state;
-
-		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
-		if (state == AUDIT_RECORD_CONTEXT) {
-			context->auditable = 1;
-			goto get_context;
-		}
-
-		state = audit_filter_inodes(tsk, context);
-		if (state == AUDIT_RECORD_CONTEXT)
-			context->auditable = 1;
-
+	if (context->in_syscall && !context->dummy) {
+		audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
+		audit_filter_inodes(tsk, context);
 	}
 
-get_context:
-
 	tsk->audit_context = NULL;
 	return context;
 }
@@ -790,8 +789,7 @@ static inline void audit_free_names(struct audit_context *context)
 	int i;
 
 #if AUDIT_DEBUG == 2
-	if (context->auditable
-	    ||context->put_count + context->ino_count != context->name_count) {
+	if (context->put_count + context->ino_count != context->name_count) {
 		printk(KERN_ERR "%s:%d(:%d): major=%d in_syscall=%d"
 		       " name_count=%d put_count=%d"
 		       " ino_count=%d [NOT freeing]\n",
@@ -842,6 +840,7 @@ static inline void audit_zero_context(struct audit_context *context,
 {
 	memset(context, 0, sizeof(*context));
 	context->state      = state;
+	context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
 }
 
 static inline struct audit_context *audit_alloc_context(enum audit_state state)
@@ -1543,7 +1542,7 @@ void audit_free(struct task_struct *tsk)
 	 * We use GFP_ATOMIC here because we might be doing this
 	 * in the context of the idle thread */
 	/* that can happen only if we are called from do_exit() */
-	if (context->in_syscall && context->auditable)
+	if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT)
 		audit_log_exit(context, tsk);
 
 	audit_free_context(context);
@@ -1627,15 +1626,17 @@ void audit_syscall_entry(int arch, int major,
 
 	state = context->state;
 	context->dummy = !audit_n_rules;
-	if (!context->dummy && (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT))
+	if (!context->dummy && state == AUDIT_BUILD_CONTEXT) {
+		context->prio = 0;
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]);
+	}
 	if (likely(state == AUDIT_DISABLED))
 		return;
 
 	context->serial     = 0;
 	context->ctime      = CURRENT_TIME;
 	context->in_syscall = 1;
-	context->auditable  = !!(state == AUDIT_RECORD_CONTEXT);
+	context->current_state  = state;
 	context->ppid       = 0;
 }
 
@@ -1643,17 +1644,20 @@ void audit_finish_fork(struct task_struct *child)
 {
 	struct audit_context *ctx = current->audit_context;
 	struct audit_context *p = child->audit_context;
-	if (!p || !ctx || !ctx->auditable)
+	if (!p || !ctx)
+		return;
+	if (!ctx->in_syscall || ctx->current_state != AUDIT_RECORD_CONTEXT)
 		return;
 	p->arch = ctx->arch;
 	p->major = ctx->major;
 	memcpy(p->argv, ctx->argv, sizeof(ctx->argv));
 	p->ctime = ctx->ctime;
 	p->dummy = ctx->dummy;
-	p->auditable = ctx->auditable;
 	p->in_syscall = ctx->in_syscall;
 	p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL);
 	p->ppid = current->pid;
+	p->prio = ctx->prio;
+	p->current_state = ctx->current_state;
 }
 
 /**
@@ -1677,11 +1681,11 @@ void audit_syscall_exit(int valid, long return_code)
 	if (likely(!context))
 		return;
 
-	if (context->in_syscall && context->auditable)
+	if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT)
 		audit_log_exit(context, tsk);
 
 	context->in_syscall = 0;
-	context->auditable  = 0;
+	context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
 
 	if (context->previous) {
 		struct audit_context *new_context = context->previous;
@@ -2091,7 +2095,10 @@ int auditsc_get_stamp(struct audit_context *ctx,
 	t->tv_sec  = ctx->ctime.tv_sec;
 	t->tv_nsec = ctx->ctime.tv_nsec;
 	*serial    = ctx->serial;
-	ctx->auditable = 1;
+	if (!ctx->prio) {
+		ctx->prio = 1;
+		ctx->current_state = AUDIT_RECORD_CONTEXT;
+	}
 	return 1;
 }
 
-- 
cgit v0.10.2


From e45aa212ea81d39b38ba158df344dc3a500153e5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 15 Dec 2008 01:17:50 -0500
Subject: audit rules ordering, part 2

Fix the actual rule listing; add per-type lists _not_ used for matching,
with all exit,... sitting on one such list.  Simplifies "do something
for all rules" logics, while we are at it...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 5b47eeb..cc71fdb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -373,6 +373,7 @@ struct audit_krule {
 	struct audit_watch	*watch;	/* associated watch */
 	struct audit_tree	*tree;	/* associated watched tree */
 	struct list_head	rlist;	/* entry in audit_{watch,tree}.rules list */
+	struct list_head	list;	/* for AUDIT_LIST* purposes only */
 	u64			prio;
 };
 
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 8b50944..48bddad 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -450,6 +450,7 @@ static void kill_rules(struct audit_tree *tree)
 			audit_log_end(ab);
 			rule->tree = NULL;
 			list_del_rcu(&entry->list);
+			list_del(&entry->rule.list);
 			call_rcu(&entry->rcu, audit_free_rule_rcu);
 		}
 	}
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 995a2e8..5d4edc6 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -86,6 +86,14 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
 #error Fix audit_filter_list initialiser
 #endif
 };
+static struct list_head audit_rules_list[AUDIT_NR_FILTERS] = {
+	LIST_HEAD_INIT(audit_rules_list[0]),
+	LIST_HEAD_INIT(audit_rules_list[1]),
+	LIST_HEAD_INIT(audit_rules_list[2]),
+	LIST_HEAD_INIT(audit_rules_list[3]),
+	LIST_HEAD_INIT(audit_rules_list[4]),
+	LIST_HEAD_INIT(audit_rules_list[5]),
+};
 
 DEFINE_MUTEX(audit_filter_mutex);
 
@@ -1007,12 +1015,15 @@ static void audit_update_watch(struct audit_parent *parent,
 			list_del_rcu(&oentry->list);
 
 			nentry = audit_dupe_rule(&oentry->rule, nwatch);
-			if (IS_ERR(nentry))
+			if (IS_ERR(nentry)) {
+				list_del(&oentry->rule.list);
 				audit_panic("error updating watch, removing");
-			else {
+			} else {
 				int h = audit_hash_ino((u32)ino);
 				list_add(&nentry->rule.rlist, &nwatch->rules);
 				list_add_rcu(&nentry->list, &audit_inode_hash[h]);
+				list_replace(&oentry->rule.list,
+					     &nentry->rule.list);
 			}
 
 			call_rcu(&oentry->rcu, audit_free_rule_rcu);
@@ -1077,6 +1088,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 				audit_log_end(ab);
 			}
 			list_del(&r->rlist);
+			list_del(&r->list);
 			list_del_rcu(&e->list);
 			call_rcu(&e->rcu, audit_free_rule_rcu);
 		}
@@ -1331,9 +1343,13 @@ static inline int audit_add_rule(struct audit_entry *entry,
 	}
 
 	if (entry->rule.flags & AUDIT_FILTER_PREPEND) {
+		list_add(&entry->rule.list,
+			 &audit_rules_list[entry->rule.listnr]);
 		list_add_rcu(&entry->list, list);
 		entry->rule.flags &= ~AUDIT_FILTER_PREPEND;
 	} else {
+		list_add_tail(&entry->rule.list,
+			      &audit_rules_list[entry->rule.listnr]);
 		list_add_tail_rcu(&entry->list, list);
 	}
 #ifdef CONFIG_AUDITSYSCALL
@@ -1415,6 +1431,7 @@ static inline int audit_del_rule(struct audit_entry *entry,
 		audit_remove_tree_rule(&e->rule);
 
 	list_del_rcu(&e->list);
+	list_del(&e->rule.list);
 	call_rcu(&e->rcu, audit_free_rule_rcu);
 
 #ifdef CONFIG_AUDITSYSCALL
@@ -1443,30 +1460,16 @@ out:
 static void audit_list(int pid, int seq, struct sk_buff_head *q)
 {
 	struct sk_buff *skb;
-	struct audit_entry *entry;
+	struct audit_krule *r;
 	int i;
 
 	/* This is a blocking read, so use audit_filter_mutex instead of rcu
 	 * iterator to sync with list writers. */
 	for (i=0; i<AUDIT_NR_FILTERS; i++) {
-		list_for_each_entry(entry, &audit_filter_list[i], list) {
-			struct audit_rule *rule;
-
-			rule = audit_krule_to_rule(&entry->rule);
-			if (unlikely(!rule))
-				break;
-			skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
-					 rule, sizeof(*rule));
-			if (skb)
-				skb_queue_tail(q, skb);
-			kfree(rule);
-		}
-	}
-	for (i = 0; i < AUDIT_INODE_BUCKETS; i++) {
-		list_for_each_entry(entry, &audit_inode_hash[i], list) {
+		list_for_each_entry(r, &audit_rules_list[i], list) {
 			struct audit_rule *rule;
 
-			rule = audit_krule_to_rule(&entry->rule);
+			rule = audit_krule_to_rule(r);
 			if (unlikely(!rule))
 				break;
 			skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
@@ -1485,30 +1488,16 @@ static void audit_list(int pid, int seq, struct sk_buff_head *q)
 static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
 {
 	struct sk_buff *skb;
-	struct audit_entry *e;
+	struct audit_krule *r;
 	int i;
 
 	/* This is a blocking read, so use audit_filter_mutex instead of rcu
 	 * iterator to sync with list writers. */
 	for (i=0; i<AUDIT_NR_FILTERS; i++) {
-		list_for_each_entry(e, &audit_filter_list[i], list) {
-			struct audit_rule_data *data;
-
-			data = audit_krule_to_data(&e->rule);
-			if (unlikely(!data))
-				break;
-			skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
-					 data, sizeof(*data) + data->buflen);
-			if (skb)
-				skb_queue_tail(q, skb);
-			kfree(data);
-		}
-	}
-	for (i=0; i< AUDIT_INODE_BUCKETS; i++) {
-		list_for_each_entry(e, &audit_inode_hash[i], list) {
+		list_for_each_entry(r, &audit_rules_list[i], list) {
 			struct audit_rule_data *data;
 
-			data = audit_krule_to_data(&e->rule);
+			data = audit_krule_to_data(r);
 			if (unlikely(!data))
 				break;
 			skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
@@ -1789,35 +1778,37 @@ unlock_and_return:
 	return result;
 }
 
-static int update_lsm_rule(struct audit_entry *entry)
+static int update_lsm_rule(struct audit_krule *r)
 {
+	struct audit_entry *entry = container_of(r, struct audit_entry, rule);
 	struct audit_entry *nentry;
 	struct audit_watch *watch;
 	struct audit_tree *tree;
 	int err = 0;
 
-	if (!security_audit_rule_known(&entry->rule))
+	if (!security_audit_rule_known(r))
 		return 0;
 
-	watch = entry->rule.watch;
-	tree = entry->rule.tree;
-	nentry = audit_dupe_rule(&entry->rule, watch);
+	watch = r->watch;
+	tree = r->tree;
+	nentry = audit_dupe_rule(r, watch);
 	if (IS_ERR(nentry)) {
 		/* save the first error encountered for the
 		 * return value */
 		err = PTR_ERR(nentry);
 		audit_panic("error updating LSM filters");
 		if (watch)
-			list_del(&entry->rule.rlist);
+			list_del(&r->rlist);
 		list_del_rcu(&entry->list);
+		list_del(&r->list);
 	} else {
 		if (watch) {
 			list_add(&nentry->rule.rlist, &watch->rules);
-			list_del(&entry->rule.rlist);
+			list_del(&r->rlist);
 		} else if (tree)
-			list_replace_init(&entry->rule.rlist,
-				     &nentry->rule.rlist);
+			list_replace_init(&r->rlist, &nentry->rule.rlist);
 		list_replace_rcu(&entry->list, &nentry->list);
+		list_replace(&r->list, &nentry->rule.list);
 	}
 	call_rcu(&entry->rcu, audit_free_rule_rcu);
 
@@ -1831,27 +1822,19 @@ static int update_lsm_rule(struct audit_entry *entry)
  * updated rule. */
 int audit_update_lsm_rules(void)
 {
-	struct audit_entry *e, *n;
+	struct audit_krule *r, *n;
 	int i, err = 0;
 
 	/* audit_filter_mutex synchronizes the writers */
 	mutex_lock(&audit_filter_mutex);
 
 	for (i = 0; i < AUDIT_NR_FILTERS; i++) {
-		list_for_each_entry_safe(e, n, &audit_filter_list[i], list) {
-			int res = update_lsm_rule(e);
-			if (!err)
-				err = res;
-		}
-	}
-	for (i=0; i< AUDIT_INODE_BUCKETS; i++) {
-		list_for_each_entry_safe(e, n, &audit_inode_hash[i], list) {
-			int res = update_lsm_rule(e);
+		list_for_each_entry_safe(r, n, &audit_rules_list[i], list) {
+			int res = update_lsm_rule(r);
 			if (!err)
 				err = res;
 		}
 	}
-
 	mutex_unlock(&audit_filter_mutex);
 
 	return err;
-- 
cgit v0.10.2


From e048e02c89db7bd49d1a5fac77a11c8fb3603087 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 16 Dec 2008 03:51:22 -0500
Subject: make sure that filterkey of task,always rules is reported

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 19d2c27..8cbddff 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -652,7 +652,7 @@ static int audit_filter_rules(struct task_struct *tsk,
  * completely disabled for this task.  Since we only have the task
  * structure at this point, we can only check uid and gid.
  */
-static enum audit_state audit_filter_task(struct task_struct *tsk)
+static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
 {
 	struct audit_entry *e;
 	enum audit_state   state;
@@ -660,6 +660,8 @@ static enum audit_state audit_filter_task(struct task_struct *tsk)
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) {
 		if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state)) {
+			if (state == AUDIT_RECORD_CONTEXT)
+				*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
 			rcu_read_unlock();
 			return state;
 		}
@@ -866,18 +868,21 @@ int audit_alloc(struct task_struct *tsk)
 {
 	struct audit_context *context;
 	enum audit_state     state;
+	char *key = NULL;
 
 	if (likely(!audit_ever_enabled))
 		return 0; /* Return if not auditing. */
 
-	state = audit_filter_task(tsk);
+	state = audit_filter_task(tsk, &key);
 	if (likely(state == AUDIT_DISABLED))
 		return 0;
 
 	if (!(context = audit_alloc_context(state))) {
+		kfree(key);
 		audit_log_lost("out of memory in audit_alloc");
 		return -ENOMEM;
 	}
+	context->filterkey = key;
 
 	tsk->audit_context  = context;
 	set_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
@@ -1703,8 +1708,10 @@ void audit_syscall_exit(int valid, long return_code)
 		context->sockaddr_len = 0;
 		context->type = 0;
 		context->fds[0] = -1;
-		kfree(context->filterkey);
-		context->filterkey = NULL;
+		if (context->state != AUDIT_RECORD_CONTEXT) {
+			kfree(context->filterkey);
+			context->filterkey = NULL;
+		}
 		tsk->audit_context = context;
 	}
 }
-- 
cgit v0.10.2


From 36c4f1b18c8a7d0adb4085e7f531860b837bb6b0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 15 Dec 2008 01:50:28 -0500
Subject: clean up audit_rule_{add,del} a bit

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 5d4edc6..e6e3829 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1114,12 +1114,16 @@ static void audit_inotify_unregister(struct list_head *in_list)
 /* Find an existing audit rule.
  * Caller must hold audit_filter_mutex to prevent stale rule data. */
 static struct audit_entry *audit_find_rule(struct audit_entry *entry,
-					   struct list_head *list)
+					   struct list_head **p)
 {
 	struct audit_entry *e, *found = NULL;
+	struct list_head *list;
 	int h;
 
-	if (entry->rule.watch) {
+	if (entry->rule.inode_f) {
+		h = audit_hash_ino(entry->rule.inode_f->val);
+		*p = list = &audit_inode_hash[h];
+	} else if (entry->rule.watch) {
 		/* we don't know the inode number, so must walk entire hash */
 		for (h = 0; h < AUDIT_INODE_BUCKETS; h++) {
 			list = &audit_inode_hash[h];
@@ -1130,6 +1134,8 @@ static struct audit_entry *audit_find_rule(struct audit_entry *entry,
 				}
 		}
 		goto out;
+	} else {
+		*p = list = &audit_filter_list[entry->rule.listnr];
 	}
 
 	list_for_each_entry(e, list, list)
@@ -1274,14 +1280,13 @@ static u64 prio_low = ~0ULL/2;
 static u64 prio_high = ~0ULL/2 - 1;
 
 /* Add rule to given filterlist if not a duplicate. */
-static inline int audit_add_rule(struct audit_entry *entry,
-				 struct list_head *list)
+static inline int audit_add_rule(struct audit_entry *entry)
 {
 	struct audit_entry *e;
-	struct audit_field *inode_f = entry->rule.inode_f;
 	struct audit_watch *watch = entry->rule.watch;
 	struct audit_tree *tree = entry->rule.tree;
 	struct nameidata *ndp = NULL, *ndw = NULL;
+	struct list_head *list;
 	int h, err;
 #ifdef CONFIG_AUDITSYSCALL
 	int dont_count = 0;
@@ -1292,13 +1297,8 @@ static inline int audit_add_rule(struct audit_entry *entry,
 		dont_count = 1;
 #endif
 
-	if (inode_f) {
-		h = audit_hash_ino(inode_f->val);
-		list = &audit_inode_hash[h];
-	}
-
 	mutex_lock(&audit_filter_mutex);
-	e = audit_find_rule(entry, list);
+	e = audit_find_rule(entry, &list);
 	mutex_unlock(&audit_filter_mutex);
 	if (e) {
 		err = -EEXIST;
@@ -1372,15 +1372,14 @@ error:
 }
 
 /* Remove an existing rule from filterlist. */
-static inline int audit_del_rule(struct audit_entry *entry,
-				 struct list_head *list)
+static inline int audit_del_rule(struct audit_entry *entry)
 {
 	struct audit_entry  *e;
-	struct audit_field *inode_f = entry->rule.inode_f;
 	struct audit_watch *watch, *tmp_watch = entry->rule.watch;
 	struct audit_tree *tree = entry->rule.tree;
+	struct list_head *list;
 	LIST_HEAD(inotify_list);
-	int h, ret = 0;
+	int ret = 0;
 #ifdef CONFIG_AUDITSYSCALL
 	int dont_count = 0;
 
@@ -1390,13 +1389,8 @@ static inline int audit_del_rule(struct audit_entry *entry,
 		dont_count = 1;
 #endif
 
-	if (inode_f) {
-		h = audit_hash_ino(inode_f->val);
-		list = &audit_inode_hash[h];
-	}
-
 	mutex_lock(&audit_filter_mutex);
-	e = audit_find_rule(entry, list);
+	e = audit_find_rule(entry, &list);
 	if (!e) {
 		mutex_unlock(&audit_filter_mutex);
 		ret = -ENOENT;
@@ -1603,8 +1597,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 		if (IS_ERR(entry))
 			return PTR_ERR(entry);
 
-		err = audit_add_rule(entry,
-				     &audit_filter_list[entry->rule.listnr]);
+		err = audit_add_rule(entry);
 		audit_log_rule_change(loginuid, sessionid, sid, "add",
 				      &entry->rule, !err);
 
@@ -1620,8 +1613,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 		if (IS_ERR(entry))
 			return PTR_ERR(entry);
 
-		err = audit_del_rule(entry,
-				     &audit_filter_list[entry->rule.listnr]);
+		err = audit_del_rule(entry);
 		audit_log_rule_change(loginuid, sessionid, sid, "remove",
 				      &entry->rule, !err);
 
-- 
cgit v0.10.2


From 5af75d8d58d0f9f7b7c0515b35786b22892d5f12 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 16 Dec 2008 05:59:26 -0500
Subject: audit: validate comparison operations, store them in sane form

Don't store the field->op in the messy (and very inconvenient for e.g.
audit_comparator()) form; translate to dense set of values and do full
validation of userland-submitted value while we are at it.

->audit_init_rule() and ->audit_match_rule() get new values now; in-tree
instances updated.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index cc71fdb..67e5dbf 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -247,6 +247,18 @@
 #define AUDIT_GREATER_THAN_OR_EQUAL	(AUDIT_GREATER_THAN|AUDIT_EQUAL)
 #define AUDIT_OPERATORS			(AUDIT_EQUAL|AUDIT_NOT_EQUAL|AUDIT_BIT_MASK)
 
+enum {
+	Audit_equal,
+	Audit_not_equal,
+	Audit_bitmask,
+	Audit_bittest,
+	Audit_lt,
+	Audit_gt,
+	Audit_le,
+	Audit_ge,
+	Audit_bad
+};
+
 /* Status symbols */
 				/* Mask values */
 #define AUDIT_STATUS_ENABLED		0x0001
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 48bddad..8ad9545 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -618,7 +618,7 @@ int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
 
 	if (pathname[0] != '/' ||
 	    rule->listnr != AUDIT_FILTER_EXIT ||
-	    op & ~AUDIT_EQUAL ||
+	    op != Audit_equal ||
 	    rule->inode_f || rule->watch || rule->tree)
 		return -EINVAL;
 	rule->tree = alloc_tree(pathname);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index e6e3829..fbf24d1 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -252,7 +252,8 @@ static inline int audit_to_inode(struct audit_krule *krule,
 				 struct audit_field *f)
 {
 	if (krule->listnr != AUDIT_FILTER_EXIT ||
-	    krule->watch || krule->inode_f || krule->tree)
+	    krule->watch || krule->inode_f || krule->tree ||
+	    (f->op != Audit_equal && f->op != Audit_not_equal))
 		return -EINVAL;
 
 	krule->inode_f = f;
@@ -270,7 +271,7 @@ static int audit_to_watch(struct audit_krule *krule, char *path, int len,
 
 	if (path[0] != '/' || path[len-1] == '/' ||
 	    krule->listnr != AUDIT_FILTER_EXIT ||
-	    op & ~AUDIT_EQUAL ||
+	    op != Audit_equal ||
 	    krule->inode_f || krule->watch || krule->tree)
 		return -EINVAL;
 
@@ -420,12 +421,32 @@ exit_err:
 	return ERR_PTR(err);
 }
 
+static u32 audit_ops[] =
+{
+	[Audit_equal] = AUDIT_EQUAL,
+	[Audit_not_equal] = AUDIT_NOT_EQUAL,
+	[Audit_bitmask] = AUDIT_BIT_MASK,
+	[Audit_bittest] = AUDIT_BIT_TEST,
+	[Audit_lt] = AUDIT_LESS_THAN,
+	[Audit_gt] = AUDIT_GREATER_THAN,
+	[Audit_le] = AUDIT_LESS_THAN_OR_EQUAL,
+	[Audit_ge] = AUDIT_GREATER_THAN_OR_EQUAL,
+};
+
+static u32 audit_to_op(u32 op)
+{
+	u32 n;
+	for (n = Audit_equal; n < Audit_bad && audit_ops[n] != op; n++)
+		;
+	return n;
+}
+
+
 /* Translate struct audit_rule to kernel's rule respresentation.
  * Exists for backward compatibility with userspace. */
 static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 {
 	struct audit_entry *entry;
-	struct audit_field *ino_f;
 	int err = 0;
 	int i;
 
@@ -435,12 +456,28 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 
 	for (i = 0; i < rule->field_count; i++) {
 		struct audit_field *f = &entry->rule.fields[i];
+		u32 n;
+
+		n = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS);
+
+		/* Support for legacy operators where
+		 * AUDIT_NEGATE bit signifies != and otherwise assumes == */
+		if (n & AUDIT_NEGATE)
+			f->op = Audit_not_equal;
+		else if (!n)
+			f->op = Audit_equal;
+		else
+			f->op = audit_to_op(n);
+
+		entry->rule.vers_ops = (n & AUDIT_OPERATORS) ? 2 : 1;
 
-		f->op = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS);
 		f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
 		f->val = rule->values[i];
 
 		err = -EINVAL;
+		if (f->op == Audit_bad)
+			goto exit_free;
+
 		switch(f->type) {
 		default:
 			goto exit_free;
@@ -462,11 +499,8 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		case AUDIT_EXIT:
 		case AUDIT_SUCCESS:
 			/* bit ops are only useful on syscall args */
-			if (f->op == AUDIT_BIT_MASK ||
-						f->op == AUDIT_BIT_TEST) {
-				err = -EINVAL;
+			if (f->op == Audit_bitmask || f->op == Audit_bittest)
 				goto exit_free;
-			}
 			break;
 		case AUDIT_ARG0:
 		case AUDIT_ARG1:
@@ -475,11 +509,8 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 			break;
 		/* arch is only allowed to be = or != */
 		case AUDIT_ARCH:
-			if ((f->op != AUDIT_NOT_EQUAL) && (f->op != AUDIT_EQUAL)
-					&& (f->op != AUDIT_NEGATE) && (f->op)) {
-				err = -EINVAL;
+			if (f->op != Audit_not_equal && f->op != Audit_equal)
 				goto exit_free;
-			}
 			entry->rule.arch_f = f;
 			break;
 		case AUDIT_PERM:
@@ -496,33 +527,10 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 				goto exit_free;
 			break;
 		}
-
-		entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1;
-
-		/* Support for legacy operators where
-		 * AUDIT_NEGATE bit signifies != and otherwise assumes == */
-		if (f->op & AUDIT_NEGATE)
-			f->op = AUDIT_NOT_EQUAL;
-		else if (!f->op)
-			f->op = AUDIT_EQUAL;
-		else if (f->op == AUDIT_OPERATORS) {
-			err = -EINVAL;
-			goto exit_free;
-		}
 	}
 
-	ino_f = entry->rule.inode_f;
-	if (ino_f) {
-		switch(ino_f->op) {
-		case AUDIT_NOT_EQUAL:
-			entry->rule.inode_f = NULL;
-		case AUDIT_EQUAL:
-			break;
-		default:
-			err = -EINVAL;
-			goto exit_free;
-		}
-	}
+	if (entry->rule.inode_f && entry->rule.inode_f->op == Audit_not_equal)
+		entry->rule.inode_f = NULL;
 
 exit_nofree:
 	return entry;
@@ -538,7 +546,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 {
 	int err = 0;
 	struct audit_entry *entry;
-	struct audit_field *ino_f;
 	void *bufp;
 	size_t remain = datasz - sizeof(struct audit_rule_data);
 	int i;
@@ -554,11 +561,11 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		struct audit_field *f = &entry->rule.fields[i];
 
 		err = -EINVAL;
-		if (!(data->fieldflags[i] & AUDIT_OPERATORS) ||
-		    data->fieldflags[i] & ~AUDIT_OPERATORS)
+
+		f->op = audit_to_op(data->fieldflags[i]);
+		if (f->op == Audit_bad)
 			goto exit_free;
 
-		f->op = data->fieldflags[i] & AUDIT_OPERATORS;
 		f->type = data->fields[i];
 		f->val = data->values[i];
 		f->lsm_str = NULL;
@@ -670,18 +677,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		}
 	}
 
-	ino_f = entry->rule.inode_f;
-	if (ino_f) {
-		switch(ino_f->op) {
-		case AUDIT_NOT_EQUAL:
-			entry->rule.inode_f = NULL;
-		case AUDIT_EQUAL:
-			break;
-		default:
-			err = -EINVAL;
-			goto exit_free;
-		}
-	}
+	if (entry->rule.inode_f && entry->rule.inode_f->op == Audit_not_equal)
+		entry->rule.inode_f = NULL;
 
 exit_nofree:
 	return entry;
@@ -721,10 +718,10 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule)
 		rule->fields[i] = krule->fields[i].type;
 
 		if (krule->vers_ops == 1) {
-			if (krule->fields[i].op & AUDIT_NOT_EQUAL)
+			if (krule->fields[i].op == Audit_not_equal)
 				rule->fields[i] |= AUDIT_NEGATE;
 		} else {
-			rule->fields[i] |= krule->fields[i].op;
+			rule->fields[i] |= audit_ops[krule->fields[i].op];
 		}
 	}
 	for (i = 0; i < AUDIT_BITMASK_SIZE; i++) rule->mask[i] = krule->mask[i];
@@ -752,7 +749,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
 		struct audit_field *f = &krule->fields[i];
 
 		data->fields[i] = f->type;
-		data->fieldflags[i] = f->op;
+		data->fieldflags[i] = audit_ops[f->op];
 		switch(f->type) {
 		case AUDIT_SUBJ_USER:
 		case AUDIT_SUBJ_ROLE:
@@ -1626,28 +1623,29 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 	return err;
 }
 
-int audit_comparator(const u32 left, const u32 op, const u32 right)
+int audit_comparator(u32 left, u32 op, u32 right)
 {
 	switch (op) {
-	case AUDIT_EQUAL:
+	case Audit_equal:
 		return (left == right);
-	case AUDIT_NOT_EQUAL:
+	case Audit_not_equal:
 		return (left != right);
-	case AUDIT_LESS_THAN:
+	case Audit_lt:
 		return (left < right);
-	case AUDIT_LESS_THAN_OR_EQUAL:
+	case Audit_le:
 		return (left <= right);
-	case AUDIT_GREATER_THAN:
+	case Audit_gt:
 		return (left > right);
-	case AUDIT_GREATER_THAN_OR_EQUAL:
+	case Audit_ge:
 		return (left >= right);
-	case AUDIT_BIT_MASK:
+	case Audit_bitmask:
 		return (left & right);
-	case AUDIT_BIT_TEST:
+	case Audit_bittest:
 		return ((left & right) == right);
+	default:
+		BUG();
+		return 0;
 	}
-	BUG();
-	return 0;
 }
 
 /* Compare given dentry name with last component in given path,
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 343c8ab..c65e4fe 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2602,7 +2602,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
 	case AUDIT_OBJ_ROLE:
 	case AUDIT_OBJ_TYPE:
 		/* only 'equals' and 'not equals' fit user, role, and type */
-		if (op != AUDIT_EQUAL && op != AUDIT_NOT_EQUAL)
+		if (op != Audit_equal && op != Audit_not_equal)
 			return -EINVAL;
 		break;
 	case AUDIT_SUBJ_SEN:
@@ -2736,10 +2736,10 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule,
 	case AUDIT_SUBJ_USER:
 	case AUDIT_OBJ_USER:
 		switch (op) {
-		case AUDIT_EQUAL:
+		case Audit_equal:
 			match = (ctxt->user == rule->au_ctxt.user);
 			break;
-		case AUDIT_NOT_EQUAL:
+		case Audit_not_equal:
 			match = (ctxt->user != rule->au_ctxt.user);
 			break;
 		}
@@ -2747,10 +2747,10 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule,
 	case AUDIT_SUBJ_ROLE:
 	case AUDIT_OBJ_ROLE:
 		switch (op) {
-		case AUDIT_EQUAL:
+		case Audit_equal:
 			match = (ctxt->role == rule->au_ctxt.role);
 			break;
-		case AUDIT_NOT_EQUAL:
+		case Audit_not_equal:
 			match = (ctxt->role != rule->au_ctxt.role);
 			break;
 		}
@@ -2758,10 +2758,10 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule,
 	case AUDIT_SUBJ_TYPE:
 	case AUDIT_OBJ_TYPE:
 		switch (op) {
-		case AUDIT_EQUAL:
+		case Audit_equal:
 			match = (ctxt->type == rule->au_ctxt.type);
 			break;
-		case AUDIT_NOT_EQUAL:
+		case Audit_not_equal:
 			match = (ctxt->type != rule->au_ctxt.type);
 			break;
 		}
@@ -2774,31 +2774,31 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule,
 			  field == AUDIT_OBJ_LEV_LOW) ?
 			 &ctxt->range.level[0] : &ctxt->range.level[1]);
 		switch (op) {
-		case AUDIT_EQUAL:
+		case Audit_equal:
 			match = mls_level_eq(&rule->au_ctxt.range.level[0],
 					     level);
 			break;
-		case AUDIT_NOT_EQUAL:
+		case Audit_not_equal:
 			match = !mls_level_eq(&rule->au_ctxt.range.level[0],
 					      level);
 			break;
-		case AUDIT_LESS_THAN:
+		case Audit_lt:
 			match = (mls_level_dom(&rule->au_ctxt.range.level[0],
 					       level) &&
 				 !mls_level_eq(&rule->au_ctxt.range.level[0],
 					       level));
 			break;
-		case AUDIT_LESS_THAN_OR_EQUAL:
+		case Audit_le:
 			match = mls_level_dom(&rule->au_ctxt.range.level[0],
 					      level);
 			break;
-		case AUDIT_GREATER_THAN:
+		case Audit_gt:
 			match = (mls_level_dom(level,
 					      &rule->au_ctxt.range.level[0]) &&
 				 !mls_level_eq(level,
 					       &rule->au_ctxt.range.level[0]));
 			break;
-		case AUDIT_GREATER_THAN_OR_EQUAL:
+		case Audit_ge:
 			match = mls_level_dom(level,
 					      &rule->au_ctxt.range.level[0]);
 			break;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 1b5551d..848212f 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2492,7 +2492,7 @@ static int smack_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
 	if (field != AUDIT_SUBJ_USER && field != AUDIT_OBJ_USER)
 		return -EINVAL;
 
-	if (op != AUDIT_EQUAL && op != AUDIT_NOT_EQUAL)
+	if (op != Audit_equal && op != Audit_not_equal)
 		return -EINVAL;
 
 	*rule = smk_import(rulestr, 0);
@@ -2556,9 +2556,9 @@ static int smack_audit_rule_match(u32 secid, u32 field, u32 op, void *vrule,
 	 * both pointers will point to the same smack_known
 	 * label.
 	 */
-	if (op == AUDIT_EQUAL)
+	if (op == Audit_equal)
 		return (rule == smack);
-	if (op == AUDIT_NOT_EQUAL)
+	if (op == Audit_not_equal)
 		return (rule != smack);
 
 	return 0;
-- 
cgit v0.10.2


From 7b574b7b0124ed344911f5d581e9bc2d83bbeb19 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Sun, 4 Jan 2009 12:00:45 -0800
Subject: cgroups: fix a race between cgroup_clone and umount

The race is calling cgroup_clone() while umounting the ns cgroup subsys,
and thus cgroup_clone() might access invalid cgroup_fs, or kill_sb() is
called after cgroup_clone() created a new dir in it.

The BUG I triggered is BUG_ON(root->number_of_cgroups != 1);

  ------------[ cut here ]------------
  kernel BUG at kernel/cgroup.c:1093!
  invalid opcode: 0000 [#1] SMP
  ...
  Process umount (pid: 5177, ti=e411e000 task=e40c4670 task.ti=e411e000)
  ...
  Call Trace:
   [<c0493df7>] ? deactivate_super+0x3f/0x51
   [<c04a3600>] ? mntput_no_expire+0xb3/0xdd
   [<c04a3ab2>] ? sys_umount+0x265/0x2ac
   [<c04a3b06>] ? sys_oldumount+0xd/0xf
   [<c0403911>] ? sysenter_do_call+0x12/0x31
  ...
  EIP: [<c0456e76>] cgroup_kill_sb+0x23/0xe0 SS:ESP 0068:e411ef2c
  ---[ end trace c766c1be3bf944ac ]---

Cc: Serge E. Hallyn <serue@us.ibm.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: "Serge E. Hallyn" <serue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 48348dd..891a84e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2945,7 +2945,11 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 	parent = task_cgroup(tsk, subsys->subsys_id);
 
 	/* Pin the hierarchy */
-	atomic_inc(&parent->root->sb->s_active);
+	if (!atomic_inc_not_zero(&parent->root->sb->s_active)) {
+		/* We race with the final deactivate_super() */
+		mutex_unlock(&cgroup_mutex);
+		return 0;
+	}
 
 	/* Keep the cgroup alive */
 	get_css_set(cg);
-- 
cgit v0.10.2


From 2e4e27c7d082b2198b63041310609d7191185a9d Mon Sep 17 00:00:00 2001
From: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Date: Sun, 4 Jan 2009 12:00:46 -0800
Subject: vmalloc.c: fix flushing in vmap_page_range()

The flush_cache_vmap in vmap_page_range() is called with the end of the
range twice.  The following patch fixes this for me.

Signed-off-by: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1ddb77b..7465f22 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -151,11 +151,12 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
  *
  * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
  */
-static int vmap_page_range(unsigned long addr, unsigned long end,
+static int vmap_page_range(unsigned long start, unsigned long end,
 				pgprot_t prot, struct page **pages)
 {
 	pgd_t *pgd;
 	unsigned long next;
+	unsigned long addr = start;
 	int err = 0;
 	int nr = 0;
 
@@ -167,7 +168,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end,
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-	flush_cache_vmap(addr, end);
+	flush_cache_vmap(start, end);
 
 	if (unlikely(err))
 		return err;
-- 
cgit v0.10.2


From 0a30c5cefa53cbac429dcb2de906c0637b646253 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sun, 4 Jan 2009 12:00:47 -0800
Subject: spi.h uses/needs device.h

Include header files as used/needed:

  In file included from drivers/leds/leds-dac124s085.c:16:
  include/linux/spi/spi.h:66: error: field 'dev' has incomplete type
  include/linux/spi/spi.h: In function 'to_spi_device':
  include/linux/spi/spi.h:100: warning: type defaults to 'int' in declaration of '__mptr'
  ...

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 4be01bb..8222931 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -19,6 +19,8 @@
 #ifndef __LINUX_SPI_H
 #define __LINUX_SPI_H
 
+#include <linux/device.h>
+
 /*
  * INTERFACES between SPI master-side drivers and SPI infrastructure.
  * (There's no SPI slave support for Linux yet...)
-- 
cgit v0.10.2


From c644f0e4b56f9a2fc066cd0d75a18074d130e4a3 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Sun, 4 Jan 2009 12:00:48 -0800
Subject: fs: introduce bgl_lock_ptr()

As suggested by Andreas Dilger, introduce a bgl_lock_ptr() helper in
<linux/blockgroup_lock.h> and add separate sb_bgl_lock() helpers to
filesystem specific header files to break the hidden dependency to
struct ext[234]_sb_info.

Also, while at it, convert the macros to static inlines to try make up
for all the times I broke Andrew Morton's tree.

Acked-by: Andreas Dilger <adilger@sun.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 445fde6..b21f167 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -146,4 +146,10 @@ struct ext4_sb_info {
 	struct flex_groups *s_flex_groups;
 };
 
+static inline spinlock_t *
+sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
+{
+	return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
+}
+
 #endif	/* _EXT4_SB */
diff --git a/include/linux/blockgroup_lock.h b/include/linux/blockgroup_lock.h
index 8607312..e44b88b 100644
--- a/include/linux/blockgroup_lock.h
+++ b/include/linux/blockgroup_lock.h
@@ -53,7 +53,10 @@ static inline void bgl_lock_init(struct blockgroup_lock *bgl)
  * The accessor is a macro so we can embed a blockgroup_lock into different
  * superblock types
  */
-#define sb_bgl_lock(sb, block_group) \
-	(&(sb)->s_blockgroup_lock.locks[(block_group) & (NR_BG_LOCKS-1)].lock)
+static inline spinlock_t *
+bgl_lock_ptr(struct blockgroup_lock *bgl, unsigned int block_group)
+{
+	return &bgl->locks[(block_group) & (NR_BG_LOCKS-1)].lock;
+}
 
 #endif
diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h
index f273415..dc541f3 100644
--- a/include/linux/ext2_fs_sb.h
+++ b/include/linux/ext2_fs_sb.h
@@ -108,4 +108,10 @@ struct ext2_sb_info {
 	struct ext2_reserve_window_node s_rsv_window_head;
 };
 
+static inline spinlock_t *
+sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group)
+{
+	return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
+}
+
 #endif	/* _LINUX_EXT2_FS_SB */
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index b65f028..e024e38 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -83,4 +83,10 @@ struct ext3_sb_info {
 #endif
 };
 
+static inline spinlock_t *
+sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
+{
+	return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
+}
+
 #endif	/* _LINUX_EXT3_FS_SB */
-- 
cgit v0.10.2


From e687d691cb3790d25e31c74f5941fd7c565e9df5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bruno=20Pr=C3=A9mont?= <bonbons@linux-vserver.org>
Date: Sun, 4 Jan 2009 13:11:54 -0800
Subject: viafb: fix crashes due to 4k stack overflow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function viafb_cursor() uses 2 stack-variables of CURSOR_SIZE bits;
CURSOR_SIZE is defined as (8 * 1024).  Using up twice 1k on stack is too
much for 4k-stack (though it works with 8k-stacks).  Make those two
variables kzalloc'ed to preserve stack space.

Also merge the whole lot of local struct's in viafb_ioctl into a union so
the stack usage gets minimized here as well.  (struct's are only accessed
in their indicidual IOCTL case) This second part is only compile-tested as
I know of no userspace app using the IOCTLs.

Signed-off-by: Bruno Prémont <bonbons@linux-vserver.org>
Cc: <JosephChan@via.com.tw>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 73ac754..e21fe5b 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -546,23 +546,25 @@ static int viafb_blank(int blank_mode, struct fb_info *info)
 
 static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 {
-	struct viafb_ioctl_mode viamode;
-	struct viafb_ioctl_samm viasamm;
-	struct viafb_driver_version driver_version;
-	struct fb_var_screeninfo sec_var;
-	struct _panel_size_pos_info panel_pos_size_para;
+	union {
+		struct viafb_ioctl_mode viamode;
+		struct viafb_ioctl_samm viasamm;
+		struct viafb_driver_version driver_version;
+		struct fb_var_screeninfo sec_var;
+		struct _panel_size_pos_info panel_pos_size_para;
+		struct viafb_ioctl_setting viafb_setting;
+		struct device_t active_dev;
+	} u;
 	u32 state_info = 0;
-	u32 viainfo_size = sizeof(struct viafb_ioctl_info);
 	u32 *viafb_gamma_table;
 	char driver_name[] = "viafb";
 
 	u32 __user *argp = (u32 __user *) arg;
 	u32 gpu32;
 	u32 video_dev_info = 0;
-	struct viafb_ioctl_setting viafb_setting = {};
-	struct device_t active_dev = {};
 
 	DEBUG_MSG(KERN_INFO "viafb_ioctl: 0x%X !!\n", cmd);
+	memset(&u, 0, sizeof(u));
 
 	switch (cmd) {
 	case VIAFB_GET_CHIP_INFO:
@@ -571,7 +573,7 @@ static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 			return -EFAULT;
 		break;
 	case VIAFB_GET_INFO_SIZE:
-		return put_user(viainfo_size, argp);
+		return put_user((u32)sizeof(struct viafb_ioctl_info), argp);
 	case VIAFB_GET_INFO:
 		return viafb_ioctl_get_viafb_info(arg);
 	case VIAFB_HOTPLUG:
@@ -584,60 +586,60 @@ static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 		viafb_hotplug = (gpu32) ? 1 : 0;
 		break;
 	case VIAFB_GET_RESOLUTION:
-		viamode.xres = (u32) viafb_hotplug_Xres;
-		viamode.yres = (u32) viafb_hotplug_Yres;
-		viamode.refresh = (u32) viafb_hotplug_refresh;
-		viamode.bpp = (u32) viafb_hotplug_bpp;
+		u.viamode.xres = (u32) viafb_hotplug_Xres;
+		u.viamode.yres = (u32) viafb_hotplug_Yres;
+		u.viamode.refresh = (u32) viafb_hotplug_refresh;
+		u.viamode.bpp = (u32) viafb_hotplug_bpp;
 		if (viafb_SAMM_ON == 1) {
-			viamode.xres_sec = viafb_second_xres;
-			viamode.yres_sec = viafb_second_yres;
-			viamode.virtual_xres_sec = viafb_second_virtual_xres;
-			viamode.virtual_yres_sec = viafb_second_virtual_yres;
-			viamode.refresh_sec = viafb_refresh1;
-			viamode.bpp_sec = viafb_bpp1;
+			u.viamode.xres_sec = viafb_second_xres;
+			u.viamode.yres_sec = viafb_second_yres;
+			u.viamode.virtual_xres_sec = viafb_second_virtual_xres;
+			u.viamode.virtual_yres_sec = viafb_second_virtual_yres;
+			u.viamode.refresh_sec = viafb_refresh1;
+			u.viamode.bpp_sec = viafb_bpp1;
 		} else {
-			viamode.xres_sec = 0;
-			viamode.yres_sec = 0;
-			viamode.virtual_xres_sec = 0;
-			viamode.virtual_yres_sec = 0;
-			viamode.refresh_sec = 0;
-			viamode.bpp_sec = 0;
+			u.viamode.xres_sec = 0;
+			u.viamode.yres_sec = 0;
+			u.viamode.virtual_xres_sec = 0;
+			u.viamode.virtual_yres_sec = 0;
+			u.viamode.refresh_sec = 0;
+			u.viamode.bpp_sec = 0;
 		}
-		if (copy_to_user(argp, &viamode, sizeof(viamode)))
+		if (copy_to_user(argp, &u.viamode, sizeof(u.viamode)))
 			return -EFAULT;
 		break;
 	case VIAFB_GET_SAMM_INFO:
-		viasamm.samm_status = viafb_SAMM_ON;
+		u.viasamm.samm_status = viafb_SAMM_ON;
 
 		if (viafb_SAMM_ON == 1) {
 			if (viafb_dual_fb) {
-				viasamm.size_prim = viaparinfo->fbmem_free;
-				viasamm.size_sec = viaparinfo1->fbmem_free;
+				u.viasamm.size_prim = viaparinfo->fbmem_free;
+				u.viasamm.size_sec = viaparinfo1->fbmem_free;
 			} else {
 				if (viafb_second_size) {
-					viasamm.size_prim =
+					u.viasamm.size_prim =
 					    viaparinfo->fbmem_free -
 					    viafb_second_size * 1024 * 1024;
-					viasamm.size_sec =
+					u.viasamm.size_sec =
 					    viafb_second_size * 1024 * 1024;
 				} else {
-					viasamm.size_prim =
+					u.viasamm.size_prim =
 					    viaparinfo->fbmem_free >> 1;
-					viasamm.size_sec =
+					u.viasamm.size_sec =
 					    (viaparinfo->fbmem_free >> 1);
 				}
 			}
-			viasamm.mem_base = viaparinfo->fbmem;
-			viasamm.offset_sec = viafb_second_offset;
+			u.viasamm.mem_base = viaparinfo->fbmem;
+			u.viasamm.offset_sec = viafb_second_offset;
 		} else {
-			viasamm.size_prim =
+			u.viasamm.size_prim =
 			    viaparinfo->memsize - viaparinfo->fbmem_used;
-			viasamm.size_sec = 0;
-			viasamm.mem_base = viaparinfo->fbmem;
-			viasamm.offset_sec = 0;
+			u.viasamm.size_sec = 0;
+			u.viasamm.mem_base = viaparinfo->fbmem;
+			u.viasamm.offset_sec = 0;
 		}
 
-		if (copy_to_user(argp, &viasamm, sizeof(viasamm)))
+		if (copy_to_user(argp, &u.viasamm, sizeof(u.viasamm)))
 			return -EFAULT;
 
 		break;
@@ -662,74 +664,75 @@ static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 			viafb_lcd_disable();
 		break;
 	case VIAFB_SET_DEVICE:
-		if (copy_from_user(&active_dev, (void *)argp,
-			sizeof(active_dev)))
+		if (copy_from_user(&u.active_dev, (void *)argp,
+			sizeof(u.active_dev)))
 			return -EFAULT;
-		viafb_set_device(active_dev);
+		viafb_set_device(u.active_dev);
 		viafb_set_par(info);
 		break;
 	case VIAFB_GET_DEVICE:
-		active_dev.crt = viafb_CRT_ON;
-		active_dev.dvi = viafb_DVI_ON;
-		active_dev.lcd = viafb_LCD_ON;
-		active_dev.samm = viafb_SAMM_ON;
-		active_dev.primary_dev = viafb_primary_dev;
+		u.active_dev.crt = viafb_CRT_ON;
+		u.active_dev.dvi = viafb_DVI_ON;
+		u.active_dev.lcd = viafb_LCD_ON;
+		u.active_dev.samm = viafb_SAMM_ON;
+		u.active_dev.primary_dev = viafb_primary_dev;
 
-		active_dev.lcd_dsp_cent = viafb_lcd_dsp_method;
-		active_dev.lcd_panel_id = viafb_lcd_panel_id;
-		active_dev.lcd_mode = viafb_lcd_mode;
+		u.active_dev.lcd_dsp_cent = viafb_lcd_dsp_method;
+		u.active_dev.lcd_panel_id = viafb_lcd_panel_id;
+		u.active_dev.lcd_mode = viafb_lcd_mode;
 
-		active_dev.xres = viafb_hotplug_Xres;
-		active_dev.yres = viafb_hotplug_Yres;
+		u.active_dev.xres = viafb_hotplug_Xres;
+		u.active_dev.yres = viafb_hotplug_Yres;
 
-		active_dev.xres1 = viafb_second_xres;
-		active_dev.yres1 = viafb_second_yres;
+		u.active_dev.xres1 = viafb_second_xres;
+		u.active_dev.yres1 = viafb_second_yres;
 
-		active_dev.bpp = viafb_bpp;
-		active_dev.bpp1 = viafb_bpp1;
-		active_dev.refresh = viafb_refresh;
-		active_dev.refresh1 = viafb_refresh1;
+		u.active_dev.bpp = viafb_bpp;
+		u.active_dev.bpp1 = viafb_bpp1;
+		u.active_dev.refresh = viafb_refresh;
+		u.active_dev.refresh1 = viafb_refresh1;
 
-		active_dev.epia_dvi = viafb_platform_epia_dvi;
-		active_dev.lcd_dual_edge = viafb_device_lcd_dualedge;
-		active_dev.bus_width = viafb_bus_width;
+		u.active_dev.epia_dvi = viafb_platform_epia_dvi;
+		u.active_dev.lcd_dual_edge = viafb_device_lcd_dualedge;
+		u.active_dev.bus_width = viafb_bus_width;
 
-		if (copy_to_user(argp, &active_dev, sizeof(active_dev)))
+		if (copy_to_user(argp, &u.active_dev, sizeof(u.active_dev)))
 			return -EFAULT;
 		break;
 
 	case VIAFB_GET_DRIVER_VERSION:
-		driver_version.iMajorNum = VERSION_MAJOR;
-		driver_version.iKernelNum = VERSION_KERNEL;
-		driver_version.iOSNum = VERSION_OS;
-		driver_version.iMinorNum = VERSION_MINOR;
+		u.driver_version.iMajorNum = VERSION_MAJOR;
+		u.driver_version.iKernelNum = VERSION_KERNEL;
+		u.driver_version.iOSNum = VERSION_OS;
+		u.driver_version.iMinorNum = VERSION_MINOR;
 
-		if (copy_to_user(argp, &driver_version,
-			sizeof(driver_version)))
+		if (copy_to_user(argp, &u.driver_version,
+			sizeof(u.driver_version)))
 			return -EFAULT;
 
 		break;
 
 	case VIAFB_SET_DEVICE_INFO:
-		if (copy_from_user(&viafb_setting,
-			argp, sizeof(viafb_setting)))
+		if (copy_from_user(&u.viafb_setting,
+			argp, sizeof(u.viafb_setting)))
 			return -EFAULT;
-		if (apply_device_setting(viafb_setting, info) < 0)
+		if (apply_device_setting(u.viafb_setting, info) < 0)
 			return -EINVAL;
 
 		break;
 
 	case VIAFB_SET_SECOND_MODE:
-		if (copy_from_user(&sec_var, argp, sizeof(sec_var)))
+		if (copy_from_user(&u.sec_var, argp, sizeof(u.sec_var)))
 			return -EFAULT;
-		apply_second_mode_setting(&sec_var);
+		apply_second_mode_setting(&u.sec_var);
 		break;
 
 	case VIAFB_GET_DEVICE_INFO:
 
-		retrieve_device_setting(&viafb_setting);
+		retrieve_device_setting(&u.viafb_setting);
 
-		if (copy_to_user(argp, &viafb_setting, sizeof(viafb_setting)))
+		if (copy_to_user(argp, &u.viafb_setting,
+				 sizeof(u.viafb_setting)))
 			return -EFAULT;
 
 		break;
@@ -806,51 +809,51 @@ static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 		break;
 
 	case VIAFB_GET_PANEL_MAX_SIZE:
-		if (copy_from_user
-		    (&panel_pos_size_para, argp, sizeof(panel_pos_size_para)))
+		if (copy_from_user(&u.panel_pos_size_para, argp,
+				   sizeof(u.panel_pos_size_para)))
 			return -EFAULT;
-		panel_pos_size_para.x = panel_pos_size_para.y = 0;
-		if (copy_to_user(argp, &panel_pos_size_para,
-		     sizeof(panel_pos_size_para)))
+		u.panel_pos_size_para.x = u.panel_pos_size_para.y = 0;
+		if (copy_to_user(argp, &u.panel_pos_size_para,
+		     sizeof(u.panel_pos_size_para)))
 			return -EFAULT;
 		break;
 	case VIAFB_GET_PANEL_MAX_POSITION:
-		if (copy_from_user
-		    (&panel_pos_size_para, argp, sizeof(panel_pos_size_para)))
+		if (copy_from_user(&u.panel_pos_size_para, argp,
+				   sizeof(u.panel_pos_size_para)))
 			return -EFAULT;
-		panel_pos_size_para.x = panel_pos_size_para.y = 0;
-		if (copy_to_user(argp, &panel_pos_size_para,
-		     sizeof(panel_pos_size_para)))
+		u.panel_pos_size_para.x = u.panel_pos_size_para.y = 0;
+		if (copy_to_user(argp, &u.panel_pos_size_para,
+				 sizeof(u.panel_pos_size_para)))
 			return -EFAULT;
 		break;
 
 	case VIAFB_GET_PANEL_POSITION:
-		if (copy_from_user
-		    (&panel_pos_size_para, argp, sizeof(panel_pos_size_para)))
+		if (copy_from_user(&u.panel_pos_size_para, argp,
+				   sizeof(u.panel_pos_size_para)))
 			return -EFAULT;
-		panel_pos_size_para.x = panel_pos_size_para.y = 0;
-		if (copy_to_user(argp, &panel_pos_size_para,
-		     sizeof(panel_pos_size_para)))
+		u.panel_pos_size_para.x = u.panel_pos_size_para.y = 0;
+		if (copy_to_user(argp, &u.panel_pos_size_para,
+				 sizeof(u.panel_pos_size_para)))
 			return -EFAULT;
 		break;
 	case VIAFB_GET_PANEL_SIZE:
-		if (copy_from_user
-		    (&panel_pos_size_para, argp, sizeof(panel_pos_size_para)))
+		if (copy_from_user(&u.panel_pos_size_para, argp,
+				   sizeof(u.panel_pos_size_para)))
 			return -EFAULT;
-		panel_pos_size_para.x = panel_pos_size_para.y = 0;
-		if (copy_to_user(argp, &panel_pos_size_para,
-		     sizeof(panel_pos_size_para)))
+		u.panel_pos_size_para.x = u.panel_pos_size_para.y = 0;
+		if (copy_to_user(argp, &u.panel_pos_size_para,
+				 sizeof(u.panel_pos_size_para)))
 			return -EFAULT;
 		break;
 
 	case VIAFB_SET_PANEL_POSITION:
-		if (copy_from_user
-		    (&panel_pos_size_para, argp, sizeof(panel_pos_size_para)))
+		if (copy_from_user(&u.panel_pos_size_para, argp,
+				   sizeof(u.panel_pos_size_para)))
 			return -EFAULT;
 		break;
 	case VIAFB_SET_PANEL_SIZE:
-		if (copy_from_user
-		    (&panel_pos_size_para, argp, sizeof(panel_pos_size_para)))
+		if (copy_from_user(&u.panel_pos_size_para, argp,
+				   sizeof(u.panel_pos_size_para)))
 			return -EFAULT;
 		break;
 
@@ -1052,10 +1055,8 @@ static void viafb_imageblit(struct fb_info *info,
 
 static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 {
-	u8 data[CURSOR_SIZE / 8];
-	u32 data_bak[CURSOR_SIZE / 32];
 	u32 temp, xx, yy, bg_col = 0, fg_col = 0;
-	int size, i, j = 0;
+	int i, j = 0;
 	static int hw_cursor;
 	struct viafb_par *p_viafb_par;
 
@@ -1178,22 +1179,29 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 	}
 
 	if (cursor->set & FB_CUR_SETSHAPE) {
-		size =
+		struct {
+			u8 data[CURSOR_SIZE / 8];
+			u32 bak[CURSOR_SIZE / 32];
+		} *cr_data = kzalloc(sizeof(*cr_data), GFP_ATOMIC);
+		int size =
 		    ((viacursor.image.width + 7) >> 3) *
 		    viacursor.image.height;
 
+		if (cr_data == NULL)
+			goto out;
+
 		if (MAX_CURS == 32) {
 			for (i = 0; i < (CURSOR_SIZE / 32); i++) {
-				data_bak[i] = 0x0;
-				data_bak[i + 1] = 0xFFFFFFFF;
+				cr_data->bak[i] = 0x0;
+				cr_data->bak[i + 1] = 0xFFFFFFFF;
 				i += 1;
 			}
 		} else if (MAX_CURS == 64) {
 			for (i = 0; i < (CURSOR_SIZE / 32); i++) {
-				data_bak[i] = 0x0;
-				data_bak[i + 1] = 0x0;
-				data_bak[i + 2] = 0xFFFFFFFF;
-				data_bak[i + 3] = 0xFFFFFFFF;
+				cr_data->bak[i] = 0x0;
+				cr_data->bak[i + 1] = 0x0;
+				cr_data->bak[i + 2] = 0xFFFFFFFF;
+				cr_data->bak[i + 3] = 0xFFFFFFFF;
 				i += 3;
 			}
 		}
@@ -1201,12 +1209,12 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 		switch (viacursor.rop) {
 		case ROP_XOR:
 			for (i = 0; i < size; i++)
-				data[i] = viacursor.mask[i];
+				cr_data->data[i] = viacursor.mask[i];
 			break;
 		case ROP_COPY:
 
 			for (i = 0; i < size; i++)
-				data[i] = viacursor.mask[i];
+				cr_data->data[i] = viacursor.mask[i];
 			break;
 		default:
 			break;
@@ -1214,23 +1222,25 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 
 		if (MAX_CURS == 32) {
 			for (i = 0; i < size; i++) {
-				data_bak[j] = (u32) data[i];
-				data_bak[j + 1] = ~data_bak[j];
+				cr_data->bak[j] = (u32) cr_data->data[i];
+				cr_data->bak[j + 1] = ~cr_data->bak[j];
 				j += 2;
 			}
 		} else if (MAX_CURS == 64) {
 			for (i = 0; i < size; i++) {
-				data_bak[j] = (u32) data[i];
-				data_bak[j + 1] = 0x0;
-				data_bak[j + 2] = ~data_bak[j];
-				data_bak[j + 3] = ~data_bak[j + 1];
+				cr_data->bak[j] = (u32) cr_data->data[i];
+				cr_data->bak[j + 1] = 0x0;
+				cr_data->bak[j + 2] = ~cr_data->bak[j];
+				cr_data->bak[j + 3] = ~cr_data->bak[j + 1];
 				j += 4;
 			}
 		}
 
 		memcpy(((struct viafb_par *)(info->par))->fbmem_virt +
 		       ((struct viafb_par *)(info->par))->cursor_start,
-		       data_bak, CURSOR_SIZE);
+		       cr_data->bak, CURSOR_SIZE);
+out:
+		kfree(cr_data);
 	}
 
 	if (viacursor.enable)
-- 
cgit v0.10.2


From 54566b2c1594c2326a645a3551f9d989f7ba3c5e Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sun, 4 Jan 2009 12:00:53 -0800
Subject: fs: symlink write_begin allocation context fix

With the write_begin/write_end aops, page_symlink was broken because it
could no longer pass a GFP_NOFS type mask into the point where the
allocations happened.  They are done in write_begin, which would always
assume that the filesystem can be entered from reclaim.  This bug could
cause filesystem deadlocks.

The funny thing with having a gfp_t mask there is that it doesn't really
allow the caller to arbitrarily tinker with the context in which it can be
called.  It couldn't ever be GFP_ATOMIC, for example, because it needs to
take the page lock.  The only thing any callers care about is __GFP_FS
anyway, so turn that into a single flag.

Add a new flag for write_begin, AOP_FLAG_NOFS.  Filesystems can now act on
this flag in their write_begin function.  Change __grab_cache_page to
accept a nofs argument as well, to honour that flag (while we're there,
change the name to grab_cache_page_write_begin which is more instructive
and does away with random leading underscores).

This is really a more flexible way to go in the end anyway -- if a
filesystem happens to want any extra allocations aside from the pagecache
ones in ints write_begin function, it may now use GFP_KERNEL (rather than
GFP_NOFS) for common case allocations (eg.  ocfs2_alloc_write_ctxt, for a
random example).

[kosaki.motohiro@jp.fujitsu.com: fix ubifs]
[kosaki.motohiro@jp.fujitsu.com: fix fuse]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <stable@kernel.org>		[2.6.28.x]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Cleaned up the calling convention: just pass in the AOP flags
  untouched to the grab_cache_page_write_begin() function.  That
  just simplifies everybody, and may even allow future expansion of the
  logic.   - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/affs/file.c b/fs/affs/file.c
index 1377b12..9246cb4 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -628,7 +628,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
 	}
 
 	index = pos >> PAGE_CACHE_SHIFT;
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index d6b85da..3fb36d4 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -144,7 +144,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 	candidate->state = AFS_WBACK_PENDING;
 	init_waitqueue_head(&candidate->waitq);
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
 		kfree(candidate);
 		return -ENOMEM;
diff --git a/fs/buffer.c b/fs/buffer.c
index 776ae09..a13f09b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1996,7 +1996,7 @@ int block_write_begin(struct file *file, struct address_space *mapping,
 	page = *pagep;
 	if (page == NULL) {
 		ownpage = 1;
-		page = __grab_cache_page(mapping, index);
+		page = grab_cache_page_write_begin(mapping, index, flags);
 		if (!page) {
 			status = -ENOMEM;
 			goto out;
@@ -2502,7 +2502,7 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index b1e1fc6..12bb656 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2074,7 +2074,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
 
 	cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
 		rc = -ENOMEM;
 		goto out;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 04d7b3f..46cec2b 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -288,7 +288,7 @@ static int ecryptfs_write_begin(struct file *file,
 	loff_t prev_page_end_size;
 	int rc = 0;
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index c4bdccf..5fa453b 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1161,7 +1161,7 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
 	to = from + len;
 
 retry:
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 297ea8d..1dd2abe 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2175,8 +2175,7 @@ retry:
 		 * We have a transaction open.  All is sweetness.  It also sets
 		 * i_size in generic_commit_write().
 		 */
-		err = __page_symlink(inode, symname, l,
-				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+		err = __page_symlink(inode, symname, l, 1);
 		if (err) {
 			drop_nlink(inode);
 			unlock_new_inode(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7c3325e..6702a49 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1346,7 +1346,7 @@ retry:
 		goto out;
 	}
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
 		ext4_journal_stop(handle);
 		ret = -ENOMEM;
@@ -2550,7 +2550,7 @@ retry:
 		goto out;
 	}
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
 		ext4_journal_stop(handle);
 		ret = -ENOMEM;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index da98a90..9fd2a5e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2212,8 +2212,7 @@ retry:
 		 * We have a transaction open.  All is sweetness.  It also sets
 		 * i_size in generic_commit_write().
 		 */
-		err = __page_symlink(inode, symname, l,
-				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+		err = __page_symlink(inode, symname, l, 1);
 		if (err) {
 			clear_nlink(inode);
 			unlock_new_inode(inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 34930a9..4c9ee70 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -646,7 +646,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
 {
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 
-	*pagep = __grab_cache_page(mapping, index);
+	*pagep = grab_cache_page_write_begin(mapping, index, flags);
 	if (!*pagep)
 		return -ENOMEM;
 	return 0;
@@ -779,7 +779,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
 			break;
 
 		err = -ENOMEM;
-		page = __grab_cache_page(mapping, index);
+		page = grab_cache_page_write_begin(mapping, index, 0);
 		if (!page)
 			break;
 
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 2756381..15f710f 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -675,7 +675,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 		goto out_trans_fail;
 
 	error = -ENOMEM;
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	*pagep = page;
 	if (unlikely(!page))
 		goto out_endtrans;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 3a31451..5c538e0 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -501,7 +501,7 @@ int hostfs_write_begin(struct file *file, struct address_space *mapping,
 {
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 
-	*pagep = __grab_cache_page(mapping, index);
+	*pagep = grab_cache_page_write_begin(mapping, index, flags);
 	if (!*pagep)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5a98aa8..5edc2bf 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -132,7 +132,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 	uint32_t pageofs = index << PAGE_CACHE_SHIFT;
 	int ret = 0;
 
-	pg = __grab_cache_page(mapping, index);
+	pg = grab_cache_page_write_begin(mapping, index, flags);
 	if (!pg)
 		return -ENOMEM;
 	*pagep = pg;
diff --git a/fs/libfs.c b/fs/libfs.c
index e960a83..bdaec17 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -360,7 +360,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
 	index = pos >> PAGE_CACHE_SHIFT;
 	from = pos & (PAGE_CACHE_SIZE - 1);
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 
diff --git a/fs/namei.c b/fs/namei.c
index dd5c9f0..df2d3df 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2817,18 +2817,23 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
 	}
 }
 
-int __page_symlink(struct inode *inode, const char *symname, int len,
-		gfp_t gfp_mask)
+/*
+ * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
+ */
+int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
 	void *fsdata;
 	int err;
 	char *kaddr;
+	unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
+	if (nofs)
+		flags |= AOP_FLAG_NOFS;
 
 retry:
 	err = pagecache_write_begin(NULL, mapping, 0, len-1,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
+				flags, &page, &fsdata);
 	if (err)
 		goto fail;
 
@@ -2852,7 +2857,7 @@ fail:
 int page_symlink(struct inode *inode, const char *symname, int len)
 {
 	return __page_symlink(inode, symname, len,
-			mapping_gfp_mask(inode->i_mapping));
+			!(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
 }
 
 const struct inode_operations page_symlink_inode_operations = {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d319b49..90f292b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -354,7 +354,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
 		file->f_path.dentry->d_name.name,
 		mapping->host->i_ino, len, (long long) pos);
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 145c2d3..ed04f47 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2561,7 +2561,7 @@ static int reiserfs_write_begin(struct file *file,
 	}
 
 	index = pos >> PAGE_CACHE_SHIFT;
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index e4f8d51..92d5e8f 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -297,7 +297,7 @@ static int smb_write_begin(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata)
 {
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	*pagep = __grab_cache_page(mapping, index);
+	*pagep = grab_cache_page_write_begin(mapping, index, flags);
 	if (!*pagep)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index fe82d24..bf37374 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -219,7 +219,8 @@ static void release_existing_page_budget(struct ubifs_info *c)
 }
 
 static int write_begin_slow(struct address_space *mapping,
-			    loff_t pos, unsigned len, struct page **pagep)
+			    loff_t pos, unsigned len, struct page **pagep,
+			    unsigned flags)
 {
 	struct inode *inode = mapping->host;
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -247,7 +248,7 @@ static int write_begin_slow(struct address_space *mapping,
 	if (unlikely(err))
 		return err;
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (unlikely(!page)) {
 		ubifs_release_budget(c, &req);
 		return -ENOMEM;
@@ -438,7 +439,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 		return -EROFS;
 
 	/* Try out the fast-path part first */
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (unlikely(!page))
 		return -ENOMEM;
 
@@ -483,7 +484,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 		unlock_page(page);
 		page_cache_release(page);
 
-		return write_begin_slow(mapping, pos, len, pagep);
+		return write_begin_slow(mapping, pos, len, pagep, flags);
 	}
 
 	/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2170ee..f2a30101 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -423,6 +423,9 @@ enum positive_aop_returns {
 
 #define AOP_FLAG_UNINTERRUPTIBLE	0x0001 /* will not do a short write */
 #define AOP_FLAG_CONT_EXPAND		0x0002 /* called from cont_expand */
+#define AOP_FLAG_NOFS			0x0004 /* used by filesystem to direct
+						* helper code (eg buffer layer)
+						* to clear GFP_FS from alloc */
 
 /*
  * oh the beauties of C type declarations.
@@ -2035,7 +2038,7 @@ extern int page_readlink(struct dentry *, char __user *, int);
 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
 extern void page_put_link(struct dentry *, struct nameidata *, void *);
 extern int __page_symlink(struct inode *inode, const char *symname, int len,
-		gfp_t gfp_mask);
+		int nofs);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern int generic_readlink(struct dentry *, char __user *, int);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 709742b..01ca085 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -241,7 +241,8 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			int tag, unsigned int nr_pages, struct page **pages);
 
-struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index);
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+			pgoff_t index, unsigned flags);
 
 /*
  * Returns locked page at given index in given cache, creating it if needed.
diff --git a/mm/filemap.c b/mm/filemap.c
index f3e5f89..f8c6927 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2140,19 +2140,24 @@ EXPORT_SYMBOL(generic_file_direct_write);
  * Find or create a page at the given pagecache position. Return the locked
  * page. This function is specifically for buffered writes.
  */
-struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+					pgoff_t index, unsigned flags)
 {
 	int status;
 	struct page *page;
+	gfp_t gfp_notmask = 0;
+	if (flags & AOP_FLAG_NOFS)
+		gfp_notmask = __GFP_FS;
 repeat:
 	page = find_lock_page(mapping, index);
 	if (likely(page))
 		return page;
 
-	page = page_cache_alloc(mapping);
+	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
 	if (!page)
 		return NULL;
-	status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+	status = add_to_page_cache_lru(page, mapping, index,
+						GFP_KERNEL & ~gfp_notmask);
 	if (unlikely(status)) {
 		page_cache_release(page);
 		if (status == -EEXIST)
@@ -2161,7 +2166,7 @@ repeat:
 	}
 	return page;
 }
-EXPORT_SYMBOL(__grab_cache_page);
+EXPORT_SYMBOL(grab_cache_page_write_begin);
 
 static ssize_t generic_perform_write(struct file *file,
 				struct iov_iter *i, loff_t pos)
-- 
cgit v0.10.2


From 099e657625e801adf82054c8050dde5aceb68452 Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Sun, 4 Jan 2009 12:00:54 -0800
Subject: rtc: add alarm/update irq interfaces

Add standard interfaces for alarm/update irqs enabling.  Drivers are no
more required to implement equivalent ioctl code as rtc-dev will provide
it.

UIE emulation should now be handled correctly and will work even for those
RTC drivers who cannot be configured to do both UIE and AIE.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Cc: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 123092d..165a818 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -102,9 +102,13 @@ config RTC_INTF_DEV_UIE_EMUL
 	depends on RTC_INTF_DEV
 	help
 	  Provides an emulation for RTC_UIE if the underlying rtc chip
-	  driver does not expose RTC_UIE ioctls.  Those requests generate
+	  driver does not expose RTC_UIE ioctls. Those requests generate
 	  once-per-second update interrupts, used for synchronization.
 
+	  The emulation code will read the time from the hardware
+	  clock several times per second, please enable this option
+	  only if you know that you really need it.
+
 config RTC_DRV_TEST
 	tristate "Test driver/device"
 	help
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index a04c1b6..fd2c652 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -307,6 +307,60 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 }
 EXPORT_SYMBOL_GPL(rtc_set_alarm);
 
+int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
+{
+	int err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return err;
+
+	if (!rtc->ops)
+		err = -ENODEV;
+	else if (!rtc->ops->alarm_irq_enable)
+		err = -EINVAL;
+	else
+		err = rtc->ops->alarm_irq_enable(rtc->dev.parent, enabled);
+
+	mutex_unlock(&rtc->ops_lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtc_alarm_irq_enable);
+
+int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled)
+{
+	int err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return err;
+
+#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
+	if (enabled == 0 && rtc->uie_irq_active) {
+		mutex_unlock(&rtc->ops_lock);
+		return rtc_dev_update_irq_enable_emul(rtc, enabled);
+	}
+#endif
+
+	if (!rtc->ops)
+		err = -ENODEV;
+	else if (!rtc->ops->update_irq_enable)
+		err = -EINVAL;
+	else
+		err = rtc->ops->update_irq_enable(rtc->dev.parent, enabled);
+
+	mutex_unlock(&rtc->ops_lock);
+
+#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
+	/*
+	 * Enable emulation if the driver did not provide
+	 * the update_irq_enable function pointer or if returned
+	 * -EINVAL to signal that it has been configured without
+	 * interrupts or that are not available at the moment.
+	 */
+	if (err == -EINVAL)
+		err = rtc_dev_update_irq_enable_emul(rtc, enabled);
+#endif
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtc_update_irq_enable);
+
 /**
  * rtc_update_irq - report RTC periodic, alarm, and/or update irqs
  * @rtc: the rtc device
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index ecdea44..45152f4 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -92,10 +92,10 @@ static void rtc_uie_timer(unsigned long data)
 	spin_unlock_irqrestore(&rtc->irq_lock, flags);
 }
 
-static void clear_uie(struct rtc_device *rtc)
+static int clear_uie(struct rtc_device *rtc)
 {
 	spin_lock_irq(&rtc->irq_lock);
-	if (rtc->irq_active) {
+	if (rtc->uie_irq_active) {
 		rtc->stop_uie_polling = 1;
 		if (rtc->uie_timer_active) {
 			spin_unlock_irq(&rtc->irq_lock);
@@ -108,9 +108,10 @@ static void clear_uie(struct rtc_device *rtc)
 			flush_scheduled_work();
 			spin_lock_irq(&rtc->irq_lock);
 		}
-		rtc->irq_active = 0;
+		rtc->uie_irq_active = 0;
 	}
 	spin_unlock_irq(&rtc->irq_lock);
+	return 0;
 }
 
 static int set_uie(struct rtc_device *rtc)
@@ -122,8 +123,8 @@ static int set_uie(struct rtc_device *rtc)
 	if (err)
 		return err;
 	spin_lock_irq(&rtc->irq_lock);
-	if (!rtc->irq_active) {
-		rtc->irq_active = 1;
+	if (!rtc->uie_irq_active) {
+		rtc->uie_irq_active = 1;
 		rtc->stop_uie_polling = 0;
 		rtc->oldsecs = tm.tm_sec;
 		rtc->uie_task_active = 1;
@@ -134,6 +135,16 @@ static int set_uie(struct rtc_device *rtc)
 	spin_unlock_irq(&rtc->irq_lock);
 	return 0;
 }
+
+int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc, unsigned int enabled)
+{
+	if (enabled)
+		return set_uie(rtc);
+	else
+		return clear_uie(rtc);
+}
+EXPORT_SYMBOL(rtc_dev_update_irq_enable_emul);
+
 #endif /* CONFIG_RTC_INTF_DEV_UIE_EMUL */
 
 static ssize_t
@@ -357,6 +368,22 @@ static long rtc_dev_ioctl(struct file *file,
 		err = rtc_irq_set_state(rtc, NULL, 0);
 		break;
 
+	case RTC_AIE_ON:
+		mutex_unlock(&rtc->ops_lock);
+		return rtc_alarm_irq_enable(rtc, 1);
+
+	case RTC_AIE_OFF:
+		mutex_unlock(&rtc->ops_lock);
+		return rtc_alarm_irq_enable(rtc, 0);
+
+	case RTC_UIE_ON:
+		mutex_unlock(&rtc->ops_lock);
+		return rtc_update_irq_enable(rtc, 1);
+
+	case RTC_UIE_OFF:
+		mutex_unlock(&rtc->ops_lock);
+		return rtc_update_irq_enable(rtc, 0);
+
 	case RTC_IRQP_SET:
 		err = rtc_irq_set_freq(rtc, NULL, arg);
 		break;
@@ -401,17 +428,6 @@ static long rtc_dev_ioctl(struct file *file,
 			err = -EFAULT;
 		return err;
 
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
-	case RTC_UIE_OFF:
-		mutex_unlock(&rtc->ops_lock);
-		clear_uie(rtc);
-		return 0;
-
-	case RTC_UIE_ON:
-		mutex_unlock(&rtc->ops_lock);
-		err = set_uie(rtc);
-		return err;
-#endif
 	default:
 		err = -ENOTTY;
 		break;
@@ -440,7 +456,10 @@ static int rtc_dev_release(struct inode *inode, struct file *file)
 	 * Leave the alarm alone; it may be set to trigger a system wakeup
 	 * later, or be used by kernel code, and is a one-shot event anyway.
 	 */
+
+	/* Keep ioctl until all drivers are converted */
 	rtc_dev_ioctl(file, RTC_UIE_OFF, 0);
+	rtc_update_irq_enable(rtc, 0);
 	rtc_irq_set_state(rtc, NULL, 0);
 
 	if (rtc->ops->release)
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 91f597a..4046b75 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -145,6 +145,8 @@ struct rtc_class_ops {
 	int (*irq_set_state)(struct device *, int enabled);
 	int (*irq_set_freq)(struct device *, int freq);
 	int (*read_callback)(struct device *, int data);
+	int (*alarm_irq_enable)(struct device *, unsigned int enabled);
+	int (*update_irq_enable)(struct device *, unsigned int enabled);
 };
 
 #define RTC_DEVICE_NAME_SIZE 20
@@ -181,7 +183,7 @@ struct rtc_device
 	struct timer_list uie_timer;
 	/* Those fields are protected by rtc->irq_lock */
 	unsigned int oldsecs;
-	unsigned int irq_active:1;
+	unsigned int uie_irq_active:1;
 	unsigned int stop_uie_polling:1;
 	unsigned int uie_task_active:1;
 	unsigned int uie_timer_active:1;
@@ -216,6 +218,10 @@ extern int rtc_irq_set_state(struct rtc_device *rtc,
 				struct rtc_task *task, int enabled);
 extern int rtc_irq_set_freq(struct rtc_device *rtc,
 				struct rtc_task *task, int freq);
+extern int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled);
+extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled);
+extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc,
+						unsigned int enabled);
 
 typedef struct rtc_task {
 	void (*func)(void *private_data);
-- 
cgit v0.10.2


From a327ca2c2674c5a9a0073421df19bfc362698136 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 8 Jul 2008 19:00:26 +0200
Subject: remove CONFIG_KMOD

Now that nothing depends on it any more, remove CONFIG_KMOD.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/init/Kconfig b/init/Kconfig
index f628171..52847ee 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -916,12 +916,6 @@ config MODULE_SRCVERSION_ALL
 	  the version).  With this option, such a "srcversion" field
 	  will be created for all modules.  If unsure, say N.
 
-config KMOD
-	def_bool y
-	help
-	  This is being removed soon.  These days, CONFIG_MODULES
-	  implies CONFIG_KMOD, so use that instead.
-
 endif # MODULES
 
 config INIT_ALL_POSSIBLE
-- 
cgit v0.10.2


From ca4787b779dd698a2a33a328aa5fa90a3e954077 Mon Sep 17 00:00:00 2001
From: Tim Abbott <tabbott@MIT.EDU>
Date: Mon, 5 Jan 2009 08:40:10 -0600
Subject: kernel/module.c: compare symbol values when marking symbols as
 exported in /proc/kallsyms.

When there are two symbols in a module with the same name, one of which is
exported, both will be marked as exported in /proc/kallsyms.  There aren't
any instances of this in the current kernel, but it is easy to construct a
simple module with two compilation units that exhibits the problem.

$ objdump -j .text -t testmod.ko | grep foo
00000000 l     F .text	00000032 foo
00000080 g     F .text	00000001 foo
$ sudo insmod testmod.ko
$ grep "T foo" /proc/kallsyms
c28e8000 T foo	[testmod]
c28e8080 T foo	[testmod]

Fix this by comparing the symbol values once we've found the exported
symbol table entry matching the symbol name.  Tested using Ksplice:

$ ksplice-create --patch=this_commit.patch --id=bar .
$ sudo ksplice-apply ksplice-bar.tar.gz
Done!
$ grep "T foo" /proc/kallsyms
c28e8080 T foo	[testmod]

Signed-off-by: Tim Abbott <tabbott@mit.edu>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/kernel/module.c b/kernel/module.c
index dd2a541..895c567 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1725,15 +1725,15 @@ static const struct kernel_symbol *lookup_symbol(const char *name,
 	return NULL;
 }
 
-static int is_exported(const char *name, const struct module *mod)
+static int is_exported(const char *name, unsigned long value,
+		       const struct module *mod)
 {
-	if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab))
-		return 1;
+	const struct kernel_symbol *ks;
+	if (!mod)
+		ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab);
 	else
-		if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms))
-			return 1;
-		else
-			return 0;
+		ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms);
+	return ks != NULL && ks->value == value;
 }
 
 /* As per nm */
@@ -2504,7 +2504,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 			strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
 				KSYM_NAME_LEN);
 			strlcpy(module_name, mod->name, MODULE_NAME_LEN);
-			*exported = is_exported(name, mod);
+			*exported = is_exported(name, *value, mod);
 			preempt_enable();
 			return 0;
 		}
-- 
cgit v0.10.2


From d1e99d7ae4e6bbd1ebb5e81ecd3af2b8793efee0 Mon Sep 17 00:00:00 2001
From: Jianjun Kong <jianjun@zeuux.org>
Date: Mon, 8 Dec 2008 14:26:29 +0800
Subject: module: fix warning of unused function when !CONFIG_PROC_FS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix this warning:
kernel/module.c:824: warning: ‘print_unload_info’ defined but not used
print_unload_info() just was used when CONFIG_PROC_FS was defined.
This patch mark print_unload_info() inline to solve the problem.

Signed-off-by: Jianjun Kong <jianjun@zeuux.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
CC: Ingo Molnar <mingo@elte.hu>
CC: Américo Wang <xiyou.wangcong@gmail.com>

diff --git a/kernel/module.c b/kernel/module.c
index 895c567..d3d2545 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -820,7 +820,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
 	return ret;
 }
 
-static void print_unload_info(struct seq_file *m, struct module *mod)
+static inline void print_unload_info(struct seq_file *m, struct module *mod)
 {
 	struct module_use *use;
 	int printed_something = 0;
@@ -893,7 +893,7 @@ void module_put(struct module *module)
 EXPORT_SYMBOL(module_put);
 
 #else /* !CONFIG_MODULE_UNLOAD */
-static void print_unload_info(struct seq_file *m, struct module *mod)
+static inline void print_unload_info(struct seq_file *m, struct module *mod)
 {
 	/* We don't know the usage count, or what modules are using. */
 	seq_printf(m, " - -");
-- 
cgit v0.10.2


From 088af9a6e05d51e7c3dc85d45d8b7a52c3ee08d7 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 31 Dec 2008 12:31:18 +0100
Subject: module: fix module loading failure of large kernel modules for parisc

When creating the final layout of a kernel module in memory, allow the
module loader to reserve some additional memory in front of a given section.
This is currently only needed for the parisc port which needs to put the
stub entries there to fulfill the 17/22bit PCREL relocations with large
kernel modules like xfs.

Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (renamed fn)

diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index eb10339..c1f40c2 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -13,6 +13,9 @@ int module_frob_arch_sections(Elf_Ehdr *hdr,
 			      char *secstrings,
 			      struct module *mod);
 
+/* Additional bytes needed by arch in front of individual sections */
+unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section);
+
 /* Allocator used for allocating struct module, core sections and init
    sections.  Returns NULL on failure. */
 void *module_alloc(unsigned long size);
diff --git a/kernel/module.c b/kernel/module.c
index d3d2545..4299aef 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1578,11 +1578,21 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
 	return ret;
 }
 
+/* Additional bytes needed by arch in front of individual sections */
+unsigned int __weak arch_mod_section_prepend(struct module *mod,
+					     unsigned int section)
+{
+	/* default implementation just returns zero */
+	return 0;
+}
+
 /* Update size with this section: return offset. */
-static long get_offset(unsigned int *size, Elf_Shdr *sechdr)
+static long get_offset(struct module *mod, unsigned int *size,
+		       Elf_Shdr *sechdr, unsigned int section)
 {
 	long ret;
 
+	*size += arch_mod_section_prepend(mod, section);
 	ret = ALIGN(*size, sechdr->sh_addralign ?: 1);
 	*size = ret + sechdr->sh_size;
 	return ret;
@@ -1622,7 +1632,7 @@ static void layout_sections(struct module *mod,
 			    || strncmp(secstrings + s->sh_name,
 				       ".init", 5) == 0)
 				continue;
-			s->sh_entsize = get_offset(&mod->core_size, s);
+			s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
 			DEBUGP("\t%s\n", secstrings + s->sh_name);
 		}
 		if (m == 0)
@@ -1640,7 +1650,7 @@ static void layout_sections(struct module *mod,
 			    || strncmp(secstrings + s->sh_name,
 				       ".init", 5) != 0)
 				continue;
-			s->sh_entsize = (get_offset(&mod->init_size, s)
+			s->sh_entsize = (get_offset(mod, &mod->init_size, s, i)
 					 | INIT_OFFSET_MASK);
 			DEBUGP("\t%s\n", secstrings + s->sh_name);
 		}
-- 
cgit v0.10.2


From c298be74492bece102f3379d14015638f1fd1fac Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 1 Jan 2009 22:25:30 +0100
Subject: parisc: fix module loading failure of large kernel modules

On 32bit (and sometimes 64bit) and with big kernel modules like xfs or
ipv6 the relocation types R_PARISC_PCREL17F and R_PARISC_PCREL22F may
fail to reach their PLT stub if we only create one big stub array for
all sections at the beginning of the core or init section.

With this patch we now instead add individual PLT stub entries
directly in front of the code sections where the stubs are actually
called. This reduces the distance between the PCREL location and the
stub entry so that the relocations can be fulfilled.

While calculating the final layout of the kernel module in memory, the
kernel module loader calls arch_mod_section_prepend() to request the
to be reserved amount of memory in front of each individual section.

Tested with 32- and 64bit kernels.

Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/parisc/include/asm/module.h b/arch/parisc/include/asm/module.h
index c2cb49e..1f41234 100644
--- a/arch/parisc/include/asm/module.h
+++ b/arch/parisc/include/asm/module.h
@@ -23,8 +23,10 @@ struct mod_arch_specific
 {
 	unsigned long got_offset, got_count, got_max;
 	unsigned long fdesc_offset, fdesc_count, fdesc_max;
-	unsigned long stub_offset, stub_count, stub_max;
-	unsigned long init_stub_offset, init_stub_count, init_stub_max;
+	struct {
+		unsigned long stub_offset;
+		unsigned int stub_entries;
+		} *section;
 	int unwind_section;
 	struct unwind_table *unwind;
 };
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 44138c3..9013243 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -6,6 +6,7 @@
  *
  *    Linux/PA-RISC Project (http://www.parisc-linux.org/)
  *    Copyright (C) 2003 Randolph Chung <tausq at debian . org>
+ *    Copyright (C) 2008 Helge Deller <deller@gmx.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
@@ -24,6 +25,19 @@
  *
  *
  *    Notes:
+ *    - PLT stub handling
+ *      On 32bit (and sometimes 64bit) and with big kernel modules like xfs or
+ *      ipv6 the relocation types R_PARISC_PCREL17F and R_PARISC_PCREL22F may
+ *      fail to reach their PLT stub if we only create one big stub array for
+ *      all sections at the beginning of the core or init section.
+ *      Instead we now insert individual PLT stub entries directly in front of
+ *      of the code sections where the stubs are actually called.
+ *      This reduces the distance between the PCREL location and the stub entry
+ *      so that the relocations can be fulfilled.
+ *      While calculating the final layout of the kernel module in memory, the
+ *      kernel module loader calls arch_mod_section_prepend() to request the
+ *      to be reserved amount of memory in front of each individual section.
+ *
  *    - SEGREL32 handling
  *      We are not doing SEGREL32 handling correctly. According to the ABI, we
  *      should do a value offset, like this:
@@ -58,9 +72,13 @@
 #define DEBUGP(fmt...)
 #endif
 
+#define RELOC_REACHABLE(val, bits) \
+	(( ( !((val) & (1<<((bits)-1))) && ((val)>>(bits)) != 0 )  ||	\
+	     ( ((val) & (1<<((bits)-1))) && ((val)>>(bits)) != (((__typeof__(val))(~0))>>((bits)+2)))) ? \
+	0 : 1)
+
 #define CHECK_RELOC(val, bits) \
-	if ( ( !((val) & (1<<((bits)-1))) && ((val)>>(bits)) != 0 )  ||	\
-	     ( ((val) & (1<<((bits)-1))) && ((val)>>(bits)) != (((__typeof__(val))(~0))>>((bits)+2)))) { \
+	if (!RELOC_REACHABLE(val, bits)) { \
 		printk(KERN_ERR "module %s relocation of symbol %s is out of range (0x%lx in %d bits)\n", \
 		me->name, strtab + sym->st_name, (unsigned long)val, bits); \
 		return -ENOEXEC;			\
@@ -92,13 +110,6 @@ static inline int in_local(struct module *me, void *loc)
 	return in_init(me, loc) || in_core(me, loc);
 }
 
-static inline int in_local_section(struct module *me, void *loc, void *dot)
-{
-	return (in_init(me, loc) && in_init(me, dot)) ||
-		(in_core(me, loc) && in_core(me, dot));
-}
-
-
 #ifndef CONFIG_64BIT
 struct got_entry {
 	Elf32_Addr addr;
@@ -258,23 +269,42 @@ static inline unsigned long count_stubs(const Elf_Rela *rela, unsigned long n)
 /* Free memory returned from module_alloc */
 void module_free(struct module *mod, void *module_region)
 {
+	kfree(mod->arch.section);
+	mod->arch.section = NULL;
+
 	vfree(module_region);
 	/* FIXME: If module_region == mod->init_region, trim exception
            table entries. */
 }
 
+/* Additional bytes needed in front of individual sections */
+unsigned int arch_mod_section_prepend(struct module *mod,
+				      unsigned int section)
+{
+	/* size needed for all stubs of this section (including
+	 * one additional for correct alignment of the stubs) */
+	return (mod->arch.section[section].stub_entries + 1)
+		* sizeof(struct stub_entry);
+}
+
 #define CONST 
 int module_frob_arch_sections(CONST Elf_Ehdr *hdr,
 			      CONST Elf_Shdr *sechdrs,
 			      CONST char *secstrings,
 			      struct module *me)
 {
-	unsigned long gots = 0, fdescs = 0, stubs = 0, init_stubs = 0;
+	unsigned long gots = 0, fdescs = 0, len;
 	unsigned int i;
 
+	len = hdr->e_shnum * sizeof(me->arch.section[0]);
+	me->arch.section = kzalloc(len, GFP_KERNEL);
+	if (!me->arch.section)
+		return -ENOMEM;
+
 	for (i = 1; i < hdr->e_shnum; i++) {
-		const Elf_Rela *rels = (void *)hdr + sechdrs[i].sh_offset;
+		const Elf_Rela *rels = (void *)sechdrs[i].sh_addr;
 		unsigned long nrels = sechdrs[i].sh_size / sizeof(*rels);
+		unsigned int count, s;
 
 		if (strncmp(secstrings + sechdrs[i].sh_name,
 			    ".PARISC.unwind", 14) == 0)
@@ -290,11 +320,23 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr,
 		 */
 		gots += count_gots(rels, nrels);
 		fdescs += count_fdescs(rels, nrels);
-		if(strncmp(secstrings + sechdrs[i].sh_name,
-			   ".rela.init", 10) == 0)
-			init_stubs += count_stubs(rels, nrels);
-		else
-			stubs += count_stubs(rels, nrels);
+
+		/* XXX: By sorting the relocs and finding duplicate entries
+		 *  we could reduce the number of necessary stubs and save
+		 *  some memory. */
+		count = count_stubs(rels, nrels);
+		if (!count)
+			continue;
+
+		/* so we need relocation stubs. reserve necessary memory. */
+		/* sh_info gives the section for which we need to add stubs. */
+		s = sechdrs[i].sh_info;
+
+		/* each code section should only have one relocation section */
+		WARN_ON(me->arch.section[s].stub_entries);
+
+		/* store number of stubs we need for this section */
+		me->arch.section[s].stub_entries += count;
 	}
 
 	/* align things a bit */
@@ -306,18 +348,8 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr,
 	me->arch.fdesc_offset = me->core_size;
 	me->core_size += fdescs * sizeof(Elf_Fdesc);
 
-	me->core_size = ALIGN(me->core_size, 16);
-	me->arch.stub_offset = me->core_size;
-	me->core_size += stubs * sizeof(struct stub_entry);
-
-	me->init_size = ALIGN(me->init_size, 16);
-	me->arch.init_stub_offset = me->init_size;
-	me->init_size += init_stubs * sizeof(struct stub_entry);
-
 	me->arch.got_max = gots;
 	me->arch.fdesc_max = fdescs;
-	me->arch.stub_max = stubs;
-	me->arch.init_stub_max = init_stubs;
 
 	return 0;
 }
@@ -380,23 +412,27 @@ enum elf_stub_type {
 };
 
 static Elf_Addr get_stub(struct module *me, unsigned long value, long addend,
-	enum elf_stub_type stub_type, int init_section)
+	enum elf_stub_type stub_type, Elf_Addr loc0, unsigned int targetsec)
 {
-	unsigned long i;
 	struct stub_entry *stub;
 
-	if(init_section) {
-		i = me->arch.init_stub_count++;
-		BUG_ON(me->arch.init_stub_count > me->arch.init_stub_max);
-		stub = me->module_init + me->arch.init_stub_offset + 
-			i * sizeof(struct stub_entry);
-	} else {
-		i = me->arch.stub_count++;
-		BUG_ON(me->arch.stub_count > me->arch.stub_max);
-		stub = me->module_core + me->arch.stub_offset + 
-			i * sizeof(struct stub_entry);
+	/* initialize stub_offset to point in front of the section */
+	if (!me->arch.section[targetsec].stub_offset) {
+		loc0 -= (me->arch.section[targetsec].stub_entries + 1) *
+				sizeof(struct stub_entry);
+		/* get correct alignment for the stubs */
+		loc0 = ALIGN(loc0, sizeof(struct stub_entry));
+		me->arch.section[targetsec].stub_offset = loc0;
 	}
 
+	/* get address of stub entry */
+	stub = (void *) me->arch.section[targetsec].stub_offset;
+	me->arch.section[targetsec].stub_offset += sizeof(struct stub_entry);
+
+	/* do not write outside available stub area */
+	BUG_ON(0 == me->arch.section[targetsec].stub_entries--);
+
+
 #ifndef CONFIG_64BIT
 /* for 32-bit the stub looks like this:
  * 	ldil L'XXX,%r1
@@ -489,15 +525,19 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
 	Elf32_Addr val;
 	Elf32_Sword addend;
 	Elf32_Addr dot;
+	Elf_Addr loc0;
+	unsigned int targetsec = sechdrs[relsec].sh_info;
 	//unsigned long dp = (unsigned long)$global$;
 	register unsigned long dp asm ("r27");
 
 	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
+	       targetsec);
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
 		/* This is where to make the change */
-		loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+		loc = (void *)sechdrs[targetsec].sh_addr
 		      + rel[i].r_offset;
+		/* This is the start of the target section */
+		loc0 = sechdrs[targetsec].sh_addr;
 		/* This is the symbol it is referring to */
 		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
 			+ ELF32_R_SYM(rel[i].r_info);
@@ -569,19 +609,32 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
 			break;
 		case R_PARISC_PCREL17F:
 			/* 17-bit PC relative address */
-			val = get_stub(me, val, addend, ELF_STUB_GOT, in_init(me, loc));
+			/* calculate direct call offset */
+			val += addend;
 			val = (val - dot - 8)/4;
-			CHECK_RELOC(val, 17)
+			if (!RELOC_REACHABLE(val, 17)) {
+				/* direct distance too far, create
+				 * stub entry instead */
+				val = get_stub(me, sym->st_value, addend,
+					ELF_STUB_DIRECT, loc0, targetsec);
+				val = (val - dot - 8)/4;
+				CHECK_RELOC(val, 17);
+			}
 			*loc = (*loc & ~0x1f1ffd) | reassemble_17(val);
 			break;
 		case R_PARISC_PCREL22F:
 			/* 22-bit PC relative address; only defined for pa20 */
-			val = get_stub(me, val, addend, ELF_STUB_GOT, in_init(me, loc));
-			DEBUGP("STUB FOR %s loc %lx+%lx at %lx\n", 
-			       strtab + sym->st_name, (unsigned long)loc, addend, 
-			       val)
+			/* calculate direct call offset */
+			val += addend;
 			val = (val - dot - 8)/4;
-			CHECK_RELOC(val, 22);
+			if (!RELOC_REACHABLE(val, 22)) {
+				/* direct distance too far, create
+				 * stub entry instead */
+				val = get_stub(me, sym->st_value, addend,
+					ELF_STUB_DIRECT, loc0, targetsec);
+				val = (val - dot - 8)/4;
+				CHECK_RELOC(val, 22);
+			}
 			*loc = (*loc & ~0x3ff1ffd) | reassemble_22(val);
 			break;
 
@@ -610,13 +663,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
 	Elf64_Addr val;
 	Elf64_Sxword addend;
 	Elf64_Addr dot;
+	Elf_Addr loc0;
+	unsigned int targetsec = sechdrs[relsec].sh_info;
 
 	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
+	       targetsec);
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
 		/* This is where to make the change */
-		loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+		loc = (void *)sechdrs[targetsec].sh_addr
 		      + rel[i].r_offset;
+		/* This is the start of the target section */
+		loc0 = sechdrs[targetsec].sh_addr;
 		/* This is the symbol it is referring to */
 		sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
 			+ ELF64_R_SYM(rel[i].r_info);
@@ -672,42 +729,40 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
 			DEBUGP("PCREL22F Symbol %s loc %p val %lx\n",
 			       strtab + sym->st_name,
 			       loc, val);
+			val += addend;
 			/* can we reach it locally? */
-			if(!in_local_section(me, (void *)val, (void *)dot)) {
-
-				if (in_local(me, (void *)val))
-					/* this is the case where the
-					 * symbol is local to the
-					 * module, but in a different
-					 * section, so stub the jump
-					 * in case it's more than 22
-					 * bits away */
-					val = get_stub(me, val, addend, ELF_STUB_DIRECT,
-						       in_init(me, loc));
-				else if (strncmp(strtab + sym->st_name, "$$", 2)
+			if (in_local(me, (void *)val)) {
+				/* this is the case where the symbol is local
+				 * to the module, but in a different section,
+				 * so stub the jump in case it's more than 22
+				 * bits away */
+				val = (val - dot - 8)/4;
+				if (!RELOC_REACHABLE(val, 22)) {
+					/* direct distance too far, create
+					 * stub entry instead */
+					val = get_stub(me, sym->st_value,
+						addend, ELF_STUB_DIRECT,
+						loc0, targetsec);
+				} else {
+					/* Ok, we can reach it directly. */
+					val = sym->st_value;
+					val += addend;
+				}
+			} else {
+				val = sym->st_value;
+				if (strncmp(strtab + sym->st_name, "$$", 2)
 				    == 0)
 					val = get_stub(me, val, addend, ELF_STUB_MILLI,
-						       in_init(me, loc));
+						       loc0, targetsec);
 				else
 					val = get_stub(me, val, addend, ELF_STUB_GOT,
-						       in_init(me, loc));
+						       loc0, targetsec);
 			}
 			DEBUGP("STUB FOR %s loc %lx, val %lx+%lx at %lx\n", 
 			       strtab + sym->st_name, loc, sym->st_value,
 			       addend, val);
-			/* FIXME: local symbols work as long as the
-			 * core and init pieces aren't separated too
-			 * far.  If this is ever broken, you will trip
-			 * the check below.  The way to fix it would
-			 * be to generate local stubs to go between init
-			 * and core */
-			if((Elf64_Sxword)(val - dot - 8) > 0x800000 -1 ||
-			   (Elf64_Sxword)(val - dot - 8) < -0x800000) {
-				printk(KERN_ERR "Module %s, symbol %s is out of range for PCREL22F relocation\n",
-				       me->name, strtab + sym->st_name);
-				return -ENOEXEC;
-			}
 			val = (val - dot - 8)/4;
+			CHECK_RELOC(val, 22);
 			*loc = (*loc & ~0x3ff1ffd) | reassemble_22(val);
 			break;
 		case R_PARISC_DIR64:
@@ -794,12 +849,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 	addr = (u32 *)entry->addr;
 	printk("INSNS: %x %x %x %x\n",
 	       addr[0], addr[1], addr[2], addr[3]);
-	printk("stubs used %ld, stubs max %ld\n"
-	       "init_stubs used %ld, init stubs max %ld\n"
-	       "got entries used %ld, gots max %ld\n"
+	printk("got entries used %ld, gots max %ld\n"
 	       "fdescs used %ld, fdescs max %ld\n",
-	       me->arch.stub_count, me->arch.stub_max,
-	       me->arch.init_stub_count, me->arch.init_stub_max,
 	       me->arch.got_count, me->arch.got_max,
 	       me->arch.fdesc_count, me->arch.fdesc_max);
 #endif
@@ -829,7 +880,10 @@ int module_finalize(const Elf_Ehdr *hdr,
 				me->name, me->arch.got_count, MAX_GOTS);
 		return -EINVAL;
 	}
-	
+
+	kfree(me->arch.section);
+	me->arch.section = NULL;
+
 	/* no symbol table */
 	if(symhdr == NULL)
 		return 0;
-- 
cgit v0.10.2


From 9ea09af3bd3090e8349ca2899ca2011bd94cda85 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 22 Dec 2008 12:36:30 +0100
Subject: stop_machine: introduce stop_machine_create/destroy.

Introduce stop_machine_create/destroy. With this interface subsystems
that need a non-failing stop_machine environment can create the
stop_machine machine threads before actually calling stop_machine.
When the threads aren't needed anymore they can be killed with
stop_machine_destroy again.

When stop_machine gets called and the threads aren't present they
will be created and destroyed automatically. This restores the old
behaviour of stop_machine.

This patch also converts cpu hotplug to the new interface since it
is special: cpu_down calls __stop_machine instead of stop_machine.
However the kstop threads will only be created when stop_machine
gets called.

Changing the code so that the threads would be created automatically
on __stop_machine is currently not possible: when __stop_machine gets
called we hold cpu_add_remove_lock, which is the same lock that
create_rt_workqueue would take. So the workqueue needs to be created
before the cpu hotplug code locks cpu_add_remove_lock.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 74d59a6..baba3a2 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -35,6 +35,24 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
  * won't come or go while it's being called.  Used by hotplug cpu.
  */
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
+
+/**
+ * stop_machine_create: create all stop_machine threads
+ *
+ * Description: This causes all stop_machine threads to be created before
+ * stop_machine actually gets called. This can be used by subsystems that
+ * need a non failing stop_machine infrastructure.
+ */
+int stop_machine_create(void);
+
+/**
+ * stop_machine_destroy: destroy all stop_machine threads
+ *
+ * Description: This causes all stop_machine threads which were created with
+ * stop_machine_create to be destroyed again.
+ */
+void stop_machine_destroy(void);
+
 #else
 
 static inline int stop_machine(int (*fn)(void *), void *data,
@@ -46,5 +64,9 @@ static inline int stop_machine(int (*fn)(void *), void *data,
 	local_irq_enable();
 	return ret;
 }
+
+static inline int stop_machine_create(void) { return 0; }
+static inline void stop_machine_destroy(void) { }
+
 #endif /* CONFIG_SMP */
 #endif /* _LINUX_STOP_MACHINE */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 47fff3b..30e74dd 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -269,8 +269,11 @@ out_release:
 
 int __ref cpu_down(unsigned int cpu)
 {
-	int err = 0;
+	int err;
 
+	err = stop_machine_create();
+	if (err)
+		return err;
 	cpu_maps_update_begin();
 
 	if (cpu_hotplug_disabled) {
@@ -297,6 +300,7 @@ int __ref cpu_down(unsigned int cpu)
 
 out:
 	cpu_maps_update_done();
+	stop_machine_destroy();
 	return err;
 }
 EXPORT_SYMBOL(cpu_down);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 286c417..0cd415e 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -38,7 +38,10 @@ struct stop_machine_data {
 static unsigned int num_threads;
 static atomic_t thread_ack;
 static DEFINE_MUTEX(lock);
-
+/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
+static DEFINE_MUTEX(setup_lock);
+/* Users of stop_machine. */
+static int refcount;
 static struct workqueue_struct *stop_machine_wq;
 static struct stop_machine_data active, idle;
 static const cpumask_t *active_cpus;
@@ -109,6 +112,43 @@ static int chill(void *unused)
 	return 0;
 }
 
+int stop_machine_create(void)
+{
+	mutex_lock(&setup_lock);
+	if (refcount)
+		goto done;
+	stop_machine_wq = create_rt_workqueue("kstop");
+	if (!stop_machine_wq)
+		goto err_out;
+	stop_machine_work = alloc_percpu(struct work_struct);
+	if (!stop_machine_work)
+		goto err_out;
+done:
+	refcount++;
+	mutex_unlock(&setup_lock);
+	return 0;
+
+err_out:
+	if (stop_machine_wq)
+		destroy_workqueue(stop_machine_wq);
+	mutex_unlock(&setup_lock);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(stop_machine_create);
+
+void stop_machine_destroy(void)
+{
+	mutex_lock(&setup_lock);
+	refcount--;
+	if (refcount)
+		goto done;
+	destroy_workqueue(stop_machine_wq);
+	free_percpu(stop_machine_work);
+done:
+	mutex_unlock(&setup_lock);
+}
+EXPORT_SYMBOL_GPL(stop_machine_destroy);
+
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	struct work_struct *sm_work;
@@ -146,19 +186,14 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	int ret;
 
+	ret = stop_machine_create();
+	if (ret)
+		return ret;
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
 	ret = __stop_machine(fn, data, cpus);
 	put_online_cpus();
-
+	stop_machine_destroy();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);
-
-static int __init stop_machine_init(void)
-{
-	stop_machine_wq = create_rt_workqueue("kstop");
-	stop_machine_work = alloc_percpu(struct work_struct);
-	return 0;
-}
-core_initcall(stop_machine_init);
-- 
cgit v0.10.2


From 9e01892c4234070bbcf3a9f582514c8b91464375 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 22 Dec 2008 12:36:31 +0100
Subject: module: convert to stop_machine_create/destroy.

The module code relies on a non-failing stop_machine call. So we create
the kstop threads in advance and with that make sure the call won't fail.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/kernel/module.c b/kernel/module.c
index 4299aef..f47cce9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -757,8 +757,16 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
 		return -EFAULT;
 	name[MODULE_NAME_LEN-1] = '\0';
 
-	if (mutex_lock_interruptible(&module_mutex) != 0)
-		return -EINTR;
+	/* Create stop_machine threads since free_module relies on
+	 * a non-failing stop_machine call. */
+	ret = stop_machine_create();
+	if (ret)
+		return ret;
+
+	if (mutex_lock_interruptible(&module_mutex) != 0) {
+		ret = -EINTR;
+		goto out_stop;
+	}
 
 	mod = find_module(name);
 	if (!mod) {
@@ -817,6 +825,8 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
 
  out:
 	mutex_unlock(&module_mutex);
+out_stop:
+	stop_machine_destroy();
 	return ret;
 }
 
@@ -1875,6 +1885,13 @@ static noinline struct module *load_module(void __user *umod,
 	/* vmalloc barfs on "unusual" numbers.  Check here */
 	if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
 		return ERR_PTR(-ENOMEM);
+
+	/* Create stop_machine threads since the error path relies on
+	 * a non-failing stop_machine call. */
+	err = stop_machine_create();
+	if (err)
+		goto free_hdr;
+
 	if (copy_from_user(hdr, umod, len) != 0) {
 		err = -EFAULT;
 		goto free_hdr;
@@ -2258,6 +2275,7 @@ static noinline struct module *load_module(void __user *umod,
 	/* Get rid of temporary copy */
 	vfree(hdr);
 
+	stop_machine_destroy();
 	/* Done! */
 	return mod;
 
@@ -2280,6 +2298,7 @@ static noinline struct module *load_module(void __user *umod,
 	kfree(args);
  free_hdr:
 	vfree(hdr);
+	stop_machine_destroy();
 	return ERR_PTR(err);
 
  truncated:
-- 
cgit v0.10.2


From 83c86984bff2d793c91eb710af7857828b9ddb49 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 4 Jan 2009 15:44:22 -0800
Subject: sparc: unify ipcbuf.h

The ony difference is the size of the mode.
sparc has extra padding to compensate for this.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 58f9b3a..aeaec45 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -1,9 +1,6 @@
 # User exported sparc header files
 include include/asm-generic/Kbuild.asm
 
-header-y += ipcbuf_32.h
-header-y += ipcbuf_64.h
-
 header-y += apc.h
 header-y += asi.h
 header-y += display7seg.h
diff --git a/arch/sparc/include/asm/ipcbuf.h b/arch/sparc/include/asm/ipcbuf.h
index 17d6ef7..66013b4 100644
--- a/arch/sparc/include/asm/ipcbuf.h
+++ b/arch/sparc/include/asm/ipcbuf.h
@@ -1,8 +1,32 @@
-#ifndef ___ASM_SPARC_IPCBUF_H
-#define ___ASM_SPARC_IPCBUF_H
-#if defined(__sparc__) && defined(__arch64__)
-#include <asm/ipcbuf_64.h>
-#else
-#include <asm/ipcbuf_32.h>
-#endif
+#ifndef __SPARC_IPCBUF_H
+#define __SPARC_IPCBUF_H
+
+/*
+ * The ipc64_perm structure for sparc/sparc64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit seq
+ * - on sparc for 32 bit mode (it is 32 bit on sparc64)
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct ipc64_perm
+{
+	__kernel_key_t	key;
+	__kernel_uid_t	uid;
+	__kernel_gid_t	gid;
+	__kernel_uid_t	cuid;
+	__kernel_gid_t	cgid;
+#ifndef __arch64__
+	unsigned short	__pad0;
 #endif
+	__kernel_mode_t	mode;
+	unsigned short	__pad1;
+	unsigned short	seq;
+	unsigned long long __unused1;
+	unsigned long long __unused2;
+};
+
+#endif /* __SPARC_IPCBUF_H */
diff --git a/arch/sparc/include/asm/ipcbuf_32.h b/arch/sparc/include/asm/ipcbuf_32.h
deleted file mode 100644
index 6387209..0000000
--- a/arch/sparc/include/asm/ipcbuf_32.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef _SPARC_IPCBUF_H
-#define _SPARC_IPCBUF_H
-
-/*
- * The ipc64_perm structure for sparc architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 32-bit mode
- * - 32-bit seq
- * - 2 miscellaneous 64-bit values (so that this structure matches
- *				    sparc64 ipc64_perm)
- */
-
-struct ipc64_perm
-{
-	__kernel_key_t		key;
-	__kernel_uid32_t	uid;
-	__kernel_gid32_t	gid;
-	__kernel_uid32_t	cuid;
-	__kernel_gid32_t	cgid;
-	unsigned short		__pad1;
-	__kernel_mode_t		mode;
-	unsigned short		__pad2;
-	unsigned short		seq;
-	unsigned long long	__unused1;
-	unsigned long long	__unused2;
-};
-
-#endif /* _SPARC_IPCBUF_H */
diff --git a/arch/sparc/include/asm/ipcbuf_64.h b/arch/sparc/include/asm/ipcbuf_64.h
deleted file mode 100644
index a44b855..0000000
--- a/arch/sparc/include/asm/ipcbuf_64.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef _SPARC64_IPCBUF_H
-#define _SPARC64_IPCBUF_H
-
-/*
- * The ipc64_perm structure for sparc64 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 32-bit seq
- * - 2 miscellaneous 64-bit values
- */
-
-struct ipc64_perm
-{
-	__kernel_key_t	key;
-	__kernel_uid_t	uid;
-	__kernel_gid_t	gid;
-	__kernel_uid_t	cuid;
-	__kernel_gid_t	cgid;
-	__kernel_mode_t	mode;
-	unsigned short	__pad1;
-	unsigned short	seq;
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-};
-
-#endif /* _SPARC64_IPCBUF_H */
-- 
cgit v0.10.2


From 473321fc373e712fbb9b88e2c0736e55fddadab8 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 4 Jan 2009 15:47:49 -0800
Subject: MAINTAINERS: update sparc maintainer

Reflect the current situation where David Miller
is the sparc maintainer.

I have tried to contact Bill on following adresses:

    wli@holomorphy.com
    wlirwin@us.ibm.com

with no success and Bill has not been active on the
sparclinux mailing list for a long time.

As sparc and sparc64 are unified I unified the two entries
in the MAINTAINERS file too.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/MAINTAINERS b/MAINTAINERS
index befacf0..141aff6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4015,10 +4015,12 @@ L:	alsa-devel@alsa-project.org (subscribers-only)
 W:	http://alsa-project.org/main/index.php/ASoC
 S:	Supported
 
-SPARC (sparc32)
-P:	William L. Irwin
-M:	wli@holomorphy.com
+SPARC + UltraSPARC (sparc/sparc64)
+P:	David S. Miller
+M:	davem@davemloft.net
 L:	sparclinux@vger.kernel.org
+T:	git kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6.git
+T:	git kernel.org:/pub/scm/linux/kernel/git/davem/sparc-next-2.6.git
 S:	Maintained
 
 SPECIALIX IO8+ MULTIPORT SERIAL CARD DRIVER
@@ -4302,13 +4304,6 @@ M:	dushistov@mail.ru
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 
-UltraSPARC (sparc64)
-P:	David S. Miller
-M:	davem@davemloft.net
-L:	sparclinux@vger.kernel.org
-T:	git kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6.git
-S:	Maintained
-
 ULTRA-WIDEBAND (UWB) SUBSYSTEM:
 P:	David Vrabel
 M:	david.vrabel@csr.com
-- 
cgit v0.10.2


From 14deae41566b5cdd992c01d0069518ced5227c83 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 4 Jan 2009 16:04:39 -0800
Subject: ipv6: Fix sporadic sendmsg -EINVAL when sending to multicast groups.

Thanks to excellent diagnosis by Eduard Guzovsky.

The core problem is that on a network with lots of active
multicast traffic, the neighbour cache can fill up.  If
we try to allocate a new route and thus neighbour cache
entry, the bog-standard GC attempt the neighbour layer does
in ineffective because route entries hold a reference
to the existing neighbour entries and GC can only liberate
entries with no references.

IPV4 already has a way to handle this, by doing a route cache
GC in such situations (when neigh attach returns -ENOBUFS).

So simply mimick this on the ipv6 side.

Tested-by: Eduard Guzovsky <eguzovsky@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index ce532f2..1459ed3 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -155,9 +155,9 @@ static inline struct neighbour * ndisc_get_neigh(struct net_device *dev, const s
 {
 
 	if (dev)
-		return __neigh_lookup(&nd_tbl, addr, dev, 1);
+		return __neigh_lookup_errno(&nd_tbl, addr, dev);
 
-	return NULL;
+	return ERR_PTR(-ENODEV);
 }
 
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 18c486c..76f06b9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -627,6 +627,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
 	rt = ip6_rt_copy(ort);
 
 	if (rt) {
+		struct neighbour *neigh;
+		int attempts = !in_softirq();
+
 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
 			if (rt->rt6i_dst.plen != 128 &&
 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
@@ -646,7 +649,35 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
 		}
 #endif
 
-		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
+	retry:
+		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
+		if (IS_ERR(neigh)) {
+			struct net *net = dev_net(rt->rt6i_dev);
+			int saved_rt_min_interval =
+				net->ipv6.sysctl.ip6_rt_gc_min_interval;
+			int saved_rt_elasticity =
+				net->ipv6.sysctl.ip6_rt_gc_elasticity;
+
+			if (attempts-- > 0) {
+				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
+				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
+
+				ip6_dst_gc(net->ipv6.ip6_dst_ops);
+
+				net->ipv6.sysctl.ip6_rt_gc_elasticity =
+					saved_rt_elasticity;
+				net->ipv6.sysctl.ip6_rt_gc_min_interval =
+					saved_rt_min_interval;
+				goto retry;
+			}
+
+			if (net_ratelimit())
+				printk(KERN_WARNING
+				       "Neighbour table overflow.\n");
+			dst_free(&rt->u.dst);
+			return NULL;
+		}
+		rt->rt6i_nexthop = neigh;
 
 	}
 
@@ -945,8 +976,11 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	dev_hold(dev);
 	if (neigh)
 		neigh_hold(neigh);
-	else
+	else {
 		neigh = ndisc_get_neigh(dev, addr);
+		if (IS_ERR(neigh))
+			neigh = NULL;
+	}
 
 	rt->rt6i_dev	  = dev;
 	rt->rt6i_idev     = idev;
@@ -1887,6 +1921,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 {
 	struct net *net = dev_net(idev->dev);
 	struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
+	struct neighbour *neigh;
 
 	if (rt == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1909,11 +1944,18 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 		rt->rt6i_flags |= RTF_ANYCAST;
 	else
 		rt->rt6i_flags |= RTF_LOCAL;
-	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
-	if (rt->rt6i_nexthop == NULL) {
+	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
+	if (IS_ERR(neigh)) {
 		dst_free(&rt->u.dst);
-		return ERR_PTR(-ENOMEM);
+
+		/* We are casting this because that is the return
+		 * value type.  But an errno encoded pointer is the
+		 * same regardless of the underlying pointer type,
+		 * and that's what we are returning.  So this is OK.
+		 */
+		return (struct rt6_info *) neigh;
 	}
+	rt->rt6i_nexthop = neigh;
 
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
 	rt->rt6i_dst.plen = 128;
-- 
cgit v0.10.2


From 949b42544a20fb22800e244a004ff45bd359a21b Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:09:40 -0800
Subject: firmware: convert acenic driver to request_firmware()

We store the firmware in its native big-endian form now, so the loop in
ace_copy() is modified to use be32_to_cpup() when writing it out.

We can forget the BSS,SBSS sections of the firmware, since we were
clearing all the device's RAM anyway. And the text,rodata,data sections
can all be loaded as a single chunk since they're contiguous (give or
take a few dozen bytes in between).

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c
index 517fce4..5b396ff 100644
--- a/drivers/net/acenic.c
+++ b/drivers/net/acenic.c
@@ -66,6 +66,7 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/sockios.h>
+#include <linux/firmware.h>
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 #include <linux/if_vlan.h>
@@ -186,8 +187,6 @@ MODULE_DEVICE_TABLE(pci, acenic_pci_tbl);
 #define MAX_RODATA_LEN	8*1024
 #define MAX_DATA_LEN	2*1024
 
-#include "acenic_firmware.h"
-
 #ifndef tigon2FwReleaseLocal
 #define tigon2FwReleaseLocal 0
 #endif
@@ -417,6 +416,10 @@ static int dis_pci_mem_inval[ACE_MAX_MOD_PARMS] = {1, 1, 1, 1, 1, 1, 1, 1};
 MODULE_AUTHOR("Jes Sorensen <jes@trained-monkey.org>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("AceNIC/3C985/GA620 Gigabit Ethernet driver");
+#ifndef CONFIG_ACENIC_OMIT_TIGON_I
+MODULE_FIRMWARE("acenic/tg1.bin");
+#endif
+MODULE_FIRMWARE("acenic/tg2.bin");
 
 module_param_array_named(link, link_state, int, NULL, 0);
 module_param_array(trace, int, NULL, 0);
@@ -943,8 +946,8 @@ static int __devinit ace_init(struct net_device *dev)
 	case 4:
 	case 5:
 		printk(KERN_INFO "  Tigon I  (Rev. %i), Firmware: %i.%i.%i, ",
-		       tig_ver, tigonFwReleaseMajor, tigonFwReleaseMinor,
-		       tigonFwReleaseFix);
+		       tig_ver, ap->firmware_major, ap->firmware_minor,
+		       ap->firmware_fix);
 		writel(0, &regs->LocalCtrl);
 		ap->version = 1;
 		ap->tx_ring_entries = TIGON_I_TX_RING_ENTRIES;
@@ -952,8 +955,8 @@ static int __devinit ace_init(struct net_device *dev)
 #endif
 	case 6:
 		printk(KERN_INFO "  Tigon II (Rev. %i), Firmware: %i.%i.%i, ",
-		       tig_ver, tigon2FwReleaseMajor, tigon2FwReleaseMinor,
-		       tigon2FwReleaseFix);
+		       tig_ver, ap->firmware_major, ap->firmware_minor,
+		       ap->firmware_fix);
 		writel(readl(&regs->CpuBCtrl) | CPU_HALT, &regs->CpuBCtrl);
 		readl(&regs->CpuBCtrl);		/* PCI write posting */
 		/*
@@ -1205,7 +1208,9 @@ static int __devinit ace_init(struct net_device *dev)
 	memset(ap->info, 0, sizeof(struct ace_info));
 	memset(ap->skb, 0, sizeof(struct ace_skb));
 
-	ace_load_firmware(dev);
+	if (ace_load_firmware(dev))
+		goto init_error;
+
 	ap->fw_running = 0;
 
 	tmp_ptr = ap->info_dma;
@@ -1441,10 +1446,7 @@ static int __devinit ace_init(struct net_device *dev)
 	if (ap->version >= 2)
 		writel(tmp, &regs->TuneFastLink);
 
-	if (ACE_IS_TIGON_I(ap))
-		writel(tigonFwStartAddr, &regs->Pc);
-	if (ap->version == 2)
-		writel(tigon2FwStartAddr, &regs->Pc);
+	writel(ap->firmware_start, &regs->Pc);
 
 	writel(0, &regs->Mb0Lo);
 
@@ -2761,8 +2763,8 @@ static void ace_get_drvinfo(struct net_device *dev,
 
 	strlcpy(info->driver, "acenic", sizeof(info->driver));
 	snprintf(info->version, sizeof(info->version), "%i.%i.%i",
-		tigonFwReleaseMajor, tigonFwReleaseMinor,
-		tigonFwReleaseFix);
+		 ap->firmware_major, ap->firmware_minor,
+		 ap->firmware_fix);
 
 	if (ap->pdev)
 		strlcpy(info->bus_info, pci_name(ap->pdev),
@@ -2869,11 +2871,10 @@ static struct net_device_stats *ace_get_stats(struct net_device *dev)
 }
 
 
-static void __devinit ace_copy(struct ace_regs __iomem *regs, void *src,
-			    u32 dest, int size)
+static void __devinit ace_copy(struct ace_regs __iomem *regs, const __be32 *src,
+			       u32 dest, int size)
 {
 	void __iomem *tdest;
-	u32 *wsrc;
 	short tsize, i;
 
 	if (size <= 0)
@@ -2885,20 +2886,15 @@ static void __devinit ace_copy(struct ace_regs __iomem *regs, void *src,
 		tdest = (void __iomem *) &regs->Window +
 			(dest & (ACE_WINDOW_SIZE - 1));
 		writel(dest & ~(ACE_WINDOW_SIZE - 1), &regs->WinBase);
-		/*
-		 * This requires byte swapping on big endian, however
-		 * writel does that for us
-		 */
-		wsrc = src;
 		for (i = 0; i < (tsize / 4); i++) {
-			writel(wsrc[i], tdest + i*4);
+			/* Firmware is big-endian */
+			writel(be32_to_cpup(src), tdest);
+			src++;
+			tdest += 4;
+			dest += 4;
+			size -= 4;
 		}
-		dest += tsize;
-		src += tsize;
-		size -= tsize;
 	}
-
-	return;
 }
 
 
@@ -2937,8 +2933,13 @@ static void __devinit ace_clear(struct ace_regs __iomem *regs, u32 dest, int siz
  */
 static int __devinit ace_load_firmware(struct net_device *dev)
 {
+	const struct firmware *fw;
+	const char *fw_name = "acenic/tg2.bin";
 	struct ace_private *ap = netdev_priv(dev);
 	struct ace_regs __iomem *regs = ap->regs;
+	const __be32 *fw_data;
+	u32 load_addr;
+	int ret;
 
 	if (!(readl(&regs->CpuCtrl) & CPU_HALTED)) {
 		printk(KERN_ERR "%s: trying to download firmware while the "
@@ -2946,28 +2947,52 @@ static int __devinit ace_load_firmware(struct net_device *dev)
 		return -EFAULT;
 	}
 
+	if (ACE_IS_TIGON_I(ap))
+		fw_name = "acenic/tg1.bin";
+
+	ret = request_firmware(&fw, fw_name, &ap->pdev->dev);
+	if (ret) {
+		printk(KERN_ERR "%s: Failed to load firmware \"%s\"\n",
+		       ap->name, fw_name);
+		return ret;
+	}
+
+	fw_data = (void *)fw->data;
+
+	/* Firmware blob starts with version numbers, followed by
+	   load and start address. Remainder is the blob to be loaded
+	   contiguously from load address. We don't bother to represent
+	   the BSS/SBSS sections any more, since we were clearing the
+	   whole thing anyway. */
+	ap->firmware_major = fw->data[0];
+	ap->firmware_minor = fw->data[1];
+	ap->firmware_fix = fw->data[2];
+
+	ap->firmware_start = be32_to_cpu(fw_data[1]);
+	if (ap->firmware_start < 0x4000 || ap->firmware_start >= 0x80000) {
+		printk(KERN_ERR "%s: bogus load address %08x in \"%s\"\n",
+		       ap->name, ap->firmware_start, fw_name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	load_addr = be32_to_cpu(fw_data[2]);
+	if (load_addr < 0x4000 || load_addr >= 0x80000) {
+		printk(KERN_ERR "%s: bogus load address %08x in \"%s\"\n",
+		       ap->name, load_addr, fw_name);
+		ret = -EINVAL;
+		goto out;
+	}
+
 	/*
-	 * Do not try to clear more than 512KB or we end up seeing
-	 * funny things on NICs with only 512KB SRAM
+	 * Do not try to clear more than 512KiB or we end up seeing
+	 * funny things on NICs with only 512KiB SRAM
 	 */
 	ace_clear(regs, 0x2000, 0x80000-0x2000);
-	if (ACE_IS_TIGON_I(ap)) {
-		ace_copy(regs, tigonFwText, tigonFwTextAddr, tigonFwTextLen);
-		ace_copy(regs, tigonFwData, tigonFwDataAddr, tigonFwDataLen);
-		ace_copy(regs, tigonFwRodata, tigonFwRodataAddr,
-			 tigonFwRodataLen);
-		ace_clear(regs, tigonFwBssAddr, tigonFwBssLen);
-		ace_clear(regs, tigonFwSbssAddr, tigonFwSbssLen);
-	}else if (ap->version == 2) {
-		ace_clear(regs, tigon2FwBssAddr, tigon2FwBssLen);
-		ace_clear(regs, tigon2FwSbssAddr, tigon2FwSbssLen);
-		ace_copy(regs, tigon2FwText, tigon2FwTextAddr,tigon2FwTextLen);
-		ace_copy(regs, tigon2FwRodata, tigon2FwRodataAddr,
-			 tigon2FwRodataLen);
-		ace_copy(regs, tigon2FwData, tigon2FwDataAddr,tigon2FwDataLen);
-	}
-
-	return 0;
+	ace_copy(regs, &fw_data[3], load_addr, fw->size-12);
+ out:
+	release_firmware(fw);
+	return ret;
 }
 
 
diff --git a/drivers/net/acenic.h b/drivers/net/acenic.h
index 4487f327..c987c9b 100644
--- a/drivers/net/acenic.h
+++ b/drivers/net/acenic.h
@@ -694,6 +694,10 @@ struct ace_private
 	u32			last_tx, last_std_rx, last_mini_rx;
 #endif
 	int			pci_using_dac;
+	u8			firmware_major;
+	u8			firmware_minor;
+	u8			firmware_fix;
+	u32			firmware_start;
 };
 
 
diff --git a/firmware/Makefile b/firmware/Makefile
index 4993a4b..e333a42 100644
--- a/firmware/Makefile
+++ b/firmware/Makefile
@@ -20,6 +20,13 @@ fw-external-y := $(subst ",,$(CONFIG_EXTRA_FIRMWARE))
 # accurate. In the latter case it doesn't matter -- it'll use $(fw-shipped-all).
 # But be aware that the config file might not be included at all.
 
+ifdef CONFIG_ACENIC_OMIT_TIGON_I
+acenic-objs := acenic/tg2.bin
+fw-shipped- += acenic/tg1.bin
+else
+acenic-objs := acenic/tg1.bin acenic/tg2.bin
+endif
+fw-shipped-$(CONFIG_ACENIC) += $(acenic-objs)
 fw-shipped-$(CONFIG_ATARI_DSP56K) += dsp56k/bootstrap.bin
 fw-shipped-$(CONFIG_ATM_AMBASSADOR) += atmsar11.fw
 fw-shipped-$(CONFIG_CASSINI) += sun/cassini.bin
diff --git a/firmware/WHENCE b/firmware/WHENCE
index 8f06639..8823a43 100644
--- a/firmware/WHENCE
+++ b/firmware/WHENCE
@@ -360,3 +360,14 @@ License: GPLv2 or OpenIB.org BSD license, no source visible
 
 --------------------------------------------------------------------------
 
+Driver: acenic -- Alteon AceNIC Gigabit Ethernet card
+
+File: acenic/tg1.bin
+File: acenic/tg2.bin
+
+Licence: Unknown
+
+Found in hex form in kernel source, but source allegedly available at
+http://alteon.shareable.org/
+
+--------------------------------------------------------------------------
diff --git a/firmware/acenic/tg1.bin.ihex b/firmware/acenic/tg1.bin.ihex
new file mode 100644
index 0000000..bef2659
--- /dev/null
+++ b/firmware/acenic/tg1.bin.ihex
@@ -0,0 +1,4573 @@
+:100000000C040B0000004000000040001000000342
+:10001000000000000000000D0000000D3C1D00016C
+:100020008FBD5C5403A0F0213C100000261040005E
+:100030000C00100C000000000000000D27BDFFD8D0
+:100040003C1CC0003C1B0013377BD8000000D021B3
+:100050003C17001336F7541802E02021340583E8DA
+:10006000AFBF00240C002488AFB000200C0023E8B0
+:10007000000000003C040001248451A42405000178
+:1000800002E03021000038213C10000126107E5093
+:10009000AFB000100C002403AFBB00143C02000FF3
+:1000A0003442FFFF020210240362102B10400009AB
+:1000B000240500033C040001248451B002003021D7
+:1000C000036038213C020010AFA200100C00240392
+:1000D000AFA00014000020213405C0003C01000145
+:1000E00000370821A02083B03C010001003708211F
+:1000F000A02083B23C01000100370821A02083B377
+:100100003C01000100370821AC2083B4A2E004D8F0
+:10011000000418C02484000100771021AC40727CD8
+:1001200000771021AC40728002E31021A445727C5C
+:100130002C8200201440FFF7000418C0000020218A
+:100140003405C000000418C0248400010077102189
+:10015000AC40737C00771021AC40738002E3102127
+:10016000A445737C2C8200805440FFF7000418C023
+:10017000AF800054AF80011C8F82004434420040A5
+:10018000AF8200448F82004434420020AF8200449A
+:100190008F420218304200021040000900000000A7
+:1001A0008F4202203C030002346300040043102508
+:1001B000AEE204C48F42021C0800107434420004F2
+:1001C0008F4202203C0300023463000600431025E6
+:1001D000AEE204C48F42021C34420006AEE204CCFC
+:1001E0008F420218304200101040000A0000000048
+:1001F0008F42021C34420004AEE204C88F42022047
+:100200003C03000A34630004004310250800108AF0
+:10021000AEE204C08F4202203C03000A34630006B1
+:1002200000431025AEE204C08F42021C3442000697
+:10023000AEE204C88F4202183042020010400003B0
+:100240002402000108001091A2E27248A2E0724864
+:1002500024020001AF8200A0AF8200B08F8300545F
+:100260008F82005408001099246300648F82005428
+:10027000006210232C4200651440FFFC00000000C7
+:10028000AF8000448F4202088F43020CAEE20010A0
+:10029000AEE300148EE400108EE5001426E2003078
+:1002A000AEE2002824020490AEE20018AF84009071
+:1002B000AF8500948EE20028AF8200B496E2001A67
+:1002C000AF82009C8F8200B08EE304CC00431025E7
+:1002D000AF8200B08F8200B0304200041440FFFDB6
+:1002E000000000008EE204508EE30454AEE304FCF0
+:1002F0008EE204FC2442E0002C4220011440000D58
+:1003000026E400308EE204508EE304543C040001E5
+:10031000248451BC3C050001AFA00010AFA0001424
+:100320008EE704FC34A5F0000C00240300603021AB
+:1003300026E400300C0024882405040027440080B3
+:100340000C0024882405008026E4777C0C00248897
+:10035000240504008F42025C26E40094AEE20060B3
+:100360008F4202602745020024060008AEE20068C2
+:10037000240200060C00249AAEE200643C023B9A80
+:100380003442CA000000202124030002AEE30074BE
+:10039000AEE30070AEE2006C240203E8AEE20104BA
+:1003A00024020001AEE30100AEE2010C3C030001B7
+:1003B0000064182190635C2002E410212484000171
+:1003C000A043009C2C82000F1440FFF800000000A6
+:1003D0008F82004002E418212484000100021702E9
+:1003E00024420030A062009C02E41021A040009C46
+:1003F00096E2046A30420003144000090000000045
+:1004000096E2047A30420003504001313C03080078
+:1004100096E2046A304200031040002A3C020700C2
+:1004200096E2047A30420003104000263C020700A6
+:1004300096E3047A96E2046A146200223C02070002
+:100440008EE204C024030001A2E34E2034420E00D9
+:10045000AEE204C08F420218304201001040000595
+:10046000000000003C0200012442E1680800111D68
+:10047000000211003C0200012442D35C0002110082
+:10048000000211823C030800004310253C010001DA
+:10049000AC2212383C0200012442F6800002110016
+:1004A000000211823C030800004310253C010001BA
+:1004B000AC2212788EE2000034424000080012386C
+:1004C000AEE2000034423000AFA200188EE206080F
+:1004D0008F43022824420001304900FF512300E2EB
+:1004E000AFA000108EE20608000210C000571021D5
+:1004F0008FA300188FA4001CAC43060CAC4406105C
+:100500008F8701202762380024E800200102102B89
+:1005100050400001276830008F820128110200043A
+:10052000000000008F820124150200070000102146
+:100530008EE201A40000302124420001AEE201A4B9
+:10054000080011A08EE201A48EE40608000420C079
+:10055000008018218EE404308EE5043400A32821A5
+:1005600000A3302B0082202100862021ACE4000073
+:10057000ACE500048EE3060824020008A4E2000EA5
+:100580002402000DACE20018ACE9001C000318C006
+:100590002463060C02E31021ACE200088EE204C4DE
+:1005A000ACE20010AF88012092E24E2014400037E8
+:1005B000240600018EE24E30000210C02442503862
+:1005C00002E220218C830000240200071462001F35
+:1005D000000000008EE34E308EE24E341062001BAD
+:1005E000240300408C82000424420001AC820004F9
+:1005F0008EE24E348EE54E30244200011043000757
+:10060000000000008EE24E342442000110A20005DA
+:10061000000000000800118A0000000014A000057E
+:10062000000000008F82012824420020AF820128B0
+:100630008F8201288C8200042C420011504000134C
+:10064000AC800000080011A0000000008EE24E30D7
+:100650002403004024420001504300030000102105
+:100660008EE24E3024420001AEE24E308EE24E3039
+:10067000000210C02442503802E220212402000768
+:10068000AC82000024020001AC82000454C0000CC3
+:10069000AEE906083C040001248451C8AFA0001054
+:1006A000AFA000148EE606088F4702283C0500091B
+:1006B0000C00240334A5F000080012230000000001
+:1006C0008F830120276238002466002000C2102B8F
+:1006D00050400001276630008F82012810C20004BC
+:1006E000000000008F82012414C2000700000000F7
+:1006F0008EE201A40000302124420001AEE201A4F8
+:10070000080012078EE201A48EE20608AC62001C0B
+:100710008EE404A08EE504A42462001CAC620008F0
+:1007200024020008A462000E24020011AC6200182A
+:10073000AC640000AC6500048EE204C4AC6200103E
+:10074000AF86012092E24E201440003724060001BB
+:100750008EE24E30000210C02442503802E22021C6
+:100760008C830000240200121462001F00000000AD
+:100770008EE34E308EE24E341062001B24030040A4
+:100780008C82000424420001AC8200048EE24E34CC
+:100790008EE54E30244200011043000700000000A7
+:1007A0008EE24E342442000110A200050000000039
+:1007B000080011F10000000014A000050000000076
+:1007C0008F82012824420020AF8201288F820128D5
+:1007D0008C8200042C42001150400013AC800000B9
+:1007E00008001207000000008EE24E302403004093
+:1007F0002442000150430003000010218EE24E30DD
+:1008000024420001AEE24E308EE24E30000210C0B3
+:100810002442503802E2202124020012AC8200005F
+:1008200024020001AC82000414C0001B0000000080
+:100830003C040001248451D0AFA00010AFA00014EC
+:100840008EE606088F4702283C0500090C002403A9
+:1008500034A5F0018EE201B024420001AEE201B005
+:10086000080012238EE201B03C040001248451DC14
+:10087000AFA000148EE606088F4702283C05000949
+:100880000C00240334A5F0058EE201AC24420001E3
+:10089000AEE201AC8EE201AC8EE201603C040001EC
+:1008A000248451E83405F00124420001AEE20160E5
+:1008B0008EE201600000302100003821AFA000105E
+:1008C0000C002403AFA00014080012380000000040
+:1008D0003C0200012442F5A800021100000211822E
+:1008E000004310253C010001AC22127896E2045A24
+:1008F00030420003104000253C050FFF8EE204C883
+:1009000034A5FFFF34420A00AEE204C88EE304C8F7
+:100910003C040001248451F424020001A2E204EC0E
+:10092000A2E204ED3C020002006218253C02000134
+:100930002442A3900045102400021082AEE304C8B4
+:100940003C030800004310253C010001AC221220AA
+:100950003C0200012442ADD4004510240002108264
+:10096000004310253C010001AC22128096E6045A97
+:100970000000382124050011AFA000100C00240352
+:10098000AFA0001408001268000000003C02000143
+:100990002442A9D400021100000211823C03080085
+:1009A000004310253C010001AC22128096E2046A4B
+:1009B00030420010144000090000000096E2047A62
+:1009C00030420010104001120000000096E2046A5C
+:1009D00030420010104000053C02070096E2047A05
+:1009E00030420010144001023C0207003442300043
+:1009F000AFA200188EE206088F43022824420001AD
+:100A0000304900FF512300E2AFA000108EE206083B
+:100A1000000210C0005710218FA300188FA4001CE3
+:100A2000AC43060CAC4406108F87012027623800C7
+:100A300024E800200102102B5040000127683000FC
+:100A40008F82012811020004000000008F8201241F
+:100A500015020007000010218EE201A400003021E1
+:100A600024420001AEE201A4080012EA8EE201A4D1
+:100A70008EE40608000420C0008018218EE40430B3
+:100A80008EE5043400A3282100A3302B008220210E
+:100A900000862021ACE40000ACE500048EE30608EB
+:100AA00024020008A4E2000E2402000DACE20018AB
+:100AB000ACE9001C000318C02463060C02E31021FB
+:100AC000ACE200088EE204C4ACE20010AF88012062
+:100AD00092E24E2014400037240600018EE24E3090
+:100AE000000210C02442503802E220218C83000012
+:100AF000240200071462001F000000008EE34E3045
+:100B00008EE24E341062001B240300408C820004ED
+:100B100024420001AC8200048EE24E348EE54E3059
+:100B20002442000110430007000000008EE24E3412
+:100B30002442000110A2000500000000080012D4A9
+:100B40000000000014A00005000000008F820128B2
+:100B500024420020AF8201288F8201288C82000469
+:100B60002C42001150400013AC800000080012EA33
+:100B7000000000008EE24E302403004024420001B9
+:100B800050430003000010218EE24E302442000149
+:100B9000AEE24E308EE24E30000210C02442503899
+:100BA00002E2202124020007AC820000240200019E
+:100BB000AC82000454C0000CAEE906083C040001FD
+:100BC000248451C8AFA00010AFA000148EE6060820
+:100BD0008F4702283C0500090C00240334A5F000CF
+:100BE0000800136D000000008F8301202762380089
+:100BF0002466002000C2102B504000012766300000
+:100C00008F82012810C20004000000008F8201249E
+:100C100014C20007000000008EE201A40000302191
+:100C200024420001AEE201A4080013518EE201A4A7
+:100C30008EE20608AC62001C8EE404A08EE504A4DB
+:100C40002462001CAC62000824020008A462000EAA
+:100C500024020011AC620018AC640000AC65000412
+:100C60008EE204C4AC620010AF86012092E24E20F6
+:100C700014400037240600018EE24E30000210C0FE
+:100C80002442503802E220218C830000240200120A
+:100C90001462001F000000008EE34E308EE24E34DE
+:100CA0001062001B240300408C82000424420001D7
+:100CB000AC8200048EE24E348EE54E3024420001B8
+:100CC00010430007000000008EE24E342442000171
+:100CD00010A20005000000000800133B0000000007
+:100CE00014A00005000000008F820128244200208B
+:100CF000AF8201288F8201288C8200042C420011CF
+:100D000050400013AC8000000800135100000000A8
+:100D10008EE24E3024030040244200015043000381
+:100D2000000010218EE24E3024420001AEE24E302F
+:100D30008EE24E30000210C02442503802E22021E0
+:100D400024020012AC82000024020001AC820004E4
+:100D500014C0001B000000003C040001248451D09A
+:100D6000AFA00010AFA000148EE606088F4702283F
+:100D70003C0500090C00240334A5F0018EE201B00B
+:100D800024420001AEE201B00800136D8EE201B012
+:100D90003C040001248451DCAFA000148EE6060858
+:100DA0008F4702283C0500090C00240334A5F005F8
+:100DB0008EE201AC24420001AEE201AC8EE201AC55
+:100DC0008EE201603C040001248451E83405F00205
+:100DD00024420001AEE201608EE201600000302199
+:100DE00000003821AFA000100C002403AFA00014B5
+:100DF00096E6047A96E7046A3C04000124845200D3
+:100E000024050012AFA000100C002403AFA00014B2
+:100E10000C004500000000000C002318000000003A
+:100E20003C06000134C63800AEE00608AF40022898
+:100E3000AF40022C96E304588EE400003C0512D823
+:100E400034A5C35827623800AEE2725827623800D2
+:100E5000AEE2726027623800AEE27264036610216F
+:100E6000AEE272702402FFFFAEE004D4AEE004E014
+:100E7000AEE004E4AEE004F0A2E004F4AEE00E0C58
+:100E8000AEE00E18AEE00E10AEE00E14AEE00E1C9A
+:100E9000AEE0724CAEE05244AEE05240AEE0523CA6
+:100EA000AEE07250AEE07254AEE0725CAEE07268DA
+:100EB000AEE004D02463FFFF00852025AEE304F8F4
+:100EC000AEE40000AF800060AF8200643C0201002D
+:100ED000AFA200188EE206088F43022824420001C8
+:100EE000304900FF512300E2AFA000108EE2060857
+:100EF000000210C0005710218FA300188FA4001CFF
+:100F0000AC43060CAC4406108F87012027623800E2
+:100F100024E800200102102B504000012768300017
+:100F20008F82012811020004000000008F8201243A
+:100F300015020007000010218EE201A400003021FC
+:100F400024420001AEE201A4080014228EE201A4B2
+:100F50008EE40608000420C0008018218EE40430CE
+:100F60008EE5043400A3282100A3302B0082202129
+:100F700000862021ACE40000ACE500048EE3060806
+:100F800024020008A4E2000E2402000DACE20018C6
+:100F9000ACE9001C000318C02463060C02E3102116
+:100FA000ACE200088EE204C4ACE20010AF8801207D
+:100FB00092E24E2014400037240600018EE24E30AB
+:100FC000000210C02442503802E220218C8300002D
+:100FD000240200071462001F000000008EE34E3060
+:100FE0008EE24E341062001B240300408C82000409
+:100FF00024420001AC8200048EE24E348EE54E3075
+:101000002442000110430007000000008EE24E342D
+:101010002442000110A20005000000000800140C8A
+:101020000000000014A00005000000008F820128CD
+:1010300024420020AF8201288F8201288C82000484
+:101040002C42001150400013AC8000000800142214
+:10105000000000008EE24E302403004024420001D4
+:1010600050430003000010218EE24E302442000164
+:10107000AEE24E308EE24E30000210C024425038B4
+:1010800002E2202124020007AC82000024020001B9
+:10109000AC82000454C0000CAEE906083C04000118
+:1010A000248451C8AFA00010AFA000148EE606083B
+:1010B0008F4702283C0500090C00240334A5F000EA
+:1010C000080014A5000000008F830120276238006B
+:1010D0002466002000C2102B50400001276630001B
+:1010E0008F82012810C20004000000008F820124BA
+:1010F00014C20007000000008EE201A400003021AD
+:1011000024420001AEE201A4080014898EE201A489
+:101110008EE20608AC62001C8EE404A08EE504A4F6
+:101120002462001CAC62000824020008A462000EC5
+:1011300024020011AC620018AC640000AC6500042D
+:101140008EE204C4AC620010AF86012092E24E2011
+:1011500014400037240600018EE24E30000210C019
+:101160002442503802E220218C8300002402001225
+:101170001462001F000000008EE34E308EE24E34F9
+:101180001062001B240300408C82000424420001F2
+:10119000AC8200048EE24E348EE54E3024420001D3
+:1011A00010430007000000008EE24E34244200018C
+:1011B00010A20005000000000800147300000000E9
+:1011C00014A00005000000008F82012824420020A6
+:1011D000AF8201288F8201288C8200042C420011EA
+:1011E00050400013AC80000008001489000000008B
+:1011F0008EE24E302403004024420001504300039D
+:10120000000010218EE24E3024420001AEE24E304A
+:101210008EE24E30000210C02442503802E22021FB
+:1012200024020012AC82000024020001AC820004FF
+:1012300014C0001B000000003C040001248451D0B5
+:10124000AFA00010AFA000148EE606088F4702285A
+:101250003C0500090C00240334A5F0018EE201B026
+:1012600024420001AEE201B0080014A58EE201B0F4
+:101270003C040001248451DCAFA000148EE6060873
+:101280008F4702283C0500090C00240334A5F00513
+:101290008EE201AC24420001AEE201AC8EE201AC70
+:1012A0008EE2015424420001AEE201540C0014DC31
+:1012B0008EE201548F8200A0304200041440FFFDF2
+:1012C000000000008F8200403042000114400008FE
+:1012D000000000008F43010424020001106200049A
+:1012E000000000008F420264104000060000000071
+:1012F0008EE2017C24420001AEE2017C080014C5AC
+:101300008EE2017C8F82004434420004AF820044AC
+:101310008EE2017824420001AEE201788EE201788B
+:101320008F8200D88F8300D400431023AEE2726C0A
+:101330008EE2726C1C4000033C030001004310214C
+:10134000AEE2726C0C004064000000000C004440EF
+:10135000AF8002288FBF00248FB0002003E0000878
+:1013600027BD002803E000080000000003E000089B
+:101370000000000000000000000000002402002C1B
+:10138000AF820050AEE072748F420238AEE27278E3
+:101390008F82005424420067AF820058AEE07B8801
+:1013A000AEE07B8CAEE07B843C010001003708217D
+:1013B000AC2083BC3C0100010037082103E0000899
+:1013C000A02083B927BDFFD8AFBF0024AFB0002055
+:1013D0008F8200543C0300018C635CD82442006778
+:1013E0001060000DAF8200583C0200010057102130
+:1013F000904283B8104000053C0302003C0100010C
+:101400000037082108001503A02083B88EE20000F1
+:1014100000431025AEE200008F4202183042010066
+:10142000104000C6000000008F8200B0304200046F
+:10143000104000C2000000003C03000100771821AA
+:101440008C6383D08F820104146200B4000000001A
+:101450003C030001007718218C6383D48F8200B491
+:10146000146200AE000000008F8200B03C030080D8
+:10147000004310241040000D000000008F82011C6A
+:1014800034420002AF82011C8F8200B02403FFFBB4
+:1014900000431024AF8200B08F82011C2403FFFDA3
+:1014A00000431024080015CCAF82011C3C0300014E
+:1014B000007718218C6383D08F820104146200822C
+:1014C000000000003C030001007718218C6383D4E6
+:1014D0008F8200B41462007C000000003C07000111
+:1014E00000F738218CE783D08F8200B03C040001E4
+:1014F00024845270AFA00014AFA200108F8600B0F9
+:101500003C0500050C00240334A509008F82011C52
+:1015100034420002AF82011C8F8301048F8200B02D
+:1015200034420001AF8200B0AF8301048F830120F9
+:10153000276238002466002000C2102B50400001B2
+:10154000276630008F82012810C2000400000000CE
+:101550008F82012414C20006000000008EE201A464
+:1015600024420001AEE201A4080015A08EE201A40D
+:101570008F4402088F45020C26E20030AC6200085E
+:1015800024020400A462000E2402000FAC620018C2
+:10159000AC60001CAC640000AC6500048EE204C4C6
+:1015A000AC620010AF86012092E24E20144000375A
+:1015B000000000008EE24E30000210C0244250387D
+:1015C00002E220218C830000240200071462001F25
+:1015D000000000008EE34E308EE24E341062001B9D
+:1015E000240300408C82000424420001AC820004E9
+:1015F0008EE24E348EE54E30244200011043000747
+:10160000000000008EE24E342442000110A20005CA
+:10161000000000000800158A0000000014A000056A
+:10162000000000008F82012824420020AF820128A0
+:101630008F8201288C8200042C420011504000133C
+:10164000AC800000080015A0000000008EE24E30C3
+:1016500024030040244200015043000300001021F5
+:101660008EE24E3024420001AEE24E308EE24E3029
+:10167000000210C02442503802E220212402000758
+:10168000AC82000024020001AC8200048F82011CA5
+:101690002403FFFD00431024AF82011C8EE201E40D
+:1016A0003C07000100F738218CE783D02442000179
+:1016B000AEE201E48EE201E43C0400012484527CA9
+:1016C000080015BDAFA000108F8201043C0100018D
+:1016D00000370821AC2283D08F8200B43C07000180
+:1016E00000F738218CE783D03C0400012484528425
+:1016F0003C01000100370821AC2283D4AFA00010C8
+:10170000AFA000148F8600B03C0500050C00240338
+:1017100034A50900080015CC000000008F820104E8
+:101720003C01000100370821AC2283D08F8200B435
+:101730003C01000100370821AC2283D48EE2727490
+:1017400092E304F42442006714600006AEE272746F
+:101750008EE272748F4302340043102B1440007BDE
+:10176000000000008EE304E48EE204F8146200043A
+:101770000000000092E204F450400074A2E004F47F
+:101780008F830120276238002466002000C2102BBE
+:1017900050400001276630008F82012810C20004EB
+:1017A000000000008F82012414C200070000000026
+:1017B0008EE201A40000802124420001AEE201A4D7
+:1017C000080016378EE201A48EE204E4AC62001C2D
+:1017D0008EE404B08EE504B42462001CAC62000800
+:1017E00024020008A462000E24020011AC6200185A
+:1017F000AC640000AC6500048EE204C4AC6200106E
+:10180000AF86012092E24E201440003724100001E0
+:101810008EE24E30000210C02442503802E22021F5
+:101820008C830000240200121462001F00000000DC
+:101830008EE34E308EE24E341062001B24030040D3
+:101840008C82000424420001AC8200048EE24E34FB
+:101850008EE54E30244200011043000700000000D6
+:101860008EE24E342442000110A200050000000068
+:10187000080016210000000014A000050000000070
+:101880008F82012824420020AF8201288F82012804
+:101890008C8200042C42001150400013AC800000E8
+:1018A00008001637000000008EE24E30240300408E
+:1018B0002442000150430003000010218EE24E300C
+:1018C00024420001AEE24E308EE24E30000210C0E3
+:1018D0002442503802E2202124020012AC8200008F
+:1018E00024020001AC8200045600000B2410000109
+:1018F0008EE204E43C0400012484528CAFA0001466
+:10190000AFA200108EE606088F4702283C050009AA
+:101910000C00240334A5F006160000032402000185
+:1019200008001650A2E204F48EE201702442000185
+:10193000AEE201708EE201708EE204E4A2E004F4F3
+:10194000AEE004F0AEE07274AEE204F88EE20E1C7B
+:101950001040006D000000008F83012027623800D6
+:101960002466002000C2102B504000012766300082
+:101970008F82012810C20004000000008F82012421
+:1019800014C20007000000008EE201A400008021C4
+:1019900024420001AEE201A4080016AD8EE201A4CB
+:1019A0008EE2724CAC62001C8EE404A88EE504AC9E
+:1019B0002462001CAC62000824020008A462000E2D
+:1019C00024020011AC620018AC640000AC65000495
+:1019D0008EE204C4AC620010AF86012092E24E2079
+:1019E00014400037241000018EE24E30000210C077
+:1019F0002442503802E220218C830000240200128D
+:101A00001462001F000000008EE34E308EE24E3460
+:101A10001062001B240300408C8200042442000159
+:101A2000AC8200048EE24E348EE54E30244200013A
+:101A300010430007000000008EE24E3424420001F3
+:101A400010A200050000000008001697000000002A
+:101A500014A00005000000008F820128244200200D
+:101A6000AF8201288F8201288C8200042C42001151
+:101A700050400013AC800000080016AD00000000CC
+:101A80008EE24E3024030040244200015043000304
+:101A9000000010218EE24E3024420001AEE24E30B2
+:101AA0008EE24E30000210C02442503802E2202163
+:101AB00024020012AC82000024020001AC82000467
+:101AC0005600000B241000018EE2724C3C04000111
+:101AD00024845298AFA00014AFA200108EE6724C7E
+:101AE0008F4702803C0500090C00240334A5F00850
+:101AF00056000001AEE00E1C8EE20174244200018B
+:101B0000AEE201748EE201748EE24E2410400019A0
+:101B100000000000AEE04E248F8200403042000101
+:101B200014400008000000008F430104240200015B
+:101B300010620004000000008F42026410400006A2
+:101B4000000000008EE2017C24420001AEE2017C34
+:101B5000080016DA8EE2017C8F82004434420004D1
+:101B6000AF8200448EE2017824420001AEE20178A7
+:101B70008EE201788EE272782442FF99AEE27278AA
+:101B80008EE272781C4002AD000000008F420238E5
+:101B9000104002AA000000003C0200010057102182
+:101BA000904283E0144002A5000000008F420080B4
+:101BB000AEE2004C8F4200C0AEE200488F4200848B
+:101BC000AEE200388F420084AEE202448F420088C9
+:101BD000AEE202488F42008CAEE2024C8F4200908F
+:101BE000AEE202508F420094AEE202548F4200985F
+:101BF000AEE202588F42009CAEE2025C8F4200A02F
+:101C0000AEE202608F4200A4AEE202648F4200A8FE
+:101C1000AEE202688F4200ACAEE2026C8F4200B0CE
+:101C2000AEE202708F4200B4AEE202748F4200B89E
+:101C3000AEE202788F4200BC24040001AEE2027CD6
+:101C4000AEE0003C00041080005710218EE3003C01
+:101C50008C42024424840001006218212C82000F6F
+:101C6000AEE3003C1440FFF8000410808F4200CC2B
+:101C7000AEE200508F4200D0AEE200548F830120CC
+:101C8000276238002466002000C2102B504000015B
+:101C9000276630008F82012810C200040000000077
+:101CA0008F82012414C20007000000008EE201A40C
+:101CB0000000802124420001AEE201A40800177553
+:101CC0008EE201A48F4402088F45020C26E2003008
+:101CD000AC62000824020400A462000E2402000F7B
+:101CE000AC620018AC60001CAC640000AC65000481
+:101CF0008EE204C4AC620010AF86012092E24E2056
+:101D000014400037241000018EE24E30000210C053
+:101D10002442503802E220218C8300002402000774
+:101D20001462001F000000008EE34E308EE24E343D
+:101D30001062001B240300408C8200042442000136
+:101D4000AC8200048EE24E348EE54E302442000117
+:101D500010430007000000008EE24E3424420001D0
+:101D600010A20005000000000800175F000000003E
+:101D700014A00005000000008F82012824420020EA
+:101D8000AF8201288F8201288C8200042C4200112E
+:101D900050400013AC8000000800177500000000E0
+:101DA0008EE24E30240300402442000150430003E1
+:101DB000000010218EE24E3024420001AEE24E308F
+:101DC0008EE24E30000210C02442503802E2202140
+:101DD00024020007AC82000024020001AC8200044F
+:101DE000120002123C020400AFA200183C020001E3
+:101DF00000571021904283B01040010B00000000FA
+:101E00008EE206088F43022824420001304A00FF78
+:101E1000514300FDAFA000108EE20608000210C082
+:101E2000005710218FA300188FA4001CAC43060C90
+:101E3000AC4406108F8300548F8200542469003212
+:101E4000012210232C4200331040006A0000582168
+:101E500024180008240F000D240D0007240C004056
+:101E6000240E00018F8701202762380024E800201B
+:101E70000102102B50400001276830008F8201289A
+:101E800011020004000000008F82012415020007E7
+:101E9000000010218EE201A40000802124420001F4
+:101EA000AEE201A4080017F38EE201A48EE4060856
+:101EB000000420C0008018218EE404308EE5043434
+:101EC00000A3282100A3302B00822021008620219E
+:101ED000ACE40000ACE500048EE20608A4F8000EB5
+:101EE000ACEF0018ACEA001C000210C02442060C43
+:101EF00002E21021ACE200088EE204C4ACE2001061
+:101F0000AF88012092E24E201440003324100001DB
+:101F10008EE24E30000210C02442503802E22021EE
+:101F20008C820000144D001F000000008EE34E3034
+:101F30008EE24E341062001B000000008C82000410
+:101F400024420001AC8200048EE24E348EE34E3017
+:101F500024420001104C0007000000008EE24E34C5
+:101F6000244200011062000500000000080017E094
+:101F70000000000014600005000000008F820128AE
+:101F800024420020AF8201288F8201288C82000425
+:101F90002C42001150400010AC800000080017F3E4
+:101FA000000000008EE24E3024420001504C00033D
+:101FB000000010218EE24E3024420001AEE24E308D
+:101FC0008EE24E30000210C02442503802E220213E
+:101FD000AC8D0000AC8E000456000006240B0001FE
+:101FE0008F820054012210232C4200331440FF9DA5
+:101FF00000000000316300FF24020001146200773A
+:102000003C050009AEEA06088F8300548F82005415
+:1020100024690032012210232C4200331040006159
+:1020200000005821240D0008240C0011240800127F
+:1020300024070040240A00018F8301202762380012
+:102040002466002000C2102B50400001276630009B
+:102050008F82012810C20004000000008F8201243A
+:1020600014C20007000000008EE201A400008021DD
+:1020700024420001AEE201A40800185F8EE201A430
+:102080008EE20608AC62001C8EE404A08EE504A477
+:102090002462001CAC620008A46D000EAC6C001839
+:1020A000AC640000AC6500048EE204C4AC620010B5
+:1020B000AF86012092E24E2014400033241000012C
+:1020C0008EE24E30000210C02442503802E220213D
+:1020D0008C8200001448001F000000008EE34E3088
+:1020E0008EE24E341062001B000000008C8200045F
+:1020F00024420001AC8200048EE24E348EE34E3066
+:102100002442000110470007000000008EE24E3418
+:102110002442000110620005000000000800184C75
+:102120000000000014600005000000008F820128FC
+:1021300024420020AF8201288F8201288C82000473
+:102140002C42001150400010AC8000000800185FC5
+:10215000000000008EE24E30244200015047000390
+:10216000000010218EE24E3024420001AEE24E30DB
+:102170008EE24E30000210C02442503802E220218C
+:10218000AC880000AC8A000456000006240B000155
+:102190008F820054012210232C4200331440FFA6EA
+:1021A00000000000316300FF2402000114620003FC
+:1021B0003C0500090800197C241000013C040001C2
+:1021C000248452A4AFA00010AFA000148F86012079
+:1021D0008F8701240800187B34A5F0113C0400010E
+:1021E000248452B0AFA00010AFA000148F8601204D
+:1021F0008F87012434A5F0100C00240300008021F7
+:102200000800197C000000003C040001248452BC3A
+:10221000AFA000148EE606088F4702283C0500098F
+:102220000800197534A5F00F8EE206088F430228C6
+:1022300024420001304900FF512300E2AFA000100A
+:102240008EE20608000210C0005710218FA300186C
+:102250008FA4001CAC43060CAC4406108F870120F1
+:102260002762380024E800200102102B50400001B2
+:10227000276830008F82012811020004000000004E
+:102280008F82012415020007000010218EE201A4B4
+:102290000000802124420001AEE201A4080018F7EA
+:1022A0008EE201A48EE40608000420C000801821FC
+:1022B0008EE404308EE5043400A3282100A3302BE3
+:1022C0000082202100862021ACE40000ACE500045F
+:1022D0008EE3060824020008A4E2000E2402000D8A
+:1022E000ACE20018ACE9001C000318C02463060C23
+:1022F00002E31021ACE200088EE204C4ACE200105C
+:10230000AF88012092E24E201440003724100001D3
+:102310008EE24E30000210C02442503802E22021EA
+:102320008C830000240200071462001F00000000DC
+:102330008EE34E308EE24E341062001B24030040C8
+:102340008C82000424420001AC8200048EE24E34F0
+:102350008EE54E30244200011043000700000000CB
+:102360008EE24E342442000110A20005000000005D
+:10237000080018E10000000014A0000500000000A3
+:102380008F82012824420020AF8201288F820128F9
+:102390008C8200042C42001150400013AC800000DD
+:1023A000080018F7000000008EE24E3024030040C1
+:1023B0002442000150430003000010218EE24E3001
+:1023C00024420001AEE24E308EE24E30000210C0D8
+:1023D0002442503802E2202124020007AC8200008F
+:1023E00024020001AC8200045600000CAEE906088D
+:1023F0003C040001248452C8AFA00010AFA0001418
+:102400008EE606088F4702283C0500090C002403CD
+:1024100034A5F0000800197C000000008F83012023
+:10242000276238002466002000C2102B50400001B3
+:10243000276630008F82012810C2000400000000CF
+:102440008F82012414C20007000000008EE201A464
+:102450000000802124420001AEE201A40800195EC0
+:102460008EE201A48EE20608AC62001C8EE404A099
+:102470008EE504A42462001CAC620008240200085B
+:10248000A462000E24020011AC620018AC640000CB
+:10249000AC6500048EE204C4AC620010AF8601207B
+:1024A00092E24E2014400037241000018EE24E309C
+:1024B000000210C02442503802E220218C83000028
+:1024C000240200121462001F000000008EE34E3050
+:1024D0008EE24E341062001B240300408C82000404
+:1024E00024420001AC8200048EE24E348EE54E3070
+:1024F0002442000110430007000000008EE24E3429
+:102500002442000110A20005000000000800194844
+:102510000000000014A00005000000008F820128C8
+:1025200024420020AF8201288F8201288C8200047F
+:102530002C42001150400013AC8000000800195ECE
+:10254000000000008EE24E302403004024420001CF
+:1025500050430003000010218EE24E30244200015F
+:10256000AEE24E308EE24E30000210C024425038AF
+:1025700002E2202124020012AC82000024020001A9
+:10258000AC8200045600001D241000013C04000130
+:10259000248452D0AFA00010AFA000148EE606082D
+:1025A0008F4702283C0500090C00240334A5F001E4
+:1025B0008EE201B024420001AEE201B00800197CB5
+:1025C0008EE201B03C040001248452DCAFA0001470
+:1025D0008EE606088F4702283C05000934A5F00561
+:1025E0000C002403000000008EE201AC00008021FA
+:1025F00024420001AEE201AC8EE201AC1200000CFC
+:10260000240200013C01000100370821A02083B012
+:102610008F4202388EE3015824630001AEE3015873
+:102620008EE301580800198CAEE272782402000192
+:102630003C01000100370821A02283B03C020001C8
+:102640008C425CD810400187000000008EE27B8441
+:1026500024430001284200C9144001A4AEE37B8456
+:102660008EE204D43042000214400119AEE07B84B3
+:102670008EE204D43C0306003463100034420002AE
+:10268000AEE204D4AFA300188EE206088F430228FE
+:1026900024420001304A00FF514300FDAFA000106A
+:1026A0008EE20608000210C0005710218FA3001808
+:1026B0008FA4001CAC43060CAC4406108F8300545E
+:1026C0008F82005424690032012210232C420033EF
+:1026D0001040006A0000582124180008240F000D43
+:1026E000240D0007240C0040240E00018F870120D8
+:1026F0002762380024E800200102102B504000011E
+:10270000276830008F8201281102000400000000B9
+:102710008F82012415020007000010218EE201A41F
+:102720000000802124420001AEE201A408001A1535
+:102730008EE201A48EE40608000420C00080182167
+:102740008EE404308EE5043400A3282100A3302B4E
+:102750000082202100862021ACE40000ACE50004CA
+:102760008EE20608A4F8000EACEF0018ACEA001CDC
+:10277000000210C02442060C02E21021ACE2000864
+:102780008EE204C4ACE20010AF88012092E24E2039
+:1027900014400033241000018EE24E30000210C0BD
+:1027A0002442503802E220218C820000144D001F88
+:1027B000000000008EE34E308EE24E341062001BAB
+:1027C000000000008C82000424420001AC8200045E
+:1027D0008EE24E348EE34E3024420001104C00074E
+:1027E000000000008EE24E34244200011062000519
+:1027F0000000000008001A0200000000146000053C
+:10280000000000008F82012824420020AF820128AE
+:102810008F8201288C8200042C420011504000104D
+:10282000AC80000008001A15000000008EE24E3057
+:1028300024420001504C0003000010218EE24E3073
+:1028400024420001AEE24E308EE24E30000210C053
+:102850002442503802E22021AC8D0000AC8E0004EE
+:1028600056000006240B00018F8200540122102321
+:102870002C4200331440FF9D00000000316300FF34
+:102880002402000154620078AFA00010AEEA0608EE
+:102890008F8300548F820054246900320122102358
+:1028A0002C4200331040006100005821240D000824
+:1028B000240C00112408001224070040240A0001FF
+:1028C0008F830120276238002466002000C2102B6D
+:1028D00050400001276630008F82012810C200049A
+:1028E000000000008F82012414C2000700000000D5
+:1028F0008EE201A40000802124420001AEE201A486
+:1029000008001A818EE201A48EE20608AC62001C67
+:102910008EE404A08EE504A42462001CAC620008CE
+:10292000A46D000EAC6C0018AC640000AC65000433
+:102930008EE204C4AC620010AF86012092E24E2009
+:1029400014400033241000018EE24E30000210C00B
+:102950002442503802E220218C8200001448001FDB
+:10296000000000008EE34E308EE24E341062001BF9
+:10297000000000008C82000424420001AC820004AC
+:102980008EE24E348EE34E302442000110470007A1
+:10299000000000008EE24E34244200011062000567
+:1029A0000000000008001A6E00000000146000051E
+:1029B000000000008F82012824420020AF820128FD
+:1029C0008F8201288C8200042C420011504000109C
+:1029D000AC80000008001A81000000008EE24E303A
+:1029E0002442000150470003000010218EE24E30C7
+:1029F00024420001AEE24E308EE24E30000210C0A2
+:102A00002442503802E22021AC880000AC8A000445
+:102A100056000006240B00018F820054012210236F
+:102A20002C4200331440FFA600000000316300FF79
+:102A30002402000110620022000000003C0400019A
+:102A4000248452A4AFA00010AFA000148F860120F0
+:102A50008F8701243C0500090C00240334A5F011E4
+:102A600008001AAD000000003C040001248452B0AC
+:102A7000AFA000148F8601208F8701243C05000938
+:102A80000C00240334A5F01008001AAD000000006B
+:102A90003C040001248452BCAFA000148EE606085A
+:102AA0008F4702283C0500090C00240334A5F00FD1
+:102AB0008EE201AC24420001AEE201AC8EE201AC38
+:102AC0008EE2015C24420001AEE2015C8EE2015C18
+:102AD0008EE204D430420001104000550000000096
+:102AE0008F42021830420080104000290000000090
+:102AF0008F82004434420040AF8200448EE27B7CEF
+:102B0000004028218EE200C08EE300C424060000AD
+:102B10002407FFFF00002021004610241444000D6C
+:102B2000006718241465000B000000008EE27B8013
+:102B3000004028218EE200E08EE300E40000202126
+:102B40000046102414440003006718241065000B8D
+:102B5000000000008EE200C08EE300C48EE400E0BE
+:102B60008EE500E4AEE37B7CAEE57B808F820044A3
+:102B70003842002008001B38AF8200448F82004496
+:102B80002403FFDF0043102408001B38AF820044F9
+:102B90008F8200442403FFDF00431024AF820044EF
+:102BA0008EE27B7C004028218EE200C08EE300C4D0
+:102BB000240600002407FFFF000020210046102407
+:102BC0001444000D006718241465000B0000000079
+:102BD0008EE27B80004028218EE200E08EE300E45C
+:102BE000000020210046102414440003006718242C
+:102BF0001065000B000000008EE200C08EE300C4F0
+:102C00008EE400E08EE500E4AEE37B7CAEE57B8005
+:102C10008F8200443842004008001B38AF820044D5
+:102C20008F8200443442004008001B38AF820044C9
+:102C30008F82004434420040AF8200448EE27B8C9D
+:102C4000244300012842001514400028AEE37B8C89
+:102C50008F82004438420020AF82004408001B38B5
+:102C6000AEE07B8C8EE204D43042000110400011B3
+:102C7000000000008F42021830420080104000091E
+:102C8000000000008F82004434420020AF820044E4
+:102C90008F8200442403FFBF0043102408001B362A
+:102CA000AF8200448F8200443442006008001B362B
+:102CB000AF8200448F82004434420040AF8200441F
+:102CC0008EE27B88244300012842138914400005CA
+:102CD000AEE37B888F82004438420020AF820044FC
+:102CE000AEE07B880C004603000000008FBF00248C
+:102CF0008FB0002003E0000827BD002827BDFFB8E3
+:102D0000AFBF0044AFB60040AFB5003CAFB4003831
+:102D1000AFB30034AFB20030AFB1002CAFB0002879
+:102D20008F96006432C200041040000C240200049C
+:102D3000AF8200648F420114AEE204E08F82006033
+:102D400034420008AF8200608EE2016C2442000130
+:102D5000AEE2016C080022F48EE2016C32C2000186
+:102D60001040000424020001AF820064080022F435
+:102D70000000000032C200021440000C3C050003B9
+:102D80003C0400012484535434A5000102C03021C6
+:102D900000003821AFA000100C002403AFA00014E5
+:102DA0002402FFF8080022F4AF8200648F43022C53
+:102DB0008F42010C5062000CAFA000108F42022C19
+:102DC00000021080005A10218C420300AFA20020A4
+:102DD0008F42022C24070001244200013042003FB0
+:102DE00008001B80AF42022C3C0400012484536085
+:102DF000AFA000148F46022C8F47010C3C05000346
+:102E00000C00240334A5F01F0000382114E0000357
+:102E100000000000080022EDAF96006493A200209D
+:102E20002443FFFF2C62001110400658000310805D
+:102E30003C010001002208218C22541800400008A7
+:102E4000000000008FA2002030420FFFAEE20E0C07
+:102E50008F82006034420200AF8200608EE201186F
+:102E600024420001AEE20118080022E88EE20118B7
+:102E70008FA20020240300013C010001003708213B
+:102E8000A02383B130420FFFAEE252388F82006040
+:102E900034420100AF8200608EE20144244200010E
+:102EA000AEE20144080022E88EE201448FA2002035
+:102EB0000002120000022502240200011082000517
+:102EC00024020002108200092402FFFE08001BC930
+:102ED000AFA000108EE204D4AEE40070AEE4007443
+:102EE0003442000108001BBDAEE204D48EE304D4DA
+:102EF000AEE40070AEE4007400621824AEE304D4C3
+:102F00008F8400540004144200041C8200431021EA
+:102F100000041CC20043102300041D0200431021C2
+:102F200000041D420043102308001BD0AEE20078CD
+:102F30003C0400012484536CAFA000148FA6002031
+:102F40003C0500030C00240334A500048EE20110AC
+:102F500024420001AEE20110080022E88EE20110D6
+:102F6000274402120C0022FE240500063049001FEF
+:102F7000000920C002E410219442727C30424000DB
+:102F80001040000A0097102197430212A443727E5A
+:102F90008F43021400971021AC43728002E4182181
+:102FA0003402800008001C79A462727C9443727E13
+:102FB000974202121462000602E4102100971021C9
+:102FC0008C4372808F4202141062009F02E4102131
+:102FD0009442727C304280001040002A2406FFFF99
+:102FE00000002021000410C002E210219442737CF2
+:102FF000304240005440000500803021248400010C
+:103000002C8200801440FFF8000410C004C100109E
+:10301000000618C0000610C0005718218C63737C8E
+:1030200000571021AFA300108C4273803C040001B4
+:1030300024845378AFA200148F4702143C05000388
+:103040000C00240334A5001308001C903C02080067
+:103050009744021200771021A444737E8F44021417
+:103060000077102102E31821AC4473803402800001
+:10307000A462737C000910C002E2102108001C79D0
+:10308000A446727C02E410219445727C08001C2E38
+:10309000000510C09443737E97420212146200062A
+:1030A000000510C0009710218C4373808F420214DA
+:1030B00010620065000510C002E210219445737C87
+:1030C000000510C002E210219442737C304280005F
+:1030D0001040FFF000971021000520C0009710213C
+:1030E0009443737E97420212146200062406FFFF87
+:1030F000009710218C4373808F420214106200539A
+:103100003C02080000002021000410C002E210214F
+:103110009442737C304240005440000500803021CE
+:10312000248400012C8200801440FFF8000410C0A9
+:1031300004C10023000618C0000910C00057182160
+:103140008C63727C00571021AFA300108C427280F8
+:103150003C04000124845384AFA200148F4702145E
+:103160003C0500030C00240334A5F01708001C9054
+:103170003C0208008F43021000B71021AC43777C5B
+:103180008F43021400B71021AC4377803C0200014A
+:10319000005710218C4283B4244200013C010001FD
+:1031A00000370821AC2283B43C03000100771821CA
+:1031B0008C6383B402E5102108001C82A443777C51
+:1031C0009744021200771021A444737E8F440214A6
+:1031D0000077102102E31821AC4473803402800090
+:1031E000A462737C000510C002E21021A446737C27
+:1031F00000002021000428C002E510219442777CC1
+:103200001040FFDC248400012C8200805440FFFA2F
+:10321000000428C092E204D81040000624020001F5
+:103220008EE304DC012210040062182508001C8FC4
+:10323000AEE304DC8F830228240200010122100483
+:1032400000621825AF8302283C02080034421000B7
+:10325000AFA200188EE206088F4302282442000124
+:10326000304A00FF514300FDAFA000108EE2060877
+:10327000000210C0005710218FA300188FA4001C5B
+:10328000AC43060CAC4406108F8300548F8200546C
+:1032900024690032012210232C4200331040006ABE
+:1032A0000000582124100008240F000D240D0007F1
+:1032B000240C0040240E00018F8701202762380073
+:1032C00024E800200102102B504000012768300044
+:1032D0008F82012811020004000000008F82012467
+:1032E00015020007000010218EE201A40000382121
+:1032F00024420001AEE201A408001D088EE201A4F0
+:103300008EE40608000420C0008018218EE40430FA
+:103310008EE5043400A3282100A3302B0082202155
+:1033200000862021ACE40000ACE500048EE2060833
+:10333000A4F0000EACEF0018ACEA001C000210C0B4
+:103340002442060C02E21021ACE200088EE204C422
+:10335000ACE20010AF88012092E24E20144000330E
+:10336000240700018EE24E30000210C02442503883
+:1033700002E220218C820000144D001F000000009A
+:103380008EE34E308EE24E341062001B00000000CF
+:103390008C82000424420001AC8200048EE24E3490
+:1033A0008EE34E3024420001104C00070000000064
+:1033B0008EE24E342442000110620005000000003D
+:1033C00008001CF50000000014600005000000006B
+:1033D0008F82012824420020AF8201288F82012899
+:1033E0008C8200042C42001150400010AC80000080
+:1033F00008001D08000000008EE24E30244200014B
+:10340000504C0003000010218EE24E302442000197
+:10341000AEE24E308EE24E30000210C024425038F0
+:1034200002E22021AC8D0000AC8E000454E00006C6
+:10343000240B00018F820054012210232C42003300
+:103440001440FF9D00000000316300FF24020001D2
+:1034500054620078AFA00010AEEA06088F830054D3
+:103460008F82005424690032012210232C42003341
+:103470001040006100005821240E0008240D0011A6
+:10348000240A001224080040240C00018F8301202C
+:10349000276238002466002000C2102B5040000133
+:1034A000276630008F82012810C20004000000004F
+:1034B0008F82012414C20007000000008EE201A4E4
+:1034C0000000382124420001AEE201A408001D746E
+:1034D0008EE201A48EE20608AC62001C8EE404A019
+:1034E0008EE504A42462001CAC620008A46E000EE9
+:1034F000AC6D0018AC640000AC6500048EE204C43E
+:10350000AC620010AF86012092E24E2014400033DE
+:10351000240700018EE24E30000210C024425038D1
+:1035200002E220218C820000144A001F00000000EB
+:103530008EE34E308EE24E341062001B000000001D
+:103540008C82000424420001AC8200048EE24E34DE
+:103550008EE34E30244200011048000700000000B6
+:103560008EE24E342442000110620005000000008B
+:1035700008001D610000000014600005000000004C
+:103580008F82012824420020AF8201288F820128E7
+:103590008C8200042C42001150400010AC800000CE
+:1035A00008001D74000000008EE24E30244200012D
+:1035B00050480003000010218EE24E3024420001EA
+:1035C000AEE24E308EE24E30000210C0244250383F
+:1035D00002E22021AC8A0000AC8C000454E000061A
+:1035E000240B00018F820054012210232C4200334F
+:1035F0001440FFA600000000316300FF2402000118
+:1036000010620022000000003C040001248453905A
+:10361000AFA00010AFA000148F8601208F87012477
+:103620003C0500090C00240334A5F01108001DA07E
+:10363000000000003C0400012484539CAFA000144F
+:103640008F8601208F8701243C0500090C0024038C
+:1036500034A5F01008001DA0000000003C0400018B
+:10366000248453A8AFA000148EE606088F470228D2
+:103670003C0500090C00240334A5F00F8EE201ACD8
+:1036800024420001AEE201AC8EE201AC8EE20124E4
+:1036900024420001AEE2012408001F978EE20124BB
+:1036A000274402120C0022FE240500063049001FA8
+:1036B000000928C002E510219442727C304280004B
+:1036C0001040002F02E510219442727C30424000ED
+:1036D0001440001C00B710219443727E97420212DE
+:1036E0001462001800B710218C4372808F420214BC
+:1036F00054620016AFA2001092E204D810400007F6
+:10370000240200018EE304DC0122100400021027D1
+:103710000062182408001DC9AEE304DC8F83022870
+:10372000012210040002102700621824AF8302282F
+:10373000000910C002E218213402C00008001E4E29
+:10374000A462727C8F420214AFA20010000910C064
+:10375000005710218C42727C3C040001248453B435
+:103760003C050003AFA200148F47021034A5F01CE3
+:103770000C0024030120302108001E833C020800B5
+:1037800000B710219443727E97420212146200190E
+:10379000000918C000B710218C4372808F420214B8
+:1037A00014620014000918C002E510219447727CCD
+:1037B000000720C0009710219443737E00B71021AA
+:1037C000A443727E009710218C43738000B71021B0
+:1037D000AC43728002E410219443737C02E5102113
+:1037E000A443727C02E418213402C00008001E4E7B
+:1037F000A462737C02E310219447727C00003021A4
+:10380000000720C002E410219442737C0000402194
+:10381000304280001440002500E028210060502143
+:10382000340BC000009710219443737E974202121C
+:103830005462001500E02821009710218C4373800A
+:103840008F4202145462001000E02821110000068B
+:1038500002E410219443737C000510C002E21021A1
+:1038600008001E1AA443737C9443737C02EA10215F
+:10387000A443727C000710C002E21021A44B737CA9
+:1038800008001E2824060001000510C002E21021D5
+:103890009447737C000720C002E410219442737C9B
+:1038A000304280001040FFDF2508000130C200FFD9
+:1038B0001440002500002021000720C0009710219F
+:1038C0009443737E974202121462000F000910C0E5
+:1038D000009710218C4373808F4202141462000AF7
+:1038E000000910C002E418213402C00015000015C0
+:1038F000A462737C000910C002E218213402800027
+:1039000008001E4EA462727C005710218C42727C0B
+:103910003C040001248453C03C050003AFA2001006
+:10392000000710C0005710218C42737C34A5001E84
+:10393000012030210C002403AFA2001408001E83D4
+:103940003C02080000002021000428C000B710211C
+:103950009443777E974202125462002B2484000124
+:1039600000B710218C4377808F42021454620026E6
+:10397000248400013C020001005710218C4283B4D2
+:103980002442FFFF3C01000100370821AC2283B430
+:103990003C020001005710218C4283B4008090212A
+:1039A0000242102B1040000E24B1777C24B07784A3
+:1039B00002F0202102F128210C00249024060008A6
+:1039C000263100083C020001005710218C4283B4CC
+:1039D000265200010242102B1440FFF52610000869
+:1039E0003C040001009720218C8483B42405000846
+:1039F000000420C02484777C0C00248802E4202169
+:103A000008001E833C0208002C8200801440FFCF77
+:103A1000000428C03C02080034422000AFA2001875
+:103A20008EE206088F43022824420001304A00FF3C
+:103A3000514300FDAFA000108EE20608000210C046
+:103A4000005710218FA300188FA4001CAC43060C54
+:103A5000AC4406108F8300548F82005424690032D6
+:103A6000012210232C4200331040006A000058212C
+:103A700024100008240F000D240D0007240C004022
+:103A8000240E00018F8701202762380024E80020DF
+:103A90000102102B50400001276830008F8201285E
+:103AA00011020004000000008F82012415020007AB
+:103AB000000010218EE201A4000038212442000100
+:103AC000AEE201A408001EFB8EE201A48EE406080B
+:103AD000000420C0008018218EE404308EE50434F8
+:103AE00000A3282100A3302B008220210086202162
+:103AF000ACE40000ACE500048EE20608A4F0000E81
+:103B0000ACEF0018ACEA001C000210C02442060C06
+:103B100002E21021ACE200088EE204C4ACE2001024
+:103B2000AF88012092E24E201440003324070001A8
+:103B30008EE24E30000210C02442503802E22021B2
+:103B40008C820000144D001F000000008EE34E30F8
+:103B50008EE24E341062001B000000008C820004D4
+:103B600024420001AC8200048EE24E348EE34E30DB
+:103B700024420001104C0007000000008EE24E3489
+:103B800024420001106200050000000008001EE849
+:103B90000000000014600005000000008F82012872
+:103BA00024420020AF8201288F8201288C820004E9
+:103BB0002C42001150400010AC80000008001EFB99
+:103BC000000000008EE24E3024420001504C000301
+:103BD000000010218EE24E3024420001AEE24E3051
+:103BE0008EE24E30000210C02442503802E2202102
+:103BF000AC8D0000AC8E000454E00006240B0001E4
+:103C00008F820054012210232C4200331440FF9D68
+:103C100000000000316300FF2402000154620078BC
+:103C2000AFA00010AEEA06088F8300548F820054C4
+:103C300024690032012210232C420033104000611D
+:103C400000005821240E0008240D0011240A00123F
+:103C500024080040240C00018F83012027623800D3
+:103C60002466002000C2102B50400001276630005F
+:103C70008F82012810C20004000000008F820124FE
+:103C800014C20007000000008EE201A400003821E9
+:103C900024420001AEE201A408001F678EE201A4E5
+:103CA0008EE20608AC62001C8EE404A08EE504A43B
+:103CB0002462001CAC620008A46E000EAC6D0018FB
+:103CC000AC640000AC6500048EE204C4AC62001079
+:103CD000AF86012092E24E201440003324070001F9
+:103CE0008EE24E30000210C02442503802E2202101
+:103CF0008C820000144A001F000000008EE34E304A
+:103D00008EE24E341062001B000000008C82000422
+:103D100024420001AC8200048EE24E348EE34E3029
+:103D20002442000110480007000000008EE24E34DB
+:103D300024420001106200050000000008001F542A
+:103D40000000000014600005000000008F820128C0
+:103D500024420020AF8201288F8201288C82000437
+:103D60002C42001150400010AC80000008001F677A
+:103D7000000000008EE24E30244200015048000353
+:103D8000000010218EE24E3024420001AEE24E309F
+:103D90008EE24E30000210C02442503802E2202150
+:103DA000AC8A0000AC8C000454E00006240B000137
+:103DB0008F820054012210232C4200331440FFA6AE
+:103DC00000000000316300FF2402000110620022A5
+:103DD000000000003C04000124845390AFA00010B8
+:103DE000AFA000148F8601208F8701243C050009B5
+:103DF0000C00240334A5F01108001F9300000000FC
+:103E00003C0400012484539CAFA000148F86012041
+:103E10008F8701243C0500090C00240334A5F01011
+:103E200008001F93000000003C040001248453A8F4
+:103E3000AFA000148EE606088F4702283C05000953
+:103E40000C00240334A5F00F8EE201AC24420001E3
+:103E5000AEE201AC8EE201AC8EE201282442000108
+:103E6000AEE201288EE201288EE2016424420001C4
+:103E7000AEE20164080022E88EE201648FA2002015
+:103E80000002120000021D0224020001106200055F
+:103E9000240200021062000D0000000008001FB79D
+:103EA000AFA0001092E204D81440000624020001E2
+:103EB0008F820228AEE204DC2402FFFFAF820228D8
+:103EC0002402000108001FBEA2E204D892E204D836
+:103ED0005040000CA2E004D88EE204DCAF8202283D
+:103EE00008001FBEA2E004D83C040001248453C88B
+:103EF000AFA000148FA600203C0500030C00240393
+:103F000034A5F0098EE2013C24420001AEE2013CFE
+:103F1000080022E88EE2013C8FA20020000212007D
+:103F20000002250224020001108200052402000282
+:103F30001082000F0000000008001FE3AFA0001077
+:103F40008F8202203C0308FF3463FFFF00431024EC
+:103F500034420008AF820220240200013C0100012B
+:103F600000370821A02283B208001FEAAEE401084E
+:103F70008F8202203C0308FF3463FFF700431024C4
+:103F8000AF8202203C01000100370821A02083B24B
+:103F900008001FEAAEE401083C040001248453D465
+:103FA000AFA000148FA600203C0500030C002403E2
+:103FB00034A5F00A8EE2012C24420001AEE2012C6D
+:103FC000080022E88EE2012C8FA2002000021200DD
+:103FD00000021D02240200011062000524020002FA
+:103FE0001062000E0000000008002011AFA00010B9
+:103FF0008F8202203C0308FF3463FFFF004310243C
+:1040000034420008AF820220240200013C0100017A
+:104010000037082108002018A02283B33C020001C9
+:1040200000571021904283B23C0100010037082163
+:104030001440000EA02083B38F8202203C0308FFAF
+:104040003463FFF70043102408002018AF820220D9
+:104050003C040001248453E0AFA000148FA600208C
+:104060003C0500030C00240334A5F00B8EE2011480
+:1040700024420001AEE20114080022E88EE201149D
+:1040800027840208274502000C00249A2406000811
+:1040900026E40094274502000C00249A2406000818
+:1040A0008EE2013424420001AEE20134080022E82D
+:1040B0008EE201348F460248000020210C00510896
+:1040C000240500048EE2013024420001AEE20130FA
+:1040D000080022E88EE201308EF301CC8EF401D08C
+:1040E0008EF501D88EE2014026E400302442000122
+:1040F000AEE201408EF001408EF100748EF200704D
+:104100000C00248824050400AEF301CCAEF401D0E9
+:10411000AEF501D8AEF00140AEF10074AEF2007021
+:104120008F42025C26E40094AEE200608F4202609F
+:104130002745020024060008AEE2006824020006BB
+:104140000C00249AAEE200643C023B9A3442CA005E
+:10415000AEE2006C240203E8240400022403000100
+:10416000AEE20104AEE40100AEE3010C8F82022056
+:10417000304200081040000400000000AEE30108D7
+:104180000800206100002021AEE401080000202189
+:104190003C0300010064182190635C3002E41021AC
+:1041A00024840001A043009C2C82000F1440FFF8DF
+:1041B000000000008F82004002E4182124840001E6
+:1041C0000002170224420030A062009C02E4102189
+:1041D000080022E8A040009C240200013C010001EC
+:1041E00000370821A02283E0240B040024080014D7
+:1041F000240A0040240900018F8301002762300057
+:104200002466002000C2102B5040000127662800C1
+:104210008F82010810C20004000000008F82010498
+:1042200014C2000726E200308EE201A80000382107
+:1042300024420001AEE201A8080020A88EE201A8F5
+:104240008EE404B88EE504BCAC620008A46B000EDA
+:10425000AC680018AC60001CAC640000AC650004E5
+:104260008EE204CCAC620010AF86010092E204EC56
+:104270001440000E240700018EE24E282442000163
+:10428000504A0003000010218EE24E282442000113
+:10429000AEE24E288EE24E28000210C024424E3874
+:1042A00002E21021AC480000AC49000410E0FFD24B
+:1042B00000000000080022E8000000003C020900A5
+:1042C000AEE05238AEE0523CAEE05240AEE0524476
+:1042D000AEE001D03C01000100370821A02083B1ED
+:1042E000AFA200188EE206088F4302282442000184
+:1042F000304A00FF514300FDAFA000108EE20608D7
+:10430000000210C0005710218FA300188FA4001CBA
+:10431000AC43060CAC4406108F8300548F820054CB
+:1043200024690032012210232C4200331040006A1D
+:104330000000582124100008240F000D240D000750
+:10434000240C0040240E00018F87012027623800D2
+:1043500024E800200102102B5040000127683000A3
+:104360008F82012811020004000000008F820124C6
+:1043700015020007000010218EE201A40000382180
+:1043800024420001AEE201A40800212C8EE201A427
+:104390008EE40608000420C0008018218EE404305A
+:1043A0008EE5043400A3282100A3302B00822021B5
+:1043B00000862021ACE40000ACE500048EE2060893
+:1043C000A4F0000EACEF0018ACEA001C000210C014
+:1043D0002442060C02E21021ACE200088EE204C482
+:1043E000ACE20010AF88012092E24E20144000336E
+:1043F000240700018EE24E30000210C024425038E3
+:1044000002E220218C820000144D001F00000000F9
+:104410008EE34E308EE24E341062001B000000002E
+:104420008C82000424420001AC8200048EE24E34EF
+:104430008EE34E3024420001104C000700000000C3
+:104440008EE24E342442000110620005000000009C
+:1044500008002119000000001460000500000000A1
+:104460008F82012824420020AF8201288F820128F8
+:104470008C8200042C42001150400010AC800000DF
+:104480000800212C000000008EE24E302442000182
+:10449000504C0003000010218EE24E3024420001F7
+:1044A000AEE24E308EE24E30000210C02442503850
+:1044B00002E22021AC8D0000AC8E000454E0000626
+:1044C000240B00018F820054012210232C42003360
+:1044D0001440FF9D00000000316300FF2402000132
+:1044E00054620078AFA00010AEEA06088F83005433
+:1044F0008F82005424690032012210232C420033A1
+:104500001040006100005821240E0008240D001105
+:10451000240A001224080040240C00018F8301208B
+:10452000276238002466002000C2102B5040000192
+:10453000276630008F82012810C2000400000000AE
+:104540008F82012414C20007000000008EE201A443
+:104550000000382124420001AEE201A408002198A5
+:104560008EE201A48EE20608AC62001C8EE404A078
+:104570008EE504A42462001CAC620008A46E000E48
+:10458000AC6D0018AC640000AC6500048EE204C49D
+:10459000AC620010AF86012092E24E20144000333E
+:1045A000240700018EE24E30000210C02442503831
+:1045B00002E220218C820000144A001F000000004B
+:1045C0008EE34E308EE24E341062001B000000007D
+:1045D0008C82000424420001AC8200048EE24E343E
+:1045E0008EE34E3024420001104800070000000016
+:1045F0008EE24E34244200011062000500000000EB
+:104600000800218500000000146000050000000083
+:104610008F82012824420020AF8201288F82012846
+:104620008C8200042C42001150400010AC8000002D
+:1046300008002198000000008EE24E302442000164
+:1046400050480003000010218EE24E302442000149
+:10465000AEE24E308EE24E30000210C0244250389E
+:1046600002E22021AC8A0000AC8C000454E0000679
+:10467000240B00018F820054012210232C420033AE
+:104680001440FFA600000000316300FF2402000177
+:1046900010620022000000003C04000124845390BA
+:1046A000AFA00010AFA000148F8601208F870124D7
+:1046B0003C0500090C00240334A5F011080021C4B6
+:1046C000000000003C0400012484539CAFA00014AF
+:1046D0008F8601208F8701243C0500090C002403EC
+:1046E00034A5F010080021C4000000003C040001C3
+:1046F000248453A8AFA000148EE606088F47022832
+:104700003C0500090C00240334A5F00F8EE201AC37
+:1047100024420001AEE201AC8EE201AC8EE2012047
+:1047200024420001AEE201208EE201208EE2016807
+:1047300024420001AEE20168080022E88EE201682E
+:104740008F42025C26E40094AEE200608F42026079
+:1047500027450200240600080C00249AAEE20068F7
+:104760008F8202203042000814400002240200011F
+:1047700024020002AEE201088EE2011C2442000184
+:10478000AEE2011C080022E88EE2011C3C0400019C
+:10479000248453ECAFA00010AFA000148FA600201B
+:1047A0003C0500030C00240334A5F00F93A2002065
+:1047B0003C0307003463100000431025AFA200182B
+:1047C0008EE206088F43022824420001304900FF90
+:1047D000512300E2AFA000108EE20608000210C0D4
+:1047E000005710218FA300188FA4001CAC43060CA7
+:1047F000AC4406108F8701202762380024E800208F
+:104800000102102B50400001276830008F820128E0
+:1048100011020004000000008F820124150200072D
+:10482000000010218EE201A4000038212442000182
+:10483000AEE201A40800225D8EE201A48EE4060827
+:10484000000420C0008018218EE404308EE504347A
+:1048500000A3282100A3302B0082202100862021E4
+:10486000ACE40000ACE500048EE306082402000876
+:10487000A4E2000E2402000DACE20018ACE9001C1A
+:10488000000318C02463060C02E31021ACE2000808
+:104890008EE204C4ACE20010AF88012092E24E2008
+:1048A00014400037240700018EE24E30000210C091
+:1048B0002442503802E220218C83000024020007A9
+:1048C0001462001F000000008EE34E308EE24E3472
+:1048D0001062001B240300408C820004244200016B
+:1048E000AC8200048EE24E348EE54E30244200014C
+:1048F00010430007000000008EE24E342442000105
+:1049000010A200050000000008002247000000007F
+:1049100014A00005000000008F820128244200201E
+:10492000AF8201288F8201288C8200042C42001162
+:1049300050400013AC8000000800225D0000000021
+:104940008EE24E3024030040244200015043000315
+:10495000000010218EE24E3024420001AEE24E30C3
+:104960008EE24E30000210C02442503802E2202174
+:1049700024020007AC82000024020001AC82000483
+:1049800054E0000CAEE906083C040001248453F412
+:10499000AFA00010AFA000148EE606088F470228D3
+:1049A0003C0500090C00240334A5F000080022E0B7
+:1049B000000000008F830120276238002466002059
+:1049C00000C2102B50400001276630008F82012862
+:1049D00010C20004000000008F82012414C20007EE
+:1049E000000000008EE201A40000382124420001F2
+:1049F000AEE201A4080022C48EE201A48EE2060801
+:104A0000AC62001C8EE404A08EE504A42462001CA9
+:104A1000AC62000824020008A462000E2402001107
+:104A2000AC620018AC640000AC6500048EE204C403
+:104A3000AC620010AF86012092E24E201440003795
+:104A4000240700018EE24E30000210C0244250388C
+:104A500002E220218C830000240200121462001F55
+:104A6000000000008EE34E308EE24E341062001BD8
+:104A7000240300408C82000424420001AC82000424
+:104A80008EE24E348EE54E30244200011043000782
+:104A9000000000008EE24E342442000110A2000506
+:104AA00000000000080022AE0000000014A0000575
+:104AB000000000008F82012824420020AF820128DC
+:104AC0008F8201288C8200042C4200115040001378
+:104AD000AC800000080022C4000000008EE24E30CE
+:104AE0002403004024420001504300030000102131
+:104AF0008EE24E3024420001AEE24E308EE24E3065
+:104B0000000210C02442503802E220212402001288
+:104B1000AC82000024020001AC82000414E0001BFF
+:104B2000000000003C040001248453FCAFA00010EE
+:104B3000AFA000148EE606088F4702283C05000946
+:104B40000C00240334A5F0018EE201B024420001E0
+:104B5000AEE201B0080022E08EE201B03C040001A8
+:104B600024845408AFA000148EE606088F4702285C
+:104B70003C0500090C00240334A5F0058EE201ACCD
+:104B800024420001AEE201AC8EE201AC8EE20150A3
+:104B900024420001AEE201508EE201508EE201603B
+:104BA00024420001AEE201608EE201608F43022CDC
+:104BB0008F42010C1462000924020002AF820064DB
+:104BC0008F82006414400005000000008F43022C17
+:104BD0008F42010C1462F875000000008FBF004482
+:104BE0008FB600408FB5003C8FB400388FB30034CF
+:104BF0008FB200308FB1002C8FB0002803E0000886
+:104C000027BD004827BDFFF82408FFFF10A00014AF
+:104C1000000048213C0AEDB8354A83209087000007
+:104C200024840001000030210107102630420001D9
+:104C30001040000200081842006A18260060402157
+:104C400024C600012CC200081440FFF700073842B8
+:104C5000252900010125102B1440FFF00000000061
+:104C60000100102103E0000827BD000827BDFFE870
+:104C700027642800AFBF00100C0024882405100012
+:104C800024020021AF800100AF800104AF80010841
+:104C9000AF800110AF800114AF800118AF800120F8
+:104CA000AF800124AF800128AF800130AF80013494
+:104CB000AF800138AEE04E28AEE04E2CAEE04E3074
+:104CC000AEE04E34AF82011C8F42021830420040E9
+:104CD00010400004000000008F82011C34420004D8
+:104CE000AF82011C8FBF001003E0000827BD001831
+:104CF00027BDFFE0AFBF00188F820104AFA20010F4
+:104D00008F8201003C050002AFA200148F8600B024
+:104D10008F87011C3C040001248454C00C00240330
+:104D200034A5F0008F8300B03C027F00006218249D
+:104D30003C020400106200290043102B14400008BC
+:104D40003C0220003C020100106200243C020200F0
+:104D50001062001100000000080023740000000031
+:104D6000106200083C0240001462001C00000000B9
+:104D70008EE2019024420001AEE20190080023740B
+:104D80008EE201908EE2018C24420001AEE2018CA1
+:104D9000080023748EE2018C8F82011C34420002D1
+:104DA000AF82011C8F8301048F8200B03442000166
+:104DB000AF8200B0AF8301048F82011C2403FFFD8A
+:104DC00000431024AF82011C8EE201A024420001A6
+:104DD000AEE201A0080023778EE201A08F8200B02E
+:104DE00034420001AF8200B08FBF001803E000081A
+:104DF00027BD002027BDFFE0AFBF001CAFB00018EB
+:104E00008F820120AFA200108F8201243C05000197
+:104E1000AFA200148F8600A08F87011C3C04000104
+:104E2000248454CC0C00240334A5F0008F8300A00C
+:104E30003C027F00006218243C0204001062005310
+:104E4000000080210043102B144000083C04200087
+:104E50003C0201001062004D3C0202001062003A68
+:104E600000000000080023E00000000010640003C0
+:104E70003C02400014620045000000008F8200A048
+:104E80000044102410400006000000008EE201944F
+:104E900024420001AEE20194080023A98EE20194AD
+:104EA0008EE2019824420001AEE201988EE2019860
+:104EB0008F82011C34420002AF82011C8F82011CD0
+:104EC000304202001040001B000000008F8300A051
+:104ED0008F8401248F8200AC14400007240200015B
+:104EE0003C0200013442F0000062102450400001F6
+:104EF00024100001240200011200000DAF8200A066
+:104F00008F8201242442FFE0AF8201248F8201249A
+:104F10008F820124276330000043102B10400005CE
+:104F2000276237E0AF820124080023CA0000000096
+:104F3000AF8401248F82011C2403FFFD0043102451
+:104F4000080023E3AF82011C8F82011C344200025F
+:104F5000AF82011C8F8301248F8200A034420001A4
+:104F6000AF8200A0AF8301248F82011C2403FFFDC8
+:104F700000431024AF82011C8EE2019C24420001F8
+:104F8000AEE2019C080023E38EE2019C8F8200A028
+:104F900034420001AF8200A08FBF001C8FB0001808
+:104FA00003E0000827BD0020000000003C020001D3
+:104FB0008C425C5827BDFFE8AFBF001414400012BC
+:104FC000AFB000103C10000126105DD0020020217F
+:104FD0000C0024882405200026021FE03C0100016B
+:104FE000AC225D943C010001AC225D90AF420250C6
+:104FF00024022000AF500254AF42025824020001A4
+:105000003C010001AC225C588FBF00148FB000102F
+:1050100003E0000827BD00183C0300018C635D9489
+:105020008C8200008FA800108FA90014AC620000D1
+:105030003C0200018C425D948C830004AC4300046C
+:10504000AC4500088F8400542443FFE0AC460010B8
+:10505000AC470014AC480018AC49001C3C010001EE
+:10506000AC235D94AC44000C3C02000124425DD0B2
+:105070000062182B10600005000000003C020001D7
+:105080008C425D903C010001AC225D943C03000128
+:105090008C635D943C0200018C425C40AC62000079
+:1050A0003C0300018C635D943C0200018C425C4037
+:1050B000AC62000403E00008AF4302503C0300016F
+:1050C0008C635D943C0200018C425C4027BDFFD0A4
+:1050D000AFB400208FB40040AFB00010008080213A
+:1050E000AFB500248FB500448FA40048AFB10014C1
+:1050F00000A08821AFBF0028AFB3001CAFB20018DA
+:10510000AC6200003C0500018CA55D943C020001EE
+:105110008C425C4000C0902100E098211080000685
+:10512000ACA2000424A500080C002490240600185A
+:105130000800244E0000000024A400080C0024886D
+:10514000240500183C0200018C425D943C050001DE
+:1051500024A55DD02442FFE03C010001AC225D9417
+:105160000045102B10400005000000003C0200012B
+:105170008C425D903C010001AC225D943C03000137
+:105180008C635D948E020000AC6200003C03000161
+:105190008C635D948E020004AC620004AC71000864
+:1051A0008F8400542462FFE03C010001AC225D9436
+:1051B0000045102BAC720010AC730014AC740018D6
+:1051C000AC75001C10400005AC64000C3C020001F2
+:1051D0008C425D903C010001AC225D943C030001D7
+:1051E0008C635D943C0200018C425C40AC62000028
+:1051F0003C0300018C635D943C0200018C425C40E6
+:10520000AC620004AF4302508FBF00288FB500246A
+:105210008FB400208FB3001C8FB200188FB1001420
+:105220008FB0001003E0000827BD003010A000057B
+:1052300000000000AC80000024A5FFFC14A0FFFDCE
+:105240002484000403E000080000000010C00007F0
+:10525000000000008C8200002484000424C6FFFCAF
+:10526000ACA2000014C0FFFB24A5000403E000086A
+:105270000000000010C00007000000008CA2000029
+:1052800024A5000424C6FFFCAC82000014C0FFFB70
+:105290002484000403E000080000000003E000088C
+:1052A0000000000027BDFFD8AFBF00208EE304E45C
+:1052B0008EE204E010620436000000008EE204E496
+:1052C0008EE304FC00021100006260219587000853
+:1052D0008D8A00008D8B0004958D000A8EE2725C31
+:1052E0008EE3726C30E4FFFF004410210062182B43
+:1052F0001060001531A200048F8200D88EE372582E
+:1053000000431023AEE2726C8EE2726C1C4000030C
+:105310003C03000100431021AEE2726C8EE2725C2D
+:105320008EE3726C004410210062182B106000069E
+:1053300031A200048EE201B824420001AEE201B8BD
+:10534000080028E18EE201B81040024031A20200BC
+:105350001040014D0000482196E2045A30420010EE
+:1053600010400149000000008F84010027623000D6
+:105370002485002000A2102B504000012765280042
+:105380008F82010810A20004000000008F82010437
+:1053900014A200062402000C8EE201A8244200019F
+:1053A000AEE201A80800252C8EE201A8AC8A00001C
+:1053B000AC8B00048EE3726424060005A482000E08
+:1053C000AC860018AC8300088EE204E4AC82001CBA
+:1053D0008EE204C8AC820010AF85010092E204ECBA
+:1053E00014400036240900018EE24E28000210C04D
+:1053F00024424E3802E220218C8200001446001F15
+:10540000000000008EE34E288EE24E2C1062001B3E
+:10541000240300408C82000424420001AC8200047A
+:105420008EE24E2C8EE54E282442000110430007E8
+:10543000000000008EE24E2C2442000110A2000564
+:1054400000000000080025160000000014A0000560
+:10545000000000008F82010824420020AF82010872
+:105460008F8201088C8200042C42001150400013EE
+:10547000AC8000000800252C000000008EE24E28C1
+:105480002403004024420001504300030000102187
+:105490008EE24E2824420001AEE24E288EE24E28D3
+:1054A000000210C024424E3802E2202124020005EE
+:1054B000AC82000024020001AC8200041520000A26
+:1054C0003C040001AFAB00108EE272643C040001AA
+:1054D000248457303C050004AFA200148EE604E497
+:1054E000080028BE34A5F1148EE2726434843800BA
+:1054F00003641821244200100043102B1440007351
+:10550000000000008EE27264244800100364102141
+:105510000102102B144000023C02FFFF0102402157
+:105520008F8501002762300024A6002000C2102BC6
+:1055300050400001276628008F82010810C2000435
+:10554000000000008F82010414C200072563000CD4
+:105550008EE201A80000482124420001AEE201A829
+:10556000080025A08EE201A82C64000C0144102143
+:10557000ACA20000ACA3000424E2FFF4A4A2000E3D
+:1055800024020006ACA80008ACA200188EE204E4D5
+:10559000ACA2001C8EE204C83C03000200431025AC
+:1055A000ACA20010AF86010092E204EC1440003778
+:1055B000240900018EE24E28000210C024424E3819
+:1055C00002E220218C830000240200051462001FE7
+:1055D000000000008EE34E288EE24E2C1062001B6D
+:1055E000240300408C82000424420001AC820004A9
+:1055F0008EE24E2C8EE54E28244200011043000717
+:10560000000000008EE24E2C2442000110A2000592
+:10561000000000000800258A0000000014A000051A
+:10562000000000008F82010824420020AF820108A0
+:105630008F8201088C8200042C420011504000131C
+:10564000AC800000080025A0000000008EE24E287B
+:1056500024030040244200015043000300001021B5
+:105660008EE24E2824420001AEE24E288EE24E2801
+:10567000000210C024424E3802E22021240200051C
+:10568000AC82000024020001AC8200041520000A54
+:105690002508FFFCAFAB00108EE272643C040001F1
+:1056A000248457303C050004AFA200148EE604E4C5
+:1056B000080028BE34A5F12534028100A5020000AF
+:1056C0009582000E0800261DA50200028F850100AC
+:1056D0002762300024A6002000C2102B5040000199
+:1056E000276628008F82010810C200040000000015
+:1056F0008F82010414C200072563000C8EE201A80A
+:105700000000482124420001AEE201A80800260D55
+:105710008EE201A82C64000C01441021ACA2000010
+:10572000ACA300048EE3726424E2FFF4A4A2000E92
+:1057300024020006ACA2001824630010ACA30008E9
+:105740008EE204E4ACA2001C8EE204C83C0300021A
+:1057500000431025ACA20010AF86010092E204ECD9
+:1057600014400037240900018EE24E28000210C0C8
+:1057700024424E3802E220218C83000024020005DE
+:105780001462001F000000008EE34E288EE24E2CB3
+:105790001062001B240300408C820004244200019C
+:1057A000AC8200048EE24E2C8EE54E28244200018D
+:1057B00010430007000000008EE24E2C244200013E
+:1057C00010A2000500000000080025F700000000FE
+:1057D00014A00005000000008F8201082442002070
+:1057E000AF8201088F8201088C8200042C420011D4
+:1057F00050400013AC8000000800260D000000009F
+:105800008EE24E282403004024420001504300034E
+:10581000000010218EE24E2824420001AEE24E2804
+:105820008EE24E28000210C024424E3802E22021AF
+:1058300024020005AC82000024020001AC820004B6
+:105840001520000A34028100AFAB00108EE27264B2
+:105850003C040001248457303C050004AFA200142E
+:105860008EE604E4080028BE34A5F0158EE37264C9
+:10587000A462000C8EE372649582000EA462000E96
+:105880000800268124E700048F840100276230008D
+:105890002485002000A2102B50400001276528001D
+:1058A0008F82010810A20004000000008F82010412
+:1058B00014A20007240200068EE201A8000048217D
+:1058C00024420001AEE201A8080026778EE201A87A
+:1058D000AC8A0000AC8B00048EE37264A487000ED7
+:1058E000AC820018AC8300088EE204E4AC82001C99
+:1058F0008EE204C83C03000200431025AC82001075
+:10590000AF85010092E204EC144000372409000145
+:105910008EE24E28000210C024424E3802E22021BE
+:105920008C830000240200051462001F00000000A8
+:105930008EE34E288EE24E2C1062001B24030040A2
+:105940008C82000424420001AC8200048EE24E2CC2
+:105950008EE54E282442000110430007000000009D
+:105960008EE24E2C2442000110A20005000000002F
+:10597000080026610000000014A0000500000000DF
+:105980008F82010824420020AF8201088F82010823
+:105990008C8200042C42001150400013AC800000A7
+:1059A00008002677000000008EE24E282403004005
+:1059B0002442000150430003000010218EE24E28D3
+:1059C00024420001AEE24E288EE24E28000210C0B2
+:1059D00024424E3802E2202124020005AC8200005D
+:1059E00024020001AC820004152000093C050004DB
+:1059F000AFAB00108EE272643C0400012484573087
+:105A0000AFA200148EE604E4080028BE34A5F0041A
+:105A10008EE2725C30E7FFFF00471021AEE2725C5D
+:105A20008EE204E48EE304FC8EE47258000211005E
+:105A300000431021AC44000C8EE27258AFA2001853
+:105A40008EE3725CAFA3001C8EE2725C2C42003CC1
+:105A500010400004246200012403FFFE00431024D0
+:105A6000AFA2001C8EE272643C06000134C638000E
+:105A70008EE3725C2405FFF80047102124420007E2
+:105A80000045102424630007AEE272588EE2726C67
+:105A90008EE472580065182400431023AEE2726C45
+:105AA000036610210082202B148000043C03FFFFBA
+:105AB0008EE2725800431021AEE272588EE27258A4
+:105AC000AEE272648F8200F024470008276218005B
+:105AD00000E2102B50400001276710008F8200F475
+:105AE00014E20007000000008EE201B4000048212B
+:105AF00024420001AEE201B4080026C48EE201B4E3
+:105B00008F8200F0240900018FA300188FA4001CCD
+:105B1000AC430000AC440004AF8700F01520001235
+:105B2000000D11428F8200F0AFA200108F8200F4AE
+:105B30003C0400012484573CAFA200148FA6001837
+:105B40008FA7001C3C0500040C00240334A5F005BD
+:105B50008EE2008824420001AEE200888EE20088D6
+:105B6000080028D3AEE0725C304300032402000238
+:105B70001062001628620003104000052402000194
+:105B80001062000800000000080027030000000069
+:105B90002402000310620017000000000800270321
+:105BA000000000008EE200E88EE300EC24630001B8
+:105BB0002C64000100441021AEE200E8AEE300ECEA
+:105BC0008EE200E8080027038EE300EC8EE200F08E
+:105BD0008EE300F4246300012C64000100441021D2
+:105BE000AEE200F0AEE300F48EE200F0080027031E
+:105BF0008EE300F48EE200F88EE300FC24630001E3
+:105C00002C64000100441021AEE200F8AEE300FC79
+:105C10008EE200F88EE300FC8EE2725C8EE400E01F
+:105C20008EE500E4004018210000102100A3282187
+:105C300000A3302B0082202100862021AEE400E06A
+:105C4000AEE500E4080028D3AEE0725C30E2FFFF6E
+:105C5000104001C131A202001040014D0000482156
+:105C600096E2045A30420010104001490000000042
+:105C70008F840100276230002485002000A2102BB1
+:105C800050400001276528008F82010810A20004FF
+:105C9000000000008F82010414A200062402000C00
+:105CA0008EE201A824420001AEE201A80800276E9E
+:105CB0008EE201A8AC8A0000AC8B00048EE3726413
+:105CC00024060005A482000EAC860018AC830008F0
+:105CD0008EE204E4AC82001C8EE204C8AC820010A8
+:105CE000AF85010092E204EC144000362409000163
+:105CF0008EE24E28000210C024424E3802E22021DB
+:105D00008C8200001446001F000000008EE34E2825
+:105D10008EE24E2C1062001B240300408C82000493
+:105D200024420001AC8200048EE24E2C8EE54E2807
+:105D30002442000110430007000000008EE24E2CB8
+:105D40002442000110A200050000000008002758AE
+:105D50000000000014A00005000000008F82010870
+:105D600024420020AF8201088F8201088C82000447
+:105D70002C42001150400013AC8000000800276E38
+:105D8000000000008EE24E2824030040244200015F
+:105D900050430003000010218EE24E2824420001EF
+:105DA000AEE24E288EE24E28000210C024424E3849
+:105DB00002E2202124020005AC820000240200013E
+:105DC000AC8200041520000A3C040001AFAB0010B7
+:105DD0008EE272643C040001248457303C050004C8
+:105DE000AFA200148EE604E4080028BE34A5F01427
+:105DF0008EE2726434843800036418212442001057
+:105E00000043102B14400073000000008EE2726407
+:105E100024480010036410210102102B14400002DA
+:105E20003C02FFFF010240218F8501002762300004
+:105E300024A6002000C2102B504000012766280035
+:105E40008F82010810C20004000000008F8201044C
+:105E500014C200072563000C8EE201A8000048214F
+:105E600024420001AEE201A8080027E28EE201A868
+:105E70002C64000C01441021ACA20000ACA300046F
+:105E800024E2FFF4A4A2000E24020006ACA800083D
+:105E9000ACA200188EE204E4ACA2001C8EE204C89E
+:105EA0003C03000200431025ACA20010AF860100A5
+:105EB00092E204EC14400037240900018EE24E28DF
+:105EC000000210C024424E3802E220218C830000E0
+:105ED000240200051462001F000000008EE34E281B
+:105EE0008EE24E2C1062001B240300408C820004C2
+:105EF00024420001AC8200048EE24E2C8EE54E2836
+:105F00002442000110430007000000008EE24E2CE6
+:105F10002442000110A2000500000000080027CC68
+:105F20000000000014A00005000000008F8201089E
+:105F300024420020AF8201088F8201088C82000475
+:105F40002C42001150400013AC800000080027E2F2
+:105F5000000000008EE24E2824030040244200018D
+:105F600050430003000010218EE24E28244200011D
+:105F7000AEE24E288EE24E28000210C024424E3877
+:105F800002E2202124020005AC820000240200016C
+:105F9000AC8200041520000A2508FFFCAFAB0010FE
+:105FA0008EE272643C040001248457303C050004F6
+:105FB000AFA200148EE604E4080028BE34A5F01554
+:105FC00034028100A50200009582000E0800285FBF
+:105FD000A50200028F8501002762300024A6002060
+:105FE00000C2102B50400001276628008F82010854
+:105FF00010C20004000000008F82010414C20007D8
+:106000002563000C8EE201A8000048212442000113
+:10601000AEE201A80800284F8EE201A82C64000C13
+:1060200001441021ACA20000ACA300048EE3726412
+:1060300024E2FFF4A4A2000E24020006ACA2001881
+:1060400024630010ACA300088EE204E4ACA2001CA0
+:106050008EE204C83C03000200431025ACA20010ED
+:10606000AF86010092E204EC1440003724090001DD
+:106070008EE24E28000210C024424E3802E2202157
+:106080008C830000240200051462001F0000000041
+:106090008EE34E288EE24E2C1062001B240300403B
+:1060A0008C82000424420001AC8200048EE24E2C5B
+:1060B0008EE54E2824420001104300070000000036
+:1060C0008EE24E2C2442000110A2000500000000C8
+:1060D000080028390000000014A00005000000009E
+:1060E0008F82010824420020AF8201088F820108BC
+:1060F0008C8200042C42001150400013AC80000040
+:106100000800284F000000008EE24E2824030040C3
+:106110002442000150430003000010218EE24E286B
+:1061200024420001AEE24E288EE24E28000210C04A
+:1061300024424E3802E2202124020005AC820000F5
+:1061400024020001AC8200041520000A3402810000
+:10615000AFAB00108EE272643C040001248457301F
+:106160003C050004AFA200148EE604E4080028BE3B
+:1061700034A5F0168EE37264A462000C8EE37264A0
+:106180009582000EA462000E080028C224E70004D5
+:106190008F83010027623000246400200082102BCE
+:1061A00050400001276428008F82010810820004FB
+:1061B000000000008F8201041482000724050005FE
+:1061C0008EE201A80000482124420001AEE201A8AD
+:1061D000080028B68EE201A8AC6A0000AC6B00048F
+:1061E0008EE27264A467000EAC650018AC62000811
+:1061F0008EE204E4AC62001C8EE204C8AC620010C3
+:10620000AF84010092E204EC14400036240900013E
+:106210008EE24E28000210C024424E3802E22021B5
+:106220008C8200001445001F000000008EE34E2801
+:106230008EE24E2C1062001B240300408C8200046E
+:1062400024420001AC8200048EE24E2C8EE54E28E2
+:106250002442000110430007000000008EE24E2C93
+:106260002442000110A2000500000000080028A040
+:106270000000000014A00005000000008F8201084B
+:1062800024420020AF8201088F8201088C82000422
+:106290002C42001150400013AC800000080028B6CA
+:1062A000000000008EE24E2824030040244200013A
+:1062B00050430003000010218EE24E2824420001CA
+:1062C000AEE24E288EE24E28000210C024424E3824
+:1062D00002E2202124020005AC8200002402000119
+:1062E000AC8200041520000B3C0500043C040001B6
+:1062F00024845748AFAB0010AFA000148EE604E42E
+:1063000034A5F0170C00240330E7FFFF080028E154
+:10631000000000008EE272643C05000130E4FFFFE3
+:1063200000441021AEE272648EE2725C8EE372640D
+:1063300034A5380000441021AEE2725C03651021E0
+:106340000062182B146000043C03FFFF8EE27264AD
+:1063500000431021AEE272648EE304E496E2045836
+:10636000246300012442FFFF00621824AEE304E42A
+:106370008EE304E48EE204E01462000500000000F5
+:106380008F8200602403FFF700431024AF82006077
+:106390008FBF002003E0000827BD002827BDFFE0D5
+:1063A000AFBF00188EE304E88EE204E010620189BA
+:1063B000000000008EE204E88EE304FC00021100FD
+:1063C000006218219467000892E204ED8C680000D6
+:1063D0008C69000410400023946A000A8EE204C80D
+:1063E00034460400314202001040001F000000004B
+:1063F00096E2045A304200101040001B3C0280001C
+:106400003C01000100370821AC2283D88EE272647F
+:106410009464000E3C05000134A5380024420004B9
+:10642000AEE272648EE372640004240003651021FE
+:106430003C01000100370821AC2483DC0062182BEA
+:106440001460000524E700048EE272643C03FFFF41
+:1064500000431021AEE272648EE2726408002917D4
+:10646000AEE272588EE604C88EE2726C30E4FFFF32
+:106470000044102A10400015000000008F8200D850
+:106480008EE3725800431023AEE2726C8EE2726C9F
+:106490001C4000070044102A8EE2726C3C0300018D
+:1064A00000431021AEE2726C8EE2726C0044102A3E
+:1064B00010400006000000008EE201B824420001F6
+:1064C000AEE201B808002A728EE201B83C02000177
+:1064D000005710218C4283D85440000124E7FFFC70
+:1064E00031420004104000B930E2FFFF3C020001DD
+:1064F000005710218C4283D81040002F00005021FB
+:106500008F840100276230002485002000A2102B18
+:1065100050400001276528008F82010810A2003238
+:10652000000000008F82010410A2002F2402001539
+:10653000AC880000AC8900048EE37264A487000E6E
+:10654000AC820018AC8300088EE204E83C03000132
+:10655000007718218C6383DCAC8600100043102583
+:10656000AC82001CAF85010092E204EC144000668E
+:10657000240A00018EE24E28240300402442000138
+:1065800050430003000010218EE24E2824420001F7
+:10659000AEE24E288EE24E28000210C024424E3851
+:1065A00002E2182124020015AC620000240200015E
+:1065B000080029BFAC6200048F840100276230000C
+:1065C0002485002000A2102B5040000127652800E0
+:1065D0008F82010810A20004000000008F820104D5
+:1065E00014A20006240200068EE201A82442000143
+:1065F000AEE201A8080029BF8EE201A8AC88000025
+:10660000AC8900048EE37264A487000EAC8200188B
+:10661000AC8300088EE204E8AC860010AC82001C5B
+:10662000AF85010092E204EC14400037240A000117
+:106630008EE24E28000210C024424E3802E2202191
+:106640008C830000240200051462001F000000007B
+:106650008EE34E288EE24E2C1062001B2403004075
+:106660008C82000424420001AC8200048EE24E2C95
+:106670008EE54E2824420001104300070000000070
+:106680008EE24E2C2442000110A200050000000002
+:10669000080029A90000000014A000050000000067
+:1066A0008F82010824420020AF8201088F820108F6
+:1066B0008C8200042C42001150400013AC8000007A
+:1066C000080029BF000000008EE24E28240300408D
+:1066D0002442000150430003000010218EE24E28A6
+:1066E00024420001AEE24E288EE24E28000210C085
+:1066F00024424E3802E2202124020005AC82000030
+:1067000024020001AC8200041540000A24020001AA
+:10671000AFA900108EE272643C040001248457305B
+:106720003C050004AFA200148EE604E408002A4FE2
+:1067300034A5F204A2E204ED8EE204E88EE304FC48
+:106740008EE472583C06000134C638003C0100015A
+:1067500000370821AC2083D83C0100010037082114
+:10676000AC2083DC0002110000431021AC44000C7B
+:106770008EE272642405FFF830E3FFFF004310212E
+:10678000244200070045102424630007AEE272583B
+:106790008EE2726C8EE47258006518240043102358
+:1067A000AEE2726C036610210082202B148000047C
+:1067B0003C03FFFF8EE2725800431021AEE2725894
+:1067C0008EE2725808002A64AEE2726410400073D0
+:1067D000000000008F830100276230002464002045
+:1067E0000082102B14400002000050212764280072
+:1067F0008F82010810820004000000008F820104D3
+:1068000014820006240500058EE201A8244200013E
+:10681000AEE201A808002A468EE201A8AC6800009A
+:10682000AC6900048EE27264A467000EAC650018C7
+:10683000AC6200088EE204E8AC660010AC62001C9A
+:10684000AF84010092E204EC14400036240A0001F7
+:106850008EE24E28000210C024424E3802E220216F
+:106860008C8200001445001F000000008EE34E28BB
+:106870008EE24E2C1062001B240300408C82000428
+:1068800024420001AC8200048EE24E2C8EE54E289C
+:106890002442000110430007000000008EE24E2C4D
+:1068A0002442000110A200050000000008002A3068
+:1068B0000000000014A00005000000008F82010805
+:1068C00024420020AF8201088F8201088C820004DC
+:1068D0002C42001150400013AC80000008002A46F2
+:1068E000000000008EE24E282403004024420001F4
+:1068F00050430003000010218EE24E282442000184
+:10690000AEE24E288EE24E28000210C024424E38DD
+:1069100002E2202124020005AC82000024020001D2
+:10692000AC8200041540000C30E5FFFF3C04000180
+:10693000248457483C050004AFA90010AFA0001400
+:106940008EE604E434A5F2370C00240330E7FFFFA1
+:1069500008002A72000000008EE2726400451021D7
+:10696000AEE272648EE2726C8EE372643C040001EB
+:1069700034843800A2E004ED00451023AEE2726CCE
+:10698000036410210062182B146000043C03FFFF15
+:106990008EE2726400431021AEE272648EE304E87A
+:1069A00096E20458246300012442FFFF0062182489
+:1069B000AEE304E88EE304E88EE204E0146200052E
+:1069C000000000008F8200602403FFF700431024C2
+:1069D000AF8200608FBF001803E0000827BD0020D1
+:1069E00027BDFFE0AFBF001CAFB000188F820100D1
+:1069F0008EE34E2C8F8201048F8501082402004013
+:106A00002463000150620003000010218EE24E2C2E
+:106A100024420001AEE24E2C8EE24E2C8EE34E2C30
+:106A2000000210C024424E3802E220218EE24E289D
+:106A30008C8700041462000700A030218F820108B7
+:106A400024420020AF8201088F82010808002AA298
+:106A5000AC8000008EE24E2C240300402442000152
+:106A600050430003000010218EE24E2C244200010E
+:106A7000000210C024424E3802E220218C82000421
+:106A80008F8301080002114000621821AF830108C2
+:106A9000AC8000008CC200182443FFFE2C6200135F
+:106AA000104000C1000310803C01000100220821B9
+:106AB0008C22577000400008000000008EE204F0B5
+:106AC00000471021AEE204F08EE204F08F43023C56
+:106AD0000043102B144000BE000000008EE304E4CD
+:106AE0008EE204F8506200BAA2E004F48F83012021
+:106AF000276238002466002000C2102B504000019D
+:106B0000276630008F82012810C2000400000000B8
+:106B10008F82012414C20007000000008EE201A44D
+:106B20000000802124420001AEE201A408002B12E3
+:106B30008EE201A48EE204E4AC62001C8EE404B098
+:106B40008EE504B42462001CAC6200082402000834
+:106B5000A462000E24020011AC620018AC640000B4
+:106B6000AC6500048EE204C4AC620010AF86012064
+:106B700092E24E2014400037241000018EE24E3085
+:106B8000000210C02442503802E220218C83000011
+:106B9000240200121462001F000000008EE34E3039
+:106BA0008EE24E341062001B240300408C820004ED
+:106BB00024420001AC8200048EE24E348EE54E3059
+:106BC0002442000110430007000000008EE24E3412
+:106BD0002442000110A200050000000008002AFC69
+:106BE0000000000014A00005000000008F820128B2
+:106BF00024420020AF8201288F8201288C82000469
+:106C00002C42001150400013AC80000008002B12F1
+:106C1000000000008EE24E302403004024420001B8
+:106C200050430003000010218EE24E302442000148
+:106C3000AEE24E308EE24E30000210C02442503898
+:106C400002E2202124020012AC8200002402000192
+:106C5000AC8200045600000B241000018EE204E414
+:106C60003C04000124845754AFA00014AFA20010CC
+:106C70008EE606088F4702283C0500090C00240315
+:106C800034A5F006160000032402000108002B7151
+:106C9000A2E204F48EE2017024420001AEE201702F
+:106CA0008EE201708EE204E4A2E004F4AEE004F0AF
+:106CB000AEE204F88F42023C50400045AEE07274F0
+:106CC0008EE2018424420001AEE201848EE201845E
+:106CD00008002B71AEE072748EE2050424030040BC
+:106CE0002442000150430003000010218EE20504FD
+:106CF00024420001AEE205048EE205048CC30018B4
+:106D000000021080005710218C4405082402000363
+:106D10001462000F000000003C0200010057102127
+:106D2000904283B110400014000000008EE201D0B8
+:106D30008EE3524000441021AEE201D08EE201D831
+:106D400000641821306300FF08002B59AEE3524065
+:106D50008EE201CC8EE30E1000441021AEE201CC95
+:106D60008EE201D800641821306301FFAEE30E10FB
+:106D700000441021AEE201D88EE20000344200400F
+:106D800008002B71AEE200008EE2014C3C010001D4
+:106D900000370821A02083E024420001AEE2014C2C
+:106DA00008002B718EE2014C94C7000E8CC2001CAF
+:106DB0003C04000124845760AFA60014AFA2001069
+:106DC0008CC600183C0500080C00240334A50910EB
+:106DD0008FBF001C8FB0001803E0000827BD002003
+:106DE00027BDFF98AFBF0060AFBE005CAFB60058D4
+:106DF000AFB50054AFB40050AFB3004CAFB20048D1
+:106E0000AFB10044AFB000408F8301088F8201040E
+:106E1000AFA00024106203E7AFA0002C3C1E0001CD
+:106E200037DE38003C0BFFFF8F9301088E6200189D
+:106E30008F8301042443FFFE2C620014104003CF13
+:106E4000000310803C010001002208218C2257C061
+:106E500000400008000000009663000E8EE2725CA5
+:106E60008EE404F000431021AEE2725C8E63001CDD
+:106E700096E2045824840001AEE404F02463000187
+:106E80002442FFFF00621824AEE304E48F42023C78
+:106E90000082202B148003B9000000008F830120A2
+:106EA000276238002466002000C2102B50400001E9
+:106EB000276630008F82012810C200040000000005
+:106EC0008F82012414C20007000000008EE201A49A
+:106ED0000000802124420001AEE201A408002BFE44
+:106EE0008EE201A48EE204E4AC62001C8EE404B0E5
+:106EF0008EE504B42462001CAC6200082402000881
+:106F0000A462000E24020011AC620018AC64000000
+:106F1000AC6500048EE204C4AC620010AF860120B0
+:106F200092E24E2014400037241000018EE24E30D1
+:106F3000000210C02442503802E220218C8300005D
+:106F4000240200121462001F000000008EE34E3085
+:106F50008EE24E341062001B240C00408C82000430
+:106F600024420001AC8200048EE24E348EE34E30A7
+:106F700024420001104C0007000000008EE24E3455
+:106F800024420001106200050000000008002BE808
+:106F90000000000014600005000000008F8201283E
+:106FA00024420020AF8201288F8201288C820004B5
+:106FB0002C42001150400013AC80000008002BFE52
+:106FC000000000008EE24E30240C004024420001FC
+:106FD000504C0003000010218EE24E30244200018C
+:106FE000AEE24E308EE24E30000210C024425038E5
+:106FF00002E2202124020012240C0001AC820000D5
+:10700000AC8C00045600000D241000018EE204E454
+:107010003C04000124845754AFA00014AFA2001018
+:107020008EE606088F4702283C05000934A5F006C5
+:107030000C002403AFAB00388FAB00381200030AFA
+:10704000240C000108002F1900000000966C001CA1
+:10705000AFAC002C9662001E3C0C8000AFAC00244C
+:10706000AE62001C8E75001C8EE204FC8EE404FCF3
+:1070700000151900006210218C52000C92E27B98DE
+:10708000006418219476000A1440000332C2000202
+:10709000AEF27BA4AEF57B9C1040004B000080213B
+:1070A00096E2045A304200021040004700000000FF
+:1070B0008E63001C8EE204FC00032100008210217C
+:1070C0008C42000C037E1821244200220043102B26
+:1070D0001440000A240500148EE204FC00821021F2
+:1070E0008C44000CAFAB00380C002F752484000ECC
+:1070F0008FAB003808002C523050FFFF8EE204FCAA
+:10710000008210218C42000C9450000E9443001019
+:10711000944400129445001402038021020480214B
+:107120000205802194430016944400189445001AE7
+:107130000203802102048021020580219443001C67
+:107140009444001E94420020020380210204802106
+:107150000202802100101C023202FFFF0062802127
+:107160008E63001C8EE204FC001024020003290040
+:1071700000A210218C43000C3202FFFF008280210C
+:10718000037E1021246300180062182B146000098C
+:10719000000000008EE204FC00A210218C43000CD1
+:1071A000001010273C01FFFF0023082108002C6F6E
+:1071B000A42200188EE204FC00A210218C43000CD3
+:1071C00000101027A462001896E2045A00008821DB
+:1071D00030420008144000630000A0218E63001CB0
+:1071E0008EE204FC0003310000C210218C42000C2E
+:1071F000037E1821244200220043102B1440003546
+:10720000000000008EE204FC00C210218C42000C41
+:1072100024470010037E102100E2102B5040000193
+:1072200000EB38218EE204FC94F1000000C2102132
+:107230008C42000C24470016037E102100E2102B24
+:10724000144000022634FFEC00EB38218EE204FCEF
+:1072500090E3000100C210218C42000C2447001A68
+:10726000037E102100E2102B1440000202838821CB
+:1072700000EB382194E2000024E70002022288217A
+:10728000037E102100E2102B5040000100EB38215A
+:1072900094E2000024E7000202228821037E1021EC
+:1072A00000E2102B5040000100EB382194E2000076
+:1072B00024E7000202228821037E102100E2102B25
+:1072C0005040000100EB382194E2000008002CD06F
+:1072D000022288218EE204FC00C210218C43000CA3
+:1072E0008EE204FC947100108EE304FC00C21021B5
+:1072F0008C44000C00C318218C62000C2634FFEC77
+:10730000908400178EE304FC9442001A02848821C2
+:1073100000C318218C65000C8EE304FC0222882136
+:107320008EE204FC00C3182100C210218C44000C22
+:107330008C62000C94A3001C9484001E94420020D4
+:1073400002238821022488210222882100111C02A4
+:107350003222FFFF0062882100111C023222FFFF4F
+:107360000062882132C20001104000B2000000001B
+:1073700096E2045A30420001104000AE32C2008052
+:10738000104000080000000092E27B9814400005C5
+:1073900000000000240C0001A2EC7B98AEF57B9C61
+:1073A000AEF27BA48EE304FC001511000043102113
+:1073B0008C47000C037E182124E2000E0043102BA2
+:1073C0001440000800E020212405000E0C002F7559
+:1073D000AFAB00383042FFFF8FAB003808002D09FB
+:1073E0000202802194E6000024E7000294E50000F8
+:1073F00024E7000294E3000024E7000294E2000086
+:1074000024E7000294E4000024E700020206802141
+:1074100002058021020380210202802194E2000003
+:1074200094E30002020480210202802102038021F1
+:1074300000101C023202FFFF0062802100101C02BB
+:107440003202FFFF8EE47B9C0062802114950004D1
+:107450003205FFFF9662001608002D17005120210B
+:107460009662001600542021000414023083FFFFAE
+:1074700000432021008520230004140200822021E3
+:107480003084FFFF508000013404FFFF8EE27BA4B4
+:1074900024430017037E10210062102B504000018E
+:1074A000006B182190630000240200111462003167
+:1074B000240200068EE27BA4037E182124420028C9
+:1074C0000043102B14400018000000008EE27B9C4B
+:1074D00012A2000A32C201008EE27BA43C01FFFF2F
+:1074E00000220821942200280082202100041C028E
+:1074F0003082FFFF0062202132C2010014400004EC
+:107500000004102792E27B98144000020004102728
+:107510003044FFFF8EE27BA43C01FFFF00220821E4
+:1075200008002D8AA42400288EE27B9C12A2000869
+:1075300032C201008EE27BA4944200280082202106
+:1075400000041C023082FFFF0062202132C20100D1
+:10755000144000040004102792E27B9814400002BB
+:10756000000410273044FFFF8EE27BA408002D8A20
+:10757000A44400281462002F037E18218EE27BA40D
+:10758000244200320043102B144000180000000079
+:107590008EE27B9C12A2000A32C201008EE27BA422
+:1075A0003C01FFFF002208219422003200822021AA
+:1075B00000041C023082FFFF0062202132C2010061
+:1075C000144000040004102792E27B98144000024B
+:1075D000000410273044FFFF8EE27BA43C01FFFF34
+:1075E0000022082108002D8AA42400328EE27B9C10
+:1075F00012A2000832C201008EE27BA49442003243
+:107600000082202100041C023082FFFF0062202142
+:1076100032C20100144000040004102792E27B985B
+:1076200014400002000410273044FFFF8EE27BA4C8
+:10763000A44400328FAC00241180002C037E18215A
+:107640008E420000AE42FFFC2642000A0043102B8F
+:107650001440001B3403810026430004037E1021E4
+:107660000062102B1440000300602021006B1821E1
+:10767000006020218C62000024630004AE42000000
+:10768000037E10210062102B50400001006B182176
+:107690008C620000AC82000034028100A462000011
+:1076A00024630002037E10210062102B5040000171
+:1076B000006B182197AC002E08002DB4A46C0000BC
+:1076C0008E4200048E440008A643000897AC002EAA
+:1076D000A64C000AAE420000AE4400049662000EC2
+:1076E0002652FFFC24420004A662000E9662000EA1
+:1076F0008EE3725C00621821AEE3725CAFB20018D8
+:107700008EE3725CAFA3001C8EE2725C2C42003CE4
+:1077100010400004246200012403FFFE00431024F3
+:10772000AFA2001C32C200801040000C32C2010027
+:107730008EE27BA824430001000210C000571021F4
+:10774000AEE37BA88FA300188FA4001CAC437BACD6
+:10775000AC447BB008002EA0AEE0725C104000721A
+:10776000000000008EE27BA824430001000210C04C
+:1077700000571021AEE37BA88FA300188FA4001C34
+:10778000AC437BACAC447BB08EE27BA81040006382
+:1077900000004821000050218F8200F0244800089A
+:1077A000276218000102102B5040000127681000CA
+:1077B0008F8200F415020007000000008EE201B481
+:1077C0000000802124420001AEE201B408002DFA3D
+:1077D0008EE201B48F8300F02410000101571021C4
+:1077E0008C447BAC8C457BB0AC640000AC65000481
+:1077F000AF8800F01600000602EA10218EE2008831
+:1078000024420001AEE2008808002E3F8EE200888C
+:107810008C427BB08EE400E08EE500E48EE67B9C3B
+:10782000004018210000102100A3282100A3382BBC
+:1078300000822021008720218EE204FC00C9302133
+:1078400000063100AEE400E0AEE500E400C2302105
+:1078500094C2000A240C00020002114230430003CB
+:10786000106C00162862000310400005240C000173
+:10787000106C00080000000008002E3F000000000F
+:10788000240C0003106C00170000000008002E3FBD
+:10789000000000008EE200E88EE300EC24630001AB
+:1078A0002C64000100441021AEE200E8AEE300ECDD
+:1078B0008EE200E808002E3F8EE300EC8EE200F03E
+:1078C0008EE300F4246300012C64000100441021C5
+:1078D000AEE200F0AEE300F48EE200F008002E3FCE
+:1078E0008EE300F48EE200F88EE300FC24630001D6
+:1078F0002C64000100441021AEE200F8AEE300FC6D
+:107900008EE200F88EE300FC8EE27BA825290001C0
+:107910000122102B1440FFA0254A0008A2E07B980A
+:1079200008002E9FAEE07BA88F8200F0244700085D
+:107930002762180000E2102B50400001276710005A
+:107940008F8200F414E20007000000008EE201B410
+:107950000000802124420001AEE201B408002E5D47
+:107960008EE201B48F8200F0241000018FA3001872
+:107970008FA4001CAC430000AC440004AF8700F0AF
+:1079800016000007000000008EE20088244200017B
+:10799000AEE200888EE2008808002EA0AEE0725CA5
+:1079A0008EE2725C8EE400E08EE500E4240C0002BE
+:1079B000004018210000102100A3282100A3302B33
+:1079C000008220210086202100161142304300034E
+:1079D000AEE400E0AEE500E4106C00172C6200039A
+:1079E00010400005240C0001106C0008000000008D
+:1079F00008002EA0AEE0725C240C0003106C00198D
+:107A00000000000008002EA0AEE0725C8EE200E8EC
+:107A10008EE300EC246300012C640001004410217B
+:107A2000AEE200E8AEE300EC8EE200E88EE300ECAC
+:107A300008002EA0AEE0725C8EE200F08EE300F44F
+:107A4000246300012C64000100441021AEE200F028
+:107A5000AEE300F48EE200F08EE300F408002EA006
+:107A6000AEE0725C8EE200F88EE300FC246300015D
+:107A70002C64000100441021AEE200F8AEE300FCEB
+:107A80008EE200F88EE300FCAEE0725C8E62001CB9
+:107A900096E304588EE404F0244200012463FFFFBF
+:107AA0000043102424840001AEE204E4AEE404F0B8
+:107AB0008F42023C0082202B148000B000000000A6
+:107AC0008F830120276238002466002000C2102B1B
+:107AD00050400001276630008F82012810C2000448
+:107AE000000000008F82012414C200070000000083
+:107AF0008EE201A40000802124420001AEE201A434
+:107B000008002F078EE201A48EE204E4AC62001CA0
+:107B10008EE404B08EE504B42462001CAC6200085C
+:107B200024020008A462000E24020011AC620018B6
+:107B3000AC640000AC6500048EE204C4AC620010CA
+:107B4000AF86012092E24E2014400037241000013D
+:107B50008EE24E30000210C02442503802E2202152
+:107B60008C830000240200121462001F0000000039
+:107B70008EE34E308EE24E341062001B240C004027
+:107B80008C82000424420001AC8200048EE24E3458
+:107B90008EE34E3024420001104C0007000000002C
+:107BA0008EE24E3424420001106200050000000005
+:107BB00008002EF100000000146000050000000025
+:107BC0008F82012824420020AF8201288F82012861
+:107BD0008C8200042C42001150400013AC80000045
+:107BE00008002F07000000008EE24E30240C0040F9
+:107BF00024420001504C0003000010218EE24E3060
+:107C000024420001AEE24E308EE24E30000210C03F
+:107C10002442503802E2202124020012240C0001E8
+:107C2000AC820000AC8C00045600000D2410000152
+:107C30008EE204E43C04000124845754AFA00014F5
+:107C4000AFA200108EE606088F4702283C05000907
+:107C500034A5F0060C002403AFAB00388FAB00381E
+:107C600016000003240C000108002F5CA2EC04F4B1
+:107C70008EE2017024420001AEE201708EE20170DA
+:107C80008EE204E4A2E004F4AEE004F0AEE072742C
+:107C9000AEE204F88F42023C1040003800000000C1
+:107CA0008EE2018424420001AEE2018408002F5CD0
+:107CB0008EE201848EE20504240C0040244200017F
+:107CC000504C0003000010218EE205042442000104
+:107CD000AEE205048EE205048E630018240C000356
+:107CE0000002108000571021146C000F8C4405080E
+:107CF0003C02000100571021904283B11040001453
+:107D0000000000008EE201D08EE3524000441021BA
+:107D1000AEE201D08EE201D800641821306300FF8A
+:107D200008002F4FAEE352408EE201CC8EE30E10DE
+:107D300000441021AEE201CC8EE201D8006418218B
+:107D4000306301FFAEE30E1000441021AEE201D813
+:107D50008EE200003442004008002F5CAEE20000DA
+:107D60008EE2014C3C01000100370821A02083E095
+:107D700024420001AEE2014C8EE2014C8F820108E8
+:107D800024420020AF8201088F8201088F820108FF
+:107D9000276330000043102B1440000227622800A4
+:107DA000AF8201088F8301088F8201041462FC1ED8
+:107DB000000000008FBF00608FBE005C8FB60058CF
+:107DC0008FB500548FB400508FB3004C8FB2004871
+:107DD0008FB100448FB0004003E0000827BD006869
+:107DE0000005284310A0000D000030213C030001D5
+:107DF000346338003C07FFFF036310210082102B1F
+:107E00005040000100872021948200002484000259
+:107E100024A5FFFF14A0FFF800C2302100061C02B9
+:107E200030C2FFFF0062302100061C0230C2FFFF9B
+:107E30000062302103E0000830C2FFFF27BDFF8849
+:107E4000240F0001AFBF0070AFBE006CAFB600687A
+:107E5000AFB50064AFB40060AFB3005CAFB2005820
+:107E6000AFB10054AFB00050A3A00027AFAF002CBB
+:107E70008EE204D400008021304200011440002A28
+:107E8000A3A000378F8700E08F8800C48F8200E8AE
+:107E900000E220232C8210005040000124841000B6
+:107EA000000420C2008018218EE400C88EE500CCBA
+:107EB0000000102100A3282100A3302B00822021E4
+:107EC00000862021AEE400C8AEE500CC8F8300C858
+:107ED0003C02000A3442EFFF010320230044102B30
+:107EE000104000033C02000A3442F00000822021CE
+:107EF000008018218EE400C08EE500C4000010212F
+:107F000000A3282100A3302B0082202100862021FD
+:107F1000AEE400C0AEE500C4AF8800C8AF8700E49F
+:107F2000080034CCAF8700E83C0200010057102164
+:107F3000904283C01040000B000000003C14000180
+:107F40000297A0218E9483C43C13000102779821EC
+:107F50008E7383C83C1200010257902108003193B0
+:107F60008E5283CC8F8300E08F8200E410430007A1
+:107F7000000088218F8200E4241100018C4300005E
+:107F80008C440004AFA30018AFA4001C1620000E00
+:107F90003C02FFFF8F8200C4AFA200108F8200C896
+:107FA0003C04000124845870AFA200148F8600E0C6
+:107FB0008F8700E43C0500060C00240334A5F00084
+:107FC000080034CC000000008FA3001C8FB2001802
+:107FD0003074FFFF2694FFFC00621024104000580C
+:107FE000024098213C020080006210241040000AE8
+:107FF0003C0400408EE2007C24420001AEE2007CA2
+:108000008EE2007C8EE201FC24420001AEE201FC23
+:10801000080034C68EE201FC3C0600043C0B000163
+:108020003C0A00023C0500103C0900088EE200807A
+:108030003C0800203407800024420001AEE20080AA
+:108040008EE200808FA2001C0044182410660021DC
+:1080500000C3102B1440000700000000106B00113B
+:1080600000000000106A0015000000000800304900
+:10807000000420421065002300A3102B14400005CB
+:1080800000000000106900190000000008003049DD
+:108090000004204210680021000000000800304960
+:1080A000000420428EE2003424420001AEE200349B
+:1080B0008EE2003408003049000420428EE201ECD8
+:1080C00024420001AEE201EC8EE201EC08003049EE
+:1080D000000420428EE201F024420001AEE201F0F1
+:1080E0008EE201F008003049000420428EE201F4E3
+:1080F00024420001AEE201F48EE201F408003049AE
+:10810000000420428EE2003024420001AEE2003042
+:108110008EE2003008003049000420428EE201F86F
+:1081200024420001AEE201F88EE201F80004204290
+:108130001087047C000000000800300E00000000E2
+:108140003C02000100571021904283B21440008489
+:10815000240200013C03000100771821906383B3DF
+:108160001462007F3C0201008E4300000062102474
+:108170001040006F2402FFFF14620005241000016C
+:10818000964300043402FFFF1062007500000000F7
+:1081900092E204D814400072000000003C0200018A
+:1081A000005710218C4283B4284200051040002063
+:1081B000000038213C020001005710218C4283B49A
+:1081C000184000160000282196660000000520C017
+:1081D000009710219442777E1446000900971021E1
+:1081E0009443778096620002146200050097102184
+:1081F00094437782966200045062000824070001CD
+:108200003C020001005710218C4283B424A50001D8
+:1082100000A2102A5440FFEE000520C030E200FF0B
+:108220001040044000000000080030D500000000AD
+:10823000024020210C0022FE240500063044001FCD
+:10824000000428C002E510219442727C30424000B4
+:108250001440043400B710219443727E96620000EB
+:108260001462000B000418C000B710219443728000
+:108270009662000214620006000418C000B71021C4
+:10828000944372829662000410620035000418C0A4
+:1082900002E310219442727C304280001440042199
+:1082A00002E31021944B727C96670000000B28C0FB
+:1082B00000B710219442737E080030B700003021CF
+:1082C000000420C002E410219443737C02E41021D6
+:1082D000944B737C3063800014600010000B28C046
+:1082E00000B710219442737E1447FFF501602021EE
+:1082F00000B7102194437380966200025462FFF12C
+:10830000000420C000B710219443738296620004D9
+:108310005462FFEC000420C02406000130C200FFBC
+:108320001040040000000000080030D500000000EC
+:108330009743020296420000146203FA0000000014
+:108340009743020496420002146203F60000000004
+:108350009743020696420004146203F200000000F4
+:10836000924200003A030001304200010043102411
+:10837000104000742402FFFF8E63000014620004AA
+:108380003402FFFF966300041062006F240F0002A6
+:108390003C02000100571021904283B21440006A51
+:1083A000240F000392E204D854400068AFAF002CC1
+:1083B0003C020001005710218C4283B42842000582
+:1083C00010400020000038213C020001005710211D
+:1083D0008C4283B4184000160000282196660000E5
+:1083E000000520C0009710219442777E14460009B2
+:1083F0000097102194437780966200021462000572
+:10840000009710219443778296620004506200081E
+:10841000240700013C020001005710218C4283B464
+:1084200024A5000100A2102A5440FFEE000520C040
+:1084300030E200FF14400044240F0003080034C65B
+:1084400000000000024020210C0022FE240500064E
+:108450003044001F000428C002E510219442727CC1
+:1084600030424000144003AF00B710219443727EA5
+:10847000966200001462000B000418C000B71021BF
+:10848000944372809662000214620006000418C0D1
+:1084900000B7102194437282966200041062002794
+:1084A000000418C002E310219442727C3042800024
+:1084B0001440039C02E31021944B727C96670000E9
+:1084C000000B28C000B710219442737E0800313C95
+:1084D00000003021000420C002E410219443737C8A
+:1084E00002E41021944B737C306380001460001010
+:1084F000000B28C000B710219442737E1447FFF58B
+:108500000160202100B7102194437380966200021D
+:108510005462FFF1000420C000B71021944373821D
+:10852000966200045462FFEC000420C0240600019F
+:1085300030C200FF1040037B000000000800314FF4
+:10854000240F0003240F0001AFAF002C8F42026004
+:108550000054102B1040003A000000008F8300E40C
+:108560008F8200E01062000324630008AF8300E400
+:10857000AF8300E88EE400C08EE500C402801821BD
+:108580000000102100A3282100A3302B008220210D
+:1085900000862021AEE400C0AEE500C48EE20058A3
+:1085A00024420001AEE200588EE200588EE2007CC8
+:1085B00024420001AEE2007C8EE2007C8F8200E06B
+:1085C000AFA200108F8200E43C040001248458789C
+:1085D000AFA200148FA600188FA7001C3C05000650
+:1085E0000C00240334A5F003080034CC0000000084
+:1085F0008EE25240AFA200108EE252443C040001D1
+:1086000024845884AFA200148EE60E108EE70E1854
+:108610003C0500060C00240334A5F0028EE201C0E4
+:1086200024420001AEE201C08EE200008EE301C0F0
+:108630002403FFBF0043102408003470AEE20000A2
+:1086400096E204680054102B104000030000000064
+:10865000240F0001A3AF0027128003012416000796
+:1086600024150040241E0001240E00128EE2724CDC
+:108670008F43028024420001304207FF106202D380
+:108680000000000093A2002710400014000000002A
+:108690008EE352408EE252441062000926ED5244AD
+:1086A0008EE652448EE35244000211402442524866
+:1086B00002E2802124630001080031BF306B00FF1B
+:1086C00092E272481440FFCA000000008EE201E00E
+:1086D00024420001AEE201E08EE201E08EE30E10E2
+:1086E0008EE20E181062FFC226ED0E188EE60E18EE
+:1086F0008EE30E180002114024420E2002E2802177
+:1087000024630001306B01FF96E2046A30420010DE
+:1087100010400019000000009642000C340F810048
+:10872000144F0015000000003C020001005710210A
+:10873000904283C014400010000000009642000EDA
+:10874000A60200168E4200088E4300048E440000EC
+:108750002694FFFCAE42000CAE430008AE44000479
+:108760009602000E26730004240F0001A3AF003709
+:1087700034420200A602000E8E0200008E030004A6
+:108780003C04000134843800306A0007026A9823F0
+:10879000036410210262102B10400005028AA02100
+:1087A00002641023036218233C0200200043982334
+:1087B000268200072404FFF89603000A0044602480
+:1087C000006A1821006C102B104000020180382133
+:1087D00000603821AE1300188F88012024E20007C2
+:1087E0000044382427623800250900200122102B7C
+:1087F00050400001276930008F82012811220004B7
+:10880000000000008F82012415220007014018217A
+:108810008EE201A40000882124420001AEE201A4FE
+:108820000800324C8EE201A48E0400008E05000484
+:1088300000001021AD130008A507000EAD160018AA
+:10884000AD06001C00A3302B00A3282300822023A8
+:1088500000862023AD040000AD0500048EE204C0B4
+:10886000AD020010AF89012092E24E201440003387
+:10887000241100018EE24E30000210C02442503814
+:1088800002E220218C8200001456001F000000002C
+:108890008EE34E308EE24E341062001B000000006A
+:1088A0008C82000424420001AC8200048EE24E342B
+:1088B0008EE34E30244200011055000700000000F6
+:1088C0008EE24E34244200011062000500000000D8
+:1088D00008003239000000001460000500000000AC
+:1088E0008F82012824420020AF8201288F82012834
+:1088F0008C8200042C42001150400010AC8000001B
+:108900000800324C000000008EE24E30244200018C
+:1089100050550003000010218EE24E302442000129
+:10892000AEE24E308EE24E30000210C0244250388B
+:1089300002E22021AC960000AC9E00041620001834
+:108940003C0500068E0200183C0400012484589067
+:10895000AFA200108E0200008E03000434A5F009BF
+:10896000020030210C002403AFA3001493A20037AF
+:1089700010400216340F81008E4200048E4300081E
+:108980008E44000CA64F000CAE420000AE43000423
+:10899000AE4400089602001608003470A642000E8D
+:1089A00014EC0168028A1823960C000A9603000E44
+:1089B000028A1023A602000A34620004A602000EF6
+:1089C0008F88012027623800250900200122102B02
+:1089D00014400002306AFFFF276930008F820128AF
+:1089E00011220004000000008F82012415220007DC
+:1089F000240400208EE201A400008821244200010A
+:108A0000AEE201A4080032CA8EE201A48EE5724CE7
+:108A10008EE604908EE70494A504000E240400045E
+:108A2000AD100008AD0400180005294000A0182171
+:108A30000000102100E3382100E3202B00C2302188
+:108A400000C43021AD060000AD0700048EE2724C78
+:108A5000AD02001C8EE204C4AD020010AF890120FB
+:108A600092E24E2014400033241100018EE24E3079
+:108A7000000210C02442503802E220218C82000003
+:108A80001456001F000000008EE34E308EE24E347C
+:108A90001062001B000000008C82000424420001D0
+:108AA000AC8200048EE24E348EE34E30244200014C
+:108AB00010550007000000008EE24E3424420001F1
+:108AC0001062000500000000080032B7000000003E
+:108AD00014600005000000008F820128244200205D
+:108AE000AF8201288F8201288C8200042C42001161
+:108AF00050400010AC800000080032CA00000000A6
+:108B00008EE24E3024420001505500030000102137
+:108B10008EE24E3024420001AEE24E308EE24E3004
+:108B2000000210C02442503802E22021AC9600001E
+:108B3000AC9E00041620000D00000000A60C000AE8
+:108B4000A60A000E8F820100AFA200108F820104DE
+:108B50003C0400012484589C3C050006AFA200148C
+:108B60008EE6724C0800343B34A5F00B3C0100014A
+:108B700000370821A02083C0ADAB00008EE201D8F1
+:108B80008EE3724C2442FFFFAEE201D88EE201D8A0
+:108B900024630001306307FF26E2524415A2000659
+:108BA000AEE3724C8EE201D02442FFFFAEE201D070
+:108BB000080032EF8EE201D08EE201CC2442FFFFAA
+:108BC000AEE201CC8EE201CC8F4202401040007335
+:108BD000000000008EE20E1C24420001AEE20E1CDA
+:108BE0008F4302400043102B144001760000A02167
+:108BF0008F830120276238002466002000C2102BDA
+:108C000050400001276630008F82012810C2000406
+:108C1000000000008F82012414C200070000000041
+:108C20008EE201A40000882124420001AEE201A4EA
+:108C30000800334F8EE201A48EE2724CAC62001C3D
+:108C40008EE404A88EE504AC2462001CAC6200082B
+:108C500024020008A462000E24020011AC62001875
+:108C6000AC640000AC6500048EE204C4AC62001089
+:108C7000AF86012092E24E201440003324110001FF
+:108C80008EE24E30000210C02442503802E2202111
+:108C90008C820000144E001F000000008EE34E3056
+:108CA0008EE24E341062001B000000008C82000433
+:108CB00024420001AC8200048EE24E348EE34E303A
+:108CC0002442000110550007000000008EE24E34DF
+:108CD0002442000110620005000000000800333C3F
+:108CE0000000000014600005000000008F820128D1
+:108CF00024420020AF8201288F8201288C82000448
+:108D00002C42001150400010AC8000000800334F8E
+:108D1000000000008EE24E30244200015055000356
+:108D2000000010218EE24E3024420001AEE24E30AF
+:108D30008EE24E30000210C02442503802E2202160
+:108D4000AC8E0000AC9E00045620000D24110001E2
+:108D50008EE2724C3C040001248458A8AFA0001499
+:108D6000AFA200108EE6724C8F4702803C050009CE
+:108D700034A5F0080C002403AFAE00488FAE0048C5
+:108D800056200001AEE00E1C8EE201882442000154
+:108D9000AEE20188080033C88EE201888F8301208B
+:108DA000276238002466002000C2102B50400001CA
+:108DB000276630008F82012810C2000400000000E6
+:108DC0008F82012414C20007000000008EE201A47B
+:108DD0000000882124420001AEE201A4080033BA59
+:108DE0008EE201A48EE2724CAC62001C8EE404A8F8
+:108DF0008EE504AC2462001CAC620008240200086A
+:108E0000A462000E24020011AC620018AC640000E1
+:108E1000AC6500048EE204C4AC620010AF86012091
+:108E200092E24E2014400033241100018EE24E30B5
+:108E3000000210C02442503802E220218C8200003F
+:108E4000144E001F000000008EE34E308EE24E34C0
+:108E50001062001B000000008C820004244200010C
+:108E6000AC8200048EE24E348EE34E302442000188
+:108E700010550007000000008EE24E34244200012D
+:108E80001062000500000000080033A70000000089
+:108E900014600005000000008F8201282442002099
+:108EA000AF8201288F8201288C8200042C4200119D
+:108EB00050400010AC800000080033BA00000000F1
+:108EC0008EE24E3024420001505500030000102174
+:108ED0008EE24E3024420001AEE24E308EE24E3041
+:108EE000000210C02442503802E22021AC8E000063
+:108EF000AC9E00041620000D000000008EE2724CB3
+:108F00003C040001248458A8AFA00014AFA20010B4
+:108F10008EE6724C8F4702803C05000934A5F008AC
+:108F20000C002403AFAE00488FAE00488EE20174FF
+:108F300024420001AEE201748EE201740800346E36
+:108F40000000A021960C000A0183102B5440000160
+:108F500001801821A603000A8F88012027623800AB
+:108F6000250900200122102B504000012769300004
+:108F70008F82012811220004000000008F8201244A
+:108F800015220007240400208EE201A4000088219D
+:108F900024420001AEE201A40800342F8EE201A4B5
+:108FA0008EE5724C8EE604908EE70494A504000EC4
+:108FB00024040004AD100008AD0400180005294089
+:108FC00000A018210000102100E3382100E3202B2D
+:108FD00000C2302100C43021AD060000AD070004FE
+:108FE0008EE2724CAD02001C8EE204C4AD02001091
+:108FF000AF89012092E24E20144000332411000179
+:109000008EE24E30000210C02442503802E220218D
+:109010008C8200001456001F000000008EE34E30CA
+:109020008EE24E341062001B000000008C820004AF
+:1090300024420001AC8200048EE24E348EE34E30B6
+:109040002442000110550007000000008EE24E345B
+:109050002442000110620005000000000800341CDA
+:109060000000000014600005000000008F8201284D
+:1090700024420020AF8201288F8201288C820004C4
+:109080002C42001150400010AC8000000800342F2A
+:10909000000000008EE24E302442000150550003D3
+:1090A000000010218EE24E3024420001AEE24E302C
+:1090B0008EE24E30000210C02442503802E22021DD
+:1090C000AC960000AC9E00041620001D00000000BD
+:1090D000A60C000A8F820100AFA200108F8201044B
+:1090E0003C0400012484589C3C050006AFA20014F7
+:1090F0008EE6724C34A5F00D0C00240302003821DA
+:1091000093A2003710400031340F81008E420004DA
+:109110008E4300088E44000CA64F000CAE420000A7
+:10912000AE430004AE44000896020016A642000EAC
+:109130009602000E3042FDFF08003470A602000EB9
+:109140008EE201D82442FFFFAEE201D88EE201D8C0
+:109150008EE201CC3C04001F3C01000100370821D5
+:10916000A03E83C02442FFFFAEE201CC9603000A7A
+:109170003484FFFF8EE201CC006A1821026398213B
+:109180000093202B108000033C02FFF534421000B6
+:1091900002629821ADAB00008EE2724C24420001C5
+:1091A000304207FFAEE2724C8F4202401040000492
+:1091B0000283A0238EE20E1C24420001AEE20E1CAC
+:1091C000A3A000271680FD290000000012800024C3
+:1091D000000000003C01000100370821AC3483C4CA
+:1091E0003C01000100370821AC3383C83C01000179
+:1091F00000370821AC3283CC93A20037104000081E
+:10920000000000003C020001005710218C4283CC7A
+:10921000244200043C01000100370821AC2283CC29
+:109220008EE2724C8F43028024420001304207FFDD
+:1092300014620006000000008EE201C42442000116
+:10924000AEE201C4080034CC8EE201C48EE201BC5F
+:1092500024420001AEE201BC080034CC8EE201BC25
+:1092600097A4001E2484FFFC008018218EE400C017
+:109270008EE500C40000102100A3282100A3302B9C
+:109280000082202100862021AEE400C0AEE500C4AB
+:109290008FAF002C2402000211E2000F29E200032C
+:1092A000144000172402000315E20015000000001E
+:1092B0008EE200D08EE300D4246300012C64000110
+:1092C00000441021AEE200D0AEE300D48EE200D024
+:1092D000080034C68EE300D48EE200D88EE300DCB2
+:1092E000246300012C64000100441021AEE200D888
+:1092F000AEE300DC8EE200D8080034C68EE300DC6A
+:109300008EE200C88EE300CC246300012C640001CF
+:1093100000441021AEE200C8AEE300CC8EE200C8EB
+:109320008EE300CC8F8300E48F8200E010620003A4
+:1093300024630008AF8300E4AF8300E88FBF0070B0
+:109340008FBE006C8FB600688FB500648FB400606C
+:109350008FB3005C8FB200588FB100548FB00050B3
+:1093600003E0000827BD007827BDFFB0AFB500447B
+:109370000000A821AFB0003000008021AFBF004C3A
+:10938000AFB60048AFB40040AFB3003CAFB2003856
+:10939000AFB100348EE204D4241400013042000145
+:1093A0001440002A0000B0218F8700E08F8800C49D
+:1093B0008F8200E800E220232C8210005040000140
+:1093C00024841000000420C2008018218EE400C80C
+:1093D0008EE500CC0000102100A3282100A3302B33
+:1093E0000082202100862021AEE400C8AEE500CC3A
+:1093F0008F8300C83C02000A3442EFFF01032023A0
+:109400000044102B104000033C02000A3442F000DC
+:1094100000822021008018218EE400C08EE500C467
+:109420000000102100A3282100A3302B008220215E
+:1094300000862021AEE400C0AEE500C4AF8800C8BD
+:10944000AF8700E408003850AF8700E83C02000115
+:1094500000571021904283C01040000B0000000014
+:109460003C130001027798218E7383C43C110001E4
+:10947000023788218E3183C83C12000102579021A7
+:10948000080036E88E5283CC8F8300E08F8200E4A0
+:1094900010430007000048218F8200E424090001E6
+:1094A0008C4300008C440004AFA30018AFA4001C40
+:1094B0001520000E3C02FFFF8F8200C4AFA20010F7
+:1094C0008F8200C83C04000124845870AFA20014AD
+:1094D0008F8600E08F8700E43C0500060C00240323
+:1094E00034A5F00008003850000000008FA3001CD5
+:1094F0008FB200183073FFFF2673FFFC0062102448
+:1095000010400058024088213C0200800062102474
+:109510001040000A3C0400408EE2007C244200011E
+:10952000AEE2007C8EE2007C8EE201FC244200016F
+:10953000AEE201FC0800384A8EE201FC3C06000461
+:109540003C0B00013C0A00023C0500103C090008ED
+:109550008EE200803C080020340780002442000195
+:10956000AEE200808EE200808FA2001C004418242E
+:109570001066002100C3102B1440000700000000FB
+:10958000106B001100000000106A001500000000C0
+:1095900008003592000420421065002300A3102B20
+:1095A00014400005000000001069001900000000D0
+:1095B00008003592000420421068002100000000DD
+:1095C00008003592000420428EE20034244200015B
+:1095D000AEE200348EE200340800359200042042EE
+:1095E0008EE201EC24420001AEE201EC8EE201ECDD
+:1095F00008003592000420428EE201F0244200016E
+:10960000AEE201F08EE201F0080035920004204243
+:109610008EE201F424420001AEE201F48EE201F494
+:1096200008003592000420428EE2003024420001FE
+:10963000AEE200308EE20030080035920004204295
+:109640008EE201F824420001AEE201F88EE201F858
+:1096500000042042108702B70000000008003557C0
+:10966000000000003C02000100571021904283B22C
+:1096700014400084240200013C03000100771821FB
+:10968000906383B31462007F3C0201008E430000AC
+:10969000006210241040006F2402FFFF14620005D6
+:1096A00024100001964300043402FFFF106200758D
+:1096B0000000000092E204D8144000720000000094
+:1096C0003C020001005710218C4283B4284200055F
+:1096D00010400020000038213C02000100571021FA
+:1096E0008C4283B418400016000028219626000002
+:1096F000000520C0009710219442777E144600098F
+:10970000009710219443778096220002146200058E
+:10971000009710219443778296220004506200083B
+:10972000240700013C020001005710218C4283B441
+:1097300024A5000100A2102A5440FFEE000520C01D
+:1097400030E200FF1040027B000000000800361EDF
+:1097500000000000024020210C0022FE240500062B
+:109760003044001F000428C002E510219442727C9E
+:10977000304240001440026F00B710219443727EC3
+:10978000962200001462000B000418C000B71021DC
+:10979000944372809622000214620006000418C0EE
+:1097A00000B71021944372829622000410620035A3
+:1097B000000418C002E310219442727C3042800001
+:1097C0001440025C02E310219448727C962700004A
+:1097D000000828C000B710219442737E08003600AC
+:1097E00000003021000420C002E410219443737C67
+:1097F00002E410219448737C3063800014600010F0
+:10980000000828C000B710219442737E1447FFF56A
+:109810000100202100B7102194437380962200029A
+:109820005462FFF1000420C000B7102194437382FA
+:10983000962200045462FFEC000420C024060001BC
+:1098400030C200FF1040023B000000000800361E3E
+:1098500000000000974302029642000014620235A5
+:109860000000000097430204964200021462023195
+:109870000000000097430206964200041462022D85
+:1098800000000000924200003A0300013042000153
+:1098900000431024104000742402FFFF8E230000B8
+:1098A000146200043402FFFF962300041062006F6C
+:1098B000241400023C02000100571021904283B2A0
+:1098C0001440006A2414000392E204D81440006794
+:1098D000000000003C020001005710218C4283B4BC
+:1098E0002842000510400020000038213C02000101
+:1098F000005710218C4283B4184000160000282124
+:1099000096260000000520C0009710219442777E23
+:109910001446000900971021944377809622000294
+:109920001462000500971021944377829622000468
+:1099300050620008240700013C020001005710217A
+:109940008C4283B424A5000100A2102A5440FFEEEB
+:10995000000520C030E200FF14400044241400033E
+:109960000800384A00000000024020210C0022FEBE
+:10997000240500063044001F000428C002E5102121
+:109980009442727C30424000144001EA00B710213A
+:109990009443727E962200001462000B000418C0EB
+:1099A00000B71021944372809622000214620006D0
+:1099B000000418C000B7102194437282962200045C
+:1099C00010620027000418C002E310219442727C48
+:1099D00030428000144001D702E310219448727C89
+:1099E00096270000000828C000B710219442737E1B
+:1099F0000800368500003021000420C002E4102158
+:109A00009443737C02E410219448737C306380009B
+:109A100014600010000828C000B710219442737E23
+:109A20001447FFF50100202100B7102194437380F3
+:109A3000962200025462FFF1000420C000B71021FA
+:109A400094437382962200045462FFEC000420C009
+:109A50002406000130C200FF104001B600000000E3
+:109A60000800369824140003241400018F42026079
+:109A70000053102B10400049000000008F8300E4C9
+:109A80008F8200E01062000324630008AF8300E4CB
+:109A9000AF8300E88EE400C08EE500C402601821A8
+:109AA0000000102100A3282100A3302B00822021D8
+:109AB00000862021AEE400C0AEE500C48EE200586E
+:109AC00024420001AEE200588EE200588EE2007C93
+:109AD00024420001AEE2007C8EE2007C8F8200E036
+:109AE000AFA200108F8200E43C0400012484587867
+:109AF000AFA200148FA600188FA7001C3C0500061B
+:109B00000C00240334A5F0030800385000000000C6
+:109B10008EE25240AFA200108EE252443C0400019B
+:109B200024845884AFA200148EE60E108EE70E181F
+:109B30000C00240334A5F0028EE201C0244200018F
+:109B4000AEE201C08EE200008EE301C02403FFBF3D
+:109B500000431024080037F8AEE200008EE25240C5
+:109B6000AFA200108EE252443C04000124845884C9
+:109B7000AFA200148EE60E108EE70E183C0500060C
+:109B80000C00240334A5F0028EE201C0244200013F
+:109B9000AEE201C0080037F88EE201C096E2046828
+:109BA0000053102B544000013C158000126001311D
+:109BB0003C0C001F358CFFFF8EE2724C8F430280FD
+:109BC00024420001304207FF10620108000000003B
+:109BD00012A00014000000008EE352408EE25244B6
+:109BE0001062000926EE52448EEB52448EE352443A
+:109BF000000211402442524802E280212463000105
+:109C000008003712306800FF92E272481440FFC02B
+:109C10003C0500068EE201E024420001AEE201E0D4
+:109C20008EE201E08EE30E108EE20E181062FFCB82
+:109C300026EE0E188EEB0E180000A8218EE30E18EB
+:109C40000002114024420E2002E280212463000120
+:109C5000306801FF96E2046A30420010104000179D
+:109C6000340281009643000C1462001400000000CE
+:109C70003C02000100571021904283C01440000FA5
+:109C8000000000009642000EA60200168E42000858
+:109C90008E4300048E4400002673FFFCAE42000C8D
+:109CA000AE430008AE4400049602000E26310004C4
+:109CB0002416000134420200A602000E9603000A98
+:109CC000026050210073102B1040000202606821D6
+:109CD000006050212D42003D1040002A0000382134
+:109CE0009623000C2402080054620027AE110018CD
+:109CF0003C02000100571021904283C054400022D2
+:109D0000AE110018262200170182102B10400013FC
+:109D1000000000003C02FFF5005110219042101796
+:109D2000384300062C630001384200112C42000128
+:109D30000062182510600013262200100182102BEB
+:109D40001040000E000000003C07FFF500F1382134
+:109D500094E710100800375E24E7000E92220017E7
+:109D6000384300062C630001384200112C420001E8
+:109D70000062182550600004AE11001896270010EC
+:109D800024E7000EAE1100183C020001005710211C
+:109D9000904283C00002102B14E0000200024EC06B
+:109DA000014038218F83012027623800246600207B
+:109DB00000C2102B50400001276630008F8201281E
+:109DC00010C20004000000008F82012414C20007AA
+:109DD0002402000B8EE201A400004821244200016D
+:109DE000AEE201A4080037BF8EE201A48E04000099
+:109DF0008E050004AC62001801751025004910257D
+:109E0000AC710008A467000EAC62001CAC640000DA
+:109E1000AC6500048EE204C0AC620010AF86012085
+:109E200092E24E2014400038240900018EE24E30A8
+:109E3000000210C02442503802E220218C8300002E
+:109E40002402000714620020000000008EE34E3060
+:109E50008EE24E341062001C000000008C82000470
+:109E600024420001AC8200048EE34E348EE54E3075
+:109E7000240200402463000110620007000000007B
+:109E80008EE24E342442000110A2000500000000C2
+:109E9000080037A90000000014A000050000000021
+:109EA0008F82012824420020AF8201288F8201285E
+:109EB0008C8200042C42001150400013AC80000042
+:109EC000080037BF000000008EE24E30240300403F
+:109ED0002442000150430003000010218EE24E3066
+:109EE00024420001AEE24E308EE24E30000210C03D
+:109EF0002442503802E2202124020007AC820000F4
+:109F000024020001AC820004152000183C05000664
+:109F10008E0200183C04000124845890AFA2001067
+:109F20008E0200008E03000434A5F00902003021E7
+:109F30000C002403AFA3001432C200FF1040002B1A
+:109F4000340281008E4300048E4400088E45000CCC
+:109F5000A642000CAE430000AE440004AE4500082B
+:109F600096020016080037F8A642000E154D000AAA
+:109F7000000000009602000EA613000A34420004FE
+:109F8000A602000E3C01000100370821A02083C07A
+:109F9000080037F6000098219604000A0093102B61
+:109FA00010400002026018210080182124020001E4
+:109FB000A603000A3C01000100370821A02283C04B
+:109FC0009604000A022488210191102B10400003FE
+:109FD0003C02FFF5344210000222882102649823DB
+:109FE0000000A8211660FEF4ADC800001260002138
+:109FF00032C200FF3C01000100370821AC3383C4AA
+:10A000003C01000100370821AC3183C83C0100014C
+:10A010000037082110400008AC3283CC3C0200011C
+:10A02000005710218C4283CC244200043C010001E3
+:10A0300000370821AC2283CC8EE2724C8F43028021
+:10A040002442000114620006000000008EE201C4F8
+:10A0500024420001AEE201C4080038508EE201C47F
+:10A060008EE201BC24420001AEE201BC080038507F
+:10A070008EE201BC97A4001E2484FFFC00801821FE
+:10A080008EE400C08EE500C40000102100A328214A
+:10A0900000A3302B00822021008620212402000210
+:10A0A000AEE400C0AEE500C41282000F2A820003B5
+:10A0B000144000172402000316820015000000005F
+:10A0C0008EE200D08EE300D4246300012C640001F2
+:10A0D00000441021AEE200D0AEE300D48EE200D006
+:10A0E0000800384A8EE300D48EE200D88EE300DC0C
+:10A0F000246300012C64000100441021AEE200D86A
+:10A10000AEE300DC8EE200D80800384A8EE300DCC3
+:10A110008EE200C88EE300CC246300012C640001B1
+:10A1200000441021AEE200C8AEE300CC8EE200C8CD
+:10A130008EE300CC8F8300E48F8200E01062000386
+:10A1400024630008AF8300E4AF8300E88FBF004CB6
+:10A150008FB600488FB500448FB400408FB3003CE9
+:10A160008FB200388FB100348FB0003003E00008A8
+:10A1700027BD005027BDFF90AFB600600000B021A2
+:10A18000AFBF0068AFBE0064AFB5005CAFB40058AD
+:10A19000AFB30054AFB20050AFB1004CAFB0004805
+:10A1A0008EE204D400008821241500013042000111
+:10A1B0001440002AA3A0002F8F8700E08F8800C4DE
+:10A1C0008F8200E800E220232C8210005040000122
+:10A1D00024841000000420C2008018218EE400C8EE
+:10A1E0008EE500CC0000102100A3282100A3302B15
+:10A1F0000082202100862021AEE400C8AEE500CC1C
+:10A200008F8300C83C02000A3442EFFF0103202381
+:10A210000044102B104000033C02000A3442F000BE
+:10A2200000822021008018218EE400C08EE500C449
+:10A230000000102100A3282100A3302B0082202140
+:10A2400000862021AEE400C0AEE500C4AF8800C89F
+:10A25000AF8700E408003C5BAF8700E83C020001E8
+:10A2600000571021904283C01040000B00000000F6
+:10A270003C130001027798218E7383C43C100001C7
+:10A28000021780218E1083C83C12000102579021D2
+:10A2900008003A598E5283CC8F8300E08F8200E40D
+:10A2A00010430007000038218F8200E424070001DA
+:10A2B0008C4300008C440004AFA30018AFA4001C22
+:10A2C00014E0000E3C02FFFF8F8200C4AFA200101A
+:10A2D0008F8200C83C040001248458B4AFA200144B
+:10A2E0008F8600E08F8700E43C0500060C00240305
+:10A2F00034A5F20008003C5B000000008FA3001CA6
+:10A300008FB200183073FFFF2673FFFC0062102429
+:10A3100010400058024080213C020080006210245E
+:10A320001040000A3C0400408EE2007C2442000100
+:10A33000AEE2007C8EE2007C8EE201FC2442000151
+:10A34000AEE201FC08003C558EE201FC3C06000434
+:10A350003C0B00013C0A00023C0500103C090008CF
+:10A360008EE200803C080020340780002442000177
+:10A37000AEE200808EE200808FA2001C0044182410
+:10A380001066002100C3102B1440000700000000DD
+:10A39000106B001100000000106A001500000000A2
+:10A3A00008003916000420421065002300A3102B7A
+:10A3B00014400005000000001069001900000000B2
+:10A3C0000800391600042042106800210000000037
+:10A3D00008003916000420428EE2003424420001B5
+:10A3E000AEE200348EE20034080039160004204248
+:10A3F0008EE201EC24420001AEE201EC8EE201ECBF
+:10A4000008003916000420428EE201F024420001C7
+:10A41000AEE201F08EE201F008003916000420429D
+:10A420008EE201F424420001AEE201F48EE201F476
+:10A4300008003916000420428EE200302442000158
+:10A44000AEE200308EE200300800391600042042EF
+:10A450008EE201F824420001AEE201F88EE201F83A
+:10A46000000420421087033E00000000080038DB93
+:10A47000000000003C02000100571021904283B20E
+:10A4800014400084240200013C03000100771821DD
+:10A49000906383B31462007F3C0201008E4300008E
+:10A4A000006210241040006F2402FFFF14620005B8
+:10A4B00024110001964300043402FFFF106200756E
+:10A4C0000000000092E204D8144000720000000076
+:10A4D0003C020001005710218C4283B42842000541
+:10A4E00010400020000038213C02000100571021DC
+:10A4F0008C4283B418400016000028219606000004
+:10A50000000520C0009710219442777E1446000970
+:10A510000097102194437780960200021462000590
+:10A52000009710219443778296020004506200083D
+:10A53000240700013C020001005710218C4283B423
+:10A5400024A5000100A2102A5440FFEE000520C0FF
+:10A5500030E200FF1040030200000000080039A2B2
+:10A5600000000000024020210C0022FE240500060D
+:10A570003044001F000428C002E510219442727C80
+:10A5800030424000144002F600B710219443727E1E
+:10A59000960200001462000B000418C000B71021DE
+:10A5A000944372809602000214620006000418C0F0
+:10A5B00000B71021944372829602000410620035A5
+:10A5C000000418C002E310219442727C30428000E3
+:10A5D000144002E302E31021944D727C96070000C0
+:10A5E000000D28C000B710219442737E0800398402
+:10A5F00000003021000420C002E410219443737C49
+:10A6000002E41021944D737C3063800014600010CC
+:10A61000000D28C000B710219442737E1447FFF547
+:10A6200001A0202100B710219443738096020002FC
+:10A630005462FFF1000420C000B7102194437382DC
+:10A64000960200045462FFEC000420C024060001BE
+:10A6500030C200FF104002C200000000080039A212
+:10A66000000000009743020296420000146202BC00
+:10A67000000000009743020496420002146202B8F0
+:10A68000000000009743020696420004146202B4E0
+:10A6900000000000924200003A2300013042000115
+:10A6A00000431024104000742402FFFF8E030000BA
+:10A6B000146200043402FFFF960300041062006F6E
+:10A6C000241500023C02000100571021904283B281
+:10A6D0001440006A2415000392E204D81440006775
+:10A6E000000000003C020001005710218C4283B49E
+:10A6F0002842000510400020000038213C020001E3
+:10A70000005710218C4283B4184000160000282105
+:10A7100096060000000520C0009710219442777E25
+:10A720001446000900971021944377809602000296
+:10A73000146200050097102194437782960200046A
+:10A7400050620008240700013C020001005710215C
+:10A750008C4283B424A5000100A2102A5440FFEECD
+:10A76000000520C030E200FF14400044241500031F
+:10A7700008003C5500000000024020210C0022FE91
+:10A78000240500063044001F000428C002E5102103
+:10A790009442727C304240001440027100B7102194
+:10A7A0009443727E960200001462000B000418C0ED
+:10A7B00000B71021944372809602000214620006D2
+:10A7C000000418C000B7102194437282960200045E
+:10A7D00010620027000418C002E310219442727C2A
+:10A7E000304280001440025E02E31021944D727CDE
+:10A7F00096070000000D28C000B710219442737E18
+:10A8000008003A0900003021000420C002E41021B1
+:10A810009443737C02E41021944D737C3063800078
+:10A8200014600010000D28C000B710219442737E00
+:10A830001447FFF501A0202100B710219443738035
+:10A84000960200025462FFF1000420C000B71021FC
+:10A8500094437382960200045462FFEC000420C00B
+:10A860002406000130C200FF1040023D000000003D
+:10A8700008003A1C24150003241500018F420260D1
+:10A880000053102B10400036000000008F8300E4BE
+:10A890008F8200E01062000324630008AF8300E4AD
+:10A8A000AF8300E88EE400C08EE500C4026018218A
+:10A8B0000000102100A3282100A3302B00822021BA
+:10A8C00000862021AEE400C0AEE500C48EE2005850
+:10A8D00024420001AEE200588EE200588EE2007C75
+:10A8E00024420001AEE2007C8EE2007C8F8200E018
+:10A8F000AFA200108F8200E43C040001248458C001
+:10A90000AFA200148FA600188FA7001C3C050006FC
+:10A910000C00240334A5F20308003C5B0000000097
+:10A920008EE25240AFA200108EE252443C0400017D
+:10A93000248458CCAFA200148EE60E108EE70E18B9
+:10A940003C0500060C00240334A5F2028EE201C08F
+:10A9500024420001AEE201C008003C028EE201C0C8
+:10A9600096E204680053102B544000013C1680000E
+:10A97000126001CB3C0E001F35CEFFFF3C0FFFF5F0
+:10A9800035EF1000241E00408EE2724C8F4302808F
+:10A9900024420001304207FF1062019E00000000C7
+:10A9A00012C00012000000008EE352408EE25244BA
+:10A9B0001062000A26F852448EF45244AFB80024C4
+:10A9C0008EE35244000211402442524802E28821A0
+:10A9D0002463000108003A85306D00FF8EE201E03B
+:10A9E00024420001AEE201E08EE201E08EE30E10AF
+:10A9F0008EE20E181062FFCA26F80E188EF40E189A
+:10AA00000000B021AFB800248EE30E180002114000
+:10AA100024420E2002E2882124630001306D01FFF0
+:10AA200096E2046A3042001010400018340281009F
+:10AA30009643000C14620015000000003C02000167
+:10AA400000571021904283C0144000100000000005
+:10AA50009642000EA62200168E4200088E43000485
+:10AA60008E4400002673FFFCAE42000CAE4300088B
+:10AA7000AE4400049622000E2610000424180001A3
+:10AA8000A3B8002F34420200A622000E8E2200003E
+:10AA90008E2300043C04000134843800020030217D
+:10AAA000306A0007020A8023036410210202102B7F
+:10AAB00010400005026A9821020410230362182343
+:10AAC0003C02002000438023266200079623000AF0
+:10AAD0002418FFF80058C824006A18210079102BA8
+:10AAE00010400002032060210060602101801821D5
+:10AAF000246200072418FFF800586024026C102B11
+:10AB000014400004019328230183282308003AC33A
+:10AB100000C3102100D31021004A202301C4102BB0
+:10AB200054400001008F202125420040004C102B92
+:10AB3000144000350000582194C3000C2402080082
+:10AB400054620032AE2600183C020001005710216A
+:10AB5000904283C05440002DAE26001824C2001736
+:10AB600001C2102B10400013000000003C02FFF552
+:10AB70000046102190421017384300062C63000154
+:10AB8000384200112C4200010062182510600014A8
+:10AB900024C2001001C2102B1040000E0000000063
+:10ABA0003C0BFFF501665821956B101008003AF434
+:10ABB0002562000E90C20017384300062C63000186
+:10ABC000384200112C420001006218251060000577
+:10ABD0000160182194CB00102562000E004A582114
+:10ABE00001601821246200072418FFF80058582437
+:10ABF00000C31021004A202301C4102B1040000282
+:10AC000001632823008F2021AE2600183C0200019A
+:10AC100000571021904283C00002102B000216C082
+:10AC200015600002AFA2004401805821308200016B
+:10AC3000104000070000402190880000248400019B
+:10AC400001C4102B1040000224A5FFFF008F20211B
+:10AC500050A0001200081C022CA20002544000095F
+:10AC600024A5FFFF948200002484000201024021F9
+:10AC700001C4102B1040000624A5FFFE08003B2154
+:10AC8000008F20219082000000021200010240216A
+:10AC900014A0FFF22CA2000200081C023102FFFFE8
+:10ACA000006240213108FFFF0140282111400011BE
+:10ACB000020020212CA200025440000924A5FFFF1D
+:10ACC00094820000248400020102402101C4102B60
+:10ACD0001040000624A5FFFE08003B38008F20210D
+:10ACE00090820000000212000102402114A0FFF235
+:10ACF0002CA2000200081C023102FFFF006240216A
+:10AD000000081C023102FFFF8F89012000624021F0
+:10AD100027623800252300200062102B1440000217
+:10AD20003108FFFF276330008F8201281062000482
+:10AD3000000000008F8201241462000701402821D6
+:10AD40008EE201A40000382124420001AEE201A4F9
+:10AD500008003BC98EE201A48E2600008E27000465
+:10AD6000000814003448000BAD300008A52B000E7D
+:10AD7000AD2800188FB8004400002021029610254D
+:10AD800000581025AD22001C00E5102B00E53823EB
+:10AD900000C4302300C23023AD260000AD270004DC
+:10ADA0008EE204C0AD220010AF83012092E24E205B
+:10ADB0001440005F240700012502FFEE2C42000230
+:10ADC00014400003240200111502002400000000BA
+:10ADD0008EE24E30000210C02442503802E22021A0
+:10ADE0008C830000240200121462000F0000000097
+:10ADF0008EE34E308EE24E341062000B00000000F5
+:10AE00008C82000424420001AC8200048EE24E34A5
+:10AE10008EE34E3024420001105E002A0000000044
+:10AE200008003BA8000000008EE24E3024420001E2
+:10AE3000505E0003000010218EE24E3024420001DB
+:10AE4000AEE24E308EE24E30000210C02442503846
+:10AE500002E2202108003BC6240200128EE24E309E
+:10AE6000000210C02442503802E220218C830000EE
+:10AE7000240200071462001F000000008EE34E3021
+:10AE80008EE24E341062001B000000008C82000431
+:10AE900024420001AC8200048EE24E348EE34E3038
+:10AEA00024420001105E0007000000008EE24E34D4
+:10AEB00024420001106200050000000008003BB4BD
+:10AEC0000000000014600005000000008F820128CF
+:10AED00024420020AF8201288F8201288C82000446
+:10AEE0002C42001150400012AC80000008003BC909
+:10AEF000000000008EE24E3024420001505E00034C
+:10AF0000000010218EE24E3024420001AEE24E30AD
+:10AF10008EE24E30000210C02442503802E220215E
+:10AF200024020007AC82000024020001AC8200046D
+:10AF300014E000193C0500063C04000124845890EC
+:10AF40008E22001834A5F209AFA200108E22000054
+:10AF50008E23000402203021016038210C002403DC
+:10AF6000AFA3001493A2002F1040002A34028100E6
+:10AF70008E4300048E4400088E45000CA642000C4F
+:10AF8000AE430000AE440004AE4500089622001611
+:10AF900008003C02A642000E1599000A026A182316
+:10AFA0009622000EA623000A34420004A622000EB8
+:10AFB0003C01000100370821A02083C008003BFFAE
+:10AFC000000098219624000A0083102B54400001B1
+:10AFD0000080182124020001A623000A3C01000180
+:10AFE00000370821A02283C09622000A004A1821B7
+:10AFF0000203802101D0102B54400001020F802158
+:10B00000026398230000B0218FB800241660FE5E12
+:10B01000AF0D000012600022000000003C010001A2
+:10B0200000370821AC3383C43C01000100370821FC
+:10B03000AC3083C83C01000100370821AC3283CC1E
+:10B0400093A2002F10400008000000003C02000105
+:10B05000005710218C4283CC244200043C010001A3
+:10B0600000370821AC2283CC8F4302808EE2724CE1
+:10B0700014620006000000008EE201C424420001B8
+:10B08000AEE201C408003C5B8EE201C48EE201BC6A
+:10B0900024420001AEE201BC08003C5B8EE201BC30
+:10B0A00097A4001E2484FFFC008018218EE400C0B9
+:10B0B0008EE500C40000102100A3282100A3302B3E
+:10B0C000008220210086202124020002AEE400C07C
+:10B0D000AEE500C412A2000F2AA20003144000171C
+:10B0E0002402000316A20015000000008EE200D02A
+:10B0F0008EE300D4246300012C640001004410217D
+:10B10000AEE200D0AEE300D48EE200D008003C55A1
+:10B110008EE300D48EE200D88EE300DC24630001CD
+:10B120002C64000100441021AEE200D8AEE300DC44
+:10B130008EE200D808003C558EE300DC8EE200C8A9
+:10B140008EE300CC246300012C6400010044102134
+:10B15000AEE200C8AEE300CC8EE200C88EE300CCC5
+:10B160008F8300E48F8200E01062000324630008F4
+:10B17000AF8300E4AF8300E88FBF00688FBE006438
+:10B180008FB600608FB5005C8FB400588FB3005449
+:10B190008FB200508FB1004C8FB0004803E0000820
+:10B1A00027BD007027BDFFE0AFBF00188EE30E146F
+:10B1B0008EE20E0C10620074000000008EE30E0C94
+:10B1C0008EE20E1400622023048200012484020017
+:10B1D0008EE30E188EE20E140043102B1440000470
+:10B1E000240202008EE30E1408003C7D0043182365
+:10B1F0008EE20E188EE30E14004310232443FFFF4B
+:10B20000008048210069102A544000010060482154
+:10B210008F8701002762300024E800200102102BF4
+:10B2200050400001276828008F82010811020004A5
+:10B23000000000008F8201041502000700001021A9
+:10B240008EE201A80000202124420001AEE201A804
+:10B2500008003CBF8EE201A88EE40E1400042140D9
+:10B26000008018218EE404608EE5046400A3282188
+:10B2700000A3302B0082202100862021ACE40000B6
+:10B28000ACE500048EE30E1400091140A4E2000EA8
+:10B2900024020002ACE200180003194024630E20CF
+:10B2A00002E31021ACE200088EE20E14ACE2001CB6
+:10B2B0008EE204CCACE20010AF88010092E204EC14
+:10B2C00014400011240400018EE24E2824030040A3
+:10B2D0002442000150430003000010218EE24E285A
+:10B2E00024420001AEE24E288EE24E28000210C039
+:10B2F00024424E3802E2182124020002AC6200000F
+:10B3000024020001AC6200041480000E24030040FB
+:10B310008EE20E14AFA200108EE20E183C0500075C
+:10B32000AFA200148EE60E0C8EE70E103C04000156
+:10B33000248458D40C00240334A5F00108003CDD1B
+:10B34000000000008EE2050024420001504300038B
+:10B35000000010218EE2050024420001AEE205004B
+:10B360008EE205000002108000571021AC4905084C
+:10B370008EE20E1400491021304201FFAEE20E149D
+:10B380008EE30E148EE20E0C146200050000000025
+:10B390008F8200602403FDFF00431024AF82006011
+:10B3A0008FBF001803E0000827BD002027BDFFE085
+:10B3B000AFBF00188EE3523C8EE252381062007428
+:10B3C000000000008EE352388EE2523C00622023DF
+:10B3D00004820001248401008EE352448EE2523C38
+:10B3E0000043102B14400004240201008EE3523C61
+:10B3F00008003CFF004318238EE252448EE3523C87
+:10B40000004310232443FFFF008048210069102AD5
+:10B4100054400001006048218F87010027623000FE
+:10B4200024E800200102102B50400001276828006A
+:10B430008F82010811020004000000008F820104C5
+:10B4400015020007000010218EE201A80000202153
+:10B4500024420001AEE201A808003D418EE201A8AD
+:10B460008EE4523C00042140008018218EE40470D8
+:10B470008EE5047400A3282100A3302B0082202134
+:10B4800000862021ACE40000ACE500048EE3523CD1
+:10B4900000091140A4E2000E24020003ACE20018EF
+:10B4A000000319402463524802E31021ACE2000873
+:10B4B0008EE2523CACE2001C8EE204CCACE2001006
+:10B4C000AF88010092E204EC144000112404000152
+:10B4D0008EE24E2824030040244200015043000322
+:10B4E000000010218EE24E2824420001AEE24E28D8
+:10B4F0008EE24E28000210C024424E3802E218218B
+:10B5000024020003AC62000024020001AC620004CB
+:10B510001480000E240300408EE2523CAFA20010C3
+:10B520008EE252443C050007AFA200148EE652386A
+:10B530008EE752403C040001248458E00C002403B0
+:10B5400034A5F01008003D5F000000008EE2050009
+:10B550002442000150430003000010218EE2050048
+:10B5600024420001AEE205008EE2050000021080D8
+:10B5700000571021AC4905088EE2523C00491021C9
+:10B58000304200FFAEE2523C8EE3523C8EE2523833
+:10B5900014620005000000008F8200602403FEFF9B
+:10B5A00000431024AF8200608FBF001803E0000842
+:10B5B00027BD00208F8201208EE34E348F8201242C
+:10B5C0008F8601282402004024630001506200039A
+:10B5D000000010218EE24E3424420001AEE24E34CF
+:10B5E0008EE24E348EE44E348EE34E30000210C0B4
+:10B5F000244250381483000702E228218F82012858
+:10B6000024420020AF8201288F82012808003D9249
+:10B61000ACA000008EE24E3424030040244200011E
+:10B6200050430003000010218EE24E3424420001FA
+:10B63000000210C02442503802E228218CA20004EB
+:10B640008F8301280002114000621821AF83012876
+:10B65000ACA000008CC200182443FFFE2C62001234
+:10B6600010400008000310803C0100010022082166
+:10B670008C2258F000400008000000002402000165
+:10B68000AEE24E2403E000080000000027BDFFC822
+:10B69000AFBF0030AFB5002CAFB40028AFB300246B
+:10B6A000AFB20020AFB1001CAFB000188F830128EB
+:10B6B0008F820124106202B0000098213C11001F0B
+:10B6C0003631FFFF3C12FFF53652100024150012F0
+:10B6D000241400408F8C01288F82012824420020EE
+:10B6E000AF8201289182001B8F8301282443FFFE33
+:10B6F0002C6200121040029C000310803C010001EB
+:10B70000002208218C225948004000080000000057
+:10B710008F42021830420100104000070000000074
+:10B720009583001695820018006218230003140206
+:10B7300000431021A58200168D82001C3C0380006E
+:10B740003044FFFF004368243C03080000431824F2
+:10B7500011A00004AD84001C0004114008003DD875
+:10B76000244252480004114024420E2002E2582193
+:10B770009562000E3042FFFC10600004A562000ECE
+:10B780009584001608003EC0000000008D69001876
+:10B7900000004021952A000025290002952700007D
+:10B7A0002529000295260000252900029525000084
+:10B7B0002529000295240000252900029523000078
+:10B7C0002529000295220000252900020147502169
+:10B7D000014650210145502101445021014350218F
+:10B7E00001425021000A1C023142FFFF0062502139
+:10B7F000000A1C023142FFFF0062502196E2046AF7
+:10B80000314EFFFF30420002104000440000502142
+:10B81000252200140222102B1040001401201821B0
+:10B820002405000A000020210223102B54400001AF
+:10B8300000721821946200002463000224A5FFFF17
+:10B8400014A0FFF90082202100041C023082FFFFB7
+:10B8500000622021000414023083FFFF0043102106
+:10B860003042FFFF08003E3301425021952A00007C
+:10B8700025290002952800002529000295270000AF
+:10B8800025290002952600002529000295250000A3
+:10B890002529000295230000252900029522000099
+:10B8A0002529000295240000252900020148502185
+:10B8B00001475021014650210145502101435021AB
+:10B8C000014250219522000095230002014450219D
+:10B8D0000142502101435021000A1C023142FFFF66
+:10B8E00000625021000A1C023142FFFF0062502119
+:10B8F0003148FFFF510000013408FFFF8D6200183E
+:10B900009443000C2402080054620005A56800104E
+:10B910009562000E34420002A562000EA568001078
+:10B9200096E2046A000028213042000814400056C4
+:10B93000000030218D630018246200240222102BA5
+:10B9400010400034246900100229102B54400001DB
+:10B950000132482195250000246900140229102B8A
+:10B960001040000224A5FFEC01324821952200007E
+:10B9700030420FFF144000032529000208003E60FA
+:10B98000241300010000982100A030210229102B6F
+:10B990005440000101324821912200012529000272
+:10B9A00000A228210229102B544000010132482115
+:10B9B000252900020229102B5440000101324821A0
+:10B9C000952200002529000200A228210229102B1F
+:10B9D000544000010132482195220000252900022F
+:10B9E00000A228210229102B5440000101324821D5
+:10B9F000952200002529000200A228210229102BEF
+:10BA000054400001013248219522000008003E996F
+:10BA100000A2282194650010946200142469001685
+:10BA200030420FFF1440000324A5FFEC08003E8CB9
+:10BA3000241300010000982100A03021912300016F
+:10BA400025290004952200002529000295240000E4
+:10BA50002529000200A3282100A228219522000008
+:10BA60009523000200A4282100A2282100A3282158
+:10BA700000051C0230A2FFFF0062282100051C0205
+:10BA800030A2FFFF0062282196E2046A30420001E2
+:10BA90001040001E0000202195820016004E202339
+:10BAA0000004140200822021326200FF5040000294
+:10BAB000008620210085202100041402008220211C
+:10BAC0003084FFFF508000013404FFFF8D620018B6
+:10BAD000244300170223102B544000010072182148
+:10BAE00090620000384300112C63000138420006C8
+:10BAF0002C420001006218251060000400000000C4
+:10BB00009562000E34420001A562000E9562000E9F
+:10BB1000240A00023042000410400002A564001212
+:10BB2000240A00048F88012027623800250900209C
+:10BB30000122102B50400001276930008F8201281C
+:10BB400011220004000000008F820124152200074A
+:10BB5000240400208EE201A4000080212442000180
+:10BB6000AEE201A408003F4F8EE201A48EE5724CC4
+:10BB70008EE604908EE70494AD0B0008A504000E39
+:10BB8000AD0A00180005294000A01821000010216E
+:10BB900000E3382100E3202B00C2302100C4302113
+:10BBA000AD060000AD0700048EE2724C004D10257A
+:10BBB000AD02001C8EE204C4AD020010AF8901206A
+:10BBC00092E24E2014400060241000012543FFEE55
+:10BBD0002C630002394200112C420001006218253A
+:10BBE00010600024000000008EE24E30000210C001
+:10BBF0002442503802E220218C8200001455000FAC
+:10BC0000000000008EE34E308EE24E341062000BD6
+:10BC1000000000008C82000424420001AC82000479
+:10BC20008EE24E348EE34E30244200011054002B3D
+:10BC30000000000008003F2E000000008EE24E30A1
+:10BC40002442000150540003000010218EE24E30C7
+:10BC500024420001AEE24E308EE24E30000210C0AF
+:10BC60002442503802E220212402000108003F4E05
+:10BC7000AC9500008EE24E30000210C024425038D5
+:10BC800002E220218C830000240200071462001FBE
+:10BC9000000000008EE34E308EE24E341062001B36
+:10BCA000000000008C82000424420001AC820004E9
+:10BCB0008EE24E348EE34E302442000110540007D1
+:10BCC000000000008EE24E342442000110620005A4
+:10BCD0000000000008003F3A00000000146000056A
+:10BCE000000000008F82012824420020AF8201283A
+:10BCF0008F8201288C8200042C42001150400012D7
+:10BD0000AC80000008003F4F000000008EE24E3083
+:10BD10002442000150540003000010218EE24E30F6
+:10BD200024420001AEE24E308EE24E30000210C0DE
+:10BD30002442503802E2202124020007AC82000095
+:10BD400024020001AC8200041600000D0000000077
+:10BD50008F8201203C04000124845938AFA00014D4
+:10BD6000AFA200108D86001C8F8701243C050008BF
+:10BD70000C00240334A50001080040570000000017
+:10BD80008EE2724C24420001304207FF11A00006EF
+:10BD9000AEE2724C8EE201D02442FFFFAEE201D04F
+:10BDA00008003F6B8EE201D08EE201CC2442FFFFFF
+:10BDB000AEE201CC8EE201CC8EE201D82442FFFF3C
+:10BDC000AEE201D8080040578EE201D88F4202400F
+:10BDD000104000E5000000008EE20E1C244200012D
+:10BDE00008004057AEE20E1C9582001EAD82001C7A
+:10BDF0008F42024010400072000000008EE20E1CD4
+:10BE000024420001AEE20E1C8F4302400043102B7F
+:10BE1000144000D5000000008F8301202762380005
+:10BE20002466002000C2102B50400001276630001D
+:10BE30008F82012810C20004000000008F820124BC
+:10BE400014C20007000000008EE201A4000080215F
+:10BE500024420001AEE201A408003FDA8EE201A410
+:10BE60008EE2724CAC62001C8EE404A88EE504AC39
+:10BE70002462001CAC62000824020008A462000EC8
+:10BE800024020011AC620018AC640000AC65000430
+:10BE90008EE204C4AC620010AF86012092E24E2014
+:10BEA00014400034241000018EE24E30000210C015
+:10BEB0002442503802E220218C8200001455001FD9
+:10BEC000000000008EE34E308EE24E341062001B04
+:10BED000000000008C82000424420001AC820004B7
+:10BEE0008EE24E348EE34E3024420001105400079F
+:10BEF000000000008EE24E34244200011062000572
+:10BF00000000000008003FC60000000014600005AB
+:10BF1000000000008F82012824420020AF82012807
+:10BF20008F8201288C8200042C42001150400011A5
+:10BF3000AC80000008003FDA000000008EE24E30C6
+:10BF40002442000150540003000010218EE24E30C4
+:10BF500024420001AEE24E308EE24E30000210C0AC
+:10BF60002442503802E2202124020001AC95000056
+:10BF7000AC8200045600000B241000018EE2724CCB
+:10BF80003C040001248458A8AFA00014AFA2001004
+:10BF90008EE6724C8F4702803C0500090C0024039A
+:10BFA00034A5F00856000001AEE00E1C8EE20188B8
+:10BFB00024420001AEE20188080040508EE2018870
+:10BFC0008F830120276238002466002000C2102BD6
+:10BFD00050400001276630008F82012810C2000403
+:10BFE000000000008F82012414C20007000000003E
+:10BFF0008EE201A40000802124420001AEE201A4EF
+:10C00000080040448EE201A48EE2724CAC62001C37
+:10C010008EE404A88EE504AC2462001CAC62000827
+:10C0200024020008A462000E24020011AC62001871
+:10C03000AC640000AC6500048EE204C4AC62001085
+:10C04000AF86012092E24E201440003424100001FB
+:10C050008EE24E30000210C02442503802E220210D
+:10C060008C8200001455001F000000008EE34E304B
+:10C070008EE24E341062001B000000008C8200042F
+:10C0800024420001AC8200048EE24E348EE34E3036
+:10C090002442000110540007000000008EE24E34DC
+:10C0A000244200011062000500000000080040303A
+:10C0B0000000000014600005000000008F820128CD
+:10C0C00024420020AF8201288F8201288C82000444
+:10C0D0002C42001150400011AC8000000800404488
+:10C0E000000000008EE24E30244200015054000354
+:10C0F000000010218EE24E3024420001AEE24E30AC
+:10C100008EE24E30000210C02442503802E220215C
+:10C1100024020001AC950000AC8200041600000B64
+:10C12000000000008EE2724C3C040001248458A8F8
+:10C13000AFA00014AFA200108EE6724C8F470280B1
+:10C140003C0500090C00240334A5F0088EE20174BC
+:10C1500024420001AEE20174080040578EE20174EF
+:10C1600024020001AEE24E248F8301288F82012435
+:10C170001462FD58000000008FBF00308FB5002C06
+:10C180008FB400288FB300248FB200208FB1001C21
+:10C190008FB0001803E0000827BD003827BDFFE876
+:10C1A000278402082745020024060008AFBF0014B8
+:10C1B0000C00249AAFB000100000202124100001D0
+:10C1C0002402241FAF900210AF900200AF8002043F
+:10C1D000AF8202148F460248240300043C02004050
+:10C1E0003C010001AC235CC43C010001AC235CC8F1
+:10C1F0003C010001AC205D9C3C010001AC225CC014
+:10C200003C010001AC235CC80C005108240500046B
+:10C210000C004822000000008EE200003C03FEFFFC
+:10C220003463FFFD00431024AEE200003C023C00FA
+:10C23000AF82021C3C01000100370821AC3083AC06
+:10C240008FBF00148FB0001003E0000827BD001856
+:10C2500027BDFFE03C05000834A50400AFBF00186F
+:10C26000AFA00010AFA000148F8602003C040001B4
+:10C27000248459F00C002403000038218EE202804F
+:10C2800024420001AEE202808EE202808F8302002F
+:10C290003C023F00006218248FBF00183C020400DB
+:10C2A00003E0000827BD002027BDFFD8AFBF002056
+:10C2B000AFB1001CAFB000188F9002208EE20214C4
+:10C2C0000000382124420001AEE202148EE2021482
+:10C2D0003C02030002021024104000273C1104001D
+:10C2E0000C00429B000000003C02010002021024EE
+:10C2F00010400007000000008EE2021824420001F6
+:10C30000AEE202188EE20218080040C63C03FDFFB0
+:10C310008EE2021C24420001AEE2021C8EE2021CEC
+:10C320003C03FDFF3463FFFF3C0808FF3508FFFFB7
+:10C330008EE200003C040001248459FC3C05000806
+:10C340000200302100431024AEE200008F82022060
+:10C35000000038213C03030000481024004310254E
+:10C36000AF820220AFA000100C002403AFA0001485
+:10C370000800429600000000021110241040001F27
+:10C380003C0240008F830224240214021462000B3A
+:10C390003C03FDFF3C04000124845A083C050008CE
+:10C3A000AFA00010AFA000148F86022434A5FFFFB9
+:10C3B0000C002403000038213C03FDFF8EE2000046
+:10C3C0003463FFFF02002021004310240C004E5470
+:10C3D000AEE200008EE2022024420001AEE2022022
+:10C3E0008EE202208F8202203C0308FF3463FFFFAD
+:10C3F0000043102408004295005110250202102429
+:10C4000010400142000000008EE2022C2442000194
+:10C41000AEE2022C8EE2022C8F8202203C0308FF47
+:10C420003463FFFF0043102434420004AF82022033
+:10C430008F8300548F8200540800410E2463000251
+:10C440008F820054006210232C4200031440FFFC32
+:10C45000000000008F8600E08F8400E430C20007F7
+:10C4600010400012000000008F8300E42402FFF857
+:10C4700000C210241043000D000000008F82005401
+:10C480008F8300E014C30009244400508F820054BD
+:10C49000008210232C4200511040000400000000D4
+:10C4A0008F8200E010C2FFF9000000008F8202209E
+:10C4B0003C0308FF3463FFFD00431024AF820220D9
+:10C4C0008F8600E030C20007104000032402FFF80E
+:10C4D00000C23024AF8600E08F8300C43C02001FFE
+:10C4E0003442FFFF246800080048102B104000036E
+:10C4F0003C02FFF534421000010240218F8B00C83E
+:10C500008F8501208F8401240800414500006021AF
+:10C51000276238000082102B504000012764300051
+:10C5200010A40010318200FF8C82001838430007ED
+:10C530002C6300013842000B2C42000100621825D8
+:10C540005060FFF3248400208EE20240240C00019E
+:10C5500024420001AEE202408EE202408C8B0008D1
+:10C56000318200FF14400065000000003C02000121
+:10C5700000571021904283C014400060000000006A
+:10C580008F8400E400C41023000218C30462000179
+:10C59000246302008F8900C410600005240200019A
+:10C5A0001062000900000000080041870000000040
+:10C5B0008EE202300120582124420001AEE2023016
+:10C5C000080041BC8EE202308EE202343C05000AD3
+:10C5D00024420001AEE202348C8B000034A5F0004E
+:10C5E0008EE20234012B182300A3102B54400001CB
+:10C5F000006518212C62233F144000400000000019
+:10C600008F8200E824420008AF8200E88F8200E8B1
+:10C610008F8200E40120582124420008AF8200E408
+:10C62000080041BC8F8200E48EE202383C03000A1D
+:10C6300024420001AEE202388C8400003463F00032
+:10C640008EE20238008838230067102B5440000126
+:10C6500000E338213C02000334420D400047102B18
+:10C660001040000300000000080041BC0080582179
+:10C670008F8200E424440008AF8400E48F8400E447
+:10C68000108600183C05000A34A5F0003C0A00039F
+:10C69000354A0D408EE2007C24420001AEE2007C6F
+:10C6A0008C8300008EE2007C0068382300A7102BEA
+:10C6B0005440000100E538210147102B5440000789
+:10C6C000006058218F8200E424440008AF8400E415
+:10C6D0008F8400E41486FFEF00000000148600053C
+:10C6E0000000000001205821AF8600E4080041BC92
+:10C6F000AF8600E8AF8400E4AF8400E88F8200C812
+:10C700003C03000A3463F000004838230067102B14
+:10C710005440000100E338213C02000334420D3F45
+:10C720000047102B544000070000602101683823A7
+:10C730000067102B5440000300E33821080041CF6C
+:10C740003C0200033C02000334420D3F0047102B23
+:10C7500014400016318200FF144000060000000063
+:10C760003C02000100571021904283C01040000F8E
+:10C77000000000008EE2023C3C04FDFF8EE300005E
+:10C780003484FFFF24420001AEE2023C8EE2023C10
+:10C7900024020001006418243C0100010037082134
+:10C7A000A02283B80800422CAEE30000AF8B00C883
+:10C7B0008F8300C88F8200C43C04000A3484F000D8
+:10C7C000006238230087102B5440000100E4382118
+:10C7D0003C02000334420D400047102B2CE30001C3
+:10C7E0000043102510400008000000008F82022046
+:10C7F0003C0308FF3463FFFF004310243C03400068
+:10C8000000431025AF8202208F8600E08F8400E471
+:10C8100010C4002A000000008EE2007C24420001C7
+:10C82000AEE2007C8EE2007C24C2FFF8AF8200E022
+:10C830003C0200018C427E303C0300088F8600E001
+:10C84000004310241040001D0000000010C4001B15
+:10C85000240DFFF83C0A000A354AF0003C0C008029
+:10C86000248500082762280050A2000127651800CF
+:10C870008C8800048C8200008CA900003103FFFF2B
+:10C8800000431021004D102424430010006B102B96
+:10C8900054400001006A1821012B102B5440000164
+:10C8A000012A482110690002010C1025AC82000405
+:10C8B00000A0202114C4FFEB248500088F820220F1
+:10C8C0003C0308FF3463FFFF00431024344200029E
+:10C8D000AF8202208F8300548F82005408004237B9
+:10C8E000246300018F820054006210232C42000256
+:10C8F0001440FFFC000000008F8202203C0308FF70
+:10C900003463FFFB00431024AF8202200601005570
+:10C91000000000008EE2022824420001AEE202285C
+:10C920008EE202288F8202203C0308FF3463FFFF5F
+:10C930000043102434420004AF8202208F8300544D
+:10C940008F82005408004251246300028F820054F9
+:10C95000006210232C4200031440FFFC0000000082
+:10C960008F8600E030C20007104000120000000077
+:10C970008F8300E42402FFF800C210241043000D4E
+:10C98000000000008F8200548F8300E014C3000970
+:10C99000244400328F820054008210232C42003342
+:10C9A00010400004000000008F8200E010C2FFF978
+:10C9B000000000008F8202203C0308FF3463FFFD6B
+:10C9C00000431024AF8202208F8600E030C20007AF
+:10C9D000104000032402FFF800C23024AF8600E0BC
+:10C9E000240301F58F8200E800673823000718C090
+:10C9F00000431021AF8200E88F8200E8AF8200E49C
+:10CA00008EE2007C3C0408FF3484FFFF00471021C5
+:10CA1000AEE2007C8F8202203C038000346300027F
+:10CA20000044102400431025AF8202208F8300545D
+:10CA30008F8200540800428D246300018F820054CD
+:10CA4000006210232C4200021440FFFC0000000092
+:10CA50008F8202203C0308FF3463FFFB0043102455
+:10CA6000AF8202208FBF00208FB1001C8FB0001852
+:10CA700003E0000827BD00283C0200018C425CD87E
+:10CA800027BDFFD810400012AFBF00203C040001BA
+:10CA900024845A143C050008240200013C010001D2
+:10CAA00000370821AC2283ACAFA00010AFA0001467
+:10CAB0008F86022034A504983C010001AC205CD88C
+:10CAC0003C010001AC225CCC0C00240300003821A6
+:10CAD0008F4202683C037FFF3463FFFF0043102452
+:10CAE000AF4202688EE204D08EE404D42403FFFE39
+:10CAF00000431024308400021080011EAEE204D0F6
+:10CB00008EE204D42403FFFD00431024AEE204D4DB
+:10CB10008F8200443C03060034632000344200202E
+:10CB2000AF820044AFA300188EE206088F430228AC
+:10CB300024420001304A00FF514300FEAFA0001024
+:10CB40008EE20608000210C0005710218FA30018C3
+:10CB50008FA4001CAC43060CAC4406108F83005419
+:10CB60008F82005424690032012210232C420033AA
+:10CB70001040006A0000582124180008240F000DFE
+:10CB8000240D0007240C0040240E00018F87012093
+:10CB90002762380024E800200102102B50400001D9
+:10CBA000276830008F820128110200040000000075
+:10CBB0008F82012415020007000010218EE201A4DB
+:10CBC0000000282124420001AEE201A40800433DF8
+:10CBD0008EE201A48EE40608000420C00080182123
+:10CBE0008EE404308EE5043400A3282100A3302B0A
+:10CBF0000082202100862021ACE40000ACE5000486
+:10CC00008EE20608A4F8000EACEF0018ACEA001C97
+:10CC1000000210C02442060C02E21021ACE200081F
+:10CC20008EE204C4ACE20010AF88012092E24E20F4
+:10CC300014400033240500018EE24E30000210C083
+:10CC40002442503802E220218C820000144D001F43
+:10CC5000000000008EE34E308EE24E341062001B66
+:10CC6000000000008C82000424420001AC82000419
+:10CC70008EE24E348EE34E3024420001104C000709
+:10CC8000000000008EE24E342442000110620005D4
+:10CC9000000000000800432A0000000014600005A6
+:10CCA000000000008F82012824420020AF8201286A
+:10CCB0008F8201288C8200042C4200115040001009
+:10CCC000AC8000000800433D000000008EE24E30C2
+:10CCD00024420001504C0003000010218EE24E302F
+:10CCE00024420001AEE24E308EE24E30000210C00F
+:10CCF0002442503802E22021AC8D0000AC8E0004AA
+:10CD000054A00006240B00018F820054012210233E
+:10CD10002C4200331440FF9D00000000316300FFEF
+:10CD20002402000154620079AFA00010AEEA0608A8
+:10CD30008F8300548F820054246900320122102313
+:10CD40002C4200331040006100005821240D0008DF
+:10CD5000240C00112408001224070040240A0001BA
+:10CD60008F830120276238002466002000C2102B28
+:10CD700050400001276630008F82012810C2000455
+:10CD8000000000008F82012414C200070000000090
+:10CD90008EE201A40000282124420001AEE201A499
+:10CDA000080043A98EE201A48EE20608AC62001CD2
+:10CDB0008EE404A08EE504A42462001CAC6200088A
+:10CDC000A46D000EAC6C0018AC640000AC650004EF
+:10CDD0008EE204C4AC620010AF86012092E24E20C5
+:10CDE00014400033240500018EE24E30000210C0D2
+:10CDF0002442503802E220218C8200001448001F97
+:10CE0000000000008EE34E308EE24E341062001BB4
+:10CE1000000000008C82000424420001AC82000467
+:10CE20008EE24E348EE34E3024420001104700075C
+:10CE3000000000008EE24E34244200011062000522
+:10CE40000000000008004396000000001460000588
+:10CE5000000000008F82012824420020AF820128B8
+:10CE60008F8201288C8200042C4200115040001057
+:10CE7000AC800000080043A9000000008EE24E30A4
+:10CE80002442000150470003000010218EE24E3082
+:10CE900024420001AEE24E308EE24E30000210C05D
+:10CEA0002442503802E22021AC880000AC8A000401
+:10CEB00054A00006240B00018F820054012210238D
+:10CEC0002C4200331440FFA600000000316300FF35
+:10CED0002402000154620003AFA00010080043D6F2
+:10CEE000000000003C04000124845A20AFA000147C
+:10CEF0008F8601208F8701243C0500090C00240344
+:10CF000034A5F011080043D6000000003C040001E5
+:10CF100024845A2CAFA000148F8601208F8701240F
+:10CF20003C0500090C00240334A5F010080043D68A
+:10CF3000000000003C04000124845A38AFA0001413
+:10CF40008EE606088F4702283C0500090C002403E2
+:10CF500034A5F00F8EE201AC24420001AEE201AC38
+:10CF60008EE201AC8EE2015C24420001AEE2015C83
+:10CF70008EE2015C8FBF002003E0000827BD00287F
+:10CF80003C0200018C425CD827BDFFE01440000D3C
+:10CF9000AFBF00183C04000124845A443C0500083B
+:10CFA000AFA00010AFA000148F86022034A5049912
+:10CFB000240200013C010001AC225CD80C002403D7
+:10CFC000000038218EE204D03C03000100771821D4
+:10CFD000946383B23442000110600007AEE204D0D3
+:10CFE0008F8202203C0308FF3463FFFF00431024BC
+:10CFF00034420008AF820220000020210C0052A21F
+:10D0000024050004AF4202688FBF001803E0000847
+:10D0100027BD00200000000000000000000000000C
+:10D020000000000000000000000000000000000000
+:10D0300000000000000000000000000000000000F0
+:10D0400000000000000000000000000000000000E0
+:10D0500000000000000000000000000000000000D0
+:10D0600000000000000000000000000000000000C0
+:10D0700000000000000000000000000000000000B0
+:10D0800000000000000000000000000000000000A0
+:10D090000000000000000000000000000000000090
+:10D0A0000000000000000000000000000000000080
+:10D0B0000000000000000000000000000000000070
+:10D0C0000000000000000000000000000000000060
+:10D0D0000000000000000000000000000000000050
+:10D0E0000000000000000000000000000000000040
+:10D0F0000000000000000000000000000000000030
+:10D100000000000000000000000000003C120001D0
+:10D11000265212003C1400018E945C503C10000119
+:10D12000261011203C15C00036B500608E8A000024
+:10D130008EB30000026A400B0248000A0200F82188
+:10D14000000000000000000D0000000000000000D2
+:10D1500000000000000000000000000000000000CF
+:10D1600000000000000000000000000000000000BF
+:10D1700000000000000000000000000000000000AF
+:10D18000000000000000000000000000000000009F
+:10D19000000000000000000000000000000000008F
+:10D1A000000000000000000000000000000000007F
+:10D1B000000000000000000000000000000000006F
+:10D1C000000000000000000000000000000000005F
+:10D1D000000000000000000000000000000000004F
+:10D1E000000000000000000000000000000000003F
+:10D1F000000000000000000000000000000000002F
+:10D20000000000000000000000000000080014D62C
+:10D2100000000000080014D83C0A0001080014D8DF
+:10D220003C0A0002080014D800000000080024A6F0
+:10D2300000000000080014D83C0A0003080014D8BD
+:10D240003C0A000408002F8C00000000080014D8DD
+:10D250003C0A000508003CE80000000008003C66AD
+:10D2600000000000080014D83C0A0006080014D88A
+:10D270003C0A0007080014D800000000080014D879
+:10D2800000000000080014D80000000008002A7503
+:10D2900000000000080014D83C0A000B080014D855
+:10D2A0003C0A000C080014D83C0A000D0800237A40
+:10D2B000000000000800233900000000080014D816
+:10D2C0003C0A000E08001B3C00000000080024A4DB
+:10D2D00000000000080014D83C0A000F080040A716
+:10D2E000000000000800409100000000080014D871
+:10D2F0003C0A0010080014EE00000000080014D8DA
+:10D300003C0A0011080014D83C0A0012080014D886
+:10D310003C0A0013000000000000000000000000B4
+:10D3200000000000000000000000000000000000FD
+:10D3300000000000000000000000000000000000ED
+:10D3400000000000000000000000000000000000DD
+:10D3500000000000000000000000000000000000CD
+:10D3600000000000000000000000000000000000BD
+:10D3700000000000000000000000000000000000AD
+:10D38000000000000000000000000000000000009D
+:10D39000000000000000000000000000000000008D
+:10D3A000000000000000000000000000000000007D
+:10D3B000000000000000000000000000000000006D
+:10D3C000000000000000000000000000000000005D
+:10D3D000000000000000000000000000000000004D
+:10D3E000000000000000000000000000000000003D
+:10D3F000000000000000000000000000000000002D
+:10D400000000000000000000000000003C030001DC
+:10D4100034633800240500802404001F2406FFFF25
+:10D4200024020001AF80021CAF820200AF82022002
+:10D4300003631021AF8200C003631021AF8200C4D8
+:10D4400003631021AF8200C827623800AF8200D08A
+:10D4500027623800AF8200D427623800AF8200D83C
+:10D4600027621800AF8200E027621800AF8200E454
+:10D4700027621800AF8200E827621000AF8200F038
+:10D4800027621000AF8200F427621000AF8200F81C
+:10D49000ACA000002484FFFF1486FFFD24A5000437
+:10D4A0008F8300403C02F000006218243C025000D0
+:10D4B0001062000C0043102B144000063C02600078
+:10D4C0003C024000106200082402080008004539B0
+:10D4D0000000000010620004240208000800453922
+:10D4E00000000000240207003C010001AC225CDCCB
+:10D4F00003E000080000000027BDFFD8AFBF0024F4
+:10D50000AFB000208F8300548F8200543C01000193
+:10D51000AC205CC408004545246300648F8200543D
+:10D52000006210232C4200651440FFFC0000000044
+:10D530000C004D71000000002404000100002821AF
+:10D5400027A60018340280000C00498EA7A20018FC
+:10D550008F8300548F820054080045562463006472
+:10D560008F820054006210232C4200651440FFFC9F
+:10D5700024040001240500010C00494C27A60018D2
+:10D580008F8300548F820054080045622463006436
+:10D590008F820054006210232C4200651440FFFC6F
+:10D5A00024040001240500010C00494C27A60018A2
+:10D5B0008F8300548F8200540800456E24630064FA
+:10D5C0008F820054006210232C4200651440FFFC3F
+:10D5D000240400013C06000124C65DA00C00494C57
+:10D5E000240500028F8300548F8200540800457B7D
+:10D5F000246300648F820054006210232C42006573
+:10D600001440FFFC24040001240500033C10000129
+:10D6100026105DA20C00494C0200302197A600188C
+:10D620003C07000194E75DA03C04000124845AB04B
+:10D63000AFA00014960200003C05000D34A50100C7
+:10D640000C002403AFA2001097A200181040004C59
+:10D6500024036040960200003042FFF01443000AA9
+:10D66000240200203C03000194635DA05462000981
+:10D6700024027830240200033C010001AC225CC487
+:10D68000080045AC240200053C03000194635DA042
+:10D69000240278301462000F240300103C020001C1
+:10D6A00094425DA23042FFF01443000A24020003BA
+:10D6B0003C010001AC225CC4240200063C010001D4
+:10D6C000AC225DB03C010001AC225DBC080045E627
+:10D6D0003C09FFF03C0200018C425CC43C030001A9
+:10D6E00094635DA0344200013C010001AC225CC4A3
+:10D6F000240200151462000F000000003C0200012B
+:10D7000094425DA23042FFF03843F4202C630001C4
+:10D710003842F4302C4200010062182510600005E8
+:10D72000240200033C010001AC225DBC080045E678
+:10D730003C09FFF03C03000194635DA024027810D3
+:10D740001462000B240200023C02000194425DA21C
+:10D750003042FFF0144000062402000224020004BC
+:10D760003C010001AC225DBC080045E63C09FFF02D
+:10D770003C010001AC225DBC080045E63C09FFF01D
+:10D780003C0200018C425CC4240300013C01000106
+:10D79000AC235DBC344200043C010001AC225CC4FB
+:10D7A0003C09FFF03529BDC03C0600018CC65CC4B5
+:10D7B0003C04000124845AB0240200013C01000111
+:10D7C000AC225CCC8F8200543C0700018CE75DBC2E
+:10D7D0003C03000194635DA03C08000195085DA234
+:10D7E0003C05000D34A501003C010001AC205CC8E3
+:10D7F000004910213C010001AC225DACAFA3001038
+:10D800000C002403AFA800148FBF00248FB00020A9
+:10D8100003E0000827BD002827BDFFE83C05000104
+:10D820008CA55CC8240600042402000114A2001484
+:10D83000AFBF00103C0200018C427E3C30428000B1
+:10D84000104000053C04000F3C0300018C635DBCEC
+:10D8500008004617348442403C0400043C030001A5
+:10D860008C635DBC348493E02402000514620016CE
+:10D87000000000003C04003D0800462F34840900ED
+:10D880003C0200018C427E3830428000104000058E
+:10D890003C04001E3C0300018C635DBC0800462A6A
+:10D8A000348484803C04000F3C0300018C635DBC25
+:10D8B000348442402402000514620003000000008A
+:10D8C0003C04007A348412003C0200018C425DACBE
+:10D8D0008F83005400441021004310230044102B78
+:10D8E00014400037000000003C0200018C425CD074
+:10D8F00014400033000000003C01000110C000256E
+:10D90000AC205CE03C0900018D295CC424070001C7
+:10D910003C0440003C08000125087E3C250AFFFC31
+:10D920000005284214A0000224C6FFFF24050008B9
+:10D9300000A91024104000100000000014A70008E7
+:10D94000000000008D020000004410241040000A76
+:10D95000000000003C0100010800465BAC255CE0D3
+:10D960008D4200000044102410400003000000001D
+:10D970003C010001AC275CE03C0200018C425CE011
+:10D980000006182B2C420001004310245440FFE5F0
+:10D99000000528428F8200543C0300018C635CE048
+:10D9A0003C010001AC225DAC1060002A24020001A1
+:10D9B0003C010001AC255CC83C010001AC225CCC00
+:10D9C0003C0200018C425CE010400022000000009C
+:10D9D0003C0200018C425CCC1040000A2402000191
+:10D9E0003C010001AC205CCC3C0100010037082167
+:10D9F000AC2283AC3C010001AC205D4C3C01000139
+:10DA0000AC225D043C030001007718218C6383ACD9
+:10DA10002402000810620005240200010C00469553
+:10DA20000000000008004692000000003C030001D6
+:10DA30008C635CC8106200072402000E3C030001E6
+:10DA40008C637DD010620003000000000C004E5477
+:10DA50008F8402208FBF001003E0000827BD00184C
+:10DA600027BDFFE03C02FDFFAFBF00188EE30000C2
+:10DA70003C0500018CA55CC83C0400018C845CF072
+:10DA80003442FFFF0062182414A40008AEE3000033
+:10DA90003C030001007718218C6383AC3C02000139
+:10DAA0008C425CF410620008000000003C0200019F
+:10DAB000005710218C4283AC3C010001AC255CF086
+:10DAC0003C010001AC225CF43C0300018C635CC8A7
+:10DAD00024020002106201692C620003104000055C
+:10DAE0002402000110620008000000000800481C29
+:10DAF0000000000024020004106200B124020001B2
+:10DB00000800481D000000003C02000100571021E1
+:10DB10008C4283AC2443FFFF2C6200081040015A62
+:10DB2000000310803C010001002208218C225AC809
+:10DB300000400008000000003C0300018C635DBC55
+:10DB40002402000514620014000000003C020001E1
+:10DB50008C425CD41040000A240200030C004822CE
+:10DB600000000000240200023C01000100370821EF
+:10DB7000AC2283AC3C010001080046E0AC205CD440
+:10DB80003C01000100370821AC2283AC3C010001BC
+:10DB90000800481FAC205C600C0048220000000018
+:10DBA0003C0200018C425CD43C010001AC205C6072
+:10DBB000104000DD240200023C0100010037082172
+:10DBC000AC2283AC3C0100010800481FAC205CD4AF
+:10DBD0003C0300018C635DBC240200051462000359
+:10DBE000240200013C010001AC225D000C0049CF81
+:10DBF000000000003C0300018C635D000800478EBC
+:10DC0000240200113C0500018CA55CC83C06000103
+:10DC10008CC67E3C0C005108000020212402000527
+:10DC20003C010001AC205CD43C010001003708211C
+:10DC30000800481FAC2283AC3C04000124845ABC79
+:10DC40003C05000F34A50100000030210000382100
+:10DC5000AFA000100C002403AFA000140800481F60
+:10DC6000000000008F8202203C03F70000431025D3
+:10DC7000080047B7AF8202208F8202203C030004D5
+:10DC800000431024144000A9240200078F8300548D
+:10DC90003C0200018C425DA42463D8F000431023B1
+:10DCA0002C422710144000F8240200010800481DEF
+:10DCB000000000003C0500018CA55CC80C0052A2CD
+:10DCC000000020210C005386000020213C030001AD
+:10DCD0008C637E34046100EA240200013C020008E7
+:10DCE0000062102410400006000000008F82021421
+:10DCF0003C03FFFF00431024080047413442251F26
+:10DD00008F8202143C03FFFF004310243442241F7F
+:10DD1000AF8202148EE200003C0302000043102593
+:10DD2000AEE200008F8202202403FFFB0043102498
+:10DD3000AF8202208F82022034420002AF82022092
+:10DD4000240200083C01000100370821AC2283AC0A
+:10DD50008F8202203C03000400431024144000057D
+:10DD6000000000008F8202203C03F70000431025D2
+:10DD7000AF8202203C0300018C635DBC24020005DD
+:10DD80001462000A000000003C02000194425DA2FF
+:10DD900024429FBC2C4200041040000424040018BC
+:10DDA000240500020C004D93240600200C0043DDE6
+:10DDB000000000003C0100010800481FAC205D503D
+:10DDC0003C020001005710218C4283AC2443FFFF2A
+:10DDD0002C620008104000AC000310803C010001E0
+:10DDE000002208218C225AE80040000800000000B0
+:10DDF0000C00429B000000003C010001AC205CCC08
+:10DE0000AF8002043C0100010C004822AC207E20BF
+:10DE1000240200013C010001AC225CE42402000267
+:10DE20003C010001003708210800481FAC2283ACE8
+:10DE30000C00489F000000003C0300018C635CE480
+:10DE40002402000914620090240200033C01000136
+:10DE5000003708210800481FAC2283AC3C020001B7
+:10DE60008C427E3830424000104000050000000027
+:10DE70008F8200443C03FFFF0800479F34637FFF0D
+:10DE80008F8200442403FF7F00431024AF820044AC
+:10DE90008F830054080047B9240200048F83005484
+:10DEA0003C0200018C425DA42463D8F0004310239F
+:10DEB0002C42271014400074240200053C0100018C
+:10DEC000003708210800481FAC2283AC8F82022053
+:10DED0003C03F70000431025AF820220AF8002040C
+:10DEE0003C010001AC207E208F83005424020006F8
+:10DEF0003C01000100370821AC2283AC3C01000149
+:10DF00000800481FAC235DA48F8300543C0200012D
+:10DF10008C425DA42463FFF6004310232C42000AC8
+:10DF20001440005900000000240200073C010001D9
+:10DF3000003708210800481FAC2283AC8F820220E2
+:10DF40003C04F70000441025AF8202208F8202209B
+:10DF50003C03030000431024144000050000182176
+:10DF60008F8202202403000100441025AF8202208A
+:10DF700010600043240200018F8202143C03FFFF63
+:10DF80003C0400018C845D98004310243442251F1A
+:10DF9000AF820214240200083C010001003708216E
+:10DFA0001080000BAC2283AC3C0200018C425D74FB
+:10DFB00014400007240200013C010001AC227DD086
+:10DFC0000C004E548F8402200800480C0000000012
+:10DFD0008F8202203C0300080043102414400017E5
+:10DFE0002402000E3C010001AC227DD08EE2000034
+:10DFF000000020213C030200004310250C00538642
+:10E00000AEE200008F8202202403FFFB00431024B5
+:10E01000AF8202208F820220344200020C0043DDD6
+:10E02000AF8202203C0500018CA55CC80C0052A206
+:10E03000000020210800481F000000003C020001F1
+:10E040008C425D7410400010000000003C02000192
+:10E050008C425D702442FFFF3C010001AC225D70E8
+:10E0600014400009240200023C010001AC205D7450
+:10E070003C0100010800481FAC225D702402000131
+:10E080003C010001AC225CCC8FBF001803E000080B
+:10E0900027BD00208F8202008F8202208F82022003
+:10E0A00034420004AF8202208F8202003C0600014D
+:10E0B0008CC65CC834420004AF8202002402000215
+:10E0C00010C2003A2CC200031040000524020001D7
+:10E0D00010C20008000000000800486800000000AE
+:10E0E0002402000410C20013240200010800486842
+:10E0F000000000003C0300018C635CB83C0200019E
+:10E100008C425CC03C0400018C845CDC3C0500015A
+:10E110008CA55CBCAF860200AF860220346300226F
+:10E1200000441025004510253442000208004867CD
+:10E13000AF8302003C0300018C635D98AF82020054
+:10E1400010600009AF8202203C0200018C425D7425
+:10E15000144000053C033F003C0200018C425CB0CF
+:10E160000800485B346300E03C0200018C425CB074
+:10E170003C033F00346300E200431025AF820200FD
+:10E180003C0300018C635CB43C04F7003C020001DA
+:10E190008C425CC03C0500018CA55CDC0064182549
+:10E1A0000043102500451025AF82022003E000083F
+:10E1B000000000008F8202203C0300018C635CC8D9
+:10E1C00034420004AF820220240200011062000FDA
+:10E1D000000000008F8300548F82005424630002EB
+:10E1E000006210232C4200031040001100000000C8
+:10E1F0008F820054006210232C4200031040000C58
+:10E200000000000008004879000000008F830054DF
+:10E210008F82005408004885246300078F820054D1
+:10E22000006210232C4200081440FFFC0000000094
+:10E230008F8400E0308200071040000D00000000D5
+:10E240008F8200548F8300E014830009244500323C
+:10E250008F82005400A210232C420033104000048F
+:10E26000000000008F8200E01082FFF90000000033
+:10E270008F8202202403FFFD00431024AF8202207E
+:10E2800003E00008000000003C0300018C635CE434
+:10E290003C0200018C425CE8506200042463FFFFF2
+:10E2A0003C010001AC235CE82463FFFF2C62000901
+:10E2B0001040009D000310803C0100010022082155
+:10E2C0008C225B0800400008000000008F820044A0
+:10E2D00034428080AF8200448F8300540800493864
+:10E2E000240200028F8300543C0200018C425DA88E
+:10E2F0002463D8F0004310232C4227101440008AD6
+:10E300002402000308004945000000008F820044F9
+:10E310003C03FFFF34637FFF00431024AF820044BF
+:10E320008F83005408004938240200048F8300546E
+:10E330003C0200018C425DA82463FFF600431023D9
+:10E340002C42000A144000782402000508004945C8
+:10E35000000000008F8202203C03F70000431025DC
+:10E36000AF8202208F8202202403FFFB004310248F
+:10E37000AF8202208F82022034420002AF8202204C
+:10E380003C023F00344200E0AF8202008F82020074
+:10E390002403FFFD00431024AF8202002404000187
+:10E3A0003405FFFFAF8402048F8300548F82005432
+:10E3B000080048EC246300018F820054006210239F
+:10E3C0002C4200021440FFFC000000008F82022457
+:10E3D0000004204000A4102B1040FFF200000000B9
+:10E3E0008F8202203C03F70000431025AF820220F9
+:10E3F0008F8202143C03FFFF004310243442251F88
+:10E40000AF8202148F8202202403FFFB00431024FA
+:10E41000AF8202208F8202203C04F700348400087F
+:10E4200034420002AF8202208F8202203C033F0070
+:10E43000346300E200441025AF820220AF83020063
+:10E440008F8400F0276217F81482000224850008E8
+:10E45000276510008F8200F410A200073C038000A3
+:10E46000346300403C02000124425C70AC82000036
+:10E47000AC830004AF8500F08F8300540800493856
+:10E48000240200068F8300543C0200018C425DA8E8
+:10E490002463FFF6004310232C42000A144000229C
+:10E4A0002402000708004945000000008F8200E0B8
+:10E4B000AF8200E48F8200E0AF8200E88F8202200A
+:10E4C00034420004AF8202208F8202202403FFF72F
+:10E4D00000431024AF8202208F82004434428080A7
+:10E4E000AF8200448F830054240200083C010001E5
+:10E4F000AC225CE43C01000108004947AC235DA864
+:10E500008F8300543C0200018C425DA82463D8F044
+:10E51000004310232C42271014400003240200095A
+:10E520003C010001AC225CE403E0000800000000B4
+:10E5300000000000000000000000000027BDFFD820
+:10E54000AFB2001800809021AFB3001C00A098214A
+:10E55000AFB1001400C08821AFB0001000008021CE
+:10E56000AFBF0020A62000000C004D4B240400018A
+:10E57000261000012E0200201440FFFB00000000C6
+:10E580000C004D4B000020210C004D4B24040001D9
+:10E590000C004D4B240400010C004D4B00002021C9
+:10E5A000241000100250102410400002000020210E
+:10E5B000240400010C004D4B001080421600FFFAAD
+:10E5C0000250102424100010027010241040000289
+:10E5D00000002021240400010C004D4B001080425B
+:10E5E0001600FFFA027010240C004D7134108000E8
+:10E5F0000C004D71000000000C004D2B00000000CD
+:10E600005040000500108042962200000050102566
+:10E61000A6220000001080421600FFF70000000054
+:10E620000C004D71000000008FBF00208FB3001C54
+:10E630008FB200188FB100148FB0001003E00008F3
+:10E6400027BD002827BDFFD8AFB100140080882166
+:10E65000AFB2001800A09021AFB3001C00C09821F9
+:10E66000AFB0001000008021AFBF00200C004D4B68
+:10E6700024040001261000012E0200201440FFFB9C
+:10E68000000000000C004D4B000020210C004D4B01
+:10E69000240400010C004D4B000020210C004D4BC8
+:10E6A0002404000124100010023010241040000245
+:10E6B00000002021240400010C004D4B001080427A
+:10E6C0001600FFFA0230102424100010025010240B
+:10E6D0001040000200002021240400010C004D4BDA
+:10E6E000001080421600FFFA025010240C004D4B1F
+:10E6F000240400010C004D4B000020213410800048
+:10E7000096620000005010241040000200002021FA
+:10E71000240400010C004D4B001080421600FFF84D
+:10E72000000000000C004D71000000008FBF0020B1
+:10E730008FB3001C8FB200188FB100148FB000107F
+:10E7400003E0000827BD00283C0300018C635D0046
+:10E750003C0200018C425D4827BDFFD8AFBF0020BE
+:10E76000AFB1001C10620003AFB000183C01000103
+:10E77000AC235D482463FFFF2C6200131040034963
+:10E78000000310803C010001002208218C225B3034
+:10E7900000400008000000000C004D7100008021C6
+:10E7A00034028000A7A2001027B100100C004D4BCE
+:10E7B00024040001261000012E0200201440FFFB5B
+:10E7C000000000000C004D4B000020210C004D4BC0
+:10E7D000240400010C004D4B000020210C004D4B87
+:10E7E0002404000124100010320200011040000235
+:10E7F00000002021240400010C004D4B0010804239
+:10E800001600FFFA32020001241000100C004D4BDC
+:10E8100000002021001080421600FFFC00000000D4
+:10E820000C004D4B240400010C004D4B0000202136
+:10E830003410800096220000005010241040000286
+:10E8400000002021240400010C004D4B00108042E8
+:10E850001600FFF8000000000C004D7100000000E1
+:10E8600008004D242402000227B10010A7A00010C8
+:10E87000000080210C004D4B2404000126100001F3
+:10E880002E0200201440FFFB000000000C004D4B46
+:10E89000000020210C004D4B240400010C004D4BC6
+:10E8A000240400010C004D4B000020212410001016
+:10E8B0003202000110400002000020212404000167
+:10E8C0000C004D4B001080421600FFFA320200018E
+:10E8D000241000100C004D4B00002021001080423D
+:10E8E0001600FFFC000000000C004D713410800089
+:10E8F0000C004D71000000000C004D2B00000000CA
+:10E900005040000500108042962200000050102563
+:10E91000A6220000001080421600FFF70000000051
+:10E920000C004D710000000097A2001030428000E2
+:10E93000144002DC2402000308004D240000000003
+:10E9400024021200A7A2001027B1001000008021AD
+:10E950000C004D4B24040001261000012E02002063
+:10E960001440FFFB000000000C004D4B0000202174
+:10E970000C004D4B240400010C004D4B00002021E5
+:10E980000C004D4B24040001241000103202000141
+:10E990001040000200002021240400010C004D4B17
+:10E9A000001080421600FFFA32020001241000100D
+:10E9B0000C004D4B00002021001080421600FFFC8F
+:10E9C000000000000C004D4B240400010C004D4BD6
+:10E9D00000002021341080009622000000501024F6
+:10E9E0001040000200002021240400010C004D4BC7
+:10E9F000001080421600FFF8000000000C004D716E
+:10EA0000000000008F83005408004D16240200040B
+:10EA10008F8300543C0200018C425DB82463FF9C4C
+:10EA2000004310232C4200641440029E2402000282
+:10EA30003C0300018C635DBC106202972C620003F2
+:10EA40001440029624020011240200031062000503
+:10EA500024020004106202912402000F08004D24D9
+:10EA60002402001108004D24240200052402001491
+:10EA7000A7A2001027B10010000080210C004D4B10
+:10EA800024040001261000012E0200201440FFFB88
+:10EA9000000000000C004D4B000020210C004D4BED
+:10EAA000240400010C004D4B000020210C004D4BB4
+:10EAB0002404000124100010320200011040000262
+:10EAC00000002021240400010C004D4B0010804266
+:10EAD0001600FFFA32020001241000103202001268
+:10EAE0001040000200002021240400010C004D4BC6
+:10EAF000001080421600FFFA320200120C004D4B4B
+:10EB0000240400010C004D4B000020213410800033
+:10EB10009622000000501024104000020000202126
+:10EB2000240400010C004D4B001080421600FFF839
+:10EB3000000000000C004D71000000008F830054A5
+:10EB400008004D16240200068F8300543C02000189
+:10EB50008C425DB82463FF9C004310232C42006468
+:10EB6000144002502402000708004D240000000059
+:10EB700024020006A7A2001027B100100000802187
+:10EB80000C004D4B24040001261000012E02002031
+:10EB90001440FFFB000000000C004D4B0000202142
+:10EBA0000C004D4B240400010C004D4B00002021B3
+:10EBB0000C004D4B2404000124100010320200010F
+:10EBC0001040000200002021240400010C004D4BE5
+:10EBD000001080421600FFFA3202000124100010DB
+:10EBE0003202001310400002000020212404000122
+:10EBF0000C004D4B001080421600FFFA3202001349
+:10EC00000C004D4B240400010C004D4B0000202152
+:10EC100034108000962200000050102410400002A2
+:10EC200000002021240400010C004D4B0010804204
+:10EC30001600FFF8000000000C004D7100000000FD
+:10EC40008F83005408004D16240200088F8300545F
+:10EC50003C0200018C425DB82463FF9C00431023FA
+:10EC60002C4200641440020F2402000908004D24C5
+:10EC70000000000027B10010A7A0001000008021B4
+:10EC80000C004D4B24040001261000012E02002030
+:10EC90001440FFFB000000000C004D4B0000202141
+:10ECA0000C004D4B240400010C004D4B24040001CA
+:10ECB0000C004D4B000020212410001032020001F6
+:10ECC0001040000200002021240400010C004D4BE4
+:10ECD000001080421600FFFA3202000124100010DA
+:10ECE000320200181040000200002021240400011C
+:10ECF0000C004D4B001080421600FFFA3202001843
+:10ED00000C004D71341080000C004D7100000000AB
+:10ED10000C004D2B00000000504000050010804208
+:10ED20009622000000501025A6220000001080420C
+:10ED30001600FFF7000000000C004D71000080215C
+:10ED400097A2001027B1001034420001A7A20010C2
+:10ED50000C004D4B24040001261000012E0200205F
+:10ED60001440FFFB000000000C004D4B0000202170
+:10ED70000C004D4B240400010C004D4B00002021E1
+:10ED80000C004D4B2404000124100010320200013D
+:10ED90001040000200002021240400010C004D4B13
+:10EDA000001080421600FFFA320200012410001009
+:10EDB000320200181040000200002021240400014B
+:10EDC0000C004D4B001080421600FFFA3202001872
+:10EDD0000C004D4B240400010C004D4B0000202181
+:10EDE00034108000962200000050102410400002D1
+:10EDF00000002021240400010C004D4B0010804233
+:10EE00001600FFF8000000000C004D71000000002B
+:10EE10008F83005408004D162402000A8F8300548B
+:10EE20003C0200018C425DB82463FF9C0043102328
+:10EE30002C4200641440019B2402000B08004D2466
+:10EE40000000000027B10010A7A0001000008021E2
+:10EE50000C004D4B24040001261000012E0200205E
+:10EE60001440FFFB000000000C004D4B000020216F
+:10EE70000C004D4B240400010C004D4B24040001F8
+:10EE80000C004D4B00002021241000103202000124
+:10EE90001040000200002021240400010C004D4B12
+:10EEA000001080421600FFFA320200012410001008
+:10EEB000320200171040000200002021240400014B
+:10EEC0000C004D4B001080421600FFFA3202001772
+:10EED0000C004D71341080000C004D7100000000DA
+:10EEE0000C004D2B00000000504000050010804237
+:10EEF0009622000000501025A6220000001080423B
+:10EF00001600FFF7000000000C004D71000080218A
+:10EF100097A2001027B1001034420700A7A20010EA
+:10EF20000C004D4B24040001261000012E0200208D
+:10EF30001440FFFB000000000C004D4B000020219E
+:10EF40000C004D4B240400010C004D4B000020210F
+:10EF50000C004D4B2404000124100010320200016B
+:10EF60001040000200002021240400010C004D4B41
+:10EF7000001080421600FFFA320200012410001037
+:10EF8000320200171040000200002021240400017A
+:10EF90000C004D4B001080421600FFFA32020017A1
+:10EFA0000C004D4B240400010C004D4B00002021AF
+:10EFB00034108000962200000050102410400002FF
+:10EFC00000002021240400010C004D4B0010804261
+:10EFD0001600FFF8000000000C004D71000000005A
+:10EFE0008F83005408004D162402000C8F830054B8
+:10EFF0003C0200018C425DB82463FF9C0043102357
+:10F000002C420064144001272402001208004D2401
+:10F010000000000027B10010A7A000100000802110
+:10F020000C004D4B24040001261000012E0200208C
+:10F030001440FFFB000000000C004D4B000020219D
+:10F040000C004D4B240400010C004D4B2404000126
+:10F050000C004D4B00002021241000103202000152
+:10F060001040000200002021240400010C004D4B40
+:10F07000001080421600FFFA320200012410001036
+:10F08000320200141040000200002021240400017C
+:10F090000C004D4B001080421600FFFA32020014A3
+:10F0A0000C004D71341080000C004D710000000008
+:10F0B0000C004D2B00000000504000050010804265
+:10F0C0009622000000501025A62200000010804269
+:10F0D0001600FFF7000000000C004D7100008021B9
+:10F0E00097A2001027B1001034420010A7A2001010
+:10F0F0000C004D4B24040001261000012E020020BC
+:10F100001440FFFB000000000C004D4B00002021CC
+:10F110000C004D4B240400010C004D4B000020213D
+:10F120000C004D4B24040001241000103202000199
+:10F130001040000200002021240400010C004D4B6F
+:10F14000001080421600FFFA320200012410001065
+:10F1500032020014104000020000202124040001AB
+:10F160000C004D4B001080421600FFFA32020014D2
+:10F170000C004D4B240400010C004D4B00002021DD
+:10F18000341080009622000000501024104000022D
+:10F1900000002021240400010C004D4B001080428F
+:10F1A0001600FFF8000000000C004D710000000088
+:10F1B0008F83005408004D16240200138F830054DF
+:10F1C0003C0200018C425DB82463FF9C0043102385
+:10F1D0002C420064144000B32402000D08004D24AA
+:10F1E0000000000027B10010A7A00010000080213F
+:10F1F0000C004D4B24040001261000012E020020BB
+:10F200001440FFFB000000000C004D4B00002021CB
+:10F210000C004D4B240400010C004D4B2404000154
+:10F220000C004D4B00002021241000103202000180
+:10F230001040000200002021240400010C004D4B6E
+:10F24000001080421600FFFA320200012410001064
+:10F2500032020018104000020000202124040001A6
+:10F260000C004D4B001080421600FFFA32020018CD
+:10F270000C004D71341080000C004D710000000036
+:10F280000C004D2B00000000504000050010804293
+:10F290009622000000501025A62200000010804297
+:10F2A0001600FFF7000000000C004D7100008021E7
+:10F2B00097A2001027B100103042FFFEA7A2001055
+:10F2C0000C004D4B24040001261000012E020020EA
+:10F2D0001440FFFB000000000C004D4B00002021FB
+:10F2E0000C004D4B240400010C004D4B000020216C
+:10F2F0000C004D4B240400012410001032020001C8
+:10F300001040000200002021240400010C004D4B9D
+:10F31000001080421600FFFA320200012410001093
+:10F3200032020018104000020000202124040001D5
+:10F330000C004D4B001080421600FFFA32020018FC
+:10F340000C004D4B240400010C004D4B000020210B
+:10F35000341080009622000000501024104000025B
+:10F3600000002021240400010C004D4B00108042BD
+:10F370001600FFF8000000000C004D7100000000B6
+:10F380008F83005408004D162402000E240208400A
+:10F39000A7A2001027B10010000080210C004D4BE7
+:10F3A00024040001261000012E0200201440FFFB5F
+:10F3B000000000000C004D4B000020210C004D4BC4
+:10F3C000240400010C004D4B000020210C004D4B8B
+:10F3D0002404000124100010320200011040000239
+:10F3E00000002021240400010C004D4B001080423D
+:10F3F0001600FFFA3202000124100010320200133E
+:10F400001040000200002021240400010C004D4B9C
+:10F41000001080421600FFFA320200130C004D4B20
+:10F42000240400010C004D4B00002021341080000A
+:10F4300096220000005010241040000200002021FD
+:10F44000240400010C004D4B001080421600FFF810
+:10F45000000000000C004D71000000008F8300547C
+:10F46000240200103C010001AC225D003C010001BF
+:10F4700008004D26AC235DB88F8300543C02000188
+:10F480008C425DB82463FF9C004310232C4200642F
+:10F490001440000400000000240200113C0100019F
+:10F4A000AC225D008FBF00208FB1001C8FB0001810
+:10F4B00003E0000827BD00288F8500448F820044A8
+:10F4C0003C030001004310253C030008AF820044C8
+:10F4D0008F8400548F82005400A3282408004D37E5
+:10F4E000248400018F820054008210232C420002E9
+:10F4F0001440FFFC000000008F8200443C03FFFE2C
+:10F500003463FFFF00431024AF8200448F83005414
+:10F510008F82005408004D45246300018F820054FF
+:10F52000006210232C4200021440FFFC0000000087
+:10F5300003E0000800A010218F8300443C02FFF08C
+:10F540003442FFFF00042480006218243C020002C1
+:10F550000082202500641825AF8300448F82004478
+:10F560003C03FFFE3463FFFF00431024AF820044DE
+:10F570008F8300548F82005408004D5E2463000185
+:10F580008F820054006210232C4200021440FFFCC2
+:10F59000000000008F8200443C030001004310255E
+:10F5A000AF8200448F8300548F82005408004D6B5B
+:10F5B000246300018F820054006210232C42000259
+:10F5C0001440FFFC0000000003E000080000000001
+:10F5D0008F8200443C03FFF03463FFFF004310249C
+:10F5E000AF8200448F8200443C0300010043102599
+:10F5F000AF8200448F8300548F82005408004D7FF7
+:10F60000246300018F820054006210232C42000208
+:10F610001440FFFC000000008F8200443C03FFFE0A
+:10F620003463FFFF00431024AF8200448F830054F3
+:10F630008F82005408004D8D246300018F82005496
+:10F64000006210232C4200021440FFFC0000000066
+:10F6500003E000080000000027BDFFC8AFB300248E
+:10F6600000809821AFB5002C00A0A821AFB20020E7
+:10F6700000C0902132A2FFFFAFBF0030AFB400281E
+:10F68000AFB1001CAFB0001814400034A7B2001096
+:10F690003271FFFF27B20010000080210C004D4B9B
+:10F6A00024040001261000012E0200201440FFFB5C
+:10F6B000000000000C004D4B000020210C004D4BC1
+:10F6C000240400010C004D4B000020210C004D4B88
+:10F6D0002404000124100010320200011040000236
+:10F6E00000002021240400010C004D4B001080423A
+:10F6F0001600FFFA3202000124100010023010241C
+:10F700001040000200002021240400010C004D4B99
+:10F71000001080421600FFFA023010240C004D4BFE
+:10F72000240400010C004D4B000020213410800007
+:10F7300096420000005010241040000200002021DA
+:10F74000240400010C004D4B001080421200007593
+:10F750000000000008004DC9000000003274FFFFE7
+:10F7600027B10010A7A00010000080210C004D4B15
+:10F7700024040001261000012E0200201440FFFB8B
+:10F78000000000000C004D4B000020210C004D4BF0
+:10F79000240400010C004D4B240400010C004D4BCF
+:10F7A000000020212410001032020001104000024D
+:10F7B00000002021240400010C004D4B0010804269
+:10F7C0001600FFFA320200012410001002901024EB
+:10F7D0001040000200002021240400010C004D4BC9
+:10F7E000001080421600FFFA029010240C004D71A8
+:10F7F000341080000C004D71000000000C004D2BF7
+:10F8000000000000504000050010804296220000D9
+:10F8100000501025A6220000001080421600FFF7BD
+:10F82000000000000C004D710000000032A5FFFF39
+:10F830002402000154A200042402000297A2001036
+:10F8400008004E140052102514A200063271FFFF6A
+:10F8500097A200100012182700431024A7A200103E
+:10F860003271FFFF27B20010000080210C004D4BC9
+:10F8700024040001261000012E0200201440FFFB8A
+:10F88000000000000C004D4B000020210C004D4BEF
+:10F89000240400010C004D4B000020210C004D4BB6
+:10F8A0002404000124100010320200011040000264
+:10F8B00000002021240400010C004D4B0010804268
+:10F8C0001600FFFA3202000124100010023010244A
+:10F8D0001040000200002021240400010C004D4BC8
+:10F8E000001080421600FFFA023010240C004D4B2D
+:10F8F000240400010C004D4B000020213410800036
+:10F900009642000000501024104000020000202108
+:10F91000240400010C004D4B001080421600FFF83B
+:10F92000000000000C004D71000000008FBF00308F
+:10F930008FB5002C8FB400288FB300248FB2002025
+:10F940008FB1001C8FB0001803E0000827BD0038FD
+:10F9500000000000000000000000000027BDFFE8DC
+:10F96000AFBF00103C030001007718218C6383AC0B
+:10F97000240200081462022C008030213C020001A5
+:10F980008C425D9814400033000000008F850224F3
+:10F9900038A300202C63000138A200102C42000183
+:10F9A000006218251460000D38A300302C6300019C
+:10F9B00038A204002C4200010062182514600007E0
+:10F9C00038A304022C63000138A204042C42000175
+:10F9D0000062182510600005000000000C00429B2A
+:10F9E0000000000008004E8D2402000E0C0043DDD4
+:10F9F000000000003C0500018CA55CC80C0052A270
+:10FA0000000020213C0300018C635CC82402000438
+:10FA1000146200052403FFFB3C0200018C425CC41D
+:10FA200008004E892403FFF73C0200018C425CC4AD
+:10FA3000004310243C010001AC225CC42402000EEF
+:10FA40003C0100010C00429BAC227DD00800508795
+:10FA5000000000008F8202203C03040000431024B9
+:10FA6000104000272403FFBF8F8502243C020001C1
+:10FA70008C427DDC00A32024004310241482000C5F
+:10FA8000000000003C0200018C427DE024420001A5
+:10FA90003C010001AC227DE02C4200021440000831
+:10FAA000240200013C01000108004EADAC227E00A2
+:10FAB0003C010001AC207DE03C010001AC207E0057
+:10FAC0003C0200018C427E001040000630A2004043
+:10FAD00010400004240200013C01000108004EB85F
+:10FAE000AC227E043C010001AC207E043C010001FC
+:10FAF000AC257DDC3C01000108004EC8AC207E1026
+:10FB0000240200013C010001AC227E103C010001F6
+:10FB1000AC207E003C010001AC207DE03C010001F6
+:10FB2000AC207E043C010001AC207DDC3C030001E4
+:10FB30008C637DD03C0200018C427DD410620003B6
+:10FB40003C0202003C010001AC237DD400C2102421
+:10FB5000104000072463FFFF8F820220240300016E
+:10FB60003C010001AC235CCC080050853C03F7004D
+:10FB70002C62000E104001A8000310803C0100011F
+:10FB8000002208218C225B80004000080000000059
+:10FB90003C010001AC207E003C010001AC207DE076
+:10FBA0003C010001AC207DDC3C010001AC207E0466
+:10FBB0003C010001AC207DF83C010001AC207DF04F
+:10FBC0000C00486AAF800224240200023C010001BC
+:10FBD000AC227DD03C0200018C427E1014400056C5
+:10FBE0003C03FDFF8EE200003463FFFF004310245E
+:10FBF0000C00429BAEE20000AF8002048F82020044
+:10FC00002403FFFD00431024AF8202003C010001E9
+:10FC1000AC207E208F8300543C0200018C427DF892
+:10FC2000240400013C010001AC247E0C24420001AC
+:10FC30003C010001AC227DF82C4200043C01000193
+:10FC4000AC237DF414400006240200033C010001B3
+:10FC5000AC245CCC3C01000108005083AC207DF852
+:10FC60003C01000108005083AC227DD08F830054FA
+:10FC70003C0200018C427DF42463D8F00043102341
+:10FC80002C42271014400003240200043C01000110
+:10FC9000AC227DD03C0200018C427E101440002634
+:10FCA0003C03FDFF8EE200003463FFFF004310249D
+:10FCB00008005083AEE200003C0400018C845D9C8F
+:10FCC0003C0100010C00508AAC207DE83C020001A0
+:10FCD0008C427E1CAF8202043C0200018C427E10EA
+:10FCE000144000153C03FDFF8EE200003463FFFF6B
+:10FCF00000431024AEE200008F8202043042003044
+:10FD00001440013C240200023C0300018C637E1C71
+:10FD1000240200053C010001AC227DD03C01000121
+:10FD200008005083AC237E203C0200018C427E10F0
+:10FD3000104000103C03FDFF3C0200018C425D6C52
+:10FD4000244200013C010001AC225D6C2C42000207
+:10FD500014400131240200013C010001AC225D7419
+:10FD60003C010001AC205D6C3C01000108005083A7
+:10FD7000AC225CCC8EE200003463FFFF0043102411
+:10FD8000AEE200003C0200018C427E0010400122E5
+:10FD9000000000003C0200018C427DDC1040011E8E
+:10FDA000000000003C010001AC227E082402000398
+:10FDB0003C010001AC227DE0080050242402000632
+:10FDC0003C010001AC207DE88F82020434420040F7
+:10FDD000AF8202043C0200018C427E202403000713
+:10FDE0003C010001AC237DD0344200403C010001C5
+:10FDF000AC227E203C0200018C427E0010400005B7
+:10FE0000000000003C0200018C427DDC104000F943
+:10FE1000240200023C05000124A57DE08CA2000024
+:10FE20002C424E21104000F3240200023C0200014B
+:10FE30008C427E04104000F82404FFBF3C02000105
+:10FE40008C427DDC3C0300018C637E08004410245E
+:10FE50000064182410430004240200013C01000146
+:10FE600008005083AC227DD024020003ACA2000025
+:10FE7000240200083C010001AC227DD03C020001BC
+:10FE80008C427E0C1040000C240200013C04000156
+:10FE90000C0050978C847DDC3C0200018C427E2853
+:10FEA00014400005240200013C0200018C427E2423
+:10FEB00010400006240200013C010001AC225CCC91
+:10FEC0003C01000108005083AC207DF83C02000199
+:10FED0008C427DF03C0300018C637DDC2C420001F0
+:10FEE000000210C0306300083C010001AC227DF02C
+:10FEF0003C010001AC237DEC8F83005424020009F7
+:10FF00003C010001AC227DD03C010001080050837F
+:10FF1000AC237DF48F8300543C0200018C427DF4BD
+:10FF20002463D8F0004310232C422710144000A86B
+:10FF3000000000003C0200018C427E0010400005E1
+:10FF4000000000003C0200018C427DDC104000A952
+:10FF5000240200023C03000124637DE08C62000067
+:10FF60002C424E21104000A3240200023C0200015A
+:10FF70008C427E0C1040000E000000003C0200018C
+:10FF80008C427DDC3C010001AC207E0C30420080C4
+:10FF90001040002F2402000C8F82020430420080A7
+:10FFA0001440000C24020003080050112402000C2D
+:10FFB0003C0200018C427DDC304200801440000590
+:10FFC000240200038F820204304200801040001F90
+:10FFD00024020003AC6200002402000A3C0100017C
+:10FFE000AC227DD03C04000124847E188C82000069
+:10FFF0003C0300018C637DF000431025AF820204B6
+:020000021000EC
+:100000008C8300003C0400018C847DF02402000BF2
+:100010003C010001AC227DD0006418253C010001A8
+:10002000AC237E203C05000124A57DE08CA20000CD
+:100030002C424E211040006F240200023C020001BD
+:100040008C427E1010400005000000002402000CCD
+:100050003C01000108005083AC227DD03C0200012D
+:100060008C427E001040006C000000003C04000147
+:100070008C847DDC1080005E308200083C0300012F
+:100080008C637DEC10620064240200033C010001DB
+:10009000AC247E08ACA20000240200063C01000152
+:1000A00008005083AC227DD08F82020034420002CF
+:1000B000AF8202008F8300542402000D3C01000136
+:1000C000AC227DD03C010001AC237DF48F83005431
+:1000D0003C0200018C427DF42463D8F000431023DD
+:1000E0002C4227101440003A000000003C0200019E
+:1000F0008C427E10104000292402000E3C030001B7
+:100100008C637E243C01000114600015AC227DD07C
+:100110000C0043DD000000003C0500018CA55CC81C
+:100120000C0052A2000020213C0300018C635CC83B
+:1001300024020004146200052403FFFB3C020001BA
+:100140008C425CC4080050522403FFF73C020001BB
+:100150008C425CC4004310243C010001AC225CC40E
+:100160008EE200003C03020000431025AEE20000D6
+:100170008F8202243C010001AC227E2C8F8202205F
+:100180002403FFFB00431024AF8202208F82022051
+:100190003442000208005083AF8202203C0200017A
+:1001A0008C427E0010400005000000003C0200016F
+:1001B0008C427DDC1040000F240200023C02000152
+:1001C0008C427DE02C424E211040000A24020002A5
+:1001D0003C0200018C427E001040000F0000000035
+:1001E0003C0200018C427DDC1440000B000000004A
+:1001F000240200023C01000108005083AC227DD0A3
+:100200003C0200018C427E00104000030000000010
+:100210000C00429B000000008F8202203C03F7008C
+:1002200000431025AF8202208FBF001003E00008BA
+:1002300027BD00183C03000124637E288C62000067
+:1002400010400005344220003C010001AC227E1C1D
+:1002500008005095AC6000003C010001AC247E1CFD
+:1002600003E000080000000027BDFFE030820030FE
+:10027000AFBF00183C010001AC227E24144000678F
+:100280003C02FFFF34421F0E008210241440006124
+:1002900024020030308220001040005D3083800056
+:1002A00000031A0230820001000212003C04000127
+:1002B0008C845D9C00621825000331C23C03000160
+:1002C00024635D78308280000002120230840001D5
+:1002D0000004220000441025000239C200061080EC
+:1002E0000043102100471021904300002402000128
+:1002F00010620025000000001060000724020002C8
+:1003000010620013240200031062002C3C05000F51
+:10031000080050F9000000008F8202002403FEFF55
+:1003200000431024AF8202008F8202203C03FFFEB4
+:100330003463FFFF00431024AF8202203C01000120
+:10034000AC207E443C01000108005104AC207E4CEE
+:100350008F82020034420100AF8202008F820220AD
+:100360003C03FFFE3463FFFF00431024AF820220F2
+:10037000240201003C010001AC227E443C0100014A
+:1003800008005104AC207E4C8F8202002403FEFF43
+:1003900000431024AF8202008F8202203C03000140
+:1003A00000431025AF8202203C010001AC207E44B6
+:1003B0003C01000108005104AC237E4C8F820200F6
+:1003C00034420100AF8202008F8202203C03000110
+:1003D00000431025AF820220240201003C010001ED
+:1003E000AC227E443C01000108005104AC237E4C49
+:1003F00034A5FFFF3C04000124845BB8AFA30010C8
+:100400000C002403AFA000140800510400000000F9
+:10041000240200303C010001AC227E288FBF00186E
+:1004200003E0000827BD00200000000027BDFFC832
+:10043000AFB2002800809021AFB3002C00A098211B
+:10044000AFB0002000C080213C04000124845BD0B8
+:100450003C0500093C0200018C425CC834A59001B7
+:100460000240302102603821AFBF0030AFB100241C
+:10047000A7A0001AAFB000140C002403AFA2001014
+:1004800024020002126200832E6200031040000565
+:10049000240200011262000A000000000800529BC2
+:1004A0000000000024020004126200FA2402000886
+:1004B000126200F93C02FFEC0800529B00000000B1
+:1004C0003C0200018C425CC4304200021440000433
+:1004D000001289403C02FFFB3442FFFF02028024ED
+:1004E0003C01000100310821AC307E3C3C02400060
+:1004F000020210241040004E001023C2308400304D
+:10050000001013823042001C3C03000124635D088C
+:1005100000431021008238213C02002002021024F6
+:1005200010400006240201003C01000100310821B6
+:10053000AC227E40080051503C0200803C0100018A
+:1005400000310821AC207E403C02008002021024D1
+:1005500010400006001219403C0200013C0100015D
+:10056000002308210800515CAC227E480012114093
+:100570003C01000100220821AC207E4894E40000E8
+:100580003C0300018C635DBC240200051062001076
+:10059000A7A400183202400010400002348240003C
+:1005A000A7A200182404000194E20002240500041C
+:1005B00024E60002344200010C00498EA4E200024D
+:1005C00024040001000028210C00498E27A60018F1
+:1005D0003C0200018C425CC8241100013C01000176
+:1005E000AC315CD414530004320280000C00429BF6
+:1005F00000000000320280001040011F00000000D7
+:100600000C00429B000000003C0300018C635DBCB9
+:100610002402000510620118240200023C010001BE
+:10062000AC315CCC3C0100010800529BAC225CC8A0
+:10063000240400012405000427B0001A0C00498E90
+:100640000200302124040001000028210C00498E02
+:10065000020030213C020001005110218C427E3406
+:100660003C0400018C845CC83C03BFFF3463FFFF83
+:100670003C010001AC335CD4004310243C01000178
+:1006800000310821109300FAAC227E340800529BFE
+:10069000000000003C02200002021024104000056F
+:1006A000240200013C010001AC225D98080051AD1C
+:1006B000001289403C010001AC205D980012894085
+:1006C0003C01000100310821AC307E383C02400082
+:1006D0000202102414400016000000003C02000139
+:1006E0008C425D9810400008240400042405000199
+:1006F0000C004D9324062000240200013C0100015F
+:1007000000370821AC2283AC3C02000100511021CB
+:100710008C427E303C03BFFF3463FFFF0043102454
+:100720003C0100010031082108005299AC227E30C2
+:100730003C0200018C425D98104000283C0300A060
+:10074000020310245443000D3C0200203C0200012F
+:100750008C425D9C240301003C0100010031082112
+:10076000AC237E443C0300013C0100010031082120
+:10077000AC237E4C080051F03442040002021024E5
+:1007800010400008240301003C0200018C425D9CE3
+:100790003C01000100310821AC237E44080051F0E7
+:1007A000344208003C020080020210241040002E57
+:1007B0003C0300013C0200018C425D9C3C010001B5
+:1007C00000310821AC237E4C34420C003C01000176
+:1007D000AC225D9C08005218240400013C02002059
+:1007E0000202102410400006240201003C01000116
+:1007F00000310821AC227E44080052013C020080F6
+:100800003C01000100310821AC207E443C02008004
+:100810000202102410400007001219403C0200019F
+:100820003C01000100230821AC227E4C0800520F3D
+:1008300024040001001211403C01000100220821A3
+:10084000AC207E4C240400010000282127B0001EAB
+:100850000C00494C02003021240400010000282132
+:100860000C00494C02003021240400012405000141
+:1008700027B0001C0C00494C020030212404000168
+:10088000240500010C00494C020030210800529957
+:10089000000000003C02FFEC3442FFFF0202802413
+:1008A0003C02000802028025001211403C010001B8
+:1008B00000220821AC307E383C02200002021024C5
+:1008C00010400009000000003C0200018C425D74F1
+:1008D00014400005240200013C010001AC225D9897
+:1008E0000800523A3C0240003C010001AC205D98F7
+:1008F0003C024000020210241440001E00000000D0
+:100900003C0200018C425D983C010001AC205CE09F
+:1009100010400007240220203C010001AC225D9C15
+:10092000240200013C01000100370821AC2283AC05
+:100930003C04BFFF001219403C020001004310219B
+:100940008C427E303C0500018CA55CC83484FFFFDE
+:10095000004410243C01000100230821AC227E3019
+:100960002402000110A20044000000000800529977
+:10097000000000003C0200018C425D981040001C09
+:10098000240220003C010001AC225D9C3C0300A03D
+:100990000203102414430005001211403402A00089
+:1009A0003C01000108005294AC225D9C3C03000114
+:1009B000006218218C637E383C0200200062102403
+:1009C00010400004240220013C0100010800529460
+:1009D000AC225D9C3C020080006210241040001F8D
+:1009E0003402A0013C01000108005294AC225D9C3D
+:1009F0003C0200200202102410400007001219409F
+:100A0000240201003C01000100230821AC227E44A5
+:100A1000080052883C020080001211403C01000195
+:100A200000220821AC207E443C02008002021024F7
+:100A300010400006001219403C0200013C01000178
+:100A40000023082108005294AC227E4C0012114071
+:100A50003C01000100220821AC207E4C3C03000137
+:100A60008C635CC8240200011062000300000000D7
+:100A70000C00429B000000008FBF00308FB3002CA1
+:100A80008FB200288FB100248FB0002003E000084F
+:100A900027BD003827BDFFD8AFB2002000809021CD
+:100AA000AFB1001C0000882124020002AFBF002467
+:100AB000AFB00018A7A0001210A200D3A7A000108A
+:100AC0002CA20003104000052402000110A2000A1D
+:100AD00000128140080053800220102124020004EB
+:100AE00010A2007D2402000810A2007C0012294000
+:100AF00008005380022010213C03000100701821DF
+:100B00008C637E3C3C0240000062102414400009CB
+:100B1000240400013C027FFF3442FFFF006288246E
+:100B20003C01000100300821AC317E3408005380C4
+:100B300002201021240500010C00494C27A60010BA
+:100B400024040001240500010C00494C27A60010D4
+:100B500097A2001030420004104000343C114000C5
+:100B60003C0200018C425DBC2443FFFF2C62000666
+:100B700010400034000310803C01000100220821D5
+:100B80008C225BE00040000800000000240400010B
+:100B90002405001127B000120C00494C020030213E
+:100BA00024040001240500110C00494C02003021EE
+:100BB00097A5001230A24000104000023C04001033
+:100BC0003C0400083C0300010800530130A28000EF
+:100BD000240400012405001427B000120C00494C25
+:100BE0000200302124040001240500140C00494CAB
+:100BF0000200302197A5001230A210001040000220
+:100C00003C0400103C0400083C03000130A2080032
+:100C1000544000013C0300023C02800002221025E7
+:100C2000006418250800530E004388253C1100017C
+:100C3000023088218E317E3C3C027FFF3442FFFF30
+:100C4000022288243C0200018C425CD81040001D26
+:100C5000001211403C0200018C425D9810400002DD
+:100C60003C02200002228825001211403C010001B4
+:100C7000002208218C227E40104000033C0200200C
+:100C800008005322022288253C02FFDF3442FFFF86
+:100C900002228824001211403C0100010022082198
+:100CA0008C227E48104000033C0200800800532D37
+:100CB000022288253C02FF7F3442FFFF0222882463
+:100CC000001211403C01000100220821AC317E34A9
+:100CD0000800538002201021001229403C0300012B
+:100CE000006518218C637E383C02400000621024AD
+:100CF000144000083C027FFF3442FFFF006288245A
+:100D00003C01000100250821AC317E3008005380F1
+:100D1000022010213C0200018C425CD810400033BC
+:100D20003C11C00C3C0200018C425D743C04C00CC0
+:100D3000348420003C0300018C635D980002102B7A
+:100D40000002102300441024106000030051882585
+:100D50003C022000022288253C02000100451021AF
+:100D60008C427E44104000033C0200200800535D8A
+:100D7000022288253C02FFDF3442FFFF0222882442
+:100D8000001211403C010001002208218C227E4CFF
+:100D9000104000033C0200800800536802228825AE
+:100DA0003C02FF7F3442FFFF022288243C02000104
+:100DB0008C425D60104000023C020800022288253F
+:100DC0003C0200018C425D64104000023C020400C1
+:100DD000022288253C0200018C425D68104000061A
+:100DE0003C0201000800537B022288253C027FFF61
+:100DF0003442FFFF00628824001211403C010001D0
+:100E000000220821AC317E30022010218FBF002447
+:100E10008FB200208FB1001C8FB0001803E00008D3
+:100E200027BD002827BDFFD8AFB400200080A02137
+:100E3000AFBF0024AFB3001CAFB20018AFB10014B5
+:100E4000AFB000108F9002003C0300018C635CC8BF
+:100E50008F93022024020002106200632C620003C0
+:100E600010400005240200011062000A001419401D
+:100E70000800544800000000240200041062005AD8
+:100E800024020008106200590014914008005448E0
+:100E9000000000003C040001008320218C847E3C83
+:100EA0003C110001022388218E317E343C02400037
+:100EB000008210241040003E3C0200080222102450
+:100EC00010400020361000023C02000100431021B7
+:100ED0008C427E4010400005361000203610010084
+:100EE0003C020020080053BD022288252402FEFF98
+:100EF000020280243C02FFDF3442FFFF02228824EA
+:100F0000001411403C010001002208218C227E487F
+:100F1000104000053C020001026298253C0200805E
+:100F2000080053DC022288253C02FFFE3442FFFF0A
+:100F3000026298243C02FF7F3442FFFF080053DC2A
+:100F4000022288242402FEDF020280243C02FFFEEB
+:100F50003442FFFF026298243C02FF5F3442FFFFED
+:100F6000022288243C01000100230821AC207E409D
+:100F70003C01000100230821AC207E480C00486A97
+:100F800000000000AF900200AF9302208F82022089
+:100F90002403FFFB00431024AF8202208F82022033
+:100FA00034420002AF820220080053F300141140C3
+:100FB0008F8202002403FFFD004310240C00486AC6
+:100FC000AF8202003C02BFFF3442FFFF0C00429B95
+:100FD00002228824001411403C0100010022082153
+:100FE00008005448AC317E34001491403C040001A8
+:100FF000009220218C847E383C110001023288212D
+:101000008E317E303C0240000082102414400011DA
+:10101000000000003C0200018C425D981440000674
+:101020003C02BFFF8F820200344200020C00486A7B
+:10103000AF8202003C02BFFF3442FFFF0C00429B24
+:10104000022288243C010001003208210800544893
+:10105000AC317E303C0200018C425D9810400005AE
+:101060003C0200203C0200018C425D741040002BC9
+:101070003C0200200082102410400007361000209F
+:10108000240201003C01000100320821AC227E4410
+:1010900008005428361001003C01000100320821EC
+:1010A000AC207E442402FEFF020280243C02008029
+:1010B0000082102410400007001419403C02000177
+:1010C0003C01000100230821AC227E4C0800543969
+:1010D00002629825001411403C0100010022082101
+:1010E000AC207E4C3C02FFFE3442FFFF026298249B
+:1010F0000C00486A00000000AF900200AF9302208D
+:101100008F8202202403FFFB00431024AF820220C1
+:101110008F82022034420002AF820220001411406C
+:101120003C01000100220821AC317E308FBF002439
+:101130008FB400208FB3001C8FB200188FB1001441
+:101140008FB0001003E0000827BD00282448656127
+:101150006465723A202F70726F6A656374732F72C0
+:1011600063732F73772F67652F2E2F6E69632F663A
+:10117000772F636F6D6D6F6E2F66776D61696E2E61
+:10118000632C7620312E312E322E313120313939F7
+:10119000382F30342F32372032323A31333A34322A
+:1011A00020736875616E6720457870202400000008
+:1011B0007468655F4441574E00000000535441433A
+:1011C0004B5F312000000000426164536E64526E38
+:1011D000670000003F456E71457674003F6E6F51A9
+:1011E00064457650000000006576526E6746756C67
+:1011F0006C000000496C6C436F6E66527800000012
+:1012000053656E64436B53756D00000052656376E1
+:10121000566C616E0000000000000000244865610B
+:101220006465723A202F70726F6A656374732F72EF
+:1012300063732F73772F67652F2E2F6E69632F6669
+:10124000772F636F6D6D6F6E2F74696D65722E638E
+:101250002C7620312E312E322E3820313939382F4C
+:1012600030372F33312031373A35383A343520731F
+:101270006875616E6720457870202400542D446D98
+:101280006152643100000000542D446D61424200FF
+:10129000542D446D613200003F6E6F5164547845A7
+:1012A000000000003F6E6F5164527845000000005E
+:1012B000656E714D4576504661696C00656E714D85
+:1012C00045764661696C00006661696C456E454D06
+:1012D000000000003F456E71457674003F6E6F510F
+:1012E00064457650000000006576526E6746756C66
+:1012F0006C00000000000000000000002448656150
+:101300006465723A202F70726F6A656374732F720E
+:1013100063732F73772F67652F2E2F6E69632F6688
+:10132000772F636F6D6D6F6E2F636F6D6D616E6480
+:101330002E632C7620312E312E322E313020313951
+:1013400039382F31312F31382031373A31313A3174
+:101350003820736875616E6720457870202400001E
+:101360003F4D626F78457674000000004E4F636F0A
+:101370006D616E6400000000687374655F455252D1
+:1013800000000000412D45727242756300000000AC
+:101390004552524F522D416464000000656E714DFC
+:1013A0004576504661696C00656E714D45764661C3
+:1013B000696C00006661696C456E454D0000000077
+:1013C000442D4572724C617374000000442D4572C7
+:1013D000723200006D4373744D6445525200000038
+:1013E00070726F6D4D6445525200000046696C7416
+:1013F0004D64455252000000636D645F45525200D7
+:101400003F456E71457674003F6E6F51644576506E
+:10141000000000006576526E6746756C6C00000037
+:101420000000000000006EA000007FBC00006E38CD
+:1014300000008734000082B00000878000008780B1
+:1014400000006F540000769400007F0C000080A81C
+:10145000000080740000878000007E70000080CC57
+:1014600000006E64000081CC00000000244865612B
+:101470006465723A202F70726F6A656374732F729D
+:1014800063732F73772F67652F2E2F6E69632F6617
+:10149000772F636F6D6D6F6E2F646D612E632C7689
+:1014A00020312E312E322E3320313939382F30343D
+:1014B0002F32372032323A31333A34312073687563
+:1014C000616E67204578702024000000646D6172B1
+:1014D0006441544E00000000646D61777241544EC7
+:1014E00000000000000000000000000024486561CA
+:1014F0006465723A202F70726F6A656374732F721D
+:1015000063732F73772F67652F2E2F6E69632F6696
+:10151000772F636F6D6D6F6E2F74726163652E63CD
+:101520002C7620312E312E322E3220313939382F7F
+:1015300030342F32372032323A31333A353020735B
+:101540006875616E672045787020240024486561C5
+:101550006465723A202F70726F6A656374732F72BC
+:1015600063732F73772F67652F2E2F6E69632F6636
+:10157000772F636F6D6D6F6E2F646174612E632CB6
+:101580007620312E312E322E3220313939382F301B
+:10159000342F32372032323A31333A3430207368C4
+:1015A00075616E67204578702024000046575F56AD
+:1015B000455253494F4E3A2023312046726920410B
+:1015C000707220372031373A35353A34382050445C
+:1015D000542032303030000046575F434F4D504961
+:1015E0004C455F54494D453A2031373A35353A3408
+:1015F0003800000046575F434F4D50494C455F420D
+:10160000593A2064657672637300000046575F4361
+:101610004F4D50494C455F484F53543A20636F6DCE
+:10162000707574650000000046575F434F4D504988
+:101630004C455F444F4D41494E3A20656E672E61DF
+:101640006374656F6E2E636F6D00000046575F43D5
+:101650004F4D50494C45523A20676363207665727E
+:1016600073696F6E20322E372E32000000000000AA
+:101670000000000000000000000000002448656138
+:101680006465723A202F70726F6A656374732F728B
+:1016900063732F73772F67652F2E2F6E69632F6605
+:1016A000772F636F6D6D6F6E2F6D656D2E632C766A
+:1016B00020312E312E322E3220313939382F30342C
+:1016C0002F32372032323A31333A3434207368754E
+:1016D000616E672045787020240000002448656111
+:1016E0006465723A202F70726F6A656374732F722B
+:1016F00063732F73772F67652F2E2F6E69632F66A5
+:10170000772F636F6D6D6F6E2F73656E642E632C14
+:101710007620312E312E322E313120313939382F89
+:1017200031322F32322031373A31373A3535207362
+:101730006875616E6720457870202400736E64645C
+:10174000654E6F51200000006E6F454E515F54583A
+:1017500000000000736E6464744E6F51200000003E
+:101760003F6E6F516454784500000000756E6B72D7
+:101770006474797065000000000000000000ACCCCB
+:101780000000ACCC0000AD9C0000AAB00000AAB0E4
+:101790000000AD9C0000AD9C0000AD9C0000AD9C25
+:1017A0000000AD9C0000AD9C0000AD9C0000AD9C15
+:1017B0000000AD9C0000AD9C0000AD9C0000AD9C05
+:1017C0000000AD9C0000AD7C000000000000BCA843
+:1017D0000000BCA80000BD700000AE4C0000B05876
+:1017E0000000BD700000BD700000BD700000BD7045
+:1017F0000000BD700000BD700000BD700000BD7035
+:101800000000BD700000BD700000BD700000BD7024
+:101810000000BD700000BD540000B0402448656168
+:101820006465723A202F70726F6A656374732F72E9
+:1018300063732F73772F67652F2E2F6E69632F6663
+:10184000772F636F6D6D6F6E2F726563762E632CCD
+:101850007620312E312E322E313920313939382F40
+:1018600030372F32342032313A33303A303520732A
+:101870006875616E6720457870202400706B52781F
+:101880004552520066726D324C617267650000000D
+:1018900072784E6F527842640000000072785144B2
+:1018A0006D61444600000000727851446D6142460B
+:1018B000000000003F6E6F51645278450000000048
+:1018C000706B5278455252730000000066726D32A0
+:1018D0004C7267530000000072784E6F42645300F0
+:1018E0003F724264446D6146000000003F724A420C
+:1018F00064446D4600000000000000000000F6781F
+:101900000000F6780000F6780000F6780000F6781F
+:101910000000F6780000F6780000F6780000F6780F
+:101920000000F6780000F6780000F6780000F678FF
+:101930000000F6780000F6780000F6700000F670FF
+:101940000000F670572D444D41456E4600000000E2
+:10195000000000000000FDC00001015C0000FDDC93
+:101960000001015C0001015C0001015C0001015CFF
+:101970000001015C0001015C0000F7040001015C52
+:101980000001015C0001015C0001015C0001015CDF
+:101990000001015400010154000101542448656113
+:1019A0006465723A202F70726F6A656374732F7268
+:1019B00063732F73772F67652F2E2F6E69632F66E2
+:1019C000772F636F6D6D6F6E2F6D61632E632C7655
+:1019D00020312E312E322E313220313939382F300C
+:1019E000342F32372032323A31333A34322073686E
+:1019F00075616E6720457870202400006D61637406
+:101A00007841544E000000004E7453796E264C6BA2
+:101A10000000000072656D61737372740000000055
+:101A20006C696E6B444F574E00000000656E714D3F
+:101A30004576504661696C00656E714D457646612C
+:101A4000696C00006661696C456E454D00000000E0
+:101A50006C696E6B55500000000000002448656101
+:101A60006465723A202F70726F6A656374732F72A7
+:101A700063732F73772F67652F2E2F6E69632F6621
+:101A8000772F636F6D6D6F6E2F636B73756D2E6344
+:101A90002C7620312E312E322E3220313939382F0A
+:101AA00030342F32372032323A31333A33392073DF
+:101AB0006875616E672045787020240050726F62EF
+:101AC00065506879000000006C6E6B4153535254AE
+:101AD0000000000000011B2C00011BC400011BF8CA
+:101AE00000011C2C00011C5800011C6C00011CA8EA
+:101AF0000001207C00011DE400011E2400011E5095
+:101B000000011E9000011EC000011EFC00011F30DC
+:101B10000001207C000122C0000122D80001230026
+:101B2000000123200001234800012478000124A0A3
+:101B3000000124F40001251C000000000001278C96
+:101B40000001285C0001293400012A0400012A60F8
+:101B500000012B3C00012B6400012C4000012C688B
+:101B600000012E1000012E3800012FE0000131D8B5
+:101B70000001346C000133800001346C00013498A2
+:101B800000013008000131B00000000000013B847A
+:101B900000013BC800013C6000013CAC00013D1C61
+:101BA00000013DB400013DE800013E7000013F0826
+:101BB00000013FD8000140180001409C000140C0D6
+:101BC000000141F4646F42617365506700000000DA
+:101BD00000000000000000000000000073746D6150
+:101BE000634C4E4B000000000000000000014C3828
+:101BF00000014C3800014B8000014BC400014C38FF
+:101C000000014C380000000000000000000000004F
+:101C100000000000000000000000000000000000C4
+:101C2000000000000000000000000000416C74652E
+:101C30006F6E204163654E4943205600416C7465C8
+:101C40006F6E204163654E49432056004242424236
+:101C50000000000000000000000000000013541805
+:101C60000013E7FC0000000000000000000000007E
+:101C70000000000000000000000000000060CF0035
+:101C800000000060CF000000000000000000000025
+:101C90000000000000000000000000000000000044
+:101CA0000000000000000000000000000000000034
+:101CB0000000000000000000000000000000000024
+:101CC0000000000000000000000000000000000014
+:101CD0000000000000000000000000030000000001
+:101CE00000000001000000000000000000000000F3
+:101CF00000000001000000000000000100000000E2
+:101D000000000000000000000000000000000001D2
+:101D100000000001000000000000000000000000C2
+:101D20000000000000000000010000002100000091
+:101D30001200014000000000000000002000000030
+:101D4000120000A0000000001200006012000180DC
+:101D5000120001E000000000000000000000000090
+:101D60000000000100000000000000000000000072
+:101D70000000000000000000000000000000000261
+:101D8000000000000000000000030001000000014E
+:0C1D900000030201000000000000000041
+:00000001FF
+/* tg1 firmware v12.4.11 */
diff --git a/firmware/acenic/tg2.bin.ihex b/firmware/acenic/tg2.bin.ihex
new file mode 100644
index 0000000..a9ff4f4
--- /dev/null
+++ b/firmware/acenic/tg2.bin.ihex
@@ -0,0 +1,4844 @@
+:100000000C040B0000004000000040000000000055
+:1000100010000003000000000000000D0000000DB3
+:100020003C1D00018FBD6D2003A0F0213C1000009D
+:10003000261040000C0010C0000000000000000D61
+:100040003C1D00018FBD6D2403A0F0213C10000079
+:10005000261040000C0017E0000000000000000D1A
+:100060000000000000000000000000000000000090
+:100070000000000000000000000000000000000080
+:100080000000000000000000000000000000000070
+:100090000000000000000000000000000000000060
+:1000A0000000000000000000000000000000000050
+:1000B0000000000000000000000000000000000040
+:1000C0000000000000000000000000000000000030
+:1000D0000000000000000000000000000000000020
+:1000E0000000000000000000000000000000000010
+:1000F0000000000000000000000000000000000000
+:1001000000000000000000000000000002000008E5
+:10011000000000000800172F3C0A00010800172FFC
+:100120003C0A00020800172F0000000008002CAC59
+:100130000000000008002C4F000000000800172FEE
+:100140003C0A00040800328A0000000008001A522D
+:10015000000000000800394D00000000080038F4DD
+:10016000000000000800172F3C0A0006080039BBF9
+:100170003C0A00070800172F3C0A00080800172F48
+:100180003C0A000908003A130000000008002EA6EF
+:10019000000000000800172F3C0A000B0800172F72
+:1001A0003C0A000C0800172F3C0A000D080028FB31
+:1001B0000000000008002890000000000800172F31
+:1001C0003C0A000E0800208C0000000008001964A2
+:1001D0000000000008001A040000000008003CA60F
+:1001E0000000000008003C94000000000800172FE9
+:1001F000000000000800191A000000000800172F76
+:10020000000000000800172F3C0A00130800172FF9
+:100210003C0A001400000000000000000000000084
+:1002200000000000000000000000000000000000CE
+:1002300000000000000000000000000000000000BE
+:1002400000000000000000000000000000000000AE
+:10025000000000000000000000000000000000009E
+:10026000000000000000000000000000000000008E
+:10027000000000000000000000000000000000007E
+:10028000000000000000000000000000000000006E
+:10029000000000000000000000000000000000005E
+:1002A000000000000000000000000000000000004E
+:1002B000000000000000000000000000000000003E
+:1002C000000000000000000000000000000000002E
+:1002D000000000000000000000000000000000001E
+:1002E000000000000000000000000000000000000E
+:1002F00000000000000000000000000000000000FE
+:1003000000000000000000000000000027BDFFE02A
+:100310003C1CC000AFBF001CAFB000188F82014072
+:1003200024030003AF8300EC344200040C002B20B4
+:10033000AF8201403C0100C00C001763AC203FFCC1
+:10034000004018213C0200103C010001AC236E9CCF
+:10035000106200110043102B144000023C020020E8
+:100360003C0200081062000C240501003C0600015C
+:100370008CC66E9C3C04000124845C74000038210F
+:10038000AFA000100C002B3BAFA000143C020020DB
+:100390003C010001AC226E9C240200083C010001DB
+:1003A000AC226EB42402001F3C010001AC226EC4DA
+:1003B000240200163C010001AC226E983C05FFFEB1
+:1003C00034A56F083C0200018C426E9C3C03000285
+:1003D000246390103C0400018C846CC400431023FF
+:1003E00014800002004580212610FA382402F00013
+:1003F000020280240C00178502002021020228231B
+:100400003C0400200082182300651823247BB000E0
+:100410003C03FFFE3463BF080363B8213C0600BF02
+:1004200034C6F0003C0700018CE76CC03C0300BF01
+:100430003463E000008520233C010001AC246EA859
+:10044000008220233C010001AC256E90000528426B
+:100450003C010001AC226E8427620FFC3C010001CC
+:10046000AC226D2027621FFC00DB3023007B1823A9
+:100470003C010001AC246E883C010001AC256EAC4F
+:100480003C010001AC226D24AF86015010E0001148
+:10049000AF8302503C1D00018FBD6CCC03A0F02146
+:1004A0000C001749000000003C0200018C426CD097
+:1004B0003C0300018C636CD42442FE0024630200E0
+:1004C0003C010001AC226CD03C0100011000000492
+:1004D000AC236CD43C1D00018FBD6D2003A0F02126
+:1004E0003C0200018C426CC41040000D26FAFA3820
+:1004F0003C0200018C426CD03C0300018C636CD444
+:100500003C1A00018F5A6CD42442FA38246305C87F
+:100510003C010001AC226CD03C010001AC236CD446
+:100520003C0200018C426CC8144000030000000033
+:100530003C010001AC206CD00C0011510000000007
+:100540008FBF001C8FB0001803E0000827BD0020FB
+:100550003C0200018C426CD03C0300018C636CD4E3
+:1005600027BDFF98AFB000483C1000018E1066B860
+:10057000AFB200503C12000026524100AFBF0060F5
+:10058000AFBE005CAFB50058AFB30054AFB1004C84
+:10059000AFA20034AFA30030AFA00010AFA0001492
+:1005A0008F8600403C04000124845C802405020006
+:1005B0003C010001AC326E800C002B3B0200382164
+:1005C0008F8300403C02F000006218243C0260006F
+:1005D0001062000BA3A0003F240E00013C040001A8
+:1005E00024845C88A3AE003FAFA00010AFA000142D
+:1005F0008F860040240503000C002B3B02003821AD
+:100600008F8202403C03000100431025AF8202406C
+:10061000AF8000488F8200481440000500000000B1
+:10062000AF8000488F8200481040000400000000A6
+:10063000AF8000481000000302E02021AF80004C92
+:1006400002E020213C0500010C002BA834A540F855
+:10065000034020210C002BA8240505C83C02000102
+:100660008C426EA83C0D00018DAD6E883C030001EC
+:100670008C636E843C0800018D086E903C0900017B
+:100680008D296EAC3C0A00018D4A6EB43C0B000112
+:100690008D6B6EC43C0C00018D8C6E983C04000187
+:1006A00024845C9424050400AF42013C8F42013C49
+:1006B0002406000124070001AF400000AF4D0138BF
+:1006C000AF430144AF480148AF49014CAF4A015024
+:1006D000AF4B0154AF4C01582442FF80AF42014060
+:1006E00024020001AFA200100C002B3BAFA00014AD
+:1006F0008F420138AFA200108F42013CAFA200141C
+:100700008F4601448F4701483C04000124845CA0CB
+:100710000C002B3B24050500AFB70010AFBA001446
+:100720008F46014C8F4701503C04000124845CAC8F
+:100730000C002B3B240506003C0200018C426E9C01
+:10074000036038213C06000224C690102448FFFFB5
+:100750000106182400E810240043102B1040000666
+:10076000240509003C04000124845CB8AFA80010F3
+:100770000C002B3BAFA000148F82000CAFA2001026
+:100780008F82003CAFA200148F8600008F87000488
+:100790003C04000124845CC40C002B3B24051000A5
+:1007A0008C0202208C0302248C0602188C07021C87
+:1007B0003C04000124845CCC24051100AFA200108D
+:1007C0000C002B3BAFA30014AF800054AF80011C82
+:1007D0008C020218304200021040000900000000A4
+:1007E0008C0202203C030002346300040043102505
+:1007F000AF42000C8C02021C1000000834420004BE
+:100800008C0202203C0300023463000600431025E2
+:10081000AF42000C8C02021C34420006AF420014AE
+:100820008C020218304200101040000A0000000044
+:100830008C02021C34420004AF4200108C020220E1
+:100840003C03000A34630004004310251000000933
+:10085000AF4200088C0202203C03000A3463000609
+:1008600000431025AF4200088C02021C34420006EF
+:10087000AF42001024020001AF8200A0AF8200B09E
+:100880008F8300548F820054AF8000D0AF8000C0AF
+:1008900010000002246300648F8200540062102361
+:1008A0002C4200651440FFFC000000008C0402088C
+:1008B0008C05020C26E20028AEE2002024020490FF
+:1008C000AEE20010AEE40008AEE5000C26E400083D
+:1008D0008C8200008C830004AF820090AF83009470
+:1008E0008C820018AF8200B49482000AAF82009C10
+:1008F0008F420014AF8200B08F8200B030420004FB
+:100900001440FFFD000000008F8200B03C03EF00A8
+:100910000043102410400021000000008F8200B42A
+:10092000AFA200108F8200908F8300943C040001DE
+:1009300024845CD4AFA300148F8600B08F87009C02
+:100940003C0500010C002B3B34A5200D3C040001AC
+:1009500024845CE0240203C0AFA20010AFA0001406
+:100960008F8601443C07000124E75CE80C002B3B28
+:100970003405DEAD8F82011C34420002AF82011CBF
+:100980008F82022034420004AF8202208F82014015
+:100990003C03000100431025AF82014096E204723F
+:1009A00096E6045296E70462AFA2001096E2048233
+:1009B0003C04000124845D14240512000C002B3B30
+:1009C000AFA2001496F0045232020001104000025F
+:1009D0000000B02124160001320200025440000140
+:1009E00036D60002320200085440000136D6000418
+:1009F000320200105440000136D6000832020020B6
+:100A00005440000136D6001032020040544000012C
+:100A100036D60020320200805440000136D6004015
+:100A200096E6048230C202005440000136D64000EF
+:100A300096E304723062020010400003306201004D
+:100A40001000000336D620005440000136D61000B6
+:100A500096F0046232C24000144000043207009B4A
+:100A600030C2009B14E20007240E000132C22000B5
+:100A70001440000D320200013062009B10E20009B8
+:100A8000240E00013C04000124845D202405130091
+:100A900002003821A3AE003FAFA300100C002B3B97
+:100AA000AFA00014320200015440000136D600808D
+:100AB000320200025440000136D601003202000822
+:100AC0005440000136D602003202001054400001AA
+:100AD00036D60400320200805440000136D60800A9
+:100AE0008C02021830420200104000023C02000852
+:100AF00002C2B0258C0202183042080010400002E9
+:100B00003C02008002C2B0258C0202183042040070
+:100B1000104000023C02010002C2B0258C02021803
+:100B200030420100104000023C02020002C2B02527
+:100B30008C02021830420080104000023C02040087
+:100B400002C2B0258C020218304220001040000280
+:100B50003C02001002C2B0258C0202183042400054
+:100B6000104000023C02002002C2B0258C02021894
+:100B700030421000104000023C02004002C2B0258A
+:100B80008EE204988EE3049CAF420160AF4301649F
+:100B90008EE204A08EE304A4AF420168AF43016C6F
+:100BA0008EE204A88EE304ACAF420170AF4301743F
+:100BB0008EE204288EE3042CAF420178AF43017C1F
+:100BC0008EE204488EE3044CAF420180AF430184BF
+:100BD0008EE204588EE3045CAF420188AF43018C7F
+:100BE0008EE204688EE3046CAF420190AF4301943F
+:100BF0008EE204788EE3047CAF420198AF43019CFF
+:100C00008EE204888EE3048CAF4201A0AF4301A4BE
+:100C10008EE204B08EE304B424040080AF4201A845
+:100C2000AF4301AC0C002BA8240500808C02025CB1
+:100C300027440224AF4201F08C0202602405020026
+:100C4000240600080C002BBFAF4201F83C043B9A7D
+:100C50003484CA0000003821240200062403000264
+:100C6000AF4201F4240203E8AF430204AF430200A1
+:100C7000AF4401FCAF42029424020001AF43029052
+:100C8000AF42029C3C0300010067182190636CD8BE
+:100C90000347102124E70001A043022C2CE2000F9F
+:100CA0001440FFF80347182124E700013C08000125
+:100CB000350840F88F8200403C04000124845D2CFC
+:100CC000240514000002170224420030A062022C06
+:100CD00003471021A040022C8C07021802C03021CB
+:100CE000240205C8AFA200100C002B3BAFA80014D3
+:100CF0003C04000124845D383C05000024A55C8090
+:100D00002406001027B100300220382127B3003418
+:100D10000C0017A3AFB300103C0300018C636CC838
+:100D20001060000A004080218FA300302405FF00DE
+:100D30008FA20034246400FF008520240083182340
+:100D400000431023AFA20034AFA400303C040001E4
+:100D500024845D443C05000024A5410024060108CC
+:100D6000022038210C0017A3AFB3001000409021DF
+:100D700032C200033C010001AC326E8010400045DD
+:100D8000022038218F8200503C03001000431024C1
+:100D900010400016000000008C0202183042004093
+:100DA0001040000F240200018F8200508C030218B3
+:100DB000240E00013C04000124845D50A3AE003FDA
+:100DC000AFA20010AFA300148F87004024051500C8
+:100DD0000C002B3B02C0302110000004000000007A
+:100DE0003C01000100370821A02240F43C0400012E
+:100DF00024845D5C3C05000124A55B403C060001A9
+:100E000024C65BAC00C530238F42001027B30030EE
+:100E10000260382127B1003434420A00AF4200108A
+:100E20000C0017A3AFB100103C04000124845D70D6
+:100E30003C05000124A5B7143C06000124C6BA9065
+:100E400000C5302302603821AF4201080C0017A30F
+:100E5000AFB100103C04000124845D8C3C0500010E
+:100E600024A5BE583C06000124C6C90000C5302395
+:100E7000026038213C010001AC226EF40C0017A383
+:100E8000AFB100103C04000124845DA410000024D4
+:100E9000240516003C04000124845DAC3C050001DF
+:100EA00024A5A10C3C06000124C6A23800C53023AD
+:100EB0000C0017A3AFB300103C04000124845DBCF8
+:100EC0003C05000124A5B2B03C06000124C6B70CC5
+:100ED00000C5302302203821AF4201080C0017A3BF
+:100EE000AFB300103C04000124845DD03C05000138
+:100EF00024A5BA983C06000124C6BE5000C5302384
+:100F0000022038213C010001AC226EF40C0017A332
+:100F1000AFB300103C04000124845DE424051650A6
+:100F200002C03021000038213C010001AC226EF8E3
+:100F3000AFA000100C002B3BAFA0001432C2002069
+:100F40001040002127A700303C04000124845DF0FC
+:100F50003C05000124A5B13C3C06000124C6B2A812
+:100F600000C5302324022000AF42001C27A2003419
+:100F70000C0017A3AFA20010000219000003198291
+:100F80003C04080000641825AE4300282403001028
+:100F9000AF43003C96E30450AF4300408F43004012
+:100FA0003C04000124845E04AFA00014AFA3001031
+:100FB0008F47001C240516603C010001AC226EF036
+:100FC0001000002532C600208EE204488EE3044C57
+:100FD000AF43001C8F42001C2442E0002C42200141
+:100FE0001440000A240E00013C04000124845E1019
+:100FF000A3AE003FAFA00010AFA000148F46001CAE
+:10100000240517000C002B3B000038213C02000097
+:1010100024425CBC00021100000211823C03080063
+:1010200000431025AE42002824020008AF42003CD5
+:1010300096E20450AF4200408F4200403C04000161
+:1010400024845E1CAFA00014AFA200108F47001CC8
+:101050002405180032C600200C002B3B00000000C5
+:101060003C050FFF3C0300018C636EF434A5FFFFC9
+:10107000024030213C0200018C426EF83C04080022
+:101080000065182400031882006418250045102408
+:101090000002108200441025ACC2008032C20180E0
+:1010A00010400056ACC300208F82005C3C030080DF
+:1010B000004310241040000D000000008F820050FB
+:1010C000AFA200108F82005C240E00013C040001DE
+:1010D00024845E28A3AE003FAFA200148F87004097
+:1010E000240519000C002B3B02C030218F820050D8
+:1010F0003C030010004310241040001600000000C4
+:101100008C020218304200401040000F24020001FF
+:101110008F8200508C030218240E00013C04000151
+:1011200024845D50A3AE003FAFA20010AFA3001413
+:101130008F870040240520000C002B3B02C030218B
+:1011400010000004000000003C01000100370821ED
+:10115000A02240F43C04000124845E343C050001DC
+:1011600024A55AC03C06000124C65B3800C53023C4
+:101170008F42000827B300300260382127B10034C5
+:1011800034420E00AF4200080C0017A3AFB10010AC
+:101190003C04000124845E4C3C05000124A5D8B425
+:1011A0003C06000124C6E3C800C530230260382194
+:1011B000AF42010C0C0017A3AFB100103C040001BA
+:1011C00024845E643C05000124A5E9AC3C060001D2
+:1011D00024C6F0F000C53023026038213C01000134
+:1011E000AC226F040C0017A3AFB100103C04000147
+:1011F00024845E7C10000027240521003C040001AB
+:1012000024845E843C05000124A59FC83C0600019F
+:1012100024C6A10400C5302327B1003002203821A4
+:1012200027B300340C0017A3AFB300103C04000137
+:1012300024845E943C05000124A5CAD43C06000128
+:1012400024C6D8AC00C5302302203821AF42010C9F
+:101250000C0017A3AFB300103C04000124845EA46B
+:101260003C05000124A5E84C3C06000124C6E9A485
+:1012700000C53023022038213C010001AC226F045C
+:101280000C0017A3AFB300103C04000124845EB827
+:101290002405215002C03021000038213C0100010A
+:1012A000AC226F10AFA000100C002B3BAFA00014BD
+:1012B0003C110FFF3C0300018C636F043631FFFFCC
+:1012C000024098213C0200018C426F103C0E080045
+:1012D0000071182400031882006E18250051102494
+:1012E00000021082004E1025AE630038AE62007816
+:1012F0008C02021830420040144000042402000115
+:101300003C01000100370821A02240F43C04000108
+:1013100024845EC43C05000124A5E3D03C06000102
+:1013200024C6E52C00C5302327BE003003C0382179
+:1013300027B500340C0017A3AFB500103C01000125
+:10134000AC226EFC00511024000210823C0E0800FA
+:10135000004E1025AE62005032C220001040000640
+:1013600003C038213C02000024425CBC022210244D
+:101370001000000F000210823C04000124845ED89B
+:101380003C05000124A5E5343C06000124C6E6E442
+:1013900000C530230C0017A3AFB500103C010001BD
+:1013A000AC226F1400511024000210823C0E080081
+:1013B000004E1025AE62004832C2400010400005C9
+:1013C00027A700303C02000024425CBC1000000E45
+:1013D000000211003C04000124845EF03C05000181
+:1013E00024A5E6EC3C06000124C6E84400C53023F1
+:1013F00027A200340C0017A3AFA200103C0100018B
+:10140000AC226F0800021100000211823C030800A8
+:1014100000431025AE4200603C04000124845F08B4
+:101420003C05000124A582303C06000124C68650FC
+:1014300000C5302327B100300220382127B3003403
+:101440000C0017A3AFB300103C0E0FFF35CEFFFF0B
+:101450003C04000124845F143C05000024A564685A
+:101460003C06000024C6658800C5302302203821D0
+:101470000240F0213C010001AC226EDC004E102441
+:10148000000210823C15080000551025AFAE004444
+:10149000AFC200B80C0017A3AFB300103C040001AA
+:1014A00024845F203C05000024A565903C060000D4
+:1014B00024C668088FAE004400C5302302203821BE
+:1014C0003C010001AC226ED0004E102400021082BC
+:1014D00000551025AFC200E80C0017A3AFB30010F1
+:1014E0003C04000124845F383C05000024A56810FA
+:1014F0003C06000024C669408FAE004400C530237E
+:10150000022038213C010001AC226EC8004E10249C
+:101510000002108200551025AFC200C00C0017A3B6
+:10152000AFB300103C04000124845F503C0500016F
+:1015300024A5FAD03C06000124C6FBA88FAE0044C7
+:1015400000C53023022038213C010001AC226ED4BA
+:10155000004E10240002108200551025AFC200C8B2
+:101560000C0017A3AFB300103C04000124845F5C9F
+:101570003C05000124A5C93C3C06000124C6CA2044
+:1015800000C5302302203821AF4201100C0017A300
+:10159000AFB300103C04000124845F6C3C050001E3
+:1015A00024A5C9103C06000124C6C93400C5302357
+:1015B00002203821AF4201240C0017A3AFB3001062
+:1015C0003C04000124845F7C3C05000124A55A8072
+:1015D0003C06000124C65AAC00C530230220382145
+:1015E000AF420120AF4201140C0017A3AFB30010AB
+:1015F0003C04000124845F883C05000124A5F29886
+:101600003C06000124C6F6B400C530230220382170
+:10161000AF4201180C0017A3AFB300108FAE004407
+:101620003C010001AC226F18004E10240002108211
+:10163000005510250C003FC3AFC200D00C003C4049
+:10164000000000000C0027A800000000AC000228E9
+:10165000AC00022C96E204502442FFFFAF42003857
+:1016600096E20460AF42008032C2400014400003A2
+:101670000000000096E20480AF42008496E70490E8
+:1016800050E000012407080024E2FFFFAF42008879
+:10169000AF42007C2402080010E2000F32C240007A
+:1016A000104000032402040010E2000B00000000C0
+:1016B000240E00013C04000124845F98A3AE003F87
+:1016C00096E604902405217002C03821AFA00010D6
+:1016D0000C002B3BAFA000148F4301388F4401381E
+:1016E00024020001A34205C2AF430094AF44009816
+:1016F000AFA00010AFA000148F4600808F47008479
+:101700003C04000124845FA40C002B3B2405220030
+:101710000C0024A43C1108003C1433D83694CB5858
+:101720003C020800344200803C04000124845FB085
+:101730003C05000024A55D003C06000024C65D1C9D
+:1017400000C5302327A70030AF8200602402FFFFCE
+:10175000AF82006427A200340C0017A3AFA20010D0
+:101760003C010001AC226EB800021100000211829F
+:10177000005110250C0018FCAE4200008F82024080
+:101780003C03000100431025AF8202403C020000F0
+:1017900024424034AF820244AF8002408F82006016
+:1017A00000511024144000053C0308008F820060A3
+:1017B000004310241040FFFD000000000C003C4DD1
+:1017C000000088213C020100AFA200208F530018C6
+:1017D000240200FF56620001267100018C020228DB
+:1017E0001622000E001330C08F42033C2442000139
+:1017F000AF42033C8F42033C8C0202283C040001B0
+:1018000024845C243C050009AFA00014AFA20010A2
+:101810008FA600201000003F34A5010000D7102142
+:101820008FA300208FA40024AC4304C0AC4404C4A4
+:1018300000C018218F4401788F45017C00001021E1
+:1018400024070004AFA70010AFB100148F48000CAC
+:1018500024C604C002E63021AFA800188F48010C4E
+:101860002407000800A3282100A3482B0082202180
+:101870000100F809008920211440000B240700080A
+:101880008F820120AFA200108F8201243C0400014E
+:1018900024845C2C3C050009AFA200148FA6002014
+:1018A0001000001C34A502008F4401608F450164C4
+:1018B0008F43000CAF5100188F86012024020010C6
+:1018C000AFA20010AFB10014AFA300188F42010CFB
+:1018D0000040F80924C6001C14400010000000005D
+:1018E0008F42034024420001AF4203408F42034035
+:1018F0008F820120AFA200108F8201243C040001DE
+:1019000024845C343C050009AFA200148FA600209B
+:1019100034A503000C002B3B026038218F4202E407
+:1019200024420001AF4202E48F4202E493A2003F4E
+:10193000104000693C02070034423000AFA200288A
+:101940008F530018240200FF126200020000882159
+:10195000267100018C0202281622000E001330C0EE
+:101960008F42033C24420001AF42033C8F42033CC0
+:101970008C0202283C04000124845C243C050009FC
+:10198000AFA00014AFA200108FA600281000003FE7
+:1019900034A5010000D710218FA300288FA4002CAC
+:1019A000AC4304C0AC4404C400C018218F44017887
+:1019B0008F45017C0000102124070004AFA7001010
+:1019C000AFB100148F48000C24C604C002E63021D9
+:1019D000AFA800188F48010C2407000800A3282195
+:1019E00000A3482B008220210100F8090089202152
+:1019F0001440000B240700088F820120AFA20010C2
+:101A00008F8201243C04000124845C2C3C050009E5
+:101A1000AFA200148FA600281000001C34A50200FD
+:101A20008F4401608F4501648F43000CAF51001853
+:101A30008F86012024020010AFA20010AFB1001465
+:101A4000AFA300188F42010C0040F80924C6001C07
+:101A500014400010000000008F42034024420001A7
+:101A6000AF4203408F4203408F820120AFA200109B
+:101A70008F8201243C04000124845C343C0500096D
+:101A8000AFA200148FA6002834A503000C002B3B46
+:101A9000026038218F4202F024420001AF4202F07E
+:101AA0008F4202F03C04000124845FC0AFA000100C
+:101AB000AFA000148FA60028240523000C002B3BA8
+:101AC0000000382110000004000000008C020264B5
+:101AD00010400005000000008F8200A0304200048A
+:101AE0001440FFFA000000008F82004434420004DA
+:101AF000AF8200448F42030824420001AF42030832
+:101B00008F4203088F8200D88F8300D400431023B4
+:101B10002442FF80AF4200908F4200902842FF8114
+:101B200010400006240200018F4200908F430144C0
+:101B300000431021AF42009024020001AF42008C0C
+:101B400032C2000810400006000000008F8202141C
+:101B50003C0381003042FFFF00431025AF82021496
+:101B60003C0300018C636D94306200021040000958
+:101B7000306200013C04000124845FCC3C0500007D
+:101B800024A56D503C06000024C671C81000001248
+:101B900000C5302310400009000000003C04000193
+:101BA00024845FDC3C05000024A571D03C060000C5
+:101BB00024C676781000000800C530233C040001DC
+:101BC00024845FEC3C05000024A569483C06000025
+:101BD00024C66D4800C5302327A7003027A2003453
+:101BE0000C0017A3AFA200103C010001AC226ECC88
+:101BF0003C0200018C426ECC3C0308000002110044
+:101C00000002118200431025AE4200408F8200A0E6
+:101C1000AFA200108F8200B0AFA200148F86005CCC
+:101C20008F87011C3C04000124845FFC3C010001FF
+:101C3000AC366EA43C010001AC206E943C01000166
+:101C4000AC3C6E8C3C010001AC3B6EBC3C01000125
+:101C5000AC376EC03C010001AC3A6EA00C002B3BCF
+:101C6000240524008F820200AFA200108F82022080
+:101C7000AFA200148F8600448F8700503C040001FF
+:101C8000248460080C002B3B240525008F83006012
+:101C90000074100B0242000A0200F821000000004C
+:101CA0000000000D8FBF00608FBE005C8FB5005834
+:101CB0008FB300548FB200508FB1004C8FB00048EA
+:101CC00003E0000827BD006827BDFFE03C040001D9
+:101CD00024846014240526000000302100003821EF
+:101CE000AFBF0018AFA000100C002B3BAFA000143A
+:101CF0008FBF001803E0000827BD002003E00008A4
+:101D00000000000003E000080000000000000000E8
+:101D100000000000000000000000000000000000C3
+:101D200003E000080000000003E0000800000000DD
+:101D300027BDFDE027A500183C04DEAD3484BEEFCE
+:101D4000AFBF02188F8201503C03001F3463FFFFB6
+:101D5000AFA4001800A2282300A328248CA200000E
+:101D60001044000A00000000AFA500108CA2000083
+:101D7000AFA200148F8601508F8702503C040001EF
+:101D80002484601C0C002B3B240527008FBF021805
+:101D900003E0000827BD022027BDFFE03C06ABBAE8
+:101DA00034C6BABEAFB000183C1000043C07007F38
+:101DB00034E7FFFFAFBF001C001028408E04000076
+:101DC0008CA30000ACA00000AE0600008CA20000B6
+:101DD000ACA3000010460005AE04000000A0802166
+:101DE00000F0102B1040FFF5001028403C040001CB
+:101DF00024846028240528000200302100003821B6
+:101E0000AFA000100C002B3BAFA00014020010216B
+:101E10008FBF001C8FB0001803E0000827BD002012
+:101E20008C0202243047003F10E000100080302177
+:101E3000000028212403002000E3102410400002A9
+:101E40000006304200A62821000318421460FFFB60
+:101E500000E310242402F00000A228243402FFFF33
+:101E60000045102B144000033C0200011000000844
+:101E70003C0200013442FFFF008518230043102B71
+:101E80001440000300A010213C02FFFE008210213C
+:101E900003E000080000000027BDFFD0AFB5002818
+:101EA0008FB50040AFB2002000A09021AFB1001C60
+:101EB00024C60003AFBF002CAFB30024AFB000189E
+:101EC0008EA200002403FFFC00C380240050102BCE
+:101ED0001440001B00E088218E330000AFB00010DA
+:101EE0008EA20000AFA200148E270000240530004F
+:101EF0000C002B3B024030218E230000007020217B
+:101F00000064102B10400007024028218CA2000022
+:101F1000AC620000246300040064102B1440FFFB3B
+:101F200024A500048EA2000000501023AEA20000E1
+:101F30008E220000005010211000000BAE22000085
+:101F40002402002DA0820000AFB000108EA200007D
+:101F500002409821AFA200148E2700002405310012
+:101F60000C002B3B02603021026010218FBF002C3F
+:101F70008FB500288FB300248FB200208FB1001CD2
+:101F80008FB0001803E0000827BD003027BDFFE830
+:101F90003C1CC0003C05FFFE3C0300018C636E84CA
+:101FA0003C0400018C846E9034A5BF0824021FFC01
+:101FB0003C010001AC226CD03C0200C03C0100019D
+:101FC000AC226CD43C020020AFBF00103C0100C02A
+:101FD000AC201FFC0043102300441023245BB000FE
+:101FE0000365B8213C1D00018FBD6CCC03A0F0211E
+:101FF0003C0400C0348402003C1A00C03C0300C012
+:10200000346307C824021DFC3C010001AC226CD0E3
+:10201000240218343C010001AC246CD43C010001C2
+:10202000AC226CD03C010001AC236CD40C00180D28
+:10203000375A02008FBF001003E0000827BD0018C8
+:1020400027BDFFC83C04000124846034240532000D
+:102050003C0200018C426CD03C0300018C636CD4C8
+:102060000000302103603821AFBF0030AFB3002C37
+:10207000AFB20028AFB10024AFB00020AFA2001C67
+:10208000AFA30018AFB700100C002B3BAFBA001481
+:102090000C001916000000008F8202403442000438
+:1020A000AF82024024020001AF4200003C02000166
+:1020B00000571021904240F4104000922403FFFC8E
+:1020C0003C1000012610AC733C1200012652A84CB3
+:1020D00002121023004380248FA3001C3C04000143
+:1020E000248460400070102B1440001A27B300189D
+:1020F0008FB100182405300002403021AFB000102D
+:10210000AFA300140C002B3B022038218FA3001832
+:10211000007020210064102B104000070240302185
+:102120008CC20000AC620000246300040064102B29
+:102130001440FFFB24C600048FA2001C0050102393
+:10214000AFA2001C8E620000005010211000000A97
+:10215000AE6200000240882124053100AFB00010BB
+:10216000AFA300148FA70018022030212402002DF5
+:102170000C002B3BA0820000240700208FA3001C32
+:102180003C0400012484605C241200203C01000116
+:10219000AC316EB02C6200201440001D27B1001835
+:1021A0008FB00018240530003C06000124C66F5093
+:1021B000AFA70010AFA300140C002B3B0200382186
+:1021C0008FA300183C04000124846F502465002074
+:1021D0000065102B10400007000000008C820000FA
+:1021E000AC620000246300040065102B1440FFFB68
+:1021F000248400048FA2001C00521023AFA2001CF4
+:102200008E220000005210211000000BAE220000B0
+:102210003C10000126106F5024053100AFA70010BC
+:10222000AFA300148FA70018020030212402002D54
+:102230000C002B3BA0820000240700203C0400017E
+:10224000248460708FA3001C241200203C01000134
+:10225000AC306EE42C6200201440001D27B1001841
+:102260008FB00018240530003C06000124C66F70B2
+:10227000AFA70010AFA300140C002B3B02003821C5
+:102280008FA300183C04000124846F702465002093
+:102290000065102B10400007000000008C82000039
+:1022A000AC620000246300040065102B1440FFFBA7
+:1022B000248400048FA2001C00521023AFA2001C33
+:1022C0008E220000005210211000000BAE220000F0
+:1022D0003C10000126106F7024053100AFA70010DC
+:1022E000AFA300148FA70018020030212402002D94
+:1022F0000C002B3BA08200003C01000110000031CB
+:10230000AC306EE03C1000012610821F3C12000130
+:102310002652809C02121023004380248FA3001CAD
+:102320003C040001248460840070102B1440001AC7
+:1023300027B300188FB10018240530000240302167
+:10234000AFB00010AFA300140C002B3B02203821CB
+:102350008FA30018007020210064102B104000078C
+:10236000024030218CC20000AC62000024630004F3
+:102370000064102B1440FFFB24C600048FA2001C35
+:1023800000501023AFA2001C8E62000000501021EC
+:102390001000000AAE6200000240882124053100CE
+:1023A000AFB00010AFA300148FA700180220302197
+:1023B0002402002D0C002B3BA08200003C010001F8
+:1023C000AC316EB03C0300018C636EB0240204009B
+:1023D0000060F809AF8200708FBF00308FB3002C0F
+:1023E0008FB200288FB100248FB0002003E00008D6
+:1023F00027BD003800000000000000008F82004070
+:102400003C03F000004310243C036000144300062A
+:10241000000000008F8200502403FF80004310243E
+:1024200034420055AF8200508F820054244203E8AA
+:10243000AF820058240201F4AF4200E024020004FD
+:10244000AF4200E824020002AF4001B0AF4000E418
+:10245000AF4200DCAF4000D8AF4000D403E000083A
+:10246000AF4000D08F8200542442000503E00008F2
+:10247000AF82007827BDFFE8AFBF00108F82005405
+:10248000244203E8AF8200583C02080002C2102434
+:10249000104000043C02F7FF3442FFFF02C2B024A8
+:1024A000369400403C0200018C426DA81040001799
+:1024B0003C0202003C0300018C636F1C106000169C
+:1024C0000282A0253C0200018C426E44144000129E
+:1024D0003C0202003C0200018C426D943042000339
+:1024E0001440000D3C0202008F8302243C020002D3
+:1024F0008C428FEC106200083C0202000C003DAFE1
+:1025000000000000100000043C0202000C00419694
+:10251000000000003C02020002C210241040000330
+:10252000000000000C001F4B000000008F4200D88C
+:102530008F4300DC24420001AF4200D80043102B3F
+:102540001440000300000000AF4000D83694008023
+:102550008C0302381060000C000000008F4201B0B4
+:10256000244203E8AF4201B00043102B14400006A0
+:1025700000000000934205C5144000030000000065
+:102580000C001DA0000000008FBF001003E0000839
+:1025900027BD001803E000080000000027BDFFD899
+:1025A000AFBF00208F43002C8F42003810620059CB
+:1025B000000000003C02000100571021904240F052
+:1025C00010400026240700088F4401708F450174D5
+:1025D0008F48000C8F86012024020020AFA200103B
+:1025E000AFA30014AFA800188F42010C0040F809F7
+:1025F00024C6001C14400011240200013C0100010B
+:1026000000370821A02240F08F820124AFA20010E1
+:102610008F8201283C04000124846128AFA20014A9
+:102620008F46002C8F8701203C0500090C002B3BB6
+:1026300034A509001000005C000000008F42030078
+:1026400024420001AF4203008F4203008F42002C5E
+:10265000A34005C110000027AF4200388F4401702D
+:102660008F4501748F43002C8F48000C8F8601200A
+:1026700024020080AFA20010AFA30014AFA800187E
+:102680008F42010C0040F80924C6001C14400011C0
+:10269000240200013C01000100370821A02240F182
+:1026A0008F820124AFA200108F8201283C04000118
+:1026B00024846134AFA200148F46002C8F87012040
+:1026C0003C0500090C002B3B34A51100100000361E
+:1026D000000000008F4203008F43002C24420001C1
+:1026E000AF4203008F42030024020001A34205C150
+:1026F000AF4300383C01000100370821A02040F121
+:102700003C01000100370821A02040F01000002605
+:10271000AF400034934205C11040001D000000008E
+:10272000A34005C18F8200403042000114400008E0
+:10273000000020218C0301042402000150620005E6
+:10274000240400018C020264104000030080102168
+:102750002404000100801021104000060000000049
+:102760008F42030C24420001AF42030C100000080A
+:102770008F42030C8F82004434420004AF82004435
+:102780008F42030824420001AF4203088F4203082E
+:102790003C01000100370821A02040F03C0100016D
+:1027A00000370821A02040F18F42000010400007B0
+:1027B00000000000AF80004C8F82004C1040FFFDF5
+:1027C000000000001000000500000000AF8000487D
+:1027D0008F8200481040FFFD000000008F820060E3
+:1027E0003C03FF7F3463FFFF00431024AF8200608F
+:1027F0008F420000104000030000000010000002A3
+:10280000AF80004CAF8000488FBF002003E000087D
+:1028100027BD002803E000080000000027BDFFD806
+:10282000AFBF00208F4300448F42007C106200291C
+:10283000240700088F4401688F45016C8F48000C05
+:102840008F86012024020040AFA20010AFA3001425
+:10285000AFA800188F42010C0040F80924C6001CE4
+:1028600014400011240200013C010001003708213E
+:10287000A02240F28F820124AFA200108F82012893
+:102880003C0400012484613CAFA200148F46004444
+:102890008F8701203C0500090C002B3B34A5130059
+:1028A0001000000F000000008F42030424420001CA
+:1028B000AF4203048F4203048F420044AF42007CC6
+:1028C0003C01000100370821A02040F21000000464
+:1028D000AF4000783C01000100370821A02040F201
+:1028E0008F4200001040000700000000AF80004C45
+:1028F0008F82004C1040FFFD00000000100000051A
+:1029000000000000AF8000488F8200481040FFFDAB
+:10291000000000008F8200603C03FEFF3463FFFF75
+:1029200000431024AF8200608F420000104000037B
+:102930000000000010000002AF80004CAF80004893
+:102940008FBF002003E0000827BD002803E0000837
+:10295000000000003C0200018C426DA827BDFFA8CA
+:10296000AFBF0050AFBE004CAFB50048AFB300449E
+:10297000AFB20040AFB1003CAFB00038104000D55E
+:102980008F9000448F4200D0244300012842000B66
+:10299000144000E4AF4300D08F42000430420002F4
+:1029A0001440009CAF4000D08F4200043C03000163
+:1029B0008C636D9834420002AF420004240200018F
+:1029C000146200033C020600100000023442300092
+:1029D00034421000AFA200208F4A0018AFAA003482
+:1029E00027AA0020AFAA002C8FAA0034240200FFDF
+:1029F0001142000200001821254300018C02022828
+:102A0000006098211662000E3C0500098F42033CCD
+:102A100024420001AF42033C8F42033C8C02022857
+:102A20008FA700343C0400012484610CAFA0001483
+:102A3000AFA200108FA600201000007034A5050082
+:102A40008FAA0034000A38C000F710218FA300209D
+:102A50008FA40024AC4304C0AC4404C48F8300544E
+:102A60008F820054247103E8022210232C4203E9D0
+:102A70001040001B0000A82100E09021265E04C049
+:102A80008F4401788F45017C02401821240A0004FC
+:102A9000AFAA0010AFB300148F48000C0000102143
+:102AA00002FE3021AFA800188F48010C240700084F
+:102AB00000A3282100A3482B008220210100F8094F
+:102AC0000089202154400006241500018F82005403
+:102AD000022210232C4203E91440FFE90000000009
+:102AE00032A200FF54400018AF5300188F42037801
+:102AF00024420001AF4203788F4203788F82012085
+:102B00008FAA002C8FA70034AFA200108F8201245F
+:102B10003C04000124846118AFA200148D4600001B
+:102B20003C0500091000003534A506008F4203085B
+:102B30002415000124420001AF4203088F4203081C
+:102B40001000001E32A200FF8F8300548F820054B9
+:102B5000247103E8022210232C4203E910400016DE
+:102B60000000A8213C1E0020241200108F42000CFF
+:102B70008F4401608F4501648F860120AFB2001041
+:102B8000AFB30014005E1025AFA200188F42010CF5
+:102B9000240700080040F80924C6001C1440FFE385
+:102BA000000000008F820054022210232C4203E90F
+:102BB0001440FFEE0000000032A200FF144000119C
+:102BC0003C0500098F42037824420001AF4203789C
+:102BD0008F4203788F8201208FAA002C8FA70034A8
+:102BE000AFA200108F8201243C04000124846120E4
+:102BF000AFA200148D46000034A507000C002B3B4B
+:102C0000000000008F4202EC24420001AF4202ECBF
+:102C10008F4202EC8F4200043042000150400029F4
+:102C2000361000403C02040002C210241040001381
+:102C30002404FFDF8F4202508F4302548F4401B4BB
+:102C400014640006361000408F4202708F430274F5
+:102C50008F4401B8106400072402FFDF8F42025046
+:102C60008F4302548F4402708F450274100000128B
+:102C70003A1000201000002B020280248F420250E4
+:102C80008F4302548F4501B414650006020480246A
+:102C90008F4202708F4302748F4401B85064002148
+:102CA000361000408F4202508F4302548F4402700E
+:102CB0008F4502743A100040AF4301B41000001970
+:102CC000AF4501B88F4200D4244300011000001129
+:102CD000284200338F4200043042000110400009B6
+:102CE0003C02040002C21024104000042402FFDF52
+:102CF000020280241000000B361000401000000972
+:102D0000361000608F4200D436100040244300018A
+:102D1000284201F514400003AF4300D4AF4000D473
+:102D20003A100020AF9000442402FF7F0282A024CA
+:102D30008FBF00508FBE004C8FB500488FB300444A
+:102D40008FB200408FB1003C8FB0003803E0000824
+:102D500027BD005803E00008000000003C0200010D
+:102D60008C426DA827BDFFB0AFBF0048AFBE004486
+:102D7000AFB50040AFB3003CAFB20038AFB10034E4
+:102D8000104000C7AFB000308F4200D02443000194
+:102D90002842000B144000DAAF4300D08F420004F9
+:102DA0003042000214400097AF4000D08F42000430
+:102DB0003C0300018C636D9834420002AF42000472
+:102DC00024020001146200033C020600100000020D
+:102DD0003442300034421000AFA20020000018211D
+:102DE0008F5E001827AA0020240200FF13C20002F1
+:102DF000AFAA002C27C300018C020228006090219A
+:102E00001642000E001E38C08F42033C24420001CF
+:102E1000AF42033C8F42033C8C0202283C04000179
+:102E20002484610C3C050009AFA00014AFA200107F
+:102E30008FA600201000006D34A5050000F71021BA
+:102E40008FA300208FA40024AC4304C0AC4404C46E
+:102E50008F8300548F820054247003E802021023F1
+:102E60002C4203E91040001B0000982100E088215B
+:102E7000263504C08F4401788F45017C022018213B
+:102E8000240A0004AFAA0010AFB200148F48000C4F
+:102E90000000102102F53021AFA800188F48010C66
+:102EA0002407000800A3282100A3482B008220212A
+:102EB0000100F80900892021544000062413000174
+:102EC0008F820054020210232C4203E91440FFE9D0
+:102ED00000000000326200FF54400017AF5200189B
+:102EE0008F42037824420001AF4203788F42037877
+:102EF0008F8201208FAA002CAFA200108F820124A4
+:102F00003C040001248461183C050009AFA20014B0
+:102F10008D4600001000003534A506008F420308DE
+:102F20002413000124420001AF4203088F4203082A
+:102F30001000001E326200FF8F8300548F82005405
+:102F4000247003E8020210232C4203E9104000160B
+:102F5000000098213C150020241100108F42000C25
+:102F60008F4401608F4501648F860120AFB100104E
+:102F7000AFB2001400551025AFA200188F42010C0B
+:102F8000240700080040F80924C6001C1440FFE391
+:102F9000000000008F820054020210232C4203E93B
+:102FA0001440FFEE00000000326200FF14400011E8
+:102FB000000000008F42037824420001AF420378F2
+:102FC0008F4203788F8201208FAA002CAFA20010BD
+:102FD0008F8201243C040001248461203C05000907
+:102FE000AFA200148D46000034A507000C002B3B57
+:102FF00003C038218F4202EC24420001AF4202ECB0
+:103000008F4202EC8F420004304200011040001851
+:10301000240400018F4202508F4302548F4501B4B3
+:103020003C01000114650006A0246CF18F4202707F
+:103030008F4302748F4401B8106400210000000027
+:103040008F4202508F4302543C04000190846CF084
+:103050008F4602708F47027438840001AF4301B479
+:10306000AF4701B83C01000110000025A0246CF01E
+:103070008F4200D43C010001A0206CF024430001E9
+:10308000284200331440001EAF4300D43C0200012C
+:1030900090426CF1AF4000D410000017384200019C
+:1030A0008F42000430420001104000080000000080
+:1030B0000C00565A000020213C010001A0206CF1B8
+:1030C0003C0100011000000EA0206CF08F4200D4E3
+:1030D0003C010001A0206CF024430001284201F5CE
+:1030E00014400007AF4300D43C02000190426CF151
+:1030F000AF4000D4004210263C010001A0226CF138
+:103100003C0300018C636D98240200021462000CE1
+:103110003C0300023C03000190636CF124020001B7
+:103120005462001F000020213C02000190426CF01C
+:103130001443001B24040005100000192404000699
+:103140003C0200028C428FF4004310241040000B1C
+:10315000240200013C03000190636CF154620010F2
+:10316000000020213C02000190426CF01443000C4E
+:10317000240400031000000A240400043C0300019E
+:1031800090636CF114620006000020213C020001F3
+:1031900090426CF024040001504400012404000219
+:1031A0000C00565A000000002402FF7F0282A02477
+:1031B0008FBF00488FBE00448FB500408FB3003CE6
+:1031C0008FB200388FB100348FB0003003E00008B8
+:1031D00027BD005003E00008000000003C02000191
+:1031E0008C426DA827BDFFB0AFBF0048AFBE004402
+:1031F000AFB50040AFB3003CAFB20038AFB1003460
+:10320000104000DEAFB000308F4200D03C0400011F
+:103210008C846D98244300012842000BAF4400E8E1
+:10322000144000FEAF4300D08F4200043042000241
+:1032300014400095AF4000D08F4200043442000299
+:10324000AF42000424020001148200033C02060085
+:10325000100000023442300034421000AFA20020BF
+:10326000000018218F5E001827AA0020240200FF0A
+:1032700013C20002AFAA002C27C300018C0202284F
+:10328000006090211642000E001E38C08F42033CA1
+:1032900024420001AF42033C8F42033C8C020228CF
+:1032A0003C0400012484610C3C050009AFA000141B
+:1032B000AFA200108FA600201000006D34A50500FD
+:1032C00000F710218FA300208FA40024AC4304C07A
+:1032D000AC4404C48F8300548F820054247003E8EC
+:1032E000020210232C4203E91040001B0000982129
+:1032F00000E08821263504C08F4401788F45017C89
+:1033000002201821240A0004AFAA0010AFB2001452
+:103310008F48000C0000102102F53021AFA80018E2
+:103320008F48010C2407000800A3282100A3482B84
+:10333000008220210100F809008920215440000664
+:10334000241300018F820054020210232C4203E94F
+:103350001440FFE900000000326200FF54400017F3
+:10336000AF5200188F42037824420001AF42037825
+:103370008F4203788F8201208FAA002CAFA2001009
+:103380008F8201243C040001248461183C0500095B
+:10339000AFA200148D4600001000003534A50600D1
+:1033A0008F4203082413000124420001AF420308A6
+:1033B0008F4203081000001E326200FF8F8300540A
+:1033C0008F820054247003E8020210232C4203E988
+:1033D00010400016000098213C1500202411001018
+:1033E0008F42000C8F4401608F4501648F8601205D
+:1033F000AFB10010AFB2001400551025AFA20018F5
+:103400008F42010C240700080040F80924C6001C64
+:103410001440FFE3000000008F82005402021023DA
+:103420002C4203E91440FFEE00000000326200FF6E
+:1034300014400011000000008F4203782442000174
+:10344000AF4203788F4203788F8201208FAA002C2D
+:10345000AFA200108F8201243C040001248461206B
+:103460003C050009AFA200148D46000034A50700FA
+:103470000C002B3B03C038218F4202EC2442000198
+:10348000AF4202EC8F4202EC8F4200043042000156
+:10349000104000333C02040002C210241040001708
+:1034A00000000000934205C08F4402508F45025433
+:1034B0008F4301B43442002014A30006A34205C088
+:1034C0008F4202708F4302748F4401B81064000869
+:1034D000000000008F4202508F430254934405C005
+:1034E0008F4602708F470274100000163884004027
+:1034F000934205C010000048304200BF934205C00F
+:103500008F4402508F4502548F4301B4304200BFB4
+:1035100014A30006A34205C08F4202708F430274B9
+:103520008F4401B81064000B000000008F4202506D
+:103530008F430254934405C08F4602708F47027434
+:1035400038840020AF4301B4AF4701B81000003306
+:10355000A34405C0934205C01000002F3442002050
+:10356000934205C08F4300D434420020A34205C0DB
+:103570002462000110000023286300338F4200E41E
+:103580008F4300E024420001AF4200E40043102AD0
+:1035900014400006240300018F4200E81443000297
+:1035A000AF4000E424030004AF4300E88F4200046E
+:1035B000304200011040000D3C02040002C2102401
+:1035C0001040000700000000934205C03442004054
+:1035D000A34205C0934205C01000000F304200DF37
+:1035E000934205C01000000C34420060934205C0B5
+:1035F0008F4300D434420020A34205C0246200015E
+:10360000286300FB14600005AF4200D4934205C05C
+:10361000AF4000D438420040A34205C0934205C0E9
+:103620008F4300E83042007FA34205C0240200011E
+:103630001462000500000000934405C0000421024C
+:1036400010000003348400F0934405C03484000F5C
+:103650000C005640000000002402FF7F0282A024DC
+:103660008FBF00488FBE00448FB500408FB3003C31
+:103670008FB200388FB100348FB0003003E0000803
+:1036800027BD005003E000080000000027BDFFB088
+:10369000274401C026E30028246504000065102BA0
+:1036A000AFBF0048AFBE0044AFB50040AFB3003C71
+:1036B000AFB20038AFB1003410400007AFB00030F7
+:1036C0008C820000AC620000246300040065102BB3
+:1036D0001440FFFB248400048C020080AEE200440E
+:1036E0008C0200C0AEE200408C020084AEE20030EA
+:1036F0008C020084AEE2023C8C020088AEE2024002
+:103700008C02008CAEE202448C020090AEE20248D1
+:103710008C020094AEE2024C8C020098AEE20250A1
+:103720008C02009CAEE202548C0200A0AEE2025871
+:103730008C0200A4AEE2025C8C0200A8AEE2026041
+:103740008C0200ACAEE202648C0200B0AEE2026811
+:103750008C0200B4AEE2026C8C0200B8AEE20270E1
+:103760008C0200BC24040001AEE20274AEE000341E
+:1037700000041080005710218EE300348C42023C7C
+:1037800024840001006218212C82000FAEE3003473
+:103790001440FFF8000410808C0200CCAEE2004818
+:1037A0008C0200D0AEE2004C8C0200E0AEE201F8E8
+:1037B0008C0200E4AEE201FC8C0200E8AEE2020002
+:1037C0008C0200ECAEE202048C0200F0AEE20208D1
+:1037D0008EE400C08EE500C48C0200FC0045102B76
+:1037E0001040000B000000008EE200C08EE300C419
+:1037F0002404000124050000006518210065302B19
+:103800000044102100461021AEE200C0AEE300C427
+:103810008C0200FC8EE400C08EE500C42408FFFF8B
+:1038200024090000004018210000102100882024F5
+:1038300000A928240082202500A32825AEE400C08A
+:10384000AEE500C48EE400D08EE500D48C0200F416
+:103850000045102B1040000B000000008EE200D04D
+:103860008EE300D424040001240500000065182123
+:103870000065302B0044102100461021AEE200D03C
+:10388000AEE300D48C0200F48EE400D08EE500D4C8
+:1038900000401821000010210088202400A92824BD
+:1038A0000082202500A32825AEE400D0AEE500D498
+:1038B0008EE400C88EE500CC8C0200F80045102B89
+:1038C0001040000B000000008EE200C88EE300CC28
+:1038D0002404000124050000006518210065302B38
+:1038E0000044102100461021AEE200C8AEE300CC37
+:1038F0008C0200F88EE400C88EE500CC0040182150
+:10390000000010210088202400A9282400822025FE
+:1039100000A3282524020008AEE400C8AEE500CCD0
+:10392000AFA20010AFA000148F42000C8C0402085C
+:103930008C05020CAFA200188F42010C26E600286D
+:103940000040F80924070400104000F03C02040085
+:10395000AFA20020934205C6104000890000182144
+:103960008F5E001827AA0020240200FF13C2000265
+:10397000AFAA002C27C300018C020228006090210E
+:103980001642000E001E38C08F42033C2442000144
+:10399000AF42033C8F42033C8C0202283C040001EE
+:1039A0002484610C3C050009AFA00014AFA20010F4
+:1039B0008FA600201000006B34A5050000F7102131
+:1039C0008FA300208FA40024AC4304C0AC4404C4E3
+:1039D0008F8300548F820054247003E80202102366
+:1039E0002C4203E91040001B0000982100E08821D0
+:1039F000263504C08F4401788F45017C02201821B0
+:103A0000240A0004AFAA0010AFB200148F48000CC3
+:103A10000000102102F53021AFA800188F48010CDA
+:103A20002407000800A3282100A3482B008220219E
+:103A30000100F809008920215440000624130001E8
+:103A40008F820054020210232C4203E91440FFE944
+:103A500000000000326200FF54400017AF5200180F
+:103A60008F42037824420001AF4203788F420378EB
+:103A70008F8201208FAA002CAFA200108F82012418
+:103A80003C040001248461183C050009AFA2001425
+:103A90008D4600001000003334A506008F42030855
+:103AA0002413000124420001AF4203088F4203089F
+:103AB0001000001C326200FF8F8300548F8200547C
+:103AC000247003E8020210232C4203E91040001482
+:103AD00000009821241100108F42000C8F440160D7
+:103AE0008F4501648F860120AFB10010AFB2001482
+:103AF000AFA200188F42010C240700080040F8090B
+:103B000024C6001C1440FFE5000000008F82005412
+:103B1000020210232C4203E91440FFEF00000000D2
+:103B2000326200FF54400012240200018F420378E9
+:103B300024420001AF4203788F4203788F82012034
+:103B40008FAA002CAFA200108F8201243C04000138
+:103B5000248461203C050009AFA200148D460000BA
+:103B600034A507000C002B3B03C0382100001021B6
+:103B70001440005B240200011000006500000000FA
+:103B80008F510018240200FF122200020000802141
+:103B9000263000018C0202281602000E001130C0EF
+:103BA0008F42033C24420001AF42033C8F42033C5E
+:103BB0008C0202283C040001248460F43C050009C6
+:103BC000AFA00014AFA200108FA600201000003F8D
+:103BD00034A5010000D710218FA300208FA400245A
+:103BE000AC4304C0AC4404C400C018218F44017825
+:103BF0008F45017C0000102124070004AFA70010AE
+:103C0000AFB000148F48000C24C604C002E6302177
+:103C1000AFA800188F48010C2407000800A3282132
+:103C200000A3482B008220210100F80900892021EF
+:103C30001440000B240700088F820120AFA200105F
+:103C40008F8201243C040001248460FC3C050009AF
+:103C5000AFA200148FA600201000001C34A50200A3
+:103C60008F4401608F4501648F43000CAF500018F2
+:103C70008F86012024020010AFA20010AFB0001404
+:103C8000AFA300188F42010C0040F80924C6001CA5
+:103C900054400011240200018F42034024420001DD
+:103CA000AF4203408F4203408F820120AFA2001039
+:103CB0008F8201243C040001248461043C05000936
+:103CC000AFA200148FA6002034A503000C002B3BEC
+:103CD00002203821000010211040000D24020001B4
+:103CE0008F4202E8A34005C6AF4001B02442000164
+:103CF000AF4202E88F4202E88EE201502442000106
+:103D0000AEE20150100000038EE2015024020001D7
+:103D1000A34205C68FBF00488FBE00448FB5004048
+:103D20008FB3003C8FB200388FB100348FB00030B9
+:103D300003E0000827BD005027BDFFD8AFBF00201B
+:103D40008F8200B030420004104000680000000084
+:103D50008F4301288F8201041462000500000000D7
+:103D60008F4301308F8200B4106200060000000013
+:103D70008F820104AF4201288F8200B41000005BE3
+:103D8000AF4201308F8200B03C030080004310241A
+:103D90001040000D000000008F82011C3442000220
+:103DA000AF82011C8F8200B02403FFFB004310246C
+:103DB000AF8200B08F82011C2403FFFD004310245A
+:103DC0001000004AAF82011C8F4301288F8201043A
+:103DD00014620005000000008F4301308F8200B4A0
+:103DE00010620010000000008F820104AF42012821
+:103DF0008F8200B48F430128AF420130AFA300107F
+:103E00008F4201303C04000124846144AFA20014BD
+:103E10008F86011C8F8700B03C0500051000003123
+:103E200034A509008F420128AFA200108F42013053
+:103E30003C04000124846150AFA200148F86011C51
+:103E40008F8700B03C0500050C002B3B34A510000B
+:103E50008F82011C34420002AF82011C8F83010457
+:103E60008F8200B034420001AF8200B0240200080B
+:103E7000AF830104AFA20010AFA000148F42000C6A
+:103E80008C0402088C05020CAFA200188F42010CB2
+:103E900026E600280040F809240704008F82011C50
+:103EA0002403FFFD00431024AF82011C8EE201DCDD
+:103EB00024420001AEE201DC8EE201DC8F420128E7
+:103EC000AFA200108F4201303C0400012484615CE9
+:103ED000AFA200148F86011C8F8700B03C0500053F
+:103EE00034A511000C002B3B000000008F8200A0C5
+:103EF0003042000410400069000000008F43012C94
+:103F00008F82012414620005000000008F430134F9
+:103F10008F8200A410620006000000008F8201243E
+:103F2000AF42012C8F8200A41000005CAF4201342C
+:103F30008F8200A03C030080004310241040000D3D
+:103F4000000000008F82011C34420002AF82011C7D
+:103F50008F8200A02403FFFB00431024AF8200A047
+:103F60008F82011C2403FFFD004310241000004B2E
+:103F7000AF82011C8F43012C8F8201241462000543
+:103F8000000000008F4301348F8200A410620010F3
+:103F9000000000008F820124AF42012C8F8200A418
+:103FA0008F43012CAF420134AFA300108F42013484
+:103FB0003C04000124846168AFA200148F86011CB8
+:103FC0008F8700A03C0500051000003234A51200C8
+:103FD0008F42012CAFA200108F4201343C0400013B
+:103FE00024846174AFA200148F86011C8F8700A007
+:103FF0003C0500050C002B3B34A513008F82011CEF
+:1040000034420002AF82011C8F8301248F8200A002
+:1040100034420001AF8200A024020080AF8301245B
+:10402000AFA20010AFA000148F4200148C0402084D
+:104030008C05020CAFA200188F4201083C0600015B
+:1040400024C66ED80040F809240700048F82011CA2
+:104050002403FFFD00431024AF82011C8EE201DC2B
+:1040600024420001AEE201DC8EE201DC8F42012C31
+:10407000AFA200108F4201343C040001248461800F
+:10408000AFA200148F86011C8F8700A03C0500059D
+:1040900034A514000C002B3B000000008FBF002053
+:1040A00003E0000827BD00283C0810002407000199
+:1040B0003C0600803C0501008F82007000481024FF
+:1040C0001040FFFD000000008F82005424420005D4
+:1040D000AF8200788C040234108000160000182192
+:1040E0003C020001005710218C4240E824420005A8
+:1040F0003C01000100370821AC2240E83C020001ED
+:10410000005710218C4240E80044102B1440000955
+:10411000000000003C0300803C0100010037082142
+:10412000AC2040E83C010001003708211000000BE2
+:10413000A02740F03C02000100571021904240F0BF
+:1041400054400006006618253C020001005710216B
+:10415000904240F154400001006618258C04023062
+:1041600010800013000000003C02000100571021E5
+:104170008C4240EC244200053C010001003708213C
+:10418000AC2240EC3C020001005710218C4240EC74
+:104190000044102B14400006000000003C01000108
+:1041A00000370821AC2040EC1000000600651825FF
+:1041B0003C02000100571021904240F2544000019F
+:1041C000006518251060FFBC000000008F42000051
+:1041D0001040000700000000AF80004C8F82004CB0
+:1041E0001040FFFD0000000010000005000000006E
+:1041F000AF8000488F8200481040FFFD00000000A3
+:104200008F82006000431025AF8200608F42000063
+:1042100010400003000000001000FFA7AF80004C1A
+:104220001000FFA5AF80004803E000080000000078
+:1042300000000000000000000000000027BDFFE0BB
+:10424000AFBF00188F86006430C200041040002504
+:10425000240400048C020114AF420020AF840064E7
+:104260008F4202FC24420001AF4202FC8F4202FC5A
+:104270008F820064304200041440000500000000FA
+:104280008C0301148F4200201462FFF20000000032
+:104290008F420000104000078F43003CAF80004C6D
+:1042A0008F82004C1040FFFD000000001000000550
+:1042B00000000000AF8000488F8200481040FFFDE2
+:1042C000000000008F82006000431025AF82006074
+:1042D0008F42000010400073000000001000006FCB
+:1042E0000000000030C20008104000202404000834
+:1042F0008C02011CAF420048AF8400648F4202A8C8
+:1043000024420001AF4202A88F4202A88F820064BB
+:104310003042000814400005000000008C03011C1E
+:104320008F4200481462FFF2000000008F4200003C
+:104330001040000700000000AF80004C8F82004C4E
+:104340001040FFFD0000000010000005000000000C
+:10435000AF8000488F8200481040FFFD0000000041
+:104360008F8200601000FFD93442020030C200206A
+:1043700010400023240400208C02012CAF4200686E
+:10438000AF8400648F4202D824420001AF4202D8B9
+:104390008F4202D88F820064304200201440000512
+:1043A00032C240008C03012C8F4200681462FFF27D
+:1043B00032C24000144000023C02000102C2B0259B
+:1043C0008F4200001040000700000000AF80004C4A
+:1043D0008F82004C1040FFFD00000000100000051F
+:1043E00000000000AF8000488F8200481040FFFDB1
+:1043F000000000008F8200601000FFB4344208000B
+:1044000030C2001010400029240400108C02012446
+:10441000AF420058AF8400648F4202D424420001AE
+:10442000AF4202D48F4202D48F8200643042001027
+:104430001440000532C220008C0301248F42005832
+:104440001462FFF232C220005040000136D68000D4
+:104450008F4200001040000700000000AF80004CB9
+:104460008F82004C1040FFFD00000000100000058E
+:1044700000000000AF8000488F8200481040FFFD20
+:10448000000000008F82006034420100AF820060B3
+:104490008F42000010400003000000001000006C7C
+:1044A000AF80004C1000006AAF80004830C20001AD
+:1044B0001040000424020001AF8200641000006478
+:1044C0000000000030C200021440000B3C05000355
+:1044D0003C0400012484624434A505000000382116
+:1044E000AFA000100C002B3BAFA000142402FFC0B3
+:1044F00010000057AF8200648C05022C8C02010C66
+:1045000010A20048000510808C46030024A2000180
+:104510003045003F24020003AC05022C00061E02B9
+:1045200010620005240200101062001D30C20FFF4F
+:1045300010000039000000008F4302A88F440000E3
+:1045400030C20FFFAF42004824630001AF4302A80E
+:10455000108000078F4202A8AF80004C8F82004C71
+:104560001040FFFD000000001000000500000000EA
+:10457000AF8000488F8200481040FFFD000000001F
+:104580008F82006034420200AF8200608F420000E0
+:104590001040001F000000001000001B0000000081
+:1045A000AF42005832C220005040000136D6800091
+:1045B0008F4202D48F43000024420001AF4202D454
+:1045C000106000078F4202D4AF80004C8F82004CF5
+:1045D0001040FFFD0000000010000005000000007A
+:1045E000AF8000488F8200481040FFFD00000000AF
+:1045F0008F82006034420100AF8200608F42000071
+:10460000104000030000000010000006AF80004CC6
+:1046100010000004AF8000480C00219600C020214B
+:10462000004028218C02010C14A200022402000286
+:10463000AF8200648F8200643042000214400004A4
+:10464000000000008C02010C14A2FFAC000000006E
+:104650008FBF001803E0000827BD002003E000081A
+:104660000000000027BDFFA0AFB000400080802107
+:10467000001016022442FFFF304300FF2C6200139B
+:10468000AFBF0058AFBE0054AFB50050AFB3004C41
+:10469000AFB20048AFB10044104001F3AFA5003401
+:1046A000000310803C010001002208218C22628856
+:1046B00000400008000000000010130230440FFF0B
+:1046C0002402000110820005240200021082000C66
+:1046D0002402FFFE100000243C0500038F43000469
+:1046E0003C0200018C426F04AF440200AF4402045C
+:1046F0003C0400018C846E801000000934630001CA
+:104700008F430004AF440200AF4402043C040001A4
+:104710008C846E80006218243C0200012442CA2866
+:104720000002110000021182AF4300043C030800A4
+:1047300000431025AC8200388F84005400041442DA
+:1047400000041C820043102100041CC200431023FB
+:1047500000041D020043102100041D4200431023E9
+:1047600010000009AF4202083C040001248462509A
+:1047700034A510000200302100003821AFA0001045
+:104780000C002B3BAFA000148F4202A0244200017A
+:10479000AF4202A01000021F8F4202A027B00028E3
+:1047A00002002021240502100C002BBF2406000863
+:1047B0000C00251802002021100002160000000045
+:1047C0008FAA003427A40028000A1880254200017F
+:1047D0003042003FAFA200348C6503008FAA003442
+:1047E000000210808C430300254200013042003F4C
+:1047F000AFA20034AC02022CAFA500280C00251893
+:10480000AFA3002C100002030000000027B0002816
+:1048100002002021240502100C002BBF24060008F2
+:104820000C00265702002021100001FA00000000B1
+:104830008FAA003427A40028000A1880254200010E
+:104840003042003FAFA200348C6503008FAA0034D1
+:10485000000210808C430300254200013042003FDB
+:10486000AFA20034AC02022CAFA500280C002657E2
+:10487000AFA3002C100001E700000000001013029D
+:1048800030430FFF240200011062000524020002E1
+:104890001062001E3C020002100000333C050003C1
+:1048A0003C03000202C310245440003702C3B02569
+:1048B0008F8202283C01000100370821AC2238D841
+:1048C0008F82022C3C01000100370821AC2238DC29
+:1048D0008F8202303C01000100370821AC2238E011
+:1048E0008F8202343C01000100370821AC2238E4F9
+:1048F0002402FFFFAF820228AF82022CAF82023077
+:10490000AF8202341000002002C3B02502C210247E
+:10491000104000123C02FFFD3C0200010057102134
+:104920008C4238D8AF8202283C0200010057102187
+:104930008C4238DCAF82022C3C020001005710216F
+:104940008C4238E0AF8202303C0200010057102157
+:104950008C4238E4AF8202343C02FFFD3442FFFF58
+:104960001000000902C2B0243C0400012484625CEF
+:1049700034A511000200302100003821AFA0001042
+:104980000C002B3BAFA000148F4202CC244200014C
+:10499000AF4202CC1000019F8F4202CC00101302E4
+:1049A00030450FFF2402000110A20005240200027E
+:1049B00010A2000D3C0408FF100000143C05000389
+:1049C0003C0208FF3442FFFF8F8302203C040004B6
+:1049D00002C4B0250062182434630008AF830220AB
+:1049E00010000012AF4502983484FFF73C03FFFB30
+:1049F0008F8202203463FFFF02C3B02400441024DE
+:104A0000AF82022010000009AF4502983C0400016B
+:104A10002484626834A5120002003021000038218D
+:104A2000AFA000100C002B3BAFA000148F4202BCC3
+:104A300024420001AF4202BC100001768F4202BC4A
+:104A400027840208240502000C002BBF240600085E
+:104A500027440224240502000C002BBF2406000872
+:104A60008F4202C424420001AF4202C41000016917
+:104A70008F4202C40010130230430FFF24020001D2
+:104A8000106200112862000250400005240200025A
+:104A90001060000700000000100000170000000078
+:104AA0001062000F00000000100000130000000062
+:104AB0008C060248000020210C005104240500044B
+:104AC00010000007000000008C06024800002021B2
+:104AD0000C00510424050004100000100000000028
+:104AE0008C06024C000020210C005104240500011A
+:104AF0001000000A000000003C04000124846274DD
+:104B00003C05000334A513000200302100003821C9
+:104B1000AFA000100C002B3BAFA000148F4202C0CE
+:104B200024420001AF4202C01000013A8F4202C08D
+:104B30000C002426000000001000013600000000D8
+:104B400024020001A34205C5241001008F4401A8DE
+:104B50008F4501ACAFB00010AFA000148F4200141D
+:104B6000AFA200188F42010826E600280040F8098D
+:104B7000240704001040FFF500000000100001258C
+:104B8000000000003C03FFFF34637FFF8F42036897
+:104B90008F44036002C3B02400001821AF400058C6
+:104BA000AF40005CAF400060AF40006400441023A1
+:104BB000AF4203683C020900AF400360AFA200208F
+:104BC0008F5E001827AA0020240200FF13C20002F3
+:104BD000AFAA003C27C300018C020228006090218C
+:104BE0001642000E001E38C08F42033C24420001D2
+:104BF000AF42033C8F42033C8C0202283C0400017C
+:104C00002484620C3C050009AFA00014AFA2001080
+:104C10008FA600201000006B34A5050000F71021BE
+:104C20008FA300208FA40024AC4304C0AC4404C470
+:104C30008F8300548F820054247003E802021023F3
+:104C40002C4203E91040001B0000982100E088215D
+:104C5000263504C08F4401788F45017C022018213D
+:104C6000240A0004AFAA0010AFB200148F48000C51
+:104C70000000102102F53021AFA800188F48010C68
+:104C80002407000800A3282100A3482B008220212C
+:104C90000100F80900892021544000062413000176
+:104CA0008F820054020210232C4203E91440FFE9D2
+:104CB00000000000326200FF54400017AF5200189D
+:104CC0008F42037824420001AF4203788F42037879
+:104CD0008F8201208FAA003CAFA200108F82012496
+:104CE0003C040001248462183C050009AFA20014B2
+:104CF0008D4600001000003334A506008F420308E3
+:104D00002413000124420001AF4203088F4203082C
+:104D10001000001C326200FF8F8300548F82005409
+:104D2000247003E8020210232C4203E9104000140F
+:104D300000009821241100108F42000C8F44016064
+:104D40008F4501648F860120AFB10010AFB200140F
+:104D5000AFA200188F42010C240700080040F80998
+:104D600024C6001C1440FFE5000000008F820054A0
+:104D7000020210232C4203E91440FFEF0000000060
+:104D8000326200FF14400011000000008F420378DF
+:104D900024420001AF4203788F4203788F820120C2
+:104DA0008FAA003CAFA200108F8201243C040001B6
+:104DB000248462203C050009AFA200148D46000047
+:104DC00034A507000C002B3B03C038218F4202B0F2
+:104DD00024420001AF4202B08F4202B08F4202F87B
+:104DE00024420001AF4202F81000008A8F4202F80C
+:104DF0008C02025C27440224AF4201F08C02026064
+:104E000024050200240600080C002BBFAF4201F865
+:104E10008F82022030420008144000022402000168
+:104E200024020002AF4202988F4202AC24420001E9
+:104E3000AF4202AC100000778F4202AC3C0200FF90
+:104E40003442FFFF0202182432C2018014400006DF
+:104E50003402FFFB0043102B14400003000000004D
+:104E60001000006CAF4300BC3C040001248462804D
+:104E70003C05000334A51500020030210000382154
+:104E8000AFA000100C002B3BAFA000143C020700A9
+:104E90003442100000101E0200621825AFA300204B
+:104EA0008F510018240200FF12220002000080210E
+:104EB000263000018C0202281602000E001130C0BC
+:104EC0008F42033C24420001AF42033C8F42033C2B
+:104ED0008C0202283C040001248461F43C05000992
+:104EE000AFA00014AFA200108FA600201000003F5A
+:104EF00034A5010000D710218FA300208FA4002427
+:104F0000AC4304C0AC4404C400C018218F440178F1
+:104F10008F45017C0000102124070004AFA700107A
+:104F2000AFB000148F48000C24C604C002E6302144
+:104F3000AFA800188F48010C2407000800A32821FF
+:104F400000A3482B008220210100F80900892021BC
+:104F50001440000B240700088F820120AFA200102C
+:104F60008F8201243C040001248461FC3C0500097B
+:104F7000AFA200148FA600201000001C34A5020070
+:104F80008F4401608F4501648F43000CAF500018BF
+:104F90008F86012024020010AFA20010AFB00014D1
+:104FA000AFA300188F42010C0040F80924C6001C72
+:104FB00014400010000000008F4203402442000112
+:104FC000AF4203408F4203408F820120AFA2001006
+:104FD0008F8201243C040001248462043C05000902
+:104FE000AFA200148FA6002034A503000C002B3BB9
+:104FF000022038218F4202E024420001AF4202E049
+:105000008F4202E08F4202F024420001AF4202F0E0
+:105010008F4202F08FA200348FBF00588FBE005421
+:105020008FB500508FB3004C8FB200488FB1004451
+:105030008FB0004003E0000827BD006027BDFFF8E7
+:105040002408FFFF10A00014000048213C0AEDB81E
+:10505000354A83209087000024840001000030211D
+:1050600001071026304200011040000200081842DB
+:10507000006A18260060402124C600012CC20008E6
+:105080001440FFF700073842252900010125102BA5
+:105090001440FFF0000000000100102103E00008B0
+:1050A00027BD000827BDFFB0AFBF0048AFBE00441A
+:1050B000AFB50040AFB3003CAFB20038AFB1003481
+:1050C000AFB000308F870220AFA700248F87020087
+:1050D000AFA7002C8F8202203C0308FF3463FFFF40
+:1050E0000043102434420004AF8202208F82020069
+:1050F0003C03C0FF3463FFFF00431024344200042C
+:10510000AF8202008F5303588F55035C8F5E03609C
+:105110008F470364AFA700148F470368AFA7001C35
+:105120008F4202D0274401C024420001AF4202D086
+:105130008F5002D08F5102048F5202000C002BA816
+:1051400024050400AF530358AF55035CAF5E036002
+:105150008FA70014AF4703648FA7001CAF470368F5
+:10516000AF5002D0AF510204AF5202008C02025C79
+:1051700027440224AF4201F08C02026024050200A1
+:1051800024060008AF4201F8240200060C002BBFE1
+:10519000AF4201F43C023B9A3442CA00AF4201FCE8
+:1051A000240203E82404000224030001AF42029415
+:1051B000AF440290AF43029C8F820220304200082D
+:1051C0001040000400000000AF43029810000003EC
+:1051D00000003021AF440298000030213C03000160
+:1051E0000066182190636D000346102124C600015B
+:1051F000A043022C2CC2000F1440FFF803461821D4
+:1052000024C600018F820040240400802405008011
+:105210000002170224420030A062022C0346102133
+:105220000C002BA8A040022C8FA7002430E2000421
+:1052300014400006000000008F8202203C0308FF9B
+:105240003463FFFB00431024AF8202208FA7002CA1
+:1052500030E2000414400006000000008F820200CB
+:105260003C03C0FF3463FFFB00431024AF82020005
+:105270008FBF00488FBE00448FB500408FB3003C05
+:105280008FB200388FB100348FB0003003E00008D7
+:1052900027BD00500000000000000000AF400104E6
+:1052A00024040001000410C002E21821248200013D
+:1052B0003C01000100230821A42234D00040202119
+:1052C0002C8200801440FFF8000410C0240200016A
+:1052D0003C01000100370821A42038D0AF42010072
+:1052E000AF800228AF80022CAF800230AF80023442
+:1052F00003E000080000000027BDFFE8AFBF001476
+:10530000AFB000108F420104284200051040002673
+:10531000008080213C0200018F430104344230D0E0
+:1053200002E22021000318C00062182102E31821C4
+:105330000083102B1040001500001021960700007C
+:1053400024840006246600069482FFFC14470009AA
+:10535000000028219483FFFE9602000214620006DA
+:1053600000A0102194820000960300040043102640
+:105370002C45000100A010211440000924840008DD
+:105380000086102B1440FFF000001021304200FF77
+:1053900014400030240200011000002E00001021F3
+:1053A0001000FFFA24020001020020210C00240C4E
+:1053B000240500063042007F000218C002E31021DD
+:1053C0003C01000100220821942230D01040FFF25D
+:1053D00002E310213C06000100C2302194C630D007
+:1053E00010C0FFED3C080001350834D296070000DC
+:1053F000000610C000572021008820219482000060
+:10540000144700090000282194830002960200023C
+:105410001462000600A01021948200049603000488
+:10542000004310262C45000100A010211440000765
+:10543000000610C002E210213C06000100C230212B
+:1054400094C634D014C0FFEB000610C010C0FFD2C9
+:10545000240200018FBF00148FB0001003E0000889
+:1054600027BD001803E000080000000027BDFFB0C2
+:1054700000801021AFB00030245000020200202133
+:1054800024050006AFB1003400408821AFBF0048BA
+:10549000AFBE0044AFB50040AFB3003C0C00240CDD
+:1054A000AFB200383047007F000710C002E2102181
+:1054B0003C05000100A2282194A530D050A0001C7A
+:1054C00000A030213C090001352934D29628000281
+:1054D000000510C00057202100892021948200007F
+:1054E0001448000900003021948300029602000253
+:1054F0001462000600C01021948200049603000488
+:10550000004310262C46000100C010211440000763
+:10551000000510C002E210213C05000100A2282174
+:1055200094A534D014A0FFEB000510C000A03021DA
+:1055300010C00014000610C0005718213C010001E3
+:10554000002308218C2334D000571021AFA3001072
+:105550003C010001002208218C2234D43C040001CB
+:1055600024846394AFA200148E2600008E270004CA
+:105570003C0500040C002B3B34A504001000006324
+:105580003C0208008F45010010A00006000510C075
+:1055900002E210213C01000100220821942234D0B3
+:1055A000AF42010000A0302114C00011000628C045
+:1055B000000710C002E21021AFA700103C0100015B
+:1055C00000220821942230D03C040001248463A0EE
+:1055D000AFA200148E2600008E2700043C050004B4
+:1055E0000C002B3B34A50500100000483C020800CD
+:1055F00000B718213C02000196040000344234D266
+:1056000000621821A46400008E020002000720C07E
+:10561000AC62000202E410213C0300010062182188
+:10562000946330D002E510213C01000100220821E2
+:10563000A42334D002E410213C01000100220821FF
+:10564000A42630D08F420104244200012842008069
+:105650001040000F3C0200028F4201043C04000194
+:10566000348430D296030000000210C0005710218D
+:1056700000441021A44300008E030002AC4300024A
+:105680008F42010424420001AF4201043C020002A7
+:1056900002C2102410400011000721423C03000107
+:1056A000346338D824020003004410230002108021
+:1056B0000057202100832021005710210043102192
+:1056C00030E5001F8C4300002402000100A21004FA
+:1056D000006218251000000CAC83000024020003B7
+:1056E0000044102300021080005C2821005C10217F
+:1056F00030E4001F8C4302282402000100821004C1
+:1057000000621825ACA302283C02080034421000B5
+:1057100000001821AFA200208F5E001827AA0020E9
+:10572000240200FF13C20002AFAA002C27C300010D
+:105730008C020228006090211642000E001E38C024
+:105740008F42033C24420001AF42033C8F42033CA2
+:105750008C0202283C0400012484635C3C0500099F
+:10576000AFA00014AFA200108FA600201000006BA5
+:1057700034A5050000F710218FA300208FA400247A
+:10578000AC4304C0AC4404C48F8300548F820054E3
+:10579000247003E8020210232C4203E91040001B8E
+:1057A0000000982100E08821263504C08F4401784C
+:1057B0008F45017C02201821240A0004AFAA0010A2
+:1057C000AFB200148F48000C0000102102F5302108
+:1057D000AFA800188F48010C2407000800A3282157
+:1057E00000A3482B008220210100F8090089202114
+:1057F00054400006241300018F820054020210233B
+:105800002C4203E91440FFE900000000326200FF6F
+:1058100054400017AF5200188F4203782442000111
+:10582000AF4203788F4203788F8201208FAA002C29
+:10583000AFA200108F8201243C040001248463681D
+:105840003C050009AFA200148D4600001000003393
+:1058500034A506008F4203082413000124420001EE
+:10586000AF4203088F4203081000001C326200FFA1
+:105870008F8300548F820054247003E802021023A7
+:105880002C4203E91040001400009821241100105C
+:105890008F42000C8F4401608F4501648F86012088
+:1058A000AFB10010AFB20014AFA200188F42010CCC
+:1058B000240700080040F80924C6001C1440FFE536
+:1058C000000000008F820054020210232C4203E9E2
+:1058D0001440FFEF00000000326200FF144000118E
+:1058E000000000008F42037824420001AF42037899
+:1058F0008F4203788F8201208FAA002CAFA2001064
+:105900008F8201243C040001248463703C0500095B
+:10591000AFA200148D46000034A507000C002B3BFD
+:1059200003C038218F4202B424420001AF4202B4C6
+:105930008F4202B48F4202F424420001AF4202F4CB
+:105940008F4202F48FBF00488FBE00448FB50040E5
+:105950008FB3003C8FB200388FB100348FB000306D
+:1059600003E0000827BD005027BDFFA000801021E4
+:10597000AFB00040245000020200202124050006A0
+:10598000AFB1004400408821AFBF0058AFBE005403
+:10599000AFB50050AFB3004C0C00240CAFB20048C0
+:1059A0003048007F000810C002E210213C060001D0
+:1059B00000C2302194C630D010C0001C0000382135
+:1059C0003C0A0001354A34D296290002000610C074
+:1059D00000572021008A20219482000014490009E8
+:1059E000000028219483000296020002146200063F
+:1059F00000A01021948200049603000400431026A6
+:105A00002C45000100A0102114400008000610C021
+:105A100000C0382102E210213C06000100C2302102
+:105A200094C634D014C0FFEA000610C014C00011A0
+:105A3000AFA70028000810C002E21021AFA8001094
+:105A40003C01000100220821942230D03C040001D6
+:105A5000248463ACAFA200148E2600008E270004BD
+:105A60003C0500040C002B3B34A509001000007518
+:105A70003C02080010E0000C000610C002E21021F9
+:105A80003C03000100621821946334D0000710C069
+:105A900002E210213C01000100220821A42334D09D
+:105AA0001000000B3C04000102E210213C03000145
+:105AB00000621821946334D0000810C002E2102163
+:105AC0003C01000100220821A42330D03C04000145
+:105AD000348430D08F430100000610C002E2102150
+:105AE0003C01000100220821A42334D08F4201048C
+:105AF00002E438210000282118400029AF460100A7
+:105B000024E6000694C3FFFC96020000146200091C
+:105B10000000202194C3FFFE9602000214620006DA
+:105B20000080102194C20000960300040043102658
+:105B30002C440001008010215040001424A50001D5
+:105B40008F4201042442FFFF00A2102A1040000BE4
+:105B500024E40004948200068C830008A482FFFEE3
+:105B6000AC8300008F42010424A500012442FFFF02
+:105B700000A2102A1440FFF7248400088F42010479
+:105B80002442FFFF10000006AF4201048F420104CF
+:105B900024C6000800A2102A1440FFDA24E70008F7
+:105BA000000810C002E210213C010001002208217F
+:105BB000942230D0144000233C0208003C02000232
+:105BC00002C2102410400012000821423C030001D0
+:105BD000346338D8240200030044102300021080EC
+:105BE000005720210083202100571021004310215D
+:105BF0003105001F240300018C42000000A318049B
+:105C000000031827004310241000000DAC82000090
+:105C1000240200030044102300021080005C2821AD
+:105C2000005C10213104001F240300018C42022873
+:105C3000008318040003182700431024ACA2022894
+:105C40003C0208003442200000001821AFA20020CE
+:105C50008F5E001827AB0020240200FF13C2000251
+:105C6000AFAB003427C300018C02022800609021F2
+:105C70001642000E001E38C08F42033C2442000131
+:105C8000AF42033C8F42033C8C0202283C040001DB
+:105C90002484635C3C050009AFA00014AFA200108F
+:105CA0008FA600201000006B34A5050000F710211E
+:105CB0008FA300208FA40024AC4304C0AC4404C4D0
+:105CC0008F8300548F820054247003E80202102353
+:105CD0002C4203E91040001B0000982100E08821BD
+:105CE000263504C08F4401788F45017C022018219D
+:105CF000240B0004AFAB0010AFB200148F48000CAF
+:105D00000000102102F53021AFA800188F48010CC7
+:105D10002407000800A3282100A3482B008220218B
+:105D20000100F809008920215440000624130001D5
+:105D30008F820054020210232C4203E91440FFE931
+:105D400000000000326200FF54400017AF520018FC
+:105D50008F42037824420001AF4203788F420378D8
+:105D60008F8201208FAB0034AFA200108F820124FC
+:105D70003C040001248463683C050009AFA20014C0
+:105D80008D6600001000003334A506008F42030822
+:105D90002413000124420001AF4203088F4203088C
+:105DA0001000001C326200FF8F8300548F82005469
+:105DB000247003E8020210232C4203E9104000146F
+:105DC00000009821241100108F42000C8F440160C4
+:105DD0008F4501648F860120AFB10010AFB200146F
+:105DE000AFA200188F42010C240700080040F809F8
+:105DF00024C6001C1440FFE5000000008F82005400
+:105E0000020210232C4203E91440FFEF00000000BF
+:105E1000326200FF14400011000000008F4203783E
+:105E200024420001AF4203788F4203788F82012021
+:105E30008FAB0034AFA200108F8201243C0400011C
+:105E4000248463703C050009AFA200148D66000035
+:105E500034A507000C002B3B03C038218F4202B849
+:105E600024420001AF4202B88F4202B88F4202F4CE
+:105E700024420001AF4202F48F4202F48FBF005867
+:105E80008FBE00548FB500508FB3004C8FB20048C6
+:105E90008FB100448FB0004003E0000827BD0060D0
+:105EA00000000000000000000000000027BDFFE02F
+:105EB00027644000AFBF00180C002BA82405100079
+:105EC0003C03000134632CC03C04000134842EC820
+:105ED00024020020AF82011C02E31021AF800100E8
+:105EE000AF800104AF800108AF800110AF800114C2
+:105EF000AF800118AF800120AF800124AF8001285E
+:105F0000AF800130AF800134AF800138AF4200EC88
+:105F100002E31021AF4200F002E41021AF4200F48E
+:105F200002E41021AF4200F83C02000100571021AA
+:105F3000904240F41440001C3C0500018F82011C7B
+:105F40003C040001248464703C05000134420001DB
+:105F5000AF82011CAFA00010AFA000148F86011CFF
+:105F600034A501000C002B3B000038218C020218E4
+:105F70003042004010400014000000008F82011CDD
+:105F80003C0400012484647C3C050001344200048C
+:105F9000AF82011CAFA00010AFA000148F86011CBF
+:105FA0001000000734A502003C040001248464842E
+:105FB000AFA00010AFA000148F86011C34A5030011
+:105FC0000C002B3B000038218FBF001803E00008B5
+:105FD00027BD00208FA900108F83012C8FAA0014E9
+:105FE0008FAB00181060000A27624FE014620002B5
+:105FF00024680020276848008F82012811020004CD
+:10600000000000008F82012415020007000000003C
+:106010008F4303340000102124630001AF43033495
+:10602000100000398F430334AC640000AC650004F9
+:10603000AC660008A467000EAC690018AC6A001CCE
+:10604000AC6B0010AC620014AF8801208F4200FCE2
+:106050008F4400F42442FFFFAF4200FC8C8200001A
+:10606000104900053042FF8F104000193122FF8F88
+:10607000104000183C0200018C8300042C620010C8
+:10608000104000133C02000124630001AC830004B3
+:106090008F4300F8344230C802E2102154620004F9
+:1060A000246200083C02000134422EC802E21021A2
+:1060B00014440015240200018F820128244200208C
+:1060C000AF8201288F8201281000000F24020001F6
+:1060D0003C020001344230C802E210215482000424
+:1060E000248200083C02000134422EC802E2102142
+:1060F0000040202124020001AF4400F4AC890000DC
+:10610000AC8200042402000103E00008000000004B
+:1061100003E00008000000008FA900108F83010C2D
+:106120008FAA00148FAB00181060000A276247E0A6
+:106130001462000224680020276840008F82010852
+:1061400011020004000000008F8201041502000704
+:10615000000000008F430338000010212463000179
+:10616000AF430338100000358F430338AC640000A0
+:10617000AC650004AC660008A467000EAC690018AA
+:10618000AC6A001CAC6B0010AC620014AF8801005C
+:106190008F4400EC8C820000304200061040001951
+:1061A00031220006104000183C0200018C830004DC
+:1061B0002C620010104000133C0200012463000117
+:1061C000AC8300048F4300F034422EC002E2102161
+:1061D00054620004246200083C02000134422CC0D6
+:1061E00002E2102114440015240200018F820108EC
+:1061F00024420020AF8201088F8201081000000FA6
+:10620000240200013C02000134422EC002E21021AF
+:1062100054820004248200083C02000134422CC055
+:1062200002E210210040202124020001AF4400ECD2
+:10623000AC890000AC8200042402000103E00008E5
+:106240000000000003E000080000000027BDFFD8A8
+:106250003C0400012484648C3C050001AFBF002491
+:10626000AFB20020AFB1001CAFB000188F90010496
+:106270008F9100B08F92011C34A525008F82010000
+:106280000240302102203821AFA200100C002B3B2D
+:10629000AFB000148E020008AFA200108E02000CF6
+:1062A0003C04000124846498AFA200148E06000010
+:1062B0008E0700043C0500010C002B3B34A5251083
+:1062C0008E020018AFA200108E02001C3C040001D8
+:1062D000248464A4AFA200148E0600108E0700145C
+:1062E0003C0500010C002B3B34A525203C027F001F
+:1062F000022210243C030800544300163C03020011
+:106300008F82009C3042FFFF144000123C030200C9
+:106310003C040001248464B03C05000234A5F03044
+:10632000000030210000382136420002AF82011CFB
+:1063300036220001AF8200B0AF900104AF92011C81
+:10634000AFA000100C002B3BAFA0001410000024E5
+:106350000000000002C310241040000D022310248E
+:106360001040000B36420002AF82011C36220001B1
+:10637000AF8200B0AF900104AF92011C8F42033096
+:1063800024420001AF420330100000158F42033059
+:106390003C040001248464B8240202A9AFA20010C6
+:1063A000AFA000148F8601443C07000124E764C0BD
+:1063B0000C002B3B3405DEAD8F82011C3442000201
+:1063C000AF82011C8F82022034420004AF8202207F
+:1063D0008F8201403C03000100431025AF82014041
+:1063E0008FBF00248FB200208FB1001C8FB0001827
+:1063F00003E0000827BD002827BDFFD83C040001AA
+:10640000248464E83C050001AFBF0024AFB2002043
+:10641000AFB1001CAFB000188F9001248F9100A085
+:106420008F92011C34A526008F820120024030216A
+:1064300002203821AFA200100C002B3BAFB000149B
+:106440008E020008AFA200108E02000C3C04000176
+:10645000248464F4AFA200148E0600008E070004AA
+:106460003C0500010C002B3B34A526108E020018C1
+:10647000AFA200108E02001C3C04000124846500C1
+:10648000AFA200148E0600108E0700143C05000118
+:106490000C002B3B34A526203C027F000222102456
+:1064A0003C030800544300163C0302008F8200ACFA
+:1064B0003042FFFF144000123C0302003C04000184
+:1064C0002484650C3C05000134A5F0300000302127
+:1064D0000000382136420002AF82011C3622000142
+:1064E000AF8200A0AF900124AF92011CAFA00010BA
+:1064F0000C002B3BAFA00014100000240000000093
+:1065000002C310241040000D022310241040000B81
+:1065100036420002AF82011C36220001AF8200A089
+:10652000AF900124AF92011C8F42032C2442000142
+:10653000AF42032C100000158F42032C3C040001D5
+:10654000248464B8240202E2AFA20010AFA00014B9
+:106550008F8601443C07000124E764C00C002B3BFC
+:106560003405DEAD8F82011C34420002AF82011C73
+:106570008F82022034420004AF8202208F820140C9
+:106580003C03000100431025AF8201408FBF00246F
+:106590008FB200208FB1001C8FB0001803E00008FC
+:1065A00027BD00280000602100005021000030219C
+:1065B0000000282100006821000048210000782107
+:1065C000000070218F8801248F8701041580002E20
+:1065D0008F8B011C11A00014316208008F820120F2
+:1065E00010460029000000003C0400018C846EE489
+:1065F0008CC200008CC30004AC820000AC83000499
+:106600008CC20008AC82000894C2000EA482000E66
+:106610008CC20010240C0001AC8200108CC200144B
+:106620001000001224C600201040001700000000D7
+:106630003C0400018C846EE48D0200008D03000494
+:10664000AC820000AC8300048D020008AC8200081C
+:106650009502000EA482000E8D0200102506002077
+:10666000AC8200108D020014240C000100C018211F
+:10667000AC82001427624FE00043102B544000010D
+:1066800027634800006030211540002F316201006F
+:1066900011200014316280008F8201001045002A11
+:1066A000316201003C0400018C846EE08CA2000089
+:1066B0008CA30004AC820000AC8300048CA2000810
+:1066C000AC82000894A2000EA482000E8CA20010DE
+:1066D000240A0001AC8200108CA2001410000012E9
+:1066E00024A5002010400018316201003C04000184
+:1066F0008C846EE08CE200008CE30004AC8200002D
+:10670000AC8300048CE20008AC82000894E2000E26
+:10671000A482000E8CE2001024E50020AC82001060
+:106720008CE20014240A000100A01821AC8200149D
+:10673000276247E00043102B5440000127634000CC
+:1067400000602821316201005440001D31621000B8
+:1067500011A0000931A20800104000042502002009
+:106760008F8200A8A5E2000025020020AF8201244C
+:106770008F8801240000682111800011316210000F
+:106780003C0400018C846EE48C8200008C83000445
+:10679000AF820080AF8300848C820008AF8200A4A7
+:1067A0009482000EAF8200AC8C8200100000602149
+:1067B000AF8200A08C8D00108C8F0014316210000D
+:1067C0001440FF82000000001120000F3122080059
+:1067D000104000043C0200028F8200B8A5C20000F5
+:1067E0003C020002012210241040000424E2002098
+:1067F0008F8200B4AF8200D424E20020AF82010473
+:106800008F870104000048211140FF700000000044
+:106810003C0400018C846EE08C8200008C830004B8
+:10682000AF820090AF8300948C820008AF8200B4E6
+:106830009482000EAF82009C8C82001000005021D8
+:10684000AF8200B08C8900101000FF608C8E0014A5
+:1068500003E0000800000000000060210000582153
+:106860000000302100002821000068210000502194
+:1068700000007821000070218F8801248F87010497
+:106880003C1801001580002E8F89011C11A00014F6
+:10689000312208008F8201201046002900000000EC
+:1068A0003C0400018C846EE48CC200008CC30004A4
+:1068B000AC820000AC8300048CC20008AC820008EB
+:1068C00094C2000EA482000E8CC20010240C0001A1
+:1068D000AC8200108CC200141000001224C60020EC
+:1068E00010400017000000003C0400018C846EE49E
+:1068F0008D0200008D030004AC820000AC83000414
+:106900008D020008AC8200089502000EA482000EE1
+:106910008D02001025060020AC8200108D020014AC
+:10692000240C000100C01821AC82001427624FE043
+:106930000043102B544000012763480000603021C1
+:106940001560002F31220100114000143122800017
+:106950008F8201001045002A312201003C04000111
+:106960008C846EE08CA200008CA30004AC8200003A
+:10697000AC8300048CA20008AC82000894A2000E34
+:10698000A482000E8CA20010240B0001AC82001027
+:106990008CA200141000001224A500201040001842
+:1069A000312201003C0400018C846EE08CE2000086
+:1069B0008CE30004AC820000AC8300048CE200088D
+:1069C000AC82000894E2000EA482000E8CE200105B
+:1069D00024E50020AC8200108CE20014240B00019E
+:1069E00000A01821AC820014276247E00043102B5E
+:1069F000544000012763400000602821312201003B
+:106A00005440001D3122100011A0000931A20800DD
+:106A100010400004250200208F8200A8A5E200009B
+:106A200025020020AF8201248F8801240000682104
+:106A300011800011312210003C0400018C846EE4AE
+:106A40008C8200008C830004AF820080AF830084BE
+:106A50008C820008AF8200A49482000EAF8200AC4A
+:106A60008C82001000006021AF8200A08C8D00108D
+:106A70008C8F00143122100014400022000000000E
+:106A80001140000F31420800104000043C02000297
+:106A90008F8200B8A5C200003C020002014210240F
+:106AA0001040000424E200208F8200B4AF8200D4A2
+:106AB00024E20020AF8201048F87010400005021EE
+:106AC00011600010000000003C0400018C846EE0A6
+:106AD0008C8200008C830004AF820090AF8300940E
+:106AE0008C820008AF8200B49482000EAF82009CBA
+:106AF0008C82001000005821AF8200B08C8A0010F8
+:106B00008C8E00148F8200703C0310000043102410
+:106B10001040FF5C000000008F82005424420005FA
+:106B2000AF8200788C040234108000160000182117
+:106B30003C020001005710218C4240E8244200052D
+:106B40003C01000100370821AC2240E83C02000172
+:106B5000005710218C4240E80044102B14400009DB
+:106B6000240200013C0300803C01000100370821A1
+:106B7000AC2040E83C010001003708211000000C67
+:106B8000A02240F03C02000100571021904240F04A
+:106B9000144000063C0200803C0200010057102116
+:106BA000904240F1104000023C0200800062182533
+:106BB0008C04023010800013000000003C02000131
+:106BC000005710218C4240EC244200053C0100019A
+:106BD00000370821AC2240EC3C0200010057102194
+:106BE0008C4240EC0044102B1440000600000000D2
+:106BF0003C01000100370821AC2040EC10000006E9
+:106C0000007818253C02000100571021904240F204
+:106C100054400001007818251060FF1A00000000A1
+:106C20008F4200001040000700000000AF80004CC1
+:106C30008F82004C1040FFFD000000001000000596
+:106C400000000000AF8000488F8200481040FFFD28
+:106C5000000000008F82006000431025AF820060BA
+:106C60008F42000010400003000000001000FF05EC
+:106C7000AF80004C1000FF03AF80004803E0000825
+:106C80000000000000000000000000003C020001C5
+:106C90008C426D2827BDFFE8AFBF001414400012DE
+:106CA000AFB000103C10000126106F9002002021B0
+:106CB0000C002BA82405200026021FE03C01000147
+:106CC000AC226EEC3C010001AC226EE8AC0202503A
+:106CD00024022000AC100254AC020258240200012D
+:106CE0003C010001AC226D288FBF00148FB0001052
+:106CF00003E0000827BD00183C0900018D296EEC57
+:106D00008C8200008FA300108FA80014AD22000019
+:106D10008C820004AD250008AD2200048F8200544F
+:106D2000AD260010AD270014AD230018AD28001CBF
+:106D3000AD22000C2529FFE03C02000124426F90A7
+:106D40000122102B10400003000000003C0900014C
+:106D50008D296EE83C0200018C426D10AD220000CE
+:106D60003C0200018C426D103C010001AC296EEC2C
+:106D7000AD220004AC09025003E00008000000004E
+:106D800027BDFFD0AFB000103C1000018E106EEC9C
+:106D90003C0200018C426D10AFB1001400808821CC
+:106DA000AFBE00248FBE00408FA40048AFB20018D1
+:106DB00000A09021AFBF0028AFB50020AFB3001CEA
+:106DC000AE0200003C0200018C426D1000C0982110
+:106DD00000E0A82110800006AE020004260500088D
+:106DE0000C002BB324060018100000052610FFE04D
+:106DF000260400080C002BA8240500182610FFE02C
+:106E00003C03000124636F900203102B1040000329
+:106E1000000000003C1000018E106EE88E22000081
+:106E2000AE0200008E220004AE120008AE02000482
+:106E30008F820054AE130010AE150014AE1E001861
+:106E40008FA80044AE08001CAE02000C2610FFE024
+:106E50000203102B10400003000000003C10000152
+:106E60008E106EE83C0200018C426D10AE020000F4
+:106E70003C0200018C426D103C010001AC306EEC14
+:106E8000AE020004AC1002508FBF00288FBE002459
+:106E90008FB500208FB3001C8FB200188FB1001483
+:106EA0008FB0001003E0000827BD003000851821D6
+:106EB0000083102B1040000600000000AC80000092
+:106EC000248400040083102B5440FFFDAC8000009C
+:106ED00003E000080000000000A6182100A3102B0A
+:106EE00010400007000000008C820000ACA20000EF
+:106EF00024A5000400A3102B1440FFFB24840004ED
+:106F000003E0000800000000008618210083102B19
+:106F100010400007000000008CA20000AC820000BE
+:106F2000248400040083102B1440FFFB24A50004DC
+:106F300003E00008000000000006308000861821F1
+:106F40000083102B1040000600000000AC850000FC
+:106F5000248400040083102B5440FFFDAC85000006
+:106F600003E00008000000000000000026E5002803
+:106F700000A03021274301C08F4D03588F47035C89
+:106F80008F4803608F4903648F4A03688F4B020464
+:106F90008F4C0200246404000064102B1040000891
+:106FA0003C0208FF8CC20000AC62000024630004B5
+:106FB0000064102B1440FFFB24C600043C0208FFB1
+:106FC0003442FFFF3C03C0FFAF4D0358AF47035CA3
+:106FD000AF480360AF490364AF4A0368AF4B020494
+:106FE000AF4C02008F8402203463FFFF8F860200C3
+:106FF000008210243442000400C3182434630004C7
+:10700000AF820220AF8302008CA20214AC02008483
+:107010008CA20218AC0200888CA2021CAC02008C6C
+:107020008CA20220AC0200908CA20224AC0200943C
+:107030008CA20228AC0200988CA2022CAC02009C0C
+:107040008CA20230AC0200A08CA20234AC0200A4DC
+:107050008CA20238AC0200A88CA2023CAC0200ACAC
+:107060008CA20240AC0200B08CA20244AC0200B47C
+:107070008CA20248AC0200B88CA2024CAC0200BC4C
+:107080008CA2001CAC0200808CA20018AC0200C0D4
+:107090008CA20020AC0200CC8CA20024AC0200D058
+:1070A0008CA201D0AC0200E08CA201D4AC0200E4BE
+:1070B0008CA201D8AC0200E88CA201DCAC0200EC8E
+:1070C0008CA201E0AC0200F08CA200988CA3009C82
+:1070D000AC0300FC8CA200A88CA300ACAC0300F4B1
+:1070E0008CA200A08CA300A430840004AC0300F8A0
+:1070F0001480000730C200048F8202203C0308FF86
+:107100003463FFFB00431024AF82022030C200042E
+:1071100014400006000000008F8202003C03C0FF04
+:107120003463FFFB00431024AF8202008F4202DC75
+:10713000A34005C524420001AF4202DC8F4202DCBD
+:1071400003E000080000000027BDFFD8AFBF002407
+:10715000AFB000208F4300248F420020106200381F
+:10716000000000008F4300208F4200240062202393
+:1071700004810003000000008F42004000822021B3
+:107180008F4300308F4200240043102B1440000531
+:10719000000000008F4300408F42002410000005D3
+:1071A000006210238F4200308F43002400431023DD
+:1071B0002442FFFF00406021008C102A544000014F
+:1071C000008060218F4A00248F4900408F480024AE
+:1071D0008F4401808F4501848F4600248F4B001C13
+:1071E00024070001AFA7001000084100010018218A
+:1071F000014C50212529FFFF01498024AFB0001424
+:107200008F4700140000102100063100AFA70018BE
+:1072100000A3282100A3382B0082202100872021F1
+:107220008F420108016630210040F809000C390046
+:1072300054400001AF5000248F4300248F420020AF
+:1072400014620018000000008F4200001040000788
+:1072500000000000AF80004C8F82004C1040FFFD0A
+:10726000000000001000000500000000AF80004892
+:107270008F8200481040FFFD000000008F820060F8
+:107280002403FFEF00431024AF8200608F42000010
+:10729000104000030000000010000002AF80004C0E
+:1072A000AF8000488FBF00248FB0002003E00008AB
+:1072B00027BD002803E000080000000027BDFFC034
+:1072C00032C20020AFBF0038AFB30034AFB20030DD
+:1072D000AFB1002C10400004AFB000288F5300283D
+:1072E00010000002000000008F5300208F42003089
+:1072F000105300EB000211008F43001C006280213C
+:107300008E0400008E050004961200088F42009043
+:107310009611000A3246FFFF0046102A104000175F
+:10732000000000008F8200D88F4300980043102394
+:107330002442DCBEAF4200908F4200902842DCBF66
+:1073400010400005000000008F4200908F43014470
+:1073500000431021AF4200908F4200900046102A57
+:1073600010400006000000008F4203482442000144
+:10737000AF420348100000E18F4203488F8200FCB7
+:1073800014400006000000008F4203442442000124
+:10739000AF420344100000D98F420344934205C218
+:1073A0001040000B32C200081040000832220200D8
+:1073B000104000063C0340009602000EAF4300ACB4
+:1073C0000002140010000002AF4200B0AF4000AC59
+:1073D000322200041040007F3222080010400003D7
+:1073E0003247FFFF100000022402002024020004A4
+:1073F000AFA200108F420030AFA200148F420010E5
+:107400003C03000200431025AFA200188F460098ED
+:107410008F4201080040F80900000000104000B74A
+:10742000000000008F42009C8F4300940242102114
+:10743000AF42009CAE03000C8F4200AC104000082D
+:107440003C0340008F42009400431025AFA200206F
+:107450008F42009C8F4300B01000000400431025B1
+:107460008F420094AFA200208F42009CAFA2002464
+:107470008F8200FC8FA300208FA40024AC43000067
+:10748000AC44000424420008AF8200F08F42009C0C
+:107490008F4402708F4502740040182100001021B3
+:1074A00000A3282100A3302B008220210086202168
+:1074B0003223006024020040AF440270AF450274E2
+:1074C000106200172C6200411040000524020020C9
+:1074D00010620008240200011000002600000000D5
+:1074E0002402006010620019240200011000002133
+:1074F000000000008F4202788F43027C2463000169
+:107500002C64000100441021AF420278AF43027C9A
+:107510008F4202788F43027C100000162402000183
+:107520008F4202808F430284246300012C64000197
+:1075300000441021AF420280AF4302848F42028098
+:107540008F4302841000000B240200018F42028846
+:107550008F43028C246300012C640001004410213D
+:10756000AF420288AF43028C8F4202888F43028C65
+:1075700024020001A34205C28F4200983244FFFF5B
+:107580002406FFF88F45013C0044102124420007E7
+:107590000046102424840007AF4200948F420090DC
+:1075A0008F43009400862024004410230065182B8C
+:1075B00014600005AF4200908F4200948F43014455
+:1075C00000431023AF4200948F4200941000002328
+:1075D000AF40009C3247FFFF50E0002232C2002043
+:1075E000144000022402001024020002AFA2001086
+:1075F0008F420030AFA200148F420010AFA20018DB
+:107600008F4600988F4201080040F80900000000F2
+:107610001040003A3245FFFF8F4200988F430090A0
+:107620008F46013C00451021AF4200988F42009CDC
+:107630008F440098A34005C200651823AF43009013
+:10764000004510210086202B14800005AF42009CCD
+:107650008F4200988F43014400431023AF420098AB
+:1076600032C2002010400005000000008F42035885
+:107670002442FFFFAF4203588F4203588F4200302D
+:107680008F430040244200012463FFFF0043102485
+:10769000AF4200308F420030145300180000000049
+:1076A0008F4200001040000700000000AF80004C37
+:1076B0008F82004C1040FFFD00000000100000050C
+:1076C00000000000AF8000488F8200481040FFFD9E
+:1076D000000000008F8200602403FFF700431024A5
+:1076E000AF8200608F4200001040000300000000E5
+:1076F00010000002AF80004CAF8000488FBF003800
+:107700008FB300348FB200308FB1002C8FB00028BF
+:1077100003E0000827BD004003E00008000000006F
+:1077200027BDFFD032C20020AFBF002CAFB200286F
+:10773000AFB1002410400004AFB000208F520028E9
+:1077400010000002000000008F5200208F42003025
+:10775000105200B5000211008F43001C006280210E
+:107760008E0400008E050004961100088F420090E0
+:107770009607000A3226FFFF0046102A1040001725
+:10778000000000008F8200D88F4300980043102330
+:107790002442DC46AF4200908F4200902842DC47F2
+:1077A00010400005000000008F4200908F4301440C
+:1077B00000431021AF4200908F4200900046102AF3
+:1077C00010400006000000008F42034824420001E0
+:1077D000AF420348100000AB8F4203488F8600FC85
+:1077E00010C0000C000000008F8200F42403FFF89A
+:1077F0000043102400461023000218C35860000103
+:10780000246301008F42008C0043102B14400006BB
+:10781000000712C28F42034424420001AF420344D6
+:10782000100000988F420344934305C21060000F7C
+:10783000304600018F4200103448040032C2000874
+:107840001040000830E20200104000063C034000F7
+:107850009602000EAF4300AC0002140010000004BA
+:10786000AF4200B010000002AF4000AC8F480010E3
+:1078700030E20004104000453227FFFF8F4900AC82
+:107880001120000530C200FF144000062402004011
+:10789000100000042402000814400002240200200A
+:1078A00024020004AFA200108F4300301120000416
+:1078B000AFA300148F4200B000621025AFA20014E5
+:1078C0003C02000201021025AFA200188F4600986A
+:1078D0008F4201080040F8090000000010400069D4
+:1078E0003224FFFF8F42008C8F430094244200011A
+:1078F000AF42008C24020001AE03000CA34205C27B
+:107900008F4200982406FFF88F45013C0044102167
+:10791000244200070046102424840007AF4200944C
+:107920008F4200908F43009400862024004410234F
+:107930000065182B14600005AF4200908F42009440
+:107940008F43014400431023AF4200948F430094BF
+:107950008F4201400043102B10400009000000003E
+:107960008F43013C8F4400948F4200908F45013833
+:107970000064182300431023AF420090AF450094E9
+:107980008F4200941000001FAF42009810E0001DCD
+:1079900030C200FF14400002240200102402000242
+:1079A000AFA200108F420030AFA80018AFA20014A1
+:1079B0008F4600988F4201080040F809000000003F
+:1079C000104000303225FFFF8F4200988F44013C69
+:1079D00000451021AF4200988F4200908F430098DD
+:1079E000A34005C2004510230064182B1460000555
+:1079F000AF4200908F4200988F4301440043102310
+:107A0000AF4200988F4200308F4300402442000173
+:107A10002463FFFF00431024AF4200308F42003048
+:107A200014520018000000008F42000010400007B0
+:107A300000000000AF80004C8F82004C1040FFFD22
+:107A4000000000001000000500000000AF800048AA
+:107A50008F8200481040FFFD000000008F82006010
+:107A60002403FFF700431024AF8200608F42000020
+:107A7000104000030000000010000002AF80004C26
+:107A8000AF8000488FBF002C8FB200288FB1002438
+:107A90008FB0002003E0000827BD003003E000089D
+:107AA0000000000027BDFFD83C02000134422EC078
+:107AB000AFBF00208F4300F08F84010802E2102145
+:107AC00054620004246200083C02000134422CC0CD
+:107AD00002E2102100401821AF4300F0AC6000002A
+:107AE0008F4200EC8C660004146200043C0200012A
+:107AF000248200201000000FAF8201088F4300F0A5
+:107B000034422EC002E210215462000424620008B4
+:107B10003C02000134422CC002E210210040182136
+:107B20008C6200040002114000821021AF82010823
+:107B3000AC6000008C85001830A200361040006C4C
+:107B400030A200018C82001C8F4300408F4400341F
+:107B5000244200012463FFFF0043102400862021FB
+:107B6000AF42002C30A2003014400006AF44003475
+:107B70008F4200348C03023C0043102B144000B4AD
+:107B80000000000032C20010104000282407000846
+:107B90008F4401708F4501748F43002C8F48000C77
+:107BA0008F86012024020080AFA20010AFA3001432
+:107BB000AFA800188F42010C0040F80924C6001C31
+:107BC00014400011240200013C010001003708218B
+:107BD000A02240F18F820124AFA200108F820128E1
+:107BE0003C040001248467C4AFA200148F46002C1B
+:107BF0008F8701203C0500090C002B3B34A51100A8
+:107C000010000036000000008F4203008F43002C5C
+:107C100024420001AF4203008F420300240200010E
+:107C2000A34205C110000026AF4300388F44017005
+:107C30008F4501748F43002C8F48000C8F860120E4
+:107C400024020020AFA20010AFA30014AFA80018B8
+:107C50008F42010C0040F80924C6001C144000119A
+:107C6000240200013C01000100370821A02240F05D
+:107C70008F820124AFA200108F8201283C040001F2
+:107C8000248467B8AFA200148F46002C8F87012090
+:107C90003C0500090C002B3B34A509001000000F27
+:107CA000000000008F42030024420001AF420300A5
+:107CB0008F4203008F42002CA34005C1AF42003821
+:107CC0003C01000100370821A02040F13C010001E7
+:107CD00000370821A02040F0AF4000348F42031449
+:107CE00024420001AF420314100000598F420314D4
+:107CF0001040002230A270008C85001C8F420028AA
+:107D000000A2202304810003000000008F420040F5
+:107D1000008220218F4203588F430000AF45002886
+:107D20000044102110600007AF420358AF80004CA0
+:107D30008F82004C1040FFFD000000001000000585
+:107D400000000000AF8000488F8200481040FFFD17
+:107D5000000000008F82006034420008AF820060A3
+:107D60008F420000104000030000000010000038A7
+:107D7000AF80004C10000036AF8000481040002F4C
+:107D800030A210001040000C30A240008C83001C78
+:107D90008F420050006220230482000124840200EC
+:107DA0008F42035C00441021AF42035C8F420368A2
+:107DB0001000001AAF4300501040000C32C2800087
+:107DC0008C83001C8F42007000622023048200011B
+:107DD000248404008F42036400441021AF420364F2
+:107DE0008F4203681000000DAF4300701040000E7A
+:107DF0003C0208008C83001C8F420060006220233C
+:107E000004820001248401008F4203600044102199
+:107E1000AF4203608F420368AF430060004410210B
+:107E2000AF4203683C02080002C210245040000820
+:107E300036940040100000060000000030A201004F
+:107E400010400003000000000C002BD800000000D0
+:107E50008FBF002003E0000827BD002803E00008D2
+:107E60000000000027BDFFA8AFBF0050AFBE004C10
+:107E7000AFB50048AFB30044AFB20040AFB1003C73
+:107E8000AFB000388F91010826220020AF82010890
+:107E90008E3200180000A82132420024104001BA9E
+:107EA0000000F0218E26001C8F43001C00061100EC
+:107EB000006218218C70000C9604000C962D0016A0
+:107EC0009473000A2C8305DD388288702C420001EF
+:107ED00000621825106000150000282132C2004001
+:107EE00010400015240208009603001414620012CA
+:107EF0003402AAAA9603000E146200070000202193
+:107F00009603001024020300146200040080102174
+:107F1000960200122C4400010080102154400006FB
+:107F200024050016100000040000000024020800D0
+:107F3000508200012405000E934205C3144000083E
+:107F400000005821240B000132620180AF4500A8D7
+:107F5000AF5000A010400002AF4600A4A34B05C3E1
+:107F600010A0008502054021910200000000382188
+:107F70003042000F0002508032C200021040001256
+:107F8000010A1821326200021040001032C20001C2
+:107F900001002021948200002484000200E23821A4
+:107FA0000083102B1440FFFB30E2FFFF00071C0290
+:107FB0000062382100071C0230E2FFFF0062382116
+:107FC00000071027A502000A32C200011040006A13
+:107FD0003262000110400068000000008F4200A8DB
+:107FE00010400065000000008F4200A08F4300A8F1
+:107FF00000431021904C0009318900FF392300060D
+:108000000003182B392200110002102B00621824E3
+:108010001060000C3C0500068F4200A43C040001E7
+:10802000248467D4AFA200108F4200A034A546007C
+:10803000012038210C002B3BAFA200141000004E91
+:108040000000000032C20004144000130000282188
+:10805000316200FF1440000400000000950200029D
+:108060001000000D004A28239505000C9502000E13
+:108070009503001000A2282100A3282195030012D7
+:10808000910400099502000200A3282100A42821E0
+:10809000004A102300A2282102002021948200001F
+:1080A0002484000200E238210088102B1440FFFBDA
+:1080B00000071C0230E2FFFF0062382100071C02AB
+:1080C00030E2FFFF0062382101A5282100051C02D3
+:1080D00030A2FFFF0062282100051C0230A2FFFF32
+:1080E0000062282100A728230005140200A22821ED
+:1080F00030A5FFFF50A000013405FFFF316200FFF3
+:1081000014400008318300FF8F4300A08F4200A875
+:1081100000624021910200003042000F00025080B6
+:10812000318300FF2402000614620003010A1021BB
+:10813000100000022444001024440006316200FFB5
+:1081400014400006000000009482000000A22821D4
+:1081500000051C0230A2FFFF00622821934205C3E4
+:10816000104000033262010050400003A48500006B
+:1081700000052827A48500009622000E8F43009C4E
+:108180000062182132A200FF10400007AF43009C9C
+:108190003C02400002021025AFA200208F42009C4A
+:1081A00010000003005E1025AFB000208F42009C3D
+:1081B000AFA2002432620080104000103262010041
+:1081C0008F4200B424430001000210C00057102168
+:1081D000AF4300B48FA300208FA400243C01000112
+:1081E00000220821AC2338E83C01000100220821CC
+:1081F000AC2438EC100000A532C20020104000640E
+:10820000000000008F4200B424430001000210C0AF
+:1082100000571021AF4300B48FA300208FA4002487
+:108220003C01000100220821AC2338E83C01000198
+:1082300000220821AC2438EC8F4200B410400051D9
+:10824000000038213C090001352938E83C08001FAE
+:108250003508FFFF240BFFFF340AFFFF000710C0A3
+:1082600000571021004910218C4300008C44000469
+:10827000AFA30028AFA4002C8F8200FC8FA300289E
+:108280008FA4002CAC430000AC440004244200083E
+:10829000AF8200F08F42008C2442FFFFAF42008C7F
+:1082A00097A2002E8F4402708F450274004018215F
+:1082B0000000102100A3282100A3302B00822021E0
+:1082C00000862021AF440270AF4502748FA20028BF
+:1082D0000048102490430000306300011460000B3C
+:1082E000004020218F4202788F43027C24630001EA
+:1082F0002C64000100441021AF420278AF43027C9D
+:108300008F4202781000001A8F43027C8C8200009A
+:10831000144B000E0000000094820004144A000B6D
+:10832000000000008F4202888F43028C246300010A
+:108330002C64000100441021AF420288AF43028C3C
+:108340008F4202881000000A8F43028C8F42028005
+:108350008F430284246300012C6400010044102137
+:10836000AF420280AF4302848F4202808F43028477
+:108370008F4200B424E7000100E2102B1440FFB844
+:10838000000710C0A34005C31000003FAF4000B479
+:108390008F8200FC8FA300208FA40024AC43000038
+:1083A000AC44000424420008AF8200F08F42009CDD
+:1083B0008F46008C8F4402708F4502740040182154
+:1083C0000000102124C6FFFFAF46008C00A3282127
+:1083D00000A3302B0082202100862021AF440270B0
+:1083E000AF45027492020000304200011440000CBC
+:1083F0002402FFFF8F4202788F43027C2463000136
+:108400002C64000100441021AF420278AF43027C8B
+:108410008F4202788F43027C1000001C32C2002081
+:108420008E0300001462000F3402FFFF9603000465
+:108430001462000C000000008F4202888F43028CFF
+:10844000246300012C64000100441021AF42028823
+:10845000AF43028C8F4202888F43028C1000000BC6
+:1084600032C200208F4202808F43028424630001C5
+:108470002C64000100441021AF420280AF4302840B
+:108480008F4202808F43028432C2002010400005D8
+:10849000AF40009C8F4203582442FFFFAF42035875
+:1084A0008F4203588E22001C8F430040244200015B
+:1084B0002463FFFF00431024AF42002C32420060CF
+:1084C0001440000832C200108F42003424420001E0
+:1084D000AF4200348C03023C0043102B14400102D5
+:1084E00032C2001010400018240700088F440170A9
+:1084F0008F4501748F43002C8F48000C8F8601201C
+:1085000024020080AFA20010AFA30014AFA800188F
+:108510008F42010C0040F80924C6001C104000479F
+:10852000240200018F4203008F43002C24420001EB
+:10853000AF4203008F42030024020001A34205C1A1
+:108540001000007CAF4300388F4401708F450174E8
+:108550008F43002C8F48000C8F86012024020020BE
+:10856000AFA20010AFA30014AFA800188F42010CF7
+:108570000040F80924C6001C1040005724020001E6
+:10858000100000650000000032420012104000752B
+:10859000324200019622000E8F43009C0062182197
+:1085A00032C2002010400005AF43009C8F420358A8
+:1085B0002442FFFFAF4203588F4203588E22001C13
+:1085C0008F430040244200012463FFFF0043102436
+:1085D000AF42002C324200101440000832C200109A
+:1085E0008F42003424420001AF4200348C03023C2D
+:1085F0000043102B144000BC32C200101040002871
+:10860000240700088F4401708F4501748F43002CAC
+:108610008F48000C8F86012024020080AFA200103A
+:10862000AFA30014AFA800188F42010C0040F80956
+:1086300024C6001C14400011240200013C0100016A
+:1086400000370821A02240F18F820124AFA2001040
+:108650008F8201283C040001248467C4AFA2001467
+:108660008F46002C8F8701203C0500090C002B3B16
+:1086700034A5110010000036000000008F420300F6
+:108680008F43002C24420001AF4203008F420300BD
+:1086900024020001A34205C110000026AF430038A8
+:1086A0008F4401708F4501748F43002C8F48000C5C
+:1086B0008F86012024020020AFA20010AFA3001477
+:1086C000AFA800188F42010C0040F80924C6001C16
+:1086D00014400011240200013C0100010037082170
+:1086E000A02240F08F820124AFA200108F820128C7
+:1086F0003C040001248467B8AFA200148F46002C0C
+:108700008F8701203C0500090C002B3B34A5090094
+:108710001000000F000000008F42030024420001FF
+:10872000AF4203008F4203008F42002CA34005C1DB
+:10873000AF4200383C01000100370821A02040F181
+:108740003C01000100370821A02040F0AF40003478
+:108750008F42031424420001AF4203141000006250
+:108760008F42031410400022324270008E25001CFC
+:108770008F42002800A22023048100030000000093
+:108780008F420040008220218F4203588F43000017
+:10879000AF4500280044102110600007AF42035885
+:1087A000AF80004C8F82004C1040FFFD00000000A5
+:1087B0001000000500000000AF8000488F820048D4
+:1087C0001040FFFD000000008F820060344200086E
+:1087D000AF8200608F4200001040000300000000E4
+:1087E00010000041AF80004C1000003FAF800048F7
+:1087F0001040002F324210001040000C3242400066
+:108800008E23001C8F42005000622023048200014E
+:10881000248402008F42035C00441021AF42035CB9
+:108820008F4203681000001AAF4300501040000C44
+:1088300032C280008E23001C8F4200700062202311
+:1088400004820001248404008F4203640044102148
+:10885000AF4203648F4203681000000DAF43007005
+:108860001040000E3C0208008E23001C8F42006066
+:108870000062202304820001248401008F420360EF
+:1088800000441021AF4203608F420368AF43006091
+:1088900000441021AF4203683C02080002C21024C9
+:1088A00050400011369400401000000F00000000FE
+:1088B0003242004810400007241500018E22001C9F
+:1088C0003C03FFFF0043F0243042FFFF1000FD7522
+:1088D000AE22001C324201001040000300000000E4
+:1088E0000C002BD8000000008FBF00508FBE004C42
+:1088F0008FB500488FB300448FB200408FB1003C69
+:108900008FB0003803E0000827BD005803E00008DE
+:108910000000000000000000000000008F8300E461
+:108920008F8200E02404FFF8004410240062102627
+:108930000002102B0002102303E000080062102444
+:1089400003E000080000000027BDFFE0AFBF001CEF
+:10895000AFB000188F8600C48F8400E08F8500E4DC
+:108960002402FFF80082182410A3000927623FF8B0
+:1089700014A2000224A200082762300000408021D7
+:1089800016030005308200041040000400C02021BE
+:1089900010000022000010218E0400008F42011CF4
+:1089A00014A20003000000008F420120AF42011416
+:1089B0008CA300008F420148008318230043102B32
+:1089C00010400003000000008F420148006218219F
+:1089D00094A20006244200500062102B1440000FA5
+:1089E00000A01021AFA40010AFA300148CA60000BB
+:1089F0008CA700043C0400010C002B3B24846894E9
+:108A00008F42020C24420001AF42020C8F42020C42
+:108A100000001021AF9000E8AF9000E48FBF001C71
+:108A20008FB0001803E0000827BD002003E0000815
+:108A3000000000008F8400E08F8800C48F8300E86E
+:108A40002402FFF80082382400E320232C82100047
+:108A50005040000124841000000420C2008018212E
+:108A60008F4402588F45025C0000102100A328218A
+:108A700000A3302B0082202100862021AF44025821
+:108A8000AF45025C8F8300C88F4201480103202359
+:108A90000082102B14400004008018218F420148EE
+:108AA00000822021008018218F4402508F450254FB
+:108AB0000000102100A3282100A3302B00822021D8
+:108AC00000862021AF440250AF450254AF8800C851
+:108AD000AF8700E4AF8700E803E000080000000073
+:108AE00027BDFF30240A0001AFBF00C8AFBE00C4DD
+:108AF000AFB500C0AFB300BCAFB200B8AFB100B407
+:108B0000AFB000B0A3A00097AFA00044AFAA005C34
+:108B1000934205C4A7A0008E1040000AA7A00086BB
+:108B20008F4B00C4AFAB00648F4A00C0AFAA006C8B
+:108B30008F4B00CCAFAB00748F4A00C810000129E6
+:108B4000AFAA007C8F4201140040F8090000000029
+:108B50000040302110C0034F000000008CC2000014
+:108B60008CC30004AFA20020AFA300248FAB00246D
+:108B70008FAA00203162FFFF2442FFFCAFA2006CED
+:108B80003C02000602C21024AFAB007C144000156A
+:108B9000AFAA006491420000304200011040001171
+:108BA0002402FFFF8D430000146200043402FFFF23
+:108BB000954300041062000B000000000C0024BB71
+:108BC0008FA40064304200FF144000060000000043
+:108BD0008F4201180040F809000000001000032D2A
+:108BE000000000008FA200243C03FFBF3463FFFF9E
+:108BF000004310243C03FFFF0043182414600003CB
+:108C0000AFA2002410000040000018213C020080A8
+:108C10000062102410400007000000008F42038C07
+:108C200024420001AF42038C8F42038C10000036B7
+:108C3000240300018F42021024420001AF420210BF
+:108C40008F4202103C020001006210241040000616
+:108C50003C0200028F4201C424420001AF4201C421
+:108C60008F4201C43C020002006210241040000642
+:108C70003C0200048F42037C24420001AF42037C8B
+:108C80008F42037C3C020004006210241040000666
+:108C90003C0200088F42038024420001AF4203805F
+:108CA0008F4203803C02000800621024104000063E
+:108CB0003C0200108F42038424420001AF4203842F
+:108CC0008F4203843C020010006210241040000612
+:108CD0003C0200208F4201C024420001AF4201C08B
+:108CE0008F4201C03C0200200062102410400006A8
+:108CF000240300018F42038824420001AF4203880D
+:108D00008F420388240300018C0202608FAB006C49
+:108D1000004B102B10400014307000FF8F4201E810
+:108D200024420001AF4201E88F4201E88FAA007C93
+:108D30008F8200E0354A0100AFAA007CAFA200108C
+:108D40008F8200E4241000013C040001248468A008
+:108D5000AFA200148FA600208FA700243C050007B7
+:108D60000C002B3B34A50800120000103C020080D0
+:108D700002C210241440000E32C204008FAB007CEB
+:108D80003C020080344201000162102410400005C2
+:108D9000000000008F42020C24420001AF42020C8E
+:108DA0008F42020C100002B08FA3006C32C204008C
+:108DB00010400015340281008FAA00649543000C16
+:108DC000146200123C020100240B0200A7AB008ECB
+:108DD0009542000E8D4300088D4400048D4500002F
+:108DE0008FAA006C8FAB0064254AFFFCAFAA006C11
+:108DF000A7A20086AD63000CAD640008AD65000459
+:108E0000256B0004AFAB00643C02010002C21024D9
+:108E100010400004000000008FAA006C254A0004E6
+:108E2000AFAA006C8F4200BC5040000AAFA0007493
+:108E30008FAB006C004B102B50400006AFA00074AD
+:108E40008F4200BC01621023AFA200748F4A00BCA5
+:108E5000AFAA006C8F4200808FAB006C004B102BD0
+:108E60001040005632C280001040005E240A000309
+:108E700032C210001040005BAFAA005C1000005826
+:108E8000240B00048F4203502403FFBF0283A0245D
+:108E900024420001AF4203501000024F8F420350A2
+:108EA00002C2B0252402FFBF0282A0248F830128C2
+:108EB0003C040001248468D026620001AFA20014A3
+:108EC000AFA300108F8601208F8701243C05000787
+:108ED0000C002B3B34A522501000023F0000000084
+:108EE00002C2B0252402FFBF0282A0248F83012882
+:108EF0003C040001248468D024020002AFA20014C4
+:108F0000AFA300108F8601208F8701243C05000746
+:108F10000C002B3B34A524501000022F0000000051
+:108F20008EA200008EA300043C040001248468E8A3
+:108F3000AFB00010AFBE00148EA7001834A52800F3
+:108F40000C002B3B006030211000022300000000C9
+:108F5000A6B1000A8F8201243C040001248468F039
+:108F6000AFBE0014AFA200108F4600448F870120CF
+:108F70003C0500070C002B3B34A530001000021606
+:108F800000000000A6B1000AA6B2000E8F820124E4
+:108F90003C040001248468FCAFBE0014AFA20010A2
+:108FA0008F4600448F8701203C0500070C002B3BB7
+:108FB00034A5320010000208000000008F42008437
+:108FC0008FAA006C004A102B144000073C020001DD
+:108FD00002C210241040000400000000240B000214
+:108FE000AFAB005C8FAA006C1140021B27AB0020C6
+:108FF000AFAB00A43C0A001F354AFFFFAFAA009C9C
+:109000008FAB005C240A0001556A0021240A00028B
+:109010008F4300548F4200501062000B274B0054C6
+:109020008F5E00543403ECC0AFAB004C27C200018C
+:10903000304201FFAFA20054001E11400043102136
+:109040001000006B02E2A8218F4200448FAA006C3E
+:109050003C040001248468ACAFAA0014AFA2001045
+:109060008F4600548F4700503C0500070C002B3BF7
+:1090700034A513008F4303502402FFBF0282A024B3
+:1090800024630001AF430350100001D38F4203500B
+:10909000156A001D000000008F4300748F420070AD
+:1090A0001062000A274B00748F5E0074AFAB004C57
+:1090B00027C20001304203FFAFA20054001E11403E
+:1090C00024426CC01000004A02E2A8218F420044F2
+:1090D0008FAA006C3C040001248468B83C0500079A
+:1090E000AFAA0014AFA200108F4600748F47007023
+:1090F00034A51500240B00010C002B3BAFAB005C2A
+:109100001000FFC3000000008F4300648F42006026
+:109110001062001A274A00648F5E00648FAB005C07
+:10912000AFAA004C27C20001304200FFAFA200549A
+:10913000240200041562000E001E1140001E118062
+:1091400024420CC002E21021AFA200449442002A43
+:109150008FAA00448FAB006C004B102B10400024F2
+:1091600025550020240A000110000021A3AA009721
+:1091700024424CC01000001E02E2A8218F4200448D
+:109180008FAB006C3C040001248468C4AFAB0014B6
+:10919000AFA200108F4600648F4700603C050007B7
+:1091A0000C002B3B34A518003C02000802C210241E
+:1091B0001440FF34000000008F420370240A0001B5
+:1091C000AFAA005C24420001AF4203701000FF9080
+:1091D0008F42037027A3003600131040006218214D
+:1091E000946200000044102110000020A4620000DE
+:1091F0008FAB0064AEAB001893A2009710400072D2
+:10920000000098218FAA00448FA4006C8FA300A4B3
+:1092100025420020AFA2002825420008AFA200305E
+:1092200025420010AFAA002CAFA200349542002ABC
+:10923000A7A2003895420018A7A2003A9542001A4A
+:10924000A7A2003C9542001CA7A2003E9462001811
+:1092500024630002008220231880FFDE26730001B1
+:109260002E6200041440FFF9000000008F4200FC51
+:109270002665000100A2102A1440002B24030001DF
+:109280008F83012C10600023000000008F820124D6
+:109290000043102300022143588000012484004031
+:1092A0008F820128004310230002194358600001F7
+:1092B000246300400064102A544000010060202113
+:1092C000AF4400FC8F4200FC00A2102A10400011A5
+:1092D0002403000110000015306200FF8FAB006412
+:1092E00096070018AFAB00108E2200083C04000166
+:1092F000248468DC8C4300048C42000034A52400E4
+:10930000024030210C002B3BAFA300141000002BB7
+:10931000000000008F4203340000182124420001A5
+:10932000AF4203348F420334306200FF5040FEDC12
+:109330003C02080012600021000090218FB100A4BF
+:10934000022080218E220008960700188FA6006454
+:109350008C4400008C450004240A0001AFAA0010D0
+:10936000AFBE00148F420008AFA200188F42010C5C
+:109370000040F809000000001040FFD83C0500073D
+:10938000960200188FAB00648FAA009C01625821DE
+:10939000014B102B10400004AFAB00648F4201481A
+:1093A00001625823AFAB0064261000022652000170
+:1093B0000253102B1440FFE3263100048FB0006CE1
+:1093C0001000003697B100388F4200FC24050002DF
+:1093D00000A2102A1440001B240300018F83012CDB
+:1093E00010600013000000008F820124004310234E
+:1093F0000002214358800001248400408F8201280C
+:109400000043102300021943586000012463004008
+:109410000064102A5440000100602021AF4400FC89
+:109420008F4200FC00A2102A144000062403000111
+:109430008F4203340000182124420001AF4203345C
+:109440008F420334306200FF1040FEA53C0208004A
+:1094500096B1000A8FB0006C3223FFFF0070102B12
+:1094600054400001006080218EA400008EA50004FD
+:10947000240B0001AFAB0010AFBE00148F420008F8
+:109480008FA60064AFA200188F42010C0040F809BB
+:10949000020038211040FEA23C05000796A3000EF2
+:1094A00097AA008E1140000700609021934205C4E6
+:1094B000144000040000000097AB0086006A1825E5
+:1094C000A6AB00168FAA007C3C02FFFF01421024CD
+:1094D00010400003000A140234630400A6A2001422
+:1094E0008FAB006C560B0072A6A3000E3462000412
+:1094F000A6A2000E8FAA0074016A1021A6A2000A7B
+:109500008F4300448F4401A08F4501A434028000A2
+:10951000AFA200108F42004402A030212407002097
+:10952000AFA200148F42000C0003194000604821D4
+:10953000AFA200188F42010C0000402100A9282191
+:1095400000A9182B008820210040F8090083202161
+:109550005040FE7FA6B2000E8F420368AFA0006CA1
+:10956000A34005C42442FFFFAF4203688FAB005CF9
+:10957000240A00018F420368156A0006240A0002CB
+:109580008F42035C2442FFFFAF42035C1000000CDB
+:109590008F42035C156A0006000000008F420364DE
+:1095A0002442FFFFAF420364100000058F420364B2
+:1095B0008F4203602442FFFFAF4203608F4203608B
+:1095C0008FAA00548FAB004CAD6A00008F4200445C
+:1095D0008F4400888F430078244200010044102407
+:1095E00024630001AF420044AF4300788C02024084
+:1095F0000062182B14600075240700088F4401686E
+:109600008F45016C8F4300448F48000C8F860120EA
+:1096100024020040AFA20010AFA30014AFA80018AE
+:109620008F42010C0040F80924C6001C14400011B0
+:10963000240B00013C01000100370821A02B40F25F
+:109640008F820124AFA200108F8201283C04000108
+:109650002484688CAFA200148F4600448F870120B9
+:109660003C0500090C002B3B34A513001000000B37
+:10967000000000008F42030424420001AF420304B3
+:109680008F4203048F420044AF42007C3C01000142
+:1096900000370821A02040F2AF4000788F42031825
+:1096A00024420001AF420318100000488F42031803
+:1096B000A6B0000A8F4300448F4401A08F4501A447
+:1096C00034028000AFA200108F42004402A030217B
+:1096D00024070020AFA200148F42000C00031940A1
+:1096E00000604821AFA200188F42010C0000402109
+:1096F00000A9282100A9182B008820210040F80982
+:10970000008320211040FE1F240A0001A34A05C443
+:109710008FAB006C8FAA006401705823AFAB006C54
+:109720008FAB009C01505021016A102B10400004A7
+:10973000AFAA00648F42014801425023AFAA0064DF
+:109740008F4203682442FFFFAF4203688FAA005C88
+:10975000240B00018F420368154B0006240B000206
+:109760008F42035C2442FFFFAF42035C1000000CF9
+:109770008F42035C114B0006000000008F42036023
+:109780002442FFFFAF420360100000058F420360D8
+:109790008F4203642442FFFFAF4203648F4203649D
+:1097A0008FAB00548FAA004CAD4B00008F42004499
+:1097B0008F4400888F430078244200010044102425
+:1097C00024630001AF420044AF4300788FAA006CCD
+:1097D0001540FE0B000000008FAB006C1160001EF6
+:1097E00000000000934205C4104000090000000082
+:1097F0008FAA0064AF4A00C4AF4B00C08FAB007C9F
+:10980000AF4B00C88FAA00741000000EAF4A00CC06
+:1098100097AB008E1160000B340381008FA20020F3
+:109820008C46000CA443000C97AA00868C440004CC
+:109830008C450008A44A000EAC440000AC4500046E
+:10984000AC4600088F42034C24420001AF42034C57
+:10985000100000108F42034C8FAB007C3164FFFF7F
+:109860002484FFFC008018218F4402508F4502544D
+:109870008F4601180000102100A3282100A3382BD7
+:109880000082202100872021AF44025000C0F80947
+:10989000AF4502548FBF00C88FBE00C48FB500C053
+:1098A0008FB300BC8FB200B88FB100B48FB000B0DE
+:1098B00003E0000827BD00D003E00008000000001E
+:1098C00027BDFF38240B0001AFBF00C0AFBE00BCF6
+:1098D000AFB500B8AFB300B4AFB200B0AFB100AC39
+:1098E000AFB000A8A3A00087AFA00044AFAB005C5E
+:1098F000934205C4A7A0007610400007A7A0007EF1
+:109900008F4C00C0AFAC00648F4B00C88F5E00C4AA
+:1099100010000130AFAB006C8F4201140040F80919
+:10992000000000000040302110C002A10000000033
+:109930008CC200008CC30004AFA20020AFA300249F
+:109940008FAC00248FBE00203182FFFF2442FFFC39
+:10995000AFA200643C02000602C2102414400015AD
+:10996000AFAC006C93C20000304200011040001107
+:109970002402FFFF8FC30000146200043402FFFFC3
+:1099800097C300041062000B000000000C0024BB11
+:1099900003C02021304200FF1440000600000000F8
+:1099A0008F4201180040F8090000000010000280FA
+:1099B000000000008FA200243C03FFBF3463FFFFC0
+:1099C000004310243C03FFFF0043182414600003ED
+:1099D000AFA2002410000040000080213C02008063
+:1099E0000062102410400007000000008F42038C2A
+:1099F00024420001AF42038C8F42038C10000036DA
+:109A0000241000018F42021024420001AF420210D4
+:109A10008F4202103C020001006210241040000638
+:109A20003C0200028F4201C424420001AF4201C443
+:109A30008F4201C43C020002006210241040000664
+:109A40003C0200048F42037C24420001AF42037CAD
+:109A50008F42037C3C020004006210241040000688
+:109A60003C0200088F42038024420001AF42038081
+:109A70008F4203803C020008006210241040000660
+:109A80003C0200108F42038424420001AF42038451
+:109A90008F4203843C020010006210241040000634
+:109AA0003C0200208F4201C024420001AF4201C0AD
+:109AB0008F4201C03C0200200062102410400006CA
+:109AC000241000018F42038824420001AF42038822
+:109AD0008F420388241000018C0202608FAB006467
+:109AE000004B102B10400015320200FF8F4201E89E
+:109AF00024420001AF4201E88F4201E88FAC006CC4
+:109B00008F8200E0358C0100AFAC006CAFA200107A
+:109B10008F8200E4241000013C040001248468A02A
+:109B2000AFA200148FA600208FA700243C050007D9
+:109B30000C002B3B34A53600320200FF1040001011
+:109B40003C02008002C210241440000E32C2040005
+:109B50008FAB006C3C020080344201000162102493
+:109B600010400005000000008F42020C244200015A
+:109B7000AF42020C8F42020C100002028FA300645D
+:109B800032C20400104000123402810097C3000C5E
+:109B90001462000F00000000240C0200A7AC007645
+:109BA00097C2000E8FC300088FC400048FAB0064FF
+:109BB0008FC50000256BFFFCAFAB0064A7A2007E41
+:109BC000AFC3000CAFC40008AFC5000427DE00041B
+:109BD0008FA70064320200FF144000343C020100F1
+:109BE00097C4000C2C8305DD388288702C4200015C
+:109BF00000621825106000150000282132C20800FC
+:109C0000104000152402080097C3001414620012CB
+:109C10003402AAAA97C3000E146200070000202194
+:109C200097C3001024020300146200040080102176
+:109C300097C200122C4400010080102154400006FD
+:109C40002405001610000004000000002402080093
+:109C5000508200012405000E10A0001303C520212E
+:109C6000248300093C02001F3442FFFF0043102BF5
+:109C700010400003000000008F42014800621823DA
+:109C800090620000384300062C6300013842001146
+:109C90002C42000100621825106000043C02010003
+:109CA00094820002004538213C02010002C21024C7
+:109CB0005040000EAFA700648FAC006410EC0008A9
+:109CC0003C0500073C040001248469088FA6006459
+:109CD00034A54000AFA000100C002B3BAFA0001437
+:109CE0008FAB0064256B0004AFAB00648F42008033
+:109CF0008FAC0064004C102B1040002C32C280004E
+:109D000010400034240B000332C210001040003118
+:109D1000AFAB005C1000002E240C00048F420350F7
+:109D20002403FFBF0283A02424420001AF4203505A
+:109D3000100001738F4203503C02080002C2B0259C
+:109D40002402FFBF0282A0248F8301283C0400016B
+:109D5000248468D026620001AFA20014AFA30010D3
+:109D60008F8601208F8701243C0500070C002B3BC8
+:109D700034A5530010000162000000008EA2000014
+:109D80008EA300043C040001248468E8AFB00010F6
+:109D9000AFB100148EA7001834A559000C002B3B5E
+:109DA0000060302110000156000000008F42008446
+:109DB0008FAB0064004B102B144000073C020001E5
+:109DC00002C210241040000400000000240C000215
+:109DD000AFAC005C8FAB00641160016627AC002063
+:109DE000AFAC008C8FAB005C240C0001556C0021E3
+:109DF000240C00028F4300548F4200501062000B6D
+:109E0000274B00548F5100543403ECC0AFAB004CCF
+:109E100026220001304201FFAFA200540011114080
+:109E2000004310211000006B02E2A8218F42004481
+:109E30008FAC00643C040001248468ACAFAC001417
+:109E4000AFA200108F4600548F4700503C0500071A
+:109E50000C002B3B34A543008F4303502402FFBF6B
+:109E60000282A02424630001AF43035010000124A8
+:109E70008F420350156C001D000000008F430074DA
+:109E80008F4200701062000A274B00748F510074DB
+:109E9000AFAB004C26220001304203FFAFA20054BA
+:109EA0000011114024426CC01000004A02E2A821B7
+:109EB0008F4200448FAC00643C040001248468B8E5
+:109EC0003C050007AFAC0014AFA200108F46007431
+:109ED0008F47007034A54500240B00010C002B3B7C
+:109EE000AFAB005C1000FFC3000000008F430064B4
+:109EF0008F4200601062001A274C00648F5100648A
+:109F00008FAB005CAFAC004C26220001304200FF5A
+:109F1000AFA20054240200041562000E001111408B
+:109F20000011118024420CC002E21021AFA20044B3
+:109F30009442002A8FAC00448FAB0064004B102B7E
+:109F40001040002425950020240C00011000002161
+:109F5000A3AC008724424CC01000001E02E2A821DE
+:109F60008F4200448FAB00643C040001248468C429
+:109F7000AFAB0014AFA200108F4600648F470060A3
+:109F80003C0500070C002B3B34A548003C020008B0
+:109F900002C210241440FF61000000008F420370D1
+:109FA000240C0001AFAC005C24420001AF420370FE
+:109FB0001000FF908F42037027A30036001310405B
+:109FC0000062182194620000004410211000001F5C
+:109FD000A4620000AEBE001893A200871040008467
+:109FE000000098218FAB00448FA400648FA3008CE5
+:109FF00025620020AFA2002825620008AFA2003031
+:10A0000025620010AFAB002CAFA200349562002A8D
+:10A01000A7A2003895620018A7A2003A9562001A1C
+:10A02000A7A2003C9562001CA7A2003E9462001803
+:10A0300024630002008220231880FFDF26730001C2
+:10A040002E6200041440FFF9000000008F4200FC63
+:10A050000262102A14400030240300018F83012C77
+:10A0600010600028000000008F82012400431023AC
+:10A070000002214358800001248400408F8201287F
+:10A08000004310230002194358600001246300407C
+:10A090000064102A5440000100602021AF4400FCFD
+:10A0A0008F4200FC0262102A1040001624030001B7
+:10A0B0001000001A306200FF8FAC008C00101040BE
+:10A0C000004C10219447001800101080004C102103
+:10A0D000AFBE00108C4200083C040001248468DC00
+:10A0E0003C0500078C4300048C42000034A5550059
+:10A0F000020030210C002B3BAFA3001410000039EC
+:10A10000000000008F4203340000182124420001A7
+:10A11000AF4203348F420334306200FF1040FF0629
+:10A12000000080218F4300082402FBFF1260002DF5
+:10A13000006250243C0B4000022B40258FB1008C64
+:10A140002669FFFF022090218E4200089627001802
+:10A150008C4400008C45000456090004240B0001C7
+:10A16000240C000210000002AFAC0010AFAB0010D6
+:10A1700016000004AFA800148F420008100000026F
+:10A18000AFA20018AFAA00188F42010C03C0302103
+:10A19000AFA80098AFA9009C0040F809AFAA00A0A2
+:10A1A0008FA800988FA9009C8FAA00A01040FFC222
+:10A1B0003C02001F962300183442FFFF03C3F02126
+:10A1C000005E102B10400003263100028F42014830
+:10A1D00003C2F023261000010213102B1440FFDAF3
+:10A1E000265200048FB000641000001A0000000026
+:10A1F00096A3000A8FB000640070102B5440000139
+:10A20000006080218EA400008EA500048FAB005C4E
+:10A21000240C0002AFAC0010934305C4000B1700E0
+:10A2200010600003022230253C02080000C23025E5
+:10A23000AFA600148F420008AFA200188F42010C95
+:10A2400003C030210040F809020038211040FECB45
+:10A250003C05000797AC00761180000796A3000E1E
+:10A26000934205C4144000040000000097AB007E38
+:10A27000006C1825A6AB00168FAC006C3C02FFFFEB
+:10A280000182102410400003000C14023463040007
+:10A29000A6A20014A6B0000A8FAB0064560B0006FD
+:10A2A00003D0F02134620004AFA00064A6A2000E27
+:10A2B0001000000DA34005C48FAC00643C02001FD9
+:10A2C0003442FFFF005E102B01906023AFAC0064AE
+:10A2D000A6A3000E240B000110400003A34B05C4ED
+:10A2E0008F42014803C2F0238FAB00548FAC004C67
+:10A2F000AD8B00008FAC00641580FEBA000000003A
+:10A300008FAB00641160001B00000000934205C485
+:10A310001040000600000000AF5E00C4AF4B00C05C
+:10A320008FAC006C1000000EAF4C00C897AB0076ED
+:10A330001160000B340381008FA200208C46000CBA
+:10A34000A443000C97AC007E8C4400048C450008AC
+:10A35000A44C000EAC440000AC450004AC46000820
+:10A360008F42034C24420001AF42034C1000001006
+:10A370008F42034C8FAB006C3164FFFF2484FFFCE1
+:10A38000008018218F4402508F4502548F460118D7
+:10A390000000102100A3282100A3382B00822021D7
+:10A3A00000872021AF44025000C0F809AF45025495
+:10A3B0008FBF00C08FBE00BC8FB500B88FB300B494
+:10A3C0008FB200B08FB100AC8FB000A803E00008DE
+:10A3D00027BD00C803E000080000000027BDFFD82B
+:10A3E000AFBF0024AFB000208F43004C8F42004825
+:10A3F00010620034000000008F4300488F42004C80
+:10A400000062202304820001248402008F43005450
+:10A410008F42004C0043102B144000042402020021
+:10A420008F43004C10000005004310238F4200545E
+:10A430008F43004C004310232442FFFF0040502173
+:10A44000008A102A54400001008050218F49004C9E
+:10A450008F48004C8F4401888F45018C8F46004CFB
+:10A4600024071000AFA70010000841400100182188
+:10A47000012A4821313001FFAFB000148F4700148A
+:10A480000000102100063140AFA7001800A32821CA
+:10A4900000A3382B00822021008720213402ECC049
+:10A4A00000C230218F42010802E630210040F80945
+:10A4B000000A394054400001AF50004C8F43004C1B
+:10A4C0008F42004814620018000000008F42000014
+:10A4D0001040000700000000AF80004C8F82004C4D
+:10A4E0001040FFFD0000000010000005000000000B
+:10A4F000AF8000488F8200481040FFFD0000000040
+:10A500008F8200602403FDFF00431024AF820060AF
+:10A510008F42000010400003000000001000000205
+:10A52000AF80004CAF8000488FBF00248FB0002068
+:10A5300003E0000827BD002803E000080000000039
+:10A5400027BDFFD8AFBF0024AFB000208F43005C11
+:10A550008F42005810620049000000008F430058ED
+:10A560008F42005C006220230482000124840100E9
+:10A570008F4300648F42005C0043102B14400004A2
+:10A58000240201008F43005C1000000500431023EB
+:10A590008F4200648F43005C004310232442FFFF7E
+:10A5A000004038210087102A5440000100803821E3
+:10A5B0008F42005C00471021305000FF32C2100073
+:10A5C00010400015240820008F49005C8F44019042
+:10A5D0008F4501948F46005C00073980AFA80010BA
+:10A5E000AFB000148F4800140009498001201821E1
+:10A5F0000000102100A3282100A3482B0082202165
+:10A600000089202100063180AFA800188F42010880
+:10A610001000001424C60CC08F49005C8F440190C8
+:10A620008F4501948F46005C00073940AFA80010A9
+:10A63000AFB000148F4800140009494001201821D0
+:10A640000000102100A3282100A3482B0082202114
+:10A650000089202100063140AFA800188F42010870
+:10A6600024C64CC00040F80902E6302154400001E5
+:10A67000AF50005C8F43005C8F420058146200189A
+:10A68000000000008F4200001040000700000000A2
+:10A69000AF80004C8F82004C1040FFFD0000000096
+:10A6A0001000000500000000AF8000488F820048C5
+:10A6B0001040FFFD000000008F8200602403FEFFB9
+:10A6C00000431024AF8200608F420000104000035E
+:10A6D0000000000010000002AF80004CAF80004876
+:10A6E0008FBF00248FB0002003E0000827BD0028A2
+:10A6F00003E000080000000027BDFFD8AFBF002422
+:10A70000AFB000208F43006C8F42006810620033AE
+:10A71000000000008F4300688F42006C006220231D
+:10A7200004820001248404008F4300748F42006C73
+:10A730000043102B14400004240204008F43006CDB
+:10A7400010000005004310238F4200748F43006CFB
+:10A75000004310232442FFFF00405021008A102AAA
+:10A7600054400001008050218F49006C8F48006CDC
+:10A770008F4401988F45019C8F46006C2407400050
+:10A78000AFA700100008414001001821012A48210C
+:10A79000313003FFAFB000148F47001400001021C8
+:10A7A0000006314024C66CC0AFA7001800A32821C2
+:10A7B00000A3382B00822021008720218F4201082E
+:10A7C00002E630210040F809000A394054400001F7
+:10A7D000AF50006C8F43006C8F4200681462001809
+:10A7E000000000008F420000104000070000000041
+:10A7F000AF80004C8F82004C1040FFFD0000000035
+:10A800001000000500000000AF8000488F82004863
+:10A810001040FFFD000000008F8200602403F7FF5E
+:10A8200000431024AF8200608F42000010400003FC
+:10A830000000000010000002AF80004CAF80004814
+:10A840008FBF00248FB0002003E0000827BD002840
+:10A8500003E00008000000008F4200FC3C03000100
+:10A860008F4400F8346330C824420001AF4200FC3A
+:10A870008F85012802E310215482000424820008FD
+:10A880003C02000134422EC802E21021004018218F
+:10A89000AF4300F8AC6000008F4200F41462000483
+:10A8A0003C02000124A200201000000FAF8201280A
+:10A8B0008F4300F8344230C802E210215462000491
+:10A8C000246200083C02000134422EC802E210213A
+:10A8D000004018218C6200040002114000A21021E7
+:10A8E000AF820128AC6000008CA3001830620070B9
+:10A8F0001040002D30620020104000043C02001087
+:10A9000002C210241040000D000000003062004020
+:10A91000104000043C02002002C210241040000736
+:10A9200000000000306200101040001F3C02004098
+:10A9300002C210241440001C000000008F8200405E
+:10A940003042000114400008000020218C03010463
+:10A950002402000150620005240400018C020264FC
+:10A960001040000300801021240400010080102109
+:10A9700010400006000000008F42030C244200013A
+:10A98000AF42030C100000088F42030C8F8200447A
+:10A9900034420004AF8200448F4203082442000185
+:10A9A000AF4203088F42030803E0000800000000E4
+:10A9B00003E000080000000027BDFF98AFBF006063
+:10A9C000AFBE005CAFB50058AFB30054AFB200509B
+:10A9D000AFB1004CAFB000488F4200FC24420001F0
+:10A9E000AF4200FC8F88012825020020AF82012899
+:10A9F0008D030018306200701040002E306200207D
+:10AA0000104000043C02001002C210241040000D4F
+:10AA10000000000030620040104000043C020020B2
+:10AA200002C2102410400007000000003062001035
+:10AA3000104001A93C02004002C21024144001A6AB
+:10AA4000000000008F8200403042000114400008E6
+:10AA5000000020218C030104240200015062000543
+:10AA6000240400018C0202641040000300801021C5
+:10AA700024040001008010211040000600000000A6
+:10AA80008F42030C24420001AF42030C10000192DC
+:10AA90008F42030C8F82004434420004AF82004492
+:10AAA0008F42030824420001AF4203081000018ACC
+:10AAB0008F420308306200021040014B3C02080044
+:10AAC0008D1E001C001E5702AFAA0034950A001606
+:10AAD00003C22024AFAA00248FAA0034240200015C
+:10AAE0001542000633DEFFFF001E11403403ECC0A8
+:10AAF000004310211000001002E2A82124020002ED
+:10AB00001542000524020003001E114024426CC0BF
+:10AB10001000000902E2A82115420005001E118064
+:10AB2000001E114024424CC01000000302E2A82184
+:10AB30000057102124550CE096A2000E304AFFFC6D
+:10AB40003042040010400003AFAA002C100000E1C6
+:10AB500000008821108000040000882197B10026A1
+:10AB6000100000DDA6B100128EB30018966A000C2A
+:10AB7000A7AA003E97A5003E2CA305DD38A2887049
+:10AB80002C420001006218251060001500002021F1
+:10AB900032C2080010400015240208009663001419
+:10ABA000146200123402AAAA9663000E146200070F
+:10ABB00000002821966300102402030014620004A0
+:10ABC00000A01021966200122C45000100A0102167
+:10ABD0005440000624040016100000040000000089
+:10ABE0002402080050A200012404000E108000B9C5
+:10ABF00002649021924200003042000F00028080E7
+:10AC000032C2010010400020025018213C020020F6
+:10AC10000043102B1440000E024020210000282188
+:10AC2000948200002484000200A228210083102BBB
+:10AC30001440FFFB30A2FFFF00051C020062282128
+:10AC400000051C0230A2FFFF10000009006228214D
+:10AC50008F4701488F420110001028423C06002017
+:10AC60000040F809AFA800403045FFFF8FA8004022
+:10AC700050A000013405FFFF8FAA002C354A0002C6
+:10AC800010000002AFAA002C0000282132C2008070
+:10AC900010400090A6A50010264300093C02001FAA
+:10ACA0003442FFFF0043102B10400003000000005F
+:10ACB0008F420148006218239066000030C200FFF6
+:10ACC000384300062C630001384200112C42000179
+:10ACD000006218251060007F24020800000088210F
+:10ACE00097A3003E1462000F0260202196710000BD
+:10ACF0009662000296630004966400060222882190
+:10AD00000223882102248821966200089663000AA3
+:10AD10009664000C0222882102238821100000077B
+:10AD200002248821948200002484000202228821C7
+:10AD30000092102B1440FFFB0000000000111C02C9
+:10AD40003222FFFF0062882100111C023222FFFF25
+:10AD50000062882132C2020010400003264400062F
+:10AD60001000003E000080213C05001F34A5FFFFBD
+:10AD700000A4102B10400003000000008F42014887
+:10AD8000008220239482000030421FFF1040000404
+:10AD90002644000C96420002100000300050802330
+:10ADA0009642000226430014005080233C020020FB
+:10ADB0000043102B1440000A00D080219642000C62
+:10ADC000020280219642000E964300109644001223
+:10ADD0000202802102038021100000200204802151
+:10ADE00000A4102B10400003000000008F42014817
+:10ADF0000082202394820000248400020202802129
+:10AE000000A4102B10400003000000008F420148F6
+:10AE10000082202394820000248400020202802108
+:10AE200000A4102B10400003000000008F420148D6
+:10AE300000822023948200002484000202028021E8
+:10AE400000A4102B10400003000000008F420148B6
+:10AE50000082202394820000020280213C02010033
+:10AE600002C210241040000E000000008FAA002C27
+:10AE7000314200041040000A000000009504000E5A
+:10AE8000026420210C003EEC2484FFFC3042FFFFD2
+:10AE90000222882100111C023222FFFF0062882159
+:10AEA0008FAA002401518823001114020222882154
+:10AEB0000230882100111402022288213231FFFF62
+:10AEC000522000013411FFFF8FAA002C354A0001E7
+:10AED000AFAA002CA6B1001297AA002EA6AA000EB7
+:10AEE0008FAA002C314200041040000224091000F7
+:10AEF000340980008F4800448F4401A08F4501A48D
+:10AF0000AFA900108F4900440008414001001821FA
+:10AF1000AFA900148F48000C02A0302124070020A4
+:10AF2000AFA800188F48010C0000102100A32821B1
+:10AF300000A3482B008220210100F809008920216C
+:10AF40001440000B000000008F8201283C04000127
+:10AF500024846914AFBE0014AFA200108F860124B0
+:10AF60008F8701203C0500070C002B3B34A599205E
+:10AF70008F4203682442FFFFAF4203688F420044C0
+:10AF80008F4300882442000100431024AF42004454
+:10AF90008FAA00348F440368240200011542000682
+:10AFA000240200028F42035C2442FFFFAF42035C95
+:10AFB000100000498F42035C1542000600000000AB
+:10AFC0008F4203642442FFFFAF420364100000423B
+:10AFD0008F4203648F4203602442FFFFAF4203604D
+:10AFE0001000003D8F4203603062100010400005E9
+:10AFF000306280008F420078244200011000003649
+:10B00000AF42007810400034000000008F4200780A
+:10B0100024420001AF4200788C0302400043102B11
+:10B020001440002D240700088F4401688F45016CEF
+:10B030008F4300448F48000C8F860120240200407B
+:10B04000AFA20010AFA30014AFA800188F42010CEC
+:10B050000040F80924C6001C14400011240200011D
+:10B060003C01000100370821A02240F28F82012418
+:10B07000AFA200108F8201283C0400012484688C58
+:10B08000AFA200148F4600448F8701203C050009C1
+:10B090000C002B3B34A513001000000B0000000037
+:10B0A0008F42030424420001AF4203048F42030491
+:10B0B0008F420044AF42007C3C0100010037082170
+:10B0C000A02040F2AF4000788F42031824420001D4
+:10B0D000AF4203188F4203188FBF00608FBE005C21
+:10B0E0008FB500588FB300548FB200508FB1004C11
+:10B0F0008FB0004803E0000827BD006803E00008A7
+:10B100000000000000000000000000008F42013C31
+:10B11000AF8200C08F42013CAF8200C48F42013C2D
+:10B12000AF8200C88F420138AF8200D08F42013811
+:10B13000AF8200D48F42013803E00008AF8200D80C
+:10B1400027BDFFE02784020824050200AFBF0018D6
+:10B150000C002BBF240600088C0202040C004012D5
+:10B16000AF8202103C0200018C426D94304200021A
+:10B170001040000E000020218C060248240200022C
+:10B180003C010001AC226D980C0051042405000222
+:10B19000000020218C060248240200013C0100012D
+:10B1A000AC226D9810000011240500018C060248A5
+:10B1B000240200043C010001AC226D980C005104F3
+:10B1C000240500043C0200018C426D9430420001D1
+:10B1D00010400008240200013C010001AC226D98DF
+:10B1E00000002021240500013C06601B0C005104D6
+:10B1F000000000003C040001248469D08F4201500B
+:10B200008F4301543C0500088F4601580002164048
+:10B21000000319403463040300431025000633C0C3
+:10B2200000461025AF82021CAFA00010AFA0001492
+:10B230008F86021C34A502000C002B3B0000382135
+:10B240003C010001AC206D903C010001AC206DA8D8
+:10B250008FBF001803E0000827BD002027BDFFE0D6
+:10B260003C05000834A50300AFBF0018AFA00010D4
+:10B27000AFA000148F8602003C040001248469DC26
+:10B280000C002B3B000038218F42041024420001A7
+:10B29000AF4204108F4204108FBF001803E0000873
+:10B2A00027BD002027BDFFD8AFBF0020AFB1001CD5
+:10B2B000AFB000188F4203A424420001AF4203A4A0
+:10B2C0008F4203A48F9002208F8200E0AFA2001073
+:10B2D0008F8200E4AFA200148F8600C48F8700C85D
+:10B2E0003C040001248469E80C002B3B0200282167
+:10B2F0003C04400002041024504000B43C0401000F
+:10B300008F4203BC24420001AF4203BC8F4203BC06
+:10B310008F8700C48F8300C88F42014800671823BD
+:10B320000043102B10400003000000008F42014832
+:10B330000062182110600005000000008F42014CDF
+:10B340000043102B1040000B000000008F8200E033
+:10B350008F430124AF42011CAF4301148F820220AE
+:10B360003C0308FF3463FFFB00431024100000CEB1
+:10B37000004410258F8202203C0308FF3463FFFF46
+:10B380000043102434420004AF8202208F8200E088
+:10B390008F430124AF42011CAF4301148F8600C8C4
+:10B3A0008F8401208F8301241000000500002821D4
+:10B3B0001462000224620020276248000040182125
+:10B3C0001064000C30A200FF8C62001830420003B1
+:10B3D0001040FFF727624FE08F4203D024050001A1
+:10B3E00024420001AF4203D08F4203D08C66000894
+:10B3F00030A200FF1440005800000000934205C432
+:10B4000014400055000000008F8700C48F8800E0C2
+:10B410008F8400E42402FFF8010240240104102379
+:10B42000000218C3046200012463020010600005DA
+:10B430002402000110620009000000001000001F3B
+:10B44000000000008F4203C000E0302124420001D0
+:10B45000AF4203C0100000408F4203C08F4203C4BC
+:10B4600024420001AF4203C48C8600008F42014891
+:10B470008F4303C400E618230043102B1040000440
+:10B480002C62233F8F420148006218212C62233F27
+:10B4900014400031000000008F42020C24420001E1
+:10B4A000AF42020C8F42020C00E0302124820008DF
+:10B4B000AF8200E410000028AF8200E88F4203C88A
+:10B4C00024420001AF4203C88F4203C88C850000AC
+:10B4D0008F42014800A718230043102B104000039F
+:10B4E000000000008F420148006218218F42014C89
+:10B4F0000043102B5440000A00A030218F42020C60
+:10B5000024420001AF42020C8F42020C2482000848
+:10B51000AF8200E48F8400E41488FFECAF8400E87D
+:10B520001488000D27623000148200022482FFF884
+:10B5300027623FF8944300063C02001F3442FFFF9D
+:10B5400000C330210046102B104000030000000013
+:10B550008F42014800C23023AF8600C88F8300C4E9
+:10B560008F42014800C318230043102B10400003F2
+:10B57000000000008F4201480062182110600005A1
+:10B58000000000008F42014C0043102B5040000887
+:10B590003C02FDFF8F8202203C0308FF3463FFFB67
+:10B5A000004310243C0340001000003F00431025DE
+:10B5B0008F4303CC3442FFFF0282A02424630001A6
+:10B5C000AF4303CC100000398F4203CC0204102497
+:10B5D0001040000E3C1102008F4203A824420001DB
+:10B5E000AF4203A88F4203A88F8202203C0308FFCA
+:10B5F0003463FFFF00431024004410250C003DAFCE
+:10B60000AF82022010000029000000000211102467
+:10B61000504000083C1104008F4203AC244200015A
+:10B62000AF4203AC0C003DAF8F4203AC10000019D9
+:10B6300000000000021110241040001C0000000057
+:10B640008F83022424021402146200093C050008BE
+:10B650003C040001248469F4AFA00010AFA00014E2
+:10B660008F86022434A505000C002B3B00003821F6
+:10B670008F4203B024420001AF4203B08F4203B0B7
+:10B680008F82022002002021344200020C004E9CD6
+:10B69000AF8202208F8202203C0308FF3463FFFF49
+:10B6A0000043102400511025AF8202208FBF0020DC
+:10B6B0008FB1001C8FB0001803E0000827BD0028E0
+:10B6C00003E00008000000003C0200018C426DA86D
+:10B6D00027BDFFB0AFBF0048AFBE0044AFB50040CC
+:10B6E000AFB3003CAFB20038AFB100341040000F30
+:10B6F000AFB000303C04000124846A003C0500081F
+:10B70000AFA00010AFA000148F86022034A5060061
+:10B71000240200013C010001AC206DA83C010001A5
+:10B72000AC226D9C0C002B3B000038213C037FFFBA
+:10B730008C0202683463FFFF3C04FDFF00431024C9
+:10B74000AC0202688F4200043484FFFF30420002E2
+:10B75000104000920284A0243C040600348420009F
+:10B760008F420004000028212403FFFD0043102421
+:10B77000AF420004AFA400208F5E001827AA00206B
+:10B78000240200FF13C20002AFAA002C27C500014B
+:10B790008C02022800A090211642000E001E38C024
+:10B7A0008F42033C24420001AF42033C8F42033CE2
+:10B7B0008C0202283C040001248469983C0500099D
+:10B7C000AFA00014AFA200108FA600201000006DE3
+:10B7D00034A5050000F710218FA300208FA40024BA
+:10B7E000AC4304C0AC4404C48F8300548F82005423
+:10B7F000247003E8020210232C4203E91040001BCE
+:10B800000000982100E08821263504C08F4401788B
+:10B810008F45017C02201821240A0004AFAA0010E1
+:10B82000AFB200148F48000C0000102102F5302147
+:10B83000AFA800188F48010C2407000800A3282196
+:10B8400000A3482B008220210100F8090089202153
+:10B8500054400006241300018F820054020210237A
+:10B860002C4203E91440FFE900000000326200FFAF
+:10B8700054400017AF5200188F4203782442000151
+:10B88000AF4203788F4203788F8201208FAA002C69
+:10B89000AFA200108F8201243C040001248469A41B
+:10B8A0003C050009AFA200148D46000010000035D1
+:10B8B00034A506008F42030824130001244200012E
+:10B8C000AF4203088F4203081000001E326200FFDF
+:10B8D0008F8300548F820054247003E802021023E7
+:10B8E0002C4203E910400016000098213C1500206E
+:10B8F000241100108F42000C8F4401608F450164B9
+:10B900008F860120AFB10010AFB200140055102592
+:10B91000AFA200188F42010C240700080040F8096C
+:10B9200024C6001C1440FFE3000000008F82005476
+:10B93000020210232C4203E91440FFEE0000000035
+:10B94000326200FF14400011000000008F420378B3
+:10B9500024420001AF4203788F4203788F82012096
+:10B960008FAA002CAFA200108F8201243C0400019A
+:10B97000248469AC3C050009AFA200148D46000088
+:10B9800034A507000C002B3B03C038218F4202EC8A
+:10B9900024420001AF4202EC8F4202EC8FBF00480C
+:10B9A0008FBE00448FB500408FB3003C8FB200388B
+:10B9B0008FB100348FB0003003E0000827BD005085
+:10B9C0003C0200018C426DA827BDFFE01440000D31
+:10B9D000AFBF00183C04000124846A0C3C05000839
+:10B9E000AFA00010AFA000148F86022034A507007E
+:10B9F000240200013C010001AC226DA80C002B3B8D
+:10BA0000000038213C02000402C21024104000074C
+:10BA1000000000008F8202203C0308FF3463FFFF18
+:10BA20000043102434420008AF8202203C0500018C
+:10BA30008CA56D982402000114A2000700002021AB
+:10BA40000C00529B24050001AC02026C8C03026CBA
+:10BA5000100000063C0200070C00529B0000202151
+:10BA6000AC0202688C0302683C02000700621824E2
+:10BA70003C0200025062000D3C0205F50043102B11
+:10BA8000144000063C0200043C0200011062000960
+:10BA90003C0200981000000B000000001462000936
+:10BAA0003C023B9A100000043442CA00100000021D
+:10BAB0003442E10034429680AF4201FC8F4201FCE7
+:10BAC000AEE200648FBF001803E0000827BD00202D
+:10BAD0000000000000000000000000000086102BA5
+:10BAE000504000010087202300C410230002484377
+:10BAF0000125102B1040001B00091040008240213E
+:10BB00000088102B104000070000182194820000CC
+:10BB100024840002006218210088102B1440FFFBCF
+:10BB2000000000000060202100C7302300A910237E
+:10BB30000002104000C2282100C5102B1040000751
+:10BB40000000182194C2000024C6000200621821DF
+:10BB500000C5102B1440FFFB000000001000000D7A
+:10BB60000083202100051040008228210085102B31
+:10BB70001040000700001821948200002484000275
+:10BB8000006218210085102B1440FFFB000000000C
+:10BB90000060202100041C023082FFFF006220218F
+:10BBA00000041C023082FFFF0062202103E0000835
+:10BBB0003082FFFF03E00008000000000080282121
+:10BBC00030A200011040002B3C03001F3463FFFF34
+:10BBD00024A200040062102B544000070065102BC3
+:10BBE00090A2000190A4000390A3000090A5000281
+:10BBF0001000002A00441021104000030000000043
+:10BC00008F42014800A2282390A4000024A500012F
+:10BC10000065102B10400003000000008F42014817
+:10BC200000A2282390A2000024A500010002120017
+:10BC3000008220210065102B10400003000000004E
+:10BC40008F42014800A2282390A2000024A50001F1
+:10BC5000008220210065102B10400003000000002E
+:10BC60008F42014800A2282390A200001000002D5E
+:10BC7000000212003463FFFF24A200040062102BB4
+:10BC80005440000A0065102B90A2000090A400020E
+:10BC900090A3000190A500030044102100021200AF
+:10BCA00000651821100000200043202110400003EF
+:10BCB000000000008F42014800A2282390A200004B
+:10BCC00024A50001000222000065102B1040000393
+:10BCD000000000008F42014800A2282390A200002B
+:10BCE00024A50001008220210065102B10400003D4
+:10BCF000000000008F42014800A2282390A200000B
+:10BD000024A5000100021200008220210065102BF2
+:10BD100010400003000000008F42014800A22823C9
+:10BD200090A200000082202100041C023082FFFF4C
+:10BD30000062202100041C023082FFFF00622021EB
+:10BD400003E000083082FFFF000000008F82022025
+:10BD500034420002AF8202203C0200028C428FF883
+:10BD60003042400010400054240400018F82020041
+:10BD700024067FFF8F830200304500022402FFFD6E
+:10BD800000621824AF830200AF8402048F83005442
+:10BD90008F82005410000002246300018F8200543F
+:10BDA000006210232C4200021440FFFC000000003F
+:10BDB0008F8202241444004D0004204000C4102B44
+:10BDC0001040FFF1000000008F82020000451025A6
+:10BDD000AF8202008F82022034428000AF820220B4
+:10BDE0008F8300548F8200541000000224630001EE
+:10BDF0008F820054006210232C4200021440FFFC8A
+:10BE0000000000008F8202203C0300040043102445
+:10BE10001440000F000000008F8202203C03FFFF4F
+:10BE200034637FFF00431024AF8202208F830054CD
+:10BE30008F82005410000002246300018F8200549E
+:10BE4000006210232C4200021440FFFC000000009E
+:10BE50008F8202203C030004004310241440000D94
+:10BE6000000000008F82022034428000AF82022056
+:10BE70008F8300548F82005410000002246300015D
+:10BE80008F820054006210232C4200021440FFFCF9
+:10BE9000000000008F8202203C03000400431024B5
+:10BEA0001040001B000010218F830220240200019B
+:10BEB000100000153C04F7008F8202203C04F700BC
+:10BEC00000441025AF8202208F8202202403FFFD50
+:10BED00000431024AF8202208F8202203C03030023
+:10BEE000004310241440000300000000100000086C
+:10BEF000000010218F82022034420002AF82022013
+:10BF00008F8302202402000100641825AF830220E1
+:10BF100003E0000800000000000020213C050100B3
+:10BF200024020001AF80021CAF820200AF82022017
+:10BF300027625000AF8200C027625000AF8200C469
+:10BF400027625000AF8200C827625000AF8200D045
+:10BF500027625000AF8200D427625000AF8200D821
+:10BF600027623000AF8200E027623000AF8200E439
+:10BF700027623000AF8200E827622800AF8200F01D
+:10BF800027622800AF8200F427622800AF8200F801
+:10BF9000000418C02484000103631021AC45300460
+:10BFA00003631021AC403000288202001440FFF9E6
+:10BFB000000418C000002021000418C024840001DF
+:10BFC00003631021AC40280403631021AC40280017
+:10BFD000288201001440FFF9000418C0AF80023C21
+:10BFE0002403008024040100AC60000024630004EA
+:10BFF0000064102B5440FFFDAC6000008F830040B4
+:10C000003C02F000006218243C0250001062000C58
+:10C010000043102B144000063C0260003C0240002C
+:10C020001062000824020800100000080000000050
+:10C030001062000424020800100000040000000048
+:10C04000240207003C010001AC226DAC03E00008B3
+:10C05000000000003C0200018C426DBC27BDFFD0F7
+:10C06000AFBF002CAFB20028AFB10024AFB00020AA
+:10C070003C01000110400005AC206D940C004D9E69
+:10C08000000000003C010001AC206DBC8F83005417
+:10C090008F82005410000002246300648F820054D9
+:10C0A000006210232C4200651440FFFC00000000D9
+:10C0B0000C004DB9000000002404000100002821FC
+:10C0C00027A60018340280000C0045BEA7A2001865
+:10C0D0008F8300548F820054100000022463006498
+:10C0E0008F820054006210232C4200651440FFFC34
+:10C0F00024040001240500010C00457C27A600183B
+:10C100008F8300548F820054100000022463006467
+:10C110008F820054006210232C4200651440FFFC03
+:10C1200024040001240500010C00457C27A600180A
+:10C130008F8300548F820054100000022463006437
+:10C140008F820054006210232C4200651440FFFCD3
+:10C15000240400013C06000124C66F240C00457C29
+:10C16000240500028F8300548F82005410000002C7
+:10C17000246300648F820054006210232C42006507
+:10C180001440FFFC24040001240500033C100001BE
+:10C1900026106F260C00457C0200302197A600185F
+:10C1A0003C07000194E76F243C04000124846AE00A
+:10C1B000AFA00014960200003C05000D34A501005C
+:10C1C0000C002B3BAFA2001097A200181040004DAE
+:10C1D00024036040960200003042FFF01443000C3C
+:10C1E000240200203C03000194636F241462000BBE
+:10C1F00024027830240200033C010001AC226D943B
+:10C20000240200053C0100011000003FAC226F3405
+:10C210003C03000194636F24240278301462000C04
+:10C22000240300103C02000194426F263042FFF0CC
+:10C2300014430007240200033C010001AC226D946A
+:10C24000240200063C0100011000002FAC226F34D4
+:10C250003C0200018C426D943C03000194636F2406
+:10C26000344200013C010001AC226D94240200150F
+:10C270001462000B000000003C02000194426F2693
+:10C280003042FFF03843F4202C6300013842F43090
+:10C290002C420001006218251460001B24020003D8
+:10C2A0003C03000194636F2424027810146200168A
+:10C2B000240200023C02000194426F263042FFF04B
+:10C2C00014400011240200021000000F2402000498
+:10C2D0003C0200018C426D94344200083C01000194
+:10C2E000AC226D941000005E240200043C020001A8
+:10C2F0008C426D94344200043C010001100000AFF8
+:10C30000AC226D94240200013C010001AC226F407C
+:10C310003C0200018C426D9430420002144000B295
+:10C320003C09FFF024020E00AF8202388F840054D3
+:10C330008F820054240300083C010001AC236D9857
+:10C3400010000002248401F48F8200540082102324
+:10C350002C4201F51440FFFC3C0200C8344201FBB2
+:10C36000AF8202388F8300548F8200541000000285
+:10C37000246301F48F820054006210232C4201F5E3
+:10C380001440FFFC00008021241200012411000948
+:10C390000C004482000000003C010001AC326DB48E
+:10C3A0000C004547000000003C0200018C426DB4C7
+:10C3B0001451FFFB3C0200C8344201F6AF82023840
+:10C3C0008F8300548F820054100000022463000AFF
+:10C3D0008F820054006210232C42000B1440FFFC9B
+:10C3E000000000008F820220240400013442000279
+:10C3F000AF8202208F83020024057FFF2402FFFD0D
+:10C4000000621824AF830200AF8402048F830054BB
+:10C410008F82005410000002246300018F820054B8
+:10C42000006210232C4200021440FFFC00000000B8
+:10C430008F8202241444000534028000000420404E
+:10C4400000A4102B1040FFF0340280001082FFA0E7
+:10C45000261000012E0200141440FFCD2402000417
+:10C460003C010001AC226D980000802124120009DB
+:10C470003C11FFFF36313F7F0C004482000000007A
+:10C48000240200013C010001AC226DB40C004547C0
+:10C49000000000003C0200018C426DB41452FFFB0E
+:10C4A000000000008F82004400511024344250806C
+:10C4B000AF8200448F8300548F820054100000022A
+:10C4C0002463000A8F820054006210232C42000B68
+:10C4D0001440FFFC000000008F8200440051102433
+:10C4E0003442F080AF8200448F8300548F82005426
+:10C4F000100000022463000A8F820054006210239F
+:10C500002C42000B1440FFFC000000008F82022030
+:10C510003C03F70000431025AF8202208F830054B4
+:10C520008F82005410000002246300648F82005444
+:10C53000006210232C4200651440FFFC0000000044
+:10C540008F8202202404000134420002AF820220C4
+:10C550008F83020024057FFF2402FFFD0062182460
+:10C56000AF830200AF8402048F8300548F82005493
+:10C5700010000002246300018F8200540062102327
+:10C580002C4200021440FFFC000000008F820224B5
+:10C5900014440005340280000004204000A4102B45
+:10C5A0001040FFF0340280001082FF50261000017E
+:10C5B0002E0200641440FFB0000000003C020001A5
+:10C5C0008C426D9430420004144000073C09FFF097
+:10C5D0008F8200443C03FFFF34633F7F00431024FD
+:10C5E000AF8200443C09FFF03529BDC03C06000184
+:10C5F0008CC66D943C04000124846AE0240200018E
+:10C600003C010001AC226D9C8F8200543C0700016C
+:10C610008CE76F403C03000194636F243C080001E9
+:10C6200095086F263C05000D34A501003C01000172
+:10C63000AC206D98004910213C010001AC226F3004
+:10C64000AFA300100C002B3BAFA800148FBF002C31
+:10C650008FB200288FB100248FB0002003E00008C3
+:10C6600027BD003027BDFFE83C0500018CA56D9873
+:10C67000240600042402000114A20014AFBF00101D
+:10C680003C0200028C428FFC3042800010400005CA
+:10C690003C04000F3C0300018C636F401000000558
+:10C6A000348442403C0400043C0300018C636F402E
+:10C6B000348493E024020005146200160000000098
+:10C6C0003C04003D10000013348409003C020002C9
+:10C6D0008C428FF830428000104000053C04001E60
+:10C6E0003C0300018C636F4010000005348484809B
+:10C6F0003C04000F3C0300018C636F4034844240D3
+:10C700002402000514620003000000003C04007ACB
+:10C71000348412003C0200018C426F308F8300543D
+:10C7200000441021004310230044102B1440004CFF
+:10C73000000000003C0200018C426DA01440004843
+:10C74000000000003C01000110C00025AC206DB0CD
+:10C750003C0900018D296D94240700013C04400030
+:10C760003C08000225088FFC250AFFFC0005284232
+:10C7700014A0000224C6FFFF2405000800A910240D
+:10C78000104000100000000014A700080000000086
+:10C790008D020000004410241040000A0000000038
+:10C7A0003C01000110000007AC256DB08D42000077
+:10C7B0000044102410400003000000003C01000170
+:10C7C000AC276DB03C0200018C426DB00006182B06
+:10C7D0002C420001004310245440FFE5000528428C
+:10C7E0008F8200543C0300018C636DB03C0100015A
+:10C7F000AC226F301060003B240200053C030001B6
+:10C800008C636F403C010001AC256D9814620012EE
+:10C81000240200013C0200028C428FF83C032000FD
+:10C820003463500000431024144000062402000129
+:10C830003C010001AC206F1C3C010001AC226D9852
+:10C84000240200013C010001AC226E243C010001E5
+:10C85000AC226DA4240200013C010001AC226D9CBD
+:10C860003C0200018C426DB01040001E0000000030
+:10C870003C0200018C426D9C104000082402000123
+:10C880003C010001AC206D9CAEE204B83C0100010B
+:10C89000AC206E1C3C010001AC226DD48EE304B8C8
+:10C8A0002402000810620005240200010C00423935
+:10C8B000000000001000000B000000003C0300011D
+:10C8C0008C636D98106200072402000E3C03000286
+:10C8D0008C638F9010620003000000000C004E9CDF
+:10C8E0008F8402208FBF001003E0000827BD0018CE
+:10C8F00027BDFFE03C03FDFF3C0400018C846D98E4
+:10C900003C0200018C426DC03463FFFF0283A0240F
+:10C9100014820006AFBF00188EE304B83C02000189
+:10C920008C426DC410620006000000008EE204B864
+:10C930003C010001AC246DC03C010001AC226DC47F
+:10C940003C0300018C636D98240200021062019C7C
+:10C950002C62000310400005240200011062000A4E
+:10C960000000000010000226000000002402000465
+:10C97000106200B6240200081062010A24020001BD
+:10C980001000021F000000008EE204B82443FFFFE5
+:10C990002C6200081040021C000310803C010001C2
+:10C9A000002208218C226AF80040000800000000E4
+:10C9B0003C0300018C636F402402000514620010E8
+:10C9C000000000003C0200018C426DA410400008F1
+:10C9D000240200030C004482000000002402000234
+:10C9E000AEE204B83C01000110000002AC206DA4CE
+:10C9F000AEE204B83C01000110000203AC206D302F
+:10CA00000C004482000000003C0200018C426DA436
+:10CA10003C010001AC206D301440017A2402000278
+:10CA20001000019D240200073C0300018C636F404D
+:10CA30002402000514620003240200013C010001ED
+:10CA4000AC226DD00C0045FF000000003C0300014B
+:10CA50008C636DD010000174240200113C050001AC
+:10CA60008CA56D983C0600028CC68FFC0C0051040E
+:10CA700000002021240200053C010001AC206DA42F
+:10CA8000100001E1AEE204B83C04000124846AEC29
+:10CA90003C05000F34A501000000302100003821C2
+:10CAA000AFA000100C002B3BAFA00014100001D66B
+:10CAB000000000008F8202203C0300040043102489
+:10CAC00014400175240200078F8300543C020001CA
+:10CAD0008C426F282463D8F0004310232C42271087
+:10CAE00014400003240200013C010001AC226D9CB3
+:10CAF0003C0200028C428FFC30425000104001C2C8
+:10CB0000000000008F820220304280001040017D32
+:10CB10000000000010000175000000003C0500014D
+:10CB20008CA56D980C00529B000020210C00551B19
+:10CB3000000020213C0300028C638FF4046101B0EB
+:10CB4000240200013C02000800621024104000068C
+:10CB5000000000008F8202143C03FFFF00431024FA
+:10CB6000100000053442251F8F8202143C03FFFF92
+:10CB7000004310243442241FAF8202148F8202200B
+:10CB80003C03020034420002AF820220240200086B
+:10CB9000AEE204B88F8202200283A0253C03000489
+:10CBA0000043102414400016000000003C02000264
+:10CBB0008C428FFC304250001040000D00000000FD
+:10CBC0008F820220304280001040000600000000EA
+:10CBD0008F8202203C03FFFF34637FFF10000003BD
+:10CBE000004310248F82022034428000AF82022052
+:10CBF0008F8202203C03F70000431025AF82022001
+:10CC00003C0300018C636F40240200051462000A9B
+:10CC1000000000003C02000194426F2624429FBCA9
+:10CC20002C420004104000042404001824050002D3
+:10CC30000C004DDB240600200C003E6D00000000BF
+:10CC40003C01000110000170AC206E208EE204B89F
+:10CC50002443FFFF2C6200081040016B000310808A
+:10CC60003C010001002208218C226B1800400008C2
+:10CC7000000000000C004547000000003C030001DC
+:10CC80008C636DB4100000E8240200093C0200022D
+:10CC90008C428FF830424000104000040000000039
+:10CCA0008F820044100000063442F0808F820044DE
+:10CCB0003C03FFFF34633F7F004310243442A080D5
+:10CCC000AF8200448F830054100000EA2402000465
+:10CCD0008F8300543C0200018C426F282463D8F0FB
+:10CCE000004310232C422710144001472402000562
+:10CCF000100000D8000000008F8202203C03F700E3
+:10CD000000431025AF820220AF8002043C010002E4
+:10CD1000100000D6AC208FE08F8300543C0200014D
+:10CD20008C426F282463FFF6004310232C42000A34
+:10CD30001440013524020007100000D70000000055
+:10CD40000C003F50000000001040012D24020001A3
+:10CD50008F8202143C03FFFF3C0400018C846F1C93
+:10CD6000004310243442251FAF820214240200081D
+:10CD700010800005AEE204B83C0200018C426E4413
+:10CD800010400064240200018F8202203C0300084E
+:10CD9000004310241040006A3C020200100000789A
+:10CDA000000000008EE204B82443FFFF2C6200075D
+:10CDB00010400115000310803C01000100220821F1
+:10CDC0008C226B3800400008000000000C003DAFD2
+:10CDD000000000003C010001AC206D9CAF8002040B
+:10CDE0003C0100020C004482AC208FE024020001D0
+:10CDF0003C010001AC226DB42402000210000102CB
+:10CE0000AEE204B80C004547000000003C030001FE
+:10CE10008C636DB410000084240200093C020002FF
+:10CE20008C428FF830424000104000033C0200C8A2
+:10CE300010000002344201F6344201FEAF82023893
+:10CE40008F8300541000008B240200048F83005451
+:10CE50003C0200018C426F282463D8F00043102369
+:10CE60002C422710144000E824020005100000792D
+:10CE7000000000008F8202203C03F70000431025D1
+:10CE8000AF820220AF8002043C0100021000007754
+:10CE9000AC208FE08F8300543C0200018C426F284D
+:10CEA0002463FFF6004310232C42000A144000D6EE
+:10CEB0002402000710000078000000000C003F5022
+:10CEC00000000000104000CE240200018F820214F6
+:10CED0003C03FFFF3C0400018C846F1C00431024C2
+:10CEE0003442251FAF820214240200081080000F74
+:10CEF000AEE204B83C0200018C426E441440000BC8
+:10CF0000000000008F82022034420002AF82022023
+:10CF1000240200013C010002AC228F900C004E9CC8
+:10CF20008F84022010000016000000008F82022073
+:10CF30003C03000800431024144000113C0202008E
+:10CF40000282A0252402000E3C010002AC228F9038
+:10CF50000C00551B000020218F8202203442000269
+:10CF60000C003E6DAF8202203C0500018CA56D983F
+:10CF70000C00529B00002021100000A300000000C4
+:10CF80003C0200018C426E441040009F00000000F3
+:10CF90003C0200018C426E402442FFFF3C01000134
+:10CFA000AC226E4014400098240200023C010001B3
+:10CFB000AC206E443C01000110000093AC226E4096
+:10CFC0008EE204B82443FFFF2C6200071040008E5D
+:10CFD000000310803C010001002208218C226B58C4
+:10CFE00000400008000000003C0200018C426DA4DB
+:10CFF00010400018240200050C00448200000000CC
+:10D0000024020002AEE204B83C0100011000007EE0
+:10D01000AC206DA40C004963000000003C0300013B
+:10D020008C636DD42402000614620077240200038E
+:10D0300010000075AEE204B83C0500018CA56D98A7
+:10D040003C0600028CC68FF80C0051040000202121
+:10D05000240200051000006CAEE204B88F820220AA
+:10D060003C03F70000431025AF8202208F83005459
+:10D0700024020006AEE204B83C0100011000006288
+:10D08000AC236F288F8202203C030004004310244D
+:10D0900010400003240200071000005BAEE204B859
+:10D0A0008F8300543C0200018C426F282463D8F027
+:10D0B000004310232C4227101440000324020001D7
+:10D0C0003C010001AC226D9C3C0200028C428FF8B6
+:10D0D000304250001040004C000000008F820220BF
+:10D0E0003042800010400007000000008F820220C4
+:10D0F0003C03FFFF34637FFF004310241000004215
+:10D10000AF8202208F820220344280001000003E55
+:10D11000AF8202203C0500018CA56D980C00529B4B
+:10D12000000020210C00551B000020213C020002C1
+:10D130008C428FF004410032240200018F820214DD
+:10D140003C03FFFF004310243442251FAF8202142A
+:10D1500024020008AEE204B88F82022034420002AA
+:10D16000AF8202208F8202203C030004004310247F
+:10D1700014400016000000003C0200028C428FF8B0
+:10D18000304250001040000D000000008F8202204D
+:10D190003042800010400006000000008F82022014
+:10D1A0003C03FFFF34637FFF1000000300431024A3
+:10D1B0008F82022034428000AF8202208F820220C0
+:10D1C0003C03F70000431025AF8202203C0200011F
+:10D1D00094426F2624429FBC2C420004104000045D
+:10D1E00024040018240500020C004DDB2406002056
+:10D1F0000C003E6D00000000100000030000000065
+:10D200003C010001AC226D9C8FBF001803E00008B8
+:10D2100027BD00208F8202008F8202208F82022091
+:10D2200034420004AF8202208F8202003C050001DC
+:10D230008CA56D9834420004AF82020024020002E3
+:10D2400010A2004B2CA20003104000052402000194
+:10D2500010A2000A00000000100000B10000000051
+:10D260002402000410A200722402000810A200850B
+:10D270003C02F0FF100000AA000000008F83005065
+:10D280003C02F0FF3442FFFF3C0400018C846F40FD
+:10D29000006218243C0207000062182524020E00D8
+:10D2A0002484FFFB2C840002AF830050AF85020072
+:10D2B000AF85022014800006AF8202388F820044BE
+:10D2C0003C03FFFF34633F7F00431024AF820044E0
+:10D2D0003C0300018C636F402402000514620004CB
+:10D2E000000000008F82004434425000AF820044AE
+:10D2F0003C0200018C426D883C0300018C636F404E
+:10D30000344200222463FFFC2C6300021460000CF2
+:10D31000AF8202003C0200018C426DAC3C03000174
+:10D320008C636D903C0400018C846D8C34428000D1
+:10D3300000621825006418251000000A34620002FB
+:10D340003C0200018C426D903C0300018C636DAC8B
+:10D350003C0400018C846D8C004310250044102592
+:10D3600034420002AF8202201000002F240200018C
+:10D3700024020E01AF8202388F8300503C02F0FF7E
+:10D380003442FFFF3C0400018C846F1C00621824AF
+:10D390003C020D000062182524020001AF830050FA
+:10D3A000AF820200AF820220108000053C033F00E4
+:10D3B0003C0200018C426D80100000043463007058
+:10D3C0003C0200018C426D803463007200431025E2
+:10D3D000AF8202003C0300018C636D843C02F700C5
+:10D3E000006218253C0200018C426D903C04000153
+:10D3F0008C846DAC3C0500018CA56F40004310256A
+:10D4000000441025AF8202202402000514A2000669
+:10D41000240200018F8200442403AFFF0043102444
+:10D42000AF820044240200011000003DAF820238A8
+:10D430008F8300503C02F0FF3442FFFF3C040001A8
+:10D440008C846F1C006218243C020A0000621825BC
+:10D4500024020001AF830050AF8202001080001E42
+:10D46000AF8202203C0200018C426E441440001A3C
+:10D470003C033F003C0200018C426D801000001A0A
+:10D48000346300E08F8300503C0400018C846F1CE7
+:10D490003442FFFF006218241080000FAF83005059
+:10D4A0003C0200018C426E441440000B3C043F00DF
+:10D4B0003C0300018C636D80348400E02402000191
+:10D4C000AF820200AF82022000641825AF83020001
+:10D4D000100000083C05F7003C0200018C426D8002
+:10D4E0003C033F00346300E200431025AF8202009A
+:10D4F0003C05F70034A580003C0300018C636D847B
+:10D500003C0200018C426D903C0400018C846DACA7
+:10D51000006518250043102500441025AF82022025
+:10D5200003E00008000000003C0300018C636DB4C0
+:10D530003C0200018C426DB810620003240200021C
+:10D540003C010001AC236DB81062001D2C62000389
+:10D55000104000252402000114620023240200046C
+:10D560003C0300018C636D981062000624020008E1
+:10D570001462000C3C0200C8344201FB1000000998
+:10D58000AF82023824020E01AF8202388F8200443B
+:10D590003C03FFFF34633F7F00431024344200808C
+:10D5A000AF8200448F830054240200023C0100013A
+:10D5B000AC226DB43C0100011000000BAC236F2CB9
+:10D5C0008F8300543C0200018C426F2C2463D8F0FE
+:10D5D000004310232C4227101440000324020009AA
+:10D5E0003C010001AC226DB403E000080000000023
+:10D5F00000000000000000000000000027BDFFD870
+:10D60000AFB2001800809021AFB3001C00A0982199
+:10D61000AFB1001400C08821AFB00010000080211D
+:10D62000AFBF0020A62000000C004D7824040001AC
+:10D63000261000012E0200201440FFFB0000000015
+:10D640000C004D78000020210C004D7824040001CE
+:10D650000C004D78240400010C004D7800002021BE
+:10D66000241000100250102410400002000020215D
+:10D67000240400010C004D78001080421600FFFACF
+:10D6800002501024241000100270102410400002D8
+:10D6900000002021240400010C004D78001080427D
+:10D6A0001600FFFA027010240C004DB934108000EF
+:10D6B0000C004DB9000000000C004D5800000000A7
+:10D6C00050400005001080429622000000501025B6
+:10D6D000A6220000001080421600FFF700000000A4
+:10D6E0000C004DB9000000008FBF00208FB3001C5C
+:10D6F0008FB200188FB100148FB0001003E0000843
+:10D7000027BD002827BDFFD8AFB1001400808821B5
+:10D71000AFB2001800A09021AFB3001C00C0982148
+:10D72000AFB0001000008021AFBF00200C004D788A
+:10D7300024040001261000012E0200201440FFFBEB
+:10D74000000000000C004D78000020210C004D78F6
+:10D75000240400010C004D78000020210C004D78BD
+:10D760002404000124100010023010241040000294
+:10D7700000002021240400010C004D78001080429C
+:10D780001600FFFA0230102424100010025010245A
+:10D790001040000200002021240400010C004D78FC
+:10D7A000001080421600FFFA025010240C004D7841
+:10D7B000240400010C004D7800002021341080006A
+:10D7C000966200000050102410400002000020214A
+:10D7D000240400010C004D78001080421600FFF870
+:10D7E000000000000C004DB9000000008FBF0020B9
+:10D7F0008FB3001C8FB200188FB100148FB00010CF
+:10D8000003E0000827BD00283C0400018C846DD093
+:10D810003C0200018C426E1827BDFFD8AFBF00202C
+:10D82000AFB1001C10820003AFB000183C01000132
+:10D83000AC246E183C0300018C636F402402000589
+:10D84000146200052483FFFF0C0049630000000000
+:10D850001000034C000000002C620013104003492C
+:10D86000000310803C010001002208218C226B8003
+:10D8700000400008000000000C004DB900008021AD
+:10D8800034028000A7A2001027B100100C004D78D0
+:10D8900024040001261000012E0200201440FFFB8A
+:10D8A000000000000C004D78000020210C004D7895
+:10D8B000240400010C004D78000020210C004D785C
+:10D8C0002404000124100010320200011040000264
+:10D8D00000002021240400010C004D78001080423B
+:10D8E0001600FFFA32020001241000100C004D78DF
+:10D8F00000002021001080421600FFFC0000000004
+:10D900000C004D78240400010C004D78000020210B
+:10D9100034108000962200000050102410400002B5
+:10D9200000002021240400010C004D7800108042EA
+:10D930001600FFF8000000000C004DB900000000C8
+:10D940001000030E2402000227B10010A7A000104F
+:10D95000000080210C004D782404000126100001F5
+:10D960002E0200201440FFFB000000000C004D7848
+:10D97000000020210C004D78240400010C004D789B
+:10D98000240400010C004D78000020212410001018
+:10D990003202000110400002000020212404000196
+:10D9A0000C004D78001080421600FFFA3202000190
+:10D9B000241000100C004D7800002021001080423F
+:10D9C0001600FFFC000000000C004DB93410800070
+:10D9D0000C004DB9000000000C004D580000000084
+:10D9E0005040000500108042962200000050102593
+:10D9F000A6220000001080421600FFF70000000081
+:10DA00000C004DB90000000097A2001030428000C9
+:10DA1000144002DC24020003100002D800000000C1
+:10DA200024021200A7A2001027B1001000008021DC
+:10DA30000C004D7824040001261000012E02002065
+:10DA40001440FFFB000000000C004D780000202176
+:10DA50000C004D78240400010C004D7800002021BA
+:10DA60000C004D7824040001241000103202000143
+:10DA70001040000200002021240400010C004D7819
+:10DA8000001080421600FFFA32020001241000103C
+:10DA90000C004D7800002021001080421600FFFC91
+:10DAA000000000000C004D78240400010C004D78AB
+:10DAB0000000202134108000962200000050102425
+:10DAC0001040000200002021240400010C004D78C9
+:10DAD000001080421600FFF8000000000C004DB955
+:10DAE000000000008F8300541000029624020004FE
+:10DAF0008F8300543C0200018C426F3C2463FF9CE6
+:10DB0000004310232C4200641440029E24020002B1
+:10DB10003C0300018C636F40106202972C6200038B
+:10DB20001440029624020011240200031062000532
+:10DB300024020004106202912402000F1000028FE0
+:10DB4000240200111000028D24020005240200149A
+:10DB5000A7A2001027B10010000080210C004D7812
+:10DB600024040001261000012E0200201440FFFBB7
+:10DB7000000000000C004D78000020210C004D78C2
+:10DB8000240400010C004D78000020210C004D7889
+:10DB90002404000124100010320200011040000291
+:10DBA00000002021240400010C004D780010804268
+:10DBB0001600FFFA32020001241000103202001297
+:10DBC0001040000200002021240400010C004D78C8
+:10DBD000001080421600FFFA320200120C004D784D
+:10DBE000240400010C004D78000020213410800036
+:10DBF0009622000000501024104000020000202156
+:10DC0000240400010C004D78001080421600FFF83B
+:10DC1000000000000C004DB9000000008F8300548C
+:10DC200010000248240200068F8300543C020001C9
+:10DC30008C426F3C2463FF9C004310232C42006401
+:10DC400014400250240200071000024C00000000A3
+:10DC500024020006A7A2001027B1001000008021B6
+:10DC60000C004D7824040001261000012E02002033
+:10DC70001440FFFB000000000C004D780000202144
+:10DC80000C004D78240400010C004D780000202188
+:10DC90000C004D7824040001241000103202000111
+:10DCA0001040000200002021240400010C004D78E7
+:10DCB000001080421600FFFA32020001241000100A
+:10DCC0003202001310400002000020212404000151
+:10DCD0000C004D78001080421600FFFA320200134B
+:10DCE0000C004D78240400010C004D780000202128
+:10DCF00034108000962200000050102410400002D2
+:10DD000000002021240400010C004D780010804206
+:10DD10001600FFF8000000000C004DB900000000E4
+:10DD20008F83005410000207240200088F830054E0
+:10DD30003C0200018C426F3C2463FF9C0043102393
+:10DD40002C4200641440020F240200091000020B50
+:10DD50000000000027B10010A7A0001000008021E3
+:10DD60000C004D7824040001261000012E02002032
+:10DD70001440FFFB000000000C004D780000202143
+:10DD80000C004D78240400010C004D78240400019F
+:10DD90000C004D78000020212410001032020001F8
+:10DDA0001040000200002021240400010C004D78E6
+:10DDB000001080421600FFFA320200012410001009
+:10DDC000320200181040000200002021240400014B
+:10DDD0000C004D78001080421600FFFA3202001845
+:10DDE0000C004DB9341080000C004DB9000000004B
+:10DDF0000C004D580000000050400005001080420B
+:10DE00009622000000501025A6220000001080423B
+:10DE10001600FFF7000000000C004DB90000802143
+:10DE200097A2001027B1001034420001A7A20010F1
+:10DE30000C004D7824040001261000012E02002061
+:10DE40001440FFFB000000000C004D780000202172
+:10DE50000C004D78240400010C004D7800002021B6
+:10DE60000C004D782404000124100010320200013F
+:10DE70001040000200002021240400010C004D7815
+:10DE8000001080421600FFFA320200012410001038
+:10DE9000320200181040000200002021240400017A
+:10DEA0000C004D78001080421600FFFA3202001874
+:10DEB0000C004D78240400010C004D780000202156
+:10DEC0003410800096220000005010241040000200
+:10DED00000002021240400010C004D780010804235
+:10DEE0001600FFF8000000000C004DB90000000013
+:10DEF0008F830054100001932402000A8F83005482
+:10DF00003C0200018C426F3C2463FF9C00431023C1
+:10DF10002C4200641440019B2402000B1000019766
+:10DF20000000000027B10010A7A000100000802111
+:10DF30000C004D7824040001261000012E02002060
+:10DF40001440FFFB000000000C004D780000202171
+:10DF50000C004D78240400010C004D7824040001CD
+:10DF60000C004D7800002021241000103202000126
+:10DF70001040000200002021240400010C004D7814
+:10DF8000001080421600FFFA320200012410001037
+:10DF9000320200171040000200002021240400017A
+:10DFA0000C004D78001080421600FFFA3202001774
+:10DFB0000C004DB9341080000C004DB90000000079
+:10DFC0000C004D5800000000504000050010804239
+:10DFD0009622000000501025A6220000001080426A
+:10DFE0001600FFF7000000000C004DB90000802172
+:10DFF00097A2001027B1001034420700A7A200101A
+:10E000000C004D7824040001261000012E0200208F
+:10E010001440FFFB000000000C004D7800002021A0
+:10E020000C004D78240400010C004D7800002021E4
+:10E030000C004D782404000124100010320200016D
+:10E040001040000200002021240400010C004D7843
+:10E05000001080421600FFFA320200012410001066
+:10E0600032020017104000020000202124040001A9
+:10E070000C004D78001080421600FFFA32020017A3
+:10E080000C004D78240400010C004D780000202184
+:10E09000341080009622000000501024104000022E
+:10E0A00000002021240400010C004D780010804263
+:10E0B0001600FFF8000000000C004DB90000000041
+:10E0C0008F8300541000011F2402000C8F83005422
+:10E0D0003C0200018C426F3C2463FF9C00431023F0
+:10E0E0002C42006414400127240200121000012376
+:10E0F0000000000027B10010A7A000100000802140
+:10E100000C004D7824040001261000012E0200208E
+:10E110001440FFFB000000000C004D78000020219F
+:10E120000C004D78240400010C004D7824040001FB
+:10E130000C004D7800002021241000103202000154
+:10E140001040000200002021240400010C004D7842
+:10E15000001080421600FFFA320200012410001065
+:10E1600032020014104000020000202124040001AB
+:10E170000C004D78001080421600FFFA32020014A5
+:10E180000C004DB9341080000C004DB900000000A7
+:10E190000C004D5800000000504000050010804267
+:10E1A0009622000000501025A62200000010804298
+:10E1B0001600FFF7000000000C004DB900008021A0
+:10E1C00097A2001027B1001034420010A7A200103F
+:10E1D0000C004D7824040001261000012E020020BE
+:10E1E0001440FFFB000000000C004D7800002021CF
+:10E1F0000C004D78240400010C004D780000202113
+:10E200000C004D782404000124100010320200019B
+:10E210001040000200002021240400010C004D7871
+:10E22000001080421600FFFA320200012410001094
+:10E2300032020014104000020000202124040001DA
+:10E240000C004D78001080421600FFFA32020014D4
+:10E250000C004D78240400010C004D7800002021B2
+:10E26000341080009622000000501024104000025C
+:10E2700000002021240400010C004D780010804291
+:10E280001600FFF8000000000C004DB9000000006F
+:10E290008F830054100000AB240200138F830054BE
+:10E2A0003C0200018C426F3C2463FF9C004310231E
+:10E2B0002C420064144000B32402000D100000AF93
+:10E2C0000000000027B10010A7A00010000080216E
+:10E2D0000C004D7824040001261000012E020020BD
+:10E2E0001440FFFB000000000C004D7800002021CE
+:10E2F0000C004D78240400010C004D78240400012A
+:10E300000C004D7800002021241000103202000182
+:10E310001040000200002021240400010C004D7870
+:10E32000001080421600FFFA320200012410001093
+:10E3300032020018104000020000202124040001D5
+:10E340000C004D78001080421600FFFA32020018CF
+:10E350000C004DB9341080000C004DB900000000D5
+:10E360000C004D5800000000504000050010804295
+:10E370009622000000501025A622000000108042C6
+:10E380001600FFF7000000000C004DB900008021CE
+:10E3900097A2001027B100103042FFFEA7A2001084
+:10E3A0000C004D7824040001261000012E020020EC
+:10E3B0001440FFFB000000000C004D7800002021FD
+:10E3C0000C004D78240400010C004D780000202141
+:10E3D0000C004D78240400012410001032020001CA
+:10E3E0001040000200002021240400010C004D78A0
+:10E3F000001080421600FFFA3202000124100010C3
+:10E400003202001810400002000020212404000104
+:10E410000C004D78001080421600FFFA32020018FE
+:10E420000C004D78240400010C004D7800002021E0
+:10E43000341080009622000000501024104000028A
+:10E4400000002021240400010C004D7800108042BF
+:10E450001600FFF8000000000C004DB9000000009D
+:10E460008F830054100000372402000E240208405D
+:10E47000A7A2001027B10010000080210C004D78E9
+:10E4800024040001261000012E0200201440FFFB8E
+:10E49000000000000C004D78000020210C004D7899
+:10E4A000240400010C004D78000020210C004D7860
+:10E4B0002404000124100010320200011040000268
+:10E4C00000002021240400010C004D78001080423F
+:10E4D0001600FFFA3202000124100010320200136D
+:10E4E0001040000200002021240400010C004D789F
+:10E4F000001080421600FFFA320200130C004D7823
+:10E50000240400010C004D7800002021341080000C
+:10E51000962200000050102410400002000020212C
+:10E52000240400010C004D78001080421600FFF812
+:10E53000000000000C004DB9000000008F83005463
+:10E54000240200103C010001AC226DD03C0100010E
+:10E550001000000CAC236F3C8F8300543C02000180
+:10E560008C426F3C2463FF9C004310232C420064C8
+:10E570001440000400000000240200113C010001CE
+:10E58000AC226DD08FBF00208FB1001C8FB000185F
+:10E5900003E0000827BD00283C0300018C636D9850
+:10E5A00027BDFFC824020002AFBF0034AFB2003065
+:10E5B000AFB1002C14620004AFB000283C1200027E
+:10E5C000100000038E528FF83C1200028E528FFC16
+:10E5D0003C0300018C636DD43C0200018C426E1C34
+:10E5E000506200042463FFFF3C010001AC236E1C59
+:10E5F0002463FFFF2C6200061040037700031080A5
+:10E600003C010001002208218C226BD80040000848
+:10E610000000000000002021000028210C004DDB3C
+:10E6200034068000240400102405000224060002A1
+:10E63000240200020C004DDBA7A2001824020002F5
+:10E640003C01000110000364AC226DD427B1001816
+:10E65000A7A00018000080210C004D7824040001C0
+:10E66000261000012E0200201440FFFB00000000D5
+:10E670000C004D78000020210C004D78240400018E
+:10E680000C004D78240400010C004D78000020217E
+:10E69000241000103202000110400002000020216E
+:10E6A000240400010C004D78001080421600FFFA8F
+:10E6B00032020001241000100C004D7800002021CF
+:10E6C000001080421600FFFC000000000C004DB955
+:10E6D000341080000C004DB9000000000C004D58B3
+:10E6E000000000005040000500108042962200000B
+:10E6F00000501025A6220000001080421600FFF7EF
+:10E70000000000000C004DB90000000097A20018A6
+:10E710003042800014400004240200033C01000148
+:10E72000AC226DD4240200033C0100011000032A36
+:10E73000AC226DD42404001024050002240600023B
+:10E74000240200020C004DDBA7A200183C030001CC
+:10E750008C636E2024020001146201E1000080211C
+:10E7600027B10018A7A000180C004D782404000160
+:10E77000261000012E0200201440FFFB00000000C4
+:10E780000C004D78000020210C004D78240400017D
+:10E790000C004D78240400010C004D78000020216D
+:10E7A000241000103202000110400002000020215D
+:10E7B000240400010C004D78001080421600FFFA7E
+:10E7C0003202000124100010320200181040000232
+:10E7D00000002021240400010C004D78001080422C
+:10E7E0001600FFFA320200180C004DB934108000F8
+:10E7F0000C004DB9000000000C004D580000000056
+:10E800005040000500108042962200000050102564
+:10E81000A6220000001080421600FFF70000000052
+:10E820000C004DB90000802127B10018A7A00018E6
+:10E830000C004D7824040001261000012E02002057
+:10E840001440FFFB000000000C004D780000202168
+:10E850000C004D78240400010C004D7824040001C4
+:10E860000C004D780000202124100010320200011D
+:10E870001040000200002021240400010C004D780B
+:10E88000001080421600FFFA32020001241000102E
+:10E890003202001810400002000020212404000170
+:10E8A0000C004D78001080421600FFFA320200186A
+:10E8B0000C004DB9341080000C004DB90000000070
+:10E8C0000C004D5800000000504000050010804230
+:10E8D0009622000000501025A62200000010804261
+:10E8E0001600FFF7000000000C004DB90000802169
+:10E8F00024040018000028210C004DDB2406040429
+:10E90000A7A0001A0C004D78240400012610000175
+:10E910002E0200201440FFFB000000000C004D7888
+:10E92000000020210C004D78240400010C004D78DB
+:10E93000240400010C004D78000020212410001058
+:10E9400032020001104000020000202124040001D6
+:10E950000C004D78001080421600FFFA32020001D0
+:10E960002410001032020018104000020000202184
+:10E97000240400010C004D78001080421600FFFABC
+:10E98000320200180C004DB9341080000C004DB953
+:10E99000000000000C004D58000000005040000531
+:10E9A0000010804297A2001A00501025A7A2001A5A
+:10E9B000001080421600FFF7000000000C004DB967
+:10E9C00000008021A7A0001A0C004D78240400014B
+:10E9D000261000012E0200201440FFFB0000000062
+:10E9E0000C004D78000020210C004D78240400011B
+:10E9F0000C004D78240400010C004D78000020210B
+:10EA000024100010320200011040000200002021FA
+:10EA1000240400010C004D78001080421600FFFA1B
+:10EA200032020001241000103202001810400002CF
+:10EA300000002021240400010C004D7800108042C9
+:10EA40001600FFFA320200180C004DB93410800095
+:10EA50000C004DB9000000000C004D5800000000F3
+:10EA6000504000050010804297A2001A0050102567
+:10EA7000A7A2001A001080421600FFF70000000055
+:10EA80000C004DB900008021A7A0001C0C004D789F
+:10EA900024040001261000012E0200201440FFFB78
+:10EAA000000000000C004D78000020210C004D7883
+:10EAB000240400010C004D78240400010C004D7862
+:10EAC00000002021241000100C004D7800002021AF
+:10EAD000001080421600FFFC00000000241000100F
+:10EAE0003202001E10400002000020212404000118
+:10EAF0000C004D78001080421600FFFA3202001E12
+:10EB00000C004DB9341080000C004DB9000000001D
+:10EB10000C004D58000000005040000500108042DD
+:10EB200097A2001C00501025A7A2001C00108042D4
+:10EB30001600FFF7000000000C004DB90000802116
+:10EB4000A7A0001C0C004D78240400012610000131
+:10EB50002E0200201440FFFB000000000C004D7846
+:10EB6000000020210C004D78240400010C004D7899
+:10EB7000240400010C004D78000020212410001016
+:10EB80000C004D7800002021001080421600FFFC90
+:10EB900000000000241000103202001E104000028D
+:10EBA00000002021240400010C004D780010804258
+:10EBB0001600FFFA3202001E0C004DB9341080001E
+:10EBC0000C004DB9000000000C004D580000000082
+:10EBD000504000050010804297A2001C00501025F4
+:10EBE000A7A2001C001080421600FFF700000000E2
+:10EBF0000C004DB90000802124020002A7A2001ED3
+:10EC00000C004D7824040001261000012E02002083
+:10EC10001440FFFB000000000C004D780000202194
+:10EC20000C004D78240400010C004D7800002021D8
+:10EC30000C004D7824040001241000100C004D78C5
+:10EC400000002021001080421600FFFC00000000A0
+:10EC5000241000103202001E10400002000020218B
+:10EC6000240400010C004D78001080421600FFFAC9
+:10EC70003202001E0C004D78240400010C004D7877
+:10EC8000000020213410800097A2001E00501024A4
+:10EC90001040000200002021240400010C004D78E7
+:10ECA000001080421600FFF8000000000C004DB973
+:10ECB00000008021A7A000200C004D782404000152
+:10ECC000261000012E0200201440FFFB000000006F
+:10ECD0000C004D78000020210C004D782404000128
+:10ECE0000C004D78240400010C004D780000202118
+:10ECF000241000100C004D780000202100108042EC
+:10ED00001600FFFC00000000241000103202001E5C
+:10ED10001040000200002021240400010C004D7866
+:10ED2000001080421600FFFA3202001E0C004DB99E
+:10ED3000341080000C004DB9000000000C004D584C
+:10ED400000000000504000050010804297A2002003
+:10ED500000501025A7A20020001080421600FFF7E7
+:10ED6000000000000C004DB900008021A7A0002089
+:10ED70000C004D7824040001261000012E02002012
+:10ED80001440FFFB000000000C004D780000202123
+:10ED90000C004D78240400010C004D78240400017F
+:10EDA0000C004D7800002021241000100C004D783C
+:10EDB00000002021001080421600FFFC000000002F
+:10EDC000241000103202001E10400002000020211A
+:10EDD000240400010C004D78001080421600FFFA58
+:10EDE0003202001E0C004DB9341080000C004DB9E9
+:10EDF000000000000C004D580000000050400005CD
+:10EE00000010804297A2002000501025A7A20020E9
+:10EE1000001080421600FFF7000000000C004DB902
+:10EE200000008021A7A000220C004D7824040001DE
+:10EE3000261000012E0200201440FFFB00000000FD
+:10EE40000C004D78000020210C004D7824040001B6
+:10EE50000C004D78000020210C004D7824040001A6
+:10EE6000241000100C004D7800002021001080427A
+:10EE70001600FFFC00000000241000100C004D786C
+:10EE800000002021001080421600FFFC000000005E
+:10EE90000C004D78240400010C004D780000202166
+:10EEA0003410800097A2002200501024104000026D
+:10EEB00000002021240400010C004D780010804245
+:10EEC0001600FFF8000000000C004DB90000000023
+:10EED00024040018240500020C004DDB2406000465
+:10EEE0003C1000018E106E24240200011602011D48
+:10EEF000000000003C02000194426F263C0100012A
+:10EF0000AC206E2424429FBC2C4200041040000C14
+:10EF100024040009240500010C004DDB2406040034
+:10EF200024040018240500010C004DDB24060020F9
+:10EF300024040018240500010C004DDB24062000E9
+:10EF40003C02400002421024104001233C022000F9
+:10EF50000242102410400004000000003C010001A7
+:10EF600010000003AC306F1C3C010001AC206F1C92
+:10EF70003C0300018C636F3424020005146200F925
+:10EF8000000000003C0200018C426F1C1040006732
+:10EF90003C0200040242102410400011A7A00018F7
+:10EFA0003C02000802421024104000022402020029
+:10EFB000A7A200183C0200100242102410400004D6
+:10EFC0000000000097A2001834420100A7A2001818
+:10EFD00097A600182404000910000004000028214E
+:10EFE0002404000900002821000030210C004DDB22
+:10EFF0000000000024020001A7A2001A3C02000841
+:10F00000024210241040000C3C0200020242102474
+:10F010001040000224020101A7A2001A3C020001D4
+:10F0200002421024104000053C02001097A2001A72
+:10F0300034420040A7A2001A3C02001002421024F1
+:10F040001040000E3C020002024210241040000555
+:10F050003C02000197A2001A34420080A7A2001AC5
+:10F060003C02000102421024104000053C0300A0B5
+:10F0700097A2001A34420020A7A2001A3C0300A065
+:10F0800002431024544300043C02002097A2001ABB
+:10F090001000000C344204000242102450400004CE
+:10F0A0003C02008097A2001A1000000634420800BB
+:10F0B00002421024104000040000000097A2001A31
+:10F0C00034420C00A7A2001A97A6001A24040004D8
+:10F0D0000C004DDB000028213C02000402421024F9
+:10F0E00010400004A7A0001C32425000144000044D
+:10F0F00000000000324240001040000500002021C6
+:10F100000C004CF902402021100000960000000085
+:10F1100097A6001C0000282134C612000C004DDB0D
+:10F12000A7A6001C1000008F00000000024210245F
+:10F1300010400004A7A00018324250001440000400
+:10F140000000000032424000104000053C02001068
+:10F150000C004CF90240202110000019A7A0001A51
+:10F1600002421024104000040000000097A2001882
+:10F1700010000004A7A2001897A200183442010052
+:10F18000A7A200183C020001024210241040000413
+:10F190000000000097A2001810000004A7A20018A9
+:10F1A00097A2001834422000A7A2001897A60018C2
+:10F1B000000020210C004DDB00002821A7A0001A30
+:10F1C000000080210C004D7824040001261000016D
+:10F1D0002E0200201440FFFB000000000C004D78C0
+:10F1E000000020210C004D78240400010C004D7813
+:10F1F000240400010C004D78000020212410001090
+:10F20000320200011040000200002021240400010D
+:10F210000C004D78001080421600FFFA3202000107
+:10F22000241000100C004D780000202100108042B6
+:10F230001600FFFC000000000C004DB934108000E7
+:10F240000C004DB9000000000C004D5800000000FB
+:10F25000504000050010804297A2001A005010256F
+:10F26000A7A2001A001080421600FFF7000000005D
+:10F270000C004DB900008021A7A0001A0C004D78A9
+:10F2800024040001261000012E0200201440FFFB80
+:10F29000000000000C004D78000020210C004D788B
+:10F2A000240400010C004D78240400010C004D786A
+:10F2B0000000202124100010320200011040000242
+:10F2C00000002021240400010C004D780010804231
+:10F2D0001600FFFA32020001241000100C004D78D5
+:10F2E00000002021001080421600FFFC00000000FA
+:10F2F0000C004DB9341080000C004DB90000000026
+:10F300000C004D58000000005040000500108042E5
+:10F3100097A2001A00501025A7A2001A00108042E0
+:10F320001600FFF7000000000C004DB900000000BF
+:10F330003C04000124846BCC97A6001897A7001A00
+:10F340003C0200018C426D983C0300018C636F1CF1
+:10F350003C05000D34A50205AFA200100C002B3BAC
+:10F36000AFA300148F830054240200043C01000169
+:10F37000AC226DD43C01000110000017AC236F38A3
+:10F380008F8300543C0200018C426F382463FF9C41
+:10F39000004310232C4200641440000F00000000C2
+:10F3A0008F820220240300053C010001AC236DD4B0
+:10F3B0003C03F7000043102510000007AF82022035
+:10F3C000240200063C010001AC226DD4240200118D
+:10F3D0003C010001AC226DD08FBF00348FB20030F1
+:10F3E0008FB1002C8FB0002803E0000827BD003843
+:10F3F00027BDFFD8AFB0001800808021AFB1001C3E
+:10F40000000088213202400010400013AFBF0020EE
+:10F410003C020010020210242C42000100021023C2
+:10F42000304341003C020001020210241440000657
+:10F43000347140003C020002020210241440000219
+:10F440003471600034714040000020210000282108
+:10F45000100000360220302132021000104000352A
+:10F4600000002021000028210C004DDB2406004074
+:10F4700024040018000028210C004DDB24060C0099
+:10F4800024040017000028210C004DDB2406040092
+:10F4900024040016000028210C004DDB2406000681
+:10F4A00024040017000028210C004DDB2406250051
+:10F4B00024040016000028210C004DDB2406000661
+:10F4C00024040017000028210C004DDB2406460010
+:10F4D00024040016000028210C004DDB2406000641
+:10F4E00024040017000028210C004DDB24066700CF
+:10F4F00024040016000028210C004DDB2406000621
+:10F500002404001F000028210C004DDB24060010FD
+:10F5100024040009000028210C004DDB24061500FE
+:10F52000240400090000282124061D000C004DDBE6
+:10F53000000000003C04000124846BF03C05000E38
+:10F5400034A501000200302102203821AFA00010B4
+:10F550000C002B3BAFA000148FBF00208FB1001C0C
+:10F560008FB0001803E0000827BD00288F850044F5
+:10F570008F8200443C030001004310253C03000837
+:10F58000AF8200448F8400548F82005400A328244B
+:10F5900010000002248400018F8200540082102396
+:10F5A0002C4200021440FFFC000000008F82004447
+:10F5B0003C03FFFE3463FFFF00431024AF8200448E
+:10F5C0008F8300548F8200541000000224630001D6
+:10F5D0008F820054006210232C4200021440FFFC72
+:10F5E0000000000003E0000800A010218F83004409
+:10F5F0003C02FFF03442FFFF000424800062182424
+:10F600003C0200020082202500641825AF830044DC
+:10F610008F8200443C03FFFE3463FFFF004310244D
+:10F62000AF8200448F8300548F8200541000000288
+:10F63000246300018F820054006210232C420002D8
+:10F640001440FFFC000000008F8200443C030001D6
+:10F6500000431025AF8200448F8300548F820054F2
+:10F6600010000002246300018F8200540062102306
+:10F670002C4200021440FFFC0000000003E00008E0
+:10F68000000000008F8200442403FF7F0043102409
+:10F69000AF8200448F8300548F8200541000000218
+:10F6A000246300018F820054006210232C42000268
+:10F6B0001440FFFC000000008F82004434420080B0
+:10F6C000AF8200448F8300548F82005410000002E8
+:10F6D000246300018F820054006210232C42000238
+:10F6E0001440FFFC0000000003E0000800000000E0
+:10F6F0008F8200443C03FFF03463FFFF004310247B
+:10F70000AF8200448F8200443C0300010043102577
+:10F71000AF8200448F8300548F8200541000000297
+:10F72000246300018F820054006210232C420002E7
+:10F730001440FFFC000000008F8200443C03FFFEE9
+:10F740003463FFFF00431024AF8200448F830054D2
+:10F750008F82005410000002246300018F82005445
+:10F76000006210232C4200021440FFFC0000000045
+:10F7700003E000080000000027BDFFC8AFB300246D
+:10F7800000809821AFBE002C00A0F021AFB2002075
+:10F7900000C0902133C2FFFFAFBF0030AFB50028DB
+:10F7A000AFB1001CAFB0001814400034A7B2001075
+:10F7B0003271FFFF27B20010000080210C004D784D
+:10F7C00024040001261000012E0200201440FFFB3B
+:10F7D000000000000C004D78000020210C004D7846
+:10F7E000240400010C004D78000020210C004D780D
+:10F7F0002404000124100010320200011040000215
+:10F8000000002021240400010C004D7800108042EB
+:10F810001600FFFA320200012410001002301024FA
+:10F820001040000200002021240400010C004D784B
+:10F83000001080421600FFFA023010240C004D78B0
+:10F84000240400010C004D780000202134108000B9
+:10F8500096420000005010241040000200002021B9
+:10F86000240400010C004D78001080421200007545
+:10F87000000000001000FFF6000000003275FFFFDE
+:10F8800027B10010A7A00010000080210C004D78C7
+:10F8900024040001261000012E0200201440FFFB6A
+:10F8A000000000000C004D78000020210C004D7875
+:10F8B000240400010C004D78240400010C004D7854
+:10F8C000000020212410001032020001104000022C
+:10F8D00000002021240400010C004D78001080421B
+:10F8E0001600FFFA320200012410001002B01024AA
+:10F8F0001040000200002021240400010C004D787B
+:10F90000001080421600FFFA02B010240C004DB91E
+:10F91000341080000C004DB9000000000C004D5860
+:10F9200000000000504000050010804296220000B8
+:10F9300000501025A6220000001080421600FFF79C
+:10F94000000000000C004DB90000000033C5FFFFAF
+:10F950002402000154A200042402000297A2001015
+:10F96000100000060052102514A200063271FFFF9D
+:10F9700097A200100012182700431024A7A200101D
+:10F980003271FFFF27B20010000080210C004D787B
+:10F9900024040001261000012E0200201440FFFB69
+:10F9A000000000000C004D78000020210C004D7874
+:10F9B000240400010C004D78000020210C004D783B
+:10F9C0002404000124100010320200011040000243
+:10F9D00000002021240400010C004D78001080421A
+:10F9E0001600FFFA32020001241000100230102429
+:10F9F0001040000200002021240400010C004D787A
+:10FA0000001080421600FFFA023010240C004D78DE
+:10FA1000240400010C004D780000202134108000E7
+:10FA200096420000005010241040000200002021E7
+:10FA3000240400010C004D78001080421600FFF8ED
+:10FA4000000000000C004DB9000000008FBF003026
+:10FA50008FBE002C8FB500288FB300248FB20020FA
+:10FA60008FB1001C8FB0001803E0000827BD0038DC
+:10FA700000000000000000000000000027BDFFE8BB
+:10FA8000AFBF00108EE304B824020008146201E046
+:10FA9000000000003C0200018C426F1C1440000575
+:10FAA000000000000C003DAF8F840224100001D83C
+:10FAB000000000008F8202203C0300080043102455
+:10FAC00010400026240200018F8402248F8202202D
+:10FAD0003C03040000431024104000060000000016
+:10FAE0003C010002AC208FA03C0100021000000B82
+:10FAF000AC208FC03C03000224638FA08C62000006
+:10FB000024420001AC6200002C42000214400003B9
+:10FB1000240200013C010002AC228FC03C02000222
+:10FB20008C428FC01040000630820040104000041C
+:10FB3000240200013C01000210000003AC228FC42B
+:10FB40003C010002AC208FC43C010002AC248F9C1D
+:10FB50003C0100021000000BAC208FD03C010002E1
+:10FB6000AC228FD03C010002AC208FC03C010002CF
+:10FB7000AC208FA03C010002AC208FC43C010002ED
+:10FB8000AC208F9C3C0300028C638F903C020002EF
+:10FB90008C428F94506200042463FFFF3C010002FA
+:10FBA000AC238F942463FFFF2C62000E104001945D
+:10FBB000000310803C010001002208218C226C000F
+:10FBC0000040000800000000240200023C01000286
+:10FBD000AC208FC03C010002AC208FA03C01000291
+:10FBE000AC208F9C3C010002AC208FC43C01000281
+:10FBF000AC208FB83C010002AC208FB0AF80022453
+:10FC00003C010002AC228F903C0200028C428FD05B
+:10FC10001440004F3C02FDFF3442FFFF0C003DAF9B
+:10FC20000282A024AF8002048F8202002403FFFD21
+:10FC300000431024AF8202003C010002AC208FE0A0
+:10FC40008F8300543C0200028C428FB824040001D0
+:10FC50003C010002AC248FCC244200013C01000294
+:10FC6000AC228FB82C4200043C010002AC238FB4BC
+:10FC700014400006240200033C010001AC246D9CEA
+:10FC80003C0100021000015EAC208FB83C01000274
+:10FC90001000015BAC228F908F8300543C02000265
+:10FCA0008C428FB42463D8F0004310232C422710D9
+:10FCB00014400003240200043C010002AC228F9097
+:10FCC0003C0200028C428FD0144000213C02FDFF18
+:10FCD0003442FFFF1000014A0282A0243C040001CC
+:10FCE0008C846F203C0100020C005084AC208FA853
+:10FCF0003C0200028C428FDCAF8202043C02000214
+:10FD00008C428FD0144000123C03FDFF8F8202040E
+:10FD10003463FFFF304200301440012F0283A024DF
+:10FD20003C0300028C638FDC240200053C010002CE
+:10FD3000AC228F903C01000210000131AC238FE017
+:10FD40003C0200028C428FD0104000103C02FDFFAC
+:10FD50003C0200018C426E3C244200013C01000147
+:10FD6000AC226E3C2C42000214400125240200010A
+:10FD70003C010001AC226E443C010001AC206E3C11
+:10FD80003C0100011000011EAC226D9C3C030002EE
+:10FD90008C638FC03442FFFF106001190282A024DF
+:10FDA0003C0200028C428F9C1040011500000000B4
+:10FDB0003C010002AC228FC8240200033C01000277
+:10FDC000AC228FA0100000B8240200063C01000203
+:10FDD000AC208FA88F82020434420040AF8202041C
+:10FDE0003C0200028C428FE0240300073C01000229
+:10FDF000AC238F90344200403C010002AC228FE0E3
+:10FE00003C0200028C428FC0104000050000000040
+:10FE10003C0200028C428F9C104000F02402000241
+:10FE20003C05000224A58FA08CA200002C424E218C
+:10FE3000104000EA240200023C0200028C428FC4FF
+:10FE4000104000EF2404FFBF3C0200028C428F9C54
+:10FE50003C0300028C638FC8004410240064182403
+:10FE600010430004240200013C010002100000E4E1
+:10FE7000AC228F9024020003ACA2000024020008F0
+:10FE80003C010002AC228F903C0200028C428FCCDD
+:10FE90001040000C240200013C0400020C005091B0
+:10FEA0008C848F9C3C0200028C428FE81440000539
+:10FEB000240200013C0200028C428FE41040000644
+:10FEC000240200013C010001AC226D9C3C010002B7
+:10FED000100000CBAC208FB83C0200028C428FB0E7
+:10FEE0003C0300028C638F9C2C420001000210C076
+:10FEF000306300083C010002AC228FB03C010002DC
+:10FF0000AC238FAC8F830054240200093C01000213
+:10FF1000AC228F903C010002100000B9AC238FB4DA
+:10FF20008F8300543C0200028C428FB42463D8F0CB
+:10FF3000004310232C4227101440009F00000000B3
+:10FF40003C0200028C428FC01040000500000000FF
+:10FF50003C0200028C428F9C104000A02402000250
+:10FF60003C03000224638FA08C6200002C424E21CF
+:10FF70001040009A240200023C0200028C428FCC06
+:10FF80001040000E000000003C0200028C428F9CDA
+:10FF90003C010002AC208FCC304200801040002F8A
+:10FFA0002402000C8F820204304200801440000CB6
+:10FFB00024020003100000292402000C3C0200026D
+:10FFC0008C428F9C304200801440000524020003C4
+:10FFD0008F820204304200801040001F2402000380
+:10FFE000AC6200002402000A3C010002AC228F90A7
+:10FFF0003C04000224848FD88C8200003C03000261
+:020000021000EC
+:100000008C638FB000431025AF8202048C83000004
+:100010003C0400028C848FB02402000B3C010002DF
+:10002000AC228F90006418253C010002AC238FE0C5
+:100030003C05000224A58FA08CA200002C424E217A
+:1000400010400066240200023C0200028C428FD065
+:1000500010400005000000002402000C3C010002DA
+:1000600010000067AC228F903C0200028C428FC0CF
+:1000700010400063000000003C0400028C848F9C50
+:1000800010800055308200083C0300028C638FAC66
+:100090001062005B240200033C010002AC248FC804
+:1000A000ACA20000240200063C0100021000005433
+:1000B000AC228F908F82020034420002AF82020095
+:1000C0008F8300542402000D3C010002AC228F906B
+:1000D0003C010002AC238FB48F8300543C02000229
+:1000E0008C428FB42463D8F0004310232C42271095
+:1000F00014400031000000003C0200028C428FD00E
+:10010000104000202402000E3C0300028C638FE4A8
+:100110003C01000214600015AC228F900C003E6D73
+:10012000000000003C0500018CA56D980C00529B5E
+:10013000000020213C0300018C636D982402000420
+:10014000146200052403FFFB3C0200018C426D9405
+:10015000100000032403FFF73C0200018C426D9461
+:10016000004310243C010001AC226D948F830224D3
+:100170003C0202003C010002AC238FEC1000002086
+:100180000282A0253C0200028C428FC01040000574
+:10019000000000003C0200028C428F9C1040000FC7
+:1001A000240200023C0200028C428FA02C424E210D
+:1001B0001040000A240200023C0200028C428FC060
+:1001C0001040000F000000003C0200028C428F9C97
+:1001D0001440000B00000000240200023C01000259
+:1001E00010000007AC228F903C0200028C428FC0AE
+:1001F00010400003000000000C003DAF00000000B4
+:100200008F8202203C03F70000431025AF820220BA
+:100210008FBF001003E0000827BD00183C03000258
+:1002200024638FE88C6200001040000534422000F7
+:100230003C010002AC228FDC10000003AC60000027
+:100240003C010002AC248FDC03E000080000000049
+:1002500027BDFFE030820030AFBF00183C01000234
+:10026000AC228FE4144000673C02FFFF34421F0EB3
+:1002700000821024144000612402003030822000EB
+:100280001040005D3083800000031A0230820001BC
+:10029000000212003C0400018C846F2000621825CB
+:1002A000000331C23C03000124636E4830828000A9
+:1002B00000021202308400010004220000441025D4
+:1002C000000239C2000610800043102100471021AF
+:1002D000904300002402000110620025000000008D
+:1002E00010600007240200021062001324020003C1
+:1002F0001062002C3C05000F1000003700000000C9
+:100300008F8202002403FEFF00431024AF8202000C
+:100310008F8202203C03FFFE3463FFFF0043102462
+:10032000AF8202203C010002AC2090043C0100029C
+:1003300010000034AC20900C8F8202003442010087
+:10034000AF8202008F8202203C03FFFE3463FFFF76
+:1003500000431024AF820220240201003C0100026D
+:10036000AC2290043C01000210000026AC20900C4E
+:100370008F8202002403FEFF00431024AF8202009C
+:100380008F8202203C03000100431025AF8202202F
+:100390003C010002AC2090043C0100021000001956
+:1003A000AC23900C8F82020034420100AF82020025
+:1003B0008F8202203C03000100431025AF820220FF
+:1003C000240201003C010002AC2290043C01000226
+:1003D0001000000CAC23900C34A5FFFF3C0400017E
+:1003E00024846C38AFA300100C002B3BAFA000148A
+:1003F0001000000400000000240200303C01000254
+:10040000AC228FE88FBF001803E0000827BD002052
+:1004100000000000000000000000000027BDFFC831
+:10042000AFB2002800809021AFB3002C00A098212B
+:10043000AFB0002000C080213C04000124846C5037
+:100440003C0500093C0200018C426D9834A59001E6
+:100450000240302102603821AFBF0030AFB100242C
+:10046000A7A0001AAFB000140C002B3BAFA20010E5
+:1004700024020002126200832E6200031040000575
+:10048000240200011262000A000000001000017343
+:100490000000000024020004126200F82402000898
+:1004A000126200F73C02FFEC1000016C000000003B
+:1004B0003C0200018C426D94304200021440000462
+:1004C000001289403C02FFFB3442FFFF02028024FD
+:1004D0003C01000200310821AC308FFC3C0240009E
+:1004E000020210241040004E001023C2308400305D
+:1004F000001013823042001C3C03000124636DD8BD
+:1005000000431021008238213C0200200202102406
+:1005100010400006240201003C01000200310821C5
+:10052000AC229000100000053C0200803C0100025B
+:1005300000310821AC2090003C020080020210240F
+:1005400010400006001219403C0200013C0100026C
+:100550000023082110000005AC2290080012114071
+:100560003C01000200220821AC20900894E4000025
+:100570003C0300018C636F402402000510620010F0
+:10058000A7A400183202400010400002348240004C
+:10059000A7A200182404000194E20002240500042C
+:1005A00024E60002344200010C0045BEA4E2000231
+:1005B00024040001000028210C0045BE27A60018D5
+:1005C0003C0200018C426D98241100013C010001A5
+:1005D000AC316DA414530004320280000C003DAF16
+:1005E00000000000320280001040011C00000000EA
+:1005F0000C003DAF000000003C0300018C636F4025
+:100600002402000510620115240200023C010001D1
+:10061000AC316D9C3C01000110000110AC226D98C2
+:10062000240400012405000427B0001A0C0045BE74
+:100630000200302124040001000028210C0045BEE6
+:10064000020030213C020002005110218C428FF444
+:100650003C0400018C846D983C03BFFF3463FFFFB2
+:100660003C010001AC336DA4004310243C010002A6
+:1006700000310821109300F7AC228FF4100000F72E
+:10068000000000003C02200002021024104000057F
+:10069000240200013C010001AC226F1C1000000488
+:1006A000001289403C010001AC206F1C00128940FF
+:1006B0003C01000200310821AC308FF83C024000C0
+:1006C0000202102414400014000000003C0200014B
+:1006D0008C426F1C10400006240400042405000115
+:1006E0000C004DDB2406200024020001AEE204B819
+:1006F0003C020002005110218C428FF03C03BFFFEE
+:100700003463FFFF004310243C0100020031082144
+:10071000100000D0AC228FF03C0200018C426F1C14
+:10072000104000283C0300A0020310245443000D95
+:100730003C0200203C0200018C426F202403010097
+:100740003C01000200310821AC2390043C0300016D
+:100750003C01000200310821AC23900C1000001570
+:100760003442040002021024104000082403010057
+:100770003C0200018C426F203C0100020031082144
+:10078000AC2390041000000B344208003C020080AF
+:10079000020210241040002E3C0300013C02000124
+:1007A0008C426F203C01000200310821AC23900CE8
+:1007B00034420C003C010001AC226F2010000025E7
+:1007C000240400013C020020020210241040000614
+:1007D000240201003C01000200310821AC229004F7
+:1007E000100000053C0200803C010002003108219D
+:1007F000AC2090043C02008002021024104000074C
+:10080000001219403C0200013C01000200230821B3
+:10081000AC22900C100000062404000100121140CC
+:100820003C01000200220821AC20900C24040001AD
+:100830000000282127B0001E0C00457C020030215A
+:1008400024040001000028210C00457C0200302116
+:10085000240400012405000127B0001C0C00457C85
+:100860000200302124040001240500010C00457C15
+:100870000200302110000077000000003C02FFEC75
+:100880003442FFFF020280243C020008020280255D
+:10089000001211403C01000200220821AC308FF808
+:1008A0003C02200002021024104000090000000059
+:1008B0003C0200018C426E441440000524020001F9
+:1008C0003C010001AC226F1C100000043C024000FF
+:1008D0003C010001AC206F1C3C02400002021024CD
+:1008E0001440001D24020E013C0300018C636F1CA8
+:1008F000AF8202383C010001AC206DB010600005F1
+:10090000240220203C010001AC226F2024020001BF
+:10091000AEE204B83C04BFFF001219403C020002E2
+:10092000004310218C428FF03C0500018CA56D988E
+:100930003484FFFF004410243C01000200230821FE
+:10094000AC228FF02402000110A20044000000003D
+:1009500010000040000000003C0200018C426F1CAF
+:100960001040001C240220003C010001AC226F203A
+:100970003C0300A0020310241443000500121140A0
+:100980003402A0003C0100011000002DAC226F20B9
+:100990003C030002006218218C638FF83C020020A7
+:1009A0000062102410400004240220013C010001D8
+:1009B00010000023AC226F203C0200800062102453
+:1009C0001040001F3402A0013C0100011000001C77
+:1009D000AC226F203C0200200202102410400007CD
+:1009E00000121940240201003C01000200230821EA
+:1009F000AC229004100000063C020080001211405E
+:100A00003C01000200220821AC2090043C0200803E
+:100A10000202102410400006001219403C0200019E
+:100A20003C0100020023082110000005AC22900CBC
+:100A3000001211403C01000200220821AC20900C61
+:100A40003C0300018C636D982402000110620003D6
+:100A5000000000000C003DAF000000008FBF003020
+:100A60008FB3002C8FB200288FB100248FB00020EC
+:100A700003E0000827BD003827BDFFB0AFB3003C3E
+:100A800000009821AFB500400000A821AFB10034AC
+:100A90000000882124020002AFBF0048AFBE00441E
+:100AA000AFB20038AFB00030AFA4002CA7A0001A3E
+:100AB000A7A00018A7A00020A7A0001EA7A00022A2
+:100AC00010A20130A7A0001C2CA2000310400005BA
+:100AD0002402000110A2000A3C0240001000025D46
+:100AE000022010212402000410A2020A240200089D
+:100AF00010A202080220102110000256000000007F
+:100B00008FA8002C000881403C03000200701821CF
+:100B10008C638FFC0062102414400009240400013F
+:100B20003C027FFF3442FFFF006288243C01000248
+:100B300000300821AC318FF4100002460220102151
+:100B4000240500010C00457C27A6001824040001A0
+:100B5000240500010C00457C27A6001897A2001868
+:100B600030420004104000D93C1140003C0200011A
+:100B70008C426F402443FFFF2C620006104000D9D6
+:100B8000000310803C010001002208218C226C68C7
+:100B900000400008000000002404000124050011AA
+:100BA00027B0001A0C00457C02003021240400010B
+:100BB000240500110C00457C0200302197A3001A87
+:100BC00030624000104000023C1500103C15000847
+:100BD00030628000104000AA3C130001100000A801
+:100BE0003C130002240400012405001427B0001A5D
+:100BF0000C00457C0200302124040001240500146F
+:100C00000C00457C0200302197A3001A30621000CE
+:100C1000104000023C1500103C150008306208002E
+:100C2000104000973C130001100000953C13000297
+:100C3000240400012405001927B0001C0C00457C89
+:100C40000200302124040001240500190C00457C19
+:100C50000200302197A2001C304307002402040048
+:100C600010620027286204011040000E24020200D6
+:100C70001062001F286202011040000524020100DA
+:100C80005062001E3C1300011000001E24040001ED
+:100C900024020300506200193C13000210000019E6
+:100CA00024040001240206001062000D28620601DF
+:100CB00010400005240205005062000B3C130002A6
+:100CC0001000001024040001240207001462000D2B
+:100CD000240400013C1300041000000A3C15000825
+:100CE000100000063C130004100000053C1500082D
+:100CF0003C130001100000023C1500083C150010D8
+:100D0000240400012405001827B0001E0C00457CB7
+:100D10000200302124040001240500180C00457C49
+:100D2000020030218FA8002C97A7001E0008114058
+:100D30003C06000200C230218CC68FF497A200222C
+:100D40003C10000126106C5C02002021AFA20010B4
+:100D500097A2001C3C05000C34A503030C002B3BA0
+:100D6000AFA200143C020004166200103C02000115
+:100D70008F84005424030001240200023C0100017E
+:100D8000AC236D9C3C010001AC226D983C0100013C
+:100D9000AC236DA43C010001AC236E243C01000196
+:100DA000AC246F301000004F02B388251662003962
+:100DB0003C0280003C0200018C426E201440001E68
+:100DC0002404001800002021000028210C004DDB25
+:100DD000340680008F8300548F82005402B388252C
+:100DE00010000002246300328F820054006210233E
+:100DF0002C4200331440FFFC000000008F8300549D
+:100E0000240200013C010001AC226E203C010001E3
+:100E1000AC226D9C3C010001AC226D983C010001AC
+:100E2000AC226DA43C010001AC226E243C01000107
+:100E30001000002CAC236F30000028210C004DDB8B
+:100E400024060404000020212405001E27A6001803
+:100E5000240200020C0045BEA7A2001800002021B9
+:100E60000000282127A600180C0045BEA7A00018E6
+:100E700024040018240500020C004DDB24060004A5
+:100E80003C0280000222102502B318251000001534
+:100E90000043882502221025027518250043882565
+:100EA0000200202197A6001C3C0700018CE76D98EA
+:100EB0003C05000C34A50326AFB300100C002B3BFF
+:100EC000AFB1001410000007000000003C11000248
+:100ED000023088218E318FFC3C027FFF3442FFFFBD
+:100EE000022288243C0200018C426DA81040001EA2
+:100EF000000000003C0200018C426F1C1040000208
+:100F00003C022000022288258FA8002C00081140F6
+:100F10003C010002002208218C22900010400003B6
+:100F20003C02002010000005022288253C02FFDF61
+:100F30003442FFFF022288248FA8002C00081140B1
+:100F40003C010002002208218C229008104000037E
+:100F50003C02008010000004022288253C02FF7F32
+:100F60003442FFFF022288248FA8002C0008114081
+:100F70003C01000200220821AC318FF41000013541
+:100F8000022010218FA8002C0008F1403C03000231
+:100F9000007E18218C638FF83C0240000062102410
+:100FA00014400009240400013C027FFF3442FFFF8B
+:100FB000006288243C010002003E0821AC318FF021
+:100FC0001000012402201021000028210C00457C83
+:100FD00027A6001824040001000028210C00457CED
+:100FE00027A60018240400012405000127B20020D0
+:100FF0000C00457C0240302124040001240500013E
+:101000000C00457C0240302124040001240500042A
+:1010100027B1001E0C00457C022030212404000171
+:10102000240500040C00457C02203021240400012A
+:101030002405000527B000220C00457C0200302169
+:1010400024040001240500050C00457C0200302129
+:1010500024040001240500100C00457C27A600187C
+:1010600024040001240500100C00457C27A600186C
+:10107000240400012405000A0C00457C02403021B4
+:10108000240400012405000A0C00457C02403021A4
+:1010900024040001240500180C00457C02203021A6
+:1010A00024040001240500180C00457C0220302196
+:1010B00024040001240500010C00457C27A600182B
+:1010C00024040001240500010C00457C27A600181B
+:1010D00097A2001830420004104000663C11400006
+:1010E0003C0300018C636F34240200051462006726
+:1010F000240400012405001927B0001C0C00457CC5
+:101100000200302124040001240500190C00457C54
+:101110000200302197A2001C304307002402040083
+:1011200010620027286204011040000E2402020011
+:101130001062001F28620201104000052402010015
+:101140005062001E3C1300011000001E3C0200040F
+:1011500024020300506200193C1300021000001921
+:101160003C020004240206001062000D2862060101
+:1011700010400005240205005062000B3C130002E1
+:10118000100000103C020004240207001462000D4D
+:101190003C0200043C1300041000000A3C15000847
+:1011A000100000063C130004100000053C15000868
+:1011B0003C130001100000023C1500083C15001013
+:1011C0003C020004126200173C0280008F8200542F
+:1011D000241000013C010001AC306D9C3C01000179
+:1011E000AC306D983C010001AC306DA43C010001B5
+:1011F000AC306E243C010001AC226F303C02000197
+:101200001662002202758825000020210000282196
+:101210000C004DDB340680003C0100011000001B77
+:10122000AC306E200222102502B318250043882519
+:1012300097A6001C3C0200018C426F1C3C07000179
+:101240008CE76D983C04000124846C5CAFA2001014
+:1012500097A2001E3C05000C34A503233C010001AD
+:10126000AC206E200C002B3BAFA200141000000736
+:10127000000000003C110002023E88218E318FF0F8
+:101280003C027FFF3442FFFF022288243C0200011F
+:101290008C426DA810400069000000003C02000173
+:1012A0008C426F1C104000023C0220000222882564
+:1012B0008FA8002C000811403C01000200220821E8
+:1012C0008C229004104000033C0200201000000516
+:1012D000022288253C02FFDF3442FFFF02228824DD
+:1012E0008FA8002C000811403C01000200220821B8
+:1012F0008C22900C104000033C0200801000004F34
+:10130000022288253C02FF7F3442FFFF1000004B81
+:10131000022288248FA8002C000829403C030002E8
+:10132000006518218C638FF83C0240000062102495
+:10133000144000083C027FFF3442FFFF0062882413
+:101340003C01000200250821AC318FF01000004163
+:10135000022010213C0200018C426DA81040003494
+:101360003C11C00C3C0200018C426E443C04C00C99
+:10137000348420003C0300018C636F1C0002102B9E
+:10138000000210230044102410600003005188253F
+:101390003C022000022288253C0200020045102168
+:1013A0008C429004104000033C0200201000000416
+:1013B000022288253C02FFDF3442FFFF02228824FC
+:1013C0008FA8002C000811403C01000200220821D7
+:1013D0008C22900C104000033C020080100000049E
+:1013E000022288253C02FF7F3442FFFF022288242C
+:1013F0003C0200018C426E30104000023C020800AA
+:10140000022288253C0200018C426E34104000020A
+:101410003C020400022288253C0200018C426E3806
+:10142000104000063C020100100000040222882542
+:101430003C027FFF3442FFFF006288248FA8002C0B
+:10144000000811403C01000200220821AC318FF05D
+:10145000022010218FBF00488FBE00448FB500408E
+:101460008FB3003C8FB200388FB100348FB00030A2
+:1014700003E0000827BD005027BDFFD0AFB2002811
+:1014800000809021AFBF002CAFB10024AFB000208E
+:101490008F8402003C1000018E106D988F86022010
+:1014A000240200021202005C2E020003104000051C
+:1014B000240200011202000A001219401000010C5F
+:1014C0000000000024020004120200BF24020008F1
+:1014D000120200BE00128940100001050000000049
+:1014E0003C05000200A328218CA58FFC3C100002C3
+:1014F000020380218E108FF43C02400000A21024D1
+:10150000104000383C020008020210241040002065
+:10151000348400023C020002004310218C429000FF
+:101520001040000534840020348401003C02002077
+:1015300010000006020280252402FEFF0082202403
+:101540003C02FFDF3442FFFF020280240012114000
+:101550003C010002002208218C2290081040000566
+:101560003C02000100C230253C0200801000001641
+:10157000020280253C02FFFE3442FFFF00C23024FD
+:101580003C02FF7F3442FFFF1000000F0202802464
+:101590002402FEDF008220243C02FFFE3442FFFFD3
+:1015A00000C230243C02FF5F3442FFFF020280246D
+:1015B0003C01000200230821AC2090003C01000205
+:1015C00000230821AC209008AF840200AF860220DF
+:1015D0008F82022034420002AF8202201000000AF3
+:1015E000001211403C02BFFF3442FFFF8F83020014
+:1015F000020280242402FFFD006218240C003DAF8B
+:10160000AF830200001211403C01000200220821B9
+:10161000100000B7AC308FF43C0200018C426F1C0C
+:101620001040006924050004240400010C00457CDE
+:1016300027A6001824040001240500050C00457CA1
+:1016400027A6001A97A3001897A2001A3C040001CD
+:1016500024846E4830630C0000031A8230420C0070
+:1016600000021282A7A2001A00021080004410217A
+:1016700000431021A7A30018904800002402000195
+:101680003103FFFF106200292862000210400005AC
+:101690000000000010600009000000001000003D84
+:1016A0000000000010700013240200031062002CE0
+:1016B0000000000010000037000000008F820200D0
+:1016C0002403FEFF00431024AF8202008F82022019
+:1016D0003C03FFFE3463FFFF00431024AF8202206F
+:1016E0003C010002AC2090043C01000210000032DA
+:1016F000AC20900C8F82020034420100AF820200C5
+:101700008F8202203C03FFFE3463FFFF004310245E
+:10171000AF820220240201003C010002AC229004AE
+:101720003C01000210000024AC20900C8F820200CB
+:101730002403FEFF00431024AF8202008F820220A8
+:101740003C03000100431025AF8202203C0100024F
+:10175000AC2090043C01000210000017AC23900C58
+:101760008F82020034420100AF8202008F82022089
+:101770003C03000100431025AF8202202402010037
+:101780003C010002AC2290043C0100021000000A5F
+:10179000AC23900C3C04000124846C8097A6001AB2
+:1017A00097A700183C05000134A5FFFFAFA8001063
+:1017B0000C002B3BAFA000148F82020034420002C9
+:1017C0001000004BAF820200001289403C0500026D
+:1017D00000B128218CA58FF83C1000020211802155
+:1017E0008E108FF03C02400000A210241440001024
+:1017F000000000003C0200018C426F1C14400005F8
+:101800003C02BFFF8F82020034420002AF8202001E
+:101810003C02BFFF3442FFFF0C003DAF02028024B8
+:101820003C0100020031082110000031AC308FF083
+:101830003C0200018C426F1C104000053C0200205D
+:101840003C0200018C426E44104000253C02002006
+:1018500000A210241040000734840020240201005C
+:101860003C01000200310821AC2290041000000667
+:10187000348401003C01000200310821AC209004B6
+:101880002402FEFF008220243C02008000A21024DB
+:1018900010400007001219403C0200013C01000208
+:1018A00000230821AC22900C1000000800C2302553
+:1018B000001211403C01000200220821AC20900CD3
+:1018C0003C02FFFE3442FFFF00C23024AF8402001E
+:1018D000AF8602208F82022034420002AF820220B3
+:1018E000001211403C01000200220821AC308FF0B0
+:1018F0008FBF002C8FB200288FB100248FB0002042
+:1019000003E0000827BD003000000000000018219F
+:10191000308400FF2405FFDF2406FFBF00641007AA
+:101920003042000110400004000000008F8200449B
+:1019300010000003344200408F820044004610240F
+:10194000AF8200448F82004434420020AF820044C2
+:101950008F82004400451024AF82004424630001BC
+:10196000286200085440FFEE0064100703E00008FE
+:10197000000000002C8200081040001B0000000046
+:101980002405FFDF2406FFBF000418803C0200018D
+:1019900024426E60006218212464000490620000FA
+:1019A00010400004000000008F820044100000037B
+:1019B000344200408F82004400461024AF8200442D
+:1019C0008F82004434420020AF8200448F82004462
+:1019D00000451024AF820044246300010064102BF2
+:1019E0001440FFEE0000000003E0000800000000CB
+:1019F0000000000000000000000000008F8400C410
+:101A00008F8600E08F8700E42402FFF800C22824BC
+:101A100010E5001A27623FF814E2000224E80008EB
+:101A200027683000550500048D0A000030C200040C
+:101A300014400012008050218CE900008F42013CCC
+:101A4000014948230049182B94EB0006106000025E
+:101A500025630050004948210123182B5040000302
+:101A60008F4201FC03E0000800E01021AF8800E88D
+:101A700024420001AF4201FCAF8800E403E000080B
+:101A80000000102103E00008000000008F8300E444
+:101A900027623FF81062000424620008AF8200E869
+:101AA00003E00008AF8200E427623000AF8200E864
+:101AB00003E00008AF8200E403E00008000000003B
+:101AC0000000000000000000000000008F880120DE
+:101AD00027624FE08F8301281502000225090020AC
+:101AE00027694800112300128FA20010AD040000E6
+:101AF000AD050004AD060008A507000E8FA3001475
+:101B0000AD0200188FA20018AD03001C25030016BB
+:101B1000AD020010AD030014AF8901208F4300FC1B
+:101B2000240200012463FFFF03E00008AF4300FC30
+:101B30008F430324000010212463000103E0000808
+:101B4000AF43032403E00008000000008F88010079
+:101B5000276247E08F830108150200022509002053
+:101B6000276940001123000F8FA20010AD04000070
+:101B7000AD050004AD060008A507000E8FA30014F4
+:101B8000AD0200188FA20018AD03001C250300163B
+:101B9000AD020010AD030014AF89010003E000089E
+:101BA000240200018F430328000010212463000158
+:101BB00003E00008AF43032803E000080000000032
+:101BC00000000000000000000000000024486561E3
+:101BD0006465723A202F70726F6A656374732F7236
+:101BE00063732F73772F67652F2E2F6E69632F66B0
+:101BF00077322F636F6D6D6F6E2F66776D61696ED3
+:101C00002E632C7620312E312E322E343520313970
+:101C100039392F30312F32342030303A31303A35A3
+:101C20003520736875616E67204578702024000048
+:101C3000657674526E674600516576744600000002
+:101C400051657674505F46004D657674526E6746F6
+:101C5000000000004D516576744600004D516576D8
+:101C6000505F46005173436F6E495F4600000000AD
+:101C70005173436F6E734600517250726F64460029
+:101C80006261644D656D537A0000000068775665A7
+:101C900072000000626164487756657200000000BF
+:101CA0002A2A4441574E5F41000000007478527860
+:101CB0004266537A00000000626641746E4D726B9A
+:101CC000000000007265645A6F6E6531000000000C
+:101CD000706369436F6E660067656E436F6E660082
+:101CE0002A646D615244666C000000002A50414E27
+:101CF00049432A002E2E2F2E2E2F2E2E2F2E2E2F02
+:101D00002E2E2F7372632F6E69632F6677322F63C7
+:101D10006F6D6D6F6E2F66776D61696E2E6300005B
+:101D2000726362466C616773000000006261645216
+:101D30007852636200000000676C6F62466C6773E4
+:101D4000000000002B5F646973705F6C6F6F700040
+:101D50002B65765F68616E646C65720063616E749A
+:101D600031446D61000000002B715F646D615F7430
+:101D70006F5F6E69635F636B73756D002B685F7374
+:101D8000656E645F646174615F72656164795F63ED
+:101D90006B73756D000000002B685F646D615F728E
+:101DA000645F6173736973745F636B73756D000057
+:101DB00074436B736D4F6E002B715F646D615F7464
+:101DC0006F5F6E69630000002B685F73656E645F10
+:101DD000646174615F726561647900002B685F649F
+:101DE0006D615F72645F61737369737400000000FA
+:101DF00074436B736D4F6666000000002B685F7361
+:101E0000656E645F62645F72656164790000000002
+:101E10006873745352696E67000000006261645316
+:101E200052696E67000000006E69635352696E6705
+:101E30000000000077446D61416C6C4100000000BF
+:101E40002B715F646D615F746F5F686F73745F6344
+:101E50006B73756D000000002B685F6D61635F72CE
+:101E6000785F636F6D705F636B73756D000000006A
+:101E70002B685F646D615F77725F61737369737400
+:101E80005F636B73756D000072436B736D4F6E0013
+:101E90002B715F646D615F746F5F686F73740000B6
+:101EA0002B685F6D61635F72785F636F6D700000B8
+:101EB0002B685F646D615F77725F617373697374C0
+:101EC0000000000072436B736D4F666600000000F7
+:101ED0002B685F726563765F62645F7265616479C7
+:101EE000000000002B685F726563765F6A756D6243
+:101EF0006F5F62645F726561647900002B685F7276
+:101F00006563765F6D696E695F62645F7265616467
+:101F1000790000002B6D685F636F6D6D616E64000A
+:101F20002B685F74696D6572000000002B685F6448
+:101F30006F5F7570646174655F74785F636F6E73F3
+:101F4000000000002B685F646F5F757064617465EA
+:101F50005F72785F70726F64000000002B636B73B8
+:101F6000756D3136000000002B7065656B5F6D612B
+:101F7000635F72785F7761002B7065656B5F6D6181
+:101F8000635F7278000000002B6465715F6D6163B0
+:101F90005F7278002B685F6D61635F72785F617458
+:101FA000746E0000626164526574537A0000000030
+:101FB000727842644266537A000000002B6E756CA2
+:101FC0006C5F68616E646C657200000066774F70CC
+:101FD0004661696C000000002B685F757064617475
+:101FE000655F6C65643400002B685F7570646174B4
+:101FF000655F6C65643600002B685F7570646174A2
+:10200000655F6C6564320000696E74537461746559
+:10201000000000002A2A696E697443700000000005
+:102020002373637265616D0069537461636B4572FC
+:102030000000000070726F62654D656D0000000069
+:102040002A2A4441574E5F42000000002B73775FFD
+:10205000646D615F6173736973745F706C75735FD6
+:1020600074696D65720000002B267072656C6F617B
+:10207000645F77725F646573637200002B26707211
+:10208000656C6F61645F72645F64657363720000A6
+:102090002B685F68665F74696D65720024486561CE
+:1020A0006465723A202F70726F6A656374732F7261
+:1020B00063732F73772F67652F2E2F6E69632F66DB
+:1020C00077322F636F6D6D6F6E2F74696D65722E31
+:1020D000632C7620312E312E322E33352031393992
+:1020E000392F30312F32372031393A30393A3530C3
+:1020F0002068617965732045787020240000000015
+:10210000657674526E67460051657674460000002D
+:1021100051657674505F46004D657674526E674621
+:10212000000000004D516576744600004D51657603
+:10213000505F46005173436F6E495F4600000000D8
+:102140005173436F6E734600517250726F64460054
+:10215000542D446D6152643200000000542D446DD2
+:102160006152643100000000542D446D615264429C
+:1021700000000000542D446D6157723200000000D1
+:10218000542D446D6157723100000000542D446D90
+:1021900061577242000000000000000024486561A1
+:1021A0006465723A202F70726F6A656374732F7260
+:1021B00063732F73772F67652F2E2F6E69632F66DA
+:1021C00077322F636F6D6D6F6E2F636F6D6D616E04
+:1021D000642E632C7620312E312E322E323820316F
+:1021E0003939392F30312F32302031393A34393AB8
+:1021F000343920736875616E67204578702024003B
+:10220000657674526E67460051657674460000002C
+:1022100051657674505F46004D657674526E674620
+:10222000000000004D516576744600004D51657602
+:10223000505F46005173436F6E495F4600000000D7
+:102240005173436F6E734600517250726F64460053
+:102250003F48636D644D6278000000003F636D6429
+:1022600048737453000000003F636D644D634D6418
+:10227000000000003F636D6450726F6D000000004D
+:102280003F636D644C696E6B000000003F636D64DA
+:1022900045727200000086AC00008E5C00008E5C0F
+:1022A00000008DE400008B7800008E3000008E5C12
+:1022B00000008790000088000000899000008A6874
+:1022C00000008A3400008E5C0000887000008B24BF
+:1022D00000008E5C00008B34000087B4000088246E
+:1022E00000000000000000000000000024486561BC
+:1022F0006465723A202F70726F6A656374732F720F
+:1023000063732F73772F67652F2E2F6E69632F6688
+:1023100077322F636F6D6D6F6E2F6D636173742EE7
+:10232000632C7620312E312E322E38203139393837
+:102330002F31322F30382030323A33363A3336208C
+:10234000736875616E672045787020240000000076
+:10235000657674526E6746005165767446000000DB
+:1023600051657674505F46004D657674526E6746CF
+:10237000000000004D516576744600004D516576B1
+:10238000505F46005173436F6E495F460000000086
+:102390005173436F6E734600517250726F64460002
+:1023A0006164644D63447570000000006164644DB5
+:1023B0006346756C0000000064656C4D634E6F45AC
+:1023C00000000000000000000000000024486561DB
+:1023D0006465723A202F70726F6A656374732F722E
+:1023E00063732F73772F67652F2E2F6E69632F66A8
+:1023F00077322F636F6D6D6F6E2F646D612E632C5E
+:102400007620312E312E322E323420313939382F88
+:1024100031322F32312030303A33333A3039207371
+:102420006875616E67204578702024006576745267
+:102430006E674600516576744600000051657674FB
+:10244000505F46004D657674526E6746000000008E
+:102450004D516576744600004D516576505F4600DB
+:102460005173436F6E495F46000000005173436F24
+:102470006E734600517250726F6446007377446DFC
+:10248000614F66660000000031446D614F6E0000D0
+:102490007377446D614F6E002372446D6141544EF9
+:1024A0000000000072446D6141544E300000000095
+:1024B00072446D6141544E310000000072446D6100
+:1024C000344762002A50414E49432A002E2E2F2EB7
+:1024D0002E2F2E2E2F2E2E2F2E2E2F7372632F6E19
+:1024E00069632F6677322F636F6D6D6F6E2F646D2A
+:1024F000612E63002377446D6141544E000000005B
+:1025000077446D6141544E300000000077446D61A6
+:1025100041544E310000000077446D613447620041
+:102520000000000000000000000000002448656179
+:102530006465723A202F70726F6A656374732F72CC
+:1025400063732F73772F67652F2E2F6E69632F6646
+:1025500077322F636F6D6D6F6E2F74726163652EAE
+:10256000632C7620312E312E322E352031393938F8
+:102570002F30392F33302031383A35303A32382045
+:10258000736875616E672045787020240000000034
+:102590000000000000000000000000002448656109
+:1025A0006465723A202F70726F6A656374732F725C
+:1025B00063732F73772F67652F2E2F6E69632F66D6
+:1025C00077322F636F6D6D6F6E2F646174612E6350
+:1025D0002C7620312E312E322E31322031393939BC
+:1025E0002F30312F32302031393A34393A353120D9
+:1025F000736875616E6720457870202400000000C4
+:1026000046575F56455253494F4E3A202331204694
+:1026100072692041707220372031373A35373A35A8
+:1026200032205044542032303030000046575F434F
+:102630004F4D50494C455F54494D453A2031373A4A
+:1026400035373A353200000046575F434F4D504909
+:102650004C455F42593A206465767263730000000E
+:1026600046575F434F4D50494C455F484F53543A8E
+:1026700020636F6D707574650000000046575F43FE
+:102680004F4D50494C455F444F4D41494E3A2065AE
+:102690006E672E616374656F6E2E636F6D00000050
+:1026A00046575F434F4D50494C45523A206763634C
+:1026B0002076657273696F6E20322E372E320000DD
+:1026C00000000000120411000000000024486561B1
+:1026D0006465723A202F70726F6A656374732F722B
+:1026E00063732F73772F67652F2E2F6E69632F66A5
+:1026F00077322F636F6D6D6F6E2F6D656D2E632C4E
+:102700007620312E312E322E3520313939382F3086
+:10271000392F33302031383A35303A303820736829
+:1027200075616E672045787020240000244865613B
+:102730006465723A202F70726F6A656374732F72CA
+:1027400063732F73772F67652F2E2F6E69632F6644
+:1027500077322F636F6D6D6F6E2F73656E642E63AE
+:102760002C7620312E312E322E3434203139393826
+:102770002F31322F32312030303A33333A31382052
+:10278000736875616E672045787020240000000032
+:10279000657674526E674600516576744600000097
+:1027A00051657674505F46004D657674526E67468B
+:1027B000000000004D516576744600004D5165766D
+:1027C000505F46005173436F6E495F460000000042
+:1027D0005173436F6E734600517250726F644600BE
+:1027E00069736E745463705500000000244865617D
+:1027F0006465723A202F70726F6A656374732F720A
+:1028000063732F73772F67652F2E2F6E69632F6683
+:1028100077322F636F6D6D6F6E2F726563762E63E7
+:102820002C7620312E312E322E3533203139393964
+:102830002F30312F31362030323A35353A3433208B
+:10284000736875616E672045787020240000000071
+:10285000657674526E6746005165767446000000D6
+:1028600051657674505F46004D657674526E6746CA
+:10287000000000004D516576744600004D516576AC
+:10288000505F46005173436F6E495F460000000081
+:102890005173436F6E734600517250726F644600FD
+:1028A000724D616343686B300000000072784672BD
+:1028B0006D324C670000000072784E6F53744264B2
+:1028C0000000000072784E6F4D6942640000000005
+:1028D00072784E6F4A6D4264000000007278436B5C
+:1028E000446D614600000000727851446D457846A1
+:1028F00000000000727851446D61460072785144C6
+:102900004C42644600000000727851446D426446B7
+:1029100000000000727843726350616400000000A0
+:1029200072536D51446D614600000000244865619A
+:102930006465723A202F70726F6A656374732F72C8
+:1029400063732F73772F67652F2E2F6E69632F6642
+:1029500077322F636F6D6D6F6E2F6D61632E632CF9
+:102960007620312E312E322E323220313939382F25
+:1029700031322F30382030323A33363A3330207308
+:102980006875616E67204578702024006576745202
+:102990006E67460051657674460000005165767496
+:1029A000505F46004D657674526E67460000000029
+:1029B0004D516576744600004D516576505F460076
+:1029C0005173436F6E495F46000000005173436FBF
+:1029D0006E734600517250726F6446006D616354AD
+:1029E000687265730000000023744D616341544EAA
+:1029F0000000000023724D616341544E000000004E
+:102A000072656D4173737274000000006C696E6BC7
+:102A1000444F574E000000006C696E6B555000002B
+:102A20000000000000000000000000002448656174
+:102A30006465723A202F70726F6A656374732F72C7
+:102A400063732F73772F67652F2E2F6E69632F6641
+:102A500077322F636F6D6D6F6E2F636B73756D2E95
+:102A6000632C7620312E312E322E392031393939EE
+:102A70002F30312F31342030303A30333A3438204F
+:102A8000736875616E67204578702024000000002F
+:102A9000657674526E674600516576744600000094
+:102AA00051657674505F46004D657674526E674688
+:102AB000000000004D516576744600004D5165766A
+:102AC000505F46005173436F6E495F46000000003F
+:102AD0005173436F6E734600517250726F644600BB
+:102AE00000000000000000000000000050726F6253
+:102AF00065506879000000006C6E6B41535352546E
+:102B000000000000000109A400010A1C00010A5095
+:102B100000010A7C0001105000010AA800010B10FE
+:102B2000000111FC00010DC000010C6800010C80C7
+:102B300000010CC400010CEC00010D0C00010D346F
+:102B4000000111FC00010DC000010DF800010E1084
+:102B500000010E4000010E6800010E8800010EB059
+:102B60000000000000010FDC000110080001102C23
+:102B7000000111FC00011050000110780001110843
+:102B80000000000000000000000000000001186CC0
+:102B90000001193C00011A1400011AE400011B4055
+:102BA00000011C1C00011C4400011D2000011D48E7
+:102BB00000011EF000011F18000120C0000122B812
+:102BC0000001254C000124600001254C00012578FE
+:102BD000000120E8000122907273745F676D6969DB
+:102BE00000000000000126080001264000012728FF
+:102BF00000013374000133B4000133CC7365746C8D
+:102C00006F6F7000000000000000000000013BBC7E
+:102C100000013BFC00013C8C00013CD000013D3434
+:102C200000013DC000013DF400013E7C00013F1465
+:102C300000013FE400014024000140A8000140CC15
+:102C4000000141DC646F4261736550670000000061
+:102C500000000000000000000000000073746D61BF
+:102C6000634C4E4B000000006765746D636C6E6BC7
+:102C70000000000000014ED800014ED800014B8C2E
+:102C800000014BD800014C2400014ED87365746DCF
+:102C90006163616374000000000000000000000038
+:102CA0000000000000000000000000000000000024
+:102CB0000000000000000000000000000000000014
+:102CC0000000000000000000000000000000000103
+:102CD000000000010000000100C001FC00003FFCFA
+:102CE00000C00000416C74656F6E204163654E4901
+:102CF000432056000000000000000000000000001B
+:102D0000000000000000000000000000416C74653D
+:102D10006F6E204163654E49432056004242424255
+:102D2000000000000000000000000000001FFFFC89
+:102D3000001FFF7C000000000000000000000000F9
+:102D40000000000000000000000000000060CF0054
+:102D500000000060CF000000000000000000000044
+:102D60000000000000000000000000000000000063
+:102D70000000000000000000000000000000000053
+:102D80000000000000000000000000000000000043
+:102D90000000000000000000000000000000000033
+:102DA0000000000000000000000000030000000020
+:102DB0000000000100000000000000000000000012
+:102DC0000000000100000000000000010000000001
+:102DD00000000000000000000000000000000001F2
+:102DE00000000001000000000000000000000000E2
+:102DF00000000000000000000100000021000000B1
+:102E0000120001400000000000000000200000004F
+:102E1000120000A0000000001200006012000180FB
+:102E2000120001E0000000000000000000000000AF
+:102E30000000000100000000000000000000000091
+:102E40000000000000000000000000000000000280
+:102E5000000000000000000000030001000000016D
+:102E60000003020100000000000000000101010158
+:102E70000101010000010100010100010001000148
+:0C2E800001000101000001010000000041
+:00000001FF
+/* tg2 firmware v12.4.11 */
-- 
cgit v0.10.2


From 077f849de42e58172e25ccb24df4c1a13e82420c Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Sun, 4 Jan 2009 16:11:25 -0800
Subject: firmware: convert tg3 driver to request_firmware()

Firmware blob looks like this...
        u8 firmware_major
        u8 firmware_minor
        u8 firmware_fix
        u8 pad
        __be32 start_address
        __be32 length (total, including BSS sections to be zeroed)
        data... (in __be32 words, which is native for the firmware)

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 04ae1e8..5e2dbae 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -40,6 +40,7 @@
 #include <linux/workqueue.h>
 #include <linux/prefetch.h>
 #include <linux/dma-mapping.h>
+#include <linux/firmware.h>
 
 #include <net/checksum.h>
 #include <net/ip.h>
@@ -137,6 +138,10 @@
 
 #define TG3_NUM_TEST		6
 
+#define FIRMWARE_TG3		"tigon/tg3.bin"
+#define FIRMWARE_TG3TSO		"tigon/tg3_tso.bin"
+#define FIRMWARE_TG3TSO5	"tigon/tg3_tso5.bin"
+
 static char version[] __devinitdata =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
 
@@ -144,6 +149,10 @@ MODULE_AUTHOR("David S. Miller (davem@redhat.com) and Jeff Garzik (jgarzik@pobox
 MODULE_DESCRIPTION("Broadcom Tigon3 ethernet driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
+MODULE_FIRMWARE(FIRMWARE_TG3);
+MODULE_FIRMWARE(FIRMWARE_TG3TSO);
+MODULE_FIRMWARE(FIRMWARE_TG3TSO5);
+
 
 static int tg3_debug = -1;	/* -1 == use TG3_DEF_MSG_ENABLE as value */
 module_param(tg3_debug, int, 0);
@@ -6205,130 +6214,6 @@ static int tg3_halt(struct tg3 *tp, int kind, int silent)
 	return 0;
 }
 
-#define TG3_FW_RELEASE_MAJOR	0x0
-#define TG3_FW_RELASE_MINOR	0x0
-#define TG3_FW_RELEASE_FIX	0x0
-#define TG3_FW_START_ADDR	0x08000000
-#define TG3_FW_TEXT_ADDR	0x08000000
-#define TG3_FW_TEXT_LEN		0x9c0
-#define TG3_FW_RODATA_ADDR	0x080009c0
-#define TG3_FW_RODATA_LEN	0x60
-#define TG3_FW_DATA_ADDR	0x08000a40
-#define TG3_FW_DATA_LEN		0x20
-#define TG3_FW_SBSS_ADDR	0x08000a60
-#define TG3_FW_SBSS_LEN		0xc
-#define TG3_FW_BSS_ADDR		0x08000a70
-#define TG3_FW_BSS_LEN		0x10
-
-static const u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = {
-	0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800,
-	0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000018, 0x00000000,
-	0x0000000d, 0x3c1d0800, 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100034,
-	0x0e00021c, 0x00000000, 0x0000000d, 0x00000000, 0x00000000, 0x00000000,
-	0x27bdffe0, 0x3c1cc000, 0xafbf0018, 0xaf80680c, 0x0e00004c, 0x241b2105,
-	0x97850000, 0x97870002, 0x9782002c, 0x9783002e, 0x3c040800, 0x248409c0,
-	0xafa00014, 0x00021400, 0x00621825, 0x00052c00, 0xafa30010, 0x8f860010,
-	0x00e52825, 0x0e000060, 0x24070102, 0x3c02ac00, 0x34420100, 0x3c03ac01,
-	0x34630100, 0xaf820490, 0x3c02ffff, 0xaf820494, 0xaf830498, 0xaf82049c,
-	0x24020001, 0xaf825ce0, 0x0e00003f, 0xaf825d00, 0x0e000140, 0x00000000,
-	0x8fbf0018, 0x03e00008, 0x27bd0020, 0x2402ffff, 0xaf825404, 0x8f835400,
-	0x34630400, 0xaf835400, 0xaf825404, 0x3c020800, 0x24420034, 0xaf82541c,
-	0x03e00008, 0xaf805400, 0x00000000, 0x00000000, 0x3c020800, 0x34423000,
-	0x3c030800, 0x34633000, 0x3c040800, 0x348437ff, 0x3c010800, 0xac220a64,
-	0x24020040, 0x3c010800, 0xac220a68, 0x3c010800, 0xac200a60, 0xac600000,
-	0x24630004, 0x0083102b, 0x5040fffd, 0xac600000, 0x03e00008, 0x00000000,
-	0x00804821, 0x8faa0010, 0x3c020800, 0x8c420a60, 0x3c040800, 0x8c840a68,
-	0x8fab0014, 0x24430001, 0x0044102b, 0x3c010800, 0xac230a60, 0x14400003,
-	0x00004021, 0x3c010800, 0xac200a60, 0x3c020800, 0x8c420a60, 0x3c030800,
-	0x8c630a64, 0x91240000, 0x00021140, 0x00431021, 0x00481021, 0x25080001,
-	0xa0440000, 0x29020008, 0x1440fff4, 0x25290001, 0x3c020800, 0x8c420a60,
-	0x3c030800, 0x8c630a64, 0x8f84680c, 0x00021140, 0x00431021, 0xac440008,
-	0xac45000c, 0xac460010, 0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0,
-	0x02000008, 0x00000000, 0x0a0001e3, 0x3c0a0001, 0x0a0001e3, 0x3c0a0002,
-	0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
-	0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
-	0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
-	0x0a0001e3, 0x3c0a0007, 0x0a0001e3, 0x3c0a0008, 0x0a0001e3, 0x3c0a0009,
-	0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x3c0a000b,
-	0x0a0001e3, 0x3c0a000c, 0x0a0001e3, 0x3c0a000d, 0x0a0001e3, 0x00000000,
-	0x0a0001e3, 0x00000000, 0x0a0001e3, 0x3c0a000e, 0x0a0001e3, 0x00000000,
-	0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
-	0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
-	0x0a0001e3, 0x00000000, 0x0a0001e3, 0x3c0a0013, 0x0a0001e3, 0x3c0a0014,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0x27bdffe0, 0x00001821, 0x00001021, 0xafbf0018, 0xafb10014, 0xafb00010,
-	0x3c010800, 0x00220821, 0xac200a70, 0x3c010800, 0x00220821, 0xac200a74,
-	0x3c010800, 0x00220821, 0xac200a78, 0x24630001, 0x1860fff5, 0x2442000c,
-	0x24110001, 0x8f906810, 0x32020004, 0x14400005, 0x24040001, 0x3c020800,
-	0x8c420a78, 0x18400003, 0x00002021, 0x0e000182, 0x00000000, 0x32020001,
-	0x10400003, 0x00000000, 0x0e000169, 0x00000000, 0x0a000153, 0xaf915028,
-	0x8fbf0018, 0x8fb10014, 0x8fb00010, 0x03e00008, 0x27bd0020, 0x3c050800,
-	0x8ca50a70, 0x3c060800, 0x8cc60a80, 0x3c070800, 0x8ce70a78, 0x27bdffe0,
-	0x3c040800, 0x248409d0, 0xafbf0018, 0xafa00010, 0x0e000060, 0xafa00014,
-	0x0e00017b, 0x00002021, 0x8fbf0018, 0x03e00008, 0x27bd0020, 0x24020001,
-	0x8f836810, 0x00821004, 0x00021027, 0x00621824, 0x03e00008, 0xaf836810,
-	0x27bdffd8, 0xafbf0024, 0x1080002e, 0xafb00020, 0x8f825cec, 0xafa20018,
-	0x8f825cec, 0x3c100800, 0x26100a78, 0xafa2001c, 0x34028000, 0xaf825cec,
-	0x8e020000, 0x18400016, 0x00000000, 0x3c020800, 0x94420a74, 0x8fa3001c,
-	0x000221c0, 0xac830004, 0x8fa2001c, 0x3c010800, 0x0e000201, 0xac220a74,
-	0x10400005, 0x00000000, 0x8e020000, 0x24420001, 0x0a0001df, 0xae020000,
-	0x3c020800, 0x8c420a70, 0x00021c02, 0x000321c0, 0x0a0001c5, 0xafa2001c,
-	0x0e000201, 0x00000000, 0x1040001f, 0x00000000, 0x8e020000, 0x8fa3001c,
-	0x24420001, 0x3c010800, 0xac230a70, 0x3c010800, 0xac230a74, 0x0a0001df,
-	0xae020000, 0x3c100800, 0x26100a78, 0x8e020000, 0x18400028, 0x00000000,
-	0x0e000201, 0x00000000, 0x14400024, 0x00000000, 0x8e020000, 0x3c030800,
-	0x8c630a70, 0x2442ffff, 0xafa3001c, 0x18400006, 0xae020000, 0x00031402,
-	0x000221c0, 0x8c820004, 0x3c010800, 0xac220a70, 0x97a2001e, 0x2442ff00,
-	0x2c420300, 0x1440000b, 0x24024000, 0x3c040800, 0x248409dc, 0xafa00010,
-	0xafa00014, 0x8fa6001c, 0x24050008, 0x0e000060, 0x00003821, 0x0a0001df,
-	0x00000000, 0xaf825cf8, 0x3c020800, 0x8c420a40, 0x8fa3001c, 0x24420001,
-	0xaf835cf8, 0x3c010800, 0xac220a40, 0x8fbf0024, 0x8fb00020, 0x03e00008,
-	0x27bd0028, 0x27bdffe0, 0x3c040800, 0x248409e8, 0x00002821, 0x00003021,
-	0x00003821, 0xafbf0018, 0xafa00010, 0x0e000060, 0xafa00014, 0x8fbf0018,
-	0x03e00008, 0x27bd0020, 0x8f82680c, 0x8f85680c, 0x00021827, 0x0003182b,
-	0x00031823, 0x00431024, 0x00441021, 0x00a2282b, 0x10a00006, 0x00000000,
-	0x00401821, 0x8f82680c, 0x0043102b, 0x1440fffd, 0x00000000, 0x03e00008,
-	0x00000000, 0x3c040800, 0x8c840000, 0x3c030800, 0x8c630a40, 0x0064102b,
-	0x54400002, 0x00831023, 0x00641023, 0x2c420008, 0x03e00008, 0x38420001,
-	0x27bdffe0, 0x00802821, 0x3c040800, 0x24840a00, 0x00003021, 0x00003821,
-	0xafbf0018, 0xafa00010, 0x0e000060, 0xafa00014, 0x0a000216, 0x00000000,
-	0x8fbf0018, 0x03e00008, 0x27bd0020, 0x00000000, 0x27bdffe0, 0x3c1cc000,
-	0xafbf0018, 0x0e00004c, 0xaf80680c, 0x3c040800, 0x24840a10, 0x03802821,
-	0x00003021, 0x00003821, 0xafa00010, 0x0e000060, 0xafa00014, 0x2402ffff,
-	0xaf825404, 0x3c0200aa, 0x0e000234, 0xaf825434, 0x8fbf0018, 0x03e00008,
-	0x27bd0020, 0x00000000, 0x00000000, 0x00000000, 0x27bdffe8, 0xafb00010,
-	0x24100001, 0xafbf0014, 0x3c01c003, 0xac200000, 0x8f826810, 0x30422000,
-	0x10400003, 0x00000000, 0x0e000246, 0x00000000, 0x0a00023a, 0xaf905428,
-	0x8fbf0014, 0x8fb00010, 0x03e00008, 0x27bd0018, 0x27bdfff8, 0x8f845d0c,
-	0x3c0200ff, 0x3c030800, 0x8c630a50, 0x3442fff8, 0x00821024, 0x1043001e,
-	0x3c0500ff, 0x34a5fff8, 0x3c06c003, 0x3c074000, 0x00851824, 0x8c620010,
-	0x3c010800, 0xac230a50, 0x30420008, 0x10400005, 0x00871025, 0x8cc20000,
-	0x24420001, 0xacc20000, 0x00871025, 0xaf825d0c, 0x8fa20000, 0x24420001,
-	0xafa20000, 0x8fa20000, 0x8fa20000, 0x24420001, 0xafa20000, 0x8fa20000,
-	0x8f845d0c, 0x3c030800, 0x8c630a50, 0x00851024, 0x1443ffe8, 0x00851824,
-	0x27bd0008, 0x03e00008, 0x00000000, 0x00000000, 0x00000000
-};
-
-static const u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = {
-	0x35373031, 0x726c7341, 0x00000000, 0x00000000, 0x53774576, 0x656e7430,
-	0x00000000, 0x726c7045, 0x76656e74, 0x31000000, 0x556e6b6e, 0x45766e74,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x66617461, 0x6c457272,
-	0x00000000, 0x00000000, 0x4d61696e, 0x43707542, 0x00000000, 0x00000000,
-	0x00000000
-};
-
-#if 0 /* All zeros, don't eat up space with it. */
-u32 tg3FwData[(TG3_FW_DATA_LEN / sizeof(u32)) + 1] = {
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000
-};
-#endif
-
 #define RX_CPU_SCRATCH_BASE	0x30000
 #define RX_CPU_SCRATCH_SIZE	0x04000
 #define TX_CPU_SCRATCH_BASE	0x34000
@@ -6383,15 +6268,9 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset)
 }
 
 struct fw_info {
-	unsigned int text_base;
-	unsigned int text_len;
-	const u32 *text_data;
-	unsigned int rodata_base;
-	unsigned int rodata_len;
-	const u32 *rodata_data;
-	unsigned int data_base;
-	unsigned int data_len;
-	const u32 *data_data;
+	unsigned int fw_base;
+	unsigned int fw_len;
+	const __be32 *fw_data;
 };
 
 /* tp->lock is held. */
@@ -6428,24 +6307,11 @@ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_b
 		write_op(tp, cpu_scratch_base + i, 0);
 	tw32(cpu_base + CPU_STATE, 0xffffffff);
 	tw32(cpu_base + CPU_MODE, tr32(cpu_base+CPU_MODE)|CPU_MODE_HALT);
-	for (i = 0; i < (info->text_len / sizeof(u32)); i++)
-		write_op(tp, (cpu_scratch_base +
-			      (info->text_base & 0xffff) +
-			      (i * sizeof(u32))),
-			 (info->text_data ?
-			  info->text_data[i] : 0));
-	for (i = 0; i < (info->rodata_len / sizeof(u32)); i++)
-		write_op(tp, (cpu_scratch_base +
-			      (info->rodata_base & 0xffff) +
-			      (i * sizeof(u32))),
-			 (info->rodata_data ?
-			  info->rodata_data[i] : 0));
-	for (i = 0; i < (info->data_len / sizeof(u32)); i++)
+	for (i = 0; i < (info->fw_len / sizeof(u32)); i++)
 		write_op(tp, (cpu_scratch_base +
-			      (info->data_base & 0xffff) +
+			      (info->fw_base & 0xffff) +
 			      (i * sizeof(u32))),
-			 (info->data_data ?
-			  info->data_data[i] : 0));
+			      be32_to_cpu(info->fw_data[i]));
 
 	err = 0;
 
@@ -6457,17 +6323,20 @@ out:
 static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp)
 {
 	struct fw_info info;
+	const __be32 *fw_data;
 	int err, i;
 
-	info.text_base = TG3_FW_TEXT_ADDR;
-	info.text_len = TG3_FW_TEXT_LEN;
-	info.text_data = &tg3FwText[0];
-	info.rodata_base = TG3_FW_RODATA_ADDR;
-	info.rodata_len = TG3_FW_RODATA_LEN;
-	info.rodata_data = &tg3FwRodata[0];
-	info.data_base = TG3_FW_DATA_ADDR;
-	info.data_len = TG3_FW_DATA_LEN;
-	info.data_data = NULL;
+	fw_data = (void *)tp->fw->data;
+
+	/* Firmware blob starts with version numbers, followed by
+	   start address and length. We are setting complete length.
+	   length = end_address_of_bss - start_address_of_text.
+	   Remainder is the blob to be loaded contiguously
+	   from start address. */
+
+	info.fw_base = be32_to_cpu(fw_data[1]);
+	info.fw_len = tp->fw->size - 12;
+	info.fw_data = &fw_data[3];
 
 	err = tg3_load_firmware_cpu(tp, RX_CPU_BASE,
 				    RX_CPU_SCRATCH_BASE, RX_CPU_SCRATCH_SIZE,
@@ -6483,21 +6352,21 @@ static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp)
 
 	/* Now startup only the RX cpu. */
 	tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff);
-	tw32_f(RX_CPU_BASE + CPU_PC,    TG3_FW_TEXT_ADDR);
+	tw32_f(RX_CPU_BASE + CPU_PC, info.fw_base);
 
 	for (i = 0; i < 5; i++) {
-		if (tr32(RX_CPU_BASE + CPU_PC) == TG3_FW_TEXT_ADDR)
+		if (tr32(RX_CPU_BASE + CPU_PC) == info.fw_base)
 			break;
 		tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff);
 		tw32(RX_CPU_BASE + CPU_MODE,  CPU_MODE_HALT);
-		tw32_f(RX_CPU_BASE + CPU_PC,    TG3_FW_TEXT_ADDR);
+		tw32_f(RX_CPU_BASE + CPU_PC, info.fw_base);
 		udelay(1000);
 	}
 	if (i >= 5) {
 		printk(KERN_ERR PFX "tg3_load_firmware fails for %s "
 		       "to set RX CPU PC, is %08x should be %08x\n",
 		       tp->dev->name, tr32(RX_CPU_BASE + CPU_PC),
-		       TG3_FW_TEXT_ADDR);
+		       info.fw_base);
 		return -ENODEV;
 	}
 	tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff);
@@ -6506,547 +6375,36 @@ static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp)
 	return 0;
 }
 
-
-#define TG3_TSO_FW_RELEASE_MAJOR	0x1
-#define TG3_TSO_FW_RELASE_MINOR		0x6
-#define TG3_TSO_FW_RELEASE_FIX		0x0
-#define TG3_TSO_FW_START_ADDR		0x08000000
-#define TG3_TSO_FW_TEXT_ADDR		0x08000000
-#define TG3_TSO_FW_TEXT_LEN		0x1aa0
-#define TG3_TSO_FW_RODATA_ADDR		0x08001aa0
-#define TG3_TSO_FW_RODATA_LEN		0x60
-#define TG3_TSO_FW_DATA_ADDR		0x08001b20
-#define TG3_TSO_FW_DATA_LEN		0x30
-#define TG3_TSO_FW_SBSS_ADDR		0x08001b50
-#define TG3_TSO_FW_SBSS_LEN		0x2c
-#define TG3_TSO_FW_BSS_ADDR		0x08001b80
-#define TG3_TSO_FW_BSS_LEN		0x894
-
-static const u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = {
-	0x0e000003, 0x00000000, 0x08001b24, 0x00000000, 0x10000003, 0x00000000,
-	0x0000000d, 0x0000000d, 0x3c1d0800, 0x37bd4000, 0x03a0f021, 0x3c100800,
-	0x26100000, 0x0e000010, 0x00000000, 0x0000000d, 0x27bdffe0, 0x3c04fefe,
-	0xafbf0018, 0x0e0005d8, 0x34840002, 0x0e000668, 0x00000000, 0x3c030800,
-	0x90631b68, 0x24020002, 0x3c040800, 0x24841aac, 0x14620003, 0x24050001,
-	0x3c040800, 0x24841aa0, 0x24060006, 0x00003821, 0xafa00010, 0x0e00067c,
-	0xafa00014, 0x8f625c50, 0x34420001, 0xaf625c50, 0x8f625c90, 0x34420001,
-	0xaf625c90, 0x2402ffff, 0x0e000034, 0xaf625404, 0x8fbf0018, 0x03e00008,
-	0x27bd0020, 0x00000000, 0x00000000, 0x00000000, 0x27bdffe0, 0xafbf001c,
-	0xafb20018, 0xafb10014, 0x0e00005b, 0xafb00010, 0x24120002, 0x24110001,
-	0x8f706820, 0x32020100, 0x10400003, 0x00000000, 0x0e0000bb, 0x00000000,
-	0x8f706820, 0x32022000, 0x10400004, 0x32020001, 0x0e0001f0, 0x24040001,
-	0x32020001, 0x10400003, 0x00000000, 0x0e0000a3, 0x00000000, 0x3c020800,
-	0x90421b98, 0x14520003, 0x00000000, 0x0e0004c0, 0x00000000, 0x0a00003c,
-	0xaf715028, 0x8fbf001c, 0x8fb20018, 0x8fb10014, 0x8fb00010, 0x03e00008,
-	0x27bd0020, 0x27bdffe0, 0x3c040800, 0x24841ac0, 0x00002821, 0x00003021,
-	0x00003821, 0xafbf0018, 0xafa00010, 0x0e00067c, 0xafa00014, 0x3c040800,
-	0x248423d8, 0xa4800000, 0x3c010800, 0xa0201b98, 0x3c010800, 0xac201b9c,
-	0x3c010800, 0xac201ba0, 0x3c010800, 0xac201ba4, 0x3c010800, 0xac201bac,
-	0x3c010800, 0xac201bb8, 0x3c010800, 0xac201bbc, 0x8f624434, 0x3c010800,
-	0xac221b88, 0x8f624438, 0x3c010800, 0xac221b8c, 0x8f624410, 0xac80f7a8,
-	0x3c010800, 0xac201b84, 0x3c010800, 0xac2023e0, 0x3c010800, 0xac2023c8,
-	0x3c010800, 0xac2023cc, 0x3c010800, 0xac202400, 0x3c010800, 0xac221b90,
-	0x8f620068, 0x24030007, 0x00021702, 0x10430005, 0x00000000, 0x8f620068,
-	0x00021702, 0x14400004, 0x24020001, 0x3c010800, 0x0a000097, 0xac20240c,
-	0xac820034, 0x3c040800, 0x24841acc, 0x3c050800, 0x8ca5240c, 0x00003021,
-	0x00003821, 0xafa00010, 0x0e00067c, 0xafa00014, 0x8fbf0018, 0x03e00008,
-	0x27bd0020, 0x27bdffe0, 0x3c040800, 0x24841ad8, 0x00002821, 0x00003021,
-	0x00003821, 0xafbf0018, 0xafa00010, 0x0e00067c, 0xafa00014, 0x0e00005b,
-	0x00000000, 0x0e0000b4, 0x00002021, 0x8fbf0018, 0x03e00008, 0x27bd0020,
-	0x24020001, 0x8f636820, 0x00821004, 0x00021027, 0x00621824, 0x03e00008,
-	0xaf636820, 0x27bdffd0, 0xafbf002c, 0xafb60028, 0xafb50024, 0xafb40020,
-	0xafb3001c, 0xafb20018, 0xafb10014, 0xafb00010, 0x8f675c5c, 0x3c030800,
-	0x24631bbc, 0x8c620000, 0x14470005, 0x3c0200ff, 0x3c020800, 0x90421b98,
-	0x14400119, 0x3c0200ff, 0x3442fff8, 0x00e28824, 0xac670000, 0x00111902,
-	0x306300ff, 0x30e20003, 0x000211c0, 0x00622825, 0x00a04021, 0x00071602,
-	0x3c030800, 0x90631b98, 0x3044000f, 0x14600036, 0x00804821, 0x24020001,
-	0x3c010800, 0xa0221b98, 0x00051100, 0x00821025, 0x3c010800, 0xac201b9c,
-	0x3c010800, 0xac201ba0, 0x3c010800, 0xac201ba4, 0x3c010800, 0xac201bac,
-	0x3c010800, 0xac201bb8, 0x3c010800, 0xac201bb0, 0x3c010800, 0xac201bb4,
-	0x3c010800, 0xa42223d8, 0x9622000c, 0x30437fff, 0x3c010800, 0xa4222410,
-	0x30428000, 0x3c010800, 0xa4231bc6, 0x10400005, 0x24020001, 0x3c010800,
-	0xac2223f4, 0x0a000102, 0x2406003e, 0x24060036, 0x3c010800, 0xac2023f4,
-	0x9622000a, 0x3c030800, 0x94631bc6, 0x3c010800, 0xac2023f0, 0x3c010800,
-	0xac2023f8, 0x00021302, 0x00021080, 0x00c21021, 0x00621821, 0x3c010800,
-	0xa42223d0, 0x3c010800, 0x0a000115, 0xa4231b96, 0x9622000c, 0x3c010800,
-	0xa42223ec, 0x3c040800, 0x24841b9c, 0x8c820000, 0x00021100, 0x3c010800,
-	0x00220821, 0xac311bc8, 0x8c820000, 0x00021100, 0x3c010800, 0x00220821,
-	0xac271bcc, 0x8c820000, 0x25030001, 0x306601ff, 0x00021100, 0x3c010800,
-	0x00220821, 0xac261bd0, 0x8c820000, 0x00021100, 0x3c010800, 0x00220821,
-	0xac291bd4, 0x96230008, 0x3c020800, 0x8c421bac, 0x00432821, 0x3c010800,
-	0xac251bac, 0x9622000a, 0x30420004, 0x14400018, 0x00061100, 0x8f630c14,
-	0x3063000f, 0x2c620002, 0x1440000b, 0x3c02c000, 0x8f630c14, 0x3c020800,
-	0x8c421b40, 0x3063000f, 0x24420001, 0x3c010800, 0xac221b40, 0x2c620002,
-	0x1040fff7, 0x3c02c000, 0x00e21825, 0xaf635c5c, 0x8f625c50, 0x30420002,
-	0x10400014, 0x00000000, 0x0a000147, 0x00000000, 0x3c030800, 0x8c631b80,
-	0x3c040800, 0x94841b94, 0x01221025, 0x3c010800, 0xa42223da, 0x24020001,
-	0x3c010800, 0xac221bb8, 0x24630001, 0x0085202a, 0x3c010800, 0x10800003,
-	0xac231b80, 0x3c010800, 0xa4251b94, 0x3c060800, 0x24c61b9c, 0x8cc20000,
-	0x24420001, 0xacc20000, 0x28420080, 0x14400005, 0x00000000, 0x0e000656,
-	0x24040002, 0x0a0001e6, 0x00000000, 0x3c020800, 0x8c421bb8, 0x10400078,
-	0x24020001, 0x3c050800, 0x90a51b98, 0x14a20072, 0x00000000, 0x3c150800,
-	0x96b51b96, 0x3c040800, 0x8c841bac, 0x32a3ffff, 0x0083102a, 0x1440006c,
-	0x00000000, 0x14830003, 0x00000000, 0x3c010800, 0xac2523f0, 0x1060005c,
-	0x00009021, 0x24d60004, 0x0060a021, 0x24d30014, 0x8ec20000, 0x00028100,
-	0x3c110800, 0x02308821, 0x0e000625, 0x8e311bc8, 0x00402821, 0x10a00054,
-	0x00000000, 0x9628000a, 0x31020040, 0x10400005, 0x2407180c, 0x8e22000c,
-	0x2407188c, 0x00021400, 0xaca20018, 0x3c030800, 0x00701821, 0x8c631bd0,
-	0x3c020800, 0x00501021, 0x8c421bd4, 0x00031d00, 0x00021400, 0x00621825,
-	0xaca30014, 0x8ec30004, 0x96220008, 0x00432023, 0x3242ffff, 0x3083ffff,
-	0x00431021, 0x0282102a, 0x14400002, 0x02b23023, 0x00803021, 0x8e620000,
-	0x30c4ffff, 0x00441021, 0xae620000, 0x8e220000, 0xaca20000, 0x8e220004,
-	0x8e63fff4, 0x00431021, 0xaca20004, 0xa4a6000e, 0x8e62fff4, 0x00441021,
-	0xae62fff4, 0x96230008, 0x0043102a, 0x14400005, 0x02469021, 0x8e62fff0,
-	0xae60fff4, 0x24420001, 0xae62fff0, 0xaca00008, 0x3242ffff, 0x14540008,
-	0x24020305, 0x31020080, 0x54400001, 0x34e70010, 0x24020905, 0xa4a2000c,
-	0x0a0001cb, 0x34e70020, 0xa4a2000c, 0x3c020800, 0x8c4223f0, 0x10400003,
-	0x3c024b65, 0x0a0001d3, 0x34427654, 0x3c02b49a, 0x344289ab, 0xaca2001c,
-	0x30e2ffff, 0xaca20010, 0x0e0005a2, 0x00a02021, 0x3242ffff, 0x0054102b,
-	0x1440ffa9, 0x00000000, 0x24020002, 0x3c010800, 0x0a0001e6, 0xa0221b98,
-	0x8ec2083c, 0x24420001, 0x0a0001e6, 0xaec2083c, 0x0e0004c0, 0x00000000,
-	0x8fbf002c, 0x8fb60028, 0x8fb50024, 0x8fb40020, 0x8fb3001c, 0x8fb20018,
-	0x8fb10014, 0x8fb00010, 0x03e00008, 0x27bd0030, 0x27bdffd0, 0xafbf0028,
-	0xafb30024, 0xafb20020, 0xafb1001c, 0xafb00018, 0x8f725c9c, 0x3c0200ff,
-	0x3442fff8, 0x3c070800, 0x24e71bb4, 0x02428824, 0x9623000e, 0x8ce20000,
-	0x00431021, 0xace20000, 0x8e220010, 0x30420020, 0x14400011, 0x00809821,
-	0x0e00063b, 0x02202021, 0x3c02c000, 0x02421825, 0xaf635c9c, 0x8f625c90,
-	0x30420002, 0x1040011e, 0x00000000, 0xaf635c9c, 0x8f625c90, 0x30420002,
-	0x10400119, 0x00000000, 0x0a00020d, 0x00000000, 0x8e240008, 0x8e230014,
-	0x00041402, 0x000231c0, 0x00031502, 0x304201ff, 0x2442ffff, 0x3042007f,
-	0x00031942, 0x30637800, 0x00021100, 0x24424000, 0x00624821, 0x9522000a,
-	0x3084ffff, 0x30420008, 0x104000b0, 0x000429c0, 0x3c020800, 0x8c422400,
-	0x14400024, 0x24c50008, 0x94c20014, 0x3c010800, 0xa42223d0, 0x8cc40010,
-	0x00041402, 0x3c010800, 0xa42223d2, 0x3c010800, 0xa42423d4, 0x94c2000e,
-	0x3083ffff, 0x00431023, 0x3c010800, 0xac222408, 0x94c2001a, 0x3c010800,
-	0xac262400, 0x3c010800, 0xac322404, 0x3c010800, 0xac2223fc, 0x3c02c000,
-	0x02421825, 0xaf635c9c, 0x8f625c90, 0x30420002, 0x104000e5, 0x00000000,
-	0xaf635c9c, 0x8f625c90, 0x30420002, 0x104000e0, 0x00000000, 0x0a000246,
-	0x00000000, 0x94c2000e, 0x3c030800, 0x946323d4, 0x00434023, 0x3103ffff,
-	0x2c620008, 0x1040001c, 0x00000000, 0x94c20014, 0x24420028, 0x00a22821,
-	0x00031042, 0x1840000b, 0x00002021, 0x24e60848, 0x00403821, 0x94a30000,
-	0x8cc20000, 0x24840001, 0x00431021, 0xacc20000, 0x0087102a, 0x1440fff9,
-	0x24a50002, 0x31020001, 0x1040001f, 0x3c024000, 0x3c040800, 0x248423fc,
-	0xa0a00001, 0x94a30000, 0x8c820000, 0x00431021, 0x0a000285, 0xac820000,
-	0x8f626800, 0x3c030010, 0x00431024, 0x10400009, 0x00000000, 0x94c2001a,
-	0x3c030800, 0x8c6323fc, 0x00431021, 0x3c010800, 0xac2223fc, 0x0a000286,
-	0x3c024000, 0x94c2001a, 0x94c4001c, 0x3c030800, 0x8c6323fc, 0x00441023,
-	0x00621821, 0x3c010800, 0xac2323fc, 0x3c024000, 0x02421825, 0xaf635c9c,
-	0x8f625c90, 0x30420002, 0x1440fffc, 0x00000000, 0x9522000a, 0x30420010,
-	0x1040009b, 0x00000000, 0x3c030800, 0x946323d4, 0x3c070800, 0x24e72400,
-	0x8ce40000, 0x8f626800, 0x24630030, 0x00832821, 0x3c030010, 0x00431024,
-	0x1440000a, 0x00000000, 0x94a20004, 0x3c040800, 0x8c842408, 0x3c030800,
-	0x8c6323fc, 0x00441023, 0x00621821, 0x3c010800, 0xac2323fc, 0x3c040800,
-	0x8c8423fc, 0x00041c02, 0x3082ffff, 0x00622021, 0x00041402, 0x00822021,
-	0x00041027, 0xa4a20006, 0x3c030800, 0x8c632404, 0x3c0200ff, 0x3442fff8,
-	0x00628824, 0x96220008, 0x24050001, 0x24034000, 0x000231c0, 0x00801021,
-	0xa4c2001a, 0xa4c0001c, 0xace00000, 0x3c010800, 0xac251b60, 0xaf635cb8,
-	0x8f625cb0, 0x30420002, 0x10400003, 0x00000000, 0x3c010800, 0xac201b60,
-	0x8e220008, 0xaf625cb8, 0x8f625cb0, 0x30420002, 0x10400003, 0x00000000,
-	0x3c010800, 0xac201b60, 0x3c020800, 0x8c421b60, 0x1040ffec, 0x00000000,
-	0x3c040800, 0x0e00063b, 0x8c842404, 0x0a00032a, 0x00000000, 0x3c030800,
-	0x90631b98, 0x24020002, 0x14620003, 0x3c034b65, 0x0a0002e1, 0x00008021,
-	0x8e22001c, 0x34637654, 0x10430002, 0x24100002, 0x24100001, 0x00c02021,
-	0x0e000350, 0x02003021, 0x24020003, 0x3c010800, 0xa0221b98, 0x24020002,
-	0x1202000a, 0x24020001, 0x3c030800, 0x8c6323f0, 0x10620006, 0x00000000,
-	0x3c020800, 0x944223d8, 0x00021400, 0x0a00031f, 0xae220014, 0x3c040800,
-	0x248423da, 0x94820000, 0x00021400, 0xae220014, 0x3c020800, 0x8c421bbc,
-	0x3c03c000, 0x3c010800, 0xa0201b98, 0x00431025, 0xaf625c5c, 0x8f625c50,
-	0x30420002, 0x10400009, 0x00000000, 0x2484f7e2, 0x8c820000, 0x00431025,
-	0xaf625c5c, 0x8f625c50, 0x30420002, 0x1440fffa, 0x00000000, 0x3c020800,
-	0x24421b84, 0x8c430000, 0x24630001, 0xac430000, 0x8f630c14, 0x3063000f,
-	0x2c620002, 0x1440000c, 0x3c024000, 0x8f630c14, 0x3c020800, 0x8c421b40,
-	0x3063000f, 0x24420001, 0x3c010800, 0xac221b40, 0x2c620002, 0x1040fff7,
-	0x00000000, 0x3c024000, 0x02421825, 0xaf635c9c, 0x8f625c90, 0x30420002,
-	0x1440fffc, 0x00000000, 0x12600003, 0x00000000, 0x0e0004c0, 0x00000000,
-	0x8fbf0028, 0x8fb30024, 0x8fb20020, 0x8fb1001c, 0x8fb00018, 0x03e00008,
-	0x27bd0030, 0x8f634450, 0x3c040800, 0x24841b88, 0x8c820000, 0x00031c02,
-	0x0043102b, 0x14400007, 0x3c038000, 0x8c840004, 0x8f624450, 0x00021c02,
-	0x0083102b, 0x1040fffc, 0x3c038000, 0xaf634444, 0x8f624444, 0x00431024,
-	0x1440fffd, 0x00000000, 0x8f624448, 0x03e00008, 0x3042ffff, 0x3c024000,
-	0x00822025, 0xaf645c38, 0x8f625c30, 0x30420002, 0x1440fffc, 0x00000000,
-	0x03e00008, 0x00000000, 0x27bdffe0, 0x00805821, 0x14c00011, 0x256e0008,
-	0x3c020800, 0x8c4223f4, 0x10400007, 0x24020016, 0x3c010800, 0xa42223d2,
-	0x2402002a, 0x3c010800, 0x0a000364, 0xa42223d4, 0x8d670010, 0x00071402,
-	0x3c010800, 0xa42223d2, 0x3c010800, 0xa42723d4, 0x3c040800, 0x948423d4,
-	0x3c030800, 0x946323d2, 0x95cf0006, 0x3c020800, 0x944223d0, 0x00832023,
-	0x01e2c023, 0x3065ffff, 0x24a20028, 0x01c24821, 0x3082ffff, 0x14c0001a,
-	0x01226021, 0x9582000c, 0x3042003f, 0x3c010800, 0xa42223d6, 0x95820004,
-	0x95830006, 0x3c010800, 0xac2023e4, 0x3c010800, 0xac2023e8, 0x00021400,
-	0x00431025, 0x3c010800, 0xac221bc0, 0x95220004, 0x3c010800, 0xa4221bc4,
-	0x95230002, 0x01e51023, 0x0043102a, 0x10400010, 0x24020001, 0x3c010800,
-	0x0a000398, 0xac2223f8, 0x3c030800, 0x8c6323e8, 0x3c020800, 0x94421bc4,
-	0x00431021, 0xa5220004, 0x3c020800, 0x94421bc0, 0xa5820004, 0x3c020800,
-	0x8c421bc0, 0xa5820006, 0x3c020800, 0x8c4223f0, 0x3c0d0800, 0x8dad23e4,
-	0x3c0a0800, 0x144000e5, 0x8d4a23e8, 0x3c020800, 0x94421bc4, 0x004a1821,
-	0x3063ffff, 0x0062182b, 0x24020002, 0x10c2000d, 0x01435023, 0x3c020800,
-	0x944223d6, 0x30420009, 0x10400008, 0x00000000, 0x9582000c, 0x3042fff6,
-	0xa582000c, 0x3c020800, 0x944223d6, 0x30420009, 0x01a26823, 0x3c020800,
-	0x8c4223f8, 0x1040004a, 0x01203821, 0x3c020800, 0x944223d2, 0x00004021,
-	0xa520000a, 0x01e21023, 0xa5220002, 0x3082ffff, 0x00021042, 0x18400008,
-	0x00003021, 0x00401821, 0x94e20000, 0x25080001, 0x00c23021, 0x0103102a,
-	0x1440fffb, 0x24e70002, 0x00061c02, 0x30c2ffff, 0x00623021, 0x00061402,
-	0x00c23021, 0x00c02821, 0x00061027, 0xa522000a, 0x00003021, 0x2527000c,
-	0x00004021, 0x94e20000, 0x25080001, 0x00c23021, 0x2d020004, 0x1440fffb,
-	0x24e70002, 0x95220002, 0x00004021, 0x91230009, 0x00442023, 0x01803821,
-	0x3082ffff, 0xa4e00010, 0x00621821, 0x00021042, 0x18400010, 0x00c33021,
-	0x00404821, 0x94e20000, 0x24e70002, 0x00c23021, 0x30e2007f, 0x14400006,
-	0x25080001, 0x8d630000, 0x3c02007f, 0x3442ff80, 0x00625824, 0x25670008,
-	0x0109102a, 0x1440fff3, 0x00000000, 0x30820001, 0x10400005, 0x00061c02,
-	0xa0e00001, 0x94e20000, 0x00c23021, 0x00061c02, 0x30c2ffff, 0x00623021,
-	0x00061402, 0x00c23021, 0x0a00047d, 0x30c6ffff, 0x24020002, 0x14c20081,
-	0x00000000, 0x3c020800, 0x8c42240c, 0x14400007, 0x00000000, 0x3c020800,
-	0x944223d2, 0x95230002, 0x01e21023, 0x10620077, 0x00000000, 0x3c020800,
-	0x944223d2, 0x01e21023, 0xa5220002, 0x3c020800, 0x8c42240c, 0x1040001a,
-	0x31e3ffff, 0x8dc70010, 0x3c020800, 0x94421b96, 0x00e04021, 0x00072c02,
-	0x00aa2021, 0x00431023, 0x00823823, 0x00072402, 0x30e2ffff, 0x00823821,
-	0x00071027, 0xa522000a, 0x3102ffff, 0x3c040800, 0x948423d4, 0x00453023,
-	0x00e02821, 0x00641823, 0x006d1821, 0x00c33021, 0x00061c02, 0x30c2ffff,
-	0x0a00047d, 0x00623021, 0x01203821, 0x00004021, 0x3082ffff, 0x00021042,
-	0x18400008, 0x00003021, 0x00401821, 0x94e20000, 0x25080001, 0x00c23021,
-	0x0103102a, 0x1440fffb, 0x24e70002, 0x00061c02, 0x30c2ffff, 0x00623021,
-	0x00061402, 0x00c23021, 0x00c02821, 0x00061027, 0xa522000a, 0x00003021,
-	0x2527000c, 0x00004021, 0x94e20000, 0x25080001, 0x00c23021, 0x2d020004,
-	0x1440fffb, 0x24e70002, 0x95220002, 0x00004021, 0x91230009, 0x00442023,
-	0x01803821, 0x3082ffff, 0xa4e00010, 0x3c040800, 0x948423d4, 0x00621821,
-	0x00c33021, 0x00061c02, 0x30c2ffff, 0x00623021, 0x00061c02, 0x3c020800,
-	0x944223d0, 0x00c34821, 0x00441023, 0x00021fc2, 0x00431021, 0x00021043,
-	0x18400010, 0x00003021, 0x00402021, 0x94e20000, 0x24e70002, 0x00c23021,
-	0x30e2007f, 0x14400006, 0x25080001, 0x8d630000, 0x3c02007f, 0x3442ff80,
-	0x00625824, 0x25670008, 0x0104102a, 0x1440fff3, 0x00000000, 0x3c020800,
-	0x944223ec, 0x00c23021, 0x3122ffff, 0x00c23021, 0x00061c02, 0x30c2ffff,
-	0x00623021, 0x00061402, 0x00c23021, 0x00c04021, 0x00061027, 0xa5820010,
-	0xadc00014, 0x0a00049d, 0xadc00000, 0x8dc70010, 0x00e04021, 0x11400007,
-	0x00072c02, 0x00aa3021, 0x00061402, 0x30c3ffff, 0x00433021, 0x00061402,
-	0x00c22821, 0x00051027, 0xa522000a, 0x3c030800, 0x946323d4, 0x3102ffff,
-	0x01e21021, 0x00433023, 0x00cd3021, 0x00061c02, 0x30c2ffff, 0x00623021,
-	0x00061402, 0x00c23021, 0x00c04021, 0x00061027, 0xa5820010, 0x3102ffff,
-	0x00051c00, 0x00431025, 0xadc20010, 0x3c020800, 0x8c4223f4, 0x10400005,
-	0x2de205eb, 0x14400002, 0x25e2fff2, 0x34028870, 0xa5c20034, 0x3c030800,
-	0x246323e8, 0x8c620000, 0x24420001, 0xac620000, 0x3c040800, 0x8c8423e4,
-	0x3c020800, 0x8c421bc0, 0x3303ffff, 0x00832021, 0x00431821, 0x0062102b,
-	0x3c010800, 0xac2423e4, 0x10400003, 0x2482ffff, 0x3c010800, 0xac2223e4,
-	0x3c010800, 0xac231bc0, 0x03e00008, 0x27bd0020, 0x27bdffb8, 0x3c050800,
-	0x24a51b96, 0xafbf0044, 0xafbe0040, 0xafb7003c, 0xafb60038, 0xafb50034,
-	0xafb40030, 0xafb3002c, 0xafb20028, 0xafb10024, 0xafb00020, 0x94a90000,
-	0x3c020800, 0x944223d0, 0x3c030800, 0x8c631bb0, 0x3c040800, 0x8c841bac,
-	0x01221023, 0x0064182a, 0xa7a9001e, 0x106000be, 0xa7a20016, 0x24be0022,
-	0x97b6001e, 0x24b3001a, 0x24b70016, 0x8fc20000, 0x14400008, 0x00000000,
-	0x8fc2fff8, 0x97a30016, 0x8fc4fff4, 0x00431021, 0x0082202a, 0x148000b0,
-	0x00000000, 0x97d50818, 0x32a2ffff, 0x104000a3, 0x00009021, 0x0040a021,
-	0x00008821, 0x0e000625, 0x00000000, 0x00403021, 0x14c00007, 0x00000000,
-	0x3c020800, 0x8c4223dc, 0x24420001, 0x3c010800, 0x0a000596, 0xac2223dc,
-	0x3c100800, 0x02118021, 0x8e101bc8, 0x9608000a, 0x31020040, 0x10400005,
-	0x2407180c, 0x8e02000c, 0x2407188c, 0x00021400, 0xacc20018, 0x31020080,
-	0x54400001, 0x34e70010, 0x3c020800, 0x00511021, 0x8c421bd0, 0x3c030800,
-	0x00711821, 0x8c631bd4, 0x00021500, 0x00031c00, 0x00431025, 0xacc20014,
-	0x96040008, 0x3242ffff, 0x00821021, 0x0282102a, 0x14400002, 0x02b22823,
-	0x00802821, 0x8e020000, 0x02459021, 0xacc20000, 0x8e020004, 0x00c02021,
-	0x26310010, 0xac820004, 0x30e2ffff, 0xac800008, 0xa485000e, 0xac820010,
-	0x24020305, 0x0e0005a2, 0xa482000c, 0x3242ffff, 0x0054102b, 0x1440ffc5,
-	0x3242ffff, 0x0a00058e, 0x00000000, 0x8e620000, 0x8e63fffc, 0x0043102a,
-	0x10400067, 0x00000000, 0x8e62fff0, 0x00028900, 0x3c100800, 0x02118021,
-	0x0e000625, 0x8e101bc8, 0x00403021, 0x14c00005, 0x00000000, 0x8e62082c,
-	0x24420001, 0x0a000596, 0xae62082c, 0x9608000a, 0x31020040, 0x10400005,
-	0x2407180c, 0x8e02000c, 0x2407188c, 0x00021400, 0xacc20018, 0x3c020800,
-	0x00511021, 0x8c421bd0, 0x3c030800, 0x00711821, 0x8c631bd4, 0x00021500,
-	0x00031c00, 0x00431025, 0xacc20014, 0x8e63fff4, 0x96020008, 0x00432023,
-	0x3242ffff, 0x3083ffff, 0x00431021, 0x02c2102a, 0x10400003, 0x00802821,
-	0x97a9001e, 0x01322823, 0x8e620000, 0x30a4ffff, 0x00441021, 0xae620000,
-	0xa4c5000e, 0x8e020000, 0xacc20000, 0x8e020004, 0x8e63fff4, 0x00431021,
-	0xacc20004, 0x8e63fff4, 0x96020008, 0x00641821, 0x0062102a, 0x14400006,
-	0x02459021, 0x8e62fff0, 0xae60fff4, 0x24420001, 0x0a000571, 0xae62fff0,
-	0xae63fff4, 0xacc00008, 0x3242ffff, 0x10560003, 0x31020004, 0x10400006,
-	0x24020305, 0x31020080, 0x54400001, 0x34e70010, 0x34e70020, 0x24020905,
-	0xa4c2000c, 0x8ee30000, 0x8ee20004, 0x14620007, 0x3c02b49a, 0x8ee20860,
-	0x54400001, 0x34e70400, 0x3c024b65, 0x0a000588, 0x34427654, 0x344289ab,
-	0xacc2001c, 0x30e2ffff, 0xacc20010, 0x0e0005a2, 0x00c02021, 0x3242ffff,
-	0x0056102b, 0x1440ff9b, 0x00000000, 0x8e620000, 0x8e63fffc, 0x0043102a,
-	0x1440ff48, 0x00000000, 0x8fbf0044, 0x8fbe0040, 0x8fb7003c, 0x8fb60038,
-	0x8fb50034, 0x8fb40030, 0x8fb3002c, 0x8fb20028, 0x8fb10024, 0x8fb00020,
-	0x03e00008, 0x27bd0048, 0x27bdffe8, 0xafbf0014, 0xafb00010, 0x8f624450,
-	0x8f634410, 0x0a0005b1, 0x00808021, 0x8f626820, 0x30422000, 0x10400003,
-	0x00000000, 0x0e0001f0, 0x00002021, 0x8f624450, 0x8f634410, 0x3042ffff,
-	0x0043102b, 0x1440fff5, 0x00000000, 0x8f630c14, 0x3063000f, 0x2c620002,
-	0x1440000b, 0x00000000, 0x8f630c14, 0x3c020800, 0x8c421b40, 0x3063000f,
-	0x24420001, 0x3c010800, 0xac221b40, 0x2c620002, 0x1040fff7, 0x00000000,
-	0xaf705c18, 0x8f625c10, 0x30420002, 0x10400009, 0x00000000, 0x8f626820,
-	0x30422000, 0x1040fff8, 0x00000000, 0x0e0001f0, 0x00002021, 0x0a0005c4,
-	0x00000000, 0x8fbf0014, 0x8fb00010, 0x03e00008, 0x27bd0018, 0x00000000,
-	0x00000000, 0x00000000, 0x27bdffe8, 0x3c1bc000, 0xafbf0014, 0xafb00010,
-	0xaf60680c, 0x8f626804, 0x34420082, 0xaf626804, 0x8f634000, 0x24020b50,
-	0x3c010800, 0xac221b54, 0x24020b78, 0x3c010800, 0xac221b64, 0x34630002,
-	0xaf634000, 0x0e000605, 0x00808021, 0x3c010800, 0xa0221b68, 0x304200ff,
-	0x24030002, 0x14430005, 0x00000000, 0x3c020800, 0x8c421b54, 0x0a0005f8,
-	0xac5000c0, 0x3c020800, 0x8c421b54, 0xac5000bc, 0x8f624434, 0x8f634438,
-	0x8f644410, 0x3c010800, 0xac221b5c, 0x3c010800, 0xac231b6c, 0x3c010800,
-	0xac241b58, 0x8fbf0014, 0x8fb00010, 0x03e00008, 0x27bd0018, 0x3c040800,
-	0x8c870000, 0x3c03aa55, 0x3463aa55, 0x3c06c003, 0xac830000, 0x8cc20000,
-	0x14430007, 0x24050002, 0x3c0355aa, 0x346355aa, 0xac830000, 0x8cc20000,
-	0x50430001, 0x24050001, 0x3c020800, 0xac470000, 0x03e00008, 0x00a01021,
-	0x27bdfff8, 0x18800009, 0x00002821, 0x8f63680c, 0x8f62680c, 0x1043fffe,
-	0x00000000, 0x24a50001, 0x00a4102a, 0x1440fff9, 0x00000000, 0x03e00008,
-	0x27bd0008, 0x8f634450, 0x3c020800, 0x8c421b5c, 0x00031c02, 0x0043102b,
-	0x14400008, 0x3c038000, 0x3c040800, 0x8c841b6c, 0x8f624450, 0x00021c02,
-	0x0083102b, 0x1040fffc, 0x3c038000, 0xaf634444, 0x8f624444, 0x00431024,
-	0x1440fffd, 0x00000000, 0x8f624448, 0x03e00008, 0x3042ffff, 0x3082ffff,
-	0x2442e000, 0x2c422001, 0x14400003, 0x3c024000, 0x0a000648, 0x2402ffff,
-	0x00822025, 0xaf645c38, 0x8f625c30, 0x30420002, 0x1440fffc, 0x00001021,
-	0x03e00008, 0x00000000, 0x8f624450, 0x3c030800, 0x8c631b58, 0x0a000651,
-	0x3042ffff, 0x8f624450, 0x3042ffff, 0x0043102b, 0x1440fffc, 0x00000000,
-	0x03e00008, 0x00000000, 0x27bdffe0, 0x00802821, 0x3c040800, 0x24841af0,
-	0x00003021, 0x00003821, 0xafbf0018, 0xafa00010, 0x0e00067c, 0xafa00014,
-	0x0a000660, 0x00000000, 0x8fbf0018, 0x03e00008, 0x27bd0020, 0x00000000,
-	0x00000000, 0x00000000, 0x3c020800, 0x34423000, 0x3c030800, 0x34633000,
-	0x3c040800, 0x348437ff, 0x3c010800, 0xac221b74, 0x24020040, 0x3c010800,
-	0xac221b78, 0x3c010800, 0xac201b70, 0xac600000, 0x24630004, 0x0083102b,
-	0x5040fffd, 0xac600000, 0x03e00008, 0x00000000, 0x00804821, 0x8faa0010,
-	0x3c020800, 0x8c421b70, 0x3c040800, 0x8c841b78, 0x8fab0014, 0x24430001,
-	0x0044102b, 0x3c010800, 0xac231b70, 0x14400003, 0x00004021, 0x3c010800,
-	0xac201b70, 0x3c020800, 0x8c421b70, 0x3c030800, 0x8c631b74, 0x91240000,
-	0x00021140, 0x00431021, 0x00481021, 0x25080001, 0xa0440000, 0x29020008,
-	0x1440fff4, 0x25290001, 0x3c020800, 0x8c421b70, 0x3c030800, 0x8c631b74,
-	0x8f64680c, 0x00021140, 0x00431021, 0xac440008, 0xac45000c, 0xac460010,
-	0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c, 0x00000000, 0x00000000,
-};
-
-static const u32 tg3TsoFwRodata[] = {
-	0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000,
-	0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x496e0000, 0x73746b6f,
-	0x66662a2a, 0x00000000, 0x53774576, 0x656e7430, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x66617461, 0x6c457272, 0x00000000, 0x00000000,
-	0x00000000,
-};
-
-static const u32 tg3TsoFwData[] = {
-	0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x362e3000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000,
-};
-
 /* 5705 needs a special version of the TSO firmware.  */
-#define TG3_TSO5_FW_RELEASE_MAJOR	0x1
-#define TG3_TSO5_FW_RELASE_MINOR	0x2
-#define TG3_TSO5_FW_RELEASE_FIX		0x0
-#define TG3_TSO5_FW_START_ADDR		0x00010000
-#define TG3_TSO5_FW_TEXT_ADDR		0x00010000
-#define TG3_TSO5_FW_TEXT_LEN		0xe90
-#define TG3_TSO5_FW_RODATA_ADDR		0x00010e90
-#define TG3_TSO5_FW_RODATA_LEN		0x50
-#define TG3_TSO5_FW_DATA_ADDR		0x00010f00
-#define TG3_TSO5_FW_DATA_LEN		0x20
-#define TG3_TSO5_FW_SBSS_ADDR		0x00010f20
-#define TG3_TSO5_FW_SBSS_LEN		0x28
-#define TG3_TSO5_FW_BSS_ADDR		0x00010f50
-#define TG3_TSO5_FW_BSS_LEN		0x88
-
-static const u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = {
-	0x0c004003, 0x00000000, 0x00010f04, 0x00000000, 0x10000003, 0x00000000,
-	0x0000000d, 0x0000000d, 0x3c1d0001, 0x37bde000, 0x03a0f021, 0x3c100001,
-	0x26100000, 0x0c004010, 0x00000000, 0x0000000d, 0x27bdffe0, 0x3c04fefe,
-	0xafbf0018, 0x0c0042e8, 0x34840002, 0x0c004364, 0x00000000, 0x3c030001,
-	0x90630f34, 0x24020002, 0x3c040001, 0x24840e9c, 0x14620003, 0x24050001,
-	0x3c040001, 0x24840e90, 0x24060002, 0x00003821, 0xafa00010, 0x0c004378,
-	0xafa00014, 0x0c00402c, 0x00000000, 0x8fbf0018, 0x03e00008, 0x27bd0020,
-	0x00000000, 0x00000000, 0x27bdffe0, 0xafbf001c, 0xafb20018, 0xafb10014,
-	0x0c0042d4, 0xafb00010, 0x3c128000, 0x24110001, 0x8f706810, 0x32020400,
-	0x10400007, 0x00000000, 0x8f641008, 0x00921024, 0x14400003, 0x00000000,
-	0x0c004064, 0x00000000, 0x3c020001, 0x90420f56, 0x10510003, 0x32020200,
-	0x1040fff1, 0x00000000, 0x0c0041b4, 0x00000000, 0x08004034, 0x00000000,
-	0x8fbf001c, 0x8fb20018, 0x8fb10014, 0x8fb00010, 0x03e00008, 0x27bd0020,
-	0x27bdffe0, 0x3c040001, 0x24840eb0, 0x00002821, 0x00003021, 0x00003821,
-	0xafbf0018, 0xafa00010, 0x0c004378, 0xafa00014, 0x0000d021, 0x24020130,
-	0xaf625000, 0x3c010001, 0xa4200f50, 0x3c010001, 0xa0200f57, 0x8fbf0018,
-	0x03e00008, 0x27bd0020, 0x00000000, 0x00000000, 0x3c030001, 0x24630f60,
-	0x90620000, 0x27bdfff0, 0x14400003, 0x0080c021, 0x08004073, 0x00004821,
-	0x3c022000, 0x03021024, 0x10400003, 0x24090002, 0x08004073, 0xa0600000,
-	0x24090001, 0x00181040, 0x30431f80, 0x346f8008, 0x1520004b, 0x25eb0028,
-	0x3c040001, 0x00832021, 0x8c848010, 0x3c050001, 0x24a50f7a, 0x00041402,
-	0xa0a20000, 0x3c010001, 0xa0240f7b, 0x3c020001, 0x00431021, 0x94428014,
-	0x3c010001, 0xa0220f7c, 0x3c0c0001, 0x01836021, 0x8d8c8018, 0x304200ff,
-	0x24420008, 0x000220c3, 0x24020001, 0x3c010001, 0xa0220f60, 0x0124102b,
-	0x1040000c, 0x00003821, 0x24a6000e, 0x01602821, 0x8ca20000, 0x8ca30004,
-	0x24a50008, 0x24e70001, 0xacc20000, 0xacc30004, 0x00e4102b, 0x1440fff8,
-	0x24c60008, 0x00003821, 0x3c080001, 0x25080f7b, 0x91060000, 0x3c020001,
-	0x90420f7c, 0x2503000d, 0x00c32821, 0x00461023, 0x00021fc2, 0x00431021,
-	0x00021043, 0x1840000c, 0x00002021, 0x91020001, 0x00461023, 0x00021fc2,
-	0x00431021, 0x00021843, 0x94a20000, 0x24e70001, 0x00822021, 0x00e3102a,
-	0x1440fffb, 0x24a50002, 0x00041c02, 0x3082ffff, 0x00622021, 0x00041402,
-	0x00822021, 0x3c02ffff, 0x01821024, 0x3083ffff, 0x00431025, 0x3c010001,
-	0x080040fa, 0xac220f80, 0x3c050001, 0x24a50f7c, 0x90a20000, 0x3c0c0001,
-	0x01836021, 0x8d8c8018, 0x000220c2, 0x1080000e, 0x00003821, 0x01603021,
-	0x24a5000c, 0x8ca20000, 0x8ca30004, 0x24a50008, 0x24e70001, 0xacc20000,
-	0xacc30004, 0x00e4102b, 0x1440fff8, 0x24c60008, 0x3c050001, 0x24a50f7c,
-	0x90a20000, 0x30430007, 0x24020004, 0x10620011, 0x28620005, 0x10400005,
-	0x24020002, 0x10620008, 0x000710c0, 0x080040fa, 0x00000000, 0x24020006,
-	0x1062000e, 0x000710c0, 0x080040fa, 0x00000000, 0x00a21821, 0x9463000c,
-	0x004b1021, 0x080040fa, 0xa4430000, 0x000710c0, 0x00a21821, 0x8c63000c,
-	0x004b1021, 0x080040fa, 0xac430000, 0x00a21821, 0x8c63000c, 0x004b2021,
-	0x00a21021, 0xac830000, 0x94420010, 0xa4820004, 0x95e70006, 0x3c020001,
-	0x90420f7c, 0x3c030001, 0x90630f7a, 0x00e2c823, 0x3c020001, 0x90420f7b,
-	0x24630028, 0x01e34021, 0x24420028, 0x15200012, 0x01e23021, 0x94c2000c,
-	0x3c010001, 0xa4220f78, 0x94c20004, 0x94c30006, 0x3c010001, 0xa4200f76,
-	0x3c010001, 0xa4200f72, 0x00021400, 0x00431025, 0x3c010001, 0xac220f6c,
-	0x95020004, 0x3c010001, 0x08004124, 0xa4220f70, 0x3c020001, 0x94420f70,
-	0x3c030001, 0x94630f72, 0x00431021, 0xa5020004, 0x3c020001, 0x94420f6c,
-	0xa4c20004, 0x3c020001, 0x8c420f6c, 0xa4c20006, 0x3c040001, 0x94840f72,
-	0x3c020001, 0x94420f70, 0x3c0a0001, 0x954a0f76, 0x00441821, 0x3063ffff,
-	0x0062182a, 0x24020002, 0x1122000b, 0x00832023, 0x3c030001, 0x94630f78,
-	0x30620009, 0x10400006, 0x3062fff6, 0xa4c2000c, 0x3c020001, 0x94420f78,
-	0x30420009, 0x01425023, 0x24020001, 0x1122001b, 0x29220002, 0x50400005,
-	0x24020002, 0x11200007, 0x31a2ffff, 0x08004197, 0x00000000, 0x1122001d,
-	0x24020016, 0x08004197, 0x31a2ffff, 0x3c0e0001, 0x95ce0f80, 0x10800005,
-	0x01806821, 0x01c42021, 0x00041c02, 0x3082ffff, 0x00627021, 0x000e1027,
-	0xa502000a, 0x3c030001, 0x90630f7b, 0x31a2ffff, 0x00e21021, 0x0800418d,
-	0x00432023, 0x3c020001, 0x94420f80, 0x00442021, 0x00041c02, 0x3082ffff,
-	0x00622021, 0x00807021, 0x00041027, 0x08004185, 0xa502000a, 0x3c050001,
-	0x24a50f7a, 0x90a30000, 0x14620002, 0x24e2fff2, 0xa5e20034, 0x90a20000,
-	0x00e21023, 0xa5020002, 0x3c030001, 0x94630f80, 0x3c020001, 0x94420f5a,
-	0x30e5ffff, 0x00641821, 0x00451023, 0x00622023, 0x00041c02, 0x3082ffff,
-	0x00622021, 0x00041027, 0xa502000a, 0x3c030001, 0x90630f7c, 0x24620001,
-	0x14a20005, 0x00807021, 0x01631021, 0x90420000, 0x08004185, 0x00026200,
-	0x24620002, 0x14a20003, 0x306200fe, 0x004b1021, 0x944c0000, 0x3c020001,
-	0x94420f82, 0x3183ffff, 0x3c040001, 0x90840f7b, 0x00431021, 0x00e21021,
-	0x00442023, 0x008a2021, 0x00041c02, 0x3082ffff, 0x00622021, 0x00041402,
-	0x00822021, 0x00806821, 0x00041027, 0xa4c20010, 0x31a2ffff, 0x000e1c00,
-	0x00431025, 0x3c040001, 0x24840f72, 0xade20010, 0x94820000, 0x3c050001,
-	0x94a50f76, 0x3c030001, 0x8c630f6c, 0x24420001, 0x00b92821, 0xa4820000,
-	0x3322ffff, 0x00622021, 0x0083182b, 0x3c010001, 0xa4250f76, 0x10600003,
-	0x24a2ffff, 0x3c010001, 0xa4220f76, 0x3c024000, 0x03021025, 0x3c010001,
-	0xac240f6c, 0xaf621008, 0x03e00008, 0x27bd0010, 0x3c030001, 0x90630f56,
-	0x27bdffe8, 0x24020001, 0xafbf0014, 0x10620026, 0xafb00010, 0x8f620cf4,
-	0x2442ffff, 0x3042007f, 0x00021100, 0x8c434000, 0x3c010001, 0xac230f64,
-	0x8c434008, 0x24444000, 0x8c5c4004, 0x30620040, 0x14400002, 0x24020088,
-	0x24020008, 0x3c010001, 0xa4220f68, 0x30620004, 0x10400005, 0x24020001,
-	0x3c010001, 0xa0220f57, 0x080041d5, 0x00031402, 0x3c010001, 0xa0200f57,
-	0x00031402, 0x3c010001, 0xa4220f54, 0x9483000c, 0x24020001, 0x3c010001,
-	0xa4200f50, 0x3c010001, 0xa0220f56, 0x3c010001, 0xa4230f62, 0x24020001,
-	0x1342001e, 0x00000000, 0x13400005, 0x24020003, 0x13420067, 0x00000000,
-	0x080042cf, 0x00000000, 0x3c020001, 0x94420f62, 0x241a0001, 0x3c010001,
-	0xa4200f5e, 0x3c010001, 0xa4200f52, 0x304407ff, 0x00021bc2, 0x00031823,
-	0x3063003e, 0x34630036, 0x00021242, 0x3042003c, 0x00621821, 0x3c010001,
-	0xa4240f58, 0x00832021, 0x24630030, 0x3c010001, 0xa4240f5a, 0x3c010001,
-	0xa4230f5c, 0x3c060001, 0x24c60f52, 0x94c50000, 0x94c30002, 0x3c040001,
-	0x94840f5a, 0x00651021, 0x0044102a, 0x10400013, 0x3c108000, 0x00a31021,
-	0xa4c20000, 0x3c02a000, 0xaf620cf4, 0x3c010001, 0xa0200f56, 0x8f641008,
-	0x00901024, 0x14400003, 0x00000000, 0x0c004064, 0x00000000, 0x8f620cf4,
-	0x00501024, 0x104000b7, 0x00000000, 0x0800420f, 0x00000000, 0x3c030001,
-	0x94630f50, 0x00851023, 0xa4c40000, 0x00621821, 0x3042ffff, 0x3c010001,
-	0xa4230f50, 0xaf620ce8, 0x3c020001, 0x94420f68, 0x34420024, 0xaf620cec,
-	0x94c30002, 0x3c020001, 0x94420f50, 0x14620012, 0x3c028000, 0x3c108000,
-	0x3c02a000, 0xaf620cf4, 0x3c010001, 0xa0200f56, 0x8f641008, 0x00901024,
-	0x14400003, 0x00000000, 0x0c004064, 0x00000000, 0x8f620cf4, 0x00501024,
-	0x1440fff7, 0x00000000, 0x080042cf, 0x241a0003, 0xaf620cf4, 0x3c108000,
-	0x8f641008, 0x00901024, 0x14400003, 0x00000000, 0x0c004064, 0x00000000,
-	0x8f620cf4, 0x00501024, 0x1440fff7, 0x00000000, 0x080042cf, 0x241a0003,
-	0x3c070001, 0x24e70f50, 0x94e20000, 0x03821021, 0xaf620ce0, 0x3c020001,
-	0x8c420f64, 0xaf620ce4, 0x3c050001, 0x94a50f54, 0x94e30000, 0x3c040001,
-	0x94840f58, 0x3c020001, 0x94420f5e, 0x00a32823, 0x00822023, 0x30a6ffff,
-	0x3083ffff, 0x00c3102b, 0x14400043, 0x00000000, 0x3c020001, 0x94420f5c,
-	0x00021400, 0x00621025, 0xaf620ce8, 0x94e20000, 0x3c030001, 0x94630f54,
-	0x00441021, 0xa4e20000, 0x3042ffff, 0x14430021, 0x3c020008, 0x3c020001,
-	0x90420f57, 0x10400006, 0x3c03000c, 0x3c020001, 0x94420f68, 0x34630624,
-	0x0800427c, 0x0000d021, 0x3c020001, 0x94420f68, 0x3c030008, 0x34630624,
-	0x00431025, 0xaf620cec, 0x3c108000, 0x3c02a000, 0xaf620cf4, 0x3c010001,
-	0xa0200f56, 0x8f641008, 0x00901024, 0x14400003, 0x00000000, 0x0c004064,
-	0x00000000, 0x8f620cf4, 0x00501024, 0x10400015, 0x00000000, 0x08004283,
-	0x00000000, 0x3c030001, 0x94630f68, 0x34420624, 0x3c108000, 0x00621825,
-	0x3c028000, 0xaf630cec, 0xaf620cf4, 0x8f641008, 0x00901024, 0x14400003,
-	0x00000000, 0x0c004064, 0x00000000, 0x8f620cf4, 0x00501024, 0x1440fff7,
-	0x00000000, 0x3c010001, 0x080042cf, 0xa4200f5e, 0x3c020001, 0x94420f5c,
-	0x00021400, 0x00c21025, 0xaf620ce8, 0x3c020001, 0x90420f57, 0x10400009,
-	0x3c03000c, 0x3c020001, 0x94420f68, 0x34630624, 0x0000d021, 0x00431025,
-	0xaf620cec, 0x080042c1, 0x3c108000, 0x3c020001, 0x94420f68, 0x3c030008,
-	0x34630604, 0x00431025, 0xaf620cec, 0x3c020001, 0x94420f5e, 0x00451021,
-	0x3c010001, 0xa4220f5e, 0x3c108000, 0x3c02a000, 0xaf620cf4, 0x3c010001,
-	0xa0200f56, 0x8f641008, 0x00901024, 0x14400003, 0x00000000, 0x0c004064,
-	0x00000000, 0x8f620cf4, 0x00501024, 0x1440fff7, 0x00000000, 0x8fbf0014,
-	0x8fb00010, 0x03e00008, 0x27bd0018, 0x00000000, 0x27bdffe0, 0x3c040001,
-	0x24840ec0, 0x00002821, 0x00003021, 0x00003821, 0xafbf0018, 0xafa00010,
-	0x0c004378, 0xafa00014, 0x0000d021, 0x24020130, 0xaf625000, 0x3c010001,
-	0xa4200f50, 0x3c010001, 0xa0200f57, 0x8fbf0018, 0x03e00008, 0x27bd0020,
-	0x27bdffe8, 0x3c1bc000, 0xafbf0014, 0xafb00010, 0xaf60680c, 0x8f626804,
-	0x34420082, 0xaf626804, 0x8f634000, 0x24020b50, 0x3c010001, 0xac220f20,
-	0x24020b78, 0x3c010001, 0xac220f30, 0x34630002, 0xaf634000, 0x0c004315,
-	0x00808021, 0x3c010001, 0xa0220f34, 0x304200ff, 0x24030002, 0x14430005,
-	0x00000000, 0x3c020001, 0x8c420f20, 0x08004308, 0xac5000c0, 0x3c020001,
-	0x8c420f20, 0xac5000bc, 0x8f624434, 0x8f634438, 0x8f644410, 0x3c010001,
-	0xac220f28, 0x3c010001, 0xac230f38, 0x3c010001, 0xac240f24, 0x8fbf0014,
-	0x8fb00010, 0x03e00008, 0x27bd0018, 0x03e00008, 0x24020001, 0x27bdfff8,
-	0x18800009, 0x00002821, 0x8f63680c, 0x8f62680c, 0x1043fffe, 0x00000000,
-	0x24a50001, 0x00a4102a, 0x1440fff9, 0x00000000, 0x03e00008, 0x27bd0008,
-	0x8f634450, 0x3c020001, 0x8c420f28, 0x00031c02, 0x0043102b, 0x14400008,
-	0x3c038000, 0x3c040001, 0x8c840f38, 0x8f624450, 0x00021c02, 0x0083102b,
-	0x1040fffc, 0x3c038000, 0xaf634444, 0x8f624444, 0x00431024, 0x1440fffd,
-	0x00000000, 0x8f624448, 0x03e00008, 0x3042ffff, 0x3082ffff, 0x2442e000,
-	0x2c422001, 0x14400003, 0x3c024000, 0x08004347, 0x2402ffff, 0x00822025,
-	0xaf645c38, 0x8f625c30, 0x30420002, 0x1440fffc, 0x00001021, 0x03e00008,
-	0x00000000, 0x8f624450, 0x3c030001, 0x8c630f24, 0x08004350, 0x3042ffff,
-	0x8f624450, 0x3042ffff, 0x0043102b, 0x1440fffc, 0x00000000, 0x03e00008,
-	0x00000000, 0x27bdffe0, 0x00802821, 0x3c040001, 0x24840ed0, 0x00003021,
-	0x00003821, 0xafbf0018, 0xafa00010, 0x0c004378, 0xafa00014, 0x0800435f,
-	0x00000000, 0x8fbf0018, 0x03e00008, 0x27bd0020, 0x3c020001, 0x3442d600,
-	0x3c030001, 0x3463d600, 0x3c040001, 0x3484ddff, 0x3c010001, 0xac220f40,
-	0x24020040, 0x3c010001, 0xac220f44, 0x3c010001, 0xac200f3c, 0xac600000,
-	0x24630004, 0x0083102b, 0x5040fffd, 0xac600000, 0x03e00008, 0x00000000,
-	0x00804821, 0x8faa0010, 0x3c020001, 0x8c420f3c, 0x3c040001, 0x8c840f44,
-	0x8fab0014, 0x24430001, 0x0044102b, 0x3c010001, 0xac230f3c, 0x14400003,
-	0x00004021, 0x3c010001, 0xac200f3c, 0x3c020001, 0x8c420f3c, 0x3c030001,
-	0x8c630f40, 0x91240000, 0x00021140, 0x00431021, 0x00481021, 0x25080001,
-	0xa0440000, 0x29020008, 0x1440fff4, 0x25290001, 0x3c020001, 0x8c420f3c,
-	0x3c030001, 0x8c630f40, 0x8f64680c, 0x00021140, 0x00431021, 0xac440008,
-	0xac45000c, 0xac460010, 0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c,
-	0x00000000, 0x00000000, 0x00000000,
-};
-
-static const u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = {
-	0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000,
-	0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x00000000, 0x00000000,
-	0x73746b6f, 0x66666c64, 0x00000000, 0x00000000, 0x66617461, 0x6c457272,
-	0x00000000, 0x00000000, 0x00000000,
-};
-
-static const u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = {
-	0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x322e3000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000,
-};
 
 /* tp->lock is held. */
 static int tg3_load_tso_firmware(struct tg3 *tp)
 {
 	struct fw_info info;
+	const __be32 *fw_data;
 	unsigned long cpu_base, cpu_scratch_base, cpu_scratch_size;
 	int err, i;
 
 	if (tp->tg3_flags2 & TG3_FLG2_HW_TSO)
 		return 0;
 
+	fw_data = (void *)tp->fw->data;
+
+	/* Firmware blob starts with version numbers, followed by
+	   start address and length. We are setting complete length.
+	   length = end_address_of_bss - start_address_of_text.
+	   Remainder is the blob to be loaded contiguously
+	   from start address. */
+
+	info.fw_base = be32_to_cpu(fw_data[1]);
+	cpu_scratch_size = tp->fw_len;
+	info.fw_len = tp->fw->size - 12;
+	info.fw_data = &fw_data[3];
+
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) {
-		info.text_base = TG3_TSO5_FW_TEXT_ADDR;
-		info.text_len = TG3_TSO5_FW_TEXT_LEN;
-		info.text_data = &tg3Tso5FwText[0];
-		info.rodata_base = TG3_TSO5_FW_RODATA_ADDR;
-		info.rodata_len = TG3_TSO5_FW_RODATA_LEN;
-		info.rodata_data = &tg3Tso5FwRodata[0];
-		info.data_base = TG3_TSO5_FW_DATA_ADDR;
-		info.data_len = TG3_TSO5_FW_DATA_LEN;
-		info.data_data = &tg3Tso5FwData[0];
 		cpu_base = RX_CPU_BASE;
 		cpu_scratch_base = NIC_SRAM_MBUF_POOL_BASE5705;
-		cpu_scratch_size = (info.text_len +
-				    info.rodata_len +
-				    info.data_len +
-				    TG3_TSO5_FW_SBSS_LEN +
-				    TG3_TSO5_FW_BSS_LEN);
 	} else {
-		info.text_base = TG3_TSO_FW_TEXT_ADDR;
-		info.text_len = TG3_TSO_FW_TEXT_LEN;
-		info.text_data = &tg3TsoFwText[0];
-		info.rodata_base = TG3_TSO_FW_RODATA_ADDR;
-		info.rodata_len = TG3_TSO_FW_RODATA_LEN;
-		info.rodata_data = &tg3TsoFwRodata[0];
-		info.data_base = TG3_TSO_FW_DATA_ADDR;
-		info.data_len = TG3_TSO_FW_DATA_LEN;
-		info.data_data = &tg3TsoFwData[0];
 		cpu_base = TX_CPU_BASE;
 		cpu_scratch_base = TX_CPU_SCRATCH_BASE;
 		cpu_scratch_size = TX_CPU_SCRATCH_SIZE;
@@ -7060,21 +6418,21 @@ static int tg3_load_tso_firmware(struct tg3 *tp)
 
 	/* Now startup the cpu. */
 	tw32(cpu_base + CPU_STATE, 0xffffffff);
-	tw32_f(cpu_base + CPU_PC,    info.text_base);
+	tw32_f(cpu_base + CPU_PC, info.fw_base);
 
 	for (i = 0; i < 5; i++) {
-		if (tr32(cpu_base + CPU_PC) == info.text_base)
+		if (tr32(cpu_base + CPU_PC) == info.fw_base)
 			break;
 		tw32(cpu_base + CPU_STATE, 0xffffffff);
 		tw32(cpu_base + CPU_MODE,  CPU_MODE_HALT);
-		tw32_f(cpu_base + CPU_PC,    info.text_base);
+		tw32_f(cpu_base + CPU_PC, info.fw_base);
 		udelay(1000);
 	}
 	if (i >= 5) {
 		printk(KERN_ERR PFX "tg3_load_tso_firmware fails for %s "
 		       "to set CPU PC, is %08x should be %08x\n",
 		       tp->dev->name, tr32(cpu_base + CPU_PC),
-		       info.text_base);
+		       info.fw_base);
 		return -ENODEV;
 	}
 	tw32(cpu_base + CPU_STATE, 0xffffffff);
@@ -7299,11 +6657,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 	else if (tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE) {
 		int fw_len;
 
-		fw_len = (TG3_TSO5_FW_TEXT_LEN +
-			  TG3_TSO5_FW_RODATA_LEN +
-			  TG3_TSO5_FW_DATA_LEN +
-			  TG3_TSO5_FW_SBSS_LEN +
-			  TG3_TSO5_FW_BSS_LEN);
+		fw_len = tp->fw_len;
 		fw_len = (fw_len + (0x80 - 1)) & ~(0x80 - 1);
 		tw32(BUFMGR_MB_POOL_ADDR,
 		     NIC_SRAM_MBUF_POOL_BASE5705 + fw_len);
@@ -13580,6 +12934,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	struct net_device *dev;
 	struct tg3 *tp;
 	int err, pm_cap;
+	const char *fw_name = NULL;
 	char str[40];
 	u64 dma_mask, persist_dma_mask;
 
@@ -13735,6 +13090,9 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 
 	tg3_init_bufmgr_config(tp);
 
+	if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0)
+		fw_name = FIRMWARE_TG3;
+
 	if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
 		tp->tg3_flags2 |= TG3_FLG2_TSO_CAPABLE;
 	}
@@ -13747,6 +13105,37 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	} else {
 		tp->tg3_flags2 |= TG3_FLG2_TSO_CAPABLE | TG3_FLG2_TSO_BUG;
 	}
+	if (tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE) {
+		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)
+			fw_name = FIRMWARE_TG3TSO5;
+		else
+			fw_name = FIRMWARE_TG3TSO;
+	}
+
+	if (fw_name) {
+		const __be32 *fw_data;
+
+		err = request_firmware(&tp->fw, fw_name, &tp->pdev->dev);
+		if (err) {
+			printk(KERN_ERR "tg3: Failed to load firmware \"%s\"\n",
+			       fw_name);
+			goto err_out_iounmap;
+		}
+
+		fw_data = (void *)tp->fw->data;
+
+		/* Firmware blob starts with version numbers, followed by
+		   start address and _full_ length including BSS sections
+		   (which must be longer than the actual data, of course */
+
+		tp->fw_len = be32_to_cpu(fw_data[2]);	/* includes bss */
+		if (tp->fw_len < (tp->fw->size - 12)) {
+			printk(KERN_ERR "tg3: bogus length %d in \"%s\"\n",
+			       tp->fw_len, fw_name);
+			err = -EINVAL;
+			goto err_out_fw;
+		}
+	}
 
 	/* TSO is on by default on chips that support hardware TSO.
 	 * Firmware TSO on older chips gives lower performance, so it
@@ -13778,7 +13167,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	if (err) {
 		printk(KERN_ERR PFX "Could not obtain valid ethernet address, "
 		       "aborting.\n");
-		goto err_out_iounmap;
+		goto err_out_fw;
 	}
 
 	if (tp->tg3_flags3 & TG3_FLG3_ENABLE_APE) {
@@ -13787,7 +13176,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 			printk(KERN_ERR PFX "Cannot map APE registers, "
 			       "aborting.\n");
 			err = -ENOMEM;
-			goto err_out_iounmap;
+			goto err_out_fw;
 		}
 
 		tg3_ape_lock_init(tp);
@@ -13867,6 +13256,10 @@ err_out_apeunmap:
 		tp->aperegs = NULL;
 	}
 
+err_out_fw:
+	if (tp->fw)
+		release_firmware(tp->fw);
+
 err_out_iounmap:
 	if (tp->regs) {
 		iounmap(tp->regs);
@@ -13892,6 +13285,9 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev)
 	if (dev) {
 		struct tg3 *tp = netdev_priv(dev);
 
+		if (tp->fw)
+			release_firmware(tp->fw);
+
 		flush_scheduled_work();
 
 		if (tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) {
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 8936edf..ae5da60 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2762,6 +2762,10 @@ struct tg3 {
 #define SST_25VF0X0_PAGE_SIZE		4098
 
 	struct ethtool_coalesce		coal;
+
+	/* firmware info */
+	const struct firmware		*fw;
+	u32				fw_len; /* includes BSS */
 };
 
 #endif /* !(_T3_H) */
diff --git a/firmware/Makefile b/firmware/Makefile
index e333a42..0dc7afc 100644
--- a/firmware/Makefile
+++ b/firmware/Makefile
@@ -45,6 +45,8 @@ fw-shipped-$(CONFIG_SND_SB16_CSP) += sb16/mulaw_main.csp sb16/alaw_main.csp \
 				     sb16/ima_adpcm_capture.csp
 fw-shipped-$(CONFIG_SND_YMFPCI) += yamaha/ds1_ctrl.fw yamaha/ds1_dsp.fw \
 				   yamaha/ds1e_ctrl.fw
+fw-shipped-$(CONFIG_TIGON3) += tigon/tg3.bin tigon/tg3_tso.bin \
+			       tigon/tg3_tso5.bin
 fw-shipped-$(CONFIG_USB_DABUSB) += dabusb/firmware.fw dabusb/bitstream.bin
 fw-shipped-$(CONFIG_USB_EMI26) += emi26/loader.fw emi26/firmware.fw \
 				  emi26/bitstream.fw
diff --git a/firmware/WHENCE b/firmware/WHENCE
index 8823a43..0460eb3 100644
--- a/firmware/WHENCE
+++ b/firmware/WHENCE
@@ -371,3 +371,22 @@ Found in hex form in kernel source, but source allegedly available at
 http://alteon.shareable.org/
 
 --------------------------------------------------------------------------
+
+Driver: tigon3 -- Broadcom Tigon3 based gigabit Ethernet cards
+
+File: tigon/tg3.bin
+File: tigon/tg3_tso.bin
+File: tigon/tg3_tso5.bin
+
+Licence:
+ * Firmware is:
+ *	Derived from proprietary unpublished source code,
+ *	Copyright (C) 2000-2003 Broadcom Corporation.
+ *
+ *	Permission is hereby granted for the distribution of this firmware
+ *	data in hexadecimal or equivalent format, provided this copyright
+ *	notice is accompanying it.
+
+Found in hex form in kernel source.
+
+--------------------------------------------------------------------------
diff --git a/firmware/tigon/tg3.bin.ihex b/firmware/tigon/tg3.bin.ihex
new file mode 100644
index 0000000..d842d7c
--- /dev/null
+++ b/firmware/tigon/tg3.bin.ihex
@@ -0,0 +1,175 @@
+:10000000000000000800000000000A80000000005E
+:1000100010000003000000000000000D0000000DB3
+:100020003C1D080037BD3FFC03A0F0213C10080038
+:10003000261000000E000018000000000000000D57
+:100040003C1D080037BD3FFC03A0F0213C10080018
+:10005000261000340E00021C000000000000000DFD
+:1000600000000000000000000000000027BDFFE0CD
+:100070003C1CC000AFBF0018AF80680C0E00004CE5
+:10008000241B210597850000978700029782002C8A
+:100090009783002E3C040800248409C0AFA00014FC
+:1000A000000214000062182500052C00AFA3001008
+:1000B0008F86001000E528250E000060240701024D
+:1000C0003C02AC00344201003C03AC01346301004B
+:1000D000AF8204903C02FFFFAF820494AF83049888
+:1000E000AF82049C24020001AF825CE00E00003F5E
+:1000F000AF825D000E000140000000008FBF0018BD
+:1001000003E0000827BD00202402FFFFAF82540453
+:100110008F83540034630400AF835400AF825404CF
+:100120003C02080024420034AF82541C03E0000863
+:10013000AF80540000000000000000003C020800F6
+:10014000344230003C030800346330003C040800B3
+:10015000348437FF3C010800AC220A6424020040CA
+:100160003C010800AC220A683C010800AC200A608F
+:10017000AC600000246300040083102B5040FFFD9E
+:10018000AC60000003E0000800000000008048218F
+:100190008FAA00103C0208008C420A603C04080050
+:1001A0008C840A688FAB0014244300010044102B98
+:1001B0003C010800AC230A60144000030000402109
+:1001C0003C010800AC200A603C0208008C420A6036
+:1001D0003C0308008C630A64912400000002114073
+:1001E000004310210048102125080001A044000010
+:1001F000290200081440FFF4252900013C020800F0
+:100200008C420A603C0308008C630A648F84680C8B
+:100210000002114000431021AC440008AC45000C22
+:10022000AC460010AC470014AC4A001803E00008CC
+:10023000AC4B001C000000000000000000000000AB
+:1002400000000000000000000000000000000000AE
+:10025000000000000000000000000000000000009E
+:10026000000000000000000000000000000000008E
+:10027000000000000000000000000000000000007E
+:10028000000000000000000000000000000000006E
+:10029000000000000000000000000000000000005E
+:1002A000000000000000000000000000000000004E
+:1002B000000000000000000000000000000000003E
+:1002C000000000000000000000000000000000002E
+:1002D000000000000000000000000000000000001E
+:1002E000000000000000000000000000000000000E
+:1002F00000000000000000000000000000000000FE
+:1003000000000000000000000000000002000008E3
+:10031000000000000A0001E33C0A00010A0001E3BA
+:100320003C0A00020A0001E3000000000A0001E3A9
+:10033000000000000A0001E3000000000A0001E3E1
+:10034000000000000A0001E3000000000A0001E3D1
+:10035000000000000A0001E3000000000A0001E3C1
+:10036000000000000A0001E3000000000A0001E3B1
+:100370003C0A00070A0001E33C0A00080A0001E306
+:100380003C0A00090A0001E3000000000A0001E342
+:10039000000000000A0001E33C0A000B0A0001E330
+:1003A0003C0A000C0A0001E33C0A000D0A0001E3CC
+:1003B000000000000A0001E3000000000A0001E361
+:1003C0003C0A000E0A0001E3000000000A0001E3FD
+:1003D000000000000A0001E3000000000A0001E341
+:1003E000000000000A0001E3000000000A0001E331
+:1003F000000000000A0001E3000000000A0001E321
+:10040000000000000A0001E33C0A00130A0001E3B7
+:100410003C0A001400000000000000000000000082
+:1004200000000000000000000000000000000000CC
+:1004300000000000000000000000000000000000BC
+:1004400000000000000000000000000000000000AC
+:10045000000000000000000000000000000000009C
+:10046000000000000000000000000000000000008C
+:10047000000000000000000000000000000000007C
+:10048000000000000000000000000000000000006C
+:10049000000000000000000000000000000000005C
+:1004A000000000000000000000000000000000004C
+:1004B000000000000000000000000000000000003C
+:1004C000000000000000000000000000000000002C
+:1004D000000000000000000000000000000000001C
+:1004E000000000000000000000000000000000000C
+:1004F00000000000000000000000000000000000FC
+:1005000000000000000000000000000027BDFFE028
+:100510000000182100001021AFBF0018AFB1001477
+:10052000AFB000103C01080000220821AC200A7086
+:100530003C01080000220821AC200A743C0108009C
+:1005400000220821AC200A78246300011860FFF51E
+:100550002442000C241100018F9068103202000424
+:1005600014400005240400013C0208008C420A7873
+:1005700018400003000020210E000182000000004E
+:100580003202000110400003000000000E0001696B
+:10059000000000000A000153AF9150288FBF0018DF
+:1005A0008FB100148FB0001003E0000827BD0020B9
+:1005B0003C0508008CA50A703C0608008CC60A8021
+:1005C0003C0708008CE70A7827BDFFE03C040800E0
+:1005D000248409D0AFBF0018AFA000100E00006047
+:1005E000AFA000140E00017B000020218FBF001877
+:1005F00003E0000827BD0020240200018F8368105B
+:1006000000821004000210270062182403E0000892
+:10061000AF83681027BDFFD8AFBF00241080002E25
+:10062000AFB000208F825CECAFA200188F825CEC30
+:100630003C10080026100A78AFA2001C340280008B
+:10064000AF825CEC8E020000184000160000000033
+:100650003C02080094420A748FA3001C000221C0CF
+:10066000AC8300048FA2001C3C0108000E000201B4
+:10067000AC220A7410400005000000008E02000049
+:10068000244200010A0001DFAE0200003C02080023
+:100690008C420A7000021C02000321C00A0001C53E
+:1006A000AFA2001C0E000201000000001040001F5D
+:1006B000000000008E0200008FA3001C24420001F5
+:1006C0003C010800AC230A703C010800AC230A740A
+:1006D0000A0001DFAE0200003C10080026100A7874
+:1006E0008E02000018400028000000000E000201E9
+:1006F0000000000014400024000000008E020000F2
+:100700003C0308008C630A702442FFFFAFA3001C67
+:1007100018400006AE02000000031402000221C0CF
+:100720008C8200043C010800AC220A7097A2001ED3
+:100730002442FF002C4203001440000B240240001E
+:100740003C040800248409DCAFA00010AFA0001412
+:100750008FA6001C240500080E0000600000382150
+:100760000A0001DF00000000AF825CF83C020800D4
+:100770008C420A408FA3001C24420001AF835CF826
+:100780003C010800AC220A408FBF00248FB000203B
+:1007900003E0000827BD002827BDFFE03C04080057
+:1007A000248409E8000028210000302100003821BD
+:1007B000AFBF0018AFA000100E000060AFA0001483
+:1007C0008FBF001803E0000827BD00208F82680C4F
+:1007D0008F85680C000218270003182B00031823CC
+:1007E000004310240044102100A2282B10A0000672
+:1007F00000000000004018218F82680C0043102B7D
+:100800001440FFFD0000000003E0000800000000AD
+:100810003C0408008C8400003C0308008C630A4000
+:100820000064102B54400002008310230064102346
+:100830002C42000803E000083842000127BDFFE019
+:10084000008028213C04080024840A000000302194
+:1008500000003821AFBF0018AFA000100E000060EC
+:10086000AFA000140A000216000000008FBF00189D
+:1008700003E0000827BD00200000000027BDFFE0C6
+:100880003C1CC000AFBF00180E00004CAF80680CCD
+:100890003C04080024840A10038028210000302131
+:1008A00000003821AFA000100E000060AFA00014BF
+:1008B0002402FFFFAF8254043C0200AA0E0002345F
+:1008C000AF8254348FBF001803E0000827BD00201A
+:1008D00000000000000000000000000027BDFFE84D
+:1008E000AFB0001024100001AFBF00143C01C003E2
+:1008F000AC2000008F8268103042200010400003BE
+:10090000000000000E000246000000000A00023A4B
+:10091000AF9054288FBF00148FB0001003E0000880
+:1009200027BD001827BDFFF88F845D0C3C0200FF37
+:100930003C0308008C630A503442FFF80082102404
+:100940001043001E3C0500FF34A5FFF83C06C00321
+:100950003C074000008518248C6200103C01080010
+:10096000AC230A50304200081040000500871025D3
+:100970008CC2000024420001ACC200000087102598
+:10098000AF825D0C8FA2000024420001AFA20000E4
+:100990008FA200008FA2000024420001AFA200003D
+:1009A0008FA200008F845D0C3C0308008C630A500A
+:1009B000008510241443FFE80085182427BD000893
+:1009C00003E000080000000000000000353730316F
+:1009D000726C734100000000000000005377457600
+:1009E000656E743000000000726C704576656E7440
+:1009F00031000000556E6B6E45766E74000000008D
+:100A0000000000000000000000000000666174614A
+:100A10006C45727200000000000000004D61696EBC
+:100A2000437075420000000000000000000000005C
+:100A300000000000000000000000000000000000B6
+:100A400000000000000000000000000000000000A6
+:100A50000000000000000000000000000000000096
+:0C0A60000000000000000000000000008A
+:00000001FF
+ * Firmware is:
+ *	Derived from proprietary unpublished source code,
+ *	Copyright (C) 2000-2003 Broadcom Corporation.
+ *
+ *	Permission is hereby granted for the distribution of this firmware
+ *	data in hexadecimal or equivalent format, provided this copyright
+ *	notice is accompanying it.
diff --git a/firmware/tigon/tg3_tso.bin.ihex b/firmware/tigon/tg3_tso.bin.ihex
new file mode 100644
index 0000000..f10c4ef
--- /dev/null
+++ b/firmware/tigon/tg3_tso.bin.ihex
@@ -0,0 +1,446 @@
+:100000000106000008000000000024140E00000398
+:100010000000000008001B24000000001000000386
+:10002000000000000000000D0000000D3C1D080055
+:1000300037BD400003A0F0213C100800261000004E
+:100040000E000010000000000000000D27BDFFE0C2
+:100050003C04FEFEAFBF00180E0005D83484000239
+:100060000E000668000000003C03080090631B6857
+:10007000240200023C04080024841AAC1462000329
+:10008000240500013C04080024841AA0240600066C
+:1000900000003821AFA000100E00067CAFA00014B5
+:1000A0008F625C5034420001AF625C508F625C90A2
+:1000B00034420001AF625C902402FFFF0E00003466
+:1000C000AF6254048FBF001803E0000827BD002072
+:1000D00000000000000000000000000027BDFFE05D
+:1000E000AFBF001CAFB20018AFB100140E00005B30
+:1000F000AFB0001024120002241100018F7068209C
+:100100003202010010400003000000000E0000BB9E
+:10011000000000008F7068203202200010400004B0
+:10012000320200010E0001F024040001320200013D
+:1001300010400003000000000E0000A300000000BB
+:100140003C02080090421B9814520003000000007B
+:100150000E0004C0000000000A00003CAF715028EF
+:100160008FBF001C8FB200188FB100148FB0001029
+:1001700003E0000827BD002027BDFFE03C04080085
+:1001800024841AC0000028210000302100003821FA
+:10019000AFBF0018AFA000100E00067CAFA0001487
+:1001A0003C040800248423D8A48000003C010800FB
+:1001B000A0201B983C010800AC201B9C3C010800BF
+:1001C000AC201BA03C010800AC201BA43C01080093
+:1001D000AC201BAC3C010800AC201BB83C01080063
+:1001E000AC201BBC8F6244343C010800AC221B884D
+:1001F0008F6244383C010800AC221B8C8F62441093
+:10020000AC80F7A83C010800AC201B843C0108002E
+:10021000AC2023E03C010800AC2023C83C010800CE
+:10022000AC2023CC3C010800AC2024003C01080099
+:10023000AC221B908F6200682403000700021702A3
+:1002400010430005000000008F62006800021702E2
+:1002500014400004240200013C0108000A00009739
+:10026000AC20240CAC8200343C04080024841ACC5A
+:100270003C0508008CA5240C00003021000038212A
+:10028000AFA000100E00067CAFA000148FBF0018B6
+:1002900003E0000827BD002027BDFFE03C04080064
+:1002A00024841AD8000028210000302100003821C1
+:1002B000AFBF0018AFA000100E00067CAFA0001466
+:1002C0000E00005B000000000E0000B400002021C2
+:1002D0008FBF001803E0000827BD002024020001A2
+:1002E0008F63682000821004000210270062182427
+:1002F00003E00008AF63682027BDFFD0AFBF002C2C
+:10030000AFB60028AFB50024AFB40020AFB3001CD7
+:10031000AFB20018AFB10014AFB000108F675C5CD3
+:100320003C03080024631BBC8C62000014470005DA
+:100330003C0200FF3C02080090421B981440011947
+:100340003C0200FF3442FFF800E28824AC67000062
+:1003500000111902306300FF30E20003000211C0F7
+:100360000062282500A04021000716023C03080077
+:1003700090631B983044000F1460003600804821C1
+:10038000240200013C010800A0221B980005110076
+:10039000008210253C010800AC201B9C3C01080099
+:1003A000AC201BA03C010800AC201BA43C010800B1
+:1003B000AC201BAC3C010800AC201BB83C01080081
+:1003C000AC201BB03C010800AC201BB43C01080071
+:1003D000A42223D89622000C30437FFF3C01080062
+:1003E000A4222410304280003C010800A4231BC634
+:1003F00010400005240200013C010800AC2223F457
+:100400000A0001022406003E240600363C010800D2
+:10041000AC2023F49622000A3C03080094631BC618
+:100420003C010800AC2023F03C010800AC2023F87C
+:10043000000213020002108000C210210062182185
+:100440003C010800A42223D03C0108000A00011549
+:10045000A4231B969622000C3C010800A42223EC46
+:100460003C04080024841B9C8C82000000021100C4
+:100470003C01080000220821AC311BC88C8200001E
+:10048000000211003C01080000220821AC271BCC0F
+:100490008C82000025030001306601FF000211007C
+:1004A0003C01080000220821AC261BD08C820000F1
+:1004B000000211003C01080000220821AC291BD4D5
+:1004C000962300083C0208008C421BAC0043282104
+:1004D0003C010800AC251BAC9622000A3042000407
+:1004E00014400018000611008F630C143063000FD5
+:1004F0002C6200021440000B3C02C0008F630C14FD
+:100500003C0208008C421B403063000F2442000173
+:100510003C010800AC221B402C6200021040FFF797
+:100520003C02C00000E21825AF635C5C8F625C5047
+:100530003042000210400014000000000A00014791
+:10054000000000003C0308008C631B803C04080092
+:1005500094841B94012210253C010800A42223DA74
+:10056000240200013C010800AC221BB824630001F6
+:100570000085202A3C01080010800003AC231B806A
+:100580003C010800A4251B943C06080024C61B9CC3
+:100590008CC2000024420001ACC20000284200804E
+:1005A00014400005000000000E000656240400025E
+:1005B0000A0001E6000000003C0208008C421BB863
+:1005C00010400078240200013C05080090A51B980B
+:1005D00014A20072000000003C15080096B51B969E
+:1005E0003C0408008C841BAC32A3FFFF0083102A5C
+:1005F0001440006C000000001483000300000000A1
+:100600003C010800AC2523F01060005C0000902144
+:1006100024D600040060A02124D300148EC2000060
+:10062000000281003C110800023088210E000625DE
+:100630008E311BC80040282110A00054000000008B
+:100640009628000A31020040104000052407180CCB
+:100650008E22000C2407188C00021400ACA2001893
+:100660003C030800007018218C631BD03C0208007A
+:10067000005010218C421BD400031D000002140006
+:1006800000621825ACA300148EC300049622000853
+:10069000004320233242FFFF3083FFFF004310213D
+:1006A0000282102A1440000202B23023008030215E
+:1006B0008E62000030C4FFFF00441021AE620000D3
+:1006C0008E220000ACA200008E2200048E63FFF494
+:1006D00000431021ACA20004A4A6000E8E62FFF419
+:1006E00000441021AE62FFF4962300080043102A54
+:1006F00014400005024690218E62FFF0AE60FFF4C8
+:1007000024420001AE62FFF0ACA000083242FFFFBD
+:1007100014540008240203053102008054400001F3
+:1007200034E7001024020905A4A2000C0A0001CB42
+:1007300034E70020A4A2000C3C0208008C4223F005
+:10074000104000033C024B650A0001D3344276544A
+:100750003C02B49A344289ABACA2001C30E2FFFFE9
+:10076000ACA200100E0005A200A020213242FFFF23
+:100770000054102B1440FFA90000000024020002C6
+:100780003C0108000A0001E6A0221B988EC2083C2A
+:10079000244200010A0001E6AEC2083C0E0004C07B
+:1007A000000000008FBF002C8FB600288FB50024FA
+:1007B0008FB400208FB3001C8FB200188FB10014CB
+:1007C0008FB0001003E0000827BD003027BDFFD028
+:1007D000AFBF0028AFB30024AFB20020AFB1001C00
+:1007E000AFB000188F725C9C3C0200FF3442FFF8EF
+:1007F0003C07080024E71BB4024288249623000E1D
+:100800008CE2000000431021ACE200008E220010B8
+:100810003042002014400011008098210E00063B59
+:10082000022020213C02C00002421825AF635C9CDC
+:100830008F625C90304200021040011E00000000F8
+:10084000AF635C9C8F625C903042000210400119E3
+:10085000000000000A00020D000000008E240008C5
+:100860008E23001400041402000231C0000315029C
+:10087000304201FF2442FFFF3042007F0003194253
+:1008800030637800000211002442400000624821D9
+:100890009522000A3084FFFF30420008104000B06B
+:1008A000000429C03C0208008C42240014400024AB
+:1008B00024C5000894C200143C010800A42223D0DF
+:1008C0008CC40010000414023C010800A42223D2AE
+:1008D0003C010800A42423D494C2000E3083FFFFFF
+:1008E000004310233C010800AC22240894C2001AE3
+:1008F0003C010800AC2624003C010800AC32240472
+:100900003C010800AC2223FC3C02C0000242182536
+:10091000AF635C9C8F625C9030420002104000E547
+:1009200000000000AF635C9C8F625C90304200026C
+:10093000104000E0000000000A0002460000000035
+:1009400094C2000E3C030800946323D40043402368
+:100950003103FFFF2C6200081040001C0000000063
+:1009600094C200142442002800A22821000310424F
+:100970001840000B0000202124E6084800403821E0
+:1009800094A300008CC200002484000100431021C5
+:10099000ACC200000087102A1440FFF924A5000211
+:1009A000310200011040001F3C0240003C040800DE
+:1009B000248423FCA0A0000194A300008C820000EA
+:1009C000004310210A000285AC8200008F6268009B
+:1009D0003C030010004310241040000900000000F8
+:1009E00094C2001A3C0308008C6323FC00431021CE
+:1009F0003C010800AC2223FC0A0002863C024000B5
+:100A000094C2001A94C4001C3C0308008C6323FCAD
+:100A100000441023006218213C010800AC2323FC91
+:100A20003C02400002421825AF635C9C8F625C90E0
+:100A3000304200021440FFFC000000009522000A32
+:100A4000304200101040009B000000003C030800F2
+:100A5000946323D43C07080024E724008CE40000BE
+:100A60008F62680024630030008328213C0300105B
+:100A7000004310241440000A0000000094A2000467
+:100A80003C0408008C8424083C0308008C6323FC8D
+:100A900000441023006218213C010800AC2323FC11
+:100AA0003C0408008C8423FC00041C023082FFFFFD
+:100AB000006220210004140200822021000410277B
+:100AC000A4A200063C0308008C6324043C0200FF3F
+:100AD0003442FFF8006288249622000824050001B1
+:100AE00024034000000231C000801021A4C2001A7B
+:100AF000A4C0001CACE000003C010800AC251B6059
+:100B0000AF635CB88F625CB03042000210400003FB
+:100B1000000000003C010800AC201B608E22000891
+:100B2000AF625CB88F625CB03042000210400003DC
+:100B3000000000003C010800AC201B603C020800E3
+:100B40008C421B601040FFEC000000003C040800D9
+:100B50000E00063B8C8424040A00032A00000000D7
+:100B60003C03080090631B982402000214620003F7
+:100B70003C034B650A0002E1000080218E22001C2C
+:100B80003463765410430002241000022410000144
+:100B900000C020210E000350020030212402000377
+:100BA0003C010800A0221B98240200021202000A45
+:100BB000240200013C0308008C6323F0106200064D
+:100BC000000000003C020800944223D800021400F8
+:100BD0000A00031FAE2200143C040800248423DA18
+:100BE0009482000000021400AE2200143C020800AF
+:100BF0008C421BBC3C03C0003C010800A0201B9899
+:100C000000431025AF625C5C8F625C503042000292
+:100C100010400009000000002484F7E28C820000EC
+:100C200000431025AF625C5C8F625C503042000272
+:100C30001440FFFA000000003C02080024421B841C
+:100C40008C43000024630001AC4300008F630C144C
+:100C50003063000F2C6200021440000C3C02400084
+:100C60008F630C143C0208008C421B403063000F61
+:100C7000244200013C010800AC221B402C6200020F
+:100C80001040FFF7000000003C024000024218251F
+:100C9000AF635C9C8F625C90304200021440FFFCAA
+:100CA0000000000012600003000000000E0004C0FD
+:100CB000000000008FBF00288FB300248FB20020F7
+:100CC0008FB1001C8FB0001803E0000827BD003072
+:100CD0008F6344503C04080024841B888C820000ED
+:100CE00000031C020043102B144000073C0380004B
+:100CF0008C8400048F62445000021C020083102B7D
+:100D00001040FFFC3C038000AF6344448F624444C6
+:100D1000004310241440FFFD000000008F6244488F
+:100D200003E000083042FFFF3C0240000082202523
+:100D3000AF645C388F625C30304200021440FFFCCC
+:100D40000000000003E000080000000027BDFFE0F5
+:100D50000080582114C00011256E00083C020800D4
+:100D60008C4223F410400007240200163C010800C6
+:100D7000A42223D22402002A3C0108000A000364B2
+:100D8000A42223D48D670010000714023C01080040
+:100D9000A42223D23C010800A42723D43C04080049
+:100DA000948423D43C030800946323D295CF000697
+:100DB0003C020800944223D00083202301E2C02398
+:100DC0003065FFFF24A2002801C248213082FFFFC6
+:100DD00014C0001A012260219582000C3042003FAD
+:100DE0003C010800A42223D69582000495830006C6
+:100DF0003C010800AC2023E43C010800AC2023E8BF
+:100E000000021400004310253C010800AC221BC066
+:100E1000952200043C010800A4221BC49523000273
+:100E200001E510230043102A1040001024020001A5
+:100E30003C0108000A000398AC2223F83C03080098
+:100E40008C6323E83C02080094421BC40043102139
+:100E5000A52200043C02080094421BC0A5820004A5
+:100E60003C0208008C421BC0A58200063C02080020
+:100E70008C4223F03C0D08008DAD23E43C0A0800B1
+:100E8000144000E58D4A23E83C02080094421BC44C
+:100E9000004A18213063FFFF0062182B2402000271
+:100EA00010C2000D014350233C020800944223D697
+:100EB0003042000910400008000000009582000C3C
+:100EC0003042FFF6A582000C3C020800944223D673
+:100ED0003042000901A268233C0208008C4223F83A
+:100EE0001040004A012038213C020800944223D2DD
+:100EF00000004021A520000A01E21023A5220002E3
+:100F00003082FFFF0002104218400008000030212C
+:100F10000040182194E200002508000100C23021A1
+:100F20000103102A1440FFFB24E7000200061C0204
+:100F300030C2FFFF006230210006140200C23021DF
+:100F400000C0282100061027A522000A0000302139
+:100F50002527000C0000402194E200002508000134
+:100F600000C230212D0200041440FFFB24E70002E0
+:100F70009522000200004021912300090044202313
+:100F8000018038213082FFFFA4E0001000621821A8
+:100F9000000210421840001000C3302100404821D8
+:100FA00094E2000024E7000200C2302130E2007F1A
+:100FB00014400006250800018D6300003C02007FFC
+:100FC0003442FF8000625824256700080109102A76
+:100FD0001440FFF3000000003082000110400005C3
+:100FE00000061C02A0E0000194E2000000C23021D3
+:100FF00000061C0230C2FFFF00623021000614020E
+:1010000000C230210A00047D30C6FFFF2402000226
+:1010100014C20081000000003C0208008C42240C35
+:1010200014400007000000003C020800944223D254
+:101030009523000201E210231062007700000000F7
+:101040003C020800944223D201E21023A5220002B0
+:101050003C0208008C42240C1040001A31E3FFFFD0
+:101060008DC700103C02080094421B9600E040210E
+:1010700000072C0200AA20210043102300823823FD
+:101080000007240230E2FFFF00823821000710270A
+:10109000A522000A3102FFFF3C040800948423D4F7
+:1010A0000045302300E0282100641823006D18213A
+:1010B00000C3302100061C0230C2FFFF0A00047D7D
+:1010C0000062302101203821000040213082FFFFE2
+:1010D0000002104218400008000030210040182192
+:1010E00094E200002508000100C230210103102A0B
+:1010F0001440FFFB24E7000200061C0230C2FFFF81
+:10110000006230210006140200C2302100C02821F4
+:1011100000061027A522000A000030212527000C18
+:101120000000402194E200002508000100C23021A7
+:101130002D0200041440FFFB24E700029522000268
+:101140000000402191230009004420230180382120
+:101150003082FFFFA4E000103C040800948423D4F4
+:101160000062182100C3302100061C0230C2FFFFBC
+:101170000062302100061C023C020800944223D089
+:1011800000C348210044102300021FC20043102165
+:1011900000021043184000100000302100402021C0
+:1011A00094E2000024E7000200C2302130E2007F18
+:1011B00014400006250800018D6300003C02007FFA
+:1011C0003442FF8000625824256700080104102A79
+:1011D0001440FFF3000000003C020800944223EC9E
+:1011E00000C230213122FFFF00C2302100061C0264
+:1011F00030C2FFFF006230210006140200C230211D
+:1012000000C0402100061027A5820010ADC00014C8
+:101210000A00049DADC000008DC7001000E0402111
+:101220001140000700072C0200AA3021000614021A
+:1012300030C3FFFF004330210006140200C2282102
+:1012400000051027A522000A3C030800946323D45C
+:101250003102FFFF01E210210043302300CD302195
+:1012600000061C0230C2FFFF00623021000614029B
+:1012700000C2302100C0402100061027A5820010C6
+:101280003102FFFF00051C0000431025ADC2001015
+:101290003C0208008C4223F4104000052DE205EBCF
+:1012A0001440000225E2FFF234028870A5C2003427
+:1012B0003C030800246323E88C6200002442000100
+:1012C000AC6200003C0408008C8423E43C0208006B
+:1012D0008C421BC03303FFFF0083202100431821F1
+:1012E0000062102B3C010800AC2423E410400003F2
+:1012F0002482FFFF3C010800AC2223E43C010800EB
+:10130000AC231BC003E0000827BD002027BDFFB8A9
+:101310003C05080024A51B96AFBF0044AFBE0040AB
+:10132000AFB7003CAFB60038AFB50034AFB4003053
+:10133000AFB3002CAFB20028AFB10024AFB0002093
+:1013400094A900003C020800944223D03C0308000A
+:101350008C631BB03C0408008C841BAC012210235E
+:101360000064182AA7A9001E106000BEA7A20016DC
+:1013700024BE002297B6001E24B3001A24B700161C
+:101380008FC2000014400008000000008FC2FFF868
+:1013900097A300168FC4FFF4004310210082202A77
+:1013A000148000B00000000097D5081832A2FFFF9B
+:1013B000104000A3000090210040A02100008821DF
+:1013C0000E000625000000000040302114C0000778
+:1013D000000000003C0208008C4223DC2442000193
+:1013E0003C0108000A000596AC2223DC3C100800F2
+:1013F000021180218E101BC89608000A310200409D
+:10140000104000052407180C8E02000C2407188CCD
+:1014100000021400ACC200183102008054400001E8
+:1014200034E700103C020800005110218C421BD010
+:101430003C030800007118218C631BD400021500C6
+:1014400000031C0000431025ACC2001496040008E1
+:101450003242FFFF008210210282102A1440000253
+:1014600002B22823008028218E020000024590212C
+:10147000ACC200008E02000400C020212631001002
+:10148000AC82000430E2FFFFAC800008A485000EAF
+:10149000AC820010240203050E0005A2A482000CF9
+:1014A0003242FFFF0054102B1440FFC53242FFFFB1
+:1014B0000A00058E000000008E6200008E63FFFCB3
+:1014C0000043102A10400067000000008E62FFF009
+:1014D000000289003C100800021180210E00062540
+:1014E0008E101BC80040302114C000050000000011
+:1014F0008E62082C244200010A000596AE62082C78
+:101500009608000A31020040104000052407180C1C
+:101510008E02000C2407188C00021400ACC20018C4
+:101520003C020800005110218C421BD03C030800F3
+:10153000007118218C631BD40002150000031C00ED
+:1015400000431025ACC200148E63FFF4960200081D
+:10155000004320233242FFFF3083FFFF004310216E
+:1015600002C2102A104000030080282197A9001E03
+:10157000013228238E62000030A4FFFF00441021B6
+:10158000AE620000A4C5000E8E020000ACC20000D6
+:101590008E0200048E63FFF400431021ACC20004ED
+:1015A0008E63FFF496020008006418210062102A7E
+:1015B00014400006024590218E62FFF0AE60FFF4F9
+:1015C000244200010A000571AE62FFF0AE63FFF431
+:1015D000ACC000083242FFFF105600033102000485
+:1015E000104000062402030531020080544000012F
+:1015F00034E7001034E7002024020905A4C2000CDF
+:101600008EE300008EE20004146200073C02B49AEC
+:101610008EE208605440000134E704003C024B6550
+:101620000A00058834427654344289ABACC2001CAF
+:1016300030E2FFFFACC200100E0005A200C0202166
+:101640003242FFFF0056102B1440FF9B00000000A9
+:101650008E6200008E63FFFC0043102A1440FF4896
+:10166000000000008FBF00448FBE00408FB7003CD9
+:101670008FB600388FB500348FB400308FB3002C94
+:101680008FB200288FB100248FB0002003E0000843
+:1016900027BD004827BDFFE8AFBF0014AFB0001062
+:1016A0008F6244508F6344100A0005B1008080218E
+:1016B0008F626820304220001040000300000000CC
+:1016C0000E0001F0000020218F6244508F6344100F
+:1016D0003042FFFF0043102B1440FFF500000000D4
+:1016E0008F630C143063000F2C6200021440000B57
+:1016F000000000008F630C143C0208008C421B4069
+:101700003063000F244200013C010800AC221B4062
+:101710002C6200021040FFF700000000AF705C1860
+:101720008F625C103042000210400009000000008F
+:101730008F626820304220001040FFF80000000057
+:101740000E0001F0000020210A0005C40000000086
+:101750008FBF00148FB0001003E0000827BD0018F1
+:1017600000000000000000000000000027BDFFE8AE
+:101770003C1BC000AFBF0014AFB00010AF60680CDE
+:101780008F62680434420082AF6268048F63400055
+:1017900024020B503C010800AC221B5424020B789D
+:1017A0003C010800AC221B6434630002AF634000BC
+:1017B0000E000605008080213C010800A0221B6865
+:1017C000304200FF24030002144300050000000023
+:1017D0003C0208008C421B540A0005F8AC5000C0C3
+:1017E0003C0208008C421B54AC5000BC8F62443455
+:1017F0008F6344388F6444103C010800AC221B5CAA
+:101800003C010800AC231B6C3C010800AC241B58B5
+:101810008FBF00148FB0001003E0000827BD001830
+:101820003C0408008C8700003C03AA553463AA5589
+:101830003C06C003AC8300008CC2000014430007C8
+:10184000240500023C0355AA346355AAAC8300006A
+:101850008CC2000050430001240500013C02080036
+:10186000AC47000003E0000800A0102127BDFFF8EE
+:1018700018800009000028218F63680C8F62680CB3
+:101880001043FFFE0000000024A5000100A4102A60
+:101890001440FFF90000000003E0000827BD000825
+:1018A0008F6344503C0208008C421B5C00031C0206
+:1018B0000043102B144000083C0380003C04080047
+:1018C0008C841B6C8F62445000021C020083102B1E
+:1018D0001040FFFC3C038000AF6344448F624444EB
+:1018E000004310241440FFFD000000008F624448B4
+:1018F00003E000083042FFFF3082FFFF2442E00097
+:101900002C422001144000033C0240000A0006481B
+:101910002402FFFF00822025AF645C388F625C30B8
+:10192000304200021440FFFC0000102103E00008D8
+:10193000000000008F6244503C0308008C631B5879
+:101940000A0006513042FFFF8F6244503042FFFFD1
+:101950000043102B1440FFFC0000000003E00008CF
+:101960000000000027BDFFE0008028213C040800A3
+:1019700024841AF00000302100003821AFBF001885
+:10198000AFA000100E00067CAFA000140A00066095
+:10199000000000008FBF001803E0000827BD0020F2
+:1019A0000000000000000000000000003C020800F1
+:1019B000344230003C030800346330003C0408002B
+:1019C000348437FF3C010800AC221B742402004021
+:1019D0003C010800AC221B783C010800AC201B70C5
+:1019E000AC600000246300040083102B5040FFFD16
+:1019F000AC60000003E00008000000000080482107
+:101A00008FAA00103C0208008C421B703C040800A6
+:101A10008C841B788FAB0014244300010044102BEE
+:101A20003C010800AC231B7014400003000040215F
+:101A30003C010800AC201B703C0208008C421B706B
+:101A40003C0308008C631B749124000000021140C9
+:101A5000004310210048102125080001A044000087
+:101A6000290200081440FFF4252900013C02080067
+:101A70008C421B703C0308008C631B748F64680CE1
+:101A80000002114000431021AC440008AC45000C9A
+:101A9000AC460010AC470014AC4A001803E0000844
+:101AA000AC4B001C00000000000000004D61696E9E
+:101AB00043707542000000004D61696E43707541CE
+:101AC00000000000000000000000000073746B6F55
+:101AD00066666C64496E000073746B6F66662A2AD2
+:101AE0000000000053774576656E743000000000FA
+:101AF000000000000000000000000000666174614A
+:101B00006C45727200000000000000000000000040
+:101B100000000000000000000000000000000000C5
+:101B200000000000000000000000000000000000B5
+:101B300073746B6F66666C645F76312E362E300080
+:101B40000000000000000000000000000000000095
+:0C1B500000000000000000000000000089
+:00000001FF
+ * Firmware is:
+ *	Derived from proprietary unpublished source code,
+ *	Copyright (C) 2000-2003 Broadcom Corporation.
+ *
+ *	Permission is hereby granted for the distribution of this firmware
+ *	data in hexadecimal or equivalent format, provided this copyright
+ *	notice is accompanying it.
diff --git a/firmware/tigon/tg3_tso5.bin.ihex b/firmware/tigon/tg3_tso5.bin.ihex
new file mode 100644
index 0000000..3367251
--- /dev/null
+++ b/firmware/tigon/tg3_tso5.bin.ihex
@@ -0,0 +1,252 @@
+:10000000010200000001000000000FD80C004003B6
+:100010000000000000010F040000000010000003B9
+:10002000000000000000000D0000000D3C1D00015C
+:1000300037BDE00003A0F0213C10000126100000B5
+:100040000C004010000000000000000D27BDFFE084
+:100050003C04FEFEAFBF00180C0042E834840002EE
+:100060000C004364000000003C03000190630F3467
+:10007000240200023C04000124840E9C146200034C
+:10008000240500013C04000124840E902406000293
+:1000900000003821AFA000100C004378AFA000147E
+:1000A0000C00402C000000008FBF001803E0000887
+:1000B00027BD0020000000000000000027BDFFE079
+:1000C000AFBF001CAFB20018AFB100140C0042D497
+:1000D000AFB000103C128000241100018F70681036
+:1000E0003202040010400007000000008F64100876
+:1000F0000092102414400003000000000C00406433
+:10010000000000003C02000190420F561051000315
+:10011000320202001040FFF1000000000C0041B468
+:100120000000000008004034000000008FBF001CE9
+:100130008FB200188FB100148FB0001003E00008D8
+:1001400027BD002027BDFFE03C04000124840EB041
+:10015000000028210000302100003821AFBF001826
+:10016000AFA000100C004378AFA000140000D02115
+:1001700024020130AF6250003C010001A4200F5066
+:100180003C010001A0200F578FBF001803E00008BA
+:1001900027BD002000000000000000003C0300011B
+:1001A00024630F609062000027BDFFF0144000033D
+:1001B0000080C02108004073000048213C0220005C
+:1001C00003021024104000032409000208004073B9
+:1001D000A0600000240900010018104030431F8077
+:1001E000346F80081520004B25EB00283C040001EB
+:1001F000008320218C8480103C05000124A50F7A07
+:1002000000041402A0A200003C010001A0240F7B06
+:100210003C02000100431021944280143C01000183
+:10022000A0220F7C3C0C0001018360218D8C801882
+:10023000304200FF24420008000220C324020001D3
+:100240003C010001A0220F600124102B1040000C83
+:100250000000382124A6000E016028218CA2000095
+:100260008CA3000424A5000824E70001ACC2000010
+:10027000ACC3000400E4102B1440FFF824C60008AF
+:10028000000038213C08000125080F7B9106000082
+:100290003C02000190420F7C2503000D00C3282181
+:1002A0000046102300021FC2004310210002104329
+:1002B0001840000C0000202191020001004610238C
+:1002C00000021FC2004310210002184394A2000044
+:1002D00024E700010082202100E3102A1440FFFBE4
+:1002E00024A5000200041C023082FFFF00622021CE
+:1002F00000041402008220213C02FFFF018210242E
+:100300003083FFFF004310253C010001080040FA44
+:10031000AC220F803C05000124A50F7C90A20000B8
+:100320003C0C0001018360218D8C8018000220C2EA
+:100330001080000E000038210160302124A5000C3F
+:100340008CA200008CA3000424A5000824E700016F
+:10035000ACC20000ACC3000400E4102B1440FFF852
+:1003600024C600083C05000124A50F7C90A20000D3
+:1003700030430007240200041062001128620005C7
+:10038000104000052402000210620008000710C09F
+:10039000080040FA00000000240200061062000E6F
+:1003A000000710C0080040FA0000000000A2182159
+:1003B0009463000C004B1021080040FAA443000095
+:1003C000000710C000A218218C63000C004B102104
+:1003D000080040FAAC43000000A218218C63000C16
+:1003E000004B202100A21021AC8300009442001099
+:1003F000A482000495E700063C02000190420F7CB5
+:100400003C03000190630F7A00E2C8233C02000124
+:1004100090420F7B2463002801E3402124420028FE
+:100420001520001201E2302194C2000C3C010001B1
+:10043000A4220F7894C2000494C300063C0100017A
+:10044000A4200F763C010001A4200F7200021400CA
+:10045000004310253C010001AC220F6C9502000402
+:100460003C01000108004124A4220F703C0200015D
+:1004700094420F703C03000194630F7200431021FB
+:10048000A50200043C02000194420F6CA4C20004C7
+:100490003C0200018C420F6CA4C200063C04000127
+:1004A00094840F723C02000194420F703C0A0001D8
+:1004B000954A0F76004418213063FFFF0062182A26
+:1004C000240200021122000B008320233C030001C0
+:1004D00094630F7830620009104000063062FFF626
+:1004E000A4C2000C3C02000194420F783042000983
+:1004F00001425023240200011122001B2922000284
+:1005000050400005240200021120000731A2FFFF25
+:1005100008004197000000001122001D240200166F
+:100520000800419731A2FFFF3C0E000195CE0F80DD
+:10053000108000050180682101C4202100041C02F4
+:100540003082FFFF00627021000E1027A502000A12
+:100550003C03000190630F7B31A2FFFF00E21021FA
+:100560000800418D004320233C02000194420F808B
+:100570000044202100041C023082FFFF0062202181
+:10058000008070210004102708004185A502000AA0
+:100590003C05000124A50F7A90A30000146200021C
+:1005A00024E2FFF2A5E2003490A2000000E2102352
+:1005B000A50200023C03000194630F803C0200018D
+:1005C00094420F5A30E5FFFF0064182100451023C4
+:1005D0000062202300041C023082FFFF0062202101
+:1005E00000041027A502000A3C03000190630F7C61
+:1005F0002462000114A20005008070210163102113
+:10060000904200000800418500026200246200025E
+:1006100014A20003306200FE004B1021944C000035
+:100620003C02000194420F823183FFFF3C04000131
+:1006300090840F7B0043102100E21021004420230E
+:10064000008A202100041C023082FFFF006220216A
+:100650000004140200822021008068210004102779
+:10066000A4C2001031A2FFFF000E1C0000431025A1
+:100670003C04000124840F72ADE20010948200005B
+:100680003C05000194A50F763C0300018C630F6CC0
+:100690002442000100B92821A48200003322FFFF78
+:1006A000006220210083182B3C010001A4250F7655
+:1006B0001060000324A2FFFF3C010001A4220F767A
+:1006C0003C024000030210253C010001AC240F6CE9
+:1006D000AF62100803E0000827BD00103C030001D2
+:1006E00090630F5627BDFFE824020001AFBF00143E
+:1006F00010620026AFB000108F620CF42442FFFF9E
+:100700003042007F000211008C4340003C01000198
+:10071000AC230F648C434008244440008C5C4004AC
+:1007200030620040144000022402008824020008C5
+:100730003C010001A4220F68306200041040000553
+:10074000240200013C010001A0220F57080041D5FE
+:10075000000314023C010001A0200F570003140203
+:100760003C010001A4220F549483000C24020001D8
+:100770003C010001A4200F503C010001A0220F56B3
+:100780003C010001A4230F62240200011342001E59
+:10079000000000001340000524020003134200671C
+:1007A00000000000080042CF000000003C020001F1
+:1007B00094420F62241A00013C010001A4200F5E44
+:1007C0003C010001A4200F52304407FF00021BC26D
+:1007D000000318233063003E3463003600021242E7
+:1007E0003042003C006218213C010001A4240F5853
+:1007F00000832021246300303C010001A4240F5A0F
+:100800003C010001A4230F5C3C06000124C60F52EA
+:1008100094C5000094C300023C04000194840F5A64
+:10082000006510210044102A104000133C10800085
+:1008300000A31021A4C200003C02A000AF620CF48F
+:100840003C010001A0200F568F6410080090102476
+:1008500014400003000000000C0040640000000091
+:100860008F620CF400501024104000B7000000000C
+:100870000800420F000000003C03000194630F5089
+:1008800000851023A4C40000006218213042FFFF3D
+:100890003C010001A4230F50AF620CE83C020001B0
+:1008A00094420F6834420024AF620CEC94C30002FF
+:1008B0003C02000194420F50146200123C0280007E
+:1008C0003C1080003C02A000AF620CF43C0100012F
+:1008D000A0200F568F6410080090102414400003CD
+:1008E000000000000C004064000000008F620CF467
+:1008F000005010241440FFF700000000080042CF11
+:10090000241A0003AF620CF43C1080008F641008BE
+:100910000090102414400003000000000C0040640C
+:10092000000000008F620CF4005010241440FFF708
+:1009300000000000080042CF241A00033C07000119
+:1009400024E70F5094E2000003821021AF620CE014
+:100950003C0200018C420F64AF620CE43C050001D4
+:1009600094A50F5494E300003C04000194840F58B4
+:100970003C02000194420F5E00A328230082202342
+:1009800030A6FFFF3083FFFF00C3102B144000434D
+:10099000000000003C02000194420F5C00021400C1
+:1009A00000621025AF620CE894E200003C030001F5
+:1009B00094630F5400441021A4E200003042FFFF72
+:1009C000144300213C0200083C02000190420F57F2
+:1009D000104000063C03000C3C02000194420F68EA
+:1009E000346306240800427C0000D0213C02000150
+:1009F00094420F683C03000834630624004310252A
+:100A0000AF620CEC3C1080003C02A000AF620CF422
+:100A10003C010001A0200F568F64100800901024A4
+:100A200014400003000000000C00406400000000BF
+:100A30008F620CF4005010241040001500000000DC
+:100A400008004283000000003C03000194630F682B
+:100A5000344206243C108000006218253C028000CD
+:100A6000AF630CECAF620CF48F641008009010249C
+:100A700014400003000000000C004064000000006F
+:100A80008F620CF4005010241440FFF700000000A7
+:100A90003C010001080042CFA4200F5E3C0200018F
+:100AA00094420F5C0002140000C21025AF620CE8F3
+:100AB0003C02000190420F57104000093C03000C1B
+:100AC0003C02000194420F68346306240000D021E8
+:100AD00000431025AF620CEC080042C13C108000BE
+:100AE0003C02000194420F683C0300083463060492
+:100AF00000431025AF620CEC3C02000194420F5EF3
+:100B0000004510213C010001A4220F5E3C10800032
+:100B10003C02A000AF620CF43C010001A0200F5683
+:100B20008F6410080090102414400003000000009F
+:100B30000C004064000000008F620CF40050102490
+:100B40001440FFF7000000008FBF00148FB00010AA
+:100B500003E0000827BD00180000000027BDFFE0EB
+:100B60003C04000124840EC0000028210000302134
+:100B700000003821AFBF0018AFA000100C00437870
+:100B8000AFA000140000D02124020130AF62500059
+:100B90003C010001A4200F503C010001A0200F5790
+:100BA0008FBF001803E0000827BD002027BDFFE825
+:100BB0003C1BC000AFBF0014AFB00010AF60680CAA
+:100BC0008F62680434420082AF6268048F63400021
+:100BD00024020B503C010001AC220F2024020B78B0
+:100BE0003C010001AC220F3034630002AF634000CF
+:100BF0000C004315008080213C010001A0220F342D
+:100C0000304200FF240300021443000500000000EE
+:100C10003C0200018C420F2008004308AC5000C089
+:100C20003C0200018C420F20AC5000BC8F62443467
+:100C30008F6344388F6444103C010001AC220F28BC
+:100C40003C010001AC230F383C010001AC240F240F
+:100C50008FBF00148FB0001003E0000827BD0018FC
+:100C600003E000082402000127BDFFF818800009F6
+:100C7000000028218F63680C8F62680C1043FFFE10
+:100C80000000000024A5000100A4102A1440FFF970
+:100C90000000000003E0000827BD00088F634450F7
+:100CA0003C0200018C420F2800031C020043102B61
+:100CB000144000083C0380003C0400018C840F3881
+:100CC0008F62445000021C020083102B1040FFFC76
+:100CD0003C038000AF6344448F62444400431024CB
+:100CE0001440FFFD000000008F62444803E000084C
+:100CF0003042FFFF3082FFFF2442E0002C422001FF
+:100D0000144000033C024000080043472402FFFF58
+:100D100000822025AF645C388F625C303042000274
+:100D20001440FFFC0000102103E000080000000058
+:100D30008F6244503C0300018C630F240800435031
+:100D40003042FFFF8F6244503042FFFF0043102BC0
+:100D50001440FFFC0000000003E000080000000059
+:100D600027BDFFE0008028213C04000124840ED030
+:100D70000000302100003821AFBF0018AFA00010E4
+:100D80000C004378AFA000140800435F000000008F
+:100D90008FBF001803E0000827BD00203C020001BF
+:100DA0003442D6003C0300013463D6003C04000109
+:100DB0003484DDFF3C010001AC220F4024020040DE
+:100DC0003C010001AC220F443C010001AC200F3C6F
+:100DD000AC600000246300040083102B5040FFFD32
+:100DE000AC60000003E00008000000000080482123
+:100DF0008FAA00103C0200018C420F3C3C04000111
+:100E00008C840F448FAB0014244300010044102B4A
+:100E10003C010001AC230F3C1440000300004021C2
+:100E20003C010001AC200F3C3C0200018C420F3C15
+:100E30003C0300018C630F4091240000000211402C
+:100E4000004310210048102125080001A0440000A3
+:100E5000290200081440FFF4252900013C0200018A
+:100E60008C420F3C3C0300018C630F408F64680C84
+:100E70000002114000431021AC440008AC45000CB6
+:100E8000AC460010AC470014AC4A001803E0000860
+:100E9000AC4B001C00000000000000004D61696EBA
+:100EA00043707542000000004D61696E43707541EA
+:100EB00000000000000000000000000073746B6F71
+:100EC00066666C64000000000000000073746B6FC5
+:100ED00066666C64000000000000000066617461DA
+:100EE0006C4572720000000000000000000000006D
+:100EF00000000000000000000000000000000000F2
+:100F000000000000000000000000000000000000E1
+:100F100073746B6F66666C645F76312E322E3000B0
+:0C0F2000000000000000000000000000C5
+:00000001FF
+ * Firmware is:
+ *	Derived from proprietary unpublished source code,
+ *	Copyright (C) 2000-2003 Broadcom Corporation.
+ *
+ *	Permission is hereby granted for the distribution of this firmware
+ *	data in hexadecimal or equivalent format, provided this copyright
+ *	notice is accompanying it.
+/* 5705 needs a special version of the TSO firmware.  */
-- 
cgit v0.10.2


From cfc3a44c3c32abe48898398d9a92e8524c976803 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Sun, 4 Jan 2009 16:12:11 -0800
Subject: starfire: use request_firmware()

Firmware blob is big endian

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index f54ac23..57fb1f7 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -42,11 +42,11 @@
 #include <linux/mii.h>
 #include <linux/if_vlan.h>
 #include <linux/mm.h>
+#include <linux/firmware.h>
 #include <asm/processor.h>		/* Processor type for cache alignment. */
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
-#include "starfire_firmware.h"
 /*
  * The current frame processor firmware fails to checksum a fragment
  * of length 1. If and when this is fixed, the #define below can be removed.
@@ -173,6 +173,10 @@ static int full_duplex[MAX_UNITS] = {0, };
 #define skb_first_frag_len(skb)	skb_headlen(skb)
 #define skb_num_frags(skb) (skb_shinfo(skb)->nr_frags + 1)
 
+/* Firmware names */
+#define FIRMWARE_RX	"adaptec/starfire_rx.bin"
+#define FIRMWARE_TX	"adaptec/starfire_tx.bin"
+
 /* These identify the driver base version and may not be removed. */
 static char version[] =
 KERN_INFO "starfire.c:v1.03 7/26/2000  Written by Donald Becker <becker@scyld.com>\n"
@@ -182,6 +186,8 @@ MODULE_AUTHOR("Donald Becker <becker@scyld.com>");
 MODULE_DESCRIPTION("Adaptec Starfire Ethernet driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
+MODULE_FIRMWARE(FIRMWARE_RX);
+MODULE_FIRMWARE(FIRMWARE_TX);
 
 module_param(max_interrupt_work, int, 0);
 module_param(mtu, int, 0);
@@ -902,9 +908,12 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val
 
 static int netdev_open(struct net_device *dev)
 {
+	const struct firmware *fw_rx, *fw_tx;
+	const __be32 *fw_rx_data, *fw_tx_data;
 	struct netdev_private *np = netdev_priv(dev);
 	void __iomem *ioaddr = np->base;
 	int i, retval;
+	size_t tx_size, rx_size;
 	size_t tx_done_q_size, rx_done_q_size, tx_ring_size, rx_ring_size;
 
 	/* Do we ever need to reset the chip??? */
@@ -1040,11 +1049,40 @@ static int netdev_open(struct net_device *dev)
 	writel(ETH_P_8021Q, ioaddr + VlanType);
 #endif /* VLAN_SUPPORT */
 
+	retval = request_firmware(&fw_rx, FIRMWARE_RX, &np->pci_dev->dev);
+	if (retval) {
+		printk(KERN_ERR "starfire: Failed to load firmware \"%s\"\n",
+		       FIRMWARE_RX);
+		return retval;
+	}
+	if (fw_rx->size % 4) {
+		printk(KERN_ERR "starfire: bogus length %zu in \"%s\"\n",
+		       fw_rx->size, FIRMWARE_RX);
+		retval = -EINVAL;
+		goto out_rx;
+	}
+	retval = request_firmware(&fw_tx, FIRMWARE_TX, &np->pci_dev->dev);
+	if (retval) {
+		printk(KERN_ERR "starfire: Failed to load firmware \"%s\"\n",
+		       FIRMWARE_TX);
+		goto out_rx;
+	}
+	if (fw_tx->size % 4) {
+		printk(KERN_ERR "starfire: bogus length %zu in \"%s\"\n",
+		       fw_tx->size, FIRMWARE_TX);
+		retval = -EINVAL;
+		goto out_tx;
+	}
+	fw_rx_data = (const __be32 *)&fw_rx->data[0];
+	fw_tx_data = (const __be32 *)&fw_tx->data[0];
+	rx_size = fw_rx->size / 4;
+	tx_size = fw_tx->size / 4;
+
 	/* Load Rx/Tx firmware into the frame processors */
-	for (i = 0; i < FIRMWARE_RX_SIZE * 2; i++)
-		writel(firmware_rx[i], ioaddr + RxGfpMem + i * 4);
-	for (i = 0; i < FIRMWARE_TX_SIZE * 2; i++)
-		writel(firmware_tx[i], ioaddr + TxGfpMem + i * 4);
+	for (i = 0; i < rx_size; i++)
+		writel(be32_to_cpup(&fw_rx_data[i]), ioaddr + RxGfpMem + i * 4);
+	for (i = 0; i < tx_size; i++)
+		writel(be32_to_cpup(&fw_tx_data[i]), ioaddr + TxGfpMem + i * 4);
 	if (enable_hw_cksum)
 		/* Enable the Rx and Tx units, and the Rx/Tx frame processors. */
 		writel(TxEnable|TxGFPEnable|RxEnable|RxGFPEnable, ioaddr + GenCtrl);
@@ -1056,7 +1094,11 @@ static int netdev_open(struct net_device *dev)
 		printk(KERN_DEBUG "%s: Done netdev_open().\n",
 		       dev->name);
 
-	return 0;
+out_tx:
+	release_firmware(fw_tx);
+out_rx:
+	release_firmware(fw_rx);
+	return retval;
 }
 
 
diff --git a/drivers/net/starfire_firmware.h b/drivers/net/starfire_firmware.h
deleted file mode 100644
index 0a66852..0000000
--- a/drivers/net/starfire_firmware.h
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * Copyright 2003 Adaptec, Inc.
- *
- * Please read the following license before using the Adaptec Software
- * ("Program"). If you do not agree to the license terms, do not use the
- * Program:
- *
- * You agree to be bound by version 2 of the General Public License ("GPL")
- * dated June 1991, which can be found at http://www.fsf.org/licenses/gpl.html.
- * If the link is broken, write to Free Software Foundation, 59 Temple Place,
- * Boston, Massachusetts 02111-1307.
- *
- * BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE IT IS LICENSED "AS IS" AND
- * THERE IS NO WARRANTY FOR THE PROGRAM, INCLUDING BUT NOT LIMITED TO THE
- * IMPLIED WARRANTIES OF MERCHANTIBILITY OR FITNESS FOR A PARTICULAR PURPOSE
- * (TO THE EXTENT PERMITTED BY APPLICABLE LAW). USE OF THE PROGRAM IS AT YOUR
- * OWN RISK. IN NO EVENT WILL ADAPTEC OR ITS LICENSORS BE LIABLE TO YOU FOR
- * DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
- * ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM.
- *
- */
-
-static const u32 firmware_rx[] = {
-  0x010003dc, 0x00000000,
-  0x04000421, 0x00000086,
-  0x80000015, 0x0000180e,
-  0x81000015, 0x00006664,
-  0x1a0040ab, 0x00000b06,
-  0x14200011, 0x00000000,
-  0x14204022, 0x0000aaaa,
-  0x14204022, 0x00000300,
-  0x14204022, 0x00000000,
-  0x1a0040ab, 0x00000b14,
-  0x14200011, 0x00000000,
-  0x83000015, 0x00000002,
-  0x04000021, 0x00000000,
-  0x00000010, 0x00000000,
-  0x04000421, 0x00000087,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00008015, 0x00000000,
-  0x0000003e, 0x00000000,
-  0x00000010, 0x00000000,
-  0x82000015, 0x00004000,
-  0x009e8050, 0x00000000,
-  0x03008015, 0x00000000,
-  0x86008015, 0x00000000,
-  0x82000015, 0x00008000,
-  0x0100001c, 0x00000000,
-  0x000050a0, 0x0000010c,
-  0x4e20d011, 0x00006008,
-  0x1420d012, 0x00004008,
-  0x0000f090, 0x00007000,
-  0x0000c8b0, 0x00003000,
-  0x00004040, 0x00000000,
-  0x00108015, 0x00000000,
-  0x00a2c150, 0x00004000,
-  0x00a400b0, 0x00000014,
-  0x00000020, 0x00000000,
-  0x2500400d, 0x00002525,
-  0x00047220, 0x00003100,
-  0x00934070, 0x00000000,
-  0x00000020, 0x00000000,
-  0x00924460, 0x00000184,
-  0x2b20c011, 0x00000000,
-  0x0000c420, 0x00000540,
-  0x36014018, 0x0000422d,
-  0x14200011, 0x00000000,
-  0x00924460, 0x00000183,
-  0x3200001f, 0x00000034,
-  0x02ac0015, 0x00000002,
-  0x00a60110, 0x00000008,
-  0x42200011, 0x00000000,
-  0x00924060, 0x00000103,
-  0x0000001e, 0x00000000,
-  0x00000020, 0x00000100,
-  0x0000001e, 0x00000000,
-  0x00924460, 0x00000086,
-  0x00004080, 0x00000000,
-  0x0092c070, 0x00000000,
-  0x00924060, 0x00000100,
-  0x0000c890, 0x00005000,
-  0x00a6c110, 0x00000000,
-  0x00b0c090, 0x00000012,
-  0x021c0015, 0x00000000,
-  0x3200001f, 0x00000034,
-  0x00924460, 0x00000510,
-  0x44210011, 0x00000000,
-  0x42000011, 0x00000000,
-  0x83000015, 0x00000040,
-  0x00924460, 0x00000508,
-  0x45014018, 0x00004545,
-  0x00808050, 0x00000000,
-  0x62208012, 0x00000000,
-  0x82000015, 0x00000800,
-  0x15200011, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x80000015, 0x0000eea4,
-  0x81000015, 0x0000005f,
-  0x00000060, 0x00000000,
-  0x00004120, 0x00000000,
-  0x00004a00, 0x00004000,
-  0x00924460, 0x00000190,
-  0x5601401a, 0x00005956,
-  0x14000011, 0x00000000,
-  0x00934050, 0x00000018,
-  0x00930050, 0x00000018,
-  0x3601403a, 0x0000002d,
-  0x000643a9, 0x00000000,
-  0x0000c420, 0x00000140,
-  0x5601401a, 0x00005956,
-  0x14000011, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x000642a9, 0x00000000,
-  0x00024420, 0x00000183,
-  0x5601401a, 0x00005956,
-  0x82000015, 0x00002000,
-  0x15200011, 0x00000000,
-  0x82000015, 0x00000010,
-  0x15200011, 0x00000000,
-  0x82000015, 0x00000010,
-  0x15200011, 0x00000000,
-};	/* 104 Rx instructions */
-#define FIRMWARE_RX_SIZE 104
-
-static const u32 firmware_tx[] = {
-  0x010003dc, 0x00000000,
-  0x04000421, 0x00000086,
-  0x80000015, 0x0000180e,
-  0x81000015, 0x00006664,
-  0x1a0040ab, 0x00000b06,
-  0x14200011, 0x00000000,
-  0x14204022, 0x0000aaaa,
-  0x14204022, 0x00000300,
-  0x14204022, 0x00000000,
-  0x1a0040ab, 0x00000b14,
-  0x14200011, 0x00000000,
-  0x83000015, 0x00000002,
-  0x04000021, 0x00000000,
-  0x00000010, 0x00000000,
-  0x04000421, 0x00000087,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00008015, 0x00000000,
-  0x0000003e, 0x00000000,
-  0x00000010, 0x00000000,
-  0x82000015, 0x00004000,
-  0x009e8050, 0x00000000,
-  0x03008015, 0x00000000,
-  0x86008015, 0x00000000,
-  0x82000015, 0x00008000,
-  0x0100001c, 0x00000000,
-  0x000050a0, 0x0000010c,
-  0x4e20d011, 0x00006008,
-  0x1420d012, 0x00004008,
-  0x0000f090, 0x00007000,
-  0x0000c8b0, 0x00003000,
-  0x00004040, 0x00000000,
-  0x00108015, 0x00000000,
-  0x00a2c150, 0x00004000,
-  0x00a400b0, 0x00000014,
-  0x00000020, 0x00000000,
-  0x2500400d, 0x00002525,
-  0x00047220, 0x00003100,
-  0x00934070, 0x00000000,
-  0x00000020, 0x00000000,
-  0x00924460, 0x00000184,
-  0x2b20c011, 0x00000000,
-  0x0000c420, 0x00000540,
-  0x36014018, 0x0000422d,
-  0x14200011, 0x00000000,
-  0x00924460, 0x00000183,
-  0x3200001f, 0x00000034,
-  0x02ac0015, 0x00000002,
-  0x00a60110, 0x00000008,
-  0x42200011, 0x00000000,
-  0x00924060, 0x00000103,
-  0x0000001e, 0x00000000,
-  0x00000020, 0x00000100,
-  0x0000001e, 0x00000000,
-  0x00924460, 0x00000086,
-  0x00004080, 0x00000000,
-  0x0092c070, 0x00000000,
-  0x00924060, 0x00000100,
-  0x0000c890, 0x00005000,
-  0x00a6c110, 0x00000000,
-  0x00b0c090, 0x00000012,
-  0x021c0015, 0x00000000,
-  0x3200001f, 0x00000034,
-  0x00924460, 0x00000510,
-  0x44210011, 0x00000000,
-  0x42000011, 0x00000000,
-  0x83000015, 0x00000040,
-  0x00924460, 0x00000508,
-  0x45014018, 0x00004545,
-  0x00808050, 0x00000000,
-  0x62208012, 0x00000000,
-  0x82000015, 0x00000800,
-  0x15200011, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x80000015, 0x0000eea4,
-  0x81000015, 0x0000005f,
-  0x00000060, 0x00000000,
-  0x00004120, 0x00000000,
-  0x00004a00, 0x00004000,
-  0x00924460, 0x00000190,
-  0x5601401a, 0x00005956,
-  0x14000011, 0x00000000,
-  0x00934050, 0x00000018,
-  0x00930050, 0x00000018,
-  0x3601403a, 0x0000002d,
-  0x000643a9, 0x00000000,
-  0x0000c420, 0x00000140,
-  0x5601401a, 0x00005956,
-  0x14000011, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x000642a9, 0x00000000,
-  0x00024420, 0x00000183,
-  0x5601401a, 0x00005956,
-  0x82000015, 0x00002000,
-  0x15200011, 0x00000000,
-  0x82000015, 0x00000010,
-  0x15200011, 0x00000000,
-  0x82000015, 0x00000010,
-  0x15200011, 0x00000000,
-};	/* 104 Tx instructions */
-#define FIRMWARE_TX_SIZE 104
-#if 0
-static const u32 firmware_wol[] = {
-  0x010003dc, 0x00000000,
-  0x19000421, 0x00000087,
-  0x80000015, 0x00001a1a,
-  0x81000015, 0x00001a1a,
-  0x1a0040ab, 0x00000b06,
-  0x15200011, 0x00000000,
-  0x15204022, 0x0000aaaa,
-  0x15204022, 0x00000300,
-  0x15204022, 0x00000000,
-  0x1a0040ab, 0x00000b15,
-  0x15200011, 0x00000000,
-  0x83000015, 0x00000002,
-  0x04000021, 0x00000000,
-  0x00000010, 0x00000000,
-  0x04000421, 0x00000087,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00008015, 0x00000000,
-  0x0000003e, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x82000015, 0x00004000,
-  0x82000015, 0x00008000,
-  0x0000000c, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00004080, 0x00000100,
-  0x1f20c011, 0x00001122,
-  0x2720f011, 0x00003011,
-  0x19200071, 0x00000000,
-  0x1a200051, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x1d2040a4, 0x00003344,
-  0x1d2040a2, 0x00005566,
-  0x000040a0, 0x00000100,
-  0x00108050, 0x00000001,
-  0x1a208012, 0x00000006,
-  0x82000015, 0x00008080,
-  0x010003dc, 0x00000000,
-  0x1d2040a4, 0x00002233,
-  0x1d2040a4, 0x00004455,
-  0x2d208011, 0x00000005,
-  0x1d2040a4, 0x00006611,
-  0x00108050, 0x00000001,
-  0x27200011, 0x00000000,
-  0x1d2050a4, 0x00006600,
-  0x82000015, 0x00008080,
-  0x010003dc, 0x00000000,
-  0x00000050, 0x00000000,
-  0x1b200031, 0x00000000,
-  0x0000001e, 0x00000000,
-  0x0000001e, 0x00000000,
-  0x0000001e, 0x00000000,
-  0x0000001e, 0x00000000,
-  0x00924460, 0x00000086,
-  0x00004080, 0x00000000,
-  0x0092c070, 0x00000000,
-  0x00924060, 0x00000100,
-  0x0000c890, 0x00005000,
-  0x00a6c110, 0x00000000,
-  0x00b0c090, 0x00000012,
-  0x021c0015, 0x00000000,
-  0x3200001f, 0x00000034,
-  0x00924460, 0x00000510,
-  0x44210011, 0x00000000,
-  0x42000011, 0x00000000,
-  0x83000015, 0x00000040,
-  0x00924460, 0x00000508,
-  0x476a0012, 0x00000100,
-  0x83000015, 0x00000008,
-  0x16200011, 0x00000000,
-  0x001e8050, 0x00000000,
-  0x001e8050, 0x00000000,
-  0x00808050, 0x00000000,
-  0x03008015, 0x00000000,
-  0x62208012, 0x00000000,
-  0x82000015, 0x00000800,
-  0x16200011, 0x00000000,
-  0x80000015, 0x0000eea4,
-  0x81000015, 0x0000005f,
-  0x00000020, 0x00000000,
-  0x00004120, 0x00000000,
-  0x00004a00, 0x00004000,
-  0x00924460, 0x00000190,
-  0x5c01401a, 0x0000595c,
-  0x15000011, 0x00000000,
-  0x00934050, 0x00000018,
-  0x00930050, 0x00000018,
-  0x3601403a, 0x0000002d,
-  0x00064029, 0x00000000,
-  0x0000c420, 0x00000140,
-  0x5c01401a, 0x0000595c,
-  0x15000011, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00000010, 0x00000000,
-  0x00064029, 0x00000000,
-  0x00024420, 0x00000183,
-  0x5c01401a, 0x0000595c,
-  0x82000015, 0x00002000,
-  0x16200011, 0x00000000,
-  0x82000015, 0x00000010,
-  0x16200011, 0x00000000,
-  0x82000015, 0x00000010,
-  0x16200011, 0x00000000,
-};	/* 104 WoL instructions */
-#define FIRMWARE_WOL_SIZE 104
-#endif
diff --git a/drivers/net/starfire_firmware.pl b/drivers/net/starfire_firmware.pl
deleted file mode 100644
index 0c82b80..0000000
--- a/drivers/net/starfire_firmware.pl
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/perl
-
-# This script can be used to generate a new starfire_firmware.h
-# from GFP_RX.DAT and GFP_TX.DAT, files included with the DDK
-# and also with the Novell drivers.
-
-open FW, "GFP_RX.DAT" || die;
-open FWH, ">starfire_firmware.h" || die;
-
-printf(FWH "static u32 firmware_rx[] = {\n");
-$counter = 0;
-while ($foo = <FW>) {
-  chomp;
-  printf(FWH "  0x%s, 0x0000%s,\n", substr($foo, 4, 8), substr($foo, 0, 4));
-  $counter++;
-}
-
-close FW;
-open FW, "GFP_TX.DAT" || die;
-
-printf(FWH "};\t/* %d Rx instructions */\n#define FIRMWARE_RX_SIZE %d\n\nstatic u32 firmware_tx[] = {\n", $counter, $counter);
-$counter = 0;
-while ($foo = <FW>) {
-  chomp;
-  printf(FWH "  0x%s, 0x0000%s,\n", substr($foo, 4, 8), substr($foo, 0, 4));
-  $counter++;
-}
-
-close FW;
-printf(FWH "};\t/* %d Tx instructions */\n#define FIRMWARE_TX_SIZE %d\n", $counter, $counter);
-close(FWH);
diff --git a/firmware/Makefile b/firmware/Makefile
index 0dc7afc..0e43284 100644
--- a/firmware/Makefile
+++ b/firmware/Makefile
@@ -27,6 +27,8 @@ else
 acenic-objs := acenic/tg1.bin acenic/tg2.bin
 endif
 fw-shipped-$(CONFIG_ACENIC) += $(acenic-objs)
+fw-shipped-$(CONFIG_ADAPTEC_STARFIRE) += adaptec/starfire_rx.bin \
+					 adaptec/starfire_tx.bin
 fw-shipped-$(CONFIG_ATARI_DSP56K) += dsp56k/bootstrap.bin
 fw-shipped-$(CONFIG_ATM_AMBASSADOR) += atmsar11.fw
 fw-shipped-$(CONFIG_CASSINI) += sun/cassini.bin
diff --git a/firmware/WHENCE b/firmware/WHENCE
index 0460eb3..1bb2cf4 100644
--- a/firmware/WHENCE
+++ b/firmware/WHENCE
@@ -390,3 +390,22 @@ Licence:
 Found in hex form in kernel source.
 
 --------------------------------------------------------------------------
+
+Driver: ADAPTEC_STARFIRE - Adaptec Starfire/DuraLAN support
+
+File: adaptec/starfire_rx.bin
+File: adaptec/starfire_tx.bin
+
+Licence: Allegedly GPLv2, but no source visible.
+
+Found in hex form in kernel source, with the following notice:
+
+ BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE IT IS LICENSED "AS IS" AND
+ THERE IS NO WARRANTY FOR THE PROGRAM, INCLUDING BUT NOT LIMITED TO THE
+ IMPLIED WARRANTIES OF MERCHANTIBILITY OR FITNESS FOR A PARTICULAR PURPOSE
+ (TO THE EXTENT PERMITTED BY APPLICABLE LAW). USE OF THE PROGRAM IS AT YOUR
+ OWN RISK. IN NO EVENT WILL ADAPTEC OR ITS LICENSORS BE LIABLE TO YOU FOR
+ DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
+ ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM.
+
+--------------------------------------------------------------------------
diff --git a/firmware/adaptec/starfire_rx.bin.ihex b/firmware/adaptec/starfire_rx.bin.ihex
new file mode 100644
index 0000000..6b1fae0
--- /dev/null
+++ b/firmware/adaptec/starfire_rx.bin.ihex
@@ -0,0 +1,53 @@
+:10000000010003DC00000000040004210000008661
+:10001000800000150000180E8100001500006664C5
+:100020001A0040AB00000B06142000110000000075
+:10003000142040220000AAAA14204022000003003D
+:1000400014204022000000001A0040AB00000B14F6
+:1000500014200011000000008300001500000002C1
+:10006000040000210000000000000010000000005B
+:1000700004000421000000870000001000000000C0
+:1000800000000010000000000000801500000000CB
+:100090000000003E00000000000000100000000012
+:1000A0008200001500004000009E8050000000000B
+:1000B000030080150000000086008015000000008D
+:1000C00082000015000080000100001C00000000FC
+:1000D000000050A00000010C4E20D011000060086C
+:1000E0001420D012000040080000F09000007000C2
+:1000F0000000C8B0000030000000404000000000D8
+:10010000001080150000000000A2C1500000400057
+:1001100000A400B000000014000000200000000057
+:100120002500400D0000252500047220000031004C
+:10013000009340700000000000000020000000005C
+:1001400000924460000001842B20C01100000000D8
+:100150000000C42000000540360140180000422D78
+:100160001420001100000000009244600000018390
+:100170003200001F0000003402AC00150000000235
+:1001800000A601100000000842200011000000003D
+:1001900000924060000001030000001E000000000B
+:1001A00000000020000001000000001E0000000010
+:1001B00000924460000000860000408000000000C3
+:1001C0000092C0700000000000924060000001003A
+:1001D0000000C8900000500000A6C1100000000000
+:1001E00000B0C09000000012021C001500000000CA
+:1001F0003200001F0000003400924460000005102F
+:100200004421001100000000420000110000000025
+:1002100083000015000000400092446000000508C3
+:100220004501401800004545008080500000000056
+:10023000622080120000000082000015000008000B
+:100240001520001100000000000000100000000058
+:10025000000000100000000000000010000000007E
+:10026000000000100000000000000010000000006E
+:10027000800000150000EEA4810000150000005F62
+:1002800000000060000000000000412000000000AD
+:1002900000004A000000400000924460000001900D
+:1002A0005601401A000059561400001100000000C9
+:1002B0000093405000000018009300500000001808
+:1002C0003601403A0000002D000643A9000000005E
+:1002D0000000C420000001405601401A0000595699
+:1002E00014000011000000000000001000000000D9
+:1002F0000000001000000000000642A900000000FD
+:1003000000024420000001835601401A00005956A3
+:1003100082000015000020001520001100000000E0
+:1003200082000015000000101520001100000000E0
+:1003300082000015000000101520001100000000D0
+:00000001FF
diff --git a/firmware/adaptec/starfire_tx.bin.ihex b/firmware/adaptec/starfire_tx.bin.ihex
new file mode 100644
index 0000000..6b1fae0
--- /dev/null
+++ b/firmware/adaptec/starfire_tx.bin.ihex
@@ -0,0 +1,53 @@
+:10000000010003DC00000000040004210000008661
+:10001000800000150000180E8100001500006664C5
+:100020001A0040AB00000B06142000110000000075
+:10003000142040220000AAAA14204022000003003D
+:1000400014204022000000001A0040AB00000B14F6
+:1000500014200011000000008300001500000002C1
+:10006000040000210000000000000010000000005B
+:1000700004000421000000870000001000000000C0
+:1000800000000010000000000000801500000000CB
+:100090000000003E00000000000000100000000012
+:1000A0008200001500004000009E8050000000000B
+:1000B000030080150000000086008015000000008D
+:1000C00082000015000080000100001C00000000FC
+:1000D000000050A00000010C4E20D011000060086C
+:1000E0001420D012000040080000F09000007000C2
+:1000F0000000C8B0000030000000404000000000D8
+:10010000001080150000000000A2C1500000400057
+:1001100000A400B000000014000000200000000057
+:100120002500400D0000252500047220000031004C
+:10013000009340700000000000000020000000005C
+:1001400000924460000001842B20C01100000000D8
+:100150000000C42000000540360140180000422D78
+:100160001420001100000000009244600000018390
+:100170003200001F0000003402AC00150000000235
+:1001800000A601100000000842200011000000003D
+:1001900000924060000001030000001E000000000B
+:1001A00000000020000001000000001E0000000010
+:1001B00000924460000000860000408000000000C3
+:1001C0000092C0700000000000924060000001003A
+:1001D0000000C8900000500000A6C1100000000000
+:1001E00000B0C09000000012021C001500000000CA
+:1001F0003200001F0000003400924460000005102F
+:100200004421001100000000420000110000000025
+:1002100083000015000000400092446000000508C3
+:100220004501401800004545008080500000000056
+:10023000622080120000000082000015000008000B
+:100240001520001100000000000000100000000058
+:10025000000000100000000000000010000000007E
+:10026000000000100000000000000010000000006E
+:10027000800000150000EEA4810000150000005F62
+:1002800000000060000000000000412000000000AD
+:1002900000004A000000400000924460000001900D
+:1002A0005601401A000059561400001100000000C9
+:1002B0000093405000000018009300500000001808
+:1002C0003601403A0000002D000643A9000000005E
+:1002D0000000C420000001405601401A0000595699
+:1002E00014000011000000000000001000000000D9
+:1002F0000000001000000000000642A900000000FD
+:1003000000024420000001835601401A00005956A3
+:1003100082000015000020001520001100000000E0
+:1003200082000015000000101520001100000000E0
+:1003300082000015000000101520001100000000D0
+:00000001FF
-- 
cgit v0.10.2


From b530256d2e0f1a75fab31f9821129fff1bb49faa Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 4 Jan 2009 16:13:19 -0800
Subject: gro: Use gso_size to store MSS

In order to allow GRO packets without frag_list at all, we need to
store the MSS in the packet itself.  The obvious place is gso_size.
The only thing to watch out for is if the packet ends up not being
GRO then we need to clear gso_size before pushing the packet into
the stack.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/dev.c b/net/core/dev.c
index 09c66a4..1e1a680 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2365,6 +2365,7 @@ static int napi_gro_complete(struct sk_buff *skb)
 	}
 
 out:
+	skb_shinfo(skb)->gso_size = 0;
 	__skb_push(skb, -skb_network_offset(skb));
 	return netif_receive_skb(skb);
 }
@@ -2446,6 +2447,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	}
 
 	NAPI_GRO_CB(skb)->count = 1;
+	skb_shinfo(skb)->gso_size = skb->len;
 	skb->next = napi->gro_list;
 	napi->gro_list = skb;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b8d0abb..3aafb10 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2613,6 +2613,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 	*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
 	skb_shinfo(nskb)->frag_list = p;
+	skb_shinfo(nskb)->gso_size = skb_shinfo(p)->gso_size;
 	skb_header_release(p);
 	nskb->prev = p;
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f28acf1..4d655e9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2519,9 +2519,7 @@ found:
 	flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th));
 
 	total = p->len;
-	mss = total;
-	if (skb_shinfo(p)->frag_list)
-		mss = skb_shinfo(p)->frag_list->len;
+	mss = skb_shinfo(p)->gso_size;
 
 	flush |= skb->len > mss || skb->len <= 0;
 	flush |= ntohl(th2->seq) + total != ntohl(th->seq);
@@ -2557,7 +2555,6 @@ int tcp_gro_complete(struct sk_buff *skb)
 	skb->csum_offset = offsetof(struct tcphdr, check);
 	skb->ip_summed = CHECKSUM_PARTIAL;
 
-	skb_shinfo(skb)->gso_size = skb_shinfo(skb)->frag_list->len;
 	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
 
 	if (th->cwr)
-- 
cgit v0.10.2


From 5d38a079ce3971f932bbdc0dc5b887806fabd5dc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 4 Jan 2009 16:13:40 -0800
Subject: gro: Add page frag support

This patch allows GRO to merge page frags (skb_shinfo(skb)->frags)
in one skb, rather than using the less efficient frag_list.

It also adds a new interface, napi_gro_frags to allow drivers
to inject page frags directly into the stack without allocating
an skb.  This is intended to be the GRO equivalent for LRO's
lro_receive_frags interface.

The existing GSO interface can already handle page frags with
or without an appended frag_list so nothing needs to be changed
there.

The merging itself is rather simple.  We store any new frag entries
after the last existing entry, without checking whether the first
new entry can be merged with the last existing entry.  Making this
check would actually be easy but since no existing driver can
produce contiguous frags anyway it would just be mental masturbation.

If the total number of entries would exceed the capacity of a
single skb, we simply resort to using frag_list as we do now.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 41e1224..c28bbba 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -313,10 +313,11 @@ struct napi_struct {
 #ifdef CONFIG_NETPOLL
 	spinlock_t		poll_lock;
 	int			poll_owner;
-	struct net_device	*dev;
 #endif
+	struct net_device	*dev;
 	struct list_head	dev_list;
 	struct sk_buff		*gro_list;
+	struct sk_buff		*skb;
 };
 
 enum
@@ -990,6 +991,9 @@ struct napi_gro_cb {
 
 	/* Number of segments aggregated. */
 	int count;
+
+	/* Free the skb? */
+	int free;
 };
 
 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
@@ -1011,6 +1015,14 @@ struct packet_type {
 	struct list_head	list;
 };
 
+struct napi_gro_fraginfo {
+	skb_frag_t frags[MAX_SKB_FRAGS];
+	unsigned int nr_frags;
+	unsigned int ip_summed;
+	unsigned int len;
+	__wsum csum;
+};
+
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 
@@ -1363,6 +1375,8 @@ extern int		netif_receive_skb(struct sk_buff *skb);
 extern void		napi_gro_flush(struct napi_struct *napi);
 extern int		napi_gro_receive(struct napi_struct *napi,
 					 struct sk_buff *skb);
+extern int		napi_gro_frags(struct napi_struct *napi,
+				       struct napi_gro_fraginfo *info);
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1e1a680..382df6c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,9 @@
 /* Instead of increasing this, you should create a hash table. */
 #define MAX_GRO_SKBS 8
 
+/* This should be increased if a protocol with a bigger head is added. */
+#define GRO_MAX_HEAD (MAX_HEADER + 128)
+
 /*
  *	The list of packet types we will receive (as opposed to discard)
  *	and the routines to invoke.
@@ -2345,7 +2348,7 @@ static int napi_gro_complete(struct sk_buff *skb)
 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
 	int err = -ENOENT;
 
-	if (!skb_shinfo(skb)->frag_list)
+	if (NAPI_GRO_CB(skb)->count == 1)
 		goto out;
 
 	rcu_read_lock();
@@ -2384,7 +2387,7 @@ void napi_gro_flush(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
 	struct packet_type *ptype;
@@ -2393,6 +2396,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	int count = 0;
 	int same_flow;
 	int mac_len;
+	int free;
 
 	if (!(skb->dev->features & NETIF_F_GRO))
 		goto normal;
@@ -2409,6 +2413,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		skb->mac_len = mac_len;
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
+		NAPI_GRO_CB(skb)->free = 0;
 
 		for (p = napi->gro_list; p; p = p->next) {
 			count++;
@@ -2428,6 +2433,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		goto normal;
 
 	same_flow = NAPI_GRO_CB(skb)->same_flow;
+	free = NAPI_GRO_CB(skb)->free;
 
 	if (pp) {
 		struct sk_buff *nskb = *pp;
@@ -2452,13 +2458,86 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	napi->gro_list = skb;
 
 ok:
-	return NET_RX_SUCCESS;
+	return free;
 
 normal:
-	return netif_receive_skb(skb);
+	return -1;
+}
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+	switch (__napi_gro_receive(napi, skb)) {
+	case -1:
+		return netif_receive_skb(skb);
+
+	case 1:
+		kfree_skb(skb);
+		break;
+	}
+
+	return NET_RX_SUCCESS;
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
+int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+{
+	struct net_device *dev = napi->dev;
+	struct sk_buff *skb = napi->skb;
+	int err = NET_RX_DROP;
+
+	napi->skb = NULL;
+
+	if (!skb) {
+		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
+		if (!skb)
+			goto out;
+
+		skb_reserve(skb, NET_IP_ALIGN);
+	}
+
+	BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
+	skb_shinfo(skb)->nr_frags = info->nr_frags;
+	memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
+
+	skb->data_len = info->len;
+	skb->len += info->len;
+	skb->truesize += info->len;
+
+	if (!pskb_may_pull(skb, ETH_HLEN))
+		goto reuse;
+
+	err = NET_RX_SUCCESS;
+
+	skb->protocol = eth_type_trans(skb, dev);
+
+	skb->ip_summed = info->ip_summed;
+	skb->csum = info->csum;
+
+	switch (__napi_gro_receive(napi, skb)) {
+	case -1:
+		return netif_receive_skb(skb);
+
+	case 0:
+		goto out;
+	}
+
+reuse:
+	skb_shinfo(skb)->nr_frags = 0;
+
+	skb->len -= skb->data_len;
+	skb->truesize -= skb->data_len;
+	skb->data_len = 0;
+
+	__skb_pull(skb, skb_headlen(skb));
+	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+
+	napi->skb = skb;
+
+out:
+	return err;
+}
+EXPORT_SYMBOL(napi_gro_frags);
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
@@ -2537,11 +2616,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 {
 	INIT_LIST_HEAD(&napi->poll_list);
 	napi->gro_list = NULL;
+	napi->skb = NULL;
 	napi->poll = poll;
 	napi->weight = weight;
 	list_add(&napi->dev_list, &dev->napi_list);
-#ifdef CONFIG_NETPOLL
 	napi->dev = dev;
+#ifdef CONFIG_NETPOLL
 	spin_lock_init(&napi->poll_lock);
 	napi->poll_owner = -1;
 #endif
@@ -2554,6 +2634,7 @@ void netif_napi_del(struct napi_struct *napi)
 	struct sk_buff *skb, *next;
 
 	list_del_init(&napi->dev_list);
+	kfree(napi->skb);
 
 	for (skb = napi->gro_list; skb; skb = next) {
 		next = skb->next;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3aafb10..5110b35 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2594,6 +2594,17 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 	if (skb_shinfo(p)->frag_list)
 		goto merge;
+	else if (!skb_headlen(p) && !skb_headlen(skb) &&
+		 skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <
+		 MAX_SKB_FRAGS) {
+		memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
+		       skb_shinfo(skb)->frags,
+		       skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
+
+		skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags;
+		NAPI_GRO_CB(skb)->free = 1;
+		goto done;
+	}
 
 	headroom = skb_headroom(p);
 	nskb = netdev_alloc_skb(p->dev, headroom);
@@ -2628,11 +2639,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	p = nskb;
 
 merge:
-	NAPI_GRO_CB(p)->count++;
 	p->prev->next = skb;
 	p->prev = skb;
 	skb_header_release(skb);
 
+done:
+	NAPI_GRO_CB(p)->count++;
 	p->data_len += skb->len;
 	p->truesize += skb->len;
 	p->len += skb->len;
-- 
cgit v0.10.2


From fecc7036e73a71231045e03ff524b5f8bd892a84 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Sun, 4 Jan 2009 16:22:04 -0800
Subject: isdn: capi: &&/|| typos

Correct two typos.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Acked-by: Karsten Keil <kkeil@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index d5b4cc3..6501202 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c
@@ -1519,7 +1519,7 @@ static int decodeFVteln(char *teln, unsigned long *bmaskp, int *activep)
 		int digit2 = 0;
 		if (!isdigit(*s)) return -3;
 		while (isdigit(*s)) { digit1 = digit1*10 + (*s - '0'); s++; }
-		if (digit1 <= 0 && digit1 > 30) return -4;
+		if (digit1 <= 0 || digit1 > 30) return -4;
 		if (*s == 0 || *s == ',' || *s == ' ') {
 			bmask |= (1 << digit1);
 			digit1 = 0;
@@ -1530,7 +1530,7 @@ static int decodeFVteln(char *teln, unsigned long *bmaskp, int *activep)
 		s++;
 		if (!isdigit(*s)) return -3;
 		while (isdigit(*s)) { digit2 = digit2*10 + (*s - '0'); s++; }
-		if (digit2 <= 0 && digit2 > 30) return -4;
+		if (digit2 <= 0 || digit2 > 30) return -4;
 		if (*s == 0 || *s == ',' || *s == ' ') {
 			if (digit1 > digit2)
 				for (i = digit2; i <= digit1 ; i++)
-- 
cgit v0.10.2


From 22692018b93f0782cda5a843cecfffda1854eb8d Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Sun, 4 Jan 2009 16:23:01 -0800
Subject: enc28j60: fix RX buffer overflow

The enc28j60 driver doesn't check whether the length of the packet as reported
by the hardware fits into the preallocated buffer. When stressed, the hardware
may report insanely large packets even tough the "Receive OK" bit is set. Fix
this.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/enc28j60.c b/drivers/net/enc28j60.c
index b0ef46c..cefe1d9 100644
--- a/drivers/net/enc28j60.c
+++ b/drivers/net/enc28j60.c
@@ -944,7 +944,7 @@ static void enc28j60_hw_rx(struct net_device *ndev)
 	if (netif_msg_rx_status(priv))
 		enc28j60_dump_rsv(priv, __func__, next_packet, len, rxstat);
 
-	if (!RSV_GETBIT(rxstat, RSV_RXOK)) {
+	if (!RSV_GETBIT(rxstat, RSV_RXOK) || len > MAX_FRAMELEN) {
 		if (netif_msg_rx_err(priv))
 			dev_err(&ndev->dev, "Rx Error (%04x)\n", rxstat);
 		ndev->stats.rx_errors++;
@@ -952,6 +952,8 @@ static void enc28j60_hw_rx(struct net_device *ndev)
 			ndev->stats.rx_crc_errors++;
 		if (RSV_GETBIT(rxstat, RSV_LENCHECKERR))
 			ndev->stats.rx_frame_errors++;
+		if (len > MAX_FRAMELEN)
+			ndev->stats.rx_over_errors++;
 	} else {
 		skb = dev_alloc_skb(len + NET_IP_ALIGN);
 		if (!skb) {
-- 
cgit v0.10.2


From c907a35acf0e964dfd0753519b3dc7689727e175 Mon Sep 17 00:00:00 2001
From: Ron Mercer <ron.mercer@qlogic.com>
Date: Sun, 4 Jan 2009 17:06:46 -0800
Subject: qlge: bugfix: Add missing pci_mapping_err checking.

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 718a7bd..c6ab6a4 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -963,6 +963,11 @@ static void ql_update_sbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 						     sbq_desc->p.skb->data,
 						     rx_ring->sbq_buf_size /
 						     2, PCI_DMA_FROMDEVICE);
+				if (pci_dma_mapping_error(qdev->pdev, map)) {
+					QPRINTK(qdev, IFUP, ERR, "PCI mapping failed.\n");
+					rx_ring->sbq_clean_idx = clean_idx;
+					return;
+				}
 				pci_unmap_addr_set(sbq_desc, mapaddr, map);
 				pci_unmap_len_set(sbq_desc, maplen,
 						  rx_ring->sbq_buf_size / 2);
-- 
cgit v0.10.2


From 4055c7d495f2502718bbbea871e6504ae95add14 Mon Sep 17 00:00:00 2001
From: Ron Mercer <ron.mercer@qlogic.com>
Date: Sun, 4 Jan 2009 17:07:09 -0800
Subject: qlge: bugfix: Add missing pci_unmap_page call in receive path.

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index c6ab6a4..9ceedfc 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -1308,6 +1308,11 @@ static struct sk_buff *ql_build_rx_skb(struct ql_adapter *qdev,
 					"No skb available, drop the packet.\n");
 				return NULL;
 			}
+			pci_unmap_page(qdev->pdev,
+				       pci_unmap_addr(lbq_desc,
+						      mapaddr),
+				       pci_unmap_len(lbq_desc, maplen),
+				       PCI_DMA_FROMDEVICE);
 			skb_reserve(skb, NET_IP_ALIGN);
 			QPRINTK(qdev, RX_STATUS, DEBUG,
 				"%d bytes of headers and data in large. Chain page to new skb and pull tail.\n", length);
-- 
cgit v0.10.2


From 2b72c7849f9a091c1e5d7255732faf14ac7d5123 Mon Sep 17 00:00:00 2001
From: Ron Mercer <ron.mercer@qlogic.com>
Date: Sun, 4 Jan 2009 17:07:50 -0800
Subject: qlge: bugfix: Fix shadow register endian issue.

Shadow registers are consistent memory locations to which the chip
echos ring indexes in little endian format.  These values need to
be endian swapped before referencing.

Note:
The register pointer declaration uses the volatile modifier which
causes warnings in checkpatch.
Per Documentation/volatile-considered-harmful.txt:
  - Pointers to data structures in coherent memory which might be modified
    by I/O devices can, sometimes, legitimately be volatile.  A ring buffer
    used by a network adapter, where that adapter changes pointers to
    indicate which descriptors have been processed, is an example of this
    type of situation.

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlge/qlge.h b/drivers/net/qlge/qlge.h
index ba2e1c5b..97321bb 100644
--- a/drivers/net/qlge/qlge.h
+++ b/drivers/net/qlge/qlge.h
@@ -1189,7 +1189,7 @@ struct rx_ring {
 	u32 cq_size;
 	u32 cq_len;
 	u16 cq_id;
-	u32 *prod_idx_sh_reg;	/* Shadowed producer register. */
+	volatile __le32 *prod_idx_sh_reg;	/* Shadowed producer register. */
 	dma_addr_t prod_idx_sh_reg_dma;
 	void __iomem *cnsmr_idx_db_reg;	/* PCI doorbell mem area + 0 */
 	u32 cnsmr_idx;		/* current sw idx */
@@ -1467,21 +1467,6 @@ static inline void ql_write_db_reg(u32 val, void __iomem *addr)
 	mmiowb();
 }
 
-/*
- * Shadow Registers:
- * Outbound queues have a consumer index that is maintained by the chip.
- * Inbound queues have a producer index that is maintained by the chip.
- * For lower overhead, these registers are "shadowed" to host memory
- * which allows the device driver to track the queue progress without
- * PCI reads. When an entry is placed on an inbound queue, the chip will
- * update the relevant index register and then copy the value to the
- * shadow register in host memory.
- */
-static inline unsigned int ql_read_sh_reg(const volatile void  *addr)
-{
-	return *(volatile unsigned int __force *)addr;
-}
-
 extern char qlge_driver_name[];
 extern const char qlge_driver_version[];
 extern const struct ethtool_ops qlge_ethtool_ops;
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 9ceedfc..c0e43c5 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -1559,7 +1559,7 @@ static void ql_process_chip_ae_intr(struct ql_adapter *qdev,
 static int ql_clean_outbound_rx_ring(struct rx_ring *rx_ring)
 {
 	struct ql_adapter *qdev = rx_ring->qdev;
-	u32 prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
+	u32 prod = le32_to_cpu(*rx_ring->prod_idx_sh_reg);
 	struct ob_mac_iocb_rsp *net_rsp = NULL;
 	int count = 0;
 
@@ -1585,7 +1585,7 @@ static int ql_clean_outbound_rx_ring(struct rx_ring *rx_ring)
 		}
 		count++;
 		ql_update_cq(rx_ring);
-		prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
+		prod = le32_to_cpu(*rx_ring->prod_idx_sh_reg);
 	}
 	ql_write_cq_idx(rx_ring);
 	if (netif_queue_stopped(qdev->ndev) && net_rsp != NULL) {
@@ -1605,7 +1605,7 @@ static int ql_clean_outbound_rx_ring(struct rx_ring *rx_ring)
 static int ql_clean_inbound_rx_ring(struct rx_ring *rx_ring, int budget)
 {
 	struct ql_adapter *qdev = rx_ring->qdev;
-	u32 prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
+	u32 prod = le32_to_cpu(*rx_ring->prod_idx_sh_reg);
 	struct ql_net_rsp_iocb *net_rsp;
 	int count = 0;
 
@@ -1638,7 +1638,7 @@ static int ql_clean_inbound_rx_ring(struct rx_ring *rx_ring, int budget)
 		}
 		count++;
 		ql_update_cq(rx_ring);
-		prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
+		prod = le32_to_cpu(*rx_ring->prod_idx_sh_reg);
 		if (count == budget)
 			break;
 	}
@@ -1801,7 +1801,7 @@ static irqreturn_t qlge_isr(int irq, void *dev_id)
 	 * Check the default queue and wake handler if active.
 	 */
 	rx_ring = &qdev->rx_ring[0];
-	if (ql_read_sh_reg(rx_ring->prod_idx_sh_reg) != rx_ring->cnsmr_idx) {
+	if (le32_to_cpu(*rx_ring->prod_idx_sh_reg) != rx_ring->cnsmr_idx) {
 		QPRINTK(qdev, INTR, INFO, "Waking handler for rx_ring[0].\n");
 		ql_disable_completion_interrupt(qdev, intr_context->intr);
 		queue_delayed_work_on(smp_processor_id(), qdev->q_workqueue,
@@ -1815,7 +1815,7 @@ static irqreturn_t qlge_isr(int irq, void *dev_id)
 		 */
 		for (i = 1; i < qdev->rx_ring_count; i++) {
 			rx_ring = &qdev->rx_ring[i];
-			if (ql_read_sh_reg(rx_ring->prod_idx_sh_reg) !=
+			if (le32_to_cpu(*rx_ring->prod_idx_sh_reg) !=
 			    rx_ring->cnsmr_idx) {
 				QPRINTK(qdev, INTR, INFO,
 					"Waking handler for rx_ring[%d].\n", i);
-- 
cgit v0.10.2


From 459caf5a99cd066598192a86f8f63d73f0b423a6 Mon Sep 17 00:00:00 2001
From: Ron Mercer <ron.mercer@qlogic.com>
Date: Sun, 4 Jan 2009 17:08:11 -0800
Subject: qlge: bugfix: Fix ring length setting for rx ring, large/small

The length field for these rings is 16-bits.  If the length is
the max supported 65536 then the setting should be zero.

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index c0e43c5..ffa2100 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -2491,7 +2491,8 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 	memset((void *)cqicb, 0, sizeof(struct cqicb));
 	cqicb->msix_vect = rx_ring->irq;
 
-	cqicb->len = cpu_to_le16(rx_ring->cq_len | LEN_V | LEN_CPP_CONT);
+	bq_len = (rx_ring->cq_len == 65536) ? 0 : (u16) rx_ring->cq_len;
+	cqicb->len = cpu_to_le16(bq_len | LEN_V | LEN_CPP_CONT);
 
 	cqicb->addr_lo = cpu_to_le32(rx_ring->cq_base_dma);
 	cqicb->addr_hi = cpu_to_le32((u64) rx_ring->cq_base_dma >> 32);
@@ -2513,8 +2514,11 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 		    cpu_to_le32(rx_ring->lbq_base_indirect_dma);
 		cqicb->lbq_addr_hi =
 		    cpu_to_le32((u64) rx_ring->lbq_base_indirect_dma >> 32);
-		cqicb->lbq_buf_size = cpu_to_le32(rx_ring->lbq_buf_size);
-		bq_len = (u16) rx_ring->lbq_len;
+		bq_len = (rx_ring->lbq_buf_size == 65536) ? 0 :
+			(u16) rx_ring->lbq_buf_size;
+		cqicb->lbq_buf_size = cpu_to_le16(bq_len);
+		bq_len = (rx_ring->lbq_len == 65536) ? 0 :
+			(u16) rx_ring->lbq_len;
 		cqicb->lbq_len = cpu_to_le16(bq_len);
 		rx_ring->lbq_prod_idx = rx_ring->lbq_len - 16;
 		rx_ring->lbq_curr_idx = 0;
@@ -2530,7 +2534,8 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 		    cpu_to_le32((u64) rx_ring->sbq_base_indirect_dma >> 32);
 		cqicb->sbq_buf_size =
 		    cpu_to_le16(((rx_ring->sbq_buf_size / 2) + 8) & 0xfffffff8);
-		bq_len = (u16) rx_ring->sbq_len;
+		bq_len = (rx_ring->sbq_len == 65536) ? 0 :
+			(u16) rx_ring->sbq_len;
 		cqicb->sbq_len = cpu_to_le16(bq_len);
 		rx_ring->sbq_prod_idx = rx_ring->sbq_len - 16;
 		rx_ring->sbq_curr_idx = 0;
-- 
cgit v0.10.2


From 939678f81a55c839ae58c9cc3d4ec6d0f60e7dc7 Mon Sep 17 00:00:00 2001
From: Ron Mercer <ron.mercer@qlogic.com>
Date: Sun, 4 Jan 2009 17:08:29 -0800
Subject: qlge: bugfix: Fix register access error checking.

Some indexed registers do not have error bits. In these cases a
value of zero should be used for error checking.

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index ffa2100..837be72 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -257,7 +257,7 @@ int ql_get_mac_addr_reg(struct ql_adapter *qdev, u32 type, u16 index,
 		{
 			status =
 			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, MAC_ADDR_E);
+				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
 			if (status)
 				goto exit;
 			ql_write32(qdev, MAC_ADDR_IDX, (offset++) | /* offset */
@@ -265,13 +265,13 @@ int ql_get_mac_addr_reg(struct ql_adapter *qdev, u32 type, u16 index,
 				   MAC_ADDR_ADR | MAC_ADDR_RS | type); /* type */
 			status =
 			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MR, MAC_ADDR_E);
+				MAC_ADDR_IDX, MAC_ADDR_MR, 0);
 			if (status)
 				goto exit;
 			*value++ = ql_read32(qdev, MAC_ADDR_DATA);
 			status =
 			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, MAC_ADDR_E);
+				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
 			if (status)
 				goto exit;
 			ql_write32(qdev, MAC_ADDR_IDX, (offset++) | /* offset */
@@ -279,14 +279,14 @@ int ql_get_mac_addr_reg(struct ql_adapter *qdev, u32 type, u16 index,
 				   MAC_ADDR_ADR | MAC_ADDR_RS | type); /* type */
 			status =
 			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MR, MAC_ADDR_E);
+				MAC_ADDR_IDX, MAC_ADDR_MR, 0);
 			if (status)
 				goto exit;
 			*value++ = ql_read32(qdev, MAC_ADDR_DATA);
 			if (type == MAC_ADDR_TYPE_CAM_MAC) {
 				status =
 				    ql_wait_reg_rdy(qdev,
-					MAC_ADDR_IDX, MAC_ADDR_MW, MAC_ADDR_E);
+					MAC_ADDR_IDX, MAC_ADDR_MW, 0);
 				if (status)
 					goto exit;
 				ql_write32(qdev, MAC_ADDR_IDX, (offset++) | /* offset */
@@ -294,7 +294,7 @@ int ql_get_mac_addr_reg(struct ql_adapter *qdev, u32 type, u16 index,
 					   MAC_ADDR_ADR | MAC_ADDR_RS | type); /* type */
 				status =
 				    ql_wait_reg_rdy(qdev, MAC_ADDR_IDX,
-						    MAC_ADDR_MR, MAC_ADDR_E);
+						    MAC_ADDR_MR, 0);
 				if (status)
 					goto exit;
 				*value++ = ql_read32(qdev, MAC_ADDR_DATA);
@@ -344,7 +344,7 @@ static int ql_set_mac_addr_reg(struct ql_adapter *qdev, u8 *addr, u32 type,
 
 			status =
 			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, MAC_ADDR_E);
+				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
 			if (status)
 				goto exit;
 			ql_write32(qdev, MAC_ADDR_IDX, (offset++) | /* offset */
@@ -353,7 +353,7 @@ static int ql_set_mac_addr_reg(struct ql_adapter *qdev, u8 *addr, u32 type,
 			ql_write32(qdev, MAC_ADDR_DATA, lower);
 			status =
 			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, MAC_ADDR_E);
+				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
 			if (status)
 				goto exit;
 			ql_write32(qdev, MAC_ADDR_IDX, (offset++) | /* offset */
@@ -362,7 +362,7 @@ static int ql_set_mac_addr_reg(struct ql_adapter *qdev, u8 *addr, u32 type,
 			ql_write32(qdev, MAC_ADDR_DATA, upper);
 			status =
 			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, MAC_ADDR_E);
+				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
 			if (status)
 				goto exit;
 			ql_write32(qdev, MAC_ADDR_IDX, (offset) |	/* offset */
@@ -400,7 +400,7 @@ static int ql_set_mac_addr_reg(struct ql_adapter *qdev, u8 *addr, u32 type,
 
 			status =
 			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, MAC_ADDR_E);
+				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
 			if (status)
 				goto exit;
 			ql_write32(qdev, MAC_ADDR_IDX, offset |	/* offset */
@@ -431,13 +431,13 @@ int ql_get_routing_reg(struct ql_adapter *qdev, u32 index, u32 *value)
 	if (status)
 		goto exit;
 
-	status = ql_wait_reg_rdy(qdev, RT_IDX, RT_IDX_MW, RT_IDX_E);
+	status = ql_wait_reg_rdy(qdev, RT_IDX, RT_IDX_MW, 0);
 	if (status)
 		goto exit;
 
 	ql_write32(qdev, RT_IDX,
 		   RT_IDX_TYPE_NICQ | RT_IDX_RS | (index << RT_IDX_IDX_SHIFT));
-	status = ql_wait_reg_rdy(qdev, RT_IDX, RT_IDX_MR, RT_IDX_E);
+	status = ql_wait_reg_rdy(qdev, RT_IDX, RT_IDX_MR, 0);
 	if (status)
 		goto exit;
 	*value = ql_read32(qdev, RT_DATA);
-- 
cgit v0.10.2


From f32f8b72e02e851972a0172603104046aa5fec96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20Holm=20Th=C3=B8gersen?= <odie@cs.aau.dk>
Date: Sun, 4 Jan 2009 17:11:24 -0800
Subject: net/rfkill/rfkill.c: fix unused rfkill_led_trigger() warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 4dec9b807be757780ca3611a959ac22c28d292a7 ("rfkill: strip pointless
notifier chain") removed the only user of rfkill_led_trigger() that was not
guarded by #ifdef CONFIG_RFKILL_LEDS. Therefore, move rfkill_led_trigger()
completely inside #ifdef CONFIG_RFKILL_LEDS and avoid the compile time
warning:

net/rfkill/rfkill.c:59: warning: 'rfkill_led_trigger' defined but not used

Signed-off-by: Simon Holm Thøgersen <odie@cs.aau.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 3c94f76..3eaa394 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -54,10 +54,10 @@ static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)];
 static bool rfkill_epo_lock_active;
 
 
+#ifdef CONFIG_RFKILL_LEDS
 static void rfkill_led_trigger(struct rfkill *rfkill,
 			       enum rfkill_state state)
 {
-#ifdef CONFIG_RFKILL_LEDS
 	struct led_trigger *led = &rfkill->led_trigger;
 
 	if (!led->name)
@@ -66,10 +66,8 @@ static void rfkill_led_trigger(struct rfkill *rfkill,
 		led_trigger_event(led, LED_OFF);
 	else
 		led_trigger_event(led, LED_FULL);
-#endif /* CONFIG_RFKILL_LEDS */
 }
 
-#ifdef CONFIG_RFKILL_LEDS
 static void rfkill_led_trigger_activate(struct led_classdev *led)
 {
 	struct rfkill *rfkill = container_of(led->trigger,
-- 
cgit v0.10.2


From f26251eb68ea766a98fed922593c154d15127621 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Sun, 4 Jan 2009 17:12:04 -0800
Subject: e100: cosmetic cleanup

Add missing space after if, switch, for and while keywords.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 9f38b16..134b2d6 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -658,12 +658,12 @@ static int e100_self_test(struct nic *nic)
 	e100_disable_irq(nic);
 
 	/* Check results of self-test */
-	if(nic->mem->selftest.result != 0) {
+	if (nic->mem->selftest.result != 0) {
 		DPRINTK(HW, ERR, "Self-test failed: result=0x%08X\n",
 			nic->mem->selftest.result);
 		return -ETIMEDOUT;
 	}
-	if(nic->mem->selftest.signature == 0) {
+	if (nic->mem->selftest.signature == 0) {
 		DPRINTK(HW, ERR, "Self-test failed: timed out\n");
 		return -ETIMEDOUT;
 	}
@@ -684,13 +684,13 @@ static void e100_eeprom_write(struct nic *nic, u16 addr_len, u16 addr, __le16 da
 	cmd_addr_data[2] = op_ewds << (addr_len - 2);
 
 	/* Bit-bang cmds to write word to eeprom */
-	for(j = 0; j < 3; j++) {
+	for (j = 0; j < 3; j++) {
 
 		/* Chip select */
 		iowrite8(eecs | eesk, &nic->csr->eeprom_ctrl_lo);
 		e100_write_flush(nic); udelay(4);
 
-		for(i = 31; i >= 0; i--) {
+		for (i = 31; i >= 0; i--) {
 			ctrl = (cmd_addr_data[j] & (1 << i)) ?
 				eecs | eedi : eecs;
 			iowrite8(ctrl, &nic->csr->eeprom_ctrl_lo);
@@ -723,7 +723,7 @@ static __le16 e100_eeprom_read(struct nic *nic, u16 *addr_len, u16 addr)
 	e100_write_flush(nic); udelay(4);
 
 	/* Bit-bang to read word from eeprom */
-	for(i = 31; i >= 0; i--) {
+	for (i = 31; i >= 0; i--) {
 		ctrl = (cmd_addr_data & (1 << i)) ? eecs | eedi : eecs;
 		iowrite8(ctrl, &nic->csr->eeprom_ctrl_lo);
 		e100_write_flush(nic); udelay(4);
@@ -734,7 +734,7 @@ static __le16 e100_eeprom_read(struct nic *nic, u16 *addr_len, u16 addr)
 		/* Eeprom drives a dummy zero to EEDO after receiving
 		 * complete address.  Use this to adjust addr_len. */
 		ctrl = ioread8(&nic->csr->eeprom_ctrl_lo);
-		if(!(ctrl & eedo) && i > 16) {
+		if (!(ctrl & eedo) && i > 16) {
 			*addr_len -= (i - 16);
 			i = 17;
 		}
@@ -758,9 +758,9 @@ static int e100_eeprom_load(struct nic *nic)
 	e100_eeprom_read(nic, &addr_len, 0);
 	nic->eeprom_wc = 1 << addr_len;
 
-	for(addr = 0; addr < nic->eeprom_wc; addr++) {
+	for (addr = 0; addr < nic->eeprom_wc; addr++) {
 		nic->eeprom[addr] = e100_eeprom_read(nic, &addr_len, addr);
-		if(addr < nic->eeprom_wc - 1)
+		if (addr < nic->eeprom_wc - 1)
 			checksum += le16_to_cpu(nic->eeprom[addr]);
 	}
 
@@ -784,15 +784,15 @@ static int e100_eeprom_save(struct nic *nic, u16 start, u16 count)
 	e100_eeprom_read(nic, &addr_len, 0);
 	nic->eeprom_wc = 1 << addr_len;
 
-	if(start + count >= nic->eeprom_wc)
+	if (start + count >= nic->eeprom_wc)
 		return -EINVAL;
 
-	for(addr = start; addr < start + count; addr++)
+	for (addr = start; addr < start + count; addr++)
 		e100_eeprom_write(nic, addr_len, addr, nic->eeprom[addr]);
 
 	/* The checksum, stored in the last word, is calculated such that
 	 * the sum of words should be 0xBABA */
-	for(addr = 0; addr < nic->eeprom_wc - 1; addr++)
+	for (addr = 0; addr < nic->eeprom_wc - 1; addr++)
 		checksum += le16_to_cpu(nic->eeprom[addr]);
 	nic->eeprom[nic->eeprom_wc - 1] = cpu_to_le16(0xBABA - checksum);
 	e100_eeprom_write(nic, addr_len, nic->eeprom_wc - 1,
@@ -812,19 +812,19 @@ static int e100_exec_cmd(struct nic *nic, u8 cmd, dma_addr_t dma_addr)
 	spin_lock_irqsave(&nic->cmd_lock, flags);
 
 	/* Previous command is accepted when SCB clears */
-	for(i = 0; i < E100_WAIT_SCB_TIMEOUT; i++) {
-		if(likely(!ioread8(&nic->csr->scb.cmd_lo)))
+	for (i = 0; i < E100_WAIT_SCB_TIMEOUT; i++) {
+		if (likely(!ioread8(&nic->csr->scb.cmd_lo)))
 			break;
 		cpu_relax();
-		if(unlikely(i > E100_WAIT_SCB_FAST))
+		if (unlikely(i > E100_WAIT_SCB_FAST))
 			udelay(5);
 	}
-	if(unlikely(i == E100_WAIT_SCB_TIMEOUT)) {
+	if (unlikely(i == E100_WAIT_SCB_TIMEOUT)) {
 		err = -EAGAIN;
 		goto err_unlock;
 	}
 
-	if(unlikely(cmd != cuc_resume))
+	if (unlikely(cmd != cuc_resume))
 		iowrite32(dma_addr, &nic->csr->scb.gen_ptr);
 	iowrite8(cmd, &nic->csr->scb.cmd_lo);
 
@@ -843,7 +843,7 @@ static int e100_exec_cb(struct nic *nic, struct sk_buff *skb,
 
 	spin_lock_irqsave(&nic->cb_lock, flags);
 
-	if(unlikely(!nic->cbs_avail)) {
+	if (unlikely(!nic->cbs_avail)) {
 		err = -ENOMEM;
 		goto err_unlock;
 	}
@@ -853,7 +853,7 @@ static int e100_exec_cb(struct nic *nic, struct sk_buff *skb,
 	nic->cbs_avail--;
 	cb->skb = skb;
 
-	if(unlikely(!nic->cbs_avail))
+	if (unlikely(!nic->cbs_avail))
 		err = -ENOSPC;
 
 	cb_prepare(nic, cb, skb);
@@ -864,15 +864,15 @@ static int e100_exec_cb(struct nic *nic, struct sk_buff *skb,
 	wmb();
 	cb->prev->command &= cpu_to_le16(~cb_s);
 
-	while(nic->cb_to_send != nic->cb_to_use) {
-		if(unlikely(e100_exec_cmd(nic, nic->cuc_cmd,
+	while (nic->cb_to_send != nic->cb_to_use) {
+		if (unlikely(e100_exec_cmd(nic, nic->cuc_cmd,
 			nic->cb_to_send->dma_addr))) {
 			/* Ok, here's where things get sticky.  It's
 			 * possible that we can't schedule the command
 			 * because the controller is too busy, so
 			 * let's just queue the command and try again
 			 * when another command is scheduled. */
-			if(err == -ENOSPC) {
+			if (err == -ENOSPC) {
 				//request a reset
 				schedule_work(&nic->tx_timeout_task);
 			}
@@ -945,7 +945,7 @@ static void e100_get_defaults(struct nic *nic)
 
 	/* MAC type is encoded as rev ID; exception: ICH is treated as 82559 */
 	nic->mac = (nic->flags & ich) ? mac_82559_D101M : nic->pdev->revision;
-	if(nic->mac == mac_unknown)
+	if (nic->mac == mac_unknown)
 		nic->mac = mac_82557_D100_A;
 
 	nic->params.rfds = rfds;
@@ -1008,23 +1008,23 @@ static void e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb)
 	config->adaptive_ifs = nic->adaptive_ifs;
 	config->loopback = nic->loopback;
 
-	if(nic->mii.force_media && nic->mii.full_duplex)
+	if (nic->mii.force_media && nic->mii.full_duplex)
 		config->full_duplex_force = 0x1;	/* 1=force, 0=auto */
 
-	if(nic->flags & promiscuous || nic->loopback) {
+	if (nic->flags & promiscuous || nic->loopback) {
 		config->rx_save_bad_frames = 0x1;	/* 1=save, 0=discard */
 		config->rx_discard_short_frames = 0x0;	/* 1=discard, 0=save */
 		config->promiscuous_mode = 0x1;		/* 1=on, 0=off */
 	}
 
-	if(nic->flags & multicast_all)
+	if (nic->flags & multicast_all)
 		config->multicast_all = 0x1;		/* 1=accept, 0=no */
 
 	/* disable WoL when up */
-	if(netif_running(nic->netdev) || !(nic->flags & wol_magic))
+	if (netif_running(nic->netdev) || !(nic->flags & wol_magic))
 		config->magic_packet_disable = 0x1;	/* 1=off, 0=on */
 
-	if(nic->mac >= mac_82558_D101_A4) {
+	if (nic->mac >= mac_82558_D101_A4) {
 		config->fc_disable = 0x1;	/* 1=Tx fc off, 0=Tx fc on */
 		config->mwi_enable = 0x1;	/* 1=enable, 0=disable */
 		config->standard_tcb = 0x0;	/* 1=standard, 0=extended */
@@ -1369,21 +1369,21 @@ static int e100_phy_init(struct nic *nic)
 	u16 bmcr, stat, id_lo, id_hi, cong;
 
 	/* Discover phy addr by searching addrs in order {1,0,2,..., 31} */
-	for(addr = 0; addr < 32; addr++) {
+	for (addr = 0; addr < 32; addr++) {
 		nic->mii.phy_id = (addr == 0) ? 1 : (addr == 1) ? 0 : addr;
 		bmcr = mdio_read(netdev, nic->mii.phy_id, MII_BMCR);
 		stat = mdio_read(netdev, nic->mii.phy_id, MII_BMSR);
 		stat = mdio_read(netdev, nic->mii.phy_id, MII_BMSR);
-		if(!((bmcr == 0xFFFF) || ((stat == 0) && (bmcr == 0))))
+		if (!((bmcr == 0xFFFF) || ((stat == 0) && (bmcr == 0))))
 			break;
 	}
 	DPRINTK(HW, DEBUG, "phy_addr = %d\n", nic->mii.phy_id);
-	if(addr == 32)
+	if (addr == 32)
 		return -EAGAIN;
 
 	/* Selected the phy and isolate the rest */
-	for(addr = 0; addr < 32; addr++) {
-		if(addr != nic->mii.phy_id) {
+	for (addr = 0; addr < 32; addr++) {
+		if (addr != nic->mii.phy_id) {
 			mdio_write(netdev, addr, MII_BMCR, BMCR_ISOLATE);
 		} else {
 			bmcr = mdio_read(netdev, addr, MII_BMCR);
@@ -1400,7 +1400,7 @@ static int e100_phy_init(struct nic *nic)
 
 	/* Handle National tx phys */
 #define NCS_PHY_MODEL_MASK	0xFFF0FFFF
-	if((nic->phy & NCS_PHY_MODEL_MASK) == phy_nsc_tx) {
+	if ((nic->phy & NCS_PHY_MODEL_MASK) == phy_nsc_tx) {
 		/* Disable congestion control */
 		cong = mdio_read(netdev, nic->mii.phy_id, MII_NSC_CONG);
 		cong |= NSC_CONG_TXREADY;
@@ -1408,7 +1408,7 @@ static int e100_phy_init(struct nic *nic)
 		mdio_write(netdev, nic->mii.phy_id, MII_NSC_CONG, cong);
 	}
 
-	if((nic->mac >= mac_82550_D102) || ((nic->flags & ich) &&
+	if ((nic->mac >= mac_82550_D102) || ((nic->flags & ich) &&
 	   (mdio_read(netdev, nic->mii.phy_id, MII_TPISTATUS) & 0x8000) &&
 		!(nic->eeprom[eeprom_cnfg_mdix] & eeprom_mdix_enabled))) {
 		/* enable/disable MDI/MDI-X auto-switching. */
@@ -1426,25 +1426,25 @@ static int e100_hw_init(struct nic *nic)
 	e100_hw_reset(nic);
 
 	DPRINTK(HW, ERR, "e100_hw_init\n");
-	if(!in_interrupt() && (err = e100_self_test(nic)))
+	if (!in_interrupt() && (err = e100_self_test(nic)))
 		return err;
 
-	if((err = e100_phy_init(nic)))
+	if ((err = e100_phy_init(nic)))
 		return err;
-	if((err = e100_exec_cmd(nic, cuc_load_base, 0)))
+	if ((err = e100_exec_cmd(nic, cuc_load_base, 0)))
 		return err;
-	if((err = e100_exec_cmd(nic, ruc_load_base, 0)))
+	if ((err = e100_exec_cmd(nic, ruc_load_base, 0)))
 		return err;
 	if ((err = e100_exec_cb_wait(nic, NULL, e100_setup_ucode)))
 		return err;
-	if((err = e100_exec_cb(nic, NULL, e100_configure)))
+	if ((err = e100_exec_cb(nic, NULL, e100_configure)))
 		return err;
-	if((err = e100_exec_cb(nic, NULL, e100_setup_iaaddr)))
+	if ((err = e100_exec_cb(nic, NULL, e100_setup_iaaddr)))
 		return err;
-	if((err = e100_exec_cmd(nic, cuc_dump_addr,
+	if ((err = e100_exec_cmd(nic, cuc_dump_addr,
 		nic->dma_addr + offsetof(struct mem, stats))))
 		return err;
-	if((err = e100_exec_cmd(nic, cuc_dump_reset, 0)))
+	if ((err = e100_exec_cmd(nic, cuc_dump_reset, 0)))
 		return err;
 
 	e100_disable_irq(nic);
@@ -1460,7 +1460,7 @@ static void e100_multi(struct nic *nic, struct cb *cb, struct sk_buff *skb)
 
 	cb->command = cpu_to_le16(cb_multi);
 	cb->u.multi.count = cpu_to_le16(count * ETH_ALEN);
-	for(i = 0; list && i < count; i++, list = list->next)
+	for (i = 0; list && i < count; i++, list = list->next)
 		memcpy(&cb->u.multi.addr[i*ETH_ALEN], &list->dmi_addr,
 			ETH_ALEN);
 }
@@ -1472,12 +1472,12 @@ static void e100_set_multicast_list(struct net_device *netdev)
 	DPRINTK(HW, DEBUG, "mc_count=%d, flags=0x%04X\n",
 		netdev->mc_count, netdev->flags);
 
-	if(netdev->flags & IFF_PROMISC)
+	if (netdev->flags & IFF_PROMISC)
 		nic->flags |= promiscuous;
 	else
 		nic->flags &= ~promiscuous;
 
-	if(netdev->flags & IFF_ALLMULTI ||
+	if (netdev->flags & IFF_ALLMULTI ||
 		netdev->mc_count > E100_MAX_MULTICAST_ADDRS)
 		nic->flags |= multicast_all;
 	else
@@ -1500,7 +1500,7 @@ static void e100_update_stats(struct nic *nic)
 	 * complete, so we're always waiting for results of the
 	 * previous command. */
 
-	if(*complete == cpu_to_le32(cuc_dump_reset_complete)) {
+	if (*complete == cpu_to_le32(cuc_dump_reset_complete)) {
 		*complete = 0;
 		nic->tx_frames = le32_to_cpu(s->tx_good_frames);
 		nic->tx_collisions = le32_to_cpu(s->tx_total_collisions);
@@ -1527,12 +1527,12 @@ static void e100_update_stats(struct nic *nic)
 			le32_to_cpu(s->tx_single_collisions);
 		nic->tx_multiple_collisions +=
 			le32_to_cpu(s->tx_multiple_collisions);
-		if(nic->mac >= mac_82558_D101_A4) {
+		if (nic->mac >= mac_82558_D101_A4) {
 			nic->tx_fc_pause += le32_to_cpu(s->fc_xmt_pause);
 			nic->rx_fc_pause += le32_to_cpu(s->fc_rcv_pause);
 			nic->rx_fc_unsupported +=
 				le32_to_cpu(s->fc_rcv_unsupported);
-			if(nic->mac >= mac_82559_D101M) {
+			if (nic->mac >= mac_82559_D101M) {
 				nic->tx_tco_frames +=
 					le16_to_cpu(s->xmt_tco_frames);
 				nic->rx_tco_frames +=
@@ -1542,7 +1542,7 @@ static void e100_update_stats(struct nic *nic)
 	}
 
 
-	if(e100_exec_cmd(nic, cuc_dump_reset, 0))
+	if (e100_exec_cmd(nic, cuc_dump_reset, 0))
 		DPRINTK(TX_ERR, DEBUG, "exec cuc_dump_reset failed\n");
 }
 
@@ -1551,19 +1551,19 @@ static void e100_adjust_adaptive_ifs(struct nic *nic, int speed, int duplex)
 	/* Adjust inter-frame-spacing (IFS) between two transmits if
 	 * we're getting collisions on a half-duplex connection. */
 
-	if(duplex == DUPLEX_HALF) {
+	if (duplex == DUPLEX_HALF) {
 		u32 prev = nic->adaptive_ifs;
 		u32 min_frames = (speed == SPEED_100) ? 1000 : 100;
 
-		if((nic->tx_frames / 32 < nic->tx_collisions) &&
+		if ((nic->tx_frames / 32 < nic->tx_collisions) &&
 		   (nic->tx_frames > min_frames)) {
-			if(nic->adaptive_ifs < 60)
+			if (nic->adaptive_ifs < 60)
 				nic->adaptive_ifs += 5;
 		} else if (nic->tx_frames < min_frames) {
-			if(nic->adaptive_ifs >= 5)
+			if (nic->adaptive_ifs >= 5)
 				nic->adaptive_ifs -= 5;
 		}
-		if(nic->adaptive_ifs != prev)
+		if (nic->adaptive_ifs != prev)
 			e100_exec_cb(nic, NULL, e100_configure);
 	}
 }
@@ -1579,12 +1579,12 @@ static void e100_watchdog(unsigned long data)
 
 	mii_ethtool_gset(&nic->mii, &cmd);
 
-	if(mii_link_ok(&nic->mii) && !netif_carrier_ok(nic->netdev)) {
+	if (mii_link_ok(&nic->mii) && !netif_carrier_ok(nic->netdev)) {
 		printk(KERN_INFO "e100: %s NIC Link is Up %s Mbps %s Duplex\n",
 		       nic->netdev->name,
 		       cmd.speed == SPEED_100 ? "100" : "10",
 		       cmd.duplex == DUPLEX_FULL ? "Full" : "Half");
-	} else if(!mii_link_ok(&nic->mii) && netif_carrier_ok(nic->netdev)) {
+	} else if (!mii_link_ok(&nic->mii) && netif_carrier_ok(nic->netdev)) {
 		printk(KERN_INFO "e100: %s NIC Link is Down\n",
 		       nic->netdev->name);
 	}
@@ -1604,11 +1604,11 @@ static void e100_watchdog(unsigned long data)
 	e100_update_stats(nic);
 	e100_adjust_adaptive_ifs(nic, cmd.speed, cmd.duplex);
 
-	if(nic->mac <= mac_82557_D100_C)
+	if (nic->mac <= mac_82557_D100_C)
 		/* Issue a multicast command to workaround a 557 lock up */
 		e100_set_multicast_list(nic->netdev);
 
-	if(nic->flags & ich && cmd.speed==SPEED_10 && cmd.duplex==DUPLEX_HALF)
+	if (nic->flags & ich && cmd.speed==SPEED_10 && cmd.duplex==DUPLEX_HALF)
 		/* Need SW workaround for ICH[x] 10Mbps/half duplex Tx hang. */
 		nic->flags |= ich_10h_workaround;
 	else
@@ -1623,7 +1623,7 @@ static void e100_xmit_prepare(struct nic *nic, struct cb *cb,
 {
 	cb->command = nic->tx_command;
 	/* interrupt every 16 packets regardless of delay */
-	if((nic->cbs_avail & ~15) == nic->cbs_avail)
+	if ((nic->cbs_avail & ~15) == nic->cbs_avail)
 		cb->command |= cpu_to_le16(cb_i);
 	cb->u.tcb.tbd_array = cb->dma_addr + offsetof(struct cb, u.tcb.tbd);
 	cb->u.tcb.tcb_byte_count = 0;
@@ -1640,18 +1640,18 @@ static int e100_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	struct nic *nic = netdev_priv(netdev);
 	int err;
 
-	if(nic->flags & ich_10h_workaround) {
+	if (nic->flags & ich_10h_workaround) {
 		/* SW workaround for ICH[x] 10Mbps/half duplex Tx hang.
 		   Issue a NOP command followed by a 1us delay before
 		   issuing the Tx command. */
-		if(e100_exec_cmd(nic, cuc_nop, 0))
+		if (e100_exec_cmd(nic, cuc_nop, 0))
 			DPRINTK(TX_ERR, DEBUG, "exec cuc_nop failed\n");
 		udelay(1);
 	}
 
 	err = e100_exec_cb(nic, skb, e100_xmit_prepare);
 
-	switch(err) {
+	switch (err) {
 	case -ENOSPC:
 		/* We queued the skb, but now we're out of space. */
 		DPRINTK(TX_ERR, DEBUG, "No space for CB\n");
@@ -1677,14 +1677,14 @@ static int e100_tx_clean(struct nic *nic)
 	spin_lock(&nic->cb_lock);
 
 	/* Clean CBs marked complete */
-	for(cb = nic->cb_to_clean;
+	for (cb = nic->cb_to_clean;
 	    cb->status & cpu_to_le16(cb_complete);
 	    cb = nic->cb_to_clean = cb->next) {
 		DPRINTK(TX_DONE, DEBUG, "cb[%d]->status = 0x%04X\n",
 		        (int)(((void*)cb - (void*)nic->cbs)/sizeof(struct cb)),
 		        cb->status);
 
-		if(likely(cb->skb != NULL)) {
+		if (likely(cb->skb != NULL)) {
 			dev->stats.tx_packets++;
 			dev->stats.tx_bytes += cb->skb->len;
 
@@ -1703,7 +1703,7 @@ static int e100_tx_clean(struct nic *nic)
 	spin_unlock(&nic->cb_lock);
 
 	/* Recover from running out of Tx resources in xmit_frame */
-	if(unlikely(tx_cleaned && netif_queue_stopped(nic->netdev)))
+	if (unlikely(tx_cleaned && netif_queue_stopped(nic->netdev)))
 		netif_wake_queue(nic->netdev);
 
 	return tx_cleaned;
@@ -1711,10 +1711,10 @@ static int e100_tx_clean(struct nic *nic)
 
 static void e100_clean_cbs(struct nic *nic)
 {
-	if(nic->cbs) {
-		while(nic->cbs_avail != nic->params.cbs.count) {
+	if (nic->cbs) {
+		while (nic->cbs_avail != nic->params.cbs.count) {
 			struct cb *cb = nic->cb_to_clean;
-			if(cb->skb) {
+			if (cb->skb) {
 				pci_unmap_single(nic->pdev,
 					le32_to_cpu(cb->u.tcb.tbd.buf_addr),
 					le16_to_cpu(cb->u.tcb.tbd.size),
@@ -1746,10 +1746,10 @@ static int e100_alloc_cbs(struct nic *nic)
 
 	nic->cbs = pci_alloc_consistent(nic->pdev,
 		sizeof(struct cb) * count, &nic->cbs_dma_addr);
-	if(!nic->cbs)
+	if (!nic->cbs)
 		return -ENOMEM;
 
-	for(cb = nic->cbs, i = 0; i < count; cb++, i++) {
+	for (cb = nic->cbs, i = 0; i < count; cb++, i++) {
 		cb->next = (i + 1 < count) ? cb + 1 : nic->cbs;
 		cb->prev = (i == 0) ? nic->cbs + count - 1 : cb - 1;
 
@@ -1767,14 +1767,14 @@ static int e100_alloc_cbs(struct nic *nic)
 
 static inline void e100_start_receiver(struct nic *nic, struct rx *rx)
 {
-	if(!nic->rxs) return;
-	if(RU_SUSPENDED != nic->ru_running) return;
+	if (!nic->rxs) return;
+	if (RU_SUSPENDED != nic->ru_running) return;
 
 	/* handle init time starts */
-	if(!rx) rx = nic->rxs;
+	if (!rx) rx = nic->rxs;
 
 	/* (Re)start RU if suspended or idle and RFA is non-NULL */
-	if(rx->skb) {
+	if (rx->skb) {
 		e100_exec_cmd(nic, ruc_start, rx->dma_addr);
 		nic->ru_running = RU_RUNNING;
 	}
@@ -1783,7 +1783,7 @@ static inline void e100_start_receiver(struct nic *nic, struct rx *rx)
 #define RFD_BUF_LEN (sizeof(struct rfd) + VLAN_ETH_FRAME_LEN)
 static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx)
 {
-	if(!(rx->skb = netdev_alloc_skb(nic->netdev, RFD_BUF_LEN + NET_IP_ALIGN)))
+	if (!(rx->skb = netdev_alloc_skb(nic->netdev, RFD_BUF_LEN + NET_IP_ALIGN)))
 		return -ENOMEM;
 
 	/* Align, init, and map the RFD. */
@@ -1820,7 +1820,7 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx,
 	struct rfd *rfd = (struct rfd *)skb->data;
 	u16 rfd_status, actual_size;
 
-	if(unlikely(work_done && *work_done >= work_to_do))
+	if (unlikely(work_done && *work_done >= work_to_do))
 		return -EAGAIN;
 
 	/* Need to sync before taking a peek at cb_complete bit */
@@ -1847,7 +1847,7 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx,
 
 	/* Get actual data size */
 	actual_size = le16_to_cpu(rfd->actual_size) & 0x3FFF;
-	if(unlikely(actual_size > RFD_BUF_LEN - sizeof(struct rfd)))
+	if (unlikely(actual_size > RFD_BUF_LEN - sizeof(struct rfd)))
 		actual_size = RFD_BUF_LEN - sizeof(struct rfd);
 
 	/* Get data */
@@ -1872,10 +1872,10 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx,
 	skb_put(skb, actual_size);
 	skb->protocol = eth_type_trans(skb, nic->netdev);
 
-	if(unlikely(!(rfd_status & cb_ok))) {
+	if (unlikely(!(rfd_status & cb_ok))) {
 		/* Don't indicate if hardware indicates errors */
 		dev_kfree_skb_any(skb);
-	} else if(actual_size > ETH_DATA_LEN + VLAN_ETH_HLEN) {
+	} else if (actual_size > ETH_DATA_LEN + VLAN_ETH_HLEN) {
 		/* Don't indicate oversized frames */
 		nic->rx_over_length_errors++;
 		dev_kfree_skb_any(skb);
@@ -1883,7 +1883,7 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx,
 		dev->stats.rx_packets++;
 		dev->stats.rx_bytes += actual_size;
 		netif_receive_skb(skb);
-		if(work_done)
+		if (work_done)
 			(*work_done)++;
 	}
 
@@ -1901,7 +1901,7 @@ static void e100_rx_clean(struct nic *nic, unsigned int *work_done,
 	struct rfd *old_before_last_rfd, *new_before_last_rfd;
 
 	/* Indicate newly arrived packets */
-	for(rx = nic->rx_to_clean; rx->skb; rx = nic->rx_to_clean = rx->next) {
+	for (rx = nic->rx_to_clean; rx->skb; rx = nic->rx_to_clean = rx->next) {
 		err = e100_rx_indicate(nic, rx, work_done, work_to_do);
 		/* Hit quota or no more to clean */
 		if (-EAGAIN == err || -ENODATA == err)
@@ -1922,8 +1922,8 @@ static void e100_rx_clean(struct nic *nic, unsigned int *work_done,
 	old_before_last_rfd = (struct rfd *)old_before_last_rx->skb->data;
 
 	/* Alloc new skbs to refill list */
-	for(rx = nic->rx_to_use; !rx->skb; rx = nic->rx_to_use = rx->next) {
-		if(unlikely(e100_rx_alloc_skb(nic, rx)))
+	for (rx = nic->rx_to_use; !rx->skb; rx = nic->rx_to_use = rx->next) {
+		if (unlikely(e100_rx_alloc_skb(nic, rx)))
 			break; /* Better luck next time (see watchdog) */
 	}
 
@@ -1959,11 +1959,11 @@ static void e100_rx_clean(struct nic *nic, unsigned int *work_done,
 			PCI_DMA_BIDIRECTIONAL);
 	}
 
-	if(restart_required) {
+	if (restart_required) {
 		// ack the rnr?
 		iowrite8(stat_ack_rnr, &nic->csr->scb.stat_ack);
 		e100_start_receiver(nic, nic->rx_to_clean);
-		if(work_done)
+		if (work_done)
 			(*work_done)++;
 	}
 }
@@ -1975,9 +1975,9 @@ static void e100_rx_clean_list(struct nic *nic)
 
 	nic->ru_running = RU_UNINITIALIZED;
 
-	if(nic->rxs) {
-		for(rx = nic->rxs, i = 0; i < count; rx++, i++) {
-			if(rx->skb) {
+	if (nic->rxs) {
+		for (rx = nic->rxs, i = 0; i < count; rx++, i++) {
+			if (rx->skb) {
 				pci_unmap_single(nic->pdev, rx->dma_addr,
 					RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL);
 				dev_kfree_skb(rx->skb);
@@ -1999,13 +1999,13 @@ static int e100_rx_alloc_list(struct nic *nic)
 	nic->rx_to_use = nic->rx_to_clean = NULL;
 	nic->ru_running = RU_UNINITIALIZED;
 
-	if(!(nic->rxs = kcalloc(count, sizeof(struct rx), GFP_ATOMIC)))
+	if (!(nic->rxs = kcalloc(count, sizeof(struct rx), GFP_ATOMIC)))
 		return -ENOMEM;
 
-	for(rx = nic->rxs, i = 0; i < count; rx++, i++) {
+	for (rx = nic->rxs, i = 0; i < count; rx++, i++) {
 		rx->next = (i + 1 < count) ? rx + 1 : nic->rxs;
 		rx->prev = (i == 0) ? nic->rxs + count - 1 : rx - 1;
-		if(e100_rx_alloc_skb(nic, rx)) {
+		if (e100_rx_alloc_skb(nic, rx)) {
 			e100_rx_clean_list(nic);
 			return -ENOMEM;
 		}
@@ -2038,7 +2038,7 @@ static irqreturn_t e100_intr(int irq, void *dev_id)
 
 	DPRINTK(INTR, DEBUG, "stat_ack = 0x%02X\n", stat_ack);
 
-	if(stat_ack == stat_ack_not_ours ||	/* Not our interrupt */
+	if (stat_ack == stat_ack_not_ours ||	/* Not our interrupt */
 	   stat_ack == stat_ack_not_present)	/* Hardware is ejected */
 		return IRQ_NONE;
 
@@ -2046,10 +2046,10 @@ static irqreturn_t e100_intr(int irq, void *dev_id)
 	iowrite8(stat_ack, &nic->csr->scb.stat_ack);
 
 	/* We hit Receive No Resource (RNR); restart RU after cleaning */
-	if(stat_ack & stat_ack_rnr)
+	if (stat_ack & stat_ack_rnr)
 		nic->ru_running = RU_SUSPENDED;
 
-	if(likely(netif_rx_schedule_prep(&nic->napi))) {
+	if (likely(netif_rx_schedule_prep(&nic->napi))) {
 		e100_disable_irq(nic);
 		__netif_rx_schedule(&nic->napi);
 	}
@@ -2102,7 +2102,7 @@ static int e100_set_mac_address(struct net_device *netdev, void *p)
 
 static int e100_change_mtu(struct net_device *netdev, int new_mtu)
 {
-	if(new_mtu < ETH_ZLEN || new_mtu > ETH_DATA_LEN)
+	if (new_mtu < ETH_ZLEN || new_mtu > ETH_DATA_LEN)
 		return -EINVAL;
 	netdev->mtu = new_mtu;
 	return 0;
@@ -2121,16 +2121,16 @@ static int e100_up(struct nic *nic)
 {
 	int err;
 
-	if((err = e100_rx_alloc_list(nic)))
+	if ((err = e100_rx_alloc_list(nic)))
 		return err;
-	if((err = e100_alloc_cbs(nic)))
+	if ((err = e100_alloc_cbs(nic)))
 		goto err_rx_clean_list;
-	if((err = e100_hw_init(nic)))
+	if ((err = e100_hw_init(nic)))
 		goto err_clean_cbs;
 	e100_set_multicast_list(nic->netdev);
 	e100_start_receiver(nic, NULL);
 	mod_timer(&nic->watchdog, jiffies);
-	if((err = request_irq(nic->pdev->irq, e100_intr, IRQF_SHARED,
+	if ((err = request_irq(nic->pdev->irq, e100_intr, IRQF_SHARED,
 		nic->netdev->name, nic->netdev)))
 		goto err_no_irq;
 	netif_wake_queue(nic->netdev);
@@ -2192,26 +2192,26 @@ static int e100_loopback_test(struct nic *nic, enum loopback loopback_mode)
 	 * in loopback mode, and the test passes if the received
 	 * packet compares byte-for-byte to the transmitted packet. */
 
-	if((err = e100_rx_alloc_list(nic)))
+	if ((err = e100_rx_alloc_list(nic)))
 		return err;
-	if((err = e100_alloc_cbs(nic)))
+	if ((err = e100_alloc_cbs(nic)))
 		goto err_clean_rx;
 
 	/* ICH PHY loopback is broken so do MAC loopback instead */
-	if(nic->flags & ich && loopback_mode == lb_phy)
+	if (nic->flags & ich && loopback_mode == lb_phy)
 		loopback_mode = lb_mac;
 
 	nic->loopback = loopback_mode;
-	if((err = e100_hw_init(nic)))
+	if ((err = e100_hw_init(nic)))
 		goto err_loopback_none;
 
-	if(loopback_mode == lb_phy)
+	if (loopback_mode == lb_phy)
 		mdio_write(nic->netdev, nic->mii.phy_id, MII_BMCR,
 			BMCR_LOOPBACK);
 
 	e100_start_receiver(nic, NULL);
 
-	if(!(skb = netdev_alloc_skb(nic->netdev, ETH_DATA_LEN))) {
+	if (!(skb = netdev_alloc_skb(nic->netdev, ETH_DATA_LEN))) {
 		err = -ENOMEM;
 		goto err_loopback_none;
 	}
@@ -2224,7 +2224,7 @@ static int e100_loopback_test(struct nic *nic, enum loopback loopback_mode)
 	pci_dma_sync_single_for_cpu(nic->pdev, nic->rx_to_clean->dma_addr,
 			RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL);
 
-	if(memcmp(nic->rx_to_clean->skb->data + sizeof(struct rfd),
+	if (memcmp(nic->rx_to_clean->skb->data + sizeof(struct rfd),
 	   skb->data, ETH_DATA_LEN))
 		err = -EAGAIN;
 
@@ -2301,7 +2301,7 @@ static void e100_get_regs(struct net_device *netdev,
 	buff[0] = ioread8(&nic->csr->scb.cmd_hi) << 24 |
 		ioread8(&nic->csr->scb.cmd_lo) << 16 |
 		ioread16(&nic->csr->scb.status);
-	for(i = E100_PHY_REGS; i >= 0; i--)
+	for (i = E100_PHY_REGS; i >= 0; i--)
 		buff[1 + E100_PHY_REGS - i] =
 			mdio_read(netdev, nic->mii.phy_id, i);
 	memset(nic->mem->dump_buf, 0, sizeof(nic->mem->dump_buf));
@@ -2326,7 +2326,7 @@ static int e100_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 	    !device_can_wakeup(&nic->pdev->dev))
 		return -EOPNOTSUPP;
 
-	if(wol->wolopts)
+	if (wol->wolopts)
 		nic->flags |= wol_magic;
 	else
 		nic->flags &= ~wol_magic;
@@ -2385,7 +2385,7 @@ static int e100_set_eeprom(struct net_device *netdev,
 {
 	struct nic *nic = netdev_priv(netdev);
 
-	if(eeprom->magic != E100_EEPROM_MAGIC)
+	if (eeprom->magic != E100_EEPROM_MAGIC)
 		return -EINVAL;
 
 	memcpy(&((u8 *)nic->eeprom)[eeprom->offset], bytes, eeprom->len);
@@ -2421,7 +2421,7 @@ static int e100_set_ringparam(struct net_device *netdev,
 	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
 		return -EINVAL;
 
-	if(netif_running(netdev))
+	if (netif_running(netdev))
 		e100_down(nic);
 	rfds->count = max(ring->rx_pending, rfds->min);
 	rfds->count = min(rfds->count, rfds->max);
@@ -2429,7 +2429,7 @@ static int e100_set_ringparam(struct net_device *netdev,
 	cbs->count = min(cbs->count, cbs->max);
 	DPRINTK(DRV, INFO, "Ring Param settings: rx: %d, tx %d\n",
 	        rfds->count, cbs->count);
-	if(netif_running(netdev))
+	if (netif_running(netdev))
 		e100_up(nic);
 
 	return 0;
@@ -2454,12 +2454,12 @@ static void e100_diag_test(struct net_device *netdev,
 	memset(data, 0, E100_TEST_LEN * sizeof(u64));
 	data[0] = !mii_link_ok(&nic->mii);
 	data[1] = e100_eeprom_load(nic);
-	if(test->flags & ETH_TEST_FL_OFFLINE) {
+	if (test->flags & ETH_TEST_FL_OFFLINE) {
 
 		/* save speed, duplex & autoneg settings */
 		err = mii_ethtool_gset(&nic->mii, &cmd);
 
-		if(netif_running(netdev))
+		if (netif_running(netdev))
 			e100_down(nic);
 		data[2] = e100_self_test(nic);
 		data[3] = e100_loopback_test(nic, lb_mac);
@@ -2468,10 +2468,10 @@ static void e100_diag_test(struct net_device *netdev,
 		/* restore speed, duplex & autoneg settings */
 		err = mii_ethtool_sset(&nic->mii, &cmd);
 
-		if(netif_running(netdev))
+		if (netif_running(netdev))
 			e100_up(nic);
 	}
-	for(i = 0; i < E100_TEST_LEN; i++)
+	for (i = 0; i < E100_TEST_LEN; i++)
 		test->flags |= data[i] ? ETH_TEST_FL_FAILED : 0;
 
 	msleep_interruptible(4 * 1000);
@@ -2481,7 +2481,7 @@ static int e100_phys_id(struct net_device *netdev, u32 data)
 {
 	struct nic *nic = netdev_priv(netdev);
 
-	if(!data || data > (u32)(MAX_SCHEDULE_TIMEOUT / HZ))
+	if (!data || data > (u32)(MAX_SCHEDULE_TIMEOUT / HZ))
 		data = (u32)(MAX_SCHEDULE_TIMEOUT / HZ);
 	mod_timer(&nic->blink_timer, jiffies);
 	msleep_interruptible(data * 1000);
@@ -2524,7 +2524,7 @@ static void e100_get_ethtool_stats(struct net_device *netdev,
 	struct nic *nic = netdev_priv(netdev);
 	int i;
 
-	for(i = 0; i < E100_NET_STATS_LEN; i++)
+	for (i = 0; i < E100_NET_STATS_LEN; i++)
 		data[i] = ((unsigned long *)&netdev->stats)[i];
 
 	data[i++] = nic->tx_deferred;
@@ -2539,7 +2539,7 @@ static void e100_get_ethtool_stats(struct net_device *netdev,
 
 static void e100_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
-	switch(stringset) {
+	switch (stringset) {
 	case ETH_SS_TEST:
 		memcpy(data, *e100_gstrings_test, sizeof(e100_gstrings_test));
 		break;
@@ -2589,7 +2589,7 @@ static int e100_alloc(struct nic *nic)
 
 static void e100_free(struct nic *nic)
 {
-	if(nic->mem) {
+	if (nic->mem) {
 		pci_free_consistent(nic->pdev, sizeof(struct mem),
 			nic->mem, nic->dma_addr);
 		nic->mem = NULL;
@@ -2602,7 +2602,7 @@ static int e100_open(struct net_device *netdev)
 	int err = 0;
 
 	netif_carrier_off(netdev);
-	if((err = e100_up(nic)))
+	if ((err = e100_up(nic)))
 		DPRINTK(IFUP, ERR, "Cannot open interface, aborting.\n");
 	return err;
 }
@@ -2635,8 +2635,8 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 	struct nic *nic;
 	int err;
 
-	if(!(netdev = alloc_etherdev(sizeof(struct nic)))) {
-		if(((1 << debug) - 1) & NETIF_MSG_PROBE)
+	if (!(netdev = alloc_etherdev(sizeof(struct nic)))) {
+		if (((1 << debug) - 1) & NETIF_MSG_PROBE)
 			printk(KERN_ERR PFX "Etherdev alloc failed, abort.\n");
 		return -ENOMEM;
 	}
@@ -2653,24 +2653,24 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 	nic->msg_enable = (1 << debug) - 1;
 	pci_set_drvdata(pdev, netdev);
 
-	if((err = pci_enable_device(pdev))) {
+	if ((err = pci_enable_device(pdev))) {
 		DPRINTK(PROBE, ERR, "Cannot enable PCI device, aborting.\n");
 		goto err_out_free_dev;
 	}
 
-	if(!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
 		DPRINTK(PROBE, ERR, "Cannot find proper PCI device "
 			"base address, aborting.\n");
 		err = -ENODEV;
 		goto err_out_disable_pdev;
 	}
 
-	if((err = pci_request_regions(pdev, DRV_NAME))) {
+	if ((err = pci_request_regions(pdev, DRV_NAME))) {
 		DPRINTK(PROBE, ERR, "Cannot obtain PCI resources, aborting.\n");
 		goto err_out_disable_pdev;
 	}
 
-	if((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) {
+	if ((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) {
 		DPRINTK(PROBE, ERR, "No usable DMA configuration, aborting.\n");
 		goto err_out_free_res;
 	}
@@ -2681,13 +2681,13 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 		DPRINTK(PROBE, INFO, "using i/o access mode\n");
 
 	nic->csr = pci_iomap(pdev, (use_io ? 1 : 0), sizeof(struct csr));
-	if(!nic->csr) {
+	if (!nic->csr) {
 		DPRINTK(PROBE, ERR, "Cannot map device registers, aborting.\n");
 		err = -ENOMEM;
 		goto err_out_free_res;
 	}
 
-	if(ent->driver_data)
+	if (ent->driver_data)
 		nic->flags |= ich;
 	else
 		nic->flags &= ~ich;
@@ -2715,12 +2715,12 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 
 	INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task);
 
-	if((err = e100_alloc(nic))) {
+	if ((err = e100_alloc(nic))) {
 		DPRINTK(PROBE, ERR, "Cannot alloc driver memory, aborting.\n");
 		goto err_out_iounmap;
 	}
 
-	if((err = e100_eeprom_load(nic)))
+	if ((err = e100_eeprom_load(nic)))
 		goto err_out_free;
 
 	e100_phy_init(nic);
@@ -2740,7 +2740,7 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 	}
 
 	/* Wol magic packet can be enabled from eeprom */
-	if((nic->mac >= mac_82558_D101_A4) &&
+	if ((nic->mac >= mac_82558_D101_A4) &&
 	   (nic->eeprom[eeprom_id] & eeprom_id_wol)) {
 		nic->flags |= wol_magic;
 		device_set_wakeup_enable(&pdev->dev, true);
@@ -2750,7 +2750,7 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 	pci_pme_active(pdev, false);
 
 	strcpy(netdev->name, "eth%d");
-	if((err = register_netdev(netdev))) {
+	if ((err = register_netdev(netdev))) {
 		DPRINTK(PROBE, ERR, "Cannot register net device, aborting.\n");
 		goto err_out_free;
 	}
@@ -2779,7 +2779,7 @@ static void __devexit e100_remove(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 
-	if(netdev) {
+	if (netdev) {
 		struct nic *nic = netdev_priv(netdev);
 		unregister_netdev(netdev);
 		e100_free(nic);
@@ -2932,7 +2932,7 @@ static struct pci_driver e100_driver = {
 
 static int __init e100_init_module(void)
 {
-	if(((1 << debug) - 1) & NETIF_MSG_DRV) {
+	if (((1 << debug) - 1) & NETIF_MSG_DRV) {
 		printk(KERN_INFO PFX "%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
 		printk(KERN_INFO PFX "%s\n", DRV_COPYRIGHT);
 	}
-- 
cgit v0.10.2


From 745417e20684e4951afcabfe74583a3884e54980 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 4 Jan 2009 17:14:46 -0800
Subject: tun: Eliminate sparse signedness warning

register_pernet_gen_device() expects 'int*', found via sparse.

 CHECK   drivers/net/tun.c
 drivers/net/tun.c:1245:36: warning: incorrect type in argument 1 (different signedness)
 drivers/net/tun.c:1245:36:    expected int *id
 drivers/net/tun.c:1245:36:    got unsigned int static [toplevel] *<noident>

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 69f9a0e..d7b81e4 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -213,7 +213,7 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
 
 /* Network device part of the driver */
 
-static unsigned int tun_net_id;
+static int tun_net_id;
 struct tun_net {
 	struct list_head dev_list;
 };
-- 
cgit v0.10.2


From 22604c866889c4b2e12b73cbf1683bda1b72a313 Mon Sep 17 00:00:00 2001
From: Michael Marineau <mike@marineau.org>
Date: Sun, 4 Jan 2009 17:18:51 -0800
Subject: net: Fix for initial link state in 2.6.28

From: Michael Marineau <mike@marineau.org>

Commit b47300168e770b60ab96c8924854c3b0eb4260eb "Do not fire linkwatch
events until the device is registered." was made as a workaround for
drivers that call netif_carrier_off before registering the device.
Unfortunately this causes these drivers to incorrectly report their
link status as IF_OPER_UNKNOWN which can falsely set the IFF_RUNNING
flag when the interface is first brought up. This issues was
previously pointed out[1] but was dismissed saying that IFF_RUNNING is
not related to the link status. From my digging IFF_RUNNING, as
reported to userspace, is based on the link state. It is set based on
__LINK_STATE_START and IF_OPER_UP or IF_OPER_UNKNOWN. See [2], [3],
and [4]. (Whether or not the kernel has IFF_RUNNING set in flags is
not reported to user space so it may well be independent of the link,
I don't know if and when it may get set.)

The end result depends slightly depending on the driver. The the two I
tested were e1000e and b44. With e1000e if the system is booted
without a network cable attached the interface will falsely report
RUNNING when it is brought up causing NetworkManager to attempt to
start it and eventually time out. With b44 when the system is booted
with a network cable attached and brought up with dhcpcd it will time
out the first time.

The attached patch that will still set the operstate variable
correctly to IF_OPER_UP/DOWN/etc when linkwatch_fire_event is called
but then return rather than skipping the linkwatch_fire_event call
entirely as the previous fix did. (sorry it isn't inline, I don't have
a patch friendly email client at the moment)

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index bf8f7af..1e401e1 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -178,7 +178,6 @@ static void __linkwatch_run_queue(int urgent_only)
 		 */
 		clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
 
-		rfc2863_policy(dev);
 		if (dev->flags & IFF_UP) {
 			if (netif_carrier_ok(dev))
 				dev_activate(dev);
@@ -215,6 +214,12 @@ void linkwatch_fire_event(struct net_device *dev)
 {
 	bool urgent = linkwatch_urgent_event(dev);
 
+	rfc2863_policy(dev);
+
+	/* Some drivers call netif_carrier_off early */
+	if (dev->reg_state == NETREG_UNINITIALIZED)
+		return;
+
 	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
 		dev_hold(dev);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5f5efe4..23a8e61 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -270,8 +270,6 @@ static void dev_watchdog_down(struct net_device *dev)
 void netif_carrier_on(struct net_device *dev)
 {
 	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
-		if (dev->reg_state == NETREG_UNINITIALIZED)
-			return;
 		linkwatch_fire_event(dev);
 		if (netif_running(dev))
 			__netdev_watchdog_up(dev);
@@ -288,8 +286,6 @@ EXPORT_SYMBOL(netif_carrier_on);
 void netif_carrier_off(struct net_device *dev)
 {
 	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
-		if (dev->reg_state == NETREG_UNINITIALIZED)
-			return;
 		linkwatch_fire_event(dev);
 	}
 }
-- 
cgit v0.10.2


From 914d11647b6d6fe81bdf0c059612ee36282b8cee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 4 Jan 2009 17:27:31 -0800
Subject: ipv6: IPV6_PKTINFO relied userspace providing correct length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Reported-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index eeeaad2..40f3246 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -404,7 +404,7 @@ sticky_done:
 		else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL)
 			goto e_inval;
 
-		if (copy_from_user(&pkt, optval, optlen)) {
+		if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) {
 				retv = -EFAULT;
 				break;
 		}
-- 
cgit v0.10.2


From 858eb711ba64f8a001d7003295b8078bcab33b6d Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Sun, 4 Jan 2009 17:29:21 -0800
Subject: DCB: fix kfree(skb)

Use kfree_skb instead of kfree for struct sk_buff pointers.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 5dbfe5f..8379496 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -191,7 +191,7 @@ static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
 	return 0;
 nlmsg_failure:
 err:
-	kfree(dcbnl_skb);
+	kfree_skb(dcbnl_skb);
 	return ret;
 }
 
@@ -272,7 +272,7 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb,
 	return 0;
 nlmsg_failure:
 err:
-	kfree(dcbnl_skb);
+	kfree_skb(dcbnl_skb);
 err_out:
 	return -EINVAL;
 }
@@ -314,7 +314,7 @@ static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb,
 
 nlmsg_failure:
 err:
-	kfree(dcbnl_skb);
+	kfree_skb(dcbnl_skb);
 err_out:
 	return -EINVAL;
 }
@@ -380,7 +380,7 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlattr **tb,
 	return 0;
 nlmsg_failure:
 err:
-	kfree(dcbnl_skb);
+	kfree_skb(dcbnl_skb);
 err_out:
 	return -EINVAL;
 }
@@ -458,7 +458,7 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlattr **tb,
 	return 0;
 nlmsg_failure:
 err:
-	kfree(dcbnl_skb);
+	kfree_skb(dcbnl_skb);
 err_out:
 	return ret;
 }
@@ -687,7 +687,7 @@ err_pg:
 	nla_nest_cancel(dcbnl_skb, pg_nest);
 nlmsg_failure:
 err:
-	kfree(dcbnl_skb);
+	kfree_skb(dcbnl_skb);
 err_out:
 	ret  = -EINVAL;
 	return ret;
@@ -949,7 +949,7 @@ err_bcn:
 	nla_nest_cancel(dcbnl_skb, bcn_nest);
 nlmsg_failure:
 err:
-	kfree(dcbnl_skb);
+	kfree_skb(dcbnl_skb);
 err_out:
 	ret  = -EINVAL;
 	return ret;
-- 
cgit v0.10.2


From 6e5c172cf7ca1ab878cc6a6a4c1d52fef60f3ee0 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver@hartkopp.net>
Date: Sun, 4 Jan 2009 17:31:18 -0800
Subject: can: update can-bcm for hrtimer hardirq callbacks

Since commit ca109491f612aab5c8152207631c0444f63da97f ("hrtimer:
removing all ur callback modes") the hrtimer callbacks are processed
only in hardirq context.

This patch moves some functionality into tasklets to run in softirq
context.

Additionally some duplicated code was removed in bcm_rx_thr_flush()
and an avoidable memcpy was removed from bcm_rx_handler().

Signed-off-by: Oliver Hartkopp <oliver@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/can/bcm.c b/net/can/bcm.c
index da0d426..6248ae2 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -70,7 +70,7 @@
 
 #define CAN_BCM_VERSION CAN_VERSION
 static __initdata const char banner[] = KERN_INFO
-	"can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n";
+	"can: broadcast manager protocol (rev " CAN_BCM_VERSION " t)\n";
 
 MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -90,6 +90,7 @@ struct bcm_op {
 	unsigned long frames_abs, frames_filtered;
 	struct timeval ival1, ival2;
 	struct hrtimer timer, thrtimer;
+	struct tasklet_struct tsklet, thrtsklet;
 	ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg;
 	int rx_ifindex;
 	int count;
@@ -341,6 +342,23 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
 	}
 }
 
+static void bcm_tx_timeout_tsklet(unsigned long data)
+{
+	struct bcm_op *op = (struct bcm_op *)data;
+	struct bcm_msg_head msg_head;
+
+	/* create notification to user */
+	msg_head.opcode  = TX_EXPIRED;
+	msg_head.flags   = op->flags;
+	msg_head.count   = op->count;
+	msg_head.ival1   = op->ival1;
+	msg_head.ival2   = op->ival2;
+	msg_head.can_id  = op->can_id;
+	msg_head.nframes = 0;
+
+	bcm_send_to_user(op, &msg_head, NULL, 0);
+}
+
 /*
  * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions
  */
@@ -352,20 +370,8 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
 	if (op->kt_ival1.tv64 && (op->count > 0)) {
 
 		op->count--;
-		if (!op->count && (op->flags & TX_COUNTEVT)) {
-			struct bcm_msg_head msg_head;
-
-			/* create notification to user */
-			msg_head.opcode  = TX_EXPIRED;
-			msg_head.flags   = op->flags;
-			msg_head.count   = op->count;
-			msg_head.ival1   = op->ival1;
-			msg_head.ival2   = op->ival2;
-			msg_head.can_id  = op->can_id;
-			msg_head.nframes = 0;
-
-			bcm_send_to_user(op, &msg_head, NULL, 0);
-		}
+		if (!op->count && (op->flags & TX_COUNTEVT))
+			tasklet_schedule(&op->tsklet);
 	}
 
 	if (op->kt_ival1.tv64 && (op->count > 0)) {
@@ -402,6 +408,9 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data)
 	if (op->frames_filtered > ULONG_MAX/100)
 		op->frames_filtered = op->frames_abs = 0;
 
+	/* this element is not throttled anymore */
+	data->can_dlc &= (BCM_CAN_DLC_MASK|RX_RECV);
+
 	head.opcode  = RX_CHANGED;
 	head.flags   = op->flags;
 	head.count   = op->count;
@@ -420,37 +429,32 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data)
  */
 static void bcm_rx_update_and_send(struct bcm_op *op,
 				   struct can_frame *lastdata,
-				   struct can_frame *rxdata)
+				   const struct can_frame *rxdata)
 {
 	memcpy(lastdata, rxdata, CFSIZ);
 
-	/* mark as used */
-	lastdata->can_dlc |= RX_RECV;
+	/* mark as used and throttled by default */
+	lastdata->can_dlc |= (RX_RECV|RX_THR);
 
-	/* throtteling mode inactive OR data update already on the run ? */
-	if (!op->kt_ival2.tv64 || hrtimer_callback_running(&op->thrtimer)) {
+	/* throtteling mode inactive ? */
+	if (!op->kt_ival2.tv64) {
 		/* send RX_CHANGED to the user immediately */
-		bcm_rx_changed(op, rxdata);
+		bcm_rx_changed(op, lastdata);
 		return;
 	}
 
-	if (hrtimer_active(&op->thrtimer)) {
-		/* mark as 'throttled' */
-		lastdata->can_dlc |= RX_THR;
+	/* with active throttling timer we are just done here */
+	if (hrtimer_active(&op->thrtimer))
 		return;
-	}
 
-	if (!op->kt_lastmsg.tv64) {
-		/* send first RX_CHANGED to the user immediately */
-		bcm_rx_changed(op, rxdata);
-		op->kt_lastmsg = ktime_get();
-		return;
-	}
+	/* first receiption with enabled throttling mode */
+	if (!op->kt_lastmsg.tv64)
+		goto rx_changed_settime;
 
+	/* got a second frame inside a potential throttle period? */
 	if (ktime_us_delta(ktime_get(), op->kt_lastmsg) <
 	    ktime_to_us(op->kt_ival2)) {
-		/* mark as 'throttled' and start timer */
-		lastdata->can_dlc |= RX_THR;
+		/* do not send the saved data - only start throttle timer */
 		hrtimer_start(&op->thrtimer,
 			      ktime_add(op->kt_lastmsg, op->kt_ival2),
 			      HRTIMER_MODE_ABS);
@@ -458,7 +462,8 @@ static void bcm_rx_update_and_send(struct bcm_op *op,
 	}
 
 	/* the gap was that big, that throttling was not needed here */
-	bcm_rx_changed(op, rxdata);
+rx_changed_settime:
+	bcm_rx_changed(op, lastdata);
 	op->kt_lastmsg = ktime_get();
 }
 
@@ -467,7 +472,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op,
  *                       received data stored in op->last_frames[]
  */
 static void bcm_rx_cmp_to_index(struct bcm_op *op, int index,
-				struct can_frame *rxdata)
+				const struct can_frame *rxdata)
 {
 	/*
 	 * no one uses the MSBs of can_dlc for comparation,
@@ -511,14 +516,12 @@ static void bcm_rx_starttimer(struct bcm_op *op)
 		hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL);
 }
 
-/*
- * bcm_rx_timeout_handler - when the (cyclic) CAN frame receiption timed out
- */
-static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
+static void bcm_rx_timeout_tsklet(unsigned long data)
 {
-	struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
+	struct bcm_op *op = (struct bcm_op *)data;
 	struct bcm_msg_head msg_head;
 
+	/* create notification to user */
 	msg_head.opcode  = RX_TIMEOUT;
 	msg_head.flags   = op->flags;
 	msg_head.count   = op->count;
@@ -528,6 +531,17 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
 	msg_head.nframes = 0;
 
 	bcm_send_to_user(op, &msg_head, NULL, 0);
+}
+
+/*
+ * bcm_rx_timeout_handler - when the (cyclic) CAN frame receiption timed out
+ */
+static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
+{
+	struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
+
+	/* schedule before NET_RX_SOFTIRQ */
+	tasklet_hi_schedule(&op->tsklet);
 
 	/* no restart of the timer is done here! */
 
@@ -541,9 +555,25 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
 }
 
 /*
+ * bcm_rx_do_flush - helper for bcm_rx_thr_flush
+ */
+static inline int bcm_rx_do_flush(struct bcm_op *op, int update, int index)
+{
+	if ((op->last_frames) && (op->last_frames[index].can_dlc & RX_THR)) {
+		if (update)
+			bcm_rx_changed(op, &op->last_frames[index]);
+		return 1;
+	}
+	return 0;
+}
+
+/*
  * bcm_rx_thr_flush - Check for throttled data and send it to the userspace
+ *
+ * update == 0 : just check if throttled data is available  (any irq context)
+ * update == 1 : check and send throttled data to userspace (soft_irq context)
  */
-static int bcm_rx_thr_flush(struct bcm_op *op)
+static int bcm_rx_thr_flush(struct bcm_op *op, int update)
 {
 	int updated = 0;
 
@@ -551,27 +581,25 @@ static int bcm_rx_thr_flush(struct bcm_op *op)
 		int i;
 
 		/* for MUX filter we start at index 1 */
-		for (i = 1; i < op->nframes; i++) {
-			if ((op->last_frames) &&
-			    (op->last_frames[i].can_dlc & RX_THR)) {
-				op->last_frames[i].can_dlc &= ~RX_THR;
-				bcm_rx_changed(op, &op->last_frames[i]);
-				updated++;
-			}
-		}
+		for (i = 1; i < op->nframes; i++)
+			updated += bcm_rx_do_flush(op, update, i);
 
 	} else {
 		/* for RX_FILTER_ID and simple filter */
-		if (op->last_frames && (op->last_frames[0].can_dlc & RX_THR)) {
-			op->last_frames[0].can_dlc &= ~RX_THR;
-			bcm_rx_changed(op, &op->last_frames[0]);
-			updated++;
-		}
+		updated += bcm_rx_do_flush(op, update, 0);
 	}
 
 	return updated;
 }
 
+static void bcm_rx_thr_tsklet(unsigned long data)
+{
+	struct bcm_op *op = (struct bcm_op *)data;
+
+	/* push the changed data to the userspace */
+	bcm_rx_thr_flush(op, 1);
+}
+
 /*
  * bcm_rx_thr_handler - the time for blocked content updates is over now:
  *                      Check for throttled data and send it to the userspace
@@ -580,7 +608,9 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer)
 {
 	struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer);
 
-	if (bcm_rx_thr_flush(op)) {
+	tasklet_schedule(&op->thrtsklet);
+
+	if (bcm_rx_thr_flush(op, 0)) {
 		hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2);
 		return HRTIMER_RESTART;
 	} else {
@@ -596,48 +626,38 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer)
 static void bcm_rx_handler(struct sk_buff *skb, void *data)
 {
 	struct bcm_op *op = (struct bcm_op *)data;
-	struct can_frame rxframe;
+	const struct can_frame *rxframe = (struct can_frame *)skb->data;
 	int i;
 
 	/* disable timeout */
 	hrtimer_cancel(&op->timer);
 
-	if (skb->len == sizeof(rxframe)) {
-		memcpy(&rxframe, skb->data, sizeof(rxframe));
-		/* save rx timestamp */
-		op->rx_stamp = skb->tstamp;
-		/* save originator for recvfrom() */
-		op->rx_ifindex = skb->dev->ifindex;
-		/* update statistics */
-		op->frames_abs++;
-		kfree_skb(skb);
+	if (op->can_id != rxframe->can_id)
+		goto rx_freeskb;
 
-	} else {
-		kfree_skb(skb);
-		return;
-	}
-
-	if (op->can_id != rxframe.can_id)
-		return;
+	/* save rx timestamp */
+	op->rx_stamp = skb->tstamp;
+	/* save originator for recvfrom() */
+	op->rx_ifindex = skb->dev->ifindex;
+	/* update statistics */
+	op->frames_abs++;
 
 	if (op->flags & RX_RTR_FRAME) {
 		/* send reply for RTR-request (placed in op->frames[0]) */
 		bcm_can_tx(op);
-		return;
+		goto rx_freeskb;
 	}
 
 	if (op->flags & RX_FILTER_ID) {
 		/* the easiest case */
-		bcm_rx_update_and_send(op, &op->last_frames[0], &rxframe);
-		bcm_rx_starttimer(op);
-		return;
+		bcm_rx_update_and_send(op, &op->last_frames[0], rxframe);
+		goto rx_freeskb_starttimer;
 	}
 
 	if (op->nframes == 1) {
 		/* simple compare with index 0 */
-		bcm_rx_cmp_to_index(op, 0, &rxframe);
-		bcm_rx_starttimer(op);
-		return;
+		bcm_rx_cmp_to_index(op, 0, rxframe);
+		goto rx_freeskb_starttimer;
 	}
 
 	if (op->nframes > 1) {
@@ -649,15 +669,19 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
 		 */
 
 		for (i = 1; i < op->nframes; i++) {
-			if ((GET_U64(&op->frames[0]) & GET_U64(&rxframe)) ==
+			if ((GET_U64(&op->frames[0]) & GET_U64(rxframe)) ==
 			    (GET_U64(&op->frames[0]) &
 			     GET_U64(&op->frames[i]))) {
-				bcm_rx_cmp_to_index(op, i, &rxframe);
+				bcm_rx_cmp_to_index(op, i, rxframe);
 				break;
 			}
 		}
-		bcm_rx_starttimer(op);
 	}
+
+rx_freeskb_starttimer:
+	bcm_rx_starttimer(op);
+rx_freeskb:
+	kfree_skb(skb);
 }
 
 /*
@@ -681,6 +705,12 @@ static void bcm_remove_op(struct bcm_op *op)
 	hrtimer_cancel(&op->timer);
 	hrtimer_cancel(&op->thrtimer);
 
+	if (op->tsklet.func)
+		tasklet_kill(&op->tsklet);
+
+	if (op->thrtsklet.func)
+		tasklet_kill(&op->thrtsklet);
+
 	if ((op->frames) && (op->frames != &op->sframe))
 		kfree(op->frames);
 
@@ -891,6 +921,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		op->timer.function = bcm_tx_timeout_handler;
 
+		/* initialize tasklet for tx countevent notification */
+		tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet,
+			     (unsigned long) op);
+
 		/* currently unused in tx_ops */
 		hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 
@@ -1054,9 +1088,17 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		op->timer.function = bcm_rx_timeout_handler;
 
+		/* initialize tasklet for rx timeout notification */
+		tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet,
+			     (unsigned long) op);
+
 		hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		op->thrtimer.function = bcm_rx_thr_handler;
 
+		/* initialize tasklet for rx throttle handling */
+		tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet,
+			     (unsigned long) op);
+
 		/* add this bcm_op to the list of the rx_ops */
 		list_add(&op->list, &bo->rx_ops);
 
@@ -1102,7 +1144,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 			 */
 			op->kt_lastmsg = ktime_set(0, 0);
 			hrtimer_cancel(&op->thrtimer);
-			bcm_rx_thr_flush(op);
+			bcm_rx_thr_flush(op, 1);
 		}
 
 		if ((op->flags & STARTTIMER) && op->kt_ival1.tv64)
-- 
cgit v0.10.2


From 906f1f0768c736368244ac8fe6023c2b0c31b9ea Mon Sep 17 00:00:00 2001
From: Klaus-Dieter Wacker <kdwacker@de.ibm.com>
Date: Sun, 4 Jan 2009 17:34:10 -0800
Subject: qeth: HiperSockets mcl string conversion (pre z9 mach)

The pre z9 machines provide an mcl string in EBCDIC format,
z9 or later provide string in ASCII format.

Signed-off-by: Klaus-Dieter Wacker <kdwacker@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index e783644..ef6c225 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2250,7 +2250,8 @@ void qeth_print_status_message(struct qeth_card *card)
 		}
 		/* fallthrough */
 	case QETH_CARD_TYPE_IQD:
-		if (card->info.guestlan) {
+		if ((card->info.guestlan) ||
+		    (card->info.mcl_level[0] & 0x80)) {
 			card->info.mcl_level[0] = (char) _ebcasc[(__u8)
 				card->info.mcl_level[0]];
 			card->info.mcl_level[1] = (char) _ebcasc[(__u8)
-- 
cgit v0.10.2


From fe94e2e0a63a49d23753f56eacb446c4f73c1dea Mon Sep 17 00:00:00 2001
From: Ursula Braun <ursula.braun@de.ibm.com>
Date: Sun, 4 Jan 2009 17:34:52 -0800
Subject: qeth: exploit source MAC address for inbound layer3 packets

OSA-devices operating in layer3 mode offer adding of the source MAC
address to the QDIO header of inbound packets. The qeth driver can
exploit this functionality to replace FAKELL-entries in the ethernet
header of received packets.

Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index d5ccce1..e0c4557 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -643,7 +643,6 @@ struct qeth_card_options {
 	int macaddr_mode;
 	int fake_broadcast;
 	int add_hhlen;
-	int fake_ll;
 	int layer2;
 	enum qeth_large_send_types large_send;
 	int performance_stats;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index ef6c225..89867bc 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1072,7 +1072,6 @@ static void qeth_set_intial_options(struct qeth_card *card)
 	card->options.macaddr_mode = QETH_TR_MACADDR_NONCANONICAL;
 	card->options.fake_broadcast = 0;
 	card->options.add_hhlen = DEFAULT_ADD_HHLEN;
-	card->options.fake_ll = 0;
 	card->options.performance_stats = 0;
 	card->options.rx_sg_cb = QETH_RX_SG_CB;
 }
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index c0b30b2..cdd6ab9 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1207,12 +1207,9 @@ static int qeth_l3_start_ipa_source_mac(struct qeth_card *card)
 
 	QETH_DBF_TEXT(TRACE, 3, "stsrcmac");
 
-	if (!card->options.fake_ll)
-		return -EOPNOTSUPP;
-
 	if (!qeth_is_supported(card, IPA_SOURCE_MAC)) {
 		dev_info(&card->gdev->dev,
-			"Inbound source address not supported on %s\n",
+			"Inbound source MAC-address not supported on %s\n",
 			QETH_CARD_IFNAME(card));
 		return -EOPNOTSUPP;
 	}
@@ -1221,7 +1218,7 @@ static int qeth_l3_start_ipa_source_mac(struct qeth_card *card)
 					  IPA_CMD_ASS_START, 0);
 	if (rc)
 		dev_warn(&card->gdev->dev,
-			"Starting proxy ARP support for %s failed\n",
+			"Starting source MAC-address support for %s failed\n",
 			QETH_CARD_IFNAME(card));
 	return rc;
 }
@@ -1921,8 +1918,13 @@ static inline __u16 qeth_l3_rebuild_skb(struct qeth_card *card,
 			memcpy(tg_addr, card->dev->dev_addr,
 				card->dev->addr_len);
 		}
-		card->dev->header_ops->create(skb, card->dev, prot, tg_addr,
-					      "FAKELL", card->dev->addr_len);
+		if (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_SRC_MAC_ADDR)
+			card->dev->header_ops->create(skb, card->dev, prot,
+				tg_addr, &hdr->hdr.l3.dest_addr[2],
+				card->dev->addr_len);
+		else
+			card->dev->header_ops->create(skb, card->dev, prot,
+				tg_addr, "FAKELL", card->dev->addr_len);
 	}
 
 #ifdef CONFIG_TR
-- 
cgit v0.10.2


From fc9c24603c4b93d84160e14c0a98a754d4328d15 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ursula.braun@de.ibm.com>
Date: Sun, 4 Jan 2009 17:35:18 -0800
Subject: qeth: avoid crash in case of layer mismatch for VSWITCH

For z/VM GuestLAN or VSWITCH devices the transport layer is
configured in z/VM. The layer2 attribute of a participating Linux
device has to match the z/VM definition. In case of a mismatch
Linux currently crashes in qeth recovery due to a reference to the
not yet existing net_device.
Solution: add a check for existence of net_device and add a message
pointing to the mismatch of layer definitions in Linux and z/VM.

Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 89867bc..ffe6960 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -572,6 +572,10 @@ static void qeth_send_control_data_cb(struct qeth_channel *channel,
 	card = CARD_FROM_CDEV(channel->ccwdev);
 	if (qeth_check_idx_response(iob->data)) {
 		qeth_clear_ipacmd_list(card);
+		if (((iob->data[2] & 0xc0) == 0xc0) && iob->data[4] == 0xf6)
+			dev_err(&card->gdev->dev,
+				"The qeth device is not configured "
+				"for the OSI layer required by z/VM\n");
 		qeth_schedule_recovery(card);
 		goto out;
 	}
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 2c48591..21627ba 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1126,9 +1126,11 @@ static int qeth_l2_recover(void *ptr)
 		dev_info(&card->gdev->dev,
 			"Device successfully recovered!\n");
 	else {
-		rtnl_lock();
-		dev_close(card->dev);
-		rtnl_unlock();
+		if (card->dev) {
+			rtnl_lock();
+			dev_close(card->dev);
+			rtnl_unlock();
+		}
 		dev_warn(&card->gdev->dev, "The qeth device driver "
 			"failed to recover an error on the device\n");
 	}
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index cdd6ab9..8f30085 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2082,9 +2082,11 @@ static int qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
 		if (recovery_mode)
 			qeth_l3_stop(card->dev);
 		else {
-			rtnl_lock();
-			dev_close(card->dev);
-			rtnl_unlock();
+			if (card->dev) {
+				rtnl_lock();
+				dev_close(card->dev);
+				rtnl_unlock();
+			}
 		}
 		if (!card->use_hard_stop) {
 			rc = qeth_send_stoplan(card);
-- 
cgit v0.10.2


From 5b54e16f1a54cee8e590cb039b5c067914ae5081 Mon Sep 17 00:00:00 2001
From: Frank Blaschka <frank.blaschka@de.ibm.com>
Date: Sun, 4 Jan 2009 17:35:44 -0800
Subject: qeth: do not spin for SETIP ip assist command

The ip assist hw command for setting an IP address last unacceptable
long so we can not spin while we waiting for the irq. Since we can
ensure process context for all occurrences of this command we can use
wait.

Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index ffe6960..4a2ac0c 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1685,6 +1685,7 @@ int qeth_send_control_data(struct qeth_card *card, int len,
 	unsigned long flags;
 	struct qeth_reply *reply = NULL;
 	unsigned long timeout;
+	struct qeth_ipa_cmd *cmd;
 
 	QETH_DBF_TEXT(TRACE, 2, "sendctl");
 
@@ -1731,17 +1732,34 @@ int qeth_send_control_data(struct qeth_card *card, int len,
 		wake_up(&card->wait_q);
 		return rc;
 	}
-	while (!atomic_read(&reply->received)) {
-		if (time_after(jiffies, timeout)) {
-			spin_lock_irqsave(&reply->card->lock, flags);
-			list_del_init(&reply->list);
-			spin_unlock_irqrestore(&reply->card->lock, flags);
-			reply->rc = -ETIME;
-			atomic_inc(&reply->received);
-			wake_up(&reply->wait_q);
-		}
-		cpu_relax();
-	};
+
+	/* we have only one long running ipassist, since we can ensure
+	   process context of this command we can sleep */
+	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	if ((cmd->hdr.command == IPA_CMD_SETIP) &&
+	    (cmd->hdr.prot_version == QETH_PROT_IPV4)) {
+		if (!wait_event_timeout(reply->wait_q,
+		    atomic_read(&reply->received), timeout))
+			goto time_err;
+	} else {
+		while (!atomic_read(&reply->received)) {
+			if (time_after(jiffies, timeout))
+				goto time_err;
+			cpu_relax();
+		};
+	}
+
+	rc = reply->rc;
+	qeth_put_reply(reply);
+	return rc;
+
+time_err:
+	spin_lock_irqsave(&reply->card->lock, flags);
+	list_del_init(&reply->list);
+	spin_unlock_irqrestore(&reply->card->lock, flags);
+	reply->rc = -ETIME;
+	atomic_inc(&reply->received);
+	wake_up(&reply->wait_q);
 	rc = reply->rc;
 	qeth_put_reply(reply);
 	return rc;
-- 
cgit v0.10.2


From 015e691cfef02f41aa4deb8aa7efc394bb6d85f0 Mon Sep 17 00:00:00 2001
From: Klaus-Dieter Wacker <kdwacker@de.ibm.com>
Date: Sun, 4 Jan 2009 17:36:05 -0800
Subject: qeth: No large send using EDDP for HiperSockets.

From: Klaus-Dieter Wacker <kdwacker@de.ibm.com>

The device driver qeth dos not support large send using EDDP for
HiperSockets.

Signed-off-by: Klaus-Dieter Wacker <kdwacker@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 4a2ac0c..6811dd5 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -287,8 +287,15 @@ int qeth_set_large_send(struct qeth_card *card,
 	card->options.large_send = type;
 	switch (card->options.large_send) {
 	case QETH_LARGE_SEND_EDDP:
-		card->dev->features |= NETIF_F_TSO | NETIF_F_SG |
+		if (card->info.type != QETH_CARD_TYPE_IQD) {
+			card->dev->features |= NETIF_F_TSO | NETIF_F_SG |
 					NETIF_F_HW_CSUM;
+		} else {
+			card->dev->features &= ~(NETIF_F_TSO | NETIF_F_SG |
+						NETIF_F_HW_CSUM);
+			card->options.large_send = QETH_LARGE_SEND_NO;
+			rc = -EOPNOTSUPP;
+		}
 		break;
 	case QETH_LARGE_SEND_TSO:
 		if (qeth_is_supported(card, IPA_OUTBOUND_TSO)) {
-- 
cgit v0.10.2


From 6ea2fde13abd3444008ab5e9585f9ed249e6047e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Sun, 4 Jan 2009 17:36:32 -0800
Subject: qeth: get rid of extra argument after printk to dev_* conversion

drivers/s390/net/qeth_l3_main.c: In function 'qeth_l3_setadapter_parms':
drivers/s390/net/qeth_l3_main.c:1049: warning: too many arguments for format

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 8f30085..cfda1ec 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1047,7 +1047,7 @@ static int qeth_l3_setadapter_parms(struct qeth_card *card)
 		rc = qeth_setadpparms_change_macaddr(card);
 		if (rc)
 			dev_warn(&card->gdev->dev, "Reading the adapter MAC"
-				" address failed\n", rc);
+				" address failed\n");
 	}
 
 	if ((card->info.link_type == QETH_LINK_TYPE_HSTR) ||
-- 
cgit v0.10.2


From ddebc973c56b51b4e5d84d606f0430d81b895d67 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 4 Jan 2009 21:42:53 -0800
Subject: dccp: Lockless integration of CCID congestion-control plugins

Based on Arnaldo's earlier patch, this patch integrates the standardised
CCID congestion control plugins (CCID-2 and CCID-3) of DCCP with dccp.ko:

 * enables a faster connection path by eliminating the need to always go
   through the CCID registration lock;

 * updates the implementation to use only a single array whose size equals
   the number of configured CCIDs instead of the maximum (256);

 * since the CCIDs are now fixed array elements, synchronization is no
   longer needed, simplifying use and implementation.

CCID-2 is suggested as minimum for a basic DCCP implementation (RFC 4340, 10);
CCID-3 is a standards-track CCID supported by RFC 4342 and RFC 5348.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 7aa2a7a..ad6dffd 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -1,7 +1,6 @@
 menuconfig IP_DCCP
 	tristate "The DCCP Protocol (EXPERIMENTAL)"
 	depends on INET && EXPERIMENTAL
-	select IP_DCCP_CCID2
 	---help---
 	  Datagram Congestion Control Protocol (RFC 4340)
 
@@ -25,9 +24,6 @@ config INET_DCCP_DIAG
 	def_tristate y if (IP_DCCP = y && INET_DIAG = y)
 	def_tristate m
 
-config IP_DCCP_ACKVEC
-	bool
-
 source "net/dccp/ccids/Kconfig"
 
 menu "DCCP Kernel Hacking"
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index f4f8793..5ff2e7b 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -2,14 +2,19 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
 
 dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
 
+#
+# CCID algorithms to be used by dccp.ko
+#
+# CCID-2 is default (RFC 4340, p. 77) and has Ack Vectors as dependency
+dccp-y += ccids/ccid2.o ackvec.o
+dccp-$(CONFIG_IP_DCCP_CCID3)	+= ccids/ccid3.o
+
 dccp_ipv4-y := ipv4.o
 
 # build dccp_ipv6 as module whenever either IPv6 or DCCP is a module
 obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
 dccp_ipv6-y := ipv6.o
 
-dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
-
 obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
 obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
 
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 4ccee03..569a33a 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -84,7 +84,7 @@ struct dccp_ackvec_record {
 struct sock;
 struct sk_buff;
 
-#ifdef CONFIG_IP_DCCP_ACKVEC
+#ifndef ___OLD_INTERFACE_TO_BE_REMOVED___
 extern int dccp_ackvec_init(void);
 extern void dccp_ackvec_exit(void);
 
@@ -106,7 +106,7 @@ static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
 {
 	return av->av_vec_len;
 }
-#else /* CONFIG_IP_DCCP_ACKVEC */
+#else /* ___OLD_INTERFACE_TO_BE_REMOVED___ */
 static inline int dccp_ackvec_init(void)
 {
 	return 0;
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index bcc643f..538d3b1 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -13,6 +13,70 @@
 
 #include "ccid.h"
 
+static struct ccid_operations *ccids[] = {
+	&ccid2_ops,
+#ifdef CONFIG_IP_DCCP_CCID3
+	&ccid3_ops,
+#endif
+};
+
+static struct ccid_operations *ccid_by_number(const u8 id)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ccids); i++)
+		if (ccids[i]->ccid_id == id)
+			return ccids[i];
+	return NULL;
+}
+
+/* check that up to @array_len members in @ccid_array are supported */
+bool ccid_support_check(u8 const *ccid_array, u8 array_len)
+{
+	while (array_len > 0)
+		if (ccid_by_number(ccid_array[--array_len]) == NULL)
+			return false;
+	return true;
+}
+
+/**
+ * ccid_get_builtin_ccids  -  Populate a list of built-in CCIDs
+ * @ccid_array: pointer to copy into
+ * @array_len: value to return length into
+ * This function allocates memory - caller must see that it is freed after use.
+ */
+int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len)
+{
+	*ccid_array = kmalloc(ARRAY_SIZE(ccids), gfp_any());
+	if (*ccid_array == NULL)
+		return -ENOBUFS;
+
+	for (*array_len = 0; *array_len < ARRAY_SIZE(ccids); *array_len += 1)
+		(*ccid_array)[*array_len] = ccids[*array_len]->ccid_id;
+	return 0;
+}
+
+int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
+				  char __user *optval, int __user *optlen)
+{
+	u8 *ccid_array, array_len;
+	int err = 0;
+
+	if (len < ARRAY_SIZE(ccids))
+		return -EINVAL;
+
+	if (ccid_get_builtin_ccids(&ccid_array, &array_len))
+		return -ENOBUFS;
+
+	if (put_user(array_len, optlen) ||
+	    copy_to_user(optval, ccid_array, array_len))
+		err = -EFAULT;
+
+	kfree(ccid_array);
+	return err;
+}
+
+#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___
 static u8 builtin_ccids[] = {
 	DCCPC_CCID2,		/* CCID2 is supported by default */
 #if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE)
@@ -62,6 +126,7 @@ static inline void ccids_read_unlock(void)
 #define ccids_read_lock() do { } while(0)
 #define ccids_read_unlock() do { } while(0)
 #endif
+#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */
 
 static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
 {
@@ -93,6 +158,7 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
 	}
 }
 
+#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___
 /* check that up to @array_len members in @ccid_array are supported */
 bool ccid_support_check(u8 const *ccid_array, u8 array_len)
 {
@@ -133,8 +199,9 @@ int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
 		return -EFAULT;
 	return 0;
 }
+#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */
 
-int ccid_register(struct ccid_operations *ccid_ops)
+static int ccid_activate(struct ccid_operations *ccid_ops)
 {
 	int err = -ENOBUFS;
 
@@ -152,79 +219,40 @@ int ccid_register(struct ccid_operations *ccid_ops)
 	if (ccid_ops->ccid_hc_tx_slab == NULL)
 		goto out_free_rx_slab;
 
-	ccids_write_lock();
-	err = -EEXIST;
-	if (ccids[ccid_ops->ccid_id] == NULL) {
-		ccids[ccid_ops->ccid_id] = ccid_ops;
-		err = 0;
-	}
-	ccids_write_unlock();
-	if (err != 0)
-		goto out_free_tx_slab;
-
-	pr_info("CCID: Registered CCID %d (%s)\n",
+	pr_info("CCID: Activated CCID %d (%s)\n",
 		ccid_ops->ccid_id, ccid_ops->ccid_name);
+	err = 0;
 out:
 	return err;
-out_free_tx_slab:
-	ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab);
-	ccid_ops->ccid_hc_tx_slab = NULL;
-	goto out;
 out_free_rx_slab:
 	ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
 	ccid_ops->ccid_hc_rx_slab = NULL;
 	goto out;
 }
 
-EXPORT_SYMBOL_GPL(ccid_register);
-
-int ccid_unregister(struct ccid_operations *ccid_ops)
+static void ccid_deactivate(struct ccid_operations *ccid_ops)
 {
-	ccids_write_lock();
-	ccids[ccid_ops->ccid_id] = NULL;
-	ccids_write_unlock();
-
 	ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab);
 	ccid_ops->ccid_hc_tx_slab = NULL;
 	ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
 	ccid_ops->ccid_hc_rx_slab = NULL;
 
-	pr_info("CCID: Unregistered CCID %d (%s)\n",
+	pr_info("CCID: Deactivated CCID %d (%s)\n",
 		ccid_ops->ccid_id, ccid_ops->ccid_name);
-	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ccid_unregister);
-
 struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
 {
-	struct ccid_operations *ccid_ops;
+	struct ccid_operations *ccid_ops = ccid_by_number(id);
 	struct ccid *ccid = NULL;
 
-	ccids_read_lock();
-#ifdef CONFIG_MODULES
-	if (ccids[id] == NULL) {
-		/* We only try to load if in process context */
-		ccids_read_unlock();
-		if (gfp & GFP_ATOMIC)
-			goto out;
-		request_module("net-dccp-ccid-%d", id);
-		ccids_read_lock();
-	}
-#endif
-	ccid_ops = ccids[id];
 	if (ccid_ops == NULL)
-		goto out_unlock;
-
-	if (!try_module_get(ccid_ops->ccid_owner))
-		goto out_unlock;
-
-	ccids_read_unlock();
+		goto out;
 
 	ccid = kmem_cache_alloc(rx ? ccid_ops->ccid_hc_rx_slab :
 				     ccid_ops->ccid_hc_tx_slab, gfp);
 	if (ccid == NULL)
-		goto out_module_put;
+		goto out;
 	ccid->ccid_ops = ccid_ops;
 	if (rx) {
 		memset(ccid + 1, 0, ccid_ops->ccid_hc_rx_obj_size);
@@ -239,15 +267,10 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
 	}
 out:
 	return ccid;
-out_unlock:
-	ccids_read_unlock();
-	goto out;
 out_free_ccid:
 	kmem_cache_free(rx ? ccid_ops->ccid_hc_rx_slab :
 			ccid_ops->ccid_hc_tx_slab, ccid);
 	ccid = NULL;
-out_module_put:
-	module_put(ccid_ops->ccid_owner);
 	goto out;
 }
 
@@ -270,10 +293,6 @@ static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx)
 			ccid_ops->ccid_hc_tx_exit(sk);
 		kmem_cache_free(ccid_ops->ccid_hc_tx_slab,  ccid);
 	}
-	ccids_read_lock();
-	if (ccids[ccid_ops->ccid_id] != NULL)
-		module_put(ccid_ops->ccid_owner);
-	ccids_read_unlock();
 }
 
 void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk)
@@ -289,3 +308,28 @@ void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk)
 }
 
 EXPORT_SYMBOL_GPL(ccid_hc_tx_delete);
+
+int __init ccid_initialize_builtins(void)
+{
+	int i, err;
+
+	for (i = 0; i < ARRAY_SIZE(ccids); i++) {
+		err = ccid_activate(ccids[i]);
+		if (err)
+			goto unwind_registrations;
+	}
+	return 0;
+
+unwind_registrations:
+	while(--i >= 0)
+		ccid_deactivate(ccids[i]);
+	return err;
+}
+
+void ccid_cleanup_builtins(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ccids); i++)
+		ccid_deactivate(ccids[i]);
+}
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 18f69423..75c21c5 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -29,7 +29,6 @@ struct tcp_info;
  *  @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.)
  *  @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled)
  *  @ccid_name: alphabetical identifier string for @ccid_id
- *  @ccid_owner: module which implements/owns this CCID
  *  @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection
  *  @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket
  *
@@ -48,7 +47,6 @@ struct ccid_operations {
 	unsigned char		ccid_id;
 	__u32			ccid_ccmps;
 	const char		*ccid_name;
-	struct module		*ccid_owner;
 	struct kmem_cache	*ccid_hc_rx_slab,
 				*ccid_hc_tx_slab;
 	__u32			ccid_hc_rx_obj_size,
@@ -90,8 +88,13 @@ struct ccid_operations {
 						 int __user *optlen);
 };
 
-extern int ccid_register(struct ccid_operations *ccid_ops);
-extern int ccid_unregister(struct ccid_operations *ccid_ops);
+extern struct ccid_operations ccid2_ops;
+#ifdef CONFIG_IP_DCCP_CCID3
+extern struct ccid_operations ccid3_ops;
+#endif
+
+extern int  ccid_initialize_builtins(void);
+extern void ccid_cleanup_builtins(void);
 
 struct ccid {
 	struct ccid_operations *ccid_ops;
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 1227594..b30f049 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -1,80 +1,52 @@
 menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
 
-config IP_DCCP_CCID2
-	tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
-	def_tristate IP_DCCP
-	select IP_DCCP_ACKVEC
-	---help---
-	  CCID 2, TCP-like Congestion Control, denotes Additive Increase,
-	  Multiplicative Decrease (AIMD) congestion control with behavior
-	  modelled directly on TCP, including congestion window, slow start,
-	  timeouts, and so forth [RFC 2581].  CCID 2 achieves maximum
-	  bandwidth over the long term, consistent with the use of end-to-end
-	  congestion control, but halves its congestion window in response to
-	  each congestion event.  This leads to the abrupt rate changes
-	  typical of TCP.  Applications should use CCID 2 if they prefer
-	  maximum bandwidth utilization to steadiness of rate.  This is often
-	  the case for applications that are not playing their data directly
-	  to the user.  For example, a hypothetical application that
-	  transferred files over DCCP, using application-level retransmissions
-	  for lost packets, would prefer CCID 2 to CCID 3.  On-line games may
-	  also prefer CCID 2.  See RFC 4341 for further details.
-
-	  CCID2 is the default CCID used by DCCP.
-
 config IP_DCCP_CCID2_DEBUG
-	  bool "CCID2 debugging messages"
-	  depends on IP_DCCP_CCID2
-	  ---help---
-	    Enable CCID2-specific debugging messages.
+	bool "CCID-2 debugging messages"
+	---help---
+	  Enable CCID-2 specific debugging messages.
 
-	    When compiling CCID2 as a module, this debugging output can
-	    additionally be toggled by setting the ccid2_debug module
-	    parameter to 0 or 1.
+	  The debugging output can additionally be toggled by setting the
+	  ccid2_debug parameter to 0 or 1.
 
-	    If in doubt, say N.
+	  If in doubt, say N.
 
 config IP_DCCP_CCID3
-	tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
-	def_tristate IP_DCCP
+	bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)"
+	def_bool y if (IP_DCCP = y || IP_DCCP = m)
 	select IP_DCCP_TFRC_LIB
 	---help---
-	  CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
+	  CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
 	  rate-controlled congestion control mechanism.  TFRC is designed to
 	  be reasonably fair when competing for bandwidth with TCP-like flows,
 	  where a flow is "reasonably fair" if its sending rate is generally
 	  within a factor of two of the sending rate of a TCP flow under the
 	  same conditions.  However, TFRC has a much lower variation of
-	  throughput over time compared with TCP, which makes CCID 3 more
-	  suitable than CCID 2 for applications such streaming media where a
+	  throughput over time compared with TCP, which makes CCID-3 more
+	  suitable than CCID-2 for applications such streaming media where a
 	  relatively smooth sending rate is of importance.
 
-	  CCID 3 is further described in RFC 4342,
+	  CCID-3 is further described in RFC 4342,
 	  http://www.ietf.org/rfc/rfc4342.txt
 
 	  The TFRC congestion control algorithms were initially described in
-	  RFC 3448.
+	  RFC 5448.
 
 	  This text was extracted from RFC 4340 (sec. 10.2),
 	  http://www.ietf.org/rfc/rfc4340.txt
-	  
-	  To compile this CCID as a module, choose M here: the module will be
-	  called dccp_ccid3.
 
-	  If in doubt, say M.
+	  If in doubt, say N.
 
 config IP_DCCP_CCID3_DEBUG
-	  bool "CCID3 debugging messages"
-	  depends on IP_DCCP_CCID3
-	  ---help---
-	    Enable CCID3-specific debugging messages.
+	bool "CCID-3 debugging messages"
+	depends on IP_DCCP_CCID3
+	---help---
+	  Enable CCID-3 specific debugging messages.
 
-	    When compiling CCID3 as a module, this debugging output can
-	    additionally be toggled by setting the ccid3_debug module
-	    parameter to 0 or 1.
+	  The debugging output can additionally be toggled by setting the
+	  ccid3_debug parameter to 0 or 1.
 
-	    If in doubt, say N.
+	  If in doubt, say N.
 
 config IP_DCCP_CCID3_RTO
 	  int "Use higher bound for nofeedback timer"
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile
index 438f20b..cdaefff 100644
--- a/net/dccp/ccids/Makefile
+++ b/net/dccp/ccids/Makefile
@@ -1,9 +1 @@
-obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
-
-dccp_ccid3-y := ccid3.o
-
-obj-$(CONFIG_IP_DCCP_CCID2) += dccp_ccid2.o
-
-dccp_ccid2-y := ccid2.o
-
 obj-y += lib/
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index c9ea19a..d235294 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -768,10 +768,9 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 }
 
-static struct ccid_operations ccid2 = {
+struct ccid_operations ccid2_ops = {
 	.ccid_id		= DCCPC_CCID2,
 	.ccid_name		= "TCP-like",
-	.ccid_owner		= THIS_MODULE,
 	.ccid_hc_tx_obj_size	= sizeof(struct ccid2_hc_tx_sock),
 	.ccid_hc_tx_init	= ccid2_hc_tx_init,
 	.ccid_hc_tx_exit	= ccid2_hc_tx_exit,
@@ -784,22 +783,5 @@ static struct ccid_operations ccid2 = {
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 module_param(ccid2_debug, bool, 0644);
-MODULE_PARM_DESC(ccid2_debug, "Enable debug messages");
+MODULE_PARM_DESC(ccid2_debug, "Enable CCID-2 debug messages");
 #endif
-
-static __init int ccid2_module_init(void)
-{
-	return ccid_register(&ccid2);
-}
-module_init(ccid2_module_init);
-
-static __exit void ccid2_module_exit(void)
-{
-	ccid_unregister(&ccid2);
-}
-module_exit(ccid2_module_exit);
-
-MODULE_AUTHOR("Andrea Bittau <a.bittau@cs.ucl.ac.uk>");
-MODULE_DESCRIPTION("DCCP TCP-Like (CCID2) CCID");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("net-dccp-ccid-2");
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 3b8bd7c..a27b7f4 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -940,10 +940,9 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 	return 0;
 }
 
-static struct ccid_operations ccid3 = {
+struct ccid_operations ccid3_ops = {
 	.ccid_id		   = DCCPC_CCID3,
 	.ccid_name		   = "TCP-Friendly Rate Control",
-	.ccid_owner		   = THIS_MODULE,
 	.ccid_hc_tx_obj_size	   = sizeof(struct ccid3_hc_tx_sock),
 	.ccid_hc_tx_init	   = ccid3_hc_tx_init,
 	.ccid_hc_tx_exit	   = ccid3_hc_tx_exit,
@@ -964,23 +963,5 @@ static struct ccid_operations ccid3 = {
 
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
 module_param(ccid3_debug, bool, 0644);
-MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
+MODULE_PARM_DESC(ccid3_debug, "Enable CCID-3 debug messages");
 #endif
-
-static __init int ccid3_module_init(void)
-{
-	return ccid_register(&ccid3);
-}
-module_init(ccid3_module_init);
-
-static __exit void ccid3_module_exit(void)
-{
-	ccid_unregister(&ccid3);
-}
-module_exit(ccid3_module_exit);
-
-MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
-	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
-MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("net-dccp-ccid-3");
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 0bc4c9a..f2230fc 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -432,10 +432,8 @@ static inline int dccp_ack_pending(const struct sock *sk)
 {
 	const struct dccp_sock *dp = dccp_sk(sk);
 	return dp->dccps_timestamp_echo != 0 ||
-#ifdef CONFIG_IP_DCCP_ACKVEC
 	       (dp->dccps_hc_rx_ackvec != NULL &&
 		dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
-#endif
 	       inet_csk_ack_scheduled(sk);
 }
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 1747cca..945b4d5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1118,9 +1118,15 @@ static int __init dccp_init(void)
 	if (rc)
 		goto out_ackvec_exit;
 
+	rc = ccid_initialize_builtins();
+	if (rc)
+		goto out_sysctl_exit;
+
 	dccp_timestamping_init();
 out:
 	return rc;
+out_sysctl_exit:
+	dccp_sysctl_exit();
 out_ackvec_exit:
 	dccp_ackvec_exit();
 out_free_dccp_mib:
@@ -1143,6 +1149,7 @@ out_free_percpu:
 
 static void __exit dccp_fini(void)
 {
+	ccid_cleanup_builtins();
 	dccp_mib_exit();
 	free_pages((unsigned long)dccp_hashinfo.bhash,
 		   get_order(dccp_hashinfo.bhash_size *
-- 
cgit v0.10.2


From e5fd56ca4eb3a130882bbef69d6952ef6aca5c8d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 4 Jan 2009 21:43:23 -0800
Subject: dccp: Clean up ccid.c after integration of CCID plugins

This patch cleans up after integrating the CCID modules and, in addition,

 * moves the if/else cases from ccid_delete() into ccid_hc_{tx,rx}_delete();
 * removes the 'gfp' argument to ccid_new() - since it is always gfp_any().

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 569a33a..45f95e5 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -84,7 +84,6 @@ struct dccp_ackvec_record {
 struct sock;
 struct sk_buff;
 
-#ifndef ___OLD_INTERFACE_TO_BE_REMOVED___
 extern int dccp_ackvec_init(void);
 extern void dccp_ackvec_exit(void);
 
@@ -106,52 +105,4 @@ static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
 {
 	return av->av_vec_len;
 }
-#else /* ___OLD_INTERFACE_TO_BE_REMOVED___ */
-static inline int dccp_ackvec_init(void)
-{
-	return 0;
-}
-
-static inline void dccp_ackvec_exit(void)
-{
-}
-
-static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
-{
-	return NULL;
-}
-
-static inline void dccp_ackvec_free(struct dccp_ackvec *av)
-{
-}
-
-static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
-				  const u64 ackno, const u8 state)
-{
-	return -1;
-}
-
-static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
-					       struct sock *sk, const u64 ackno)
-{
-}
-
-static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
-				    const u64 *ackno, const u8 opt,
-				    const u8 *value, const u8 len)
-{
-	return -1;
-}
-
-static inline int dccp_insert_option_ackvec(const struct sock *sk,
-					    const struct sk_buff *skb)
-{
-	return -1;
-}
-
-static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
-{
-	return 0;
-}
-#endif /* CONFIG_IP_DCCP_ACKVEC */
 #endif /* _ACKVEC_H */
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 538d3b1..19b214a 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -76,58 +76,6 @@ int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
 	return err;
 }
 
-#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___
-static u8 builtin_ccids[] = {
-	DCCPC_CCID2,		/* CCID2 is supported by default */
-#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE)
-	DCCPC_CCID3,
-#endif
-};
-
-static struct ccid_operations *ccids[CCID_MAX];
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
-static atomic_t ccids_lockct = ATOMIC_INIT(0);
-static DEFINE_SPINLOCK(ccids_lock);
-
-/*
- * The strategy is: modifications ccids vector are short, do not sleep and
- * veeery rare, but read access should be free of any exclusive locks.
- */
-static void ccids_write_lock(void)
-{
-	spin_lock(&ccids_lock);
-	while (atomic_read(&ccids_lockct) != 0) {
-		spin_unlock(&ccids_lock);
-		yield();
-		spin_lock(&ccids_lock);
-	}
-}
-
-static inline void ccids_write_unlock(void)
-{
-	spin_unlock(&ccids_lock);
-}
-
-static inline void ccids_read_lock(void)
-{
-	atomic_inc(&ccids_lockct);
-	smp_mb__after_atomic_inc();
-	spin_unlock_wait(&ccids_lock);
-}
-
-static inline void ccids_read_unlock(void)
-{
-	atomic_dec(&ccids_lockct);
-}
-
-#else
-#define ccids_write_lock() do { } while(0)
-#define ccids_write_unlock() do { } while(0)
-#define ccids_read_lock() do { } while(0)
-#define ccids_read_unlock() do { } while(0)
-#endif
-#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */
-
 static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
 {
 	struct kmem_cache *slab;
@@ -158,49 +106,6 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
 	}
 }
 
-#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___
-/* check that up to @array_len members in @ccid_array are supported */
-bool ccid_support_check(u8 const *ccid_array, u8 array_len)
-{
-	u8 i, j, found;
-
-	for (i = 0, found = 0; i < array_len; i++, found = 0) {
-		for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++)
-			found = (ccid_array[i] == builtin_ccids[j]);
-		if (!found)
-			return false;
-	}
-	return true;
-}
-
-/**
- * ccid_get_builtin_ccids  -  Provide copy of `builtin' CCID array
- * @ccid_array: pointer to copy into
- * @array_len: value to return length into
- * This function allocates memory - caller must see that it is freed after use.
- */
-int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len)
-{
-	*ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any());
-	if (*ccid_array == NULL)
-		return -ENOBUFS;
-	*array_len = ARRAY_SIZE(builtin_ccids);
-	return 0;
-}
-
-int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
-				    char __user *optval, int __user *optlen)
-{
-	if (len < sizeof(builtin_ccids))
-		return -EINVAL;
-
-	if (put_user(sizeof(builtin_ccids), optlen) ||
-	    copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids)))
-		return -EFAULT;
-	return 0;
-}
-#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */
-
 static int ccid_activate(struct ccid_operations *ccid_ops)
 {
 	int err = -ENOBUFS;
@@ -241,7 +146,7 @@ static void ccid_deactivate(struct ccid_operations *ccid_ops)
 		ccid_ops->ccid_id, ccid_ops->ccid_name);
 }
 
-struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
+struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx)
 {
 	struct ccid_operations *ccid_ops = ccid_by_number(id);
 	struct ccid *ccid = NULL;
@@ -250,7 +155,7 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
 		goto out;
 
 	ccid = kmem_cache_alloc(rx ? ccid_ops->ccid_hc_rx_slab :
-				     ccid_ops->ccid_hc_tx_slab, gfp);
+				     ccid_ops->ccid_hc_tx_slab, gfp_any());
 	if (ccid == NULL)
 		goto out;
 	ccid->ccid_ops = ccid_ops;
@@ -274,41 +179,24 @@ out_free_ccid:
 	goto out;
 }
 
-EXPORT_SYMBOL_GPL(ccid_new);
-
-static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx)
-{
-	struct ccid_operations *ccid_ops;
-
-	if (ccid == NULL)
-		return;
-
-	ccid_ops = ccid->ccid_ops;
-	if (rx) {
-		if (ccid_ops->ccid_hc_rx_exit != NULL)
-			ccid_ops->ccid_hc_rx_exit(sk);
-		kmem_cache_free(ccid_ops->ccid_hc_rx_slab,  ccid);
-	} else {
-		if (ccid_ops->ccid_hc_tx_exit != NULL)
-			ccid_ops->ccid_hc_tx_exit(sk);
-		kmem_cache_free(ccid_ops->ccid_hc_tx_slab,  ccid);
-	}
-}
-
 void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk)
 {
-	ccid_delete(ccid, sk, 1);
+	if (ccid != NULL) {
+		if (ccid->ccid_ops->ccid_hc_rx_exit != NULL)
+			ccid->ccid_ops->ccid_hc_rx_exit(sk);
+		kmem_cache_free(ccid->ccid_ops->ccid_hc_rx_slab, ccid);
+	}
 }
 
-EXPORT_SYMBOL_GPL(ccid_hc_rx_delete);
-
 void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk)
 {
-	ccid_delete(ccid, sk, 0);
+	if (ccid != NULL) {
+		if (ccid->ccid_ops->ccid_hc_tx_exit != NULL)
+			ccid->ccid_ops->ccid_hc_tx_exit(sk);
+		kmem_cache_free(ccid->ccid_ops->ccid_hc_tx_slab, ccid);
+	}
 }
 
-EXPORT_SYMBOL_GPL(ccid_hc_tx_delete);
-
 int __init ccid_initialize_builtins(void)
 {
 	int i, err;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 75c21c5..facedd2 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -111,8 +111,7 @@ extern int  ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
 extern int  ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
 					  char __user *, int __user *);
 
-extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
-			     gfp_t gfp);
+extern struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx);
 
 static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp)
 {
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 30f9fb7..741b2db 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -34,7 +34,7 @@
 static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid *new_ccid = ccid_new(ccid, sk, rx, gfp_any());
+	struct ccid *new_ccid = ccid_new(ccid, sk, rx);
 
 	if (new_ccid == NULL)
 		return -ENOMEM;
-- 
cgit v0.10.2


From 129fa44785a399248ae2466b6cb5c655e96668f7 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 4 Jan 2009 21:45:33 -0800
Subject: dccp: Integrate the TFRC library with DCCP

This patch integrates the TFRC library, which is a dependency of CCID-3 (and
CCID-4), with the new use of CCIDs in the DCCP module.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 5ff2e7b..2991efc 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -8,6 +8,10 @@ dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
 # CCID-2 is default (RFC 4340, p. 77) and has Ack Vectors as dependency
 dccp-y += ccids/ccid2.o ackvec.o
 dccp-$(CONFIG_IP_DCCP_CCID3)	+= ccids/ccid3.o
+dccp-$(CONFIG_IP_DCCP_TFRC_LIB) += ccids/lib/tfrc.o		\
+				   ccids/lib/tfrc_equation.o	\
+				   ccids/lib/packet_history.o	\
+				   ccids/lib/loss_interval.o
 
 dccp_ipv4-y := ipv4.o
 
@@ -22,5 +26,3 @@ dccp-$(CONFIG_SYSCTL) += sysctl.o
 
 dccp_diag-y := diag.o
 dccp_probe-y := probe.o
-
-obj-y += ccids/
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 19b214a..f3e9ba1 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -12,6 +12,7 @@
  */
 
 #include "ccid.h"
+#include "ccids/lib/tfrc.h"
 
 static struct ccid_operations *ccids[] = {
 	&ccid2_ops,
@@ -199,7 +200,10 @@ void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk)
 
 int __init ccid_initialize_builtins(void)
 {
-	int i, err;
+	int i, err = tfrc_lib_init();
+
+	if (err)
+		return err;
 
 	for (i = 0; i < ARRAY_SIZE(ccids); i++) {
 		err = ccid_activate(ccids[i]);
@@ -211,6 +215,7 @@ int __init ccid_initialize_builtins(void)
 unwind_registrations:
 	while(--i >= 0)
 		ccid_deactivate(ccids[i]);
+	tfrc_lib_exit();
 	return err;
 }
 
@@ -220,4 +225,5 @@ void ccid_cleanup_builtins(void)
 
 	for (i = 0; i < ARRAY_SIZE(ccids); i++)
 		ccid_deactivate(ccids[i]);
+	tfrc_lib_exit();
 }
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index b30f049..b28bf96 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -14,7 +14,6 @@ config IP_DCCP_CCID2_DEBUG
 config IP_DCCP_CCID3
 	bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)"
 	def_bool y if (IP_DCCP = y || IP_DCCP = m)
-	select IP_DCCP_TFRC_LIB
 	---help---
 	  CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
 	  rate-controlled congestion control mechanism.  TFRC is designed to
@@ -80,12 +79,8 @@ config IP_DCCP_CCID3_RTO
 	    therefore not be performed on WANs.
 
 config IP_DCCP_TFRC_LIB
-	tristate
-	default n
+	def_bool y if IP_DCCP_CCID3
 
 config IP_DCCP_TFRC_DEBUG
-	bool
-	depends on IP_DCCP_TFRC_LIB
-	default y if IP_DCCP_CCID3_DEBUG
-
+	def_bool y if IP_DCCP_CCID3_DEBUG
 endmenu
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile
deleted file mode 100644
index cdaefff..0000000
--- a/net/dccp/ccids/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-y += lib/
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile
deleted file mode 100644
index 68c93e3..0000000
--- a/net/dccp/ccids/lib/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
-
-dccp_tfrc_lib-y := tfrc.o tfrc_equation.o packet_history.o loss_interval.o
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 5b3ce06..4d1e401 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -60,7 +60,6 @@ void tfrc_lh_cleanup(struct tfrc_loss_hist *lh)
 			lh->ring[LIH_INDEX(lh->counter)] = NULL;
 		}
 }
-EXPORT_SYMBOL_GPL(tfrc_lh_cleanup);
 
 static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
 {
@@ -121,7 +120,6 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 
 	return (lh->i_mean < old_i_mean);
 }
-EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
 
 /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
 static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
@@ -169,7 +167,6 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
 	}
 	return 1;
 }
-EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
 
 int __init tfrc_li_init(void)
 {
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 6cc108a..b7785b3 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -94,7 +94,6 @@ int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
 	*headp	     = entry;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_add);
 
 void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
 {
@@ -109,7 +108,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
 
 	*headp = NULL;
 }
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
 
 u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
 		     const ktime_t now)
@@ -127,7 +125,6 @@ u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
 
 	return rtt;
 }
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
 
 
 /*
@@ -172,7 +169,6 @@ void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 
 	tfrc_rx_hist_entry_from_skb(entry, skb, ndp);
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
 
 /* has the packet contained in skb been seen before? */
 int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
@@ -189,7 +185,6 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
 
 static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
 {
@@ -390,7 +385,6 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 	}
 	return is_new_loss;
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
 
 int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
 {
@@ -412,7 +406,6 @@ out_free:
 	}
 	return -ENOBUFS;
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
 
 void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 {
@@ -424,7 +417,6 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 			h->ring[i] = NULL;
 		}
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
 
 /**
  * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
@@ -495,4 +487,3 @@ keep_ref_for_next_time:
 
 	return sample;
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 1859162..60c412c 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -1,20 +1,18 @@
 /*
- * TFRC: main module holding the pieces of the TFRC library together
+ * TFRC library initialisation
  *
  * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
  * Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
  */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
 #include "tfrc.h"
 
 #ifdef CONFIG_IP_DCCP_TFRC_DEBUG
 int tfrc_debug;
 module_param(tfrc_debug, bool, 0644);
-MODULE_PARM_DESC(tfrc_debug, "Enable debug messages");
+MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages");
 #endif
 
-static int __init tfrc_module_init(void)
+int __init tfrc_lib_init(void)
 {
 	int rc = tfrc_li_init();
 
@@ -38,18 +36,9 @@ out:
 	return rc;
 }
 
-static void __exit tfrc_module_exit(void)
+void __exit tfrc_lib_exit(void)
 {
 	tfrc_rx_packet_history_exit();
 	tfrc_tx_packet_history_exit();
 	tfrc_li_exit();
 }
-
-module_init(tfrc_module_init);
-module_exit(tfrc_module_exit);
-
-MODULE_AUTHOR("Gerrit Renker <gerrit@erg.abdn.ac.uk>, "
-	      "Ian McDonald <ian.mcdonald@jandi.co.nz>, "
-	      "Arnaldo Carvalho de Melo <acme@redhat.com>");
-MODULE_DESCRIPTION("DCCP TFRC library");
-MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index ed98575..e9720b1 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -17,7 +17,8 @@
 #include <linux/types.h>
 #include <linux/math64.h>
 #include "../../dccp.h"
-/* internal includes that this module exports: */
+
+/* internal includes that this library exports: */
 #include "loss_interval.h"
 #include "packet_history.h"
 
@@ -66,4 +67,12 @@ extern void tfrc_rx_packet_history_exit(void);
 
 extern int  tfrc_li_init(void);
 extern void tfrc_li_exit(void);
+
+#ifdef CONFIG_IP_DCCP_TFRC_LIB
+extern int  tfrc_lib_init(void);
+extern void tfrc_lib_exit(void);
+#else
+#define tfrc_lib_init() (0)
+#define tfrc_lib_exit()
+#endif
 #endif /* _TFRC_H_ */
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 2f20a29..c5d3a9e 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -659,8 +659,6 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 	return scaled_div32(result, f);
 }
 
-EXPORT_SYMBOL_GPL(tfrc_calc_x);
-
 /**
  *  tfrc_calc_x_reverse_lookup  -  try to find p given f(p)
  *
@@ -693,5 +691,3 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
 	index = tfrc_binsearch(fvalue, 0);
 	return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
 }
-
-EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 741b2db..4152308 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1214,8 +1214,6 @@ const char *dccp_feat_typename(const u8 type)
 	return NULL;
 }
 
-EXPORT_SYMBOL_GPL(dccp_feat_typename);
-
 const char *dccp_feat_name(const u8 feat)
 {
 	static const char *feature_names[] = {
@@ -1240,6 +1238,4 @@ const char *dccp_feat_name(const u8 feat)
 
 	return feature_names[feat];
 }
-
-EXPORT_SYMBOL_GPL(dccp_feat_name);
 #endif /* CONFIG_IP_DCCP_DEBUG */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 5eb443f..7648f31 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -741,5 +741,3 @@ u32 dccp_sample_rtt(struct sock *sk, long delta)
 
 	return delta;
 }
-
-EXPORT_SYMBOL_GPL(dccp_sample_rtt);
-- 
cgit v0.10.2


From 761b602620b4627dbd8f1b0bd2896022dc168a6d Mon Sep 17 00:00:00 2001
From: Bastian Blank <waldi@debian.org>
Date: Sun, 4 Jan 2009 23:03:10 -0800
Subject: sparc: Use 64BIT config entry

Use 64BIT config entry to distinguish between 32 and 64bit builds
instead of relying on the ARCH setting.  Using sparc64 as ARCH still
forces 64BIT on.

Inspired by the x86 and s390 configs.

[ Integrated CONFIG_64BIT help text suggestions from Sam -DaveM ]

Signed-off-by: Bastian Blank <waldi@debian.org>
Tested-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 002f7b4..de58c02 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -4,6 +4,17 @@
 
 mainmenu "Linux/SPARC Kernel Configuration"
 
+config 64BIT
+	bool "64-bit kernel" if ARCH = "sparc"
+	default ARCH = "sparc64"
+	help
+	  SPARC is a family of RISC microprocessors designed and marketed by
+	  Sun Microsystems, incorporated.  They are very widely found in Sun
+	  workstations and clones.
+
+	  Say yes to build a 64-bit kernel - formerly known as sparc64
+	  Say no to build a 32-bit kernel - formerly known as sparc
+
 config SPARC
 	bool
 	default y
@@ -15,22 +26,11 @@ config SPARC
 	select RTC_CLASS
 	select RTC_DRV_M48T59
 
-# Identify this as a Sparc32 build
 config SPARC32
-	bool
-	default y if ARCH = "sparc"
-	help
-	  SPARC is a family of RISC microprocessors designed and marketed by
-	  Sun Microsystems, incorporated.  They are very widely found in Sun
-	  workstations and clones. This port covers the original 32-bit SPARC;
-	  it is old and stable and usually considered one of the "big three"
-	  along with the Intel and Alpha ports.  The UltraLinux project
-	  maintains both the SPARC32 and SPARC64 ports; its web page is
-	  available at <http://www.ultralinux.org/>.
+	def_bool !64BIT
 
 config SPARC64
-	bool
-	default y if ARCH = "sparc64"
+	def_bool 64BIT
 	select ARCH_SUPPORTS_MSI
 	select HAVE_FUNCTION_TRACER
 	select HAVE_KRETPROBES
@@ -53,9 +53,6 @@ config BITS
 	default 32 if SPARC32
 	default 64 if SPARC64
 
-config 64BIT
-	def_bool y if SPARC64
-
 config GENERIC_TIME
 	bool
 	default y if SPARC64
-- 
cgit v0.10.2


From 192eee8ef535cfdbdd0c93390e34f27ad7c02084 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 4 Jan 2009 23:17:21 -0800
Subject: sparc: Kill asm/reg*.h

As noticed by Sam Ravnborg, these aren't use for anything.
Neither the kernel nor userland make a reference to this
family of header files.

So just get rid of them.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/reg.h b/arch/sparc/include/asm/reg.h
deleted file mode 100644
index 0c16e19..0000000
--- a/arch/sparc/include/asm/reg.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef ___ASM_SPARC_REG_H
-#define ___ASM_SPARC_REG_H
-#if defined(__sparc__) && defined(__arch64__)
-#include <asm/reg_64.h>
-#else
-#include <asm/reg_32.h>
-#endif
-#endif
diff --git a/arch/sparc/include/asm/reg_32.h b/arch/sparc/include/asm/reg_32.h
deleted file mode 100644
index 1efb056..0000000
--- a/arch/sparc/include/asm/reg_32.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * linux/include/asm/reg.h
- * Layout of the registers as expected by gdb on the Sparc
- * we should replace the user.h definitions with those in
- * this file, we don't even use the other
- * -miguel
- *
- * The names of the structures, constants and aliases in this file
- * have the same names as the sunos ones, some programs rely on these
- * names (gdb for example).
- *
- */
-
-#ifndef __SPARC_REG_H
-#define __SPARC_REG_H
-
-struct regs {
-	int     r_psr;
-#define r_ps r_psr
-        int     r_pc;
-        int     r_npc;
-        int     r_y;
-        int     r_g1;
-        int     r_g2;
-        int     r_g3;
-        int     r_g4;
-        int     r_g5;
-        int     r_g6;
-        int     r_g7;
-        int     r_o0;
-        int     r_o1;
-        int     r_o2;
-        int     r_o3;
-        int     r_o4;
-        int     r_o5;
-        int     r_o6;
-        int     r_o7;
-};
-
-struct fpq {
-        unsigned long *addr;
-        unsigned long instr;
-};
-
-struct  fq {
-        union {
-                double  whole;
-                struct  fpq fpq;
-        } FQu;
-};
-
-#define FPU_REGS_TYPE unsigned int
-#define FPU_FSR_TYPE unsigned
-
-struct fp_status {
-        union {
-                FPU_REGS_TYPE Fpu_regs[32];
-                double  Fpu_dregs[16];
-        } fpu_fr;
-        FPU_FSR_TYPE Fpu_fsr;
-        unsigned Fpu_flags;
-        unsigned Fpu_extra;
-        unsigned Fpu_qcnt;
-        struct fq Fpu_q[16];
-};
-
-#define fpu_regs  f_fpstatus.fpu_fr.Fpu_regs
-#define fpu_dregs f_fpstatus.fpu_fr.Fpu_dregs
-#define fpu_fsr   f_fpstatus.Fpu_fsr
-#define fpu_flags f_fpstatus.Fpu_flags
-#define fpu_extra f_fpstatus.Fpu_extra
-#define fpu_q     f_fpstatus.Fpu_q
-#define fpu_qcnt  f_fpstatus.Fpu_qcnt
-
-struct fpu {
-        struct fp_status f_fpstatus;
-};
-
-#endif /* __SPARC_REG_H */
diff --git a/arch/sparc/include/asm/reg_64.h b/arch/sparc/include/asm/reg_64.h
deleted file mode 100644
index 6f277d7..0000000
--- a/arch/sparc/include/asm/reg_64.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * linux/asm/reg.h
- * Layout of the registers as expected by gdb on the Sparc
- * we should replace the user.h definitions with those in
- * this file, we don't even use the other
- * -miguel
- *
- * The names of the structures, constants and aliases in this file
- * have the same names as the sunos ones, some programs rely on these
- * names (gdb for example).
- *
- */
-
-#ifndef __SPARC64_REG_H
-#define __SPARC64_REG_H
-
-struct regs {
-        unsigned long r_g1;
-        unsigned long r_g2;
-        unsigned long r_g3;
-        unsigned long r_g4;
-        unsigned long r_g5;
-        unsigned long r_g6;
-        unsigned long r_g7;
-        unsigned long r_o0;
-        unsigned long r_o1;
-        unsigned long r_o2;
-        unsigned long r_o3;
-        unsigned long r_o4;
-        unsigned long r_o5;
-        unsigned long r_o6;
-        unsigned long r_o7;
-        unsigned long __pad;
-        unsigned long r_tstate;
-        unsigned long r_tpc;
-        unsigned long r_tnpc;
-        unsigned int  r_y;
-        unsigned int  r_fprs;
-};
-
-#define FPU_REGS_TYPE unsigned int
-#define FPU_FSR_TYPE unsigned long
-
-struct fp_status {
-        unsigned long fpu_fr[32];
-        unsigned long Fpu_fsr;
-};
-
-struct fpu {
-	struct fp_status f_fpstatus;
-};
-
-#define fpu_regs  f_fpstatus.fpu_fr
-#define fpu_fsr   f_fpstatus.Fpu_fsr
-
-#endif /* __SPARC64_REG_H */
-- 
cgit v0.10.2


From e9079cce201784632aed4b1a3121ee38c1ced0b6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 14 Oct 2008 14:43:29 +0100
Subject: GFS2: Support for FIEMAP ioctl

This patch implements the FIEMAP ioctl for GFS2. We can use the generic
code (aside from a lock order issue, solved as per Ted Tso's suggestion)
for which I've introduced a new variant of the generic function. We also
have one exception to deal with, namely stuffed files, so we do that
"by hand", setting all the required flags.

This has been tested with a modified (I could only find an old version) of
Eric's test program, and appears to work correctly.

This patch does not currently support FIEMAP of xattrs, but the plan is to add
that feature at some future point.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Theodore Tso <tytso@mit.edu>
Cc: Eric Sandeen <sandeen@redhat.com>

diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d232991..1e24b65 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -19,6 +19,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/lm_interface.h>
+#include <linux/fiemap.h>
 #include <asm/uaccess.h>
 
 #include "gfs2.h"
@@ -1212,6 +1213,48 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
 	return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er);
 }
 
+static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		       u64 start, u64 len)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+	if (ret)
+		return ret;
+
+	mutex_lock(&inode->i_mutex);
+
+	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	if (ret)
+		goto out;
+
+	if (gfs2_is_stuffed(ip)) {
+		u64 phys = ip->i_no_addr << inode->i_blkbits;
+		u64 size = i_size_read(inode);
+		u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
+			    FIEMAP_EXTENT_DATA_INLINE;
+		phys += sizeof(struct gfs2_dinode);
+		phys += start;
+		if (start + len > size)
+			len = size - start;
+		if (start < size)
+			ret = fiemap_fill_next_extent(fieinfo, start, phys,
+						      len, flags);
+		if (ret == 1)
+			ret = 0;
+	} else {
+		ret = __generic_block_fiemap(inode, fieinfo, start, len,
+					     gfs2_block_map);
+	}
+
+	gfs2_glock_dq_uninit(&gh);
+out:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
 const struct inode_operations gfs2_file_iops = {
 	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
@@ -1220,6 +1263,7 @@ const struct inode_operations gfs2_file_iops = {
 	.getxattr = gfs2_getxattr,
 	.listxattr = gfs2_listxattr,
 	.removexattr = gfs2_removexattr,
+	.fiemap = gfs2_fiemap,
 };
 
 const struct inode_operations gfs2_dir_iops = {
@@ -1239,6 +1283,7 @@ const struct inode_operations gfs2_dir_iops = {
 	.getxattr = gfs2_getxattr,
 	.listxattr = gfs2_listxattr,
 	.removexattr = gfs2_removexattr,
+	.fiemap = gfs2_fiemap,
 };
 
 const struct inode_operations gfs2_symlink_iops = {
@@ -1251,5 +1296,6 @@ const struct inode_operations gfs2_symlink_iops = {
 	.getxattr = gfs2_getxattr,
 	.listxattr = gfs2_listxattr,
 	.removexattr = gfs2_removexattr,
+	.fiemap = gfs2_fiemap,
 };
 
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 43e8b2c..cc3f1aa 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -231,7 +231,8 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 #define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits)
 #define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits);
 
-/*
+/**
+ * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
  * @inode - the inode to map
  * @arg - the pointer to userspace where we copy everything to
  * @get_block - the fs's get_block function
@@ -242,11 +243,15 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
  *
  * If it is possible to have data blocks beyond a hole past @inode->i_size, then
  * please do not use this function, it will stop at the first unmapped block
- * beyond i_size
+ * beyond i_size.
+ *
+ * If you use this function directly, you need to do your own locking. Use
+ * generic_block_fiemap if you want the locking done for you.
  */
-int generic_block_fiemap(struct inode *inode,
-			 struct fiemap_extent_info *fieinfo, u64 start,
-			 u64 len, get_block_t *get_block)
+
+int __generic_block_fiemap(struct inode *inode,
+			   struct fiemap_extent_info *fieinfo, u64 start,
+			   u64 len, get_block_t *get_block)
 {
 	struct buffer_head tmp;
 	unsigned int start_blk;
@@ -260,9 +265,6 @@ int generic_block_fiemap(struct inode *inode,
 
 	start_blk = logical_to_blk(inode, start);
 
-	/* guard against change */
-	mutex_lock(&inode->i_mutex);
-
 	length = (long long)min_t(u64, len, i_size_read(inode));
 	map_len = length;
 
@@ -334,14 +336,36 @@ int generic_block_fiemap(struct inode *inode,
 		cond_resched();
 	} while (1);
 
-	mutex_unlock(&inode->i_mutex);
-
 	/* if ret is 1 then we just hit the end of the extent array */
 	if (ret == 1)
 		ret = 0;
 
 	return ret;
 }
+EXPORT_SYMBOL(__generic_block_fiemap);
+
+/**
+ * generic_block_fiemap - FIEMAP for block based inodes
+ * @inode: The inode to map
+ * @fieinfo: The mapping information
+ * @start: The initial block to map
+ * @len: The length of the extect to attempt to map
+ * @get_block: The block mapping function for the fs
+ *
+ * Calls __generic_block_fiemap to map the inode, after taking
+ * the inode's mutex lock.
+ */
+
+int generic_block_fiemap(struct inode *inode,
+			 struct fiemap_extent_info *fieinfo, u64 start,
+			 u64 len, get_block_t *get_block)
+{
+	int ret;
+	mutex_lock(&inode->i_mutex);
+	ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
 EXPORT_SYMBOL(generic_block_fiemap);
 
 #endif  /*  CONFIG_BLOCK  */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f2a30101..e34bc69 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2059,6 +2059,9 @@ extern int vfs_fstat(unsigned int, struct kstat *);
 
 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
+extern int __generic_block_fiemap(struct inode *inode,
+				  struct fiemap_extent_info *fieinfo, u64 start,
+				  u64 len, get_block_t *get_block);
 extern int generic_block_fiemap(struct inode *inode,
 				struct fiemap_extent_info *fieinfo, u64 start,
 				u64 len, get_block_t *get_block);
-- 
cgit v0.10.2


From b276058371f5c2ad92f9f27373a72b219ed580ed Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 14 Oct 2008 16:05:55 +0100
Subject: GFS2: Rationalise header files

Move the contents of some headers which contained very
little into more sensible places, and remove the original
header files. This should make it easier to find things.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index d576168..bf4676d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -32,7 +32,6 @@
 #include "log.h"
 #include "meta_io.h"
 #include "ops_address.h"
-#include "ops_inode.h"
 #include "quota.h"
 #include "rgrp.h"
 #include "trans.h"
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 2d43f69..c357790 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -10,6 +10,7 @@
 #ifndef __INODE_DOT_H__
 #define __INODE_DOT_H__
 
+#include <linux/fs.h>
 #include "util.h"
 
 static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
@@ -97,5 +98,15 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
 void gfs2_dinode_print(const struct gfs2_inode *ip);
 
+extern const struct inode_operations gfs2_file_iops;
+extern const struct inode_operations gfs2_dir_iops;
+extern const struct inode_operations gfs2_symlink_iops;
+extern const struct file_operations gfs2_file_fops;
+extern const struct file_operations gfs2_dir_fops;
+extern const struct file_operations gfs2_file_fops_nolock;
+extern const struct file_operations gfs2_dir_fops_nolock;
+
+extern void gfs2_set_inode_flags(struct inode *inode);
+
 #endif /* __INODE_DOT_H__ */
 
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index bb2cc30..3eea03c 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -19,7 +19,7 @@
 
 #include "gfs2.h"
 #include "incore.h"
-#include "ops_fstype.h"
+#include "super.h"
 #include "sys.h"
 #include "util.h"
 #include "glock.h"
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index 4a5e676..c2ad363 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -19,7 +19,7 @@
 #include "incore.h"
 #include "dir.h"
 #include "glock.h"
-#include "ops_dentry.h"
+#include "super.h"
 #include "util.h"
 #include "inode.h"
 
diff --git a/fs/gfs2/ops_dentry.h b/fs/gfs2/ops_dentry.h
deleted file mode 100644
index 5caa3db..0000000
--- a/fs/gfs2/ops_dentry.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_DENTRY_DOT_H__
-#define __OPS_DENTRY_DOT_H__
-
-#include <linux/dcache.h>
-
-extern struct dentry_operations gfs2_dops;
-
-#endif /* __OPS_DENTRY_DOT_H__ */
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index bbb8c36..3a9b9b4 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -22,8 +22,7 @@
 #include "glock.h"
 #include "glops.h"
 #include "inode.h"
-#include "ops_dentry.h"
-#include "ops_fstype.h"
+#include "super.h"
 #include "rgrp.h"
 #include "util.h"
 
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 3a747f8..fcfaaef 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -39,7 +39,6 @@
 #include "util.h"
 #include "eaops.h"
 #include "ops_address.h"
-#include "ops_inode.h"
 
 /**
  * gfs2_llseek - seek to a location in a file
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b117fcf..ca463a4 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -27,9 +27,6 @@
 #include "glops.h"
 #include "inode.h"
 #include "mount.h"
-#include "ops_fstype.h"
-#include "ops_dentry.h"
-#include "ops_super.h"
 #include "recovery.h"
 #include "rgrp.h"
 #include "super.h"
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
deleted file mode 100644
index da84905..0000000
--- a/fs/gfs2/ops_fstype.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_FSTYPE_DOT_H__
-#define __OPS_FSTYPE_DOT_H__
-
-#include <linux/fs.h>
-
-extern struct file_system_type gfs2_fs_type;
-extern struct file_system_type gfs2meta_fs_type;
-extern const struct export_operations gfs2_export_ops;
-
-#endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 1e24b65..98440fe 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -32,12 +32,11 @@
 #include "glock.h"
 #include "inode.h"
 #include "meta_io.h"
-#include "ops_dentry.h"
-#include "ops_inode.h"
 #include "quota.h"
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "super.h"
 
 /**
  * gfs2_create - Create a file
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
deleted file mode 100644
index 14b4b79..0000000
--- a/fs/gfs2/ops_inode.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_INODE_DOT_H__
-#define __OPS_INODE_DOT_H__
-
-#include <linux/fs.h>
-
-extern const struct inode_operations gfs2_file_iops;
-extern const struct inode_operations gfs2_dir_iops;
-extern const struct inode_operations gfs2_symlink_iops;
-extern const struct file_operations gfs2_file_fops;
-extern const struct file_operations gfs2_dir_fops;
-extern const struct file_operations gfs2_file_fops_nolock;
-extern const struct file_operations gfs2_dir_fops_nolock;
-
-extern void gfs2_set_inode_flags(struct inode *inode);
-
-#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index d5355d9..9c7678d 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -28,7 +28,6 @@
 #include "inode.h"
 #include "log.h"
 #include "mount.h"
-#include "ops_super.h"
 #include "quota.h"
 #include "recovery.h"
 #include "rgrp.h"
diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h
deleted file mode 100644
index 442a274..0000000
--- a/fs/gfs2/ops_super.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_SUPER_DOT_H__
-#define __OPS_SUPER_DOT_H__
-
-#include <linux/fs.h>
-
-extern const struct super_operations gfs2_super_ops;
-
-#endif /* __OPS_SUPER_DOT_H__ */
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 50a4c9b..1848dad 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -10,6 +10,8 @@
 #ifndef __SUPER_DOT_H__
 #define __SUPER_DOT_H__
 
+#include <linux/fs.h>
+#include <linux/dcache.h>
 #include "incore.h"
 
 void gfs2_lm_unmount(struct gfs2_sbd *sdp);
@@ -46,5 +48,11 @@ int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
 int gfs2_freeze_fs(struct gfs2_sbd *sdp);
 void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
 
+extern struct file_system_type gfs2_fs_type;
+extern struct file_system_type gfs2meta_fs_type;
+extern const struct export_operations gfs2_export_ops;
+extern const struct super_operations gfs2_super_ops;
+extern struct dentry_operations gfs2_dops;
+
 #endif /* __SUPER_DOT_H__ */
 
-- 
cgit v0.10.2


From 1bb7322fd0d5abdce396de51cbc5dbc489523018 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 15 Oct 2008 09:46:39 +0100
Subject: GFS2: Fix up jdata writepage/delete_inode

There is a bug in writepage and delete_inode which allows jdata files to
invalidate pages from the address space without being in a transaction at
the time. This causes problems in case the pages are in the journal. This
patch fixes that case and prevents the resulting oops.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 15f710f..574b222 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -210,25 +210,23 @@ static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc
 {
 	struct inode *inode = page->mapping->host;
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	int error;
+	int ret;
 	int done_trans = 0;
 
-	error = gfs2_writepage_common(page, wbc);
-	if (error <= 0)
-		return error;
-
 	if (PageChecked(page)) {
 		if (wbc->sync_mode != WB_SYNC_ALL)
 			goto out_ignore;
-		error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
-		if (error)
+		ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
+		if (ret)
 			goto out_ignore;
 		done_trans = 1;
 	}
-	error = __gfs2_jdata_writepage(page, wbc);
+	ret = gfs2_writepage_common(page, wbc);
+	if (ret > 0)
+		ret = __gfs2_jdata_writepage(page, wbc);
 	if (done_trans)
 		gfs2_trans_end(sdp);
-	return error;
+	return ret;
 
 out_ignore:
 	redirty_page_for_writepage(wbc, page);
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 9c7678d..2cb744b 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -493,7 +493,7 @@ static void gfs2_delete_inode(struct inode *inode)
 	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
 	error = gfs2_glock_nq(&ip->i_iopen_gh);
 	if (error)
-		goto out_uninit;
+		goto out_truncate;
 
 	if (S_ISDIR(inode->i_mode) &&
 	    (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
@@ -518,6 +518,7 @@ static void gfs2_delete_inode(struct inode *inode)
 	if (error)
 		goto out_unlock;
 
+out_truncate:
 	error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
 	if (error)
 		goto out_unlock;
@@ -526,8 +527,8 @@ static void gfs2_delete_inode(struct inode *inode)
 	gfs2_trans_end(sdp);
 
 out_unlock:
-	gfs2_glock_dq(&ip->i_iopen_gh);
-out_uninit:
+	if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
+		gfs2_glock_dq(&ip->i_iopen_gh);
 	gfs2_holder_uninit(&ip->i_iopen_gh);
 	gfs2_glock_dq_uninit(&gh);
 	if (error && error != GLR_TRYFAILED)
-- 
cgit v0.10.2


From 55ba474daed9763b2f6fe26ad762ee373554d65e Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 24 Oct 2008 11:31:12 -0700
Subject: GFS2: sparse annotation of gl->gl_spin

fs/gfs2/glock.c:308:5: warning: context problem in 'do_promote': '_spin_unlock' expected different context
fs/gfs2/glock.c:308:5:    context '*gl+28': wanted >= 1, got 0
fs/gfs2/glock.c:529:2: warning: context problem in 'do_xmote': '_spin_unlock' expected different context
fs/gfs2/glock.c:529:2:    context '*gl+28': wanted >= 1, got 0
fs/gfs2/glock.c:925:3: warning: context problem in 'add_to_queue': '_spin_unlock' expected different context
fs/gfs2/glock.c:925:3:    context '*gl+28': wanted >= 1, got 0

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c962283..27cb9cc 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -293,6 +293,8 @@ static void gfs2_holder_wake(struct gfs2_holder *gh)
  */
 
 static int do_promote(struct gfs2_glock *gl)
+__releases(&gl->gl_spin)
+__acquires(&gl->gl_spin)
 {
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
 	struct gfs2_holder *gh, *tmp;
@@ -511,6 +513,8 @@ static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
  */
 
 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
+__releases(&gl->gl_spin)
+__acquires(&gl->gl_spin)
 {
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
 	struct gfs2_sbd *sdp = gl->gl_sbd;
@@ -576,6 +580,8 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
  */
 
 static void run_queue(struct gfs2_glock *gl, const int nonblock)
+__releases(&gl->gl_spin)
+__acquires(&gl->gl_spin)
 {
 	struct gfs2_holder *gh = NULL;
 
@@ -877,6 +883,8 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
  */
 
 static inline void add_to_queue(struct gfs2_holder *gh)
+__releases(&gl->gl_spin)
+__acquires(&gl->gl_spin)
 {
 	struct gfs2_glock *gl = gh->gh_gl;
 	struct gfs2_sbd *sdp = gl->gl_sbd;
-- 
cgit v0.10.2


From bcf0b5b348a1f49c2c878ffdb78e68c930baabb8 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 3 Nov 2008 13:39:46 +0000
Subject: GFS2: Move generation number into "proper" part of inode

This moves the generation number from the gfs2_dinode_host
into the gfs2_inode structure. Eventually the plan is to get
rid of the gfs2_dinode_host structure completely.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index f566ec1..4ff1d7e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -235,7 +235,6 @@ enum {
 
 struct gfs2_dinode_host {
 	u64 di_size;		/* number of bytes in file */
-	u64 di_generation;	/* generation number for NFS */
 	u32 di_flags;		/* GFS2_DIF_... */
 	/* These only apply to directories  */
 	u32 di_entries;		/* The number of entries in the directory */
@@ -246,6 +245,7 @@ struct gfs2_inode {
 	struct inode i_inode;
 	u64 i_no_addr;
 	u64 i_no_formal_ino;
+	u64 i_generation;
 	unsigned long i_flags;		/* GIF_... */
 
 	struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index bf4676d..9d97f69 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -286,7 +286,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
 
 	ip->i_goal = be64_to_cpu(str->di_goal_meta);
-	di->di_generation = be64_to_cpu(str->di_generation);
+	ip->i_generation = be64_to_cpu(str->di_generation);
 
 	di->di_flags = be32_to_cpu(str->di_flags);
 	gfs2_set_inode_flags(&ip->i_inode);
@@ -1263,7 +1263,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 
 	str->di_goal_meta = cpu_to_be64(ip->i_goal);
 	str->di_goal_data = cpu_to_be64(ip->i_goal);
-	str->di_generation = cpu_to_be64(di->di_generation);
+	str->di_generation = cpu_to_be64(ip->i_generation);
 
 	str->di_flags = cpu_to_be32(di->di_flags);
 	str->di_height = cpu_to_be16(ip->i_height);
-- 
cgit v0.10.2


From ad6203f2b46c2217f74b2e88299640eef5889e72 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 3 Nov 2008 13:59:19 +0000
Subject: GFS2: Move "entries" into "proper" inode

This moves the directory entry count into the proper inode.
Potentially we could get this to share the space used by
something else in the future, but this is one more step
on the way to removing the gfs2_dinode_host structure.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index eed040d..830cf48 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -858,8 +858,8 @@ static int dir_make_exhash(struct inode *inode)
 		return -ENOSPC;
 	bn = bh->b_blocknr;
 
-	gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16));
-	leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries);
+	gfs2_assert(sdp, dip->i_entries < (1 << 16));
+	leaf->lf_entries = cpu_to_be16(dip->i_entries);
 
 	/*  Copy dirents  */
 
@@ -1426,7 +1426,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
 	int copied = 0;
 	int error;
 
-	if (!dip->i_di.di_entries)
+	if (!dip->i_entries)
 		return 0;
 
 	if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
@@ -1453,17 +1453,17 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
 			error = PTR_ERR(dent);
 			goto out;
 		}
-		if (dip->i_di.di_entries != g.offset) {
+		if (dip->i_entries != g.offset) {
 			fs_warn(sdp, "Number of entries corrupt in dir %llu, "
-				"ip->i_di.di_entries (%u) != g.offset (%u)\n",
+				"ip->i_entries (%u) != g.offset (%u)\n",
 				(unsigned long long)dip->i_no_addr,
-				dip->i_di.di_entries,
+				dip->i_entries,
 				g.offset);
 			error = -EIO;
 			goto out;
 		}
 		error = do_filldir_main(dip, offset, opaque, filldir, darr,
-					dip->i_di.di_entries, &copied);
+					dip->i_entries, &copied);
 out:
 		kfree(darr);
 	}
@@ -1621,7 +1621,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 			if (error)
 				break;
 			gfs2_trans_add_bh(ip->i_gl, bh, 1);
-			ip->i_di.di_entries++;
+			ip->i_entries++;
 			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 			gfs2_dinode_out(ip, bh->b_data);
 			brelse(bh);
@@ -1704,10 +1704,10 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 	if (error)
 		return error;
 
-	if (!dip->i_di.di_entries)
+	if (!dip->i_entries)
 		gfs2_consist_inode(dip);
 	gfs2_trans_add_bh(dip->i_gl, bh, 1);
-	dip->i_di.di_entries--;
+	dip->i_entries--;
 	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 4ff1d7e..15ca3a7 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -236,8 +236,6 @@ enum {
 struct gfs2_dinode_host {
 	u64 di_size;		/* number of bytes in file */
 	u32 di_flags;		/* GFS2_DIF_... */
-	/* These only apply to directories  */
-	u32 di_entries;		/* The number of entries in the directory */
 	u64 di_eattr;		/* extended attribute block number */
 };
 
@@ -256,6 +254,7 @@ struct gfs2_inode {
 	struct gfs2_alloc *i_alloc;
 	u64 i_goal;	/* goal block for allocations */
 	struct rw_semaphore i_rw_mutex;
+	u32 i_entries;
 	u8 i_height;
 	u8 i_depth;
 };
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9d97f69..015d4c0 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -299,7 +299,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
 		goto corrupt;
 	ip->i_depth = (u8)depth;
-	di->di_entries = be32_to_cpu(str->di_entries);
+	ip->i_entries = be32_to_cpu(str->di_entries);
 
 	di->di_eattr = be64_to_cpu(str->di_eattr);
 	if (S_ISREG(ip->i_inode.i_mode))
@@ -689,7 +689,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 		return error;
 	}
 
-	if (dip->i_di.di_entries == (u32)-1)
+	if (dip->i_entries == (u32)-1)
 		return -EFBIG;
 	if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1)
 		return -EMLINK;
@@ -1067,7 +1067,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 	struct qstr dotname;
 	int error;
 
-	if (ip->i_di.di_entries != 2) {
+	if (ip->i_entries != 2) {
 		if (gfs2_consist_inode(ip))
 			gfs2_dinode_print(ip);
 		return -EIO;
@@ -1271,7 +1271,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 					     !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
 					     GFS2_FORMAT_DE : 0);
 	str->di_depth = cpu_to_be16(ip->i_depth);
-	str->di_entries = cpu_to_be32(di->di_entries);
+	str->di_entries = cpu_to_be32(ip->i_entries);
 
 	str->di_eattr = cpu_to_be64(di->di_eattr);
 	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
@@ -1295,7 +1295,7 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 	printk(KERN_INFO "  di_flags = 0x%.8X\n", di->di_flags);
 	printk(KERN_INFO "  i_height = %u\n", ip->i_height);
 	printk(KERN_INFO "  i_depth = %u\n", ip->i_depth);
-	printk(KERN_INFO "  di_entries = %u\n", di->di_entries);
+	printk(KERN_INFO "  i_entries = %u\n", ip->i_entries);
 	printk(KERN_INFO "  di_eattr = %llu\n",
 	       (unsigned long long)di->di_eattr);
 }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 98440fe..48468f4 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -185,7 +185,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (!dip->i_inode.i_nlink)
 		goto out_gunlock;
 	error = -EFBIG;
-	if (dip->i_di.di_entries == (u32)-1)
+	if (dip->i_entries == (u32)-1)
 		goto out_gunlock;
 	error = -EPERM;
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -427,7 +427,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	ip->i_inode.i_nlink = 2;
 	ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
 	ip->i_di.di_flags |= GFS2_DIF_JDATA;
-	ip->i_di.di_entries = 2;
+	ip->i_entries = 2;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 
@@ -517,13 +517,13 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
 	if (error)
 		goto out_gunlock;
 
-	if (ip->i_di.di_entries < 2) {
+	if (ip->i_entries < 2) {
 		if (gfs2_consist_inode(ip))
 			gfs2_dinode_print(ip);
 		error = -EIO;
 		goto out_gunlock;
 	}
-	if (ip->i_di.di_entries > 2) {
+	if (ip->i_entries > 2) {
 		error = -ENOTEMPTY;
 		goto out_gunlock;
 	}
@@ -726,13 +726,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			goto out_gunlock;
 
 		if (S_ISDIR(nip->i_inode.i_mode)) {
-			if (nip->i_di.di_entries < 2) {
+			if (nip->i_entries < 2) {
 				if (gfs2_consist_inode(nip))
 					gfs2_dinode_print(nip);
 				error = -EIO;
 				goto out_gunlock;
 			}
-			if (nip->i_di.di_entries > 2) {
+			if (nip->i_entries > 2) {
 				error = -ENOTEMPTY;
 				goto out_gunlock;
 			}
@@ -758,7 +758,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 				error = -EINVAL;
 				goto out_gunlock;
 			}
-			if (ndip->i_di.di_entries == (u32)-1) {
+			if (ndip->i_entries == (u32)-1) {
 				error = -EFBIG;
 				goto out_gunlock;
 			}
-- 
cgit v0.10.2


From 3767ac21f471fe669a7d9f6abef682ddac8fc3d8 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 3 Nov 2008 14:28:42 +0000
Subject: GFS2: Move di_eattr into "proper" inode

This moves the di_eattr field out of gfs2_inode_host and
into the inode proper.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3e9bd46..e335dce 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -91,7 +91,7 @@ static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
 	struct gfs2_ea_location el_this;
 	int error;
 
-	if (!ip->i_di.di_eattr)
+	if (!ip->i_eattr)
 		return 0;
 
 	memset(&er, 0, sizeof(struct gfs2_ea_request));
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index e3f76f4..1c1e061 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -114,7 +114,7 @@ static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
 	__be64 *eablk, *end;
 	int error;
 
-	error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &bh);
+	error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &bh);
 	if (error)
 		return error;
 
@@ -414,7 +414,7 @@ int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
 	if (error)
 		return error;
 
-	if (ip->i_di.di_eattr) {
+	if (ip->i_eattr) {
 		struct ea_list ei = { .ei_er = er, .ei_size = 0 };
 
 		error = ea_foreach(ip, ea_list_i, &ei);
@@ -514,7 +514,7 @@ int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
 	struct gfs2_ea_location el;
 	int error;
 
-	if (!ip->i_di.di_eattr)
+	if (!ip->i_eattr)
 		return -ENODATA;
 
 	error = gfs2_ea_find(ip, er, &el);
@@ -741,7 +741,7 @@ static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	if (error)
 		return error;
 
-	ip->i_di.di_eattr = bh->b_blocknr;
+	ip->i_eattr = bh->b_blocknr;
 	error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er);
 
 	brelse(bh);
@@ -938,7 +938,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
 		__be64 *end;
 
-		error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT,
+		error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT,
 				       &indbh);
 		if (error)
 			return error;
@@ -972,8 +972,8 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		gfs2_buffer_clear_tail(indbh, mh_size);
 
 		eablk = (__be64 *)(indbh->b_data + mh_size);
-		*eablk = cpu_to_be64(ip->i_di.di_eattr);
-		ip->i_di.di_eattr = blk;
+		*eablk = cpu_to_be64(ip->i_eattr);
+		ip->i_eattr = blk;
 		ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
 		gfs2_add_inode_blocks(&ip->i_inode, 1);
 
@@ -1040,7 +1040,7 @@ int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
 	struct gfs2_ea_location el;
 	int error;
 
-	if (!ip->i_di.di_eattr) {
+	if (!ip->i_eattr) {
 		if (er->er_flags & XATTR_REPLACE)
 			return -ENODATA;
 		return ea_init(ip, er);
@@ -1145,7 +1145,7 @@ int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
 	struct gfs2_ea_location el;
 	int error;
 
-	if (!ip->i_di.di_eattr)
+	if (!ip->i_eattr)
 		return -ENODATA;
 
 	error = gfs2_ea_find(ip, er, &el);
@@ -1309,7 +1309,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 
 	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
 
-	error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &indbh);
+	error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &indbh);
 	if (error)
 		return error;
 
@@ -1416,7 +1416,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
 	struct buffer_head *dibh;
 	int error;
 
-	rgd = gfs2_blk2rgrpd(sdp, ip->i_di.di_eattr);
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr);
 	if (!rgd) {
 		gfs2_consist_inode(ip);
 		return -EIO;
@@ -1432,9 +1432,9 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
 	if (error)
 		goto out_gunlock;
 
-	gfs2_free_meta(ip, ip->i_di.di_eattr, 1);
+	gfs2_free_meta(ip, ip->i_eattr, 1);
 
-	ip->i_di.di_eattr = 0;
+	ip->i_eattr = 0;
 	gfs2_add_inode_blocks(&ip->i_inode, -1);
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 15ca3a7..fb2fd4a 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -236,7 +236,6 @@ enum {
 struct gfs2_dinode_host {
 	u64 di_size;		/* number of bytes in file */
 	u32 di_flags;		/* GFS2_DIF_... */
-	u64 di_eattr;		/* extended attribute block number */
 };
 
 struct gfs2_inode {
@@ -244,6 +243,7 @@ struct gfs2_inode {
 	u64 i_no_addr;
 	u64 i_no_formal_ino;
 	u64 i_generation;
+	u64 i_eattr;
 	unsigned long i_flags;		/* GIF_... */
 
 	struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 015d4c0..91735b8 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -301,7 +301,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_depth = (u8)depth;
 	ip->i_entries = be32_to_cpu(str->di_entries);
 
-	di->di_eattr = be64_to_cpu(str->di_eattr);
+	ip->i_eattr = be64_to_cpu(str->di_eattr);
 	if (S_ISREG(ip->i_inode.i_mode))
 		gfs2_set_aops(&ip->i_inode);
 
@@ -1273,7 +1273,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_depth = cpu_to_be16(ip->i_depth);
 	str->di_entries = cpu_to_be32(ip->i_entries);
 
-	str->di_eattr = cpu_to_be64(di->di_eattr);
+	str->di_eattr = cpu_to_be64(ip->i_eattr);
 	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
 	str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
 	str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
@@ -1296,7 +1296,7 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 	printk(KERN_INFO "  i_height = %u\n", ip->i_height);
 	printk(KERN_INFO "  i_depth = %u\n", ip->i_depth);
 	printk(KERN_INFO "  i_entries = %u\n", ip->i_entries);
-	printk(KERN_INFO "  di_eattr = %llu\n",
-	       (unsigned long long)di->di_eattr);
+	printk(KERN_INFO "  i_eattr = %llu\n",
+	       (unsigned long long)ip->i_eattr);
 }
 
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 2cb744b..aee6cba 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -502,7 +502,7 @@ static void gfs2_delete_inode(struct inode *inode)
 			goto out_unlock;
 	}
 
-	if (ip->i_di.di_eattr) {
+	if (ip->i_eattr) {
 		error = gfs2_ea_dealloc(ip);
 		if (error)
 			goto out_unlock;
-- 
cgit v0.10.2


From c9e98886776386f1f7828d9685e78cd341849867 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 4 Nov 2008 09:47:33 +0000
Subject: GFS2: Move i_size from gfs2_dinode_host and rename it to i_disksize

This patch moved the i_size field from the gfs2_dinode_host and
following the ext3 convention renames it i_disksize.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index bec76b1..b43aee7 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -75,9 +75,9 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
 		void *kaddr = kmap(page);
 
 		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
-		       ip->i_di.di_size);
-		memset(kaddr + ip->i_di.di_size, 0,
-		       PAGE_CACHE_SIZE - ip->i_di.di_size);
+		       ip->i_disksize);
+		memset(kaddr + ip->i_disksize, 0,
+		       PAGE_CACHE_SIZE - ip->i_disksize);
 		kunmap(page);
 
 		SetPageUptodate(page);
@@ -132,7 +132,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 	if (error)
 		goto out;
 
-	if (ip->i_di.di_size) {
+	if (ip->i_disksize) {
 		/* Get a free block, fill it with the stuffed data,
 		   and write it out to disk */
 
@@ -159,7 +159,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 	di = (struct gfs2_dinode *)dibh->b_data;
 	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 
-	if (ip->i_di.di_size) {
+	if (ip->i_disksize) {
 		*(__be64 *)(di + 1) = cpu_to_be64(block);
 		gfs2_add_inode_blocks(&ip->i_inode, 1);
 		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
@@ -926,7 +926,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
 		}
 	}
 
-	ip->i_di.di_size = size;
+	ip->i_disksize = size;
 	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
@@ -1033,7 +1033,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 		goto out;
 
 	if (gfs2_is_stuffed(ip)) {
-		ip->i_di.di_size = size;
+		ip->i_disksize = size;
 		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
@@ -1045,7 +1045,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 			error = gfs2_block_truncate_page(ip->i_inode.i_mapping);
 
 		if (!error) {
-			ip->i_di.di_size = size;
+			ip->i_disksize = size;
 			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
@@ -1114,7 +1114,7 @@ static int trunc_end(struct gfs2_inode *ip)
 	if (error)
 		goto out;
 
-	if (!ip->i_di.di_size) {
+	if (!ip->i_disksize) {
 		ip->i_height = 0;
 		ip->i_goal = ip->i_no_addr;
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
@@ -1205,9 +1205,9 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
 	if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode)))
 		return -EINVAL;
 
-	if (size > ip->i_di.di_size)
+	if (size > ip->i_disksize)
 		error = do_grow(ip, size);
-	else if (size < ip->i_di.di_size)
+	else if (size < ip->i_disksize)
 		error = do_shrink(ip, size);
 	else
 		/* update time stamps */
@@ -1219,7 +1219,7 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
 int gfs2_truncatei_resume(struct gfs2_inode *ip)
 {
 	int error;
-	error = trunc_dealloc(ip, ip->i_di.di_size);
+	error = trunc_dealloc(ip, ip->i_disksize);
 	if (!error)
 		error = trunc_end(ip);
 	return error;
@@ -1298,7 +1298,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
 		lblock_stop = offset + len + bsize - 1;
 		do_div(lblock_stop, bsize);
 	} else {
-		u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift;
+		u64 end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift;
 		lblock = offset >> shift;
 		lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
 		if (lblock_stop > end_of_file)
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 830cf48..d8d8232 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -128,8 +128,8 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
-	if (ip->i_di.di_size < offset + size)
-		ip->i_di.di_size = offset + size;
+	if (ip->i_disksize < offset + size)
+		ip->i_disksize = offset + size;
 	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_dinode_out(ip, dibh->b_data);
 
@@ -226,8 +226,8 @@ out:
 	if (error)
 		return error;
 
-	if (ip->i_di.di_size < offset + copied)
-		ip->i_di.di_size = offset + copied;
+	if (ip->i_disksize < offset + copied)
+		ip->i_disksize = offset + copied;
 	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
@@ -277,11 +277,11 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 	int copied = 0;
 	int error = 0;
 
-	if (offset >= ip->i_di.di_size)
+	if (offset >= ip->i_disksize)
 		return 0;
 
-	if (offset + size > ip->i_di.di_size)
-		size = ip->i_di.di_size - offset;
+	if (offset + size > ip->i_disksize)
+		size = ip->i_disksize - offset;
 
 	if (!size)
 		return 0;
@@ -760,7 +760,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
 		unsigned hsize = 1 << ip->i_depth;
 		unsigned index;
 		u64 ln;
-		if (hsize * sizeof(u64) != ip->i_di.di_size) {
+		if (hsize * sizeof(u64) != ip->i_disksize) {
 			gfs2_consist_inode(ip);
 			return ERR_PTR(-EIO);
 		}
@@ -905,7 +905,7 @@ static int dir_make_exhash(struct inode *inode)
 	for (x = sdp->sd_hash_ptrs; x--; lp++)
 		*lp = cpu_to_be64(bn);
 
-	dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
+	dip->i_disksize = sdp->sd_sb.sb_bsize / 2;
 	gfs2_add_inode_blocks(&dip->i_inode, 1);
 	dip->i_di.di_flags |= GFS2_DIF_EXHASH;
 
@@ -1082,7 +1082,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
 	int error = 0;
 
 	hsize = 1 << dip->i_depth;
-	if (hsize * sizeof(u64) != dip->i_di.di_size) {
+	if (hsize * sizeof(u64) != dip->i_disksize) {
 		gfs2_consist_inode(dip);
 		return -EIO;
 	}
@@ -1091,7 +1091,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
 
 	buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS | __GFP_NOFAIL);
 
-	for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
+	for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) {
 		error = gfs2_dir_read_data(dip, (char *)buf,
 					    block * sdp->sd_hash_bsize,
 					    sdp->sd_hash_bsize, 1);
@@ -1370,7 +1370,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
 	unsigned depth = 0;
 
 	hsize = 1 << dip->i_depth;
-	if (hsize * sizeof(u64) != dip->i_di.di_size) {
+	if (hsize * sizeof(u64) != dip->i_disksize) {
 		gfs2_consist_inode(dip);
 		return -EIO;
 	}
@@ -1784,7 +1784,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
 	int error = 0;
 
 	hsize = 1 << dip->i_depth;
-	if (hsize * sizeof(u64) != dip->i_di.di_size) {
+	if (hsize * sizeof(u64) != dip->i_disksize) {
 		gfs2_consist_inode(dip);
 		return -EIO;
 	}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index fb2fd4a..4596cd2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -234,7 +234,6 @@ enum {
 };
 
 struct gfs2_dinode_host {
-	u64 di_size;		/* number of bytes in file */
 	u32 di_flags;		/* GFS2_DIF_... */
 };
 
@@ -244,6 +243,7 @@ struct gfs2_inode {
 	u64 i_no_formal_ino;
 	u64 i_generation;
 	u64 i_eattr;
+	loff_t i_disksize;
 	unsigned long i_flags;		/* GIF_... */
 
 	struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 91735b8..baf8b24 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -273,8 +273,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	 * to do that.
 	 */
 	ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
-	di->di_size = be64_to_cpu(str->di_size);
-	i_size_write(&ip->i_inode, di->di_size);
+	ip->i_disksize = be64_to_cpu(str->di_size);
+	i_size_write(&ip->i_inode, ip->i_disksize);
 	gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
 	atime.tv_sec = be64_to_cpu(str->di_atime);
 	atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
@@ -1167,7 +1167,7 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
 		return error;
 	}
 
-	if (!ip->i_di.di_size) {
+	if (!ip->i_disksize) {
 		gfs2_consist_inode(ip);
 		error = -EIO;
 		goto out;
@@ -1177,7 +1177,7 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
 	if (error)
 		goto out;
 
-	x = ip->i_di.di_size + 1;
+	x = ip->i_disksize + 1;
 	if (x > *len) {
 		*buf = kmalloc(x, GFP_NOFS);
 		if (!*buf) {
@@ -1255,7 +1255,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
 	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
 	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
-	str->di_size = cpu_to_be64(di->di_size);
+	str->di_size = cpu_to_be64(ip->i_disksize);
 	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
 	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
 	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
@@ -1287,7 +1287,8 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 	       (unsigned long long)ip->i_no_formal_ino);
 	printk(KERN_INFO "  no_addr = %llu\n",
 	       (unsigned long long)ip->i_no_addr);
-	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
+	printk(KERN_INFO "  i_disksize = %llu\n",
+	       (unsigned long long)ip->i_disksize);
 	printk(KERN_INFO "  blocks = %llu\n",
 	       (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode));
 	printk(KERN_INFO "  i_goal = %llu\n",
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 574b222..0df560f 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -451,8 +451,8 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 
 	kaddr = kmap_atomic(page, KM_USER0);
 	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
-	       ip->i_di.di_size);
-	memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size);
+	       ip->i_disksize);
+	memset(kaddr + ip->i_disksize, 0, PAGE_CACHE_SIZE - ip->i_disksize);
 	kunmap_atomic(kaddr, KM_USER0);
 	flush_dcache_page(page);
 	brelse(dibh);
@@ -780,7 +780,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
 
 	if (inode->i_size < to) {
 		i_size_write(inode, to);
-		ip->i_di.di_size = inode->i_size;
+		ip->i_disksize = inode->i_size;
 		di->di_size = cpu_to_be64(inode->i_size);
 		mark_inode_dirty(inode);
 	}
@@ -845,9 +845,9 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
 
-	if (likely(ret >= 0) && (inode->i_size > ip->i_di.di_size)) {
+	if (likely(ret >= 0) && (inode->i_size > ip->i_disksize)) {
 		di = (struct gfs2_dinode *)dibh->b_data;
-		ip->i_di.di_size = inode->i_size;
+		ip->i_disksize = inode->i_size;
 		di->di_size = cpu_to_be64(inode->i_size);
 		mark_inode_dirty(inode);
 	}
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index fcfaaef..d7e649e 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -478,7 +478,7 @@ static int gfs2_open(struct inode *inode, struct file *file)
 			goto fail;
 
 		if (!(file->f_flags & O_LARGEFILE) &&
-		    ip->i_di.di_size > MAX_NON_LFS) {
+		    ip->i_disksize > MAX_NON_LFS) {
 			error = -EOVERFLOW;
 			goto fail_gunlock;
 		}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ca463a4..dd83e83 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -617,7 +617,7 @@ static int map_journal_extents(struct gfs2_sbd *sdp)
 
 	prev_db = 0;
 
-	for (lb = 0; lb < ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; lb++) {
+	for (lb = 0; lb < ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; lb++) {
 		bh.b_state = 0;
 		bh.b_blocknr = 0;
 		bh.b_size = 1 << ip->i_inode.i_blkbits;
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 48468f4..b932d72 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -371,7 +371,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 
 	ip = ghs[1].gh_gl->gl_object;
 
-	ip->i_di.di_size = size;
+	ip->i_disksize = size;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 
@@ -425,7 +425,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	ip = ghs[1].gh_gl->gl_object;
 
 	ip->i_inode.i_nlink = 2;
-	ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
+	ip->i_disksize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
 	ip->i_di.di_flags |= GFS2_DIF_JDATA;
 	ip->i_entries = 2;
 
@@ -990,7 +990,7 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	int error;
 
-	if (attr->ia_size != ip->i_di.di_size) {
+	if (attr->ia_size != ip->i_disksize) {
 		error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
 		if (error)
 			return error;
@@ -1001,8 +1001,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
 	}
 
 	error = gfs2_truncatei(ip, attr->ia_size);
-	if (error && (inode->i_size != ip->i_di.di_size))
-		i_size_write(inode, ip->i_di.di_size);
+	if (error && (inode->i_size != ip->i_disksize))
+		i_size_write(inode, ip->i_disksize);
 
 	return error;
 }
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3e073f5..188d0a2 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1100,15 +1100,15 @@ static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *
 int gfs2_quota_init(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
-	unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
+	unsigned int blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift;
 	unsigned int x, slot = 0;
 	unsigned int found = 0;
 	u64 dblock;
 	u32 extlen = 0;
 	int error;
 
-	if (!ip->i_di.di_size || ip->i_di.di_size > (64 << 20) ||
-	    ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
+	if (!ip->i_disksize || ip->i_disksize > (64 << 20) ||
+	    ip->i_disksize & (sdp->sd_sb.sb_bsize - 1)) {
 		gfs2_consist_inode(ip);
 		return -EIO;
 	}
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 2d90fb2..bdad0df 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -501,7 +501,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 	for (rgrps = 0;; rgrps++) {
 		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 
-		if (pos + sizeof(struct gfs2_rindex) >= ip->i_di.di_size)
+		if (pos + sizeof(struct gfs2_rindex) >= ip->i_disksize)
 			break;
 		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
 					   sizeof(struct gfs2_rindex));
@@ -590,7 +590,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct inode *inode = &ip->i_inode;
 	struct file_ra_state ra_state;
-	u64 rgrp_count = ip->i_di.di_size;
+	u64 rgrp_count = ip->i_disksize;
 	int error;
 
 	if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
@@ -634,7 +634,7 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip)
 	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
 		/* Ignore partials */
 		if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
-		    ip->i_di.di_size)
+		    ip->i_disksize)
 			break;
 		error = read_rindex_entry(ip, &ra_state);
 		if (error) {
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c3ba3d9..f5cef2a 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -206,14 +206,14 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
 	int ar;
 	int error;
 
-	if (ip->i_di.di_size < (8 << 20) || ip->i_di.di_size > (1 << 30) ||
-	    (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
+	if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) ||
+	    (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) {
 		gfs2_consist_inode(ip);
 		return -EIO;
 	}
-	jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
+	jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift;
 
-	error = gfs2_write_alloc_required(ip, 0, ip->i_di.di_size, &ar);
+	error = gfs2_write_alloc_required(ip, 0, ip->i_disksize, &ar);
 	if (!error && ar) {
 		gfs2_consist_inode(ip);
 		error = -EIO;
-- 
cgit v0.10.2


From 383f01fbf4a701b73f5e35ea805ed1700b4b4db9 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 4 Nov 2008 10:05:22 +0000
Subject: GFS2: Banish struct gfs2_dinode_host

The final field in gfs2_dinode_host was the i_flags field. Thats
renamed to i_diskflags in order to avoid confusion with the existing
inode flags, and moved into the inode proper at a suitable location
to avoid creating a "hole".

At that point struct gfs2_dinode_host is no longer needed and as
promised (quite some time ago!) it can now be removed completely.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index b43aee7..789f28c 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1047,7 +1047,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 		if (!error) {
 			ip->i_disksize = size;
 			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
-			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
+			ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 			gfs2_dinode_out(ip, dibh->b_data);
 		}
@@ -1120,7 +1120,7 @@ static int trunc_end(struct gfs2_inode *ip)
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 	}
 	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
-	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
+	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index d8d8232..b7c8e5c 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -36,7 +36,7 @@
  * the block.  In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
  * beginning of the leaf block. The dirents reside in leaves when
  *
- * dip->i_di.di_flags & GFS2_DIF_EXHASH is true
+ * dip->i_diskflags & GFS2_DIF_EXHASH is true
  *
  * Otherwise, the dirents are "linear", within a single stuffed dinode block.
  *
@@ -755,7 +755,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
 	struct gfs2_inode *ip = GFS2_I(inode);
 	int error;
 
-	if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
+	if (ip->i_diskflags & GFS2_DIF_EXHASH) {
 		struct gfs2_leaf *leaf;
 		unsigned hsize = 1 << ip->i_depth;
 		unsigned index;
@@ -907,7 +907,7 @@ static int dir_make_exhash(struct inode *inode)
 
 	dip->i_disksize = sdp->sd_sb.sb_bsize / 2;
 	gfs2_add_inode_blocks(&dip->i_inode, 1);
-	dip->i_di.di_flags |= GFS2_DIF_EXHASH;
+	dip->i_diskflags |= GFS2_DIF_EXHASH;
 
 	for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
 	dip->i_depth = y;
@@ -1429,7 +1429,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
 	if (!dip->i_entries)
 		return 0;
 
-	if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
+	if (dip->i_diskflags & GFS2_DIF_EXHASH)
 		return dir_e_read(inode, offset, opaque, filldir);
 
 	if (!gfs2_is_stuffed(dip)) {
@@ -1612,7 +1612,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 			dent = gfs2_init_dirent(inode, dent, name, bh);
 			gfs2_inum_out(nip, dent);
 			dent->de_type = cpu_to_be16(type);
-			if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
+			if (ip->i_diskflags & GFS2_DIF_EXHASH) {
 				leaf = (struct gfs2_leaf *)bh->b_data;
 				be16_add_cpu(&leaf->lf_entries, 1);
 			}
@@ -1628,7 +1628,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 			error = 0;
 			break;
 		}
-		if (!(ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
+		if (!(ip->i_diskflags & GFS2_DIF_EXHASH)) {
 			error = dir_make_exhash(inode);
 			if (error)
 				break;
@@ -1691,7 +1691,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 	}
 
 	dirent_del(dip, bh, prev, dent);
-	if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
+	if (dip->i_diskflags & GFS2_DIF_EXHASH) {
 		struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
 		u16 entries = be16_to_cpu(leaf->lf_entries);
 		if (!entries)
@@ -1748,7 +1748,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 	gfs2_inum_out(nip, dent);
 	dent->de_type = cpu_to_be16(new_type);
 
-	if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
+	if (dip->i_diskflags & GFS2_DIF_EXHASH) {
 		brelse(bh);
 		error = gfs2_meta_inode_buffer(dip, &bh);
 		if (error)
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 1c1e061..0d1c76d 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -118,7 +118,7 @@ static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
 	if (error)
 		return error;
 
-	if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) {
+	if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT)) {
 		error = ea_foreach_i(ip, bh, ea_call, data);
 		goto out;
 	}
@@ -935,7 +935,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	int error;
 	int mh_size = sizeof(struct gfs2_meta_header);
 
-	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
+	if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
 		__be64 *end;
 
 		error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT,
@@ -974,7 +974,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		eablk = (__be64 *)(indbh->b_data + mh_size);
 		*eablk = cpu_to_be64(ip->i_eattr);
 		ip->i_eattr = blk;
-		ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
+		ip->i_diskflags |= GFS2_DIF_EA_INDIRECT;
 		gfs2_add_inode_blocks(&ip->i_inode, 1);
 
 		eablk++;
@@ -1015,7 +1015,7 @@ static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	if (error)
 		return error;
 
-	if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT))
+	if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT))
 		blks++;
 	if (GFS2_EAREQ_SIZE_STUFFED(er) > GFS2_SB(&ip->i_inode)->sd_jbsize)
 		blks += DIV_ROUND_UP(er->er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize);
@@ -1051,7 +1051,7 @@ int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
 		return error;
 
 	if (el.el_ea) {
-		if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) {
+		if (ip->i_diskflags & GFS2_DIF_APPENDONLY) {
 			brelse(el.el_bh);
 			return -EPERM;
 		}
@@ -1388,7 +1388,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 	if (bstart)
 		gfs2_free_meta(ip, bstart, blen);
 
-	ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT;
+	ip->i_diskflags &= ~GFS2_DIF_EA_INDIRECT;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
@@ -1479,7 +1479,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
 	if (error)
 		goto out_rindex;
 
-	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
+	if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
 		error = ea_dealloc_indirect(ip);
 		if (error)
 			goto out_rindex;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index c6c318c..848d64c 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -239,7 +239,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
 			return error;
 	}
 
-	if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
+	if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
 	    (gl->gl_state == LM_ST_EXCLUSIVE) &&
 	    (gh->gh_state == LM_ST_EXCLUSIVE))
 		error = gfs2_truncatei_resume(ip);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 4596cd2..6f67e75 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -233,9 +233,6 @@ enum {
 	GIF_USER                = 4, /* user inode, not metadata addr space */
 };
 
-struct gfs2_dinode_host {
-	u32 di_flags;		/* GFS2_DIF_... */
-};
 
 struct gfs2_inode {
 	struct inode i_inode;
@@ -245,9 +242,6 @@ struct gfs2_inode {
 	u64 i_eattr;
 	loff_t i_disksize;
 	unsigned long i_flags;		/* GIF_... */
-
-	struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
-
 	struct gfs2_glock *i_gl; /* Move into i_gh? */
 	struct gfs2_holder i_iopen_gh;
 	struct gfs2_holder i_gh; /* for prepare/commit_write only */
@@ -255,6 +249,7 @@ struct gfs2_inode {
 	u64 i_goal;	/* goal block for allocations */
 	struct rw_semaphore i_rw_mutex;
 	u32 i_entries;
+	u32 i_diskflags;
 	u8 i_height;
 	u8 i_depth;
 };
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index baf8b24..97d3ce65 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -247,7 +247,6 @@ fail:
 
 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 {
-	struct gfs2_dinode_host *di = &ip->i_di;
 	const struct gfs2_dinode *str = buf;
 	struct timespec atime;
 	u16 height, depth;
@@ -288,7 +287,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_goal = be64_to_cpu(str->di_goal_meta);
 	ip->i_generation = be64_to_cpu(str->di_generation);
 
-	di->di_flags = be32_to_cpu(str->di_flags);
+	ip->i_diskflags = be32_to_cpu(str->di_flags);
 	gfs2_set_inode_flags(&ip->i_inode);
 	height = be16_to_cpu(str->di_height);
 	if (unlikely(height > GFS2_MAX_META_HEIGHT))
@@ -789,11 +788,11 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_flags = 0;
 
 	if (S_ISREG(mode)) {
-		if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
+		if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
 		    gfs2_tune_get(sdp, gt_new_files_jdata))
 			di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
 	} else if (S_ISDIR(mode)) {
-		di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
+		di->di_flags |= cpu_to_be32(dip->i_diskflags &
 					    GFS2_DIF_INHERIT_JDATA);
 	}
 
@@ -1241,7 +1240,6 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 
 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 {
-	const struct gfs2_dinode_host *di = &ip->i_di;
 	struct gfs2_dinode *str = buf;
 
 	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -1265,10 +1263,10 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_goal_data = cpu_to_be64(ip->i_goal);
 	str->di_generation = cpu_to_be64(ip->i_generation);
 
-	str->di_flags = cpu_to_be32(di->di_flags);
+	str->di_flags = cpu_to_be32(ip->i_diskflags);
 	str->di_height = cpu_to_be16(ip->i_height);
 	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
-					     !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
+					     !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
 					     GFS2_FORMAT_DE : 0);
 	str->di_depth = cpu_to_be16(ip->i_depth);
 	str->di_entries = cpu_to_be32(ip->i_entries);
@@ -1281,8 +1279,6 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 
 void gfs2_dinode_print(const struct gfs2_inode *ip)
 {
-	const struct gfs2_dinode_host *di = &ip->i_di;
-
 	printk(KERN_INFO "  no_formal_ino = %llu\n",
 	       (unsigned long long)ip->i_no_formal_ino);
 	printk(KERN_INFO "  no_addr = %llu\n",
@@ -1293,7 +1289,7 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 	       (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode));
 	printk(KERN_INFO "  i_goal = %llu\n",
 	       (unsigned long long)ip->i_goal);
-	printk(KERN_INFO "  di_flags = 0x%.8X\n", di->di_flags);
+	printk(KERN_INFO "  i_diskflags = 0x%.8X\n", ip->i_diskflags);
 	printk(KERN_INFO "  i_height = %u\n", ip->i_height);
 	printk(KERN_INFO "  i_depth = %u\n", ip->i_depth);
 	printk(KERN_INFO "  i_entries = %u\n", ip->i_entries);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index c357790..d532936 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -20,7 +20,7 @@ static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
 
 static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
 {
-	return ip->i_di.di_flags & GFS2_DIF_JDATA;
+	return ip->i_diskflags & GFS2_DIF_JDATA;
 }
 
 static inline int gfs2_is_writeback(const struct gfs2_inode *ip)
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 3a9b9b4..7fdeb14 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -213,7 +213,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
 	}
 
 	error = -EIO;
-	if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
+	if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) {
 		iput(inode);
 		goto fail;
 	}
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index d7e649e..a6b7a73 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -157,8 +157,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
 	if (error)
 		return error;
 
-	fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags);
-	if (!S_ISDIR(inode->i_mode) && ip->i_di.di_flags & GFS2_DIF_JDATA)
+	fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_diskflags);
+	if (!S_ISDIR(inode->i_mode) && ip->i_diskflags & GFS2_DIF_JDATA)
 		fsflags |= FS_JOURNAL_DATA_FL;
 	if (put_user(fsflags, ptr))
 		error = -EFAULT;
@@ -171,17 +171,16 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
 void gfs2_set_inode_flags(struct inode *inode)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_dinode_host *di = &ip->i_di;
 	unsigned int flags = inode->i_flags;
 
 	flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
-	if (di->di_flags & GFS2_DIF_IMMUTABLE)
+	if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
 		flags |= S_IMMUTABLE;
-	if (di->di_flags & GFS2_DIF_APPENDONLY)
+	if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
 		flags |= S_APPEND;
-	if (di->di_flags & GFS2_DIF_NOATIME)
+	if (ip->i_diskflags & GFS2_DIF_NOATIME)
 		flags |= S_NOATIME;
-	if (di->di_flags & GFS2_DIF_SYNC)
+	if (ip->i_diskflags & GFS2_DIF_SYNC)
 		flags |= S_SYNC;
 	inode->i_flags = flags;
 }
@@ -220,7 +219,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 	if (error)
 		goto out_drop_write;
 
-	flags = ip->i_di.di_flags;
+	flags = ip->i_diskflags;
 	new_flags = (flags & ~mask) | (reqflags & mask);
 	if ((new_flags ^ flags) == 0)
 		goto out;
@@ -259,7 +258,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 	if (error)
 		goto out_trans_end;
 	gfs2_trans_add_bh(ip->i_gl, bh, 1);
-	ip->i_di.di_flags = new_flags;
+	ip->i_diskflags = new_flags;
 	gfs2_dinode_out(ip, bh->b_data);
 	brelse(bh);
 	gfs2_set_inode_flags(inode);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index b932d72..4987754 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -426,7 +426,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	ip->i_inode.i_nlink = 2;
 	ip->i_disksize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
-	ip->i_di.di_flags |= GFS2_DIF_JDATA;
+	ip->i_diskflags |= GFS2_DIF_JDATA;
 	ip->i_entries = 2;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index aee6cba..ad36af2 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -496,7 +496,7 @@ static void gfs2_delete_inode(struct inode *inode)
 		goto out_truncate;
 
 	if (S_ISDIR(inode->i_mode) &&
-	    (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
+	    (ip->i_diskflags & GFS2_DIF_EXHASH)) {
 		error = gfs2_dir_exhash_dealloc(ip);
 		if (error)
 			goto out_unlock;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 188d0a2..228a465 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1013,7 +1013,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 
 	if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change))
 		return;
-	if (ip->i_di.di_flags & GFS2_DIF_SYSTEM)
+	if (ip->i_diskflags & GFS2_DIF_SYSTEM)
 		return;
 
 	for (x = 0; x < al->al_qd_num; x++) {
-- 
cgit v0.10.2


From d8b71f7381769177998acb2f59ddc73465a60fe0 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 4 Nov 2008 10:19:03 +0000
Subject: GFS2: Move rg_igeneration into struct gfs2_rgrpd

This moves one of the fields of struct gfs2_rgrpd_host into
the struct gfs2_rgrpd with the eventual aim of removing
the struct rgrpd_host completely.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6f67e75..869ac83 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -71,7 +71,6 @@ struct gfs2_bitmap {
 struct gfs2_rgrp_host {
 	u32 rg_free;
 	u32 rg_dinodes;
-	u64 rg_igeneration;
 };
 
 struct gfs2_rgrpd {
@@ -84,6 +83,7 @@ struct gfs2_rgrpd {
 	u32 rd_data;			/* num of data blocks in rgrp */
 	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
 	struct gfs2_rgrp_host rd_rg;
+	u64 rd_igeneration;
 	struct gfs2_bitmap *rd_bits;
 	unsigned int rd_bh_count;
 	struct mutex rd_mutex;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bdad0df..8e93d62 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -702,7 +702,7 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
 		rgd->rd_flags &= ~GFS2_RDF_NOALLOC;
 	rg->rg_free = be32_to_cpu(str->rg_free);
 	rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
-	rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
+	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
 }
 
 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
@@ -717,7 +717,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 	str->rg_free = cpu_to_be32(rg->rg_free);
 	str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
 	str->__pad = cpu_to_be32(0);
-	str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
+	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
 	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
 }
 
@@ -1448,7 +1448,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
 	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
 	rgd->rd_rg.rg_free--;
 	rgd->rd_rg.rg_dinodes++;
-	*generation = rgd->rd_rg.rg_igeneration++;
+	*generation = rgd->rd_igeneration++;
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
 
-- 
cgit v0.10.2


From cfc8b54922db7b647b6d88914dc7ef8c63b6671d Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 4 Nov 2008 10:25:13 +0000
Subject: GFS2: Move rg_free from gfs2_rgrpd_host to gfs2_rgrpd

The second of three fields which need to move, in order
to remove the struct gfs2_rgrpd_host.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 869ac83..f8d9773 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -69,7 +69,6 @@ struct gfs2_bitmap {
 };
 
 struct gfs2_rgrp_host {
-	u32 rg_free;
 	u32 rg_dinodes;
 };
 
@@ -82,6 +81,7 @@ struct gfs2_rgrpd {
 	u32 rd_length;			/* length of rgrp header in fs blocks */
 	u32 rd_data;			/* num of data blocks in rgrp */
 	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
+	u32 rd_free;
 	struct gfs2_rgrp_host rd_rg;
 	u64 rd_igeneration;
 	struct gfs2_bitmap *rd_bits;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8e93d62..bab9cfa 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -269,15 +269,15 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 						  bi->bi_len, x);
 	}
 
-	if (count[0] != rgd->rd_rg.rg_free) {
+	if (count[0] != rgd->rd_free) {
 		if (gfs2_consist_rgrpd(rgd))
 			fs_err(sdp, "free data mismatch:  %u != %u\n",
-			       count[0], rgd->rd_rg.rg_free);
+			       count[0], rgd->rd_free);
 		return;
 	}
 
 	tmp = rgd->rd_data -
-		rgd->rd_rg.rg_free -
+		rgd->rd_free -
 		rgd->rd_rg.rg_dinodes;
 	if (count[1] + count[2] != tmp) {
 		if (gfs2_consist_rgrpd(rgd))
@@ -700,7 +700,7 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
 		rgd->rd_flags |= GFS2_RDF_NOALLOC;
 	else
 		rgd->rd_flags &= ~GFS2_RDF_NOALLOC;
-	rg->rg_free = be32_to_cpu(str->rg_free);
+	rgd->rd_free = be32_to_cpu(str->rg_free);
 	rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
 	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
 }
@@ -714,7 +714,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 	if (rgd->rd_flags & GFS2_RDF_NOALLOC)
 		rg_flags |= GFS2_RGF_NOALLOC;
 	str->rg_flags = cpu_to_be32(rg_flags);
-	str->rg_free = cpu_to_be32(rg->rg_free);
+	str->rg_free = cpu_to_be32(rgd->rd_free);
 	str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
 	str->__pad = cpu_to_be32(0);
 	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
@@ -776,7 +776,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 	}
 
 	spin_lock(&sdp->sd_rindex_spin);
-	rgd->rd_free_clone = rgd->rd_rg.rg_free;
+	rgd->rd_free_clone = rgd->rd_free;
 	rgd->rd_bh_count++;
 	spin_unlock(&sdp->sd_rindex_spin);
 
@@ -850,7 +850,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
 	}
 
 	spin_lock(&sdp->sd_rindex_spin);
-	rgd->rd_free_clone = rgd->rd_rg.rg_free;
+	rgd->rd_free_clone = rgd->rd_free;
 	spin_unlock(&sdp->sd_rindex_spin);
 }
 
@@ -1403,8 +1403,8 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
 	block = rgd->rd_data0 + blk;
 	ip->i_goal = block;
 
-	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free >= *n);
-	rgd->rd_rg.rg_free -= *n;
+	gfs2_assert_withdraw(sdp, rgd->rd_free >= *n);
+	rgd->rd_free -= *n;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
@@ -1445,8 +1445,8 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
 
 	block = rgd->rd_data0 + blk;
 
-	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
-	rgd->rd_rg.rg_free--;
+	gfs2_assert_withdraw(sdp, rgd->rd_free);
+	rgd->rd_free--;
 	rgd->rd_rg.rg_dinodes++;
 	*generation = rgd->rd_igeneration++;
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1481,7 +1481,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	if (!rgd)
 		return;
 
-	rgd->rd_rg.rg_free += blen;
+	rgd->rd_free += blen;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
@@ -1509,7 +1509,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	if (!rgd)
 		return;
 
-	rgd->rd_rg.rg_free += blen;
+	rgd->rd_free += blen;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
@@ -1549,7 +1549,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 	if (!rgd->rd_rg.rg_dinodes)
 		gfs2_consist_rgrpd(rgd);
 	rgd->rd_rg.rg_dinodes--;
-	rgd->rd_rg.rg_free++;
+	rgd->rd_free++;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index f5cef2a..e769076 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -468,7 +468,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
 {
 	gfs2_rgrp_verify(rgd);
 	sc->sc_total += rgd->rd_data;
-	sc->sc_free += rgd->rd_rg.rg_free;
+	sc->sc_free += rgd->rd_free;
 	sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
 	return 0;
 }
-- 
cgit v0.10.2


From 73f749483ed18f3b5759909cc4187b1741f54b10 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 4 Nov 2008 10:32:57 +0000
Subject: GFS2: Banish struct gfs2_rgrpd_host

This patch moves the final field so that we can get rid
of struct gfs2_rgrpd_host, as promised some time ago. Also
by rearranging the fields slightly, we are able to reduce
the size of the gfs2_rgrpd structure at the same time.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index f8d9773..9e3b613 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -68,10 +68,6 @@ struct gfs2_bitmap {
 	u32 bi_len;
 };
 
-struct gfs2_rgrp_host {
-	u32 rg_dinodes;
-};
-
 struct gfs2_rgrpd {
 	struct list_head rd_list;	/* Link with superblock */
 	struct list_head rd_list_mru;
@@ -82,15 +78,15 @@ struct gfs2_rgrpd {
 	u32 rd_data;			/* num of data blocks in rgrp */
 	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
 	u32 rd_free;
-	struct gfs2_rgrp_host rd_rg;
+	u32 rd_free_clone;
+	u32 rd_dinodes;
 	u64 rd_igeneration;
 	struct gfs2_bitmap *rd_bits;
-	unsigned int rd_bh_count;
 	struct mutex rd_mutex;
-	u32 rd_free_clone;
 	struct gfs2_log_element rd_le;
-	u32 rd_last_alloc;
 	struct gfs2_sbd *rd_sbd;
+	unsigned int rd_bh_count;
+	u32 rd_last_alloc;
 	unsigned char rd_flags;
 #define GFS2_RDF_CHECK        0x01      /* Need to check for unlinked inodes */
 #define GFS2_RDF_NOALLOC      0x02      /* rg prohibits allocation */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bab9cfa..8b01c63 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -276,9 +276,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 		return;
 	}
 
-	tmp = rgd->rd_data -
-		rgd->rd_free -
-		rgd->rd_rg.rg_dinodes;
+	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
 	if (count[1] + count[2] != tmp) {
 		if (gfs2_consist_rgrpd(rgd))
 			fs_err(sdp, "used data mismatch:  %u != %u\n",
@@ -286,10 +284,10 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 		return;
 	}
 
-	if (count[3] != rgd->rd_rg.rg_dinodes) {
+	if (count[3] != rgd->rd_dinodes) {
 		if (gfs2_consist_rgrpd(rgd))
 			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
-			       count[3], rgd->rd_rg.rg_dinodes);
+			       count[3], rgd->rd_dinodes);
 		return;
 	}
 
@@ -692,7 +690,6 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
 {
 	const struct gfs2_rgrp *str = buf;
-	struct gfs2_rgrp_host *rg = &rgd->rd_rg;
 	u32 rg_flags;
 
 	rg_flags = be32_to_cpu(str->rg_flags);
@@ -701,21 +698,20 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
 	else
 		rgd->rd_flags &= ~GFS2_RDF_NOALLOC;
 	rgd->rd_free = be32_to_cpu(str->rg_free);
-	rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
+	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
 	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
 }
 
 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 {
 	struct gfs2_rgrp *str = buf;
-	struct gfs2_rgrp_host *rg = &rgd->rd_rg;
 	u32 rg_flags = 0;
 
 	if (rgd->rd_flags & GFS2_RDF_NOALLOC)
 		rg_flags |= GFS2_RGF_NOALLOC;
 	str->rg_flags = cpu_to_be32(rg_flags);
 	str->rg_free = cpu_to_be32(rgd->rd_free);
-	str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
+	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
 	str->__pad = cpu_to_be32(0);
 	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
 	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
@@ -1447,7 +1443,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
 
 	gfs2_assert_withdraw(sdp, rgd->rd_free);
 	rgd->rd_free--;
-	rgd->rd_rg.rg_dinodes++;
+	rgd->rd_dinodes++;
 	*generation = rgd->rd_igeneration++;
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
@@ -1546,9 +1542,9 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 		return;
 	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
 
-	if (!rgd->rd_rg.rg_dinodes)
+	if (!rgd->rd_dinodes)
 		gfs2_consist_rgrpd(rgd);
-	rgd->rd_rg.rg_dinodes--;
+	rgd->rd_dinodes--;
 	rgd->rd_free++;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index e769076..b858770 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -469,7 +469,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
 	gfs2_rgrp_verify(rgd);
 	sc->sc_total += rgd->rd_data;
 	sc->sc_free += rgd->rd_free;
-	sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
+	sc->sc_dinodes += rgd->rd_dinodes;
 	return 0;
 }
 
-- 
cgit v0.10.2


From fa75cedc3da5923b8ea3877be9d5bc09b02e3860 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 10 Nov 2008 10:10:12 +0000
Subject: GFS2: Add more detail to debugfs glock dumps

Although the glock dumps print quite a lot of information about
the glocks themselves, there are more things which can be
usefully added to the dump realting to the objects themselves.

This patch adds a few more fields to the inode and resource
group lines, which should be useful for debugging.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 848d64c..68ee665 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -260,10 +260,13 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 	const struct gfs2_inode *ip = gl->gl_object;
 	if (ip == NULL)
 		return 0;
-	gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%08lx\n",
+	gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu/%llu\n",
 		  (unsigned long long)ip->i_no_formal_ino,
 		  (unsigned long long)ip->i_no_addr,
-		  IF2DT(ip->i_inode.i_mode), ip->i_flags);
+		  IF2DT(ip->i_inode.i_mode), ip->i_flags,
+		  (unsigned int)ip->i_diskflags,
+		  (unsigned long long)ip->i_inode.i_size,
+		  (unsigned long long)ip->i_disksize);
 	return 0;
 }
 
@@ -318,7 +321,9 @@ static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 	const struct gfs2_rgrpd *rgd = gl->gl_object;
 	if (rgd == NULL)
 		return 0;
-	gfs2_print_dbg(seq, " R: n:%llu\n", (unsigned long long)rgd->rd_addr);
+	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
+		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
+		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 37b2c8377c98acb60cf4d0126e385ef2153bded9 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 17 Nov 2008 14:25:37 +0000
Subject: GFS2: Clean up & move gfs2_quotad

This patch is a clean up of gfs2_quotad prior to giving it an
extra job to do in addition to the current portfolio of updating
the quota and statfs information from time to time.

As a result it has been moved into quota.c allowing one of the
functions it calls to be made static. Also the clean up allows
the two existing functions to have separate timeouts and also
to coexist with its future role of dealing with the "truncate in
progress" inode flag.

The (pointless) setting of gfs2_quotad_secs is removed since we
arrange to only wake up quotad when one of the two timers expires.

In addition the struct gfs2_quota_data is moved into a slab cache,
mainly for easier debugging. It should also be possible to use
a shrinker in the future, rather than the current scheme of scanning
the quota data entries from time to time.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index e519919..5668aa7 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -23,7 +23,6 @@
 #include "daemon.h"
 #include "glock.h"
 #include "log.h"
-#include "quota.h"
 #include "recovery.h"
 #include "super.h"
 #include "util.h"
@@ -82,55 +81,3 @@ int gfs2_recoverd(void *data)
 	return 0;
 }
 
-/**
- * gfs2_quotad - Write cached quota changes into the quota file
- * @sdp: Pointer to GFS2 superblock
- *
- */
-
-int gfs2_quotad(void *data)
-{
-	struct gfs2_sbd *sdp = data;
-	unsigned long t;
-	int error;
-
-	while (!kthread_should_stop()) {
-		/* Update the master statfs file */
-
-		t = sdp->sd_statfs_sync_time +
-		    gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
-
-		if (time_after_eq(jiffies, t)) {
-			error = gfs2_statfs_sync(sdp);
-			if (error &&
-			    error != -EROFS &&
-			    !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
-				fs_err(sdp, "quotad: (1) error=%d\n", error);
-			sdp->sd_statfs_sync_time = jiffies;
-		}
-
-		/* Update quota file */
-
-		t = sdp->sd_quota_sync_time +
-		    gfs2_tune_get(sdp, gt_quota_quantum) * HZ;
-
-		if (time_after_eq(jiffies, t)) {
-			error = gfs2_quota_sync(sdp);
-			if (error &&
-			    error != -EROFS &&
-			    !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
-				fs_err(sdp, "quotad: (2) error=%d\n", error);
-			sdp->sd_quota_sync_time = jiffies;
-		}
-
-		gfs2_quota_scan(sdp);
-
-		t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
-		if (freezing(current))
-			refrigerator();
-		schedule_timeout_interruptible(t);
-	}
-
-	return 0;
-}
-
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 9e3b613..cfebc17 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -402,7 +402,6 @@ struct gfs2_tune {
 
 	unsigned int gt_recoverd_secs;
 	unsigned int gt_logd_secs;
-	unsigned int gt_quotad_secs;
 
 	unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
 	unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
@@ -509,7 +508,6 @@ struct gfs2_sbd {
 	spinlock_t sd_statfs_spin;
 	struct gfs2_statfs_change_host sd_statfs_master;
 	struct gfs2_statfs_change_host sd_statfs_local;
-	unsigned long sd_statfs_sync_time;
 
 	/* Resource group stuff */
 
@@ -551,13 +549,13 @@ struct gfs2_sbd {
 	atomic_t sd_quota_count;
 	spinlock_t sd_quota_spin;
 	struct mutex sd_quota_mutex;
+	wait_queue_head_t sd_quota_wait;
 
 	unsigned int sd_quota_slots;
 	unsigned int sd_quota_chunks;
 	unsigned char **sd_quota_bitmap;
 
 	u64 sd_quota_sync_gen;
-	unsigned long sd_quota_sync_time;
 
 	/* Log stuff */
 
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 3eea03c..e3f6f18 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -93,6 +93,12 @@ static int __init init_gfs2_fs(void)
 	if (!gfs2_rgrpd_cachep)
 		goto fail;
 
+	gfs2_quotad_cachep = kmem_cache_create("gfs2_quotad",
+					       sizeof(struct gfs2_quota_data),
+					       0, 0, NULL);
+	if (!gfs2_quotad_cachep)
+		goto fail;
+
 	error = register_filesystem(&gfs2_fs_type);
 	if (error)
 		goto fail;
@@ -112,6 +118,9 @@ fail_unregister:
 fail:
 	gfs2_glock_exit();
 
+	if (gfs2_quotad_cachep)
+		kmem_cache_destroy(gfs2_quotad_cachep);
+
 	if (gfs2_rgrpd_cachep)
 		kmem_cache_destroy(gfs2_rgrpd_cachep);
 
@@ -140,6 +149,7 @@ static void __exit exit_gfs2_fs(void)
 	unregister_filesystem(&gfs2_fs_type);
 	unregister_filesystem(&gfs2meta_fs_type);
 
+	kmem_cache_destroy(gfs2_quotad_cachep);
 	kmem_cache_destroy(gfs2_rgrpd_cachep);
 	kmem_cache_destroy(gfs2_bufdata_cachep);
 	kmem_cache_destroy(gfs2_inode_cachep);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index dd83e83..5d13706 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -60,7 +60,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
 	gt->gt_log_flush_secs = 60;
 	gt->gt_recoverd_secs = 60;
 	gt->gt_logd_secs = 1;
-	gt->gt_quotad_secs = 5;
 	gt->gt_quota_simul_sync = 64;
 	gt->gt_quota_warn_period = 10;
 	gt->gt_quota_scale_num = 1;
@@ -107,6 +106,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 	INIT_LIST_HEAD(&sdp->sd_quota_list);
 	spin_lock_init(&sdp->sd_quota_spin);
 	mutex_init(&sdp->sd_quota_mutex);
+	init_waitqueue_head(&sdp->sd_quota_wait);
 
 	spin_lock_init(&sdp->sd_log_lock);
 
@@ -970,9 +970,6 @@ static int init_threads(struct gfs2_sbd *sdp, int undo)
 	}
 	sdp->sd_logd_process = p;
 
-	sdp->sd_statfs_sync_time = jiffies;
-	sdp->sd_quota_sync_time = jiffies;
-
 	p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
 	error = IS_ERR(p);
 	if (error) {
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 228a465..0cfe44f 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -46,6 +46,8 @@
 #include <linux/bio.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -94,7 +96,7 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
 	struct gfs2_quota_data *qd;
 	int error;
 
-	qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_NOFS);
+	qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS);
 	if (!qd)
 		return -ENOMEM;
 
@@ -119,7 +121,7 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
 	return 0;
 
 fail:
-	kfree(qd);
+	kmem_cache_free(gfs2_quotad_cachep, qd);
 	return error;
 }
 
@@ -158,7 +160,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
 		if (qd || !create) {
 			if (new_qd) {
 				gfs2_lvb_unhold(new_qd->qd_gl);
-				kfree(new_qd);
+				kmem_cache_free(gfs2_quotad_cachep, new_qd);
 			}
 			*qdp = qd;
 			return 0;
@@ -1195,7 +1197,7 @@ fail:
 	return error;
 }
 
-void gfs2_quota_scan(struct gfs2_sbd *sdp)
+static void gfs2_quota_scan(struct gfs2_sbd *sdp)
 {
 	struct gfs2_quota_data *qd, *safe;
 	LIST_HEAD(dead);
@@ -1222,7 +1224,7 @@ void gfs2_quota_scan(struct gfs2_sbd *sdp)
 		gfs2_assert_warn(sdp, !qd->qd_bh_count);
 
 		gfs2_lvb_unhold(qd->qd_gl);
-		kfree(qd);
+		kmem_cache_free(gfs2_quotad_cachep, qd);
 	}
 }
 
@@ -1257,7 +1259,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
 		gfs2_assert_warn(sdp, !qd->qd_bh_count);
 
 		gfs2_lvb_unhold(qd->qd_gl);
-		kfree(qd);
+		kmem_cache_free(gfs2_quotad_cachep, qd);
 
 		spin_lock(&sdp->sd_quota_spin);
 	}
@@ -1272,3 +1274,65 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
 	}
 }
 
+static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
+{
+	if (error == 0 || error == -EROFS)
+		return;
+	if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+		fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
+}
+
+static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg,
+			       int (*fxn)(struct gfs2_sbd *sdp),
+			       unsigned long t, unsigned long *timeo,
+			       unsigned int *new_timeo)
+{
+	if (t >= *timeo) {
+		int error = fxn(sdp);
+		quotad_error(sdp, msg, error);
+		*timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ;
+	} else {
+		*timeo -= t;
+	}
+}
+
+/**
+ * gfs2_quotad - Write cached quota changes into the quota file
+ * @sdp: Pointer to GFS2 superblock
+ *
+ */
+
+int gfs2_quotad(void *data)
+{
+	struct gfs2_sbd *sdp = data;
+	struct gfs2_tune *tune = &sdp->sd_tune;
+	unsigned long statfs_timeo = 0;
+	unsigned long quotad_timeo = 0;
+	unsigned long t = 0;
+	DEFINE_WAIT(wait);
+
+	while (!kthread_should_stop()) {
+
+		/* Update the master statfs file */
+		quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t,
+				   &statfs_timeo, &tune->gt_statfs_quantum);
+
+		/* Update quota file */
+		quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t,
+				   &quotad_timeo, &tune->gt_quota_quantum);
+
+		/* FIXME: This should be turned into a shrinker */
+		gfs2_quota_scan(sdp);
+
+		if (freezing(current))
+			refrigerator();
+		t = min(quotad_timeo, statfs_timeo);
+
+		prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_UNINTERRUPTIBLE);
+		t -= schedule_timeout(t);
+		finish_wait(&sdp->sd_quota_wait, &wait);
+	}
+
+	return 0;
+}
+
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 3b7f4b0..1d08aee 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -29,7 +29,6 @@ int gfs2_quota_sync(struct gfs2_sbd *sdp);
 int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
 
 int gfs2_quota_init(struct gfs2_sbd *sdp);
-void gfs2_quota_scan(struct gfs2_sbd *sdp);
 void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
 
 static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 7e1879f..59e36fd 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -408,7 +408,6 @@ TUNE_ATTR(stall_secs, 1);
 TUNE_ATTR(statfs_quantum, 1);
 TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
 TUNE_ATTR_DAEMON(logd_secs, logd_process);
-TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
 
 static struct attribute *tune_attrs[] = {
@@ -426,7 +425,6 @@ static struct attribute *tune_attrs[] = {
 	&tune_attr_statfs_quantum.attr,
 	&tune_attr_recoverd_secs.attr,
 	&tune_attr_logd_secs.attr,
-	&tune_attr_quotad_secs.attr,
 	&tune_attr_quota_scale.attr,
 	&tune_attr_new_files_jdata.attr,
 	NULL,
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index d31e355..374f50e 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -25,6 +25,7 @@ struct kmem_cache *gfs2_glock_cachep __read_mostly;
 struct kmem_cache *gfs2_inode_cachep __read_mostly;
 struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
 struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
+struct kmem_cache *gfs2_quotad_cachep __read_mostly;
 
 void gfs2_assert_i(struct gfs2_sbd *sdp)
 {
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 7f48576..33e96b0 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -148,6 +148,7 @@ extern struct kmem_cache *gfs2_glock_cachep;
 extern struct kmem_cache *gfs2_inode_cachep;
 extern struct kmem_cache *gfs2_bufdata_cachep;
 extern struct kmem_cache *gfs2_rgrpd_cachep;
+extern struct kmem_cache *gfs2_quotad_cachep;
 
 static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
 					   unsigned int *p)
-- 
cgit v0.10.2


From 813e0c46c9e2a0c6f0b6e774faac82afd7a2e812 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 18 Nov 2008 13:38:48 +0000
Subject: GFS2: Fix "truncate in progress" hang

Following on from the recent clean up of gfs2_quotad, this patch moves
the processing of "truncate in progress" inodes from the glock workqueue
into gfs2_quotad. This fixes a hang due to the "truncate in progress"
processing requiring glocks in order to complete.

It might seem odd to use gfs2_quotad for this particular item, but
we have to use a pre-existing thread since creating a thread implies
a GFP_KERNEL memory allocation which is not allowed from the glock
workqueue context. Of the existing threads, gfs2_logd and gfs2_recoverd
may deadlock if used for this operation. gfs2_scand and gfs2_glockd are
both scheduled for removal at some (hopefully not too distant) future
point. That leaves only gfs2_quotad whose workload is generally fairly
light and is easily adapted for this extra task.

Also, as a result of this change, it opens the way for a future patch to
make the reading of the inode's information asynchronous with respect to
the glock workqueue, which is another improvement that has been on the list
for some time now.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 27cb9cc..4ddf3bd 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -40,6 +40,7 @@
 #include "quota.h"
 #include "super.h"
 #include "util.h"
+#include "bmap.h"
 
 struct gfs2_gl_hash_bucket {
         struct hlist_head hb_list;
@@ -289,7 +290,8 @@ static void gfs2_holder_wake(struct gfs2_holder *gh)
  * do_promote - promote as many requests as possible on the current queue
  * @gl: The glock
  * 
- * Returns: true if there is a blocked holder at the head of the list
+ * Returns: 1 if there is a blocked holder at the head of the list, or 2
+ *          if a type specific operation is underway.
  */
 
 static int do_promote(struct gfs2_glock *gl)
@@ -312,6 +314,8 @@ restart:
 				ret = glops->go_lock(gh);
 				spin_lock(&gl->gl_spin);
 				if (ret) {
+					if (ret == 1)
+						return 2;
 					gh->gh_error = ret;
 					list_del_init(&gh->gh_list);
 					gfs2_holder_wake(gh);
@@ -416,6 +420,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
 	struct gfs2_holder *gh;
 	unsigned state = ret & LM_OUT_ST_MASK;
+	int rv;
 
 	spin_lock(&gl->gl_spin);
 	state_change(gl, state);
@@ -470,7 +475,6 @@ retry:
 		gfs2_demote_wake(gl);
 	if (state != LM_ST_UNLOCKED) {
 		if (glops->go_xmote_bh) {
-			int rv;
 			spin_unlock(&gl->gl_spin);
 			rv = glops->go_xmote_bh(gl, gh);
 			if (rv == -EAGAIN)
@@ -481,10 +485,13 @@ retry:
 				goto out;
 			}
 		}
-		do_promote(gl);
+		rv = do_promote(gl);
+		if (rv == 2)
+			goto out_locked;
 	}
 out:
 	clear_bit(GLF_LOCK, &gl->gl_flags);
+out_locked:
 	spin_unlock(&gl->gl_spin);
 	gfs2_glock_put(gl);
 }
@@ -584,6 +591,7 @@ __releases(&gl->gl_spin)
 __acquires(&gl->gl_spin)
 {
 	struct gfs2_holder *gh = NULL;
+	int ret;
 
 	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
 		return;
@@ -602,8 +610,11 @@ __acquires(&gl->gl_spin)
 	} else {
 		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
 			gfs2_demote_wake(gl);
-		if (do_promote(gl) == 0)
+		ret = do_promote(gl);
+		if (ret == 0)
 			goto out;
+		if (ret == 2)
+			return;
 		gh = find_first_waiter(gl);
 		gl->gl_target = gh->gh_state;
 		if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
@@ -1556,6 +1567,20 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 	}
 }
 
+void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
+{
+	struct gfs2_glock *gl = ip->i_gl;
+	int ret;
+
+	ret = gfs2_truncatei_resume(ip);
+	gfs2_assert_withdraw(gl->gl_sbd, ret == 0);
+
+	spin_lock(&gl->gl_spin);
+	clear_bit(GLF_LOCK, &gl->gl_flags);
+	run_queue(gl, 1);
+	spin_unlock(&gl->gl_spin);
+}
+
 static const char *state2str(unsigned state)
 {
 	switch(state) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 695c6b1..13a64ee 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -132,6 +132,7 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
 void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
 void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
+void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
 
 int __init gfs2_glock_init(void);
 void gfs2_glock_exit(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 68ee665..8ebff8e 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -227,6 +227,7 @@ static int inode_go_demote_ok(struct gfs2_glock *gl)
 static int inode_go_lock(struct gfs2_holder *gh)
 {
 	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct gfs2_inode *ip = gl->gl_object;
 	int error = 0;
 
@@ -241,8 +242,14 @@ static int inode_go_lock(struct gfs2_holder *gh)
 
 	if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
 	    (gl->gl_state == LM_ST_EXCLUSIVE) &&
-	    (gh->gh_state == LM_ST_EXCLUSIVE))
-		error = gfs2_truncatei_resume(ip);
+	    (gh->gh_state == LM_ST_EXCLUSIVE)) {
+		spin_lock(&sdp->sd_trunc_lock);
+		if (list_empty(&ip->i_trunc_list))
+			list_add(&sdp->sd_trunc_list, &ip->i_trunc_list);
+		spin_unlock(&sdp->sd_trunc_lock);
+		wake_up(&sdp->sd_quota_wait);
+		return 1;
+	}
 
 	return error;
 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index cfebc17..dd7d0f8 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -244,6 +244,7 @@ struct gfs2_inode {
 	struct gfs2_alloc *i_alloc;
 	u64 i_goal;	/* goal block for allocations */
 	struct rw_semaphore i_rw_mutex;
+	struct list_head i_trunc_list;
 	u32 i_entries;
 	u32 i_diskflags;
 	u8 i_height;
@@ -550,6 +551,8 @@ struct gfs2_sbd {
 	spinlock_t sd_quota_spin;
 	struct mutex sd_quota_mutex;
 	wait_queue_head_t sd_quota_wait;
+	struct list_head sd_trunc_list;
+	spinlock_t sd_trunc_lock;
 
 	unsigned int sd_quota_slots;
 	unsigned int sd_quota_chunks;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index e3f6f18..cf39295 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -30,6 +30,7 @@ static void gfs2_init_inode_once(void *foo)
 
 	inode_init_once(&ip->i_inode);
 	init_rwsem(&ip->i_rw_mutex);
+	INIT_LIST_HEAD(&ip->i_trunc_list);
 	ip->i_alloc = NULL;
 }
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 5d13706..a9a8380 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -107,6 +107,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 	spin_lock_init(&sdp->sd_quota_spin);
 	mutex_init(&sdp->sd_quota_mutex);
 	init_waitqueue_head(&sdp->sd_quota_wait);
+	INIT_LIST_HEAD(&sdp->sd_trunc_list);
+	spin_lock_init(&sdp->sd_trunc_lock);
 
 	spin_lock_init(&sdp->sd_log_lock);
 
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 0cfe44f..b08d096 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1296,6 +1296,25 @@ static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg,
 	}
 }
 
+static void quotad_check_trunc_list(struct gfs2_sbd *sdp)
+{
+	struct gfs2_inode *ip;
+
+	while(1) {
+		ip = NULL;
+		spin_lock(&sdp->sd_trunc_lock);
+		if (!list_empty(&sdp->sd_trunc_list)) {
+			ip = list_entry(sdp->sd_trunc_list.next,
+					struct gfs2_inode, i_trunc_list);
+			list_del_init(&ip->i_trunc_list);
+		}
+		spin_unlock(&sdp->sd_trunc_lock);
+		if (ip == NULL)
+			return;
+		gfs2_glock_finish_truncate(ip);
+	}
+}
+
 /**
  * gfs2_quotad - Write cached quota changes into the quota file
  * @sdp: Pointer to GFS2 superblock
@@ -1310,6 +1329,7 @@ int gfs2_quotad(void *data)
 	unsigned long quotad_timeo = 0;
 	unsigned long t = 0;
 	DEFINE_WAIT(wait);
+	int empty;
 
 	while (!kthread_should_stop()) {
 
@@ -1324,12 +1344,21 @@ int gfs2_quotad(void *data)
 		/* FIXME: This should be turned into a shrinker */
 		gfs2_quota_scan(sdp);
 
+		/* Check for & recover partially truncated inodes */
+		quotad_check_trunc_list(sdp);
+
 		if (freezing(current))
 			refrigerator();
 		t = min(quotad_timeo, statfs_timeo);
 
 		prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_UNINTERRUPTIBLE);
-		t -= schedule_timeout(t);
+		spin_lock(&sdp->sd_trunc_lock);
+		empty = list_empty(&sdp->sd_trunc_list);
+		spin_unlock(&sdp->sd_trunc_lock);
+		if (empty)
+			t -= schedule_timeout(t);
+		else
+			t = 0;
 		finish_wait(&sdp->sd_quota_wait, &wait);
 	}
 
-- 
cgit v0.10.2


From 9ac1b4d9b6f885ccd7d8f56bceb609003a920ff7 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 19 Nov 2008 10:08:22 +0000
Subject: GFS2: Move gfs2_recoverd into recovery.c

By moving gfs2_recoverd, we can make an additional function static
and it also leaves only (the already scheduled for removal) gfs2_glockd
in daemon.c.

At the same time the declaration of gfs2_quotad is moved to quota.h
to reflect the new location of gfs2_quotad in a previous patch. Also
the recovery.h and quota.h headers are cleaned up.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index 5668aa7..2662df0 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -59,25 +59,3 @@ int gfs2_glockd(void *data)
 	return 0;
 }
 
-/**
- * gfs2_recoverd - Recover dead machine's journals
- * @sdp: Pointer to GFS2 superblock
- *
- */
-
-int gfs2_recoverd(void *data)
-{
-	struct gfs2_sbd *sdp = data;
-	unsigned long t;
-
-	while (!kthread_should_stop()) {
-		gfs2_check_journals(sdp);
-		t = gfs2_tune_get(sdp,  gt_recoverd_secs) * HZ;
-		if (freezing(current))
-			refrigerator();
-		schedule_timeout_interruptible(t);
-	}
-
-	return 0;
-}
-
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
index 4be084f..5258954 100644
--- a/fs/gfs2/daemon.h
+++ b/fs/gfs2/daemon.h
@@ -11,7 +11,5 @@
 #define __DAEMON_DOT_H__
 
 int gfs2_glockd(void *data);
-int gfs2_recoverd(void *data);
-int gfs2_quotad(void *data);
 
 #endif /* __DAEMON_DOT_H__ */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index a9a8380..d159e7e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -33,6 +33,7 @@
 #include "sys.h"
 #include "util.h"
 #include "log.h"
+#include "quota.h"
 
 #define DO 0
 #define UNDO 1
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 1d08aee..cec9032 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -15,21 +15,22 @@ struct gfs2_sbd;
 
 #define NO_QUOTA_CHANGE ((u32)-1)
 
-int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid);
-void gfs2_quota_unhold(struct gfs2_inode *ip);
+extern int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid);
+extern void gfs2_quota_unhold(struct gfs2_inode *ip);
 
-int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid);
-void gfs2_quota_unlock(struct gfs2_inode *ip);
+extern int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid);
+extern void gfs2_quota_unlock(struct gfs2_inode *ip);
 
-int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
-void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
-		       u32 uid, u32 gid);
+extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
+extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
+			      u32 uid, u32 gid);
 
-int gfs2_quota_sync(struct gfs2_sbd *sdp);
-int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
+extern int gfs2_quota_sync(struct gfs2_sbd *sdp);
+extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
 
-int gfs2_quota_init(struct gfs2_sbd *sdp);
-void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
+extern int gfs2_quota_init(struct gfs2_sbd *sdp);
+extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
+extern int gfs2_quotad(void *data);
 
 static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
 {
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index d5e91f4..b56ba3d 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -14,6 +14,8 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/lm_interface.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -589,7 +591,7 @@ fail:
  *
  */
 
-void gfs2_check_journals(struct gfs2_sbd *sdp)
+static void gfs2_check_journals(struct gfs2_sbd *sdp)
 {
 	struct gfs2_jdesc *jd;
 
@@ -603,3 +605,25 @@ void gfs2_check_journals(struct gfs2_sbd *sdp)
 	}
 }
 
+/**
+ * gfs2_recoverd - Recover dead machine's journals
+ * @sdp: Pointer to GFS2 superblock
+ *
+ */
+
+int gfs2_recoverd(void *data)
+{
+	struct gfs2_sbd *sdp = data;
+	unsigned long t;
+
+	while (!kthread_should_stop()) {
+		gfs2_check_journals(sdp);
+		t = gfs2_tune_get(sdp,  gt_recoverd_secs) * HZ;
+		if (freezing(current))
+			refrigerator();
+		schedule_timeout_interruptible(t);
+	}
+
+	return 0;
+}
+
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index f7235e6..a8218ea 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -18,17 +18,17 @@ static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
 	        *blk = 0;
 }
 
-int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
+extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
 			   struct buffer_head **bh);
 
-int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
-int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
-void gfs2_revoke_clean(struct gfs2_sbd *sdp);
+extern int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
+extern int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
+extern void gfs2_revoke_clean(struct gfs2_sbd *sdp);
 
-int gfs2_find_jhead(struct gfs2_jdesc *jd,
+extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
 		    struct gfs2_log_header_host *head);
-int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
-void gfs2_check_journals(struct gfs2_sbd *sdp);
+extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
+extern int gfs2_recoverd(void *data);
 
 #endif /* __RECOVERY_DOT_H__ */
 
-- 
cgit v0.10.2


From 97cc1025b1a91c52e84f12478dcf0f853abc6564 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 20 Nov 2008 13:39:47 +0000
Subject: GFS2: Kill two daemons with one patch

This patch removes the two daemons, gfs2_scand and gfs2_glockd
and replaces them with a shrinker which is called from the VM.

The net result is that GFS2 responds better when there is memory
pressure, since it shrinks the glock cache at the same rate
as the VFS shrinks the dcache and icache. There are no longer
any time based criteria for shrinking glocks, they are kept
until such time as the VM asks for more memory and then we
demote just as many glocks as required.

There are potential future changes to this code, including the
possibility of sorting the glocks which are to be written back
into inode number order, to get a better I/O ordering. It would
be very useful to have an elevator based workqueue implementation
for this, as that would automatically deal with the read I/O cases
at the same time.

This patch is my answer to Andrew Morton's remark, made during
the initial review of GFS2, asking why GFS2 needs so many kernel
threads, the answer being that it doesn't :-) This patch is a
net loss of about 200 lines of code.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index ec65851..c1b4ec6 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_GFS2_FS) += gfs2.o
-gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
+gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
 	glops.o inode.o log.o lops.o locking.o main.o meta_io.o \
 	mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
 	ops_fstype.o ops_inode.o ops_super.o quota.o \
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
deleted file mode 100644
index 2662df0..0000000
--- a/fs/gfs2/daemon.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/kthread.h>
-#include <linux/delay.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
-#include <linux/freezer.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "daemon.h"
-#include "glock.h"
-#include "log.h"
-#include "recovery.h"
-#include "super.h"
-#include "util.h"
-
-/* This uses schedule_timeout() instead of msleep() because it's good for
-   the daemons to wake up more often than the timeout when unmounting so
-   the user's unmount doesn't sit there forever.
-
-   The kthread functions used to start these daemons block and flush signals. */
-
-/**
- * gfs2_glockd - Reclaim unused glock structures
- * @sdp: Pointer to GFS2 superblock
- *
- * One or more of these daemons run, reclaiming glocks on sd_reclaim_list.
- * Number of daemons can be set by user, with num_glockd mount option.
- */
-
-int gfs2_glockd(void *data)
-{
-	struct gfs2_sbd *sdp = data;
-
-	while (!kthread_should_stop()) {
-		while (atomic_read(&sdp->sd_reclaim_count))
-			gfs2_reclaim_glock(sdp);
-
-		wait_event_interruptible(sdp->sd_reclaim_wq,
-					 (atomic_read(&sdp->sd_reclaim_count) ||
-					 kthread_should_stop()));
-		if (freezing(current))
-			refrigerator();
-	}
-
-	return 0;
-}
-
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
deleted file mode 100644
index 5258954..0000000
--- a/fs/gfs2/daemon.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __DAEMON_DOT_H__
-#define __DAEMON_DOT_H__
-
-int gfs2_glockd(void *data);
-
-#endif /* __DAEMON_DOT_H__ */
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 4ddf3bd..07ffc81 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -62,9 +62,10 @@ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int
 
 static DECLARE_RWSEM(gfs2_umount_flush_sem);
 static struct dentry *gfs2_root;
-static struct task_struct *scand_process;
-static unsigned int scand_secs = 5;
 static struct workqueue_struct *glock_workqueue;
+static LIST_HEAD(lru_list);
+static atomic_t lru_count = ATOMIC_INIT(0);
+static spinlock_t lru_lock = SPIN_LOCK_UNLOCKED;
 
 #define GFS2_GL_HASH_SHIFT      15
 #define GFS2_GL_HASH_SIZE       (1 << GFS2_GL_HASH_SHIFT)
@@ -175,6 +176,22 @@ static void gfs2_glock_hold(struct gfs2_glock *gl)
 }
 
 /**
+ * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
+ * @gl: the glock
+ *
+ */
+
+static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
+{
+	spin_lock(&lru_lock);
+	if (list_empty(&gl->gl_lru) && gl->gl_state != LM_ST_UNLOCKED) {
+		list_add_tail(&gl->gl_lru, &lru_list);
+		atomic_inc(&lru_count);
+	}
+	spin_unlock(&lru_lock);
+}
+
+/**
  * gfs2_glock_put() - Decrement reference count on glock
  * @gl: The glock to put
  *
@@ -188,14 +205,23 @@ int gfs2_glock_put(struct gfs2_glock *gl)
 	if (atomic_dec_and_test(&gl->gl_ref)) {
 		hlist_del(&gl->gl_list);
 		write_unlock(gl_lock_addr(gl->gl_hash));
+		spin_lock(&lru_lock);
+		if (!list_empty(&gl->gl_lru)) {
+			list_del_init(&gl->gl_lru);
+			atomic_dec(&lru_count);
+		}
+		spin_unlock(&lru_lock);
 		GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_UNLOCKED);
-		GLOCK_BUG_ON(gl, !list_empty(&gl->gl_reclaim));
+		GLOCK_BUG_ON(gl, !list_empty(&gl->gl_lru));
 		GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
 		glock_free(gl);
 		rv = 1;
 		goto out;
 	}
 	write_unlock(gl_lock_addr(gl->gl_hash));
+	/* 1 for being hashed, 1 for having state != LM_ST_UNLOCKED */
+	if (atomic_read(&gl->gl_ref) == 2)
+		gfs2_glock_schedule_for_reclaim(gl);
 out:
 	return rv;
 }
@@ -837,7 +863,7 @@ static void wait_on_demote(struct gfs2_glock *gl)
  */
 
 static void handle_callback(struct gfs2_glock *gl, unsigned int state,
-			    int remote, unsigned long delay)
+			    unsigned long delay)
 {
 	int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
 
@@ -845,9 +871,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
 	if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
 		gl->gl_demote_state = state;
 		gl->gl_demote_time = jiffies;
-		if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
-		    gl->gl_object)
-			gfs2_glock_schedule_for_reclaim(gl);
 	} else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
 			gl->gl_demote_state != state) {
 		gl->gl_demote_state = LM_ST_UNLOCKED;
@@ -1017,7 +1040,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 
 	spin_lock(&gl->gl_spin);
 	if (gh->gh_flags & GL_NOCACHE)
-		handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
+		handle_callback(gl, LM_ST_UNLOCKED, 0);
 
 	list_del_init(&gh->gh_list);
 	if (find_first_holder(gl) == NULL) {
@@ -1288,7 +1311,7 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
 		delay = gl->gl_ops->go_min_hold_time;
 
 	spin_lock(&gl->gl_spin);
-	handle_callback(gl, state, 1, delay);
+	handle_callback(gl, state, delay);
 	spin_unlock(&gl->gl_spin);
 	if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
 		gfs2_glock_put(gl);
@@ -1357,80 +1380,83 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
  * Returns: 1 if it's ok
  */
 
-static int demote_ok(struct gfs2_glock *gl)
+static int demote_ok(const struct gfs2_glock *gl)
 {
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
-	int demote = 1;
-
-	if (test_bit(GLF_STICKY, &gl->gl_flags))
-		demote = 0;
-	else if (glops->go_demote_ok)
-		demote = glops->go_demote_ok(gl);
-
-	return demote;
-}
 
-/**
- * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
- * @gl: the glock
- *
- */
-
-void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
-{
-	struct gfs2_sbd *sdp = gl->gl_sbd;
-
-	spin_lock(&sdp->sd_reclaim_lock);
-	if (list_empty(&gl->gl_reclaim)) {
-		gfs2_glock_hold(gl);
-		list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
-		atomic_inc(&sdp->sd_reclaim_count);
-		spin_unlock(&sdp->sd_reclaim_lock);
-		wake_up(&sdp->sd_reclaim_wq);
-	} else
-		spin_unlock(&sdp->sd_reclaim_lock);
+	if (gl->gl_state == LM_ST_UNLOCKED)
+		return 0;
+	if (!list_empty(&gl->gl_holders))
+		return 0;
+	if (glops->go_demote_ok)
+		return glops->go_demote_ok(gl);
+	return 1;
 }
 
-/**
- * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list
- * @sdp: the filesystem
- *
- * Called from gfs2_glockd() glock reclaim daemon, or when promoting a
- * different glock and we notice that there are a lot of glocks in the
- * reclaim list.
- *
- */
 
-void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
+static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
 {
 	struct gfs2_glock *gl;
-	int done_callback = 0;
+	int may_demote;
+	int nr_skipped = 0;
+	int got_ref = 0;
+	LIST_HEAD(skipped);
 
-	spin_lock(&sdp->sd_reclaim_lock);
-	if (list_empty(&sdp->sd_reclaim_list)) {
-		spin_unlock(&sdp->sd_reclaim_lock);
-		return;
-	}
-	gl = list_entry(sdp->sd_reclaim_list.next,
-			struct gfs2_glock, gl_reclaim);
-	list_del_init(&gl->gl_reclaim);
-	spin_unlock(&sdp->sd_reclaim_lock);
+	if (nr == 0)
+		goto out;
 
-	atomic_dec(&sdp->sd_reclaim_count);
-	atomic_inc(&sdp->sd_reclaimed);
+	if (!(gfp_mask & __GFP_FS))
+		return -1;
 
-	spin_lock(&gl->gl_spin);
-	if (find_first_holder(gl) == NULL &&
-	    gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) {
-		handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
-		done_callback = 1;
+	spin_lock(&lru_lock);
+	while(nr && !list_empty(&lru_list)) {
+		gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
+		list_del_init(&gl->gl_lru);
+		atomic_dec(&lru_count);
+
+		/* Test for being demotable */
+		if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
+			gfs2_glock_hold(gl);
+			got_ref = 1;
+			spin_unlock(&lru_lock);
+			spin_lock(&gl->gl_spin);
+			may_demote = demote_ok(gl);
+			spin_unlock(&gl->gl_spin);
+			clear_bit(GLF_LOCK, &gl->gl_flags);
+			if (may_demote) {
+				handle_callback(gl, LM_ST_UNLOCKED, 0);
+				nr--;
+				if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+					gfs2_glock_put(gl);
+			}
+			spin_lock(&lru_lock);
+			if (may_demote)
+				continue;
+		}
+		if (list_empty(&gl->gl_lru) &&
+		    (atomic_read(&gl->gl_ref) <= (2 + got_ref))) {
+			nr_skipped++;
+			list_add(&gl->gl_lru, &skipped);
+		}
+		if (got_ref) {
+			spin_unlock(&lru_lock);
+			gfs2_glock_put(gl);
+			spin_lock(&lru_lock);
+			got_ref = 0;
+		}
 	}
-	spin_unlock(&gl->gl_spin);
-	if (!done_callback ||
-	    queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
-		gfs2_glock_put(gl);
+	list_splice(&skipped, &lru_list);
+	atomic_add(nr_skipped, &lru_count);
+	spin_unlock(&lru_lock);
+out:
+	return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure;
 }
 
+static struct shrinker glock_shrinker = {
+	.shrink = gfs2_shrink_glock_memory,
+	.seeks = DEFAULT_SEEKS,
+};
+
 /**
  * examine_bucket - Call a function for glock in a hash bucket
  * @examiner: the function
@@ -1476,26 +1502,6 @@ out:
 }
 
 /**
- * scan_glock - look at a glock and see if we can reclaim it
- * @gl: the glock to look at
- *
- */
-
-static void scan_glock(struct gfs2_glock *gl)
-{
-	if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
-		return;
-	if (test_bit(GLF_LOCK, &gl->gl_flags))
-		return;
-
-	spin_lock(&gl->gl_spin);
-	if (find_first_holder(gl) == NULL &&
-	    gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
-		gfs2_glock_schedule_for_reclaim(gl);
-	spin_unlock(&gl->gl_spin);
-}
-
-/**
  * clear_glock - look at a glock and see if we can free it from glock cache
  * @gl: the glock to look at
  *
@@ -1503,23 +1509,16 @@ static void scan_glock(struct gfs2_glock *gl)
 
 static void clear_glock(struct gfs2_glock *gl)
 {
-	struct gfs2_sbd *sdp = gl->gl_sbd;
-	int released;
-
-	spin_lock(&sdp->sd_reclaim_lock);
-	if (!list_empty(&gl->gl_reclaim)) {
-		list_del_init(&gl->gl_reclaim);
-		atomic_dec(&sdp->sd_reclaim_count);
-		spin_unlock(&sdp->sd_reclaim_lock);
-		released = gfs2_glock_put(gl);
-		gfs2_assert(sdp, !released);
-	} else {
-		spin_unlock(&sdp->sd_reclaim_lock);
+	spin_lock(&lru_lock);
+	if (!list_empty(&gl->gl_lru)) {
+		list_del_init(&gl->gl_lru);
+		atomic_dec(&lru_count);
 	}
+	spin_unlock(&lru_lock);
 
 	spin_lock(&gl->gl_spin);
 	if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED)
-		handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
+		handle_callback(gl, LM_ST_UNLOCKED, 0);
 	spin_unlock(&gl->gl_spin);
 	gfs2_glock_hold(gl);
 	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
@@ -1656,8 +1655,6 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
 	char *p = buf;
 	if (test_bit(GLF_LOCK, gflags))
 		*p++ = 'l';
-	if (test_bit(GLF_STICKY, gflags))
-		*p++ = 's';
 	if (test_bit(GLF_DEMOTE, gflags))
 		*p++ = 'D';
 	if (test_bit(GLF_PENDING_DEMOTE, gflags))
@@ -1776,34 +1773,6 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
 	return error;
 }
 
-/**
- * gfs2_scand - Look for cached glocks and inodes to toss from memory
- * @sdp: Pointer to GFS2 superblock
- *
- * One of these daemons runs, finding candidates to add to sd_reclaim_list.
- * See gfs2_glockd()
- */
-
-static int gfs2_scand(void *data)
-{
-	unsigned x;
-	unsigned delay;
-
-	while (!kthread_should_stop()) {
-		for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
-			examine_bucket(scan_glock, NULL, x);
-		if (freezing(current))
-			refrigerator();
-		delay = scand_secs;
-		if (delay < 1)
-			delay = 1;
-		schedule_timeout_interruptible(delay * HZ);
-	}
-
-	return 0;
-}
-
-
 
 int __init gfs2_glock_init(void)
 {
@@ -1817,28 +1786,21 @@ int __init gfs2_glock_init(void)
 	}
 #endif
 
-	scand_process = kthread_run(gfs2_scand, NULL, "gfs2_scand");
-	if (IS_ERR(scand_process))
-		return PTR_ERR(scand_process);
-
 	glock_workqueue = create_workqueue("glock_workqueue");
-	if (IS_ERR(glock_workqueue)) {
-		kthread_stop(scand_process);
+	if (IS_ERR(glock_workqueue))
 		return PTR_ERR(glock_workqueue);
-	}
+
+	register_shrinker(&glock_shrinker);
 
 	return 0;
 }
 
 void gfs2_glock_exit(void)
 {
+	unregister_shrinker(&glock_shrinker);
 	destroy_workqueue(glock_workqueue);
-	kthread_stop(scand_process);
 }
 
-module_param(scand_secs, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs");
-
 static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
 {
 	struct gfs2_glock *gl;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 13a64ee..543ec7e 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -129,7 +129,6 @@ int gfs2_lvb_hold(struct gfs2_glock *gl);
 void gfs2_lvb_unhold(struct gfs2_glock *gl);
 
 void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
-void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
 void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
 void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 8ebff8e..8522d3a 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -201,19 +201,12 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
  * Returns: 1 if it's ok
  */
 
-static int inode_go_demote_ok(struct gfs2_glock *gl)
+static int inode_go_demote_ok(const struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
-	int demote = 0;
-
-	if (!gl->gl_object && !gl->gl_aspace->i_mapping->nrpages)
-		demote = 1;
-	else if (!sdp->sd_args.ar_localcaching &&
-		 time_after_eq(jiffies, gl->gl_stamp +
-			       gfs2_tune_get(sdp, gt_demote_secs) * HZ))
-		demote = 1;
-
-	return demote;
+	if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
+		return 0;
+	return 1;
 }
 
 /**
@@ -284,7 +277,7 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
  * Returns: 1 if it's ok
  */
 
-static int rgrp_go_demote_ok(struct gfs2_glock *gl)
+static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
 {
 	return !gl->gl_aspace->i_mapping->nrpages;
 }
@@ -386,13 +379,25 @@ static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
 }
 
 /**
+ * trans_go_demote_ok
+ * @gl: the glock
+ *
+ * Always returns 0
+ */
+
+static int trans_go_demote_ok(const struct gfs2_glock *gl)
+{
+	return 0;
+}
+
+/**
  * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
  * @gl: the glock
  *
  * Returns: 1 if it's ok
  */
 
-static int quota_go_demote_ok(struct gfs2_glock *gl)
+static int quota_go_demote_ok(const struct gfs2_glock *gl)
 {
 	return !atomic_read(&gl->gl_lvb_count);
 }
@@ -426,6 +431,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
 const struct gfs2_glock_operations gfs2_trans_glops = {
 	.go_xmote_th = trans_go_sync,
 	.go_xmote_bh = trans_go_xmote_bh,
+	.go_demote_ok = trans_go_demote_ok,
 	.go_type = LM_TYPE_NONDISK,
 };
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index dd7d0f8..608849d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -125,7 +125,7 @@ struct gfs2_glock_operations {
 	void (*go_xmote_th) (struct gfs2_glock *gl);
 	int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
 	void (*go_inval) (struct gfs2_glock *gl, int flags);
-	int (*go_demote_ok) (struct gfs2_glock *gl);
+	int (*go_demote_ok) (const struct gfs2_glock *gl);
 	int (*go_lock) (struct gfs2_holder *gh);
 	void (*go_unlock) (struct gfs2_holder *gh);
 	int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
@@ -155,7 +155,6 @@ struct gfs2_holder {
 
 enum {
 	GLF_LOCK			= 1,
-	GLF_STICKY			= 2,
 	GLF_DEMOTE			= 3,
 	GLF_PENDING_DEMOTE		= 4,
 	GLF_DEMOTE_IN_PROGRESS		= 5,
@@ -190,7 +189,7 @@ struct gfs2_glock {
 	unsigned long gl_tchange;
 	void *gl_object;
 
-	struct list_head gl_reclaim;
+	struct list_head gl_lru;
 
 	struct gfs2_sbd *gl_sbd;
 
@@ -397,7 +396,6 @@ struct gfs2_args {
 struct gfs2_tune {
 	spinlock_t gt_spin;
 
-	unsigned int gt_demote_secs; /* Cache retention for unheld glock */
 	unsigned int gt_incore_log_blocks;
 	unsigned int gt_log_flush_secs;
 
@@ -478,10 +476,6 @@ struct gfs2_sbd {
 	/* Lock Stuff */
 
 	struct lm_lockstruct sd_lockstruct;
-	struct list_head sd_reclaim_list;
-	spinlock_t sd_reclaim_lock;
-	wait_queue_head_t sd_reclaim_wq;
-	atomic_t sd_reclaim_count;
 	struct gfs2_holder sd_live_gh;
 	struct gfs2_glock *sd_rename_gl;
 	struct gfs2_glock *sd_trans_gl;
@@ -541,8 +535,6 @@ struct gfs2_sbd {
 	struct task_struct *sd_recoverd_process;
 	struct task_struct *sd_logd_process;
 	struct task_struct *sd_quotad_process;
-	struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX];
-	unsigned int sd_glockd_num;
 
 	/* Quota stuff */
 
@@ -615,10 +607,6 @@ struct gfs2_sbd {
 	struct mutex sd_freeze_lock;
 	unsigned int sd_freeze_count;
 
-	/* Counters */
-
-	atomic_t sd_reclaimed;
-
 	char sd_fsname[GFS2_FSNAME_LEN];
 	char sd_table_name[GFS2_FSNAME_LEN];
 	char sd_proto_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 97d3ce65..3b87c18 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -386,7 +386,6 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 	gfs2_free_di(rgd, ip);
 
 	gfs2_trans_end(sdp);
-	clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
 
 out_rg_gunlock:
 	gfs2_glock_dq_uninit(&al->al_rgd_gh);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index cf39295..7cacfde 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -43,7 +43,7 @@ static void gfs2_init_glock_once(void *foo)
 	INIT_LIST_HEAD(&gl->gl_holders);
 	gl->gl_lvb = NULL;
 	atomic_set(&gl->gl_lvb_count, 0);
-	INIT_LIST_HEAD(&gl->gl_reclaim);
+	INIT_LIST_HEAD(&gl->gl_lru);
 	INIT_LIST_HEAD(&gl->gl_ail_list);
 	atomic_set(&gl->gl_ail_count, 0);
 }
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index f96eb90..8c0f16e 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -32,7 +32,6 @@ enum {
 	Opt_debug,
 	Opt_nodebug,
 	Opt_upgrade,
-	Opt_num_glockd,
 	Opt_acl,
 	Opt_noacl,
 	Opt_quota_off,
@@ -57,7 +56,6 @@ static const match_table_t tokens = {
 	{Opt_debug, "debug"},
 	{Opt_nodebug, "nodebug"},
 	{Opt_upgrade, "upgrade"},
-	{Opt_num_glockd, "num_glockd=%d"},
 	{Opt_acl, "acl"},
 	{Opt_noacl, "noacl"},
 	{Opt_quota_off, "quota=off"},
@@ -96,7 +94,6 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
 		spin_unlock(&gfs2_sys_margs_lock);
 
 		/*  Set some defaults  */
-		args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
 		args->ar_quota = GFS2_QUOTA_DEFAULT;
 		args->ar_data = GFS2_DATA_DEFAULT;
 	}
@@ -105,7 +102,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
 	   process them */
 
 	for (options = data; (o = strsep(&options, ",")); ) {
-		int token, option;
+		int token;
 		substring_t tmp[MAX_OPT_ARGS];
 
 		if (!*o)
@@ -196,22 +193,6 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
 				goto cant_remount;
 			args->ar_upgrade = 1;
 			break;
-		case Opt_num_glockd:
-			if ((error = match_int(&tmp[0], &option))) {
-				fs_info(sdp, "problem getting num_glockd\n");
-				goto out_error;
-			}
-
-			if (remount && option != args->ar_num_glockd)
-				goto cant_remount;
-			if (!option || option > GFS2_GLOCKD_MAX) {
-				fs_info(sdp, "0 < num_glockd <= %u  (not %u)\n",
-				        GFS2_GLOCKD_MAX, option);
-				error = -EINVAL;
-				goto out_error;
-			}
-			args->ar_num_glockd = option;
-			break;
 		case Opt_acl:
 			args->ar_posix_acl = 1;
 			sdp->sd_vfs->s_flags |= MS_POSIXACL;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d159e7e..fc300ea 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -22,7 +22,6 @@
 #include "gfs2.h"
 #include "incore.h"
 #include "bmap.h"
-#include "daemon.h"
 #include "glock.h"
 #include "glops.h"
 #include "inode.h"
@@ -56,7 +55,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
 {
 	spin_lock_init(&gt->gt_spin);
 
-	gt->gt_demote_secs = 300;
 	gt->gt_incore_log_blocks = 1024;
 	gt->gt_log_flush_secs = 60;
 	gt->gt_recoverd_secs = 60;
@@ -88,10 +86,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
 	gfs2_tune_init(&sdp->sd_tune);
 
-	INIT_LIST_HEAD(&sdp->sd_reclaim_list);
-	spin_lock_init(&sdp->sd_reclaim_lock);
-	init_waitqueue_head(&sdp->sd_reclaim_wq);
-
 	mutex_init(&sdp->sd_inum_mutex);
 	spin_lock_init(&sdp->sd_statfs_spin);
 
@@ -443,24 +437,11 @@ out:
 static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
 			int undo)
 {
-	struct task_struct *p;
 	int error = 0;
 
 	if (undo)
 		goto fail_trans;
 
-	for (sdp->sd_glockd_num = 0;
-	     sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
-	     sdp->sd_glockd_num++) {
-		p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd");
-		error = IS_ERR(p);
-		if (error) {
-			fs_err(sdp, "can't start glockd thread: %d\n", error);
-			goto fail;
-		}
-		sdp->sd_glockd_process[sdp->sd_glockd_num] = p;
-	}
-
 	error = gfs2_glock_nq_num(sdp,
 				  GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
 				  LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
@@ -493,7 +474,6 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
 		fs_err(sdp, "can't create transaction glock: %d\n", error);
 		goto fail_rename;
 	}
-	set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
 
 	return 0;
 
@@ -506,9 +486,6 @@ fail_live:
 fail_mount:
 	gfs2_glock_dq_uninit(mount_gh);
 fail:
-	while (sdp->sd_glockd_num--)
-		kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
-
 	return error;
 }
 
@@ -681,7 +658,6 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 		return PTR_ERR(sdp->sd_jindex);
 	}
 	ip = GFS2_I(sdp->sd_jindex);
-	set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
 
 	/* Load in the journal index special file */
 
@@ -832,7 +808,6 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
 		goto fail_statfs;
 	}
 	ip = GFS2_I(sdp->sd_rindex);
-	set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
 	sdp->sd_rindex_uptodate = 0;
 
 	/* Read in the quota inode */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index ad36af2..29f8a5c 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -142,8 +142,6 @@ static void gfs2_put_super(struct super_block *sb)
 	kthread_stop(sdp->sd_quotad_process);
 	kthread_stop(sdp->sd_logd_process);
 	kthread_stop(sdp->sd_recoverd_process);
-	while (sdp->sd_glockd_num--)
-		kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		error = gfs2_make_fs_ro(sdp);
@@ -369,7 +367,6 @@ static void gfs2_clear_inode(struct inode *inode)
 	 */
 	if (test_bit(GIF_USER, &ip->i_flags)) {
 		ip->i_gl->gl_object = NULL;
-		gfs2_glock_schedule_for_reclaim(ip->i_gl);
 		gfs2_glock_put(ip->i_gl);
 		ip->i_gl = NULL;
 		if (ip->i_iopen_gh.gh_gl) {
@@ -422,8 +419,6 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 		seq_printf(s, ",debug");
 	if (args->ar_upgrade)
 		seq_printf(s, ",upgrade");
-	if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT)
-		seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
 	if (args->ar_posix_acl)
 		seq_printf(s, ",acl");
 	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 59e36fd..67ba5b7 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -263,7 +263,6 @@ ARGS_ATTR(localcaching,    "%d\n");
 ARGS_ATTR(localflocks,     "%d\n");
 ARGS_ATTR(debug,           "%d\n");
 ARGS_ATTR(upgrade,         "%d\n");
-ARGS_ATTR(num_glockd,      "%u\n");
 ARGS_ATTR(posix_acl,       "%d\n");
 ARGS_ATTR(quota,           "%u\n");
 ARGS_ATTR(suiddir,         "%d\n");
@@ -279,7 +278,6 @@ static struct attribute *args_attrs[] = {
 	&args_attr_localflocks.attr,
 	&args_attr_debug.attr,
 	&args_attr_upgrade.attr,
-	&args_attr_num_glockd.attr,
 	&args_attr_posix_acl.attr,
 	&args_attr_quota.attr,
 	&args_attr_suiddir.attr,
@@ -288,30 +286,6 @@ static struct attribute *args_attrs[] = {
 };
 
 /*
- * display counters from superblock
- */
-
-struct counters_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gfs2_sbd *, char *);
-};
-
-#define COUNTERS_ATTR(name, fmt)                                            \
-static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                 \
-{                                                                           \
-	return snprintf(buf, PAGE_SIZE, fmt,                                \
-			(unsigned int)atomic_read(&sdp->sd_##name));        \
-}                                                                           \
-static struct counters_attr counters_attr_##name = __ATTR_RO(name)
-
-COUNTERS_ATTR(reclaimed,        "%u\n");
-
-static struct attribute *counters_attrs[] = {
-	&counters_attr_reclaimed.attr,
-	NULL,
-};
-
-/*
  * get and set struct gfs2_tune fields
  */
 
@@ -393,7 +367,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
 }                                                                             \
 TUNE_ATTR_2(name, name##_store)
 
-TUNE_ATTR(demote_secs, 0);
 TUNE_ATTR(incore_log_blocks, 0);
 TUNE_ATTR(log_flush_secs, 0);
 TUNE_ATTR(quota_warn_period, 0);
@@ -411,7 +384,6 @@ TUNE_ATTR_DAEMON(logd_secs, logd_process);
 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
 
 static struct attribute *tune_attrs[] = {
-	&tune_attr_demote_secs.attr,
 	&tune_attr_incore_log_blocks.attr,
 	&tune_attr_log_flush_secs.attr,
 	&tune_attr_quota_warn_period.attr,
@@ -435,11 +407,6 @@ static struct attribute_group lockstruct_group = {
 	.attrs = lockstruct_attrs,
 };
 
-static struct attribute_group counters_group = {
-	.name = "counters",
-	.attrs = counters_attrs,
-};
-
 static struct attribute_group args_group = {
 	.name = "args",
 	.attrs = args_attrs,
@@ -464,13 +431,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
 	if (error)
 		goto fail_reg;
 
-	error = sysfs_create_group(&sdp->sd_kobj, &counters_group);
-	if (error)
-		goto fail_lockstruct;
-
 	error = sysfs_create_group(&sdp->sd_kobj, &args_group);
 	if (error)
-		goto fail_counters;
+		goto fail_lockstruct;
 
 	error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
 	if (error)
@@ -481,8 +444,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
 
 fail_args:
 	sysfs_remove_group(&sdp->sd_kobj, &args_group);
-fail_counters:
-	sysfs_remove_group(&sdp->sd_kobj, &counters_group);
 fail_lockstruct:
 	sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
 fail_reg:
@@ -496,7 +457,6 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
 {
 	sysfs_remove_group(&sdp->sd_kobj, &tune_group);
 	sysfs_remove_group(&sdp->sd_kobj, &args_group);
-	sysfs_remove_group(&sdp->sd_kobj, &counters_group);
 	sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
 	kobject_put(&sdp->sd_kobj);
 }
-- 
cgit v0.10.2


From fdd1062ebaa422c5684f97fa91da06f91167d76b Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 26 Nov 2008 10:26:38 +0000
Subject: GFS2: Send some sensible sysfs stuff

We ought to inform the user of the locktable and lockproto for each
uevent we generate.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 4ec571c..9b7edcf 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -195,9 +195,23 @@ void gdlm_kobject_release(struct gdlm_ls *ls)
 	kobject_put(&ls->kobj);
 }
 
+static int gdlm_uevent(struct kset *kset, struct kobject *kobj,
+		       struct kobj_uevent_env *env)
+{
+        struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
+        add_uevent_var(env, "LOCKTABLE=%s:%s", ls->clustername, ls->fsname);
+        add_uevent_var(env, "LOCKPROTO=lock_dlm");
+        return 0;
+}
+
+static struct kset_uevent_ops gdlm_uevent_ops = {
+	.uevent = gdlm_uevent,
+};
+
+
 int gdlm_sysfs_init(void)
 {
-	gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj);
+	gdlm_kset = kset_create_and_add("lock_dlm", &gdlm_uevent_ops, kernel_kobj);
 	if (!gdlm_kset) {
 		printk(KERN_WARNING "%s: can not create kset\n", __func__);
 		return -ENOMEM;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 67ba5b7..298bcb6 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -461,11 +461,25 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
 	kobject_put(&sdp->sd_kobj);
 }
 
+static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
+		       struct kobj_uevent_env *env)
+{
+	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+	add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
+	add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
+	return 0;
+}
+
+static struct kset_uevent_ops gfs2_uevent_ops = {
+	.uevent = gfs2_uevent,
+};
+
+
 int gfs2_sys_init(void)
 {
 	gfs2_sys_margs = NULL;
 	spin_lock_init(&gfs2_sys_margs_lock);
-	gfs2_kset = kset_create_and_add("gfs2", NULL, fs_kobj);
+	gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj);
 	if (!gfs2_kset)
 		return -ENOMEM;
 	return 0;
-- 
cgit v0.10.2


From b52896813c2f16bcc5c5b67bb3c3f75bc084439b Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 26 Nov 2008 12:49:26 +0000
Subject: GFS2: Fix bug in gfs2_lock_fs_check_clean()

gfs2_lock_fs_check_clean() should not be calling gfs2_jindex_hold()
since it doesn't work like rindex hold, despite the comment. That
allows gfs2_jindex_hold() to be moved into ops_fstype.c where it
can be made static.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 8a468ca..4f91944 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -11,6 +11,7 @@
 #define __DIR_DOT_H__
 
 #include <linux/dcache.h>
+#include <linux/crc32.h>
 
 struct inode;
 struct gfs2_inode;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index fc300ea..4cae60f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -33,6 +33,7 @@
 #include "util.h"
 #include "log.h"
 #include "quota.h"
+#include "dir.h"
 
 #define DO 0
 #define UNDO 1
@@ -638,6 +639,72 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
 					sdp->sd_lockstruct.ls_lockspace);
 }
 
+/**
+ * gfs2_jindex_hold - Grab a lock on the jindex
+ * @sdp: The GFS2 superblock
+ * @ji_gh: the holder for the jindex glock
+ *
+ * Returns: errno
+ */
+
+static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
+{
+	struct gfs2_inode *dip = GFS2_I(sdp->sd_jindex);
+	struct qstr name;
+	char buf[20];
+	struct gfs2_jdesc *jd;
+	int error;
+
+	name.name = buf;
+
+	mutex_lock(&sdp->sd_jindex_mutex);
+
+	for (;;) {
+		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
+		if (error)
+			break;
+
+		name.len = sprintf(buf, "journal%u", sdp->sd_journals);
+		name.hash = gfs2_disk_hash(name.name, name.len);
+
+		error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
+		if (error == -ENOENT) {
+			error = 0;
+			break;
+		}
+
+		gfs2_glock_dq_uninit(ji_gh);
+
+		if (error)
+			break;
+
+		error = -ENOMEM;
+		jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
+		if (!jd)
+			break;
+
+		INIT_LIST_HEAD(&jd->extent_list);
+		jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
+		if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
+			if (!jd->jd_inode)
+				error = -ENOENT;
+			else
+				error = PTR_ERR(jd->jd_inode);
+			kfree(jd);
+			break;
+		}
+
+		spin_lock(&sdp->sd_jindex_spin);
+		jd->jd_jid = sdp->sd_journals++;
+		list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
+		spin_unlock(&sdp->sd_jindex_spin);
+	}
+
+	mutex_unlock(&sdp->sd_jindex_mutex);
+
+	return error;
+}
+
 static int init_journal(struct gfs2_sbd *sdp, int undo)
 {
 	struct inode *master = sdp->sd_master_dir->d_inode;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b858770..3dd9f57 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -34,76 +34,6 @@
 #include "util.h"
 
 /**
- * gfs2_jindex_hold - Grab a lock on the jindex
- * @sdp: The GFS2 superblock
- * @ji_gh: the holder for the jindex glock
- *
- * This is very similar to the gfs2_rindex_hold() function, except that
- * in general we hold the jindex lock for longer periods of time and
- * we grab it far less frequently (in general) then the rgrp lock.
- *
- * Returns: errno
- */
-
-int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
-{
-	struct gfs2_inode *dip = GFS2_I(sdp->sd_jindex);
-	struct qstr name;
-	char buf[20];
-	struct gfs2_jdesc *jd;
-	int error;
-
-	name.name = buf;
-
-	mutex_lock(&sdp->sd_jindex_mutex);
-
-	for (;;) {
-		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
-		if (error)
-			break;
-
-		name.len = sprintf(buf, "journal%u", sdp->sd_journals);
-		name.hash = gfs2_disk_hash(name.name, name.len);
-
-		error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
-		if (error == -ENOENT) {
-			error = 0;
-			break;
-		}
-
-		gfs2_glock_dq_uninit(ji_gh);
-
-		if (error)
-			break;
-
-		error = -ENOMEM;
-		jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
-		if (!jd)
-			break;
-
-		INIT_LIST_HEAD(&jd->extent_list);
-		jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
-		if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
-			if (!jd->jd_inode)
-				error = -ENOENT;
-			else
-				error = PTR_ERR(jd->jd_inode);
-			kfree(jd);
-			break;
-		}
-
-		spin_lock(&sdp->sd_jindex_spin);
-		jd->jd_jid = sdp->sd_journals++;
-		list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
-		spin_unlock(&sdp->sd_jindex_spin);
-	}
-
-	mutex_unlock(&sdp->sd_jindex_mutex);
-
-	return error;
-}
-
-/**
  * gfs2_jindex_free - Clear all the journal index information
  * @sdp: The GFS2 superblock
  *
@@ -580,10 +510,6 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
 	struct gfs2_log_header_host lh;
 	int error;
 
-	error = gfs2_jindex_hold(sdp, &ji_gh);
-	if (error)
-		return error;
-
 	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
 		lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
 		if (!lfcc) {
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 1848dad..c625459 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -25,7 +25,6 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
 	return x;
 }
 
-int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh);
 void gfs2_jindex_free(struct gfs2_sbd *sdp);
 
 struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
-- 
cgit v0.10.2


From 2bfb6449b7a1f29a2a63e1d869103b5811c3b69f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 26 Nov 2008 13:30:49 +0000
Subject: GFS2: Move four functions from super.c

The functions which are being moved can all be marked
static in their new locations, since they only have
a single caller each. Their new locations are more
logical than before and some of the functions are
small enough that the compiler might well inline them.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 07ffc81..6e298b0 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1317,6 +1317,20 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
 		gfs2_glock_put(gl);
 }
 
+static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
+{
+	struct gfs2_jdesc *jd;
+
+	spin_lock(&sdp->sd_jindex_spin);
+	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+		if (jd->jd_jid != jid)
+			continue;
+		jd->jd_dirty = 1;
+		break;
+	}
+	spin_unlock(&sdp->sd_jindex_spin);
+}
+
 /**
  * gfs2_glock_cb - Callback used by locking module
  * @sdp: Pointer to the superblock
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 29f8a5c..08837a7 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -257,6 +257,137 @@ static void gfs2_unlockfs(struct super_block *sb)
 }
 
 /**
+ * statfs_fill - fill in the sg for a given RG
+ * @rgd: the RG
+ * @sc: the sc structure
+ *
+ * Returns: 0 on success, -ESTALE if the LVB is invalid
+ */
+
+static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
+			    struct gfs2_statfs_change_host *sc)
+{
+	gfs2_rgrp_verify(rgd);
+	sc->sc_total += rgd->rd_data;
+	sc->sc_free += rgd->rd_free;
+	sc->sc_dinodes += rgd->rd_dinodes;
+	return 0;
+}
+
+/**
+ * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
+ * @sdp: the filesystem
+ * @sc: the sc info that will be returned
+ *
+ * Any error (other than a signal) will cause this routine to fall back
+ * to the synchronous version.
+ *
+ * FIXME: This really shouldn't busy wait like this.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
+{
+	struct gfs2_holder ri_gh;
+	struct gfs2_rgrpd *rgd_next;
+	struct gfs2_holder *gha, *gh;
+	unsigned int slots = 64;
+	unsigned int x;
+	int done;
+	int error = 0, err;
+
+	memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
+	gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
+	if (!gha)
+		return -ENOMEM;
+
+	error = gfs2_rindex_hold(sdp, &ri_gh);
+	if (error)
+		goto out;
+
+	rgd_next = gfs2_rgrpd_get_first(sdp);
+
+	for (;;) {
+		done = 1;
+
+		for (x = 0; x < slots; x++) {
+			gh = gha + x;
+
+			if (gh->gh_gl && gfs2_glock_poll(gh)) {
+				err = gfs2_glock_wait(gh);
+				if (err) {
+					gfs2_holder_uninit(gh);
+					error = err;
+				} else {
+					if (!error)
+						error = statfs_slow_fill(
+							gh->gh_gl->gl_object, sc);
+					gfs2_glock_dq_uninit(gh);
+				}
+			}
+
+			if (gh->gh_gl)
+				done = 0;
+			else if (rgd_next && !error) {
+				error = gfs2_glock_nq_init(rgd_next->rd_gl,
+							   LM_ST_SHARED,
+							   GL_ASYNC,
+							   gh);
+				rgd_next = gfs2_rgrpd_get_next(rgd_next);
+				done = 0;
+			}
+
+			if (signal_pending(current))
+				error = -ERESTARTSYS;
+		}
+
+		if (done)
+			break;
+
+		yield();
+	}
+
+	gfs2_glock_dq_uninit(&ri_gh);
+
+out:
+	kfree(gha);
+	return error;
+}
+
+/**
+ * gfs2_statfs_i - Do a statfs
+ * @sdp: the filesystem
+ * @sg: the sg structure
+ *
+ * Returns: errno
+ */
+
+static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
+{
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+
+	spin_lock(&sdp->sd_statfs_spin);
+
+	*sc = *m_sc;
+	sc->sc_total += l_sc->sc_total;
+	sc->sc_free += l_sc->sc_free;
+	sc->sc_dinodes += l_sc->sc_dinodes;
+
+	spin_unlock(&sdp->sd_statfs_spin);
+
+	if (sc->sc_free < 0)
+		sc->sc_free = 0;
+	if (sc->sc_free > sc->sc_total)
+		sc->sc_free = sc->sc_total;
+	if (sc->sc_dinodes < 0)
+		sc->sc_dinodes = 0;
+
+	return 0;
+}
+
+/**
  * gfs2_statfs - Gather and return stats about the filesystem
  * @sb: The superblock
  * @statfsbuf: The buffer
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index b56ba3d..efd09c3 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -585,6 +585,28 @@ fail:
 	return error;
 }
 
+static struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
+{
+	struct gfs2_jdesc *jd;
+	int found = 0;
+
+	spin_lock(&sdp->sd_jindex_spin);
+
+	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+		if (jd->jd_dirty) {
+			jd->jd_dirty = 0;
+			found = 1;
+			break;
+		}
+	}
+	spin_unlock(&sdp->sd_jindex_spin);
+
+	if (!found)
+		jd = NULL;
+
+	return jd;
+}
+
 /**
  * gfs2_check_journals - Recover any dirty journals
  * @sdp: the filesystem
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 3dd9f57..141b781 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -96,39 +96,6 @@ struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
 	return jd;
 }
 
-void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
-{
-	struct gfs2_jdesc *jd;
-
-	spin_lock(&sdp->sd_jindex_spin);
-	jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
-	if (jd)
-		jd->jd_dirty = 1;
-	spin_unlock(&sdp->sd_jindex_spin);
-}
-
-struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
-{
-	struct gfs2_jdesc *jd;
-	int found = 0;
-
-	spin_lock(&sdp->sd_jindex_spin);
-
-	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
-		if (jd->jd_dirty) {
-			jd->jd_dirty = 0;
-			found = 1;
-			break;
-		}
-	}
-	spin_unlock(&sdp->sd_jindex_spin);
-
-	if (!found)
-		jd = NULL;
-
-	return jd;
-}
-
 int gfs2_jdesc_check(struct gfs2_jdesc *jd)
 {
 	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
@@ -353,137 +320,6 @@ out:
 	return error;
 }
 
-/**
- * gfs2_statfs_i - Do a statfs
- * @sdp: the filesystem
- * @sg: the sg structure
- *
- * Returns: errno
- */
-
-int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
-{
-	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
-	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
-
-	spin_lock(&sdp->sd_statfs_spin);
-
-	*sc = *m_sc;
-	sc->sc_total += l_sc->sc_total;
-	sc->sc_free += l_sc->sc_free;
-	sc->sc_dinodes += l_sc->sc_dinodes;
-
-	spin_unlock(&sdp->sd_statfs_spin);
-
-	if (sc->sc_free < 0)
-		sc->sc_free = 0;
-	if (sc->sc_free > sc->sc_total)
-		sc->sc_free = sc->sc_total;
-	if (sc->sc_dinodes < 0)
-		sc->sc_dinodes = 0;
-
-	return 0;
-}
-
-/**
- * statfs_fill - fill in the sg for a given RG
- * @rgd: the RG
- * @sc: the sc structure
- *
- * Returns: 0 on success, -ESTALE if the LVB is invalid
- */
-
-static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
-			    struct gfs2_statfs_change_host *sc)
-{
-	gfs2_rgrp_verify(rgd);
-	sc->sc_total += rgd->rd_data;
-	sc->sc_free += rgd->rd_free;
-	sc->sc_dinodes += rgd->rd_dinodes;
-	return 0;
-}
-
-/**
- * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
- * @sdp: the filesystem
- * @sc: the sc info that will be returned
- *
- * Any error (other than a signal) will cause this routine to fall back
- * to the synchronous version.
- *
- * FIXME: This really shouldn't busy wait like this.
- *
- * Returns: errno
- */
-
-int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
-{
-	struct gfs2_holder ri_gh;
-	struct gfs2_rgrpd *rgd_next;
-	struct gfs2_holder *gha, *gh;
-	unsigned int slots = 64;
-	unsigned int x;
-	int done;
-	int error = 0, err;
-
-	memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
-	gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
-	if (!gha)
-		return -ENOMEM;
-
-	error = gfs2_rindex_hold(sdp, &ri_gh);
-	if (error)
-		goto out;
-
-	rgd_next = gfs2_rgrpd_get_first(sdp);
-
-	for (;;) {
-		done = 1;
-
-		for (x = 0; x < slots; x++) {
-			gh = gha + x;
-
-			if (gh->gh_gl && gfs2_glock_poll(gh)) {
-				err = gfs2_glock_wait(gh);
-				if (err) {
-					gfs2_holder_uninit(gh);
-					error = err;
-				} else {
-					if (!error)
-						error = statfs_slow_fill(
-							gh->gh_gl->gl_object, sc);
-					gfs2_glock_dq_uninit(gh);
-				}
-			}
-
-			if (gh->gh_gl)
-				done = 0;
-			else if (rgd_next && !error) {
-				error = gfs2_glock_nq_init(rgd_next->rd_gl,
-							   LM_ST_SHARED,
-							   GL_ASYNC,
-							   gh);
-				rgd_next = gfs2_rgrpd_get_next(rgd_next);
-				done = 0;
-			}
-
-			if (signal_pending(current))
-				error = -ERESTARTSYS;
-		}
-
-		if (done)
-			break;
-
-		yield();
-	}
-
-	gfs2_glock_dq_uninit(&ri_gh);
-
-out:
-	kfree(gha);
-	return error;
-}
-
 struct lfcc {
 	struct list_head list;
 	struct gfs2_holder gh;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index c625459..f6b8b00 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -28,8 +28,6 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
 void gfs2_jindex_free(struct gfs2_sbd *sdp);
 
 struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
-void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid);
-struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp);
 int gfs2_jdesc_check(struct gfs2_jdesc *jd);
 
 int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
@@ -41,8 +39,6 @@ int gfs2_statfs_init(struct gfs2_sbd *sdp);
 void gfs2_statfs_change(struct gfs2_sbd *sdp,
 			s64 total, s64 free, s64 dinodes);
 int gfs2_statfs_sync(struct gfs2_sbd *sdp);
-int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
-int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
 
 int gfs2_freeze_fs(struct gfs2_sbd *sdp);
 void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
-- 
cgit v0.10.2


From 2e204703a1161e9bae38ba0d3d0df04a679e6f4f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 26 Nov 2008 14:01:26 +0000
Subject: GFS2: Remove ancient, unused code

Remove code that used to have something to do with initrd
but has been unused for a long time.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 8c0f16e..3cb0a44 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -85,14 +85,6 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
 	int error = 0;
 
 	if (!remount) {
-		/*  If someone preloaded options, use those instead  */
-		spin_lock(&gfs2_sys_margs_lock);
-		if (gfs2_sys_margs) {
-			data = gfs2_sys_margs;
-			gfs2_sys_margs = NULL;
-		}
-		spin_unlock(&gfs2_sys_margs_lock);
-
 		/*  Set some defaults  */
 		args->ar_quota = GFS2_QUOTA_DEFAULT;
 		args->ar_data = GFS2_DATA_DEFAULT;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 298bcb6..26c1fa7 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -26,9 +26,6 @@
 #include "quota.h"
 #include "util.h"
 
-char *gfs2_sys_margs;
-spinlock_t gfs2_sys_margs_lock;
-
 static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%u:%u\n",
@@ -477,8 +474,6 @@ static struct kset_uevent_ops gfs2_uevent_ops = {
 
 int gfs2_sys_init(void)
 {
-	gfs2_sys_margs = NULL;
-	spin_lock_init(&gfs2_sys_margs_lock);
 	gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj);
 	if (!gfs2_kset)
 		return -ENOMEM;
@@ -487,7 +482,6 @@ int gfs2_sys_init(void)
 
 void gfs2_sys_uninit(void)
 {
-	kfree(gfs2_sys_margs);
 	kset_unregister(gfs2_kset);
 }
 
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
index 1ca8cda..e94560e 100644
--- a/fs/gfs2/sys.h
+++ b/fs/gfs2/sys.h
@@ -13,10 +13,6 @@
 #include <linux/spinlock.h>
 struct gfs2_sbd;
 
-/* Allow args to be passed to GFS2 when using an initial ram disk */
-extern char *gfs2_sys_margs;
-extern spinlock_t gfs2_sys_margs_lock;
-
 int gfs2_sys_fs_add(struct gfs2_sbd *sdp);
 void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
 
-- 
cgit v0.10.2


From 3af165ac4d099385b12e3e75a9ee3ffd02da33e0 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 27 Nov 2008 08:27:28 +0000
Subject: GFS2: Fix use-after-free bug on umount

There was a use-after-free with the GFS2 super block during
umount. This patch moves almost all of the umount code from
->put_super into ->kill_sb, the only bit that cannot be moved
being the glock hash clearing which has to remain as ->put_super
due to umount ordering requirements. As a result its now obvious
that the kfree is the final operation, whereas before it was
hidden in ->put_super.

Also gfs2_jindex_free is then only referenced from a single file
so thats moved and marked static too.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6e298b0..5eae62e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1547,8 +1547,9 @@ static void clear_glock(struct gfs2_glock *gl)
  * Called when unmounting the filesystem.
  */
 
-void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
+void gfs2_gl_hash_clear(struct super_block *sb)
 {
+	struct gfs2_sbd *sdp = sb->s_fs_info;
 	unsigned long t;
 	unsigned int x;
 	int cont;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 543ec7e..ce54f33 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -130,7 +130,7 @@ void gfs2_lvb_unhold(struct gfs2_glock *gl);
 
 void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
 void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
-void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
+void gfs2_gl_hash_clear(struct super_block *sb);
 void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
 
 int __init gfs2_glock_init(void);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 4cae60f..2e735be 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -705,6 +705,40 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
 	return error;
 }
 
+/**
+ * gfs2_jindex_free - Clear all the journal index information
+ * @sdp: The GFS2 superblock
+ *
+ */
+
+static void gfs2_jindex_free(struct gfs2_sbd *sdp)
+{
+	struct list_head list, *head;
+	struct gfs2_jdesc *jd;
+	struct gfs2_journal_extent *jext;
+
+	spin_lock(&sdp->sd_jindex_spin);
+	list_add(&list, &sdp->sd_jindex_list);
+	list_del_init(&sdp->sd_jindex_list);
+	sdp->sd_journals = 0;
+	spin_unlock(&sdp->sd_jindex_spin);
+
+	while (!list_empty(&list)) {
+		jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
+		head = &jd->extent_list;
+		while (!list_empty(head)) {
+			jext = list_entry(head->next,
+					  struct gfs2_journal_extent,
+					  extent_list);
+			list_del(&jext->extent_list);
+			kfree(jext);
+		}
+		list_del(&jd->jd_list);
+		iput(jd->jd_inode);
+		kfree(jd);
+	}
+}
+
 static int init_journal(struct gfs2_sbd *sdp, int undo)
 {
 	struct inode *master = sdp->sd_master_dir->d_inode;
@@ -1203,7 +1237,7 @@ fail_sb:
 fail_locking:
 	init_locking(sdp, &mount_gh, UNDO);
 fail_lm:
-	gfs2_gl_hash_clear(sdp);
+	gfs2_gl_hash_clear(sb);
 	gfs2_lm_unmount(sdp);
 	while (invalidate_inodes(sb))
 		yield();
@@ -1263,17 +1297,61 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
 static void gfs2_kill_sb(struct super_block *sb)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-	if (sdp) {
-		gfs2_meta_syncfs(sdp);
-		dput(sdp->sd_root_dir);
-		dput(sdp->sd_master_dir);
-		sdp->sd_root_dir = NULL;
-		sdp->sd_master_dir = NULL;
+
+	if (sdp == NULL) {
+		kill_block_super(sb);
+		return;
 	}
-	shrink_dcache_sb(sb);
+	gfs2_meta_syncfs(sdp);
+	dput(sdp->sd_root_dir);
+	dput(sdp->sd_master_dir);
+	sdp->sd_root_dir = NULL;
+	sdp->sd_master_dir = NULL;
+
+	/*  Unfreeze the filesystem, if we need to  */
+	mutex_lock(&sdp->sd_freeze_lock);
+	if (sdp->sd_freeze_count)
+		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
+	mutex_unlock(&sdp->sd_freeze_lock);
+
+	kthread_stop(sdp->sd_quotad_process);
+	kthread_stop(sdp->sd_logd_process);
+	kthread_stop(sdp->sd_recoverd_process);
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		int error = gfs2_make_fs_ro(sdp);
+		if (error)
+			gfs2_io_error(sdp);
+	}
+
+	/* At this point, we're through modifying the disk */
+	gfs2_jindex_free(sdp);
+	gfs2_clear_rgrpd(sdp);
+	iput(sdp->sd_jindex);
+	iput(sdp->sd_inum_inode);
+	iput(sdp->sd_statfs_inode);
+	iput(sdp->sd_rindex);
+	iput(sdp->sd_quota_inode);
+
+	gfs2_glock_put(sdp->sd_rename_gl);
+	gfs2_glock_put(sdp->sd_trans_gl);
+
+	if (!sdp->sd_args.ar_spectator) {
+		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
+		iput(sdp->sd_ir_inode);
+		iput(sdp->sd_sc_inode);
+		iput(sdp->sd_qc_inode);
+	}
+	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
 	kill_block_super(sb);
-	if (sdp)
-		gfs2_delete_debugfs_file(sdp);
+	gfs2_lm_unmount(sdp);
+	gfs2_sys_fs_del(sdp);
+	gfs2_delete_debugfs_file(sdp);
+	kfree(sdp);
 }
 
 struct file_system_type gfs2_fs_type = {
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 08837a7..bd08a0a 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -95,7 +95,7 @@ do_flush:
  * Returns: errno
  */
 
-static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 {
 	struct gfs2_holder t_gh;
 	int error;
@@ -122,70 +122,6 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 }
 
 /**
- * gfs2_put_super - Unmount the filesystem
- * @sb: The VFS superblock
- *
- */
-
-static void gfs2_put_super(struct super_block *sb)
-{
-	struct gfs2_sbd *sdp = sb->s_fs_info;
-	int error;
-
-	/*  Unfreeze the filesystem, if we need to  */
-
-	mutex_lock(&sdp->sd_freeze_lock);
-	if (sdp->sd_freeze_count)
-		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
-	mutex_unlock(&sdp->sd_freeze_lock);
-
-	kthread_stop(sdp->sd_quotad_process);
-	kthread_stop(sdp->sd_logd_process);
-	kthread_stop(sdp->sd_recoverd_process);
-
-	if (!(sb->s_flags & MS_RDONLY)) {
-		error = gfs2_make_fs_ro(sdp);
-		if (error)
-			gfs2_io_error(sdp);
-	}
-	/*  At this point, we're through modifying the disk  */
-
-	/*  Release stuff  */
-
-	iput(sdp->sd_jindex);
-	iput(sdp->sd_inum_inode);
-	iput(sdp->sd_statfs_inode);
-	iput(sdp->sd_rindex);
-	iput(sdp->sd_quota_inode);
-
-	gfs2_glock_put(sdp->sd_rename_gl);
-	gfs2_glock_put(sdp->sd_trans_gl);
-
-	if (!sdp->sd_args.ar_spectator) {
-		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
-		iput(sdp->sd_ir_inode);
-		iput(sdp->sd_sc_inode);
-		iput(sdp->sd_qc_inode);
-	}
-
-	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
-	gfs2_clear_rgrpd(sdp);
-	gfs2_jindex_free(sdp);
-	/*  Take apart glock structures and buffer lists  */
-	gfs2_gl_hash_clear(sdp);
-	/*  Unmount the locking protocol  */
-	gfs2_lm_unmount(sdp);
-
-	/*  At this point, we're through participating in the lockspace  */
-	gfs2_sys_fs_del(sdp);
-	kfree(sdp);
-}
-
-/**
  * gfs2_write_super
  * @sb: the superblock
  *
@@ -686,7 +622,7 @@ const struct super_operations gfs2_super_ops = {
 	.destroy_inode		= gfs2_destroy_inode,
 	.write_inode		= gfs2_write_inode,
 	.delete_inode		= gfs2_delete_inode,
-	.put_super		= gfs2_put_super,
+	.put_super		= gfs2_gl_hash_clear,
 	.write_super		= gfs2_write_super,
 	.sync_fs		= gfs2_sync_fs,
 	.write_super_lockfs 	= gfs2_write_super_lockfs,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 141b781..f14658b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -33,40 +33,6 @@
 #include "trans.h"
 #include "util.h"
 
-/**
- * gfs2_jindex_free - Clear all the journal index information
- * @sdp: The GFS2 superblock
- *
- */
-
-void gfs2_jindex_free(struct gfs2_sbd *sdp)
-{
-	struct list_head list, *head;
-	struct gfs2_jdesc *jd;
-	struct gfs2_journal_extent *jext;
-
-	spin_lock(&sdp->sd_jindex_spin);
-	list_add(&list, &sdp->sd_jindex_list);
-	list_del_init(&sdp->sd_jindex_list);
-	sdp->sd_journals = 0;
-	spin_unlock(&sdp->sd_jindex_spin);
-
-	while (!list_empty(&list)) {
-		jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
-		head = &jd->extent_list;
-		while (!list_empty(head)) {
-			jext = list_entry(head->next,
-					  struct gfs2_journal_extent,
-					  extent_list);
-			list_del(&jext->extent_list);
-			kfree(jext);
-		}
-		list_del(&jd->jd_list);
-		iput(jd->jd_inode);
-		kfree(jd);
-	}
-}
-
 static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
 {
 	struct gfs2_jdesc *jd;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index f6b8b00..4d2492b 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -25,8 +25,6 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
 	return x;
 }
 
-void gfs2_jindex_free(struct gfs2_sbd *sdp);
-
 struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
 int gfs2_jdesc_check(struct gfs2_jdesc *jd);
 
@@ -34,6 +32,7 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
 			      struct gfs2_inode **ipp);
 
 int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
+int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
 
 int gfs2_statfs_init(struct gfs2_sbd *sdp);
 void gfs2_statfs_change(struct gfs2_sbd *sdp,
-- 
cgit v0.10.2


From 9a776db7371b9c77a8f4f0d2ac6374d78ac7db7d Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 27 Nov 2008 09:42:51 +0000
Subject: GFS2: Send useful information with uevent messages

In order to distinguish between two differing uevent messages
and to avoid using the (racy) method of reading status from
sysfs in future, this adds some status information to our
uevent messages.

Btw, before anybody says "sysfs isn't racy", I'm aware of that,
but the way that GFS2 was using it (send an ambiugous uevent and
then expect the receiver to read sysfs to find out the status
of the reported operation) was.

The additional benefit of using the new interface is that it
should be possible for a node to recover multiple journals
at the same time, since there is no longer any confusion as
to which journal the status belongs to.

At some future stage, when all the userland programs have been
converted, I intend to remove the old interface.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 0c4cbe6..1aa7eb6 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -194,17 +194,25 @@ out:
 static void gdlm_recovery_done(void *lockspace, unsigned int jid,
                                unsigned int message)
 {
+	char env_jid[20];
+	char env_status[20];
+	char *envp[] = { env_jid, env_status, NULL };
 	struct gdlm_ls *ls = lockspace;
 	ls->recover_jid_done = jid;
 	ls->recover_jid_status = message;
-	kobject_uevent(&ls->kobj, KOBJ_CHANGE);
+	sprintf(env_jid, "JID=%d", jid);
+	sprintf(env_status, "RECOVERY=%s",
+		message == LM_RD_SUCCESS ? "Done" : "Failed");
+	kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
 }
 
 static void gdlm_others_may_mount(void *lockspace)
 {
+	char *message = "FIRSTMOUNT=Done";
+	char *envp[] = { message, NULL };
 	struct gdlm_ls *ls = lockspace;
 	ls->first_done = 1;
-	kobject_uevent(&ls->kobj, KOBJ_CHANGE);
+	kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
 }
 
 /* Userspace gets the offline uevent, blocks new gfs locks on
-- 
cgit v0.10.2


From 7ed122e42c72b3e4531f8b4a9f72159e8303ac15 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 10 Dec 2008 10:28:10 +0000
Subject: GFS2: Streamline alloc calculations for writes

This patch removes some unused code, and make the calculation
of the number of blocks required conditional in order to reduce
the number of times this (potentially expensive) calculation
is done.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 789f28c..11ffc56 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1231,35 +1231,6 @@ int gfs2_file_dealloc(struct gfs2_inode *ip)
 }
 
 /**
- * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
- * @ip: the file
- * @len: the number of bytes to be written to the file
- * @data_blocks: returns the number of data blocks required
- * @ind_blocks: returns the number of indirect blocks required
- *
- */
-
-void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
-			    unsigned int *data_blocks, unsigned int *ind_blocks)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	unsigned int tmp;
-
-	if (gfs2_is_dir(ip)) {
-		*data_blocks = DIV_ROUND_UP(len, sdp->sd_jbsize) + 2;
-		*ind_blocks = 3 * (sdp->sd_max_jheight - 1);
-	} else {
-		*data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
-		*ind_blocks = 3 * (sdp->sd_max_height - 1);
-	}
-
-	for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
-		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
-		*ind_blocks += tmp;
-	}
-}
-
-/**
  * gfs2_write_alloc_required - figure out if a write will require an allocation
  * @ip: the file being written to
  * @offset: the offset to write to
@@ -1276,6 +1247,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
 	struct buffer_head bh;
 	unsigned int shift;
 	u64 lblock, lblock_stop, size;
+	u64 end_of_file;
 
 	*alloc_required = 0;
 
@@ -1291,19 +1263,12 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
 
 	*alloc_required = 1;
 	shift = sdp->sd_sb.sb_bsize_shift;
-	if (gfs2_is_dir(ip)) {
-		unsigned int bsize = sdp->sd_jbsize;
-		lblock = offset;
-		do_div(lblock, bsize);
-		lblock_stop = offset + len + bsize - 1;
-		do_div(lblock_stop, bsize);
-	} else {
-		u64 end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift;
-		lblock = offset >> shift;
-		lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
-		if (lblock_stop > end_of_file)
-			return 0;
-	}
+	BUG_ON(gfs2_is_dir(ip));
+	end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift;
+	lblock = offset >> shift;
+	lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
+	if (lblock_stop > end_of_file)
+		return 0;
 
 	size = (lblock_stop - lblock) << shift;
 	do {
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 4e6cde2..c983177 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -10,10 +10,40 @@
 #ifndef __BMAP_DOT_H__
 #define __BMAP_DOT_H__
 
+#include "inode.h"
+
 struct inode;
 struct gfs2_inode;
 struct page;
 
+
+/**
+ * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
+ * @ip: the file
+ * @len: the number of bytes to be written to the file
+ * @data_blocks: returns the number of data blocks required
+ * @ind_blocks: returns the number of indirect blocks required
+ *
+ */
+
+static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
+					  unsigned int len,
+					  unsigned int *data_blocks,
+					  unsigned int *ind_blocks)
+{
+	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	unsigned int tmp;
+
+	BUG_ON(gfs2_is_dir(ip));
+	*data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
+	*ind_blocks = 3 * (sdp->sd_max_height - 1);
+
+	for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
+		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
+		*ind_blocks += tmp;
+	}
+}
+
 int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
 int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create);
 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen);
@@ -21,10 +51,6 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
 int gfs2_truncatei(struct gfs2_inode *ip, u64 size);
 int gfs2_truncatei_resume(struct gfs2_inode *ip);
 int gfs2_file_dealloc(struct gfs2_inode *ip);
-
-void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
-			    unsigned int *data_blocks,
-			    unsigned int *ind_blocks);
 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
 			      unsigned int len, int *alloc_required);
 
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 0df560f..6e4ea36 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -625,7 +625,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 {
 	struct gfs2_inode *ip = GFS2_I(mapping->host);
 	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
-	unsigned int data_blocks, ind_blocks, rblocks;
+	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
 	int alloc_required;
 	int error = 0;
 	struct gfs2_alloc *al;
@@ -639,11 +639,13 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 	if (unlikely(error))
 		goto out_uninit;
 
-	gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
 	error = gfs2_write_alloc_required(ip, pos, len, &alloc_required);
 	if (error)
 		goto out_unlock;
 
+	if (alloc_required || gfs2_is_jdata(ip))
+		gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
+
 	if (alloc_required) {
 		al = gfs2_alloc_get(ip);
 		if (!al) {
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index a6b7a73..289c5f5 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -355,7 +355,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 		goto out;
 
 	set_bit(GIF_SW_PAGED, &ip->i_flags);
-	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
 	ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
 	if (ret || !alloc_required)
 		goto out_unlock;
@@ -367,6 +366,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	ret = gfs2_quota_lock_check(ip);
 	if (ret)
 		goto out_alloc_put;
+	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
 	al->al_requested = data_blocks + ind_blocks;
 	ret = gfs2_inplace_reserve(ip);
 	if (ret)
-- 
cgit v0.10.2


From fefc03bfedeff2002f14e848ecb7c0cd77ee0b15 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 19 Dec 2008 15:32:06 +0000
Subject: Revert "GFS2: Fix use-after-free bug on umount"

This reverts commit 78802499912f1ba31ce83a94c55b5a980f250a43.

The original patch is causing problems in relation to order of
operations at umount in relation to jdata files. I need to fix
this a different way.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 5eae62e..6e298b0 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1547,9 +1547,8 @@ static void clear_glock(struct gfs2_glock *gl)
  * Called when unmounting the filesystem.
  */
 
-void gfs2_gl_hash_clear(struct super_block *sb)
+void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 {
-	struct gfs2_sbd *sdp = sb->s_fs_info;
 	unsigned long t;
 	unsigned int x;
 	int cont;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index ce54f33..543ec7e 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -130,7 +130,7 @@ void gfs2_lvb_unhold(struct gfs2_glock *gl);
 
 void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
 void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
-void gfs2_gl_hash_clear(struct super_block *sb);
+void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
 void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
 
 int __init gfs2_glock_init(void);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 2e735be..4cae60f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -705,40 +705,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
 	return error;
 }
 
-/**
- * gfs2_jindex_free - Clear all the journal index information
- * @sdp: The GFS2 superblock
- *
- */
-
-static void gfs2_jindex_free(struct gfs2_sbd *sdp)
-{
-	struct list_head list, *head;
-	struct gfs2_jdesc *jd;
-	struct gfs2_journal_extent *jext;
-
-	spin_lock(&sdp->sd_jindex_spin);
-	list_add(&list, &sdp->sd_jindex_list);
-	list_del_init(&sdp->sd_jindex_list);
-	sdp->sd_journals = 0;
-	spin_unlock(&sdp->sd_jindex_spin);
-
-	while (!list_empty(&list)) {
-		jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
-		head = &jd->extent_list;
-		while (!list_empty(head)) {
-			jext = list_entry(head->next,
-					  struct gfs2_journal_extent,
-					  extent_list);
-			list_del(&jext->extent_list);
-			kfree(jext);
-		}
-		list_del(&jd->jd_list);
-		iput(jd->jd_inode);
-		kfree(jd);
-	}
-}
-
 static int init_journal(struct gfs2_sbd *sdp, int undo)
 {
 	struct inode *master = sdp->sd_master_dir->d_inode;
@@ -1237,7 +1203,7 @@ fail_sb:
 fail_locking:
 	init_locking(sdp, &mount_gh, UNDO);
 fail_lm:
-	gfs2_gl_hash_clear(sb);
+	gfs2_gl_hash_clear(sdp);
 	gfs2_lm_unmount(sdp);
 	while (invalidate_inodes(sb))
 		yield();
@@ -1297,61 +1263,17 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
 static void gfs2_kill_sb(struct super_block *sb)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-
-	if (sdp == NULL) {
-		kill_block_super(sb);
-		return;
-	}
-	gfs2_meta_syncfs(sdp);
-	dput(sdp->sd_root_dir);
-	dput(sdp->sd_master_dir);
-	sdp->sd_root_dir = NULL;
-	sdp->sd_master_dir = NULL;
-
-	/*  Unfreeze the filesystem, if we need to  */
-	mutex_lock(&sdp->sd_freeze_lock);
-	if (sdp->sd_freeze_count)
-		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
-	mutex_unlock(&sdp->sd_freeze_lock);
-
-	kthread_stop(sdp->sd_quotad_process);
-	kthread_stop(sdp->sd_logd_process);
-	kthread_stop(sdp->sd_recoverd_process);
-
-	if (!(sb->s_flags & MS_RDONLY)) {
-		int error = gfs2_make_fs_ro(sdp);
-		if (error)
-			gfs2_io_error(sdp);
-	}
-
-	/* At this point, we're through modifying the disk */
-	gfs2_jindex_free(sdp);
-	gfs2_clear_rgrpd(sdp);
-	iput(sdp->sd_jindex);
-	iput(sdp->sd_inum_inode);
-	iput(sdp->sd_statfs_inode);
-	iput(sdp->sd_rindex);
-	iput(sdp->sd_quota_inode);
-
-	gfs2_glock_put(sdp->sd_rename_gl);
-	gfs2_glock_put(sdp->sd_trans_gl);
-
-	if (!sdp->sd_args.ar_spectator) {
-		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
-		iput(sdp->sd_ir_inode);
-		iput(sdp->sd_sc_inode);
-		iput(sdp->sd_qc_inode);
+	if (sdp) {
+		gfs2_meta_syncfs(sdp);
+		dput(sdp->sd_root_dir);
+		dput(sdp->sd_master_dir);
+		sdp->sd_root_dir = NULL;
+		sdp->sd_master_dir = NULL;
 	}
-	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
+	shrink_dcache_sb(sb);
 	kill_block_super(sb);
-	gfs2_lm_unmount(sdp);
-	gfs2_sys_fs_del(sdp);
-	gfs2_delete_debugfs_file(sdp);
-	kfree(sdp);
+	if (sdp)
+		gfs2_delete_debugfs_file(sdp);
 }
 
 struct file_system_type gfs2_fs_type = {
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index bd08a0a..08837a7 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -95,7 +95,7 @@ do_flush:
  * Returns: errno
  */
 
-int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 {
 	struct gfs2_holder t_gh;
 	int error;
@@ -122,6 +122,70 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 }
 
 /**
+ * gfs2_put_super - Unmount the filesystem
+ * @sb: The VFS superblock
+ *
+ */
+
+static void gfs2_put_super(struct super_block *sb)
+{
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	int error;
+
+	/*  Unfreeze the filesystem, if we need to  */
+
+	mutex_lock(&sdp->sd_freeze_lock);
+	if (sdp->sd_freeze_count)
+		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
+	mutex_unlock(&sdp->sd_freeze_lock);
+
+	kthread_stop(sdp->sd_quotad_process);
+	kthread_stop(sdp->sd_logd_process);
+	kthread_stop(sdp->sd_recoverd_process);
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		error = gfs2_make_fs_ro(sdp);
+		if (error)
+			gfs2_io_error(sdp);
+	}
+	/*  At this point, we're through modifying the disk  */
+
+	/*  Release stuff  */
+
+	iput(sdp->sd_jindex);
+	iput(sdp->sd_inum_inode);
+	iput(sdp->sd_statfs_inode);
+	iput(sdp->sd_rindex);
+	iput(sdp->sd_quota_inode);
+
+	gfs2_glock_put(sdp->sd_rename_gl);
+	gfs2_glock_put(sdp->sd_trans_gl);
+
+	if (!sdp->sd_args.ar_spectator) {
+		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
+		iput(sdp->sd_ir_inode);
+		iput(sdp->sd_sc_inode);
+		iput(sdp->sd_qc_inode);
+	}
+
+	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
+	gfs2_clear_rgrpd(sdp);
+	gfs2_jindex_free(sdp);
+	/*  Take apart glock structures and buffer lists  */
+	gfs2_gl_hash_clear(sdp);
+	/*  Unmount the locking protocol  */
+	gfs2_lm_unmount(sdp);
+
+	/*  At this point, we're through participating in the lockspace  */
+	gfs2_sys_fs_del(sdp);
+	kfree(sdp);
+}
+
+/**
  * gfs2_write_super
  * @sb: the superblock
  *
@@ -622,7 +686,7 @@ const struct super_operations gfs2_super_ops = {
 	.destroy_inode		= gfs2_destroy_inode,
 	.write_inode		= gfs2_write_inode,
 	.delete_inode		= gfs2_delete_inode,
-	.put_super		= gfs2_gl_hash_clear,
+	.put_super		= gfs2_put_super,
 	.write_super		= gfs2_write_super,
 	.sync_fs		= gfs2_sync_fs,
 	.write_super_lockfs 	= gfs2_write_super_lockfs,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index f14658b..141b781 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -33,6 +33,40 @@
 #include "trans.h"
 #include "util.h"
 
+/**
+ * gfs2_jindex_free - Clear all the journal index information
+ * @sdp: The GFS2 superblock
+ *
+ */
+
+void gfs2_jindex_free(struct gfs2_sbd *sdp)
+{
+	struct list_head list, *head;
+	struct gfs2_jdesc *jd;
+	struct gfs2_journal_extent *jext;
+
+	spin_lock(&sdp->sd_jindex_spin);
+	list_add(&list, &sdp->sd_jindex_list);
+	list_del_init(&sdp->sd_jindex_list);
+	sdp->sd_journals = 0;
+	spin_unlock(&sdp->sd_jindex_spin);
+
+	while (!list_empty(&list)) {
+		jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
+		head = &jd->extent_list;
+		while (!list_empty(head)) {
+			jext = list_entry(head->next,
+					  struct gfs2_journal_extent,
+					  extent_list);
+			list_del(&jext->extent_list);
+			kfree(jext);
+		}
+		list_del(&jd->jd_list);
+		iput(jd->jd_inode);
+		kfree(jd);
+	}
+}
+
 static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
 {
 	struct gfs2_jdesc *jd;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 4d2492b..f6b8b00 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -25,6 +25,8 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
 	return x;
 }
 
+void gfs2_jindex_free(struct gfs2_sbd *sdp);
+
 struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
 int gfs2_jdesc_check(struct gfs2_jdesc *jd);
 
@@ -32,7 +34,6 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
 			      struct gfs2_inode **ipp);
 
 int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
-int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
 
 int gfs2_statfs_init(struct gfs2_sbd *sdp);
 void gfs2_statfs_change(struct gfs2_sbd *sdp,
-- 
cgit v0.10.2


From 88a19ad066c1aab2f9713beb670525fcc06e1c09 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 19 Dec 2008 15:43:05 +0000
Subject: GFS2: Fix use-after-free bug on umount (try #2)

This should solve the issue with the previous attempt at fixing this.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 4cae60f..f91eebd 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1263,17 +1263,21 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
 static void gfs2_kill_sb(struct super_block *sb)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-	if (sdp) {
-		gfs2_meta_syncfs(sdp);
-		dput(sdp->sd_root_dir);
-		dput(sdp->sd_master_dir);
-		sdp->sd_root_dir = NULL;
-		sdp->sd_master_dir = NULL;
+
+	if (sdp == NULL) {
+		kill_block_super(sb);
+		return;
 	}
+
+	gfs2_meta_syncfs(sdp);
+	dput(sdp->sd_root_dir);
+	dput(sdp->sd_master_dir);
+	sdp->sd_root_dir = NULL;
+	sdp->sd_master_dir = NULL;
 	shrink_dcache_sb(sb);
 	kill_block_super(sb);
-	if (sdp)
-		gfs2_delete_debugfs_file(sdp);
+	gfs2_delete_debugfs_file(sdp);
+	kfree(sdp);
 }
 
 struct file_system_type gfs2_fs_type = {
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 08837a7..777783d 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -182,7 +182,6 @@ static void gfs2_put_super(struct super_block *sb)
 
 	/*  At this point, we're through participating in the lockspace  */
 	gfs2_sys_fs_del(sdp);
-	kfree(sdp);
 }
 
 /**
-- 
cgit v0.10.2


From eb8374e71f941a1b3c2ed6ea19dc809e7124dc5d Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Thu, 25 Dec 2008 15:35:27 +0100
Subject: GFS2: Use DEFINE_SPINLOCK

SPIN_LOCK_UNLOCKED is deprecated.  The following makes the change suggested
in Documentation/spinlocks.txt

The semantic patch that makes this change is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
declarer name DEFINE_SPINLOCK;
identifier xxx_lock;
@@

- spinlock_t xxx_lock = SPIN_LOCK_UNLOCKED;
+ DEFINE_SPINLOCK(xxx_lock);
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6e298b0..6b983ae 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -65,7 +65,7 @@ static struct dentry *gfs2_root;
 static struct workqueue_struct *glock_workqueue;
 static LIST_HEAD(lru_list);
 static atomic_t lru_count = ATOMIC_INIT(0);
-static spinlock_t lru_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(lru_lock);
 
 #define GFS2_GL_HASH_SHIFT      15
 #define GFS2_GL_HASH_SIZE       (1 << GFS2_GL_HASH_SHIFT)
-- 
cgit v0.10.2


From 4f7d54f59bc470f0aaa932f747a95232d7ebf8b1 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@marvell.com>
Date: Mon, 5 Jan 2009 00:00:12 -0800
Subject: tcp: don't mask EOF and socket errors on nonblocking splice receive

Currently, setting SPLICE_F_NONBLOCK on splice from a TCP socket
results in masking of EOF (RDHUP) and error conditions on the socket
by an -EAGAIN return.  Move the NONBLOCK check in tcp_splice_read()
to be after the EOF and error checks to fix this.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4d655e9..bce1b06 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -580,10 +580,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 		else if (!ret) {
 			if (spliced)
 				break;
-			if (flags & SPLICE_F_NONBLOCK) {
-				ret = -EAGAIN;
-				break;
-			}
 			if (sock_flag(sk, SOCK_DONE))
 				break;
 			if (sk->sk_err) {
@@ -601,6 +597,10 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 					ret = -ENOTCONN;
 				break;
 			}
+			if (flags & SPLICE_F_NONBLOCK) {
+				ret = -EAGAIN;
+				break;
+			}
 			if (!timeo) {
 				ret = -EAGAIN;
 				break;
-- 
cgit v0.10.2


From 47cd5265ea8fe0b246bfd9b42ba69e13aa8b99bd Mon Sep 17 00:00:00 2001
From: Julian Calaby <julian.calaby@gmail.com>
Date: Mon, 5 Jan 2009 00:07:18 -0800
Subject: sparc: Clean arch-specific code in prom_common.c

prom_nextprop() and prom_firstprop() have slightly different calling
conventions in 32 and 64 bit SPARC.

prom_common.c uses a ifdef guard to ensure that these functions are
called correctly.

Adjust code to eliminate this ifdef by using a calling convention that
is compatible with both 32 and 64 bit SPARC.

Signed-off-by: Julian Calaby <julian.calaby@gmail.com>
Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c
index 4e9af59..ff7b591 100644
--- a/arch/sparc/kernel/prom_common.c
+++ b/arch/sparc/kernel/prom_common.c
@@ -155,20 +155,12 @@ static struct property * __init build_one_prop(phandle node, char *prev,
 		p->value = prom_early_alloc(special_len);
 		memcpy(p->value, special_val, special_len);
 	} else {
-#ifdef CONFIG_SPARC32
-		if (prev == NULL) {
-			name = prom_firstprop(node, NULL);
-		} else {
-			name = prom_nextprop(node, prev, NULL);
-		}
-#else
 		if (prev == NULL) {
-			prom_firstprop(node, p->name);
+			name = prom_firstprop(node, p->name);
 		} else {
-			prom_nextprop(node, prev, p->name);
+			name = prom_nextprop(node, prev, p->name);
 		}
-		name = p->name;
-#endif
+
 		if (strlen(name) == 0) {
 			tmp = p;
 			return NULL;
-- 
cgit v0.10.2


From 576b4d0cce9716a3a6c67ded27a638ef833b0a54 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 5 Jan 2009 00:55:24 -0800
Subject: sparc: Remove reg*.h from Kbuild

Forgot to commit this in previous change, noticed by
Sam.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index aeaec45..deeb0fb 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -13,9 +13,6 @@ header-y += perfctr.h
 header-y += psrcompat.h
 header-y += psr.h
 header-y += pstate.h
-header-y += reg.h
-header-y += reg_32.h
-header-y += reg_64.h
 header-y += traps.h
 header-y += uctx.h
 header-y += utrap.h
-- 
cgit v0.10.2


From 7945cc6464a4db0caf6dfacdfe05806051c4cb7b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 5 Jan 2009 00:59:00 -0800
Subject: tcp: Kill extraneous SPLICE_F_NONBLOCK checks.

In splice TCP receive, the SPLICE_F_NONBLOCK flag is used
to compute the "timeo" value.  So checking it again inside
of the main receive loop to trigger -EAGAIN processing is
entirely unnecessary.

Noticed by Jarek P. and Lennert Buytenhek.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bce1b06..35bcddf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -597,10 +597,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 					ret = -ENOTCONN;
 				break;
 			}
-			if (flags & SPLICE_F_NONBLOCK) {
-				ret = -EAGAIN;
-				break;
-			}
 			if (!timeo) {
 				ret = -EAGAIN;
 				break;
-- 
cgit v0.10.2


From 5cf1c00b0ef3ba964b2ad268a55c278cf43f798f Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Mon, 5 Jan 2009 02:08:30 -0800
Subject: ASoC: fix davinci-sffsdr buglet

Minor bugfix:  now that DaVinci kernels can support multiple
boards, board-specific ASoC components need to verify they're
running on the right board before initializing.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/davinci/davinci-sffsdr.c b/sound/soc/davinci/davinci-sffsdr.c
index f67579d..4935d1b 100644
--- a/sound/soc/davinci/davinci-sffsdr.c
+++ b/sound/soc/davinci/davinci-sffsdr.c
@@ -24,6 +24,7 @@
 #include <sound/soc-dapm.h>
 
 #include <asm/dma.h>
+#include <asm/mach-types.h>
 #include <asm/plat-sffsdr/sffsdr-fpga.h>
 
 #include <mach/mcbsp.h>
@@ -115,6 +116,9 @@ static int __init sffsdr_init(void)
 {
 	int ret;
 
+	if (!machine_is_sffsdr())
+		return -EINVAL;
+
 	sffsdr_snd_device = platform_device_alloc("soc-audio", 0);
 	if (!sffsdr_snd_device) {
 		printk(KERN_ERR "platform device allocation failed\n");
-- 
cgit v0.10.2


From 4ac6032d6c92f0ac65cf5bc56b68557b3f099b66 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Sat, 18 Oct 2008 19:11:42 -0700
Subject: ocfs2: Field prefixes for the xattr_bucket structure

The ocfs2_xattr_bucket structure keeps track of the buffers for one
xattr bucket.  Let's prefix the fields for easier code navigation.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 74d7367..9c0ee42 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -61,8 +61,8 @@ struct ocfs2_xattr_def_value_root {
 };
 
 struct ocfs2_xattr_bucket {
-	struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
-	struct ocfs2_xattr_header *xh;
+	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
+	struct ocfs2_xattr_header *bu_xh;
 };
 
 #define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
@@ -795,11 +795,11 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 
 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
 			ret = ocfs2_xattr_bucket_get_name_value(inode,
-								xs->bucket.xh,
+								xs->bucket.bu_xh,
 								i,
 								&block_off,
 								&name_offset);
-			xs->base = xs->bucket.bhs[block_off]->b_data;
+			xs->base = xs->bucket.bu_bhs[block_off]->b_data;
 		}
 		if (ocfs2_xattr_is_local(xs->here)) {
 			memcpy(buffer, (void *)xs->base +
@@ -818,7 +818,7 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 	ret = size;
 cleanup:
 	for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++)
-		brelse(xs->bucket.bhs[i]);
+		brelse(xs->bucket.bu_bhs[i]);
 	memset(&xs->bucket, 0, sizeof(xs->bucket));
 
 	brelse(xs->xattr_bh);
@@ -2032,7 +2032,7 @@ cleanup:
 	brelse(di_bh);
 	brelse(xbs.xattr_bh);
 	for (i = 0; i < blk_per_bucket; i++)
-		brelse(xbs.bucket.bhs[i]);
+		brelse(xbs.bucket.bu_bhs[i]);
 
 	return ret;
 }
@@ -2276,13 +2276,13 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 		lower_bh = bh;
 		bh = NULL;
 	}
-	xs->bucket.bhs[0] = lower_bh;
-	xs->bucket.xh = (struct ocfs2_xattr_header *)
-					xs->bucket.bhs[0]->b_data;
+	xs->bucket.bu_bhs[0] = lower_bh;
+	xs->bucket.bu_xh = (struct ocfs2_xattr_header *)
+					xs->bucket.bu_bhs[0]->b_data;
 	lower_bh = NULL;
 
-	xs->header = xs->bucket.xh;
-	xs->base = xs->bucket.bhs[0]->b_data;
+	xs->header = xs->bucket.bu_xh;
+	xs->base = xs->bucket.bu_bhs[0]->b_data;
 	xs->end = xs->base + inode->i_sb->s_blocksize;
 
 	if (found) {
@@ -2290,8 +2290,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 		 * If we have found the xattr enty, read all the blocks in
 		 * this bucket.
 		 */
-		ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1,
-					blk_per_bucket - 1, &xs->bucket.bhs[1],
+		ret = ocfs2_read_blocks(inode, xs->bucket.bu_bhs[0]->b_blocknr + 1,
+					blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
 					0);
 		if (ret) {
 			mlog_errno(ret);
@@ -2300,7 +2300,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 
 		xs->here = &xs->header->xh_entries[index];
 		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
-		     (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index);
+		     (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr, index);
 	} else
 		ret = -ENODATA;
 
@@ -2370,23 +2370,23 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
 
 	for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
 		ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
-					bucket.bhs, 0);
+					bucket.bu_bhs, 0);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
+		bucket.bu_xh = (struct ocfs2_xattr_header *)bucket.bu_bhs[0]->b_data;
 		/*
 		 * The real bucket num in this series of blocks is stored
 		 * in the 1st bucket.
 		 */
 		if (i == 0)
-			num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
+			num_buckets = le16_to_cpu(bucket.bu_xh->xh_num_buckets);
 
 		mlog(0, "iterating xattr bucket %llu, first hash %u\n",
 		     (unsigned long long)blkno,
-		     le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash));
+		     le32_to_cpu(bucket.bu_xh->xh_entries[0].xe_name_hash));
 		if (func) {
 			ret = func(inode, &bucket, para);
 			if (ret) {
@@ -2396,13 +2396,13 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
 		}
 
 		for (j = 0; j < blk_per_bucket; j++)
-			brelse(bucket.bhs[j]);
+			brelse(bucket.bu_bhs[j]);
 		memset(&bucket, 0, sizeof(bucket));
 	}
 
 out:
 	for (j = 0; j < blk_per_bucket; j++)
-		brelse(bucket.bhs[j]);
+		brelse(bucket.bu_bhs[j]);
 
 	return ret;
 }
@@ -2441,21 +2441,21 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
 	int i, block_off, new_offset;
 	const char *prefix, *name;
 
-	for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) {
-		struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i];
+	for (i = 0 ; i < le16_to_cpu(bucket->bu_xh->xh_count); i++) {
+		struct ocfs2_xattr_entry *entry = &bucket->bu_xh->xh_entries[i];
 		type = ocfs2_xattr_get_type(entry);
 		prefix = ocfs2_xattr_prefix(type);
 
 		if (prefix) {
 			ret = ocfs2_xattr_bucket_get_name_value(inode,
-								bucket->xh,
+								bucket->bu_xh,
 								i,
 								&block_off,
 								&new_offset);
 			if (ret)
 				break;
 
-			name = (const char *)bucket->bhs[block_off]->b_data +
+			name = (const char *)bucket->bu_bhs[block_off]->b_data +
 				new_offset;
 			ret = ocfs2_xattr_list_entry(xl->buffer,
 						     xl->buffer_size,
@@ -2626,10 +2626,10 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode,
 	int i, blocksize = inode->i_sb->s_blocksize;
 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
-	xs->bucket.bhs[0] = new_bh;
+	xs->bucket.bu_bhs[0] = new_bh;
 	get_bh(new_bh);
-	xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
-	xs->header = xs->bucket.xh;
+	xs->bucket.bu_xh = (struct ocfs2_xattr_header *)xs->bucket.bu_bhs[0]->b_data;
+	xs->header = xs->bucket.bu_xh;
 
 	xs->base = new_bh->b_data;
 	xs->end = xs->base + inode->i_sb->s_blocksize;
@@ -2637,8 +2637,8 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode,
 	if (!xs->not_found) {
 		if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
 			ret = ocfs2_read_blocks(inode,
-					xs->bucket.bhs[0]->b_blocknr + 1,
-					blk_per_bucket - 1, &xs->bucket.bhs[1],
+					xs->bucket.bu_bhs[0]->b_blocknr + 1,
+					blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
 					0);
 			if (ret) {
 				mlog_errno(ret);
@@ -2835,7 +2835,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 	size_t end, offset, len, value_len;
 	struct ocfs2_xattr_header *xh;
 	char *entries, *buf, *bucket_buf = NULL;
-	u64 blkno = bucket->bhs[0]->b_blocknr;
+	u64 blkno = bucket->bu_bhs[0]->b_blocknr;
 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	u16 xh_free_start;
 	size_t blocksize = inode->i_sb->s_blocksize;
@@ -3929,7 +3929,7 @@ static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
 	int block_off = offs >> inode->i_sb->s_blocksize_bits;
 
 	offs = offs % inode->i_sb->s_blocksize;
-	return bucket->bhs[block_off]->b_data + offs;
+	return bucket->bu_bhs[block_off]->b_data + offs;
 }
 
 /*
@@ -4124,12 +4124,12 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 
 	mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
 	     (unsigned long)xi->value_len, xi->name_index,
-	     (unsigned long long)xs->bucket.bhs[0]->b_blocknr);
+	     (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr);
 
-	if (!xs->bucket.bhs[1]) {
+	if (!xs->bucket.bu_bhs[1]) {
 		ret = ocfs2_read_blocks(inode,
-					xs->bucket.bhs[0]->b_blocknr + 1,
-					blk_per_bucket - 1, &xs->bucket.bhs[1],
+					xs->bucket.bu_bhs[0]->b_blocknr + 1,
+					blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
 					0);
 		if (ret) {
 			mlog_errno(ret);
@@ -4146,7 +4146,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 	}
 
 	for (i = 0; i < blk_per_bucket; i++) {
-		ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
+		ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[i],
 					   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -4158,7 +4158,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 
 	/*Only dirty the blocks we have touched in set xattr. */
 	ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
-						xs->bucket.bhs, blk_per_bucket);
+						xs->bucket.bu_bhs, blk_per_bucket);
 	if (ret)
 		mlog_errno(ret);
 out:
@@ -4272,10 +4272,10 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
 	struct ocfs2_xattr_entry *xe = xs->here;
 	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
 
-	BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe));
+	BUG_ON(!xs->bucket.bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
 
 	offset = xe - xh->xh_entries;
-	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0],
+	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bu_bhs[0],
 						offset, len);
 	if (ret)
 		mlog_errno(ret);
@@ -4395,7 +4395,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
 					 struct ocfs2_xattr_search *xs)
 {
 	handle_t *handle = NULL;
-	struct ocfs2_xattr_header *xh = xs->bucket.xh;
+	struct ocfs2_xattr_header *xh = xs->bucket.bu_xh;
 	struct ocfs2_xattr_entry *last = &xh->xh_entries[
 						le16_to_cpu(xh->xh_count) - 1];
 	int ret = 0;
@@ -4407,7 +4407,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
 		return;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
+	ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[0],
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -4420,7 +4420,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
 	memset(last, 0, sizeof(struct ocfs2_xattr_entry));
 	le16_add_cpu(&xh->xh_count, -1);
 
-	ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]);
+	ret = ocfs2_journal_dirty(handle, xs->bucket.bu_bhs[0]);
 	if (ret < 0)
 		mlog_errno(ret);
 out_commit:
@@ -4530,7 +4530,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
 					      struct ocfs2_xattr_bucket *bucket,
 					      const char *name)
 {
-	struct ocfs2_xattr_header *xh = bucket->xh;
+	struct ocfs2_xattr_header *xh = bucket->bu_xh;
 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
 
 	if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
@@ -4540,7 +4540,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
 	    xh->xh_entries[0].xe_name_hash) {
 		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
 		     "hash = %u\n",
-		     (unsigned long long)bucket->bhs[0]->b_blocknr,
+		     (unsigned long long)bucket->bu_bhs[0]->b_blocknr,
 		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
 		return -ENOSPC;
 	}
@@ -4574,7 +4574,7 @@ try_again:
 
 	mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
 			"of %u which exceed block size\n",
-			(unsigned long long)xs->bucket.bhs[0]->b_blocknr,
+			(unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr,
 			header_size);
 
 	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
@@ -4614,7 +4614,7 @@ try_again:
 	mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
 	     "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
 	     " %u\n", xs->not_found,
-	     (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
+	     (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr,
 	     free, need, max_free, le16_to_cpu(xh->xh_free_start),
 	     le16_to_cpu(xh->xh_name_value_len));
 
@@ -4667,14 +4667,14 @@ try_again:
 
 		ret = ocfs2_add_new_xattr_bucket(inode,
 						 xs->xattr_bh,
-						 xs->bucket.bhs[0]);
+						 xs->bucket.bu_bhs[0]);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
 		for (i = 0; i < blk_per_bucket; i++)
-			brelse(xs->bucket.bhs[i]);
+			brelse(xs->bucket.bu_bhs[i]);
 
 		memset(&xs->bucket, 0, sizeof(xs->bucket));
 
@@ -4700,7 +4700,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 					void *para)
 {
 	int ret = 0;
-	struct ocfs2_xattr_header *xh = bucket->xh;
+	struct ocfs2_xattr_header *xh = bucket->bu_xh;
 	u16 i;
 	struct ocfs2_xattr_entry *xe;
 
@@ -4710,7 +4710,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 			continue;
 
 		ret = ocfs2_xattr_bucket_value_truncate(inode,
-							bucket->bhs[0],
+							bucket->bu_bhs[0],
 							i, 0);
 		if (ret) {
 			mlog_errno(ret);
-- 
cgit v0.10.2


From 9c7759aa670918a48f0c6e06779cd20f2781a2ac Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 24 Oct 2008 16:21:03 -0700
Subject: ocfs2: Convenient access to an xattr bucket's block number.

The xattr code often wants to know the block number of an xattr bucket.
This is usually found by dereferencing the first bh hanging off of the
ocfs2_xattr_bucket structure.  Rather than do this all the time, let's
provide a nice little macro.  The idea is ripped from the ocfs2_path
code.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 9c0ee42..3cf8e80 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -154,6 +154,8 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
 	return len / sizeof(struct ocfs2_xattr_entry);
 }
 
+#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
+
 static inline const char *ocfs2_xattr_prefix(int name_index)
 {
 	struct xattr_handler *handler = NULL;
@@ -2290,7 +2292,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 		 * If we have found the xattr enty, read all the blocks in
 		 * this bucket.
 		 */
-		ret = ocfs2_read_blocks(inode, xs->bucket.bu_bhs[0]->b_blocknr + 1,
+		ret = ocfs2_read_blocks(inode, bucket_blkno(&xs->bucket) + 1,
 					blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
 					0);
 		if (ret) {
@@ -2300,7 +2302,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 
 		xs->here = &xs->header->xh_entries[index];
 		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
-		     (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr, index);
+		     (unsigned long long)bucket_blkno(&xs->bucket), index);
 	} else
 		ret = -ENODATA;
 
@@ -2637,7 +2639,7 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode,
 	if (!xs->not_found) {
 		if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
 			ret = ocfs2_read_blocks(inode,
-					xs->bucket.bu_bhs[0]->b_blocknr + 1,
+					bucket_blkno(&xs->bucket) + 1,
 					blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
 					0);
 			if (ret) {
@@ -2835,7 +2837,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 	size_t end, offset, len, value_len;
 	struct ocfs2_xattr_header *xh;
 	char *entries, *buf, *bucket_buf = NULL;
-	u64 blkno = bucket->bu_bhs[0]->b_blocknr;
+	u64 blkno = bucket_blkno(bucket);
 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	u16 xh_free_start;
 	size_t blocksize = inode->i_sb->s_blocksize;
@@ -4124,11 +4126,11 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 
 	mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
 	     (unsigned long)xi->value_len, xi->name_index,
-	     (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr);
+	     (unsigned long long)bucket_blkno(&xs->bucket));
 
 	if (!xs->bucket.bu_bhs[1]) {
 		ret = ocfs2_read_blocks(inode,
-					xs->bucket.bu_bhs[0]->b_blocknr + 1,
+					bucket_blkno(&xs->bucket) + 1,
 					blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
 					0);
 		if (ret) {
@@ -4540,7 +4542,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
 	    xh->xh_entries[0].xe_name_hash) {
 		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
 		     "hash = %u\n",
-		     (unsigned long long)bucket->bu_bhs[0]->b_blocknr,
+		     (unsigned long long)bucket_blkno(bucket),
 		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
 		return -ENOSPC;
 	}
@@ -4574,7 +4576,7 @@ try_again:
 
 	mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
 			"of %u which exceed block size\n",
-			(unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr,
+			(unsigned long long)bucket_blkno(&xs->bucket),
 			header_size);
 
 	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
@@ -4614,7 +4616,7 @@ try_again:
 	mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
 	     "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
 	     " %u\n", xs->not_found,
-	     (unsigned long long)xs->bucket.bu_bhs[0]->b_blocknr,
+	     (unsigned long long)bucket_blkno(&xs->bucket),
 	     free, need, max_free, le16_to_cpu(xh->xh_free_start),
 	     le16_to_cpu(xh->xh_name_value_len));
 
-- 
cgit v0.10.2


From 51def39f0cabd46131c7c4df08751cb0cb9433d1 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 24 Oct 2008 16:57:21 -0700
Subject: ocfs2: Convenient access to xattr bucket data blocks.

The xattr code often wants to access the data pointer for blocks in an
xattr bucket.  This is usually found by dereferencing the bh array
hanging off of the ocfs2_xattr_bucket structure.  Rather than do this
all the time, let's provide a nice little macro.  The idea is ripped
from the ocfs2_path code.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3cf8e80..8594df3 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -155,6 +155,7 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
 }
 
 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
+#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
 
 static inline const char *ocfs2_xattr_prefix(int name_index)
 {
@@ -801,7 +802,7 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 								i,
 								&block_off,
 								&name_offset);
-			xs->base = xs->bucket.bu_bhs[block_off]->b_data;
+			xs->base = bucket_block(&xs->bucket, block_off);
 		}
 		if (ocfs2_xattr_is_local(xs->here)) {
 			memcpy(buffer, (void *)xs->base +
@@ -2280,11 +2281,11 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 	}
 	xs->bucket.bu_bhs[0] = lower_bh;
 	xs->bucket.bu_xh = (struct ocfs2_xattr_header *)
-					xs->bucket.bu_bhs[0]->b_data;
+					bucket_block(&xs->bucket, 0);
 	lower_bh = NULL;
 
 	xs->header = xs->bucket.bu_xh;
-	xs->base = xs->bucket.bu_bhs[0]->b_data;
+	xs->base = bucket_block(&xs->bucket, 0);
 	xs->end = xs->base + inode->i_sb->s_blocksize;
 
 	if (found) {
@@ -2378,7 +2379,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
 			goto out;
 		}
 
-		bucket.bu_xh = (struct ocfs2_xattr_header *)bucket.bu_bhs[0]->b_data;
+		bucket.bu_xh = (struct ocfs2_xattr_header *)bucket_block(&bucket, 0);
 		/*
 		 * The real bucket num in this series of blocks is stored
 		 * in the 1st bucket.
@@ -2457,7 +2458,7 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
 			if (ret)
 				break;
 
-			name = (const char *)bucket->bu_bhs[block_off]->b_data +
+			name = (const char *)bucket_block(bucket, block_off) +
 				new_offset;
 			ret = ocfs2_xattr_list_entry(xl->buffer,
 						     xl->buffer_size,
@@ -2630,7 +2631,7 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode,
 
 	xs->bucket.bu_bhs[0] = new_bh;
 	get_bh(new_bh);
-	xs->bucket.bu_xh = (struct ocfs2_xattr_header *)xs->bucket.bu_bhs[0]->b_data;
+	xs->bucket.bu_xh = (struct ocfs2_xattr_header *)bucket_block(&xs->bucket, 0);
 	xs->header = xs->bucket.bu_xh;
 
 	xs->base = new_bh->b_data;
@@ -3931,7 +3932,7 @@ static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
 	int block_off = offs >> inode->i_sb->s_blocksize_bits;
 
 	offs = offs % inode->i_sb->s_blocksize;
-	return bucket->bu_bhs[block_off]->b_data + offs;
+	return bucket_block(bucket, block_off) + offs;
 }
 
 /*
-- 
cgit v0.10.2


From 3e6329463e3a5c311e1d607ff3db735a18b6d67a Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 24 Oct 2008 17:04:49 -0700
Subject: ocfs2: Convenient access to an xattr bucket's header.

The xattr code often wants to access the ocfs2_xattr_header at the start
of an bucket.  Rather than walk the pointer chains, let's just create
another nice macro.  As a side benefit, we can get rid of the mostly
spurious ->bu_xh element on the bucket structure.  The idea is ripped
from the ocfs2_path code.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 8594df3..1b77302 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -62,7 +62,6 @@ struct ocfs2_xattr_def_value_root {
 
 struct ocfs2_xattr_bucket {
 	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
-	struct ocfs2_xattr_header *bu_xh;
 };
 
 #define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
@@ -156,6 +155,7 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
 
 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
+#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
 
 static inline const char *ocfs2_xattr_prefix(int name_index)
 {
@@ -798,7 +798,7 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 
 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
 			ret = ocfs2_xattr_bucket_get_name_value(inode,
-								xs->bucket.bu_xh,
+								bucket_xh(&xs->bucket),
 								i,
 								&block_off,
 								&name_offset);
@@ -2280,11 +2280,9 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 		bh = NULL;
 	}
 	xs->bucket.bu_bhs[0] = lower_bh;
-	xs->bucket.bu_xh = (struct ocfs2_xattr_header *)
-					bucket_block(&xs->bucket, 0);
 	lower_bh = NULL;
 
-	xs->header = xs->bucket.bu_xh;
+	xs->header = bucket_xh(&xs->bucket);
 	xs->base = bucket_block(&xs->bucket, 0);
 	xs->end = xs->base + inode->i_sb->s_blocksize;
 
@@ -2379,17 +2377,16 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
 			goto out;
 		}
 
-		bucket.bu_xh = (struct ocfs2_xattr_header *)bucket_block(&bucket, 0);
 		/*
 		 * The real bucket num in this series of blocks is stored
 		 * in the 1st bucket.
 		 */
 		if (i == 0)
-			num_buckets = le16_to_cpu(bucket.bu_xh->xh_num_buckets);
+			num_buckets = le16_to_cpu(bucket_xh(&bucket)->xh_num_buckets);
 
 		mlog(0, "iterating xattr bucket %llu, first hash %u\n",
 		     (unsigned long long)blkno,
-		     le32_to_cpu(bucket.bu_xh->xh_entries[0].xe_name_hash));
+		     le32_to_cpu(bucket_xh(&bucket)->xh_entries[0].xe_name_hash));
 		if (func) {
 			ret = func(inode, &bucket, para);
 			if (ret) {
@@ -2444,14 +2441,14 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
 	int i, block_off, new_offset;
 	const char *prefix, *name;
 
-	for (i = 0 ; i < le16_to_cpu(bucket->bu_xh->xh_count); i++) {
-		struct ocfs2_xattr_entry *entry = &bucket->bu_xh->xh_entries[i];
+	for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
+		struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
 		type = ocfs2_xattr_get_type(entry);
 		prefix = ocfs2_xattr_prefix(type);
 
 		if (prefix) {
 			ret = ocfs2_xattr_bucket_get_name_value(inode,
-								bucket->bu_xh,
+								bucket_xh(bucket),
 								i,
 								&block_off,
 								&new_offset);
@@ -2631,8 +2628,7 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode,
 
 	xs->bucket.bu_bhs[0] = new_bh;
 	get_bh(new_bh);
-	xs->bucket.bu_xh = (struct ocfs2_xattr_header *)bucket_block(&xs->bucket, 0);
-	xs->header = xs->bucket.bu_xh;
+	xs->header = bucket_xh(&xs->bucket);
 
 	xs->base = new_bh->b_data;
 	xs->end = xs->base + inode->i_sb->s_blocksize;
@@ -4398,7 +4394,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
 					 struct ocfs2_xattr_search *xs)
 {
 	handle_t *handle = NULL;
-	struct ocfs2_xattr_header *xh = xs->bucket.bu_xh;
+	struct ocfs2_xattr_header *xh = bucket_xh(&xs->bucket);
 	struct ocfs2_xattr_entry *last = &xh->xh_entries[
 						le16_to_cpu(xh->xh_count) - 1];
 	int ret = 0;
@@ -4533,7 +4529,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
 					      struct ocfs2_xattr_bucket *bucket,
 					      const char *name)
 {
-	struct ocfs2_xattr_header *xh = bucket->bu_xh;
+	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
 
 	if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
@@ -4703,7 +4699,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 					void *para)
 {
 	int ret = 0;
-	struct ocfs2_xattr_header *xh = bucket->bu_xh;
+	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
 	u16 i;
 	struct ocfs2_xattr_entry *xe;
 
-- 
cgit v0.10.2


From 6dde41d9e7ba62f84cd7e91c0e993500af32ceb6 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 24 Oct 2008 17:16:48 -0700
Subject: ocfs2: Provide a wrapper to brelse() xattr bucket buffers.

A common theme is walking all the buffer heads on an ocfs2_xattr_bucket
and releasing them.  Let's wrap that.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 1b77302..3478ad1 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -157,6 +157,17 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
 
+static void ocfs2_xattr_bucket_relse(struct inode *inode,
+				     struct ocfs2_xattr_bucket *bucket)
+{
+	int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+	for (i = 0; i < blks; i++) {
+		brelse(bucket->bu_bhs[i]);
+		bucket->bu_bhs[i] = NULL;
+	}
+}
+
 static inline const char *ocfs2_xattr_prefix(int name_index)
 {
 	struct xattr_handler *handler = NULL;
@@ -820,8 +831,7 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 	}
 	ret = size;
 cleanup:
-	for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++)
-		brelse(xs->bucket.bu_bhs[i]);
+	ocfs2_xattr_bucket_relse(inode, &xs->bucket);
 	memset(&xs->bucket, 0, sizeof(xs->bucket));
 
 	brelse(xs->xattr_bh);
@@ -1932,7 +1942,6 @@ int ocfs2_xattr_set(struct inode *inode,
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_dinode *di;
 	int ret;
-	u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
 	struct ocfs2_xattr_info xi = {
 		.name_index = name_index,
@@ -2034,8 +2043,7 @@ cleanup:
 	ocfs2_inode_unlock(inode, 1);
 	brelse(di_bh);
 	brelse(xbs.xattr_bh);
-	for (i = 0; i < blk_per_bucket; i++)
-		brelse(xbs.bucket.bu_bhs[i]);
+	ocfs2_xattr_bucket_relse(inode, &xbs.bucket);
 
 	return ret;
 }
@@ -2358,7 +2366,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
 				       xattr_bucket_func *func,
 				       void *para)
 {
-	int i, j, ret = 0;
+	int i, ret = 0;
 	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
 	u32 num_buckets = clusters * bpc;
@@ -2395,14 +2403,12 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
 			}
 		}
 
-		for (j = 0; j < blk_per_bucket; j++)
-			brelse(bucket.bu_bhs[j]);
+		ocfs2_xattr_bucket_relse(inode, &bucket);
 		memset(&bucket, 0, sizeof(bucket));
 	}
 
 out:
-	for (j = 0; j < blk_per_bucket; j++)
-		brelse(bucket.bu_bhs[j]);
+	ocfs2_xattr_bucket_relse(inode, &bucket);
 
 	return ret;
 }
@@ -4554,11 +4560,10 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
 	struct ocfs2_xattr_header *xh;
 	struct ocfs2_xattr_entry *xe;
 	u16 count, header_size, xh_free_start;
-	int i, free, max_free, need, old;
+	int free, max_free, need, old;
 	size_t value_size = 0, name_len = strlen(xi->name);
 	size_t blocksize = inode->i_sb->s_blocksize;
 	int ret, allocation = 0;
-	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
 	mlog_entry("Set xattr %s in xattr index block\n", xi->name);
 
@@ -4672,9 +4677,7 @@ try_again:
 			goto out;
 		}
 
-		for (i = 0; i < blk_per_bucket; i++)
-			brelse(xs->bucket.bu_bhs[i]);
-
+		ocfs2_xattr_bucket_relse(inode, &xs->bucket);
 		memset(&xs->bucket, 0, sizeof(xs->bucket));
 
 		ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
-- 
cgit v0.10.2


From 784b816a9198dc3782c97cde8ddcf52fecdf1797 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 24 Oct 2008 17:33:40 -0700
Subject: ocfs2: Improve ocfs2_read_xattr_bucket().

The ocfs2_read_xattr_bucket() function would read an xattr bucket into a
list of buffer heads.  However, we have a nice ocfs2_xattr_bucket
structure.  Let's have it fill that out instead.

In addition, ocfs2_read_xattr_bucket() would initialize buffer heads for
a bucket that's never been on disk before.  That's confusing.  Let's
call that functionality ocfs2_init_xattr_bucket().

The functions ocfs2_cp_xattr_bucket() and ocfs2_half_xattr_bucket() are
updated to use the ocfs2_xattr_bucket structure rather than raw bh
lists.  That way they can use the new read/init calls.  In addition,
they drop the wasted read of an existing target bucket.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3478ad1..fa13fa4 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -168,6 +168,48 @@ static void ocfs2_xattr_bucket_relse(struct inode *inode,
 	}
 }
 
+/*
+ * A bucket that has never been written to disk doesn't need to be
+ * read.  We just need the buffer_heads.  Don't call this for
+ * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
+ * them fully.
+ */
+static int ocfs2_init_xattr_bucket(struct inode *inode,
+				   struct ocfs2_xattr_bucket *bucket,
+				   u64 xb_blkno)
+{
+	int i, rc = 0;
+	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+	for (i = 0; i < blks; i++) {
+		bucket->bu_bhs[i] = sb_getblk(inode->i_sb, xb_blkno + i);
+		if (!bucket->bu_bhs[i]) {
+			rc = -EIO;
+			mlog_errno(rc);
+			break;
+		}
+
+		ocfs2_set_new_buffer_uptodate(inode, bucket->bu_bhs[i]);
+	}
+
+	if (rc)
+		ocfs2_xattr_bucket_relse(inode, bucket);
+	return rc;
+}
+
+/* Read the xattr bucket at xb_blkno */
+static int ocfs2_read_xattr_bucket(struct inode *inode,
+				   struct ocfs2_xattr_bucket *bucket,
+				   u64 xb_blkno)
+{
+	int rc, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+	rc = ocfs2_read_blocks(inode, xb_blkno, blks, bucket->bu_bhs, 0);
+	if (rc)
+		ocfs2_xattr_bucket_relse(inode, bucket);
+	return rc;
+}
+
 static inline const char *ocfs2_xattr_prefix(int name_index)
 {
 	struct xattr_handler *handler = NULL;
@@ -3097,31 +3139,6 @@ out:
 	return ret;
 }
 
-static int ocfs2_read_xattr_bucket(struct inode *inode,
-				   u64 blkno,
-				   struct buffer_head **bhs,
-				   int new)
-{
-	int ret = 0;
-	u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-
-	if (!new)
-		return ocfs2_read_blocks(inode, blkno,
-					 blk_per_bucket, bhs, 0);
-
-	for (i = 0; i < blk_per_bucket; i++) {
-		bhs[i] = sb_getblk(inode->i_sb, blkno + i);
-		if (bhs[i] == NULL) {
-			ret = -EIO;
-			mlog_errno(ret);
-			break;
-		}
-		ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
-	}
-
-	return ret;
-}
-
 /*
  * Find the suitable pos when we divide a bucket into 2.
  * We have to make sure the xattrs with the same hash value exist
@@ -3184,7 +3201,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 	int ret, i;
 	int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-	struct buffer_head **s_bhs, **t_bhs = NULL;
+	struct ocfs2_xattr_bucket s_bucket, t_bucket;
 	struct ocfs2_xattr_header *xh;
 	struct ocfs2_xattr_entry *xe;
 	int blocksize = inode->i_sb->s_blocksize;
@@ -3192,37 +3209,34 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 	mlog(0, "move some of xattrs from bucket %llu to %llu\n",
 	     (unsigned long long)blk, (unsigned long long)new_blk);
 
-	s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
-	if (!s_bhs)
-		return -ENOMEM;
+	memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
+	memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
 
-	ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
+	ret = ocfs2_read_xattr_bucket(inode, &s_bucket, blk);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, s_bhs[0],
+	ret = ocfs2_journal_access(handle, inode, s_bucket.bu_bhs[0],
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
-	if (!t_bhs) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head);
+	/*
+	 * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
+	 * there's no need to read it.
+	 */
+	ret = ocfs2_init_xattr_bucket(inode, &t_bucket, new_blk);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
 	for (i = 0; i < blk_per_bucket; i++) {
-		ret = ocfs2_journal_access(handle, inode, t_bhs[i],
+		ret = ocfs2_journal_access(handle, inode, t_bucket.bu_bhs[i],
 					   new_bucket_head ?
 					   OCFS2_JOURNAL_ACCESS_CREATE :
 					   OCFS2_JOURNAL_ACCESS_WRITE);
@@ -3232,7 +3246,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 		}
 	}
 
-	xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
+	xh = bucket_xh(&s_bucket);
 	count = le16_to_cpu(xh->xh_count);
 	start = ocfs2_xattr_find_divide_pos(xh);
 
@@ -3245,9 +3259,9 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 		 * that of the last entry in the previous bucket.
 		 */
 		for (i = 0; i < blk_per_bucket; i++)
-			memset(t_bhs[i]->b_data, 0, blocksize);
+			memset(bucket_block(&t_bucket, i), 0, blocksize);
 
-		xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
+		xh = bucket_xh(&t_bucket);
 		xh->xh_free_start = cpu_to_le16(blocksize);
 		xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
 		le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
@@ -3257,10 +3271,11 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 
 	/* copy the whole bucket to the new first. */
 	for (i = 0; i < blk_per_bucket; i++)
-		memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
+		memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i),
+		       blocksize);
 
 	/* update the new bucket. */
-	xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
+	xh = bucket_xh(&t_bucket);
 
 	/*
 	 * Calculate the total name/value len and xh_free_start for
@@ -3325,7 +3340,7 @@ set_num_buckets:
 		xh->xh_num_buckets = 0;
 
 	for (i = 0; i < blk_per_bucket; i++) {
-		ocfs2_journal_dirty(handle, t_bhs[i]);
+		ocfs2_journal_dirty(handle, t_bucket.bu_bhs[i]);
 		if (ret)
 			mlog_errno(ret);
 	}
@@ -3342,29 +3357,20 @@ set_num_buckets:
 	if (start == count)
 		goto out;
 
-	xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
+	xh = bucket_xh(&s_bucket);
 	memset(&xh->xh_entries[start], 0,
 	       sizeof(struct ocfs2_xattr_entry) * (count - start));
 	xh->xh_count = cpu_to_le16(start);
 	xh->xh_free_start = cpu_to_le16(name_offset);
 	xh->xh_name_value_len = cpu_to_le16(name_value_len);
 
-	ocfs2_journal_dirty(handle, s_bhs[0]);
+	ocfs2_journal_dirty(handle, s_bucket.bu_bhs[0]);
 	if (ret)
 		mlog_errno(ret);
 
 out:
-	if (s_bhs) {
-		for (i = 0; i < blk_per_bucket; i++)
-			brelse(s_bhs[i]);
-	}
-	kfree(s_bhs);
-
-	if (t_bhs) {
-		for (i = 0; i < blk_per_bucket; i++)
-			brelse(t_bhs[i]);
-	}
-	kfree(t_bhs);
+	ocfs2_xattr_bucket_relse(inode, &s_bucket);
+	ocfs2_xattr_bucket_relse(inode, &t_bucket);
 
 	return ret;
 }
@@ -3384,7 +3390,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 	int ret, i;
 	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	int blocksize = inode->i_sb->s_blocksize;
-	struct buffer_head **s_bhs, **t_bhs = NULL;
+	struct ocfs2_xattr_bucket s_bucket, t_bucket;
 
 	BUG_ON(s_blkno == t_blkno);
 
@@ -3392,28 +3398,23 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 	     (unsigned long long)s_blkno, (unsigned long long)t_blkno,
 	     t_is_new);
 
-	s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
-			GFP_NOFS);
-	if (!s_bhs)
-		return -ENOMEM;
+	memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
+	memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
 
-	ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0);
+	ret = ocfs2_read_xattr_bucket(inode, &s_bucket, s_blkno);
 	if (ret)
 		goto out;
 
-	t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
-			GFP_NOFS);
-	if (!t_bhs) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new);
+	/*
+	 * Even if !t_is_new, we're overwriting t_bucket.  Thus,
+	 * there's no need to read it.
+	 */
+	ret = ocfs2_init_xattr_bucket(inode, &t_bucket, t_blkno);
 	if (ret)
 		goto out;
 
 	for (i = 0; i < blk_per_bucket; i++) {
-		ret = ocfs2_journal_access(handle, inode, t_bhs[i],
+		ret = ocfs2_journal_access(handle, inode, t_bucket.bu_bhs[i],
 					   t_is_new ?
 					   OCFS2_JOURNAL_ACCESS_CREATE :
 					   OCFS2_JOURNAL_ACCESS_WRITE);
@@ -3422,22 +3423,14 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 	}
 
 	for (i = 0; i < blk_per_bucket; i++) {
-		memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
-		ocfs2_journal_dirty(handle, t_bhs[i]);
+		memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i),
+		       blocksize);
+		ocfs2_journal_dirty(handle, t_bucket.bu_bhs[i]);
 	}
 
 out:
-	if (s_bhs) {
-		for (i = 0; i < blk_per_bucket; i++)
-			brelse(s_bhs[i]);
-	}
-	kfree(s_bhs);
-
-	if (t_bhs) {
-		for (i = 0; i < blk_per_bucket; i++)
-			brelse(t_bhs[i]);
-	}
-	kfree(t_bhs);
+	ocfs2_xattr_bucket_relse(inode, &s_bucket);
+	ocfs2_xattr_bucket_relse(inode, &t_bucket);
 
 	return ret;
 }
-- 
cgit v0.10.2


From 1224be020f62ada3e19822feeac3840abf80de3e Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 24 Oct 2008 18:47:33 -0700
Subject: ocfs2: Wrap journal_access/journal_dirty for xattr buckets.

A common action is to call ocfs2_journal_access() and
ocfs2_journal_dirty() on the buffer heads of an xattr bucket.  Let's
create nice wrappers.

While we're there, let's drop the places that try to be smart by writing
only the first and last blocks of a bucket.  A bucket is contiguous, so
writing the whole thing is actually more efficient.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index fa13fa4..99aefe4 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -210,6 +210,37 @@ static int ocfs2_read_xattr_bucket(struct inode *inode,
 	return rc;
 }
 
+static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
+					     struct inode *inode,
+					     struct ocfs2_xattr_bucket *bucket,
+					     int type)
+{
+	int i, rc = 0;
+	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+	for (i = 0; i < blks; i++) {
+		rc = ocfs2_journal_access(handle, inode,
+					  bucket->bu_bhs[i], type);
+		if (rc) {
+			mlog_errno(rc);
+			break;
+		}
+	}
+
+	return rc;
+}
+
+static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
+					     struct inode *inode,
+					     struct ocfs2_xattr_bucket *bucket)
+{
+	int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+	for (i = 0; i < blks; i++)
+		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
+}
+
+
 static inline const char *ocfs2_xattr_prefix(int name_index)
 {
 	struct xattr_handler *handler = NULL;
@@ -3218,8 +3249,8 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, s_bucket.bu_bhs[0],
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_xattr_bucket_journal_access(handle, inode, &s_bucket,
+						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3235,15 +3266,13 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
-	for (i = 0; i < blk_per_bucket; i++) {
-		ret = ocfs2_journal_access(handle, inode, t_bucket.bu_bhs[i],
-					   new_bucket_head ?
-					   OCFS2_JOURNAL_ACCESS_CREATE :
-					   OCFS2_JOURNAL_ACCESS_WRITE);
-		if (ret) {
-			mlog_errno(ret);
-			goto out;
-		}
+	ret = ocfs2_xattr_bucket_journal_access(handle, inode, &t_bucket,
+						new_bucket_head ?
+						OCFS2_JOURNAL_ACCESS_CREATE :
+						OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
 	}
 
 	xh = bucket_xh(&s_bucket);
@@ -3339,11 +3368,7 @@ set_num_buckets:
 	else
 		xh->xh_num_buckets = 0;
 
-	for (i = 0; i < blk_per_bucket; i++) {
-		ocfs2_journal_dirty(handle, t_bucket.bu_bhs[i]);
-		if (ret)
-			mlog_errno(ret);
-	}
+	ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket);
 
 	/* store the first_hash of the new bucket. */
 	if (first_hash)
@@ -3364,9 +3389,7 @@ set_num_buckets:
 	xh->xh_free_start = cpu_to_le16(name_offset);
 	xh->xh_name_value_len = cpu_to_le16(name_value_len);
 
-	ocfs2_journal_dirty(handle, s_bucket.bu_bhs[0]);
-	if (ret)
-		mlog_errno(ret);
+	ocfs2_xattr_bucket_journal_dirty(handle, inode, &s_bucket);
 
 out:
 	ocfs2_xattr_bucket_relse(inode, &s_bucket);
@@ -3413,20 +3436,18 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 	if (ret)
 		goto out;
 
-	for (i = 0; i < blk_per_bucket; i++) {
-		ret = ocfs2_journal_access(handle, inode, t_bucket.bu_bhs[i],
-					   t_is_new ?
-					   OCFS2_JOURNAL_ACCESS_CREATE :
-					   OCFS2_JOURNAL_ACCESS_WRITE);
-		if (ret)
-			goto out;
-	}
+	ret = ocfs2_xattr_bucket_journal_access(handle, inode, &t_bucket,
+						t_is_new ?
+						OCFS2_JOURNAL_ACCESS_CREATE :
+						OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret)
+		goto out;
 
 	for (i = 0; i < blk_per_bucket; i++) {
 		memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i),
 		       blocksize);
-		ocfs2_journal_dirty(handle, t_bucket.bu_bhs[i]);
 	}
+	ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket);
 
 out:
 	ocfs2_xattr_bucket_relse(inode, &s_bucket);
@@ -3799,9 +3820,9 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
 
 	/*
 	 * We will touch all the buckets after the start_bh(include it).
-	 * Add one more bucket and modify the first_bh.
+	 * Then we add one more bucket.
 	 */
-	credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
+	credits = end_blk - start_blk + 3 * blk_per_bucket + 1;
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -4077,33 +4098,6 @@ set_new_name_value:
 	return;
 }
 
-static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
-					     handle_t *handle,
-					     struct ocfs2_xattr_search *xs,
-					     struct buffer_head **bhs,
-					     u16 bh_num)
-{
-	int ret = 0, off, block_off;
-	struct ocfs2_xattr_entry *xe = xs->here;
-
-	/*
-	 * First calculate all the blocks we should journal_access
-	 * and journal_dirty. The first block should always be touched.
-	 */
-	ret = ocfs2_journal_dirty(handle, bhs[0]);
-	if (ret)
-		mlog_errno(ret);
-
-	/* calc the data. */
-	off = le16_to_cpu(xe->xe_name_offset);
-	block_off = off >> inode->i_sb->s_blocksize_bits;
-	ret = ocfs2_journal_dirty(handle, bhs[block_off]);
-	if (ret)
-		mlog_errno(ret);
-
-	return ret;
-}
-
 /*
  * Set the xattr entry in the specified bucket.
  * The bucket is indicated by xs->bucket and it should have the enough
@@ -4115,7 +4109,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 					   u32 name_hash,
 					   int local)
 {
-	int i, ret;
+	int ret;
 	handle_t *handle = NULL;
 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -4143,22 +4137,16 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 		goto out;
 	}
 
-	for (i = 0; i < blk_per_bucket; i++) {
-		ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[i],
-					   OCFS2_JOURNAL_ACCESS_WRITE);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out;
-		}
+	ret = ocfs2_xattr_bucket_journal_access(handle, inode, &xs->bucket,
+						OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
 	}
 
 	ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
+	ocfs2_xattr_bucket_journal_dirty(handle, inode, &xs->bucket);
 
-	/*Only dirty the blocks we have touched in set xattr. */
-	ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
-						xs->bucket.bu_bhs, blk_per_bucket);
-	if (ret)
-		mlog_errno(ret);
 out:
 	ocfs2_commit_trans(osb, handle);
 
@@ -4398,15 +4386,16 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
 						le16_to_cpu(xh->xh_count) - 1];
 	int ret = 0;
 
-	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
+	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
+				   ocfs2_blocks_per_xattr_bucket(inode->i_sb));
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		mlog_errno(ret);
 		return;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[0],
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_xattr_bucket_journal_access(handle, inode, &xs->bucket,
+						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -4418,9 +4407,8 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
 	memset(last, 0, sizeof(struct ocfs2_xattr_entry));
 	le16_add_cpu(&xh->xh_count, -1);
 
-	ret = ocfs2_journal_dirty(handle, xs->bucket.bu_bhs[0]);
-	if (ret < 0)
-		mlog_errno(ret);
+	ocfs2_xattr_bucket_journal_dirty(handle, inode, &xs->bucket);
+
 out_commit:
 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 }
-- 
cgit v0.10.2


From 4980c6daba967124ed6420032960abd2b48412e2 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 24 Oct 2008 18:54:43 -0700
Subject: ocfs2: Copy xattr buckets with a dedicated function.

Now that the places that copy whole buckets are using struct
ocfs2_xattr_bucket, we can do the copy in a dedicated function.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 99aefe4..71d9e7b 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -240,6 +240,19 @@ static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
 		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
 }
 
+static void ocfs2_xattr_bucket_copy_data(struct inode *inode,
+					 struct ocfs2_xattr_bucket *dest,
+					 struct ocfs2_xattr_bucket *src)
+{
+	int i;
+	int blocksize = inode->i_sb->s_blocksize;
+	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+	for (i = 0; i < blks; i++) {
+		memcpy(bucket_block(dest, i), bucket_block(src, i),
+		       blocksize);
+	}
+}
 
 static inline const char *ocfs2_xattr_prefix(int name_index)
 {
@@ -3299,9 +3312,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 	}
 
 	/* copy the whole bucket to the new first. */
-	for (i = 0; i < blk_per_bucket; i++)
-		memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i),
-		       blocksize);
+	ocfs2_xattr_bucket_copy_data(inode, &t_bucket, &s_bucket);
 
 	/* update the new bucket. */
 	xh = bucket_xh(&t_bucket);
@@ -3410,9 +3421,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 				 u64 t_blkno,
 				 int t_is_new)
 {
-	int ret, i;
-	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-	int blocksize = inode->i_sb->s_blocksize;
+	int ret;
 	struct ocfs2_xattr_bucket s_bucket, t_bucket;
 
 	BUG_ON(s_blkno == t_blkno);
@@ -3443,10 +3452,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 	if (ret)
 		goto out;
 
-	for (i = 0; i < blk_per_bucket; i++) {
-		memcpy(bucket_block(&t_bucket, i), bucket_block(&s_bucket, i),
-		       blocksize);
-	}
+	ocfs2_xattr_bucket_copy_data(inode, &t_bucket, &s_bucket);
 	ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket);
 
 out:
-- 
cgit v0.10.2


From ba937127596ec2c61437006741f7d29999284de4 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 24 Oct 2008 19:13:20 -0700
Subject: ocfs2: Take ocfs2_xattr_bucket structures off of the stack.

The ocfs2_xattr_bucket structure is a nice abstraction, but it is a bit
large to have on the stack.  Just like ocfs2_path, let's allocate it
with a ocfs2_xattr_bucket_new() function.

We can now store the inode on the bucket, cleaning up all the other
bucket functions.  While we're here, we catch another place or two that
wasn't using ocfs2_read_xattr_bucket().

Updates:
- No longer allocating xis.bucket, as it will never be used.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 71d9e7b..766494e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -61,7 +61,14 @@ struct ocfs2_xattr_def_value_root {
 };
 
 struct ocfs2_xattr_bucket {
+	/* The inode these xattrs are associated with */
+	struct inode *bu_inode;
+
+	/* The actual buffers that make up the bucket */
 	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
+
+	/* How many blocks make up one bucket for this filesystem */
+	int bu_blocks;
 };
 
 #define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
@@ -97,7 +104,7 @@ struct ocfs2_xattr_search {
 	 */
 	struct buffer_head *xattr_bh;
 	struct ocfs2_xattr_header *header;
-	struct ocfs2_xattr_bucket bucket;
+	struct ocfs2_xattr_bucket *bucket;
 	void *base;
 	void *end;
 	struct ocfs2_xattr_entry *here;
@@ -157,69 +164,91 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
 
-static void ocfs2_xattr_bucket_relse(struct inode *inode,
-				     struct ocfs2_xattr_bucket *bucket)
+static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
 {
-	int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+	struct ocfs2_xattr_bucket *bucket;
+	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
-	for (i = 0; i < blks; i++) {
+	BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
+
+	bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
+	if (bucket) {
+		bucket->bu_inode = inode;
+		bucket->bu_blocks = blks;
+	}
+
+	return bucket;
+}
+
+static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
+{
+	int i;
+
+	for (i = 0; i < bucket->bu_blocks; i++) {
 		brelse(bucket->bu_bhs[i]);
 		bucket->bu_bhs[i] = NULL;
 	}
 }
 
+static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
+{
+	if (bucket) {
+		ocfs2_xattr_bucket_relse(bucket);
+		bucket->bu_inode = NULL;
+		kfree(bucket);
+	}
+}
+
 /*
  * A bucket that has never been written to disk doesn't need to be
  * read.  We just need the buffer_heads.  Don't call this for
  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
  * them fully.
  */
-static int ocfs2_init_xattr_bucket(struct inode *inode,
-				   struct ocfs2_xattr_bucket *bucket,
+static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 				   u64 xb_blkno)
 {
 	int i, rc = 0;
-	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
-	for (i = 0; i < blks; i++) {
-		bucket->bu_bhs[i] = sb_getblk(inode->i_sb, xb_blkno + i);
+	for (i = 0; i < bucket->bu_blocks; i++) {
+		bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
+					      xb_blkno + i);
 		if (!bucket->bu_bhs[i]) {
 			rc = -EIO;
 			mlog_errno(rc);
 			break;
 		}
 
-		ocfs2_set_new_buffer_uptodate(inode, bucket->bu_bhs[i]);
+		ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
+					      bucket->bu_bhs[i]);
 	}
 
 	if (rc)
-		ocfs2_xattr_bucket_relse(inode, bucket);
+		ocfs2_xattr_bucket_relse(bucket);
 	return rc;
 }
 
 /* Read the xattr bucket at xb_blkno */
-static int ocfs2_read_xattr_bucket(struct inode *inode,
-				   struct ocfs2_xattr_bucket *bucket,
+static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 				   u64 xb_blkno)
 {
-	int rc, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+	int rc;
 
-	rc = ocfs2_read_blocks(inode, xb_blkno, blks, bucket->bu_bhs, 0);
+	rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
+			       bucket->bu_blocks, bucket->bu_bhs, 0);
 	if (rc)
-		ocfs2_xattr_bucket_relse(inode, bucket);
+		ocfs2_xattr_bucket_relse(bucket);
 	return rc;
 }
 
 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
-					     struct inode *inode,
 					     struct ocfs2_xattr_bucket *bucket,
 					     int type)
 {
 	int i, rc = 0;
-	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
-	for (i = 0; i < blks; i++) {
-		rc = ocfs2_journal_access(handle, inode,
+	for (i = 0; i < bucket->bu_blocks; i++) {
+		rc = ocfs2_journal_access(handle, bucket->bu_inode,
 					  bucket->bu_bhs[i], type);
 		if (rc) {
 			mlog_errno(rc);
@@ -231,24 +260,24 @@ static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
 }
 
 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
-					     struct inode *inode,
 					     struct ocfs2_xattr_bucket *bucket)
 {
-	int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+	int i;
 
-	for (i = 0; i < blks; i++)
+	for (i = 0; i < bucket->bu_blocks; i++)
 		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
 }
 
-static void ocfs2_xattr_bucket_copy_data(struct inode *inode,
-					 struct ocfs2_xattr_bucket *dest,
+static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
 					 struct ocfs2_xattr_bucket *src)
 {
 	int i;
-	int blocksize = inode->i_sb->s_blocksize;
-	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+	int blocksize = src->bu_inode->i_sb->s_blocksize;
+
+	BUG_ON(dest->bu_blocks != src->bu_blocks);
+	BUG_ON(dest->bu_inode != src->bu_inode);
 
-	for (i = 0; i < blks; i++) {
+	for (i = 0; i < src->bu_blocks; i++) {
 		memcpy(bucket_block(dest, i), bucket_block(src, i),
 		       blocksize);
 	}
@@ -869,7 +898,12 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 	size_t size;
 	int ret = -ENODATA, name_offset, name_len, block_off, i;
 
-	memset(&xs->bucket, 0, sizeof(xs->bucket));
+	xs->bucket = ocfs2_xattr_bucket_new(inode);
+	if (!xs->bucket) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto cleanup;
+	}
 
 	ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
 	if (ret) {
@@ -895,11 +929,11 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 
 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
 			ret = ocfs2_xattr_bucket_get_name_value(inode,
-								bucket_xh(&xs->bucket),
+								bucket_xh(xs->bucket),
 								i,
 								&block_off,
 								&name_offset);
-			xs->base = bucket_block(&xs->bucket, block_off);
+			xs->base = bucket_block(xs->bucket, block_off);
 		}
 		if (ocfs2_xattr_is_local(xs->here)) {
 			memcpy(buffer, (void *)xs->base +
@@ -917,8 +951,7 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 	}
 	ret = size;
 cleanup:
-	ocfs2_xattr_bucket_relse(inode, &xs->bucket);
-	memset(&xs->bucket, 0, sizeof(xs->bucket));
+	ocfs2_xattr_bucket_free(xs->bucket);
 
 	brelse(xs->xattr_bh);
 	xs->xattr_bh = NULL;
@@ -2047,10 +2080,20 @@ int ocfs2_xattr_set(struct inode *inode,
 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
 		return -EOPNOTSUPP;
 
+	/*
+	 * Only xbs will be used on indexed trees.  xis doesn't need a
+	 * bucket.
+	 */
+	xbs.bucket = ocfs2_xattr_bucket_new(inode);
+	if (!xbs.bucket) {
+		mlog_errno(-ENOMEM);
+		return -ENOMEM;
+	}
+
 	ret = ocfs2_inode_lock(inode, &di_bh, 1);
 	if (ret < 0) {
 		mlog_errno(ret);
-		return ret;
+		goto cleanup_nolock;
 	}
 	xis.inode_bh = xbs.inode_bh = di_bh;
 	di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -2127,9 +2170,10 @@ int ocfs2_xattr_set(struct inode *inode,
 cleanup:
 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
 	ocfs2_inode_unlock(inode, 1);
+cleanup_nolock:
 	brelse(di_bh);
 	brelse(xbs.xattr_bh);
-	ocfs2_xattr_bucket_relse(inode, &xbs.bucket);
+	ocfs2_xattr_bucket_free(xbs.bucket);
 
 	return ret;
 }
@@ -2373,11 +2417,11 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 		lower_bh = bh;
 		bh = NULL;
 	}
-	xs->bucket.bu_bhs[0] = lower_bh;
+	xs->bucket->bu_bhs[0] = lower_bh;
 	lower_bh = NULL;
 
-	xs->header = bucket_xh(&xs->bucket);
-	xs->base = bucket_block(&xs->bucket, 0);
+	xs->header = bucket_xh(xs->bucket);
+	xs->base = bucket_block(xs->bucket, 0);
 	xs->end = xs->base + inode->i_sb->s_blocksize;
 
 	if (found) {
@@ -2385,8 +2429,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 		 * If we have found the xattr enty, read all the blocks in
 		 * this bucket.
 		 */
-		ret = ocfs2_read_blocks(inode, bucket_blkno(&xs->bucket) + 1,
-					blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
+		ret = ocfs2_read_blocks(inode, bucket_blkno(xs->bucket) + 1,
+					blk_per_bucket - 1, &xs->bucket->bu_bhs[1],
 					0);
 		if (ret) {
 			mlog_errno(ret);
@@ -2395,7 +2439,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 
 		xs->here = &xs->header->xh_entries[index];
 		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
-		     (unsigned long long)bucket_blkno(&xs->bucket), index);
+		     (unsigned long long)bucket_blkno(xs->bucket), index);
 	} else
 		ret = -ENODATA;
 
@@ -2453,22 +2497,24 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
 				       void *para)
 {
 	int i, ret = 0;
-	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
 	u32 num_buckets = clusters * bpc;
-	struct ocfs2_xattr_bucket bucket;
+	struct ocfs2_xattr_bucket *bucket;
 
-	memset(&bucket, 0, sizeof(bucket));
+	bucket = ocfs2_xattr_bucket_new(inode);
+	if (!bucket) {
+		mlog_errno(-ENOMEM);
+		return -ENOMEM;
+	}
 
 	mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
 	     clusters, (unsigned long long)blkno);
 
-	for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
-		ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
-					bucket.bu_bhs, 0);
+	for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
+		ret = ocfs2_read_xattr_bucket(bucket, blkno);
 		if (ret) {
 			mlog_errno(ret);
-			goto out;
+			break;
 		}
 
 		/*
@@ -2476,26 +2522,24 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
 		 * in the 1st bucket.
 		 */
 		if (i == 0)
-			num_buckets = le16_to_cpu(bucket_xh(&bucket)->xh_num_buckets);
+			num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
 
 		mlog(0, "iterating xattr bucket %llu, first hash %u\n",
 		     (unsigned long long)blkno,
-		     le32_to_cpu(bucket_xh(&bucket)->xh_entries[0].xe_name_hash));
+		     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
 		if (func) {
-			ret = func(inode, &bucket, para);
-			if (ret) {
+			ret = func(inode, bucket, para);
+			if (ret)
 				mlog_errno(ret);
-				break;
-			}
+			/* Fall through to bucket_relse() */
 		}
 
-		ocfs2_xattr_bucket_relse(inode, &bucket);
-		memset(&bucket, 0, sizeof(bucket));
+		ocfs2_xattr_bucket_relse(bucket);
+		if (ret)
+			break;
 	}
 
-out:
-	ocfs2_xattr_bucket_relse(inode, &bucket);
-
+	ocfs2_xattr_bucket_free(bucket);
 	return ret;
 }
 
@@ -2718,9 +2762,9 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode,
 	int i, blocksize = inode->i_sb->s_blocksize;
 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
-	xs->bucket.bu_bhs[0] = new_bh;
+	xs->bucket->bu_bhs[0] = new_bh;
 	get_bh(new_bh);
-	xs->header = bucket_xh(&xs->bucket);
+	xs->header = bucket_xh(xs->bucket);
 
 	xs->base = new_bh->b_data;
 	xs->end = xs->base + inode->i_sb->s_blocksize;
@@ -2728,8 +2772,8 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode,
 	if (!xs->not_found) {
 		if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
 			ret = ocfs2_read_blocks(inode,
-					bucket_blkno(&xs->bucket) + 1,
-					blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
+					bucket_blkno(xs->bucket) + 1,
+					blk_per_bucket - 1, &xs->bucket->bu_bhs[1],
 					0);
 			if (ret) {
 				mlog_errno(ret);
@@ -3244,8 +3288,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 {
 	int ret, i;
 	int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
-	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-	struct ocfs2_xattr_bucket s_bucket, t_bucket;
+	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
 	struct ocfs2_xattr_header *xh;
 	struct ocfs2_xattr_entry *xe;
 	int blocksize = inode->i_sb->s_blocksize;
@@ -3253,16 +3296,21 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 	mlog(0, "move some of xattrs from bucket %llu to %llu\n",
 	     (unsigned long long)blk, (unsigned long long)new_blk);
 
-	memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
-	memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
+	s_bucket = ocfs2_xattr_bucket_new(inode);
+	t_bucket = ocfs2_xattr_bucket_new(inode);
+	if (!s_bucket || !t_bucket) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
 
-	ret = ocfs2_read_xattr_bucket(inode, &s_bucket, blk);
+	ret = ocfs2_read_xattr_bucket(s_bucket, blk);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ret = ocfs2_xattr_bucket_journal_access(handle, inode, &s_bucket,
+	ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
 						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -3273,13 +3321,13 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 	 * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
 	 * there's no need to read it.
 	 */
-	ret = ocfs2_init_xattr_bucket(inode, &t_bucket, new_blk);
+	ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ret = ocfs2_xattr_bucket_journal_access(handle, inode, &t_bucket,
+	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
 						new_bucket_head ?
 						OCFS2_JOURNAL_ACCESS_CREATE :
 						OCFS2_JOURNAL_ACCESS_WRITE);
@@ -3288,7 +3336,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
-	xh = bucket_xh(&s_bucket);
+	xh = bucket_xh(s_bucket);
 	count = le16_to_cpu(xh->xh_count);
 	start = ocfs2_xattr_find_divide_pos(xh);
 
@@ -3300,10 +3348,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 		 * The hash value is set as one larger than
 		 * that of the last entry in the previous bucket.
 		 */
-		for (i = 0; i < blk_per_bucket; i++)
-			memset(bucket_block(&t_bucket, i), 0, blocksize);
+		for (i = 0; i < t_bucket->bu_blocks; i++)
+			memset(bucket_block(t_bucket, i), 0, blocksize);
 
-		xh = bucket_xh(&t_bucket);
+		xh = bucket_xh(t_bucket);
 		xh->xh_free_start = cpu_to_le16(blocksize);
 		xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
 		le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
@@ -3312,10 +3360,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 	}
 
 	/* copy the whole bucket to the new first. */
-	ocfs2_xattr_bucket_copy_data(inode, &t_bucket, &s_bucket);
+	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
 
 	/* update the new bucket. */
-	xh = bucket_xh(&t_bucket);
+	xh = bucket_xh(t_bucket);
 
 	/*
 	 * Calculate the total name/value len and xh_free_start for
@@ -3379,7 +3427,7 @@ set_num_buckets:
 	else
 		xh->xh_num_buckets = 0;
 
-	ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket);
+	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
 
 	/* store the first_hash of the new bucket. */
 	if (first_hash)
@@ -3393,18 +3441,18 @@ set_num_buckets:
 	if (start == count)
 		goto out;
 
-	xh = bucket_xh(&s_bucket);
+	xh = bucket_xh(s_bucket);
 	memset(&xh->xh_entries[start], 0,
 	       sizeof(struct ocfs2_xattr_entry) * (count - start));
 	xh->xh_count = cpu_to_le16(start);
 	xh->xh_free_start = cpu_to_le16(name_offset);
 	xh->xh_name_value_len = cpu_to_le16(name_value_len);
 
-	ocfs2_xattr_bucket_journal_dirty(handle, inode, &s_bucket);
+	ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
 
 out:
-	ocfs2_xattr_bucket_relse(inode, &s_bucket);
-	ocfs2_xattr_bucket_relse(inode, &t_bucket);
+	ocfs2_xattr_bucket_free(s_bucket);
+	ocfs2_xattr_bucket_free(t_bucket);
 
 	return ret;
 }
@@ -3422,7 +3470,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 				 int t_is_new)
 {
 	int ret;
-	struct ocfs2_xattr_bucket s_bucket, t_bucket;
+	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
 
 	BUG_ON(s_blkno == t_blkno);
 
@@ -3430,10 +3478,15 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 	     (unsigned long long)s_blkno, (unsigned long long)t_blkno,
 	     t_is_new);
 
-	memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
-	memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
-
-	ret = ocfs2_read_xattr_bucket(inode, &s_bucket, s_blkno);
+	s_bucket = ocfs2_xattr_bucket_new(inode);
+	t_bucket = ocfs2_xattr_bucket_new(inode);
+	if (!s_bucket || !t_bucket) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+  
+	ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
 	if (ret)
 		goto out;
 
@@ -3441,23 +3494,23 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 	 * Even if !t_is_new, we're overwriting t_bucket.  Thus,
 	 * there's no need to read it.
 	 */
-	ret = ocfs2_init_xattr_bucket(inode, &t_bucket, t_blkno);
+	ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
 	if (ret)
 		goto out;
 
-	ret = ocfs2_xattr_bucket_journal_access(handle, inode, &t_bucket,
+	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
 						t_is_new ?
 						OCFS2_JOURNAL_ACCESS_CREATE :
 						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret)
 		goto out;
 
-	ocfs2_xattr_bucket_copy_data(inode, &t_bucket, &s_bucket);
-	ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket);
+	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
+	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
 
 out:
-	ocfs2_xattr_bucket_relse(inode, &s_bucket);
-	ocfs2_xattr_bucket_relse(inode, &t_bucket);
+	ocfs2_xattr_bucket_free(t_bucket);
+	ocfs2_xattr_bucket_free(s_bucket);
 
 	return ret;
 }
@@ -4009,7 +4062,7 @@ static void ocfs2_xattr_set_entry_normal(struct inode *inode,
 				xe->xe_value_size = 0;
 
 			val = ocfs2_xattr_bucket_get_val(inode,
-							 &xs->bucket, offs);
+							 xs->bucket, offs);
 			memset(val + OCFS2_XATTR_SIZE(name_len), 0,
 			       size - OCFS2_XATTR_SIZE(name_len));
 			if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
@@ -4087,8 +4140,7 @@ set_new_name_value:
 		xh->xh_free_start = cpu_to_le16(offs);
 	}
 
-	val = ocfs2_xattr_bucket_get_val(inode,
-					 &xs->bucket, offs - size);
+	val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
 	xe->xe_name_offset = cpu_to_le16(offs - size);
 
 	memset(val, 0, size);
@@ -4122,12 +4174,12 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 
 	mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
 	     (unsigned long)xi->value_len, xi->name_index,
-	     (unsigned long long)bucket_blkno(&xs->bucket));
+	     (unsigned long long)bucket_blkno(xs->bucket));
 
-	if (!xs->bucket.bu_bhs[1]) {
+	if (!xs->bucket->bu_bhs[1]) {
 		ret = ocfs2_read_blocks(inode,
-					bucket_blkno(&xs->bucket) + 1,
-					blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
+					bucket_blkno(xs->bucket) + 1,
+					blk_per_bucket - 1, &xs->bucket->bu_bhs[1],
 					0);
 		if (ret) {
 			mlog_errno(ret);
@@ -4143,7 +4195,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_xattr_bucket_journal_access(handle, inode, &xs->bucket,
+	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
 						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -4151,7 +4203,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 	}
 
 	ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
-	ocfs2_xattr_bucket_journal_dirty(handle, inode, &xs->bucket);
+	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
 
 out:
 	ocfs2_commit_trans(osb, handle);
@@ -4264,10 +4316,10 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
 	struct ocfs2_xattr_entry *xe = xs->here;
 	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
 
-	BUG_ON(!xs->bucket.bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
+	BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
 
 	offset = xe - xh->xh_entries;
-	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bu_bhs[0],
+	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0],
 						offset, len);
 	if (ret)
 		mlog_errno(ret);
@@ -4387,7 +4439,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
 					 struct ocfs2_xattr_search *xs)
 {
 	handle_t *handle = NULL;
-	struct ocfs2_xattr_header *xh = bucket_xh(&xs->bucket);
+	struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
 	struct ocfs2_xattr_entry *last = &xh->xh_entries[
 						le16_to_cpu(xh->xh_count) - 1];
 	int ret = 0;
@@ -4400,7 +4452,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
 		return;
 	}
 
-	ret = ocfs2_xattr_bucket_journal_access(handle, inode, &xs->bucket,
+	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
 						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -4413,7 +4465,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
 	memset(last, 0, sizeof(struct ocfs2_xattr_entry));
 	le16_add_cpu(&xh->xh_count, -1);
 
-	ocfs2_xattr_bucket_journal_dirty(handle, inode, &xs->bucket);
+	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
 
 out_commit:
 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
@@ -4565,7 +4617,7 @@ try_again:
 
 	mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
 			"of %u which exceed block size\n",
-			(unsigned long long)bucket_blkno(&xs->bucket),
+			(unsigned long long)bucket_blkno(xs->bucket),
 			header_size);
 
 	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
@@ -4605,7 +4657,7 @@ try_again:
 	mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
 	     "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
 	     " %u\n", xs->not_found,
-	     (unsigned long long)bucket_blkno(&xs->bucket),
+	     (unsigned long long)bucket_blkno(xs->bucket),
 	     free, need, max_free, le16_to_cpu(xh->xh_free_start),
 	     le16_to_cpu(xh->xh_name_value_len));
 
@@ -4617,7 +4669,7 @@ try_again:
 			 * name/value will be moved, the xe shouldn't be changed
 			 * in xs.
 			 */
-			ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
+			ret = ocfs2_defrag_xattr_bucket(inode, xs->bucket);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -4649,7 +4701,7 @@ try_again:
 		 * add a new bucket for the insert.
 		 */
 		ret = ocfs2_check_xattr_bucket_collision(inode,
-							 &xs->bucket,
+							 xs->bucket,
 							 xi->name);
 		if (ret) {
 			mlog_errno(ret);
@@ -4658,14 +4710,13 @@ try_again:
 
 		ret = ocfs2_add_new_xattr_bucket(inode,
 						 xs->xattr_bh,
-						 xs->bucket.bu_bhs[0]);
+						 xs->bucket->bu_bhs[0]);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ocfs2_xattr_bucket_relse(inode, &xs->bucket);
-		memset(&xs->bucket, 0, sizeof(xs->bucket));
+		ocfs2_xattr_bucket_relse(xs->bucket);
 
 		ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
 						   xi->name_index,
-- 
cgit v0.10.2


From e2356a3f02cfdbce735465a2b40b6dc72a764c26 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 27 Oct 2008 15:01:54 -0700
Subject: ocfs2: Use buckets in ocfs2_xattr_bucket_find().

Change the ocfs2_xattr_bucket_find() function to use ocfs2_xattr_bucket
as its abstraction.  This makes for more efficient reads, as buckets are
linear blocks, and also has improved caching characteristics.  It also
reads better.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 766494e..46986c6 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2248,7 +2248,7 @@ typedef int (xattr_bucket_func)(struct inode *inode,
 				void *para);
 
 static int ocfs2_find_xe_in_bucket(struct inode *inode,
-				   struct buffer_head *header_bh,
+				   struct ocfs2_xattr_bucket *bucket,
 				   int name_index,
 				   const char *name,
 				   u32 name_hash,
@@ -2256,11 +2256,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
 				   int *found)
 {
 	int i, ret = 0, cmp = 1, block_off, new_offset;
-	struct ocfs2_xattr_header *xh =
-			(struct ocfs2_xattr_header *)header_bh->b_data;
+	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
 	size_t name_len = strlen(name);
 	struct ocfs2_xattr_entry *xe = NULL;
-	struct buffer_head *name_bh = NULL;
 	char *xe_name;
 
 	/*
@@ -2291,19 +2289,8 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
 			break;
 		}
 
-		ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off,
-				       &name_bh);
-		if (ret) {
-			mlog_errno(ret);
-			break;
-		}
-		xe_name = name_bh->b_data + new_offset;
-
-		cmp = memcmp(name, xe_name, name_len);
-		brelse(name_bh);
-		name_bh = NULL;
-
-		if (cmp == 0) {
+		xe_name = bucket_block(bucket, block_off) + new_offset;
+		if (!memcmp(name, xe_name, name_len)) {
 			*xe_index = i;
 			*found = 1;
 			ret = 0;
@@ -2333,39 +2320,42 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 				   struct ocfs2_xattr_search *xs)
 {
 	int ret, found = 0;
-	struct buffer_head *bh = NULL;
-	struct buffer_head *lower_bh = NULL;
 	struct ocfs2_xattr_header *xh = NULL;
 	struct ocfs2_xattr_entry *xe = NULL;
 	u16 index = 0;
 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	int low_bucket = 0, bucket, high_bucket;
+	struct ocfs2_xattr_bucket *search;
 	u32 last_hash;
-	u64 blkno;
+	u64 blkno, lower_blkno = 0;
 
-	ret = ocfs2_read_block(inode, p_blkno, &bh);
+	search = ocfs2_xattr_bucket_new(inode);
+	if (!search) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_read_xattr_bucket(search, p_blkno);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	xh = (struct ocfs2_xattr_header *)bh->b_data;
+	xh = bucket_xh(search);
 	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
-
 	while (low_bucket <= high_bucket) {
-		brelse(bh);
-		bh = NULL;
-		bucket = (low_bucket + high_bucket) / 2;
+		ocfs2_xattr_bucket_relse(search);
 
+		bucket = (low_bucket + high_bucket) / 2;
 		blkno = p_blkno + bucket * blk_per_bucket;
-
-		ret = ocfs2_read_block(inode, blkno, &bh);
+		ret = ocfs2_read_xattr_bucket(search, blkno);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		xh = (struct ocfs2_xattr_header *)bh->b_data;
+		xh = bucket_xh(search);
 		xe = &xh->xh_entries[0];
 		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
 			high_bucket = bucket - 1;
@@ -2382,10 +2372,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 
 		last_hash = le32_to_cpu(xe->xe_name_hash);
 
-		/* record lower_bh which may be the insert place. */
-		brelse(lower_bh);
-		lower_bh = bh;
-		bh = NULL;
+		/* record lower_blkno which may be the insert place. */
+		lower_blkno = blkno;
 
 		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
 			low_bucket = bucket + 1;
@@ -2393,7 +2381,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 		}
 
 		/* the searched xattr should reside in this bucket if exists. */
-		ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
+		ret = ocfs2_find_xe_in_bucket(inode, search,
 					      name_index, name, name_hash,
 					      &index, &found);
 		if (ret) {
@@ -2408,35 +2396,21 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 	 * When the xattr's hash value is in the gap of 2 buckets, we will
 	 * always set it to the previous bucket.
 	 */
-	if (!lower_bh) {
-		/*
-		 * We can't find any bucket whose first name_hash is less
-		 * than the find name_hash.
-		 */
-		BUG_ON(bh->b_blocknr != p_blkno);
-		lower_bh = bh;
-		bh = NULL;
+	if (!lower_blkno)
+		lower_blkno = p_blkno;
+
+	/* This should be in cache - we just read it during the search */
+	ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
 	}
-	xs->bucket->bu_bhs[0] = lower_bh;
-	lower_bh = NULL;
 
 	xs->header = bucket_xh(xs->bucket);
 	xs->base = bucket_block(xs->bucket, 0);
 	xs->end = xs->base + inode->i_sb->s_blocksize;
 
 	if (found) {
-		/*
-		 * If we have found the xattr enty, read all the blocks in
-		 * this bucket.
-		 */
-		ret = ocfs2_read_blocks(inode, bucket_blkno(xs->bucket) + 1,
-					blk_per_bucket - 1, &xs->bucket->bu_bhs[1],
-					0);
-		if (ret) {
-			mlog_errno(ret);
-			goto out;
-		}
-
 		xs->here = &xs->header->xh_entries[index];
 		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
 		     (unsigned long long)bucket_blkno(xs->bucket), index);
@@ -2444,8 +2418,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 		ret = -ENODATA;
 
 out:
-	brelse(bh);
-	brelse(lower_bh);
+	ocfs2_xattr_bucket_free(search);
 	return ret;
 }
 
-- 
cgit v0.10.2


From 178eeac354ea28828d5e94a3a7b51368c171d6a5 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 27 Oct 2008 15:18:29 -0700
Subject: ocfs2: Use buckets in ocfs2_xattr_create_index_block().

Use the ocfs2_xattr_bucket abstraction in
ocfs2_xattr_create_index_block() and its helpers.  We get more efficient
reads, a lot less buffer_head munging, and nicer code to boot.  While
we're at it, ocfs2_xattr_update_xattr_search() becomes void.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 46986c6..76969b9 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2649,32 +2649,34 @@ static void swap_xe(void *a, void *b, int size)
 /*
  * When the ocfs2_xattr_block is filled up, new bucket will be created
  * and all the xattr entries will be moved to the new bucket.
+ * The header goes at the start of the bucket, and the names+values are
+ * filled from the end.  This is why *target starts as the last buffer.
  * Note: we need to sort the entries since they are not saved in order
  * in the ocfs2_xattr_block.
  */
 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
 					   struct buffer_head *xb_bh,
-					   struct buffer_head *xh_bh,
-					   struct buffer_head *data_bh)
+					   struct ocfs2_xattr_bucket *bucket)
 {
 	int i, blocksize = inode->i_sb->s_blocksize;
+	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	u16 offset, size, off_change;
 	struct ocfs2_xattr_entry *xe;
 	struct ocfs2_xattr_block *xb =
 				(struct ocfs2_xattr_block *)xb_bh->b_data;
 	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
-	struct ocfs2_xattr_header *xh =
-				(struct ocfs2_xattr_header *)xh_bh->b_data;
+	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
 	u16 count = le16_to_cpu(xb_xh->xh_count);
-	char *target = xh_bh->b_data, *src = xb_bh->b_data;
+	char *src = xb_bh->b_data;
+	char *target = bucket_block(bucket, blks - 1);
 
 	mlog(0, "cp xattr from block %llu to bucket %llu\n",
 	     (unsigned long long)xb_bh->b_blocknr,
-	     (unsigned long long)xh_bh->b_blocknr);
+	     (unsigned long long)bucket_blkno(bucket));
+
+	for (i = 0; i < blks; i++)
+		memset(bucket_block(bucket, i), 0, blocksize);
 
-	memset(xh_bh->b_data, 0, blocksize);
-	if (data_bh)
-		memset(data_bh->b_data, 0, blocksize);
 	/*
 	 * Since the xe_name_offset is based on ocfs2_xattr_header,
 	 * there is a offset change corresponding to the change of
@@ -2686,8 +2688,6 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
 	size = blocksize - offset;
 
 	/* copy all the names and values. */
-	if (data_bh)
-		target = data_bh->b_data;
 	memcpy(target + offset, src + offset, size);
 
 	/* Init new header now. */
@@ -2697,7 +2697,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
 
 	/* copy all the entries. */
-	target = xh_bh->b_data;
+	target = bucket_block(bucket, 0);
 	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
 	size = count * sizeof(struct ocfs2_xattr_entry);
 	memcpy(target + offset, (char *)xb_xh + offset, size);
@@ -2723,42 +2723,24 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
  * While if the entry is in index b-tree, "bucket" indicates the
  * real place of the xattr.
  */
-static int ocfs2_xattr_update_xattr_search(struct inode *inode,
-					   struct ocfs2_xattr_search *xs,
-					   struct buffer_head *old_bh,
-					   struct buffer_head *new_bh)
+static void ocfs2_xattr_update_xattr_search(struct inode *inode,
+					    struct ocfs2_xattr_search *xs,
+					    struct buffer_head *old_bh)
 {
-	int ret = 0;
 	char *buf = old_bh->b_data;
 	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
 	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
-	int i, blocksize = inode->i_sb->s_blocksize;
-	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+	int i;
 
-	xs->bucket->bu_bhs[0] = new_bh;
-	get_bh(new_bh);
 	xs->header = bucket_xh(xs->bucket);
-
-	xs->base = new_bh->b_data;
+	xs->base = bucket_block(xs->bucket, 0);
 	xs->end = xs->base + inode->i_sb->s_blocksize;
 
-	if (!xs->not_found) {
-		if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
-			ret = ocfs2_read_blocks(inode,
-					bucket_blkno(xs->bucket) + 1,
-					blk_per_bucket - 1, &xs->bucket->bu_bhs[1],
-					0);
-			if (ret) {
-				mlog_errno(ret);
-				return ret;
-			}
-
-		}
-		i = xs->here - old_xh->xh_entries;
-		xs->here = &xs->header->xh_entries[i];
-	}
+	if (xs->not_found)
+		return;
 
-	return ret;
+	i = xs->here - old_xh->xh_entries;
+	xs->here = &xs->header->xh_entries[i];
 }
 
 static int ocfs2_xattr_create_index_block(struct inode *inode,
@@ -2771,18 +2753,17 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_alloc_context *data_ac;
-	struct buffer_head *xh_bh = NULL, *data_bh = NULL;
 	struct buffer_head *xb_bh = xs->xattr_bh;
 	struct ocfs2_xattr_block *xb =
 			(struct ocfs2_xattr_block *)xb_bh->b_data;
 	struct ocfs2_xattr_tree_root *xr;
 	u16 xb_flags = le16_to_cpu(xb->xb_flags);
-	u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
 	mlog(0, "create xattr index block for %llu\n",
 	     (unsigned long long)xb_bh->b_blocknr);
 
 	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
+	BUG_ON(!xs->bucket);
 
 	ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
 	if (ret) {
@@ -2798,10 +2779,10 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 	down_write(&oi->ip_alloc_sem);
 
 	/*
-	 * 3 more credits, one for xattr block update, one for the 1st block
-	 * of the new xattr bucket and one for the value/data.
+	 * We need more credits.  One for the xattr block update and one
+	 * for each block of the new xattr bucket.
 	 */
-	credits += 3;
+	credits += 1 + ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -2832,51 +2813,23 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 	mlog(0, "allocate 1 cluster from %llu to xattr block\n",
 	     (unsigned long long)blkno);
 
-	xh_bh = sb_getblk(inode->i_sb, blkno);
-	if (!xh_bh) {
-		ret = -EIO;
+	ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
+	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
 	}
 
-	ocfs2_set_new_buffer_uptodate(inode, xh_bh);
-
-	ret = ocfs2_journal_access(handle, inode, xh_bh,
-				   OCFS2_JOURNAL_ACCESS_CREATE);
+	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
+						OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
 	}
 
-	if (bpb > 1) {
-		data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
-		if (!data_bh) {
-			ret = -EIO;
-			mlog_errno(ret);
-			goto out_commit;
-		}
-
-		ocfs2_set_new_buffer_uptodate(inode, data_bh);
-
-		ret = ocfs2_journal_access(handle, inode, data_bh,
-					   OCFS2_JOURNAL_ACCESS_CREATE);
-		if (ret) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
-	}
-
-	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
-
-	ocfs2_journal_dirty(handle, xh_bh);
-	if (data_bh)
-		ocfs2_journal_dirty(handle, data_bh);
+	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
+	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
 
-	ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_commit;
-	}
+	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
 
 	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
 	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
@@ -2911,9 +2864,6 @@ out:
 	if (data_ac)
 		ocfs2_free_alloc_context(data_ac);
 
-	brelse(xh_bh);
-	brelse(data_bh);
-
 	return ret;
 }
 
-- 
cgit v0.10.2


From 161d6f30f18c4a7e2b24705b6690cce3ff276eb9 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 27 Oct 2008 15:25:18 -0700
Subject: ocfs2: Use buckets in ocfs2_defrag_xattr_bucket().

Use the ocfs2_xattr_bucket abstraction for reading and writing the
bucket in ocfs2_defrag_xattr_bucket().

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 76969b9..127a628 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2894,21 +2894,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 	struct ocfs2_xattr_header *xh;
 	char *entries, *buf, *bucket_buf = NULL;
 	u64 blkno = bucket_blkno(bucket);
-	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	u16 xh_free_start;
 	size_t blocksize = inode->i_sb->s_blocksize;
 	handle_t *handle;
-	struct buffer_head **bhs;
 	struct ocfs2_xattr_entry *xe;
-
-	bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
-			GFP_NOFS);
-	if (!bhs)
-		return -ENOMEM;
-
-	ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0);
-	if (ret)
-		goto out;
+	struct ocfs2_xattr_bucket *wb = NULL;
 
 	/*
 	 * In order to make the operation more efficient and generic,
@@ -2922,11 +2912,21 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
+	wb = ocfs2_xattr_bucket_new(inode);
+	if (!wb) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = ocfs2_read_xattr_bucket(wb, blkno);
+	if (ret)
+		goto out;
+
 	buf = bucket_buf;
-	for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
-		memcpy(buf, bhs[i]->b_data, blocksize);
+	for (i = 0; i < wb->bu_blocks; i++, buf += blocksize)
+		memcpy(buf, bucket_block(wb, i), blocksize);
 
-	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
+	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), wb->bu_blocks);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		handle = NULL;
@@ -2934,13 +2934,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
-	for (i = 0; i < blk_per_bucket; i++) {
-		ret = ocfs2_journal_access(handle, inode, bhs[i],
-					   OCFS2_JOURNAL_ACCESS_WRITE);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto commit;
-		}
+	ret = ocfs2_xattr_bucket_journal_access(handle, wb,
+						OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto commit;
 	}
 
 	xh = (struct ocfs2_xattr_header *)bucket_buf;
@@ -3009,21 +3007,14 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 	     cmp_xe, swap_xe);
 
 	buf = bucket_buf;
-	for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
-		memcpy(bhs[i]->b_data, buf, blocksize);
-		ocfs2_journal_dirty(handle, bhs[i]);
-	}
+	for (i = 0; i < wb->bu_blocks; i++, buf += blocksize)
+		memcpy(bucket_block(wb, i), buf, blocksize);
+	ocfs2_xattr_bucket_journal_dirty(handle, wb);
 
 commit:
 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
-
-	if (bhs) {
-		for (i = 0; i < blk_per_bucket; i++)
-			brelse(bhs[i]);
-	}
-	kfree(bhs);
-
+	ocfs2_xattr_bucket_free(wb);
 	kfree(bucket_buf);
 	return ret;
 }
-- 
cgit v0.10.2


From 02dbf38d19c19016f558fe0dc0c44f8041d3eb8e Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 27 Oct 2008 18:07:45 -0700
Subject: ocfs2: Use buckets in ocfs2_xattr_set_entry_in_bucket().

The ocfs2_xattr_set_entry_in_bucket() function is already working on an
ocfs2_xattr_bucket structure, so let's use the bucket API.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 127a628..029a9f4 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -4083,25 +4083,24 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 {
 	int ret;
 	handle_t *handle = NULL;
-	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	u64 blkno;
 
 	mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
 	     (unsigned long)xi->value_len, xi->name_index,
 	     (unsigned long long)bucket_blkno(xs->bucket));
 
 	if (!xs->bucket->bu_bhs[1]) {
-		ret = ocfs2_read_blocks(inode,
-					bucket_blkno(xs->bucket) + 1,
-					blk_per_bucket - 1, &xs->bucket->bu_bhs[1],
-					0);
+		blkno = bucket_blkno(xs->bucket);
+		ocfs2_xattr_bucket_relse(xs->bucket);
+		ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, blk_per_bucket);
+	handle = ocfs2_start_trans(osb, xs->bucket->bu_blocks);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		handle = NULL;
-- 
cgit v0.10.2


From 1c32a2fd46ddc01bd86bff56a8f5d98c815750f4 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 6 Nov 2008 08:10:47 +0800
Subject: ocfs2/xattr: Remove additional bucket allocation in bucket
 defragment.

Joel has refactored xattr bucket and make xattr bucket a general
wrapper. So in ocfs2_defrag_xattr_bucket, we have already passed the
bucket in, so there is no need to allocate a new one and read it.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 029a9f4..87cf39d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2898,7 +2898,6 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 	size_t blocksize = inode->i_sb->s_blocksize;
 	handle_t *handle;
 	struct ocfs2_xattr_entry *xe;
-	struct ocfs2_xattr_bucket *wb = NULL;
 
 	/*
 	 * In order to make the operation more efficient and generic,
@@ -2912,21 +2911,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
-	wb = ocfs2_xattr_bucket_new(inode);
-	if (!wb) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = ocfs2_read_xattr_bucket(wb, blkno);
-	if (ret)
-		goto out;
-
 	buf = bucket_buf;
-	for (i = 0; i < wb->bu_blocks; i++, buf += blocksize)
-		memcpy(buf, bucket_block(wb, i), blocksize);
+	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
+		memcpy(buf, bucket_block(bucket, i), blocksize);
 
-	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), wb->bu_blocks);
+	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), bucket->bu_blocks);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		handle = NULL;
@@ -2934,7 +2923,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_xattr_bucket_journal_access(handle, wb,
+	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
 						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -3007,14 +2996,13 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 	     cmp_xe, swap_xe);
 
 	buf = bucket_buf;
-	for (i = 0; i < wb->bu_blocks; i++, buf += blocksize)
-		memcpy(bucket_block(wb, i), buf, blocksize);
-	ocfs2_xattr_bucket_journal_dirty(handle, wb);
+	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
+		memcpy(bucket_block(bucket, i), buf, blocksize);
+	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
 
 commit:
 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
-	ocfs2_xattr_bucket_free(wb);
 	kfree(bucket_buf);
 	return ret;
 }
-- 
cgit v0.10.2


From 757055adc5d41b910bdead925060f077dd2d9169 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 6 Nov 2008 08:10:48 +0800
Subject: ocfs2/xattr: Only set buffer update if it doesn't exist in cache.

When we call ocfs2_init_xattr_bucket, we deem that the new buffer head
will be written to disk immediately, so we just use sb_getblk. But in
some cases the buffer may have already been in ocfs2 uptodate cache,
so we only call ocfs2_set_buffer_uptodate if the buffer head isn't
in the cache.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 87cf39d..d8fc714 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -219,8 +219,10 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 			break;
 		}
 
-		ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
-					      bucket->bu_bhs[i]);
+		if (!ocfs2_buffer_uptodate(bucket->bu_inode,
+					   bucket->bu_bhs[i]))
+			ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
+						      bucket->bu_bhs[i]);
 	}
 
 	if (rc)
-- 
cgit v0.10.2


From 976331d8789d4d84a11b45b87c520ade83715343 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 12 Nov 2008 08:26:57 +0800
Subject: ocfs2/xattr: Only extend xattr bucket in need.

When the first block of a bucket is filled up with xattr
entries, we normally extend the bucket. But if we are
just replace one xattr with small length, we don't need
to extend it. This is important since we will calculate
what we need before the transaction and in this situation
no resources will be allocated.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d8fc714..4501c63 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -4564,7 +4564,9 @@ try_again:
 	     free, need, max_free, le16_to_cpu(xh->xh_free_start),
 	     le16_to_cpu(xh->xh_name_value_len));
 
-	if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
+	if (free < need ||
+	    (xs->not_found &&
+	     count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
 		if (need <= max_free &&
 		    count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
 			/*
-- 
cgit v0.10.2


From 2891d290aa6eee0821f7e4ad0b1c4ae4d964b0f1 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 12 Nov 2008 08:26:58 +0800
Subject: ocfs2: Add clusters free in dealloc_ctxt.

Now in ocfs2 xattr set, the whole process are divided into many small
parts and they are wrapped into diffrent transactions and it make the
set doesn't look like a real transaction. So we want to integrate it
into a real one.

In some cases we will allocate some clusters and free some in just one
transaction. e.g, one xattr is larger than inline size, so it and its
value root is stored within the inode while the value is outside in a
cluster. Then we try to update it with a smaller value(larger than the
size of root but smaller than inline size), we may need to free the
outside cluster while allocate a new bucket(one cluster) since now the
inode may be full. The old solution will lock the global_bitmap(if the
local alloc failed in stress test) and then the truncate log. This will
cause a ABBA lock with truncate log flush.

This patch add the clusters free in dealloc_ctxt, so that we can record
the free clusters during the transaction and then free it after we
release the global_bitmap in xattr set.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 0cc2deb..4614614 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5800,7 +5800,10 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
  */
 
 /*
- * Describes a single block free from a suballocator
+ * Describe a single bit freed from a suballocator.  For the block
+ * suballocators, it represents one block.  For the global cluster
+ * allocator, it represents some clusters and free_bit indicates
+ * clusters number.
  */
 struct ocfs2_cached_block_free {
 	struct ocfs2_cached_block_free		*free_next;
@@ -5815,10 +5818,10 @@ struct ocfs2_per_slot_free_list {
 	struct ocfs2_cached_block_free		*f_first;
 };
 
-static int ocfs2_free_cached_items(struct ocfs2_super *osb,
-				   int sysfile_type,
-				   int slot,
-				   struct ocfs2_cached_block_free *head)
+static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
+				    int sysfile_type,
+				    int slot,
+				    struct ocfs2_cached_block_free *head)
 {
 	int ret;
 	u64 bg_blkno;
@@ -5893,6 +5896,82 @@ out:
 	return ret;
 }
 
+int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+				u64 blkno, unsigned int bit)
+{
+	int ret = 0;
+	struct ocfs2_cached_block_free *item;
+
+	item = kmalloc(sizeof(*item), GFP_NOFS);
+	if (item == NULL) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		return ret;
+	}
+
+	mlog(0, "Insert clusters: (bit %u, blk %llu)\n",
+	     bit, (unsigned long long)blkno);
+
+	item->free_blk = blkno;
+	item->free_bit = bit;
+	item->free_next = ctxt->c_global_allocator;
+
+	ctxt->c_global_allocator = item;
+	return ret;
+}
+
+static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
+				      struct ocfs2_cached_block_free *head)
+{
+	struct ocfs2_cached_block_free *tmp;
+	struct inode *tl_inode = osb->osb_tl_inode;
+	handle_t *handle;
+	int ret = 0;
+
+	mutex_lock(&tl_inode->i_mutex);
+
+	while (head) {
+		if (ocfs2_truncate_log_needs_flush(osb)) {
+			ret = __ocfs2_flush_truncate_log(osb);
+			if (ret < 0) {
+				mlog_errno(ret);
+				break;
+			}
+		}
+
+		handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			mlog_errno(ret);
+			break;
+		}
+
+		ret = ocfs2_truncate_log_append(osb, handle, head->free_blk,
+						head->free_bit);
+
+		ocfs2_commit_trans(osb, handle);
+		tmp = head;
+		head = head->free_next;
+		kfree(tmp);
+
+		if (ret < 0) {
+			mlog_errno(ret);
+			break;
+		}
+	}
+
+	mutex_unlock(&tl_inode->i_mutex);
+
+	while (head) {
+		/* Premature exit may have left some dangling items. */
+		tmp = head;
+		head = head->free_next;
+		kfree(tmp);
+	}
+
+	return ret;
+}
+
 int ocfs2_run_deallocs(struct ocfs2_super *osb,
 		       struct ocfs2_cached_dealloc_ctxt *ctxt)
 {
@@ -5908,8 +5987,10 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
 		if (fl->f_first) {
 			mlog(0, "Free items: (type %u, slot %d)\n",
 			     fl->f_inode_type, fl->f_slot);
-			ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type,
-						       fl->f_slot, fl->f_first);
+			ret2 = ocfs2_free_cached_blocks(osb,
+							fl->f_inode_type,
+							fl->f_slot,
+							fl->f_first);
 			if (ret2)
 				mlog_errno(ret2);
 			if (!ret)
@@ -5920,6 +6001,17 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
 		kfree(fl);
 	}
 
+	if (ctxt->c_global_allocator) {
+		ret2 = ocfs2_free_cached_clusters(osb,
+						  ctxt->c_global_allocator);
+		if (ret2)
+			mlog_errno(ret2);
+		if (!ret)
+			ret = ret2;
+
+		ctxt->c_global_allocator = NULL;
+	}
+
 	return ret;
 }
 
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 70257c8..c301cf2 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -167,11 +167,15 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
  */
 struct ocfs2_cached_dealloc_ctxt {
 	struct ocfs2_per_slot_free_list		*c_first_suballocator;
+	struct ocfs2_cached_block_free 		*c_global_allocator;
 };
 static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
 {
 	c->c_first_suballocator = NULL;
+	c->c_global_allocator = NULL;
 }
+int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+				u64 blkno, unsigned int bit);
 int ocfs2_run_deallocs(struct ocfs2_super *osb,
 		       struct ocfs2_cached_dealloc_ctxt *ctxt);
 
-- 
cgit v0.10.2


From c73f60f900ddf73ec4ea2a143829ab97242c4e8c Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 12 Nov 2008 08:26:59 +0800
Subject: ocfs2/xattr: Move clusters free into dealloc.

Move clusters free process into dealloc context so that
they can be freed after the transaction.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 4501c63..f1da381 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -457,7 +457,6 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 	int ret;
 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct inode *tl_inode = osb->osb_tl_inode;
 	handle_t *handle;
 	struct ocfs2_alloc_context *meta_ac = NULL;
 	struct ocfs2_extent_tree et;
@@ -470,16 +469,6 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 		return ret;
 	}
 
-	mutex_lock(&tl_inode->i_mutex);
-
-	if (ocfs2_truncate_log_needs_flush(osb)) {
-		ret = __ocfs2_flush_truncate_log(osb);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out;
-		}
-	}
-
 	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -509,14 +498,13 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
+	ret = ocfs2_cache_cluster_dealloc(dealloc, phys_blkno, len);
 	if (ret)
 		mlog_errno(ret);
 
 out_commit:
 	ocfs2_commit_trans(osb, handle);
 out:
-	mutex_unlock(&tl_inode->i_mutex);
 
 	if (meta_ac)
 		ocfs2_free_alloc_context(meta_ac);
-- 
cgit v0.10.2


From 78f30c314a74b9dc5d7368d96fe4be883d9a3a04 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 12 Nov 2008 08:27:00 +0800
Subject: ocfs2/xattr: Reserve meta/data at the beginning of ocfs2_xattr_set.

In ocfs2 xattr set, we reserve metadata and clusters in any place
they are needed. It is time-consuming and ineffective, so this
patch try to reserve metadata and clusters at the beginning of
ocfs2_xattr_set.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index c301cf2..3eb735e 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -176,6 +176,10 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
 }
 int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
 				u64 blkno, unsigned int bit);
+static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
+{
+	return c->c_global_allocator != NULL;
+}
 int ocfs2_run_deallocs(struct ocfs2_super *osb,
 		       struct ocfs2_cached_dealloc_ctxt *ctxt);
 
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index f1da381..4fd201a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -71,6 +71,12 @@ struct ocfs2_xattr_bucket {
 	int bu_blocks;
 };
 
+struct ocfs2_xattr_set_ctxt {
+	struct ocfs2_alloc_context *meta_ac;
+	struct ocfs2_alloc_context *data_ac;
+	struct ocfs2_cached_dealloc_ctxt dealloc;
+};
+
 #define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
 #define OCFS2_XATTR_INLINE_SIZE	80
 
@@ -133,11 +139,13 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
 					size_t buffer_size);
 
 static int ocfs2_xattr_create_index_block(struct inode *inode,
-					  struct ocfs2_xattr_search *xs);
+					  struct ocfs2_xattr_search *xs,
+					  struct ocfs2_xattr_set_ctxt *ctxt);
 
 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
 					     struct ocfs2_xattr_info *xi,
-					     struct ocfs2_xattr_search *xs);
+					     struct ocfs2_xattr_search *xs,
+					     struct ocfs2_xattr_set_ctxt *ctxt);
 
 static int ocfs2_delete_xattr_index_block(struct inode *inode,
 					  struct buffer_head *xb_bh);
@@ -334,14 +342,13 @@ static void ocfs2_xattr_hash_entry(struct inode *inode,
 static int ocfs2_xattr_extend_allocation(struct inode *inode,
 					 u32 clusters_to_add,
 					 struct buffer_head *xattr_bh,
-					 struct ocfs2_xattr_value_root *xv)
+					 struct ocfs2_xattr_value_root *xv,
+					 struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int status = 0;
 	int restart_func = 0;
 	int credits = 0;
 	handle_t *handle = NULL;
-	struct ocfs2_alloc_context *data_ac = NULL;
-	struct ocfs2_alloc_context *meta_ac = NULL;
 	enum ocfs2_alloc_restarted why;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
@@ -353,13 +360,6 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 
 restart_all:
 
-	status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
-				       &data_ac, &meta_ac);
-	if (status) {
-		mlog_errno(status);
-		goto leave;
-	}
-
 	credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
 					    clusters_to_add);
 	handle = ocfs2_start_trans(osb, credits);
@@ -386,8 +386,8 @@ restarted_transaction:
 					     0,
 					     &et,
 					     handle,
-					     data_ac,
-					     meta_ac,
+					     ctxt->data_ac,
+					     ctxt->meta_ac,
 					     &why);
 	if ((status < 0) && (status != -EAGAIN)) {
 		if (status != -ENOSPC)
@@ -432,14 +432,6 @@ leave:
 		ocfs2_commit_trans(osb, handle);
 		handle = NULL;
 	}
-	if (data_ac) {
-		ocfs2_free_alloc_context(data_ac);
-		data_ac = NULL;
-	}
-	if (meta_ac) {
-		ocfs2_free_alloc_context(meta_ac);
-		meta_ac = NULL;
-	}
 	if ((!status) && restart_func) {
 		restart_func = 0;
 		goto restart_all;
@@ -452,23 +444,16 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 				      struct buffer_head *root_bh,
 				      struct ocfs2_xattr_value_root *xv,
 				      u32 cpos, u32 phys_cpos, u32 len,
-				      struct ocfs2_cached_dealloc_ctxt *dealloc)
+				      struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret;
 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	handle_t *handle;
-	struct ocfs2_alloc_context *meta_ac = NULL;
 	struct ocfs2_extent_tree et;
 
 	ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
 
-	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
-	if (ret) {
-		mlog_errno(ret);
-		return ret;
-	}
-
 	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -483,8 +468,8 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
-				  dealloc);
+	ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
+				  &ctxt->dealloc);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -498,17 +483,13 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = ocfs2_cache_cluster_dealloc(dealloc, phys_blkno, len);
+	ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
 	if (ret)
 		mlog_errno(ret);
 
 out_commit:
 	ocfs2_commit_trans(osb, handle);
 out:
-
-	if (meta_ac)
-		ocfs2_free_alloc_context(meta_ac);
-
 	return ret;
 }
 
@@ -516,15 +497,12 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 				   u32 old_clusters,
 				   u32 new_clusters,
 				   struct buffer_head *root_bh,
-				   struct ocfs2_xattr_value_root *xv)
+				   struct ocfs2_xattr_value_root *xv,
+				   struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret = 0;
 	u32 trunc_len, cpos, phys_cpos, alloc_size;
 	u64 block;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_cached_dealloc_ctxt dealloc;
-
-	ocfs2_init_dealloc_ctxt(&dealloc);
 
 	if (old_clusters <= new_clusters)
 		return 0;
@@ -544,7 +522,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 
 		ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
 						 phys_cpos, alloc_size,
-						 &dealloc);
+						 ctxt);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -558,16 +536,14 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 	}
 
 out:
-	ocfs2_schedule_truncate_log_flush(osb, 1);
-	ocfs2_run_deallocs(osb, &dealloc);
-
 	return ret;
 }
 
 static int ocfs2_xattr_value_truncate(struct inode *inode,
 				      struct buffer_head *root_bh,
 				      struct ocfs2_xattr_value_root *xv,
-				      int len)
+				      int len,
+				      struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret;
 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
@@ -579,11 +555,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode,
 	if (new_clusters > old_clusters)
 		ret = ocfs2_xattr_extend_allocation(inode,
 						    new_clusters - old_clusters,
-						    root_bh, xv);
+						    root_bh, xv, ctxt);
 	else
 		ret = ocfs2_xattr_shrink_size(inode,
 					      old_clusters, new_clusters,
-					      root_bh, xv);
+					      root_bh, xv, ctxt);
 
 	return ret;
 }
@@ -1167,6 +1143,7 @@ out:
 static int ocfs2_xattr_set_value_outside(struct inode *inode,
 					 struct ocfs2_xattr_info *xi,
 					 struct ocfs2_xattr_search *xs,
+					 struct ocfs2_xattr_set_ctxt *ctxt,
 					 size_t offs)
 {
 	size_t name_len = strlen(xi->name);
@@ -1186,7 +1163,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
 	xv->xr_list.l_next_free_rec = 0;
 
 	ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
-					 xi->value_len);
+					 xi->value_len, ctxt);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
@@ -1317,6 +1294,7 @@ static void ocfs2_xattr_set_entry_local(struct inode *inode,
 static int ocfs2_xattr_set_entry(struct inode *inode,
 				 struct ocfs2_xattr_info *xi,
 				 struct ocfs2_xattr_search *xs,
+				 struct ocfs2_xattr_set_ctxt *ctxt,
 				 int flag)
 {
 	struct ocfs2_xattr_entry *last;
@@ -1387,7 +1365,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 		if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
 			/* Replace existing local xattr with tree root */
 			ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
-							    offs);
+							    ctxt, offs);
 			if (ret < 0)
 				mlog_errno(ret);
 			goto out;
@@ -1406,7 +1384,8 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 				ret = ocfs2_xattr_value_truncate(inode,
 								 xs->xattr_bh,
 								 xv,
-								 xi->value_len);
+								 xi->value_len,
+								 ctxt);
 				if (ret < 0) {
 					mlog_errno(ret);
 					goto out;
@@ -1436,7 +1415,8 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 				 ret = ocfs2_xattr_value_truncate(inode,
 								 xs->xattr_bh,
 								 xv,
-								 0);
+								 0,
+								 ctxt);
 				if (ret < 0)
 					mlog_errno(ret);
 			}
@@ -1531,7 +1511,7 @@ out_commit:
 		 * This is the second step for value size > INLINE_SIZE.
 		 */
 		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
-		ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
+		ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, offs);
 		if (ret < 0) {
 			int ret2;
 
@@ -1555,6 +1535,10 @@ static int ocfs2_remove_value_outside(struct inode*inode,
 				      struct ocfs2_xattr_header *header)
 {
 	int ret = 0, i;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
+
+	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
@@ -1567,14 +1551,17 @@ static int ocfs2_remove_value_outside(struct inode*inode,
 				le16_to_cpu(entry->xe_name_offset);
 			xv = (struct ocfs2_xattr_value_root *)
 				(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
-			ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0);
+			ret = ocfs2_xattr_value_truncate(inode, bh, xv,
+							 0, &ctxt);
 			if (ret < 0) {
 				mlog_errno(ret);
-				return ret;
+				break;
 			}
 		}
 	}
 
+	ocfs2_schedule_truncate_log_flush(osb, 1);
+	ocfs2_run_deallocs(osb, &ctxt.dealloc);
 	return ret;
 }
 
@@ -1836,7 +1823,8 @@ static int ocfs2_xattr_ibody_find(struct inode *inode,
  */
 static int ocfs2_xattr_ibody_set(struct inode *inode,
 				 struct ocfs2_xattr_info *xi,
-				 struct ocfs2_xattr_search *xs)
+				 struct ocfs2_xattr_search *xs,
+				 struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
@@ -1853,7 +1841,7 @@ static int ocfs2_xattr_ibody_set(struct inode *inode,
 		}
 	}
 
-	ret = ocfs2_xattr_set_entry(inode, xi, xs,
+	ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
 				(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
 out:
 	up_write(&oi->ip_alloc_sem);
@@ -1926,12 +1914,12 @@ cleanup:
  */
 static int ocfs2_xattr_block_set(struct inode *inode,
 				 struct ocfs2_xattr_info *xi,
-				 struct ocfs2_xattr_search *xs)
+				 struct ocfs2_xattr_search *xs,
+				 struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	struct buffer_head *new_bh = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
-	struct ocfs2_alloc_context *meta_ac = NULL;
 	handle_t *handle = NULL;
 	struct ocfs2_xattr_block *xblk = NULL;
 	u16 suballoc_bit_start;
@@ -1940,15 +1928,6 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 	int ret;
 
 	if (!xs->xattr_bh) {
-		/*
-		 * Alloc one external block for extended attribute
-		 * outside of inode.
-		 */
-		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out;
-		}
 		handle = ocfs2_start_trans(osb,
 					   OCFS2_XATTR_BLOCK_CREATE_CREDITS);
 		if (IS_ERR(handle)) {
@@ -1963,7 +1942,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 			goto out_commit;
 		}
 
-		ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
+		ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
 					   &suballoc_bit_start, &num_got,
 					   &first_blkno);
 		if (ret < 0) {
@@ -1996,7 +1975,6 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
 		xs->here = xs->header->xh_entries;
 
-
 		ret = ocfs2_journal_dirty(handle, new_bh);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -2009,8 +1987,6 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 out_commit:
 		ocfs2_commit_trans(osb, handle);
 out:
-		if (meta_ac)
-			ocfs2_free_alloc_context(meta_ac);
 		if (ret < 0)
 			return ret;
 	} else
@@ -2018,22 +1994,266 @@ out:
 
 	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
 		/* Set extended attribute into external block */
-		ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
+		ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
+					    OCFS2_HAS_XATTR_FL);
 		if (!ret || ret != -ENOSPC)
 			goto end;
 
-		ret = ocfs2_xattr_create_index_block(inode, xs);
+		ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
 		if (ret)
 			goto end;
 	}
 
-	ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
+	ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
 
 end:
 
 	return ret;
 }
 
+/* Check whether the new xattr can be inserted into the inode. */
+static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
+				       struct ocfs2_xattr_info *xi,
+				       struct ocfs2_xattr_search *xs)
+{
+	u64 value_size;
+	struct ocfs2_xattr_entry *last;
+	int free, i;
+	size_t min_offs = xs->end - xs->base;
+
+	if (!xs->header)
+		return 0;
+
+	last = xs->header->xh_entries;
+
+	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
+		size_t offs = le16_to_cpu(last->xe_name_offset);
+		if (offs < min_offs)
+			min_offs = offs;
+		last += 1;
+	}
+
+	free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
+	if (free < 0)
+		return 0;
+
+	BUG_ON(!xs->not_found);
+
+	if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
+		value_size = OCFS2_XATTR_ROOT_SIZE;
+	else
+		value_size = OCFS2_XATTR_SIZE(xi->value_len);
+
+	if (free >= sizeof(struct ocfs2_xattr_entry) +
+		   OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
+		return 1;
+
+	return 0;
+}
+
+static int ocfs2_calc_xattr_set_need(struct inode *inode,
+				     struct ocfs2_dinode *di,
+				     struct ocfs2_xattr_info *xi,
+				     struct ocfs2_xattr_search *xis,
+				     struct ocfs2_xattr_search *xbs,
+				     int *clusters_need,
+				     int *meta_need)
+{
+	int ret = 0, old_in_xb = 0;
+	int clusters_add = 0, meta_add = 0;
+	struct buffer_head *bh = NULL;
+	struct ocfs2_xattr_block *xb = NULL;
+	struct ocfs2_xattr_entry *xe = NULL;
+	struct ocfs2_xattr_value_root *xv = NULL;
+	char *base = NULL;
+	int name_offset, name_len = 0;
+	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
+						    xi->value_len);
+	u64 value_size;
+
+	/*
+	 * delete a xattr doesn't need metadata and cluster allocation.
+	 * so return.
+	 */
+	if (!xi->value)
+		goto out;
+
+	if (xis->not_found && xbs->not_found) {
+		if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
+			clusters_add += new_clusters;
+
+		goto meta_guess;
+	}
+
+	if (!xis->not_found) {
+		xe = xis->here;
+		name_offset = le16_to_cpu(xe->xe_name_offset);
+		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
+		base = xis->base;
+	} else {
+		int i, block_off;
+		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
+		xe = xbs->here;
+		name_offset = le16_to_cpu(xe->xe_name_offset);
+		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
+		i = xbs->here - xbs->header->xh_entries;
+		old_in_xb = 1;
+
+		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
+			ret = ocfs2_xattr_bucket_get_name_value(inode,
+							bucket_xh(xbs->bucket),
+							i, &block_off,
+							&name_offset);
+			base = bucket_block(xbs->bucket, block_off);
+		} else
+			base = xbs->base;
+	}
+
+	/* do cluster allocation guess first. */
+	value_size = le64_to_cpu(xe->xe_value_size);
+
+	if (old_in_xb) {
+		/*
+		 * In xattr set, we always try to set the xe in inode first,
+		 * so if it can be inserted into inode successfully, the old
+		 * one will be removed from the xattr block, and this xattr
+		 * will be inserted into inode as a new xattr in inode.
+		 */
+		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
+			clusters_add += new_clusters;
+			goto out;
+		}
+	}
+
+	if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
+		/* the new values will be stored outside. */
+		u32 old_clusters = 0;
+
+		if (!ocfs2_xattr_is_local(xe)) {
+			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
+								 value_size);
+			xv = (struct ocfs2_xattr_value_root *)
+			     (base + name_offset + name_len);
+		} else
+			xv = &def_xv.xv;
+
+		if (old_clusters >= new_clusters)
+			goto out;
+		else {
+			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
+			clusters_add += new_clusters - old_clusters;
+			goto out;
+		}
+	} else {
+		/*
+		 * Now the new value will be stored inside. So if the new
+		 * value is smaller than the size of value root or the old
+		 * value, we don't need any allocation, otherwise we have
+		 * to guess metadata allocation.
+		 */
+		if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
+		    (!ocfs2_xattr_is_local(xe) &&
+		     OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
+			goto out;
+	}
+
+meta_guess:
+	/* calculate metadata allocation. */
+	if (di->i_xattr_loc) {
+		if (!xbs->xattr_bh) {
+			ret = ocfs2_read_block(inode,
+					       le64_to_cpu(di->i_xattr_loc),
+					       &bh);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			xb = (struct ocfs2_xattr_block *)bh->b_data;
+		} else
+			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
+
+		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
+			struct ocfs2_extent_list *el =
+				 &xb->xb_attrs.xb_root.xt_list;
+			meta_add += ocfs2_extend_meta_needed(el);
+		}
+
+		/*
+		 * This cluster will be used either for new bucket or for
+		 * new xattr block.
+		 * If the cluster size is the same as the bucket size, one
+		 * more is needed since we may need to extend the bucket
+		 * also.
+		 */
+		clusters_add += 1;
+		if (OCFS2_XATTR_BUCKET_SIZE ==
+			OCFS2_SB(inode->i_sb)->s_clustersize)
+			clusters_add += 1;
+	} else
+		meta_add += 1;
+out:
+	if (clusters_need)
+		*clusters_need = clusters_add;
+	if (meta_need)
+		*meta_need = meta_add;
+	brelse(bh);
+	return ret;
+}
+
+static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
+				     struct ocfs2_dinode *di,
+				     struct ocfs2_xattr_info *xi,
+				     struct ocfs2_xattr_search *xis,
+				     struct ocfs2_xattr_search *xbs,
+				     struct ocfs2_xattr_set_ctxt *ctxt)
+{
+	int clusters_add, meta_add, ret;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
+
+	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
+
+	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
+					&clusters_add, &meta_add);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d\n",
+	     xi->name, meta_add, clusters_add);
+
+	if (meta_add) {
+		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
+							&ctxt->meta_ac);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	if (clusters_add) {
+		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
+		if (ret)
+			mlog_errno(ret);
+	}
+out:
+	if (ret) {
+		if (ctxt->meta_ac) {
+			ocfs2_free_alloc_context(ctxt->meta_ac);
+			ctxt->meta_ac = NULL;
+		}
+
+		/*
+		 * We cannot have an error and a non null ctxt->data_ac.
+		 */
+	}
+
+	return ret;
+}
+
 /*
  * ocfs2_xattr_set()
  *
@@ -2051,6 +2271,8 @@ int ocfs2_xattr_set(struct inode *inode,
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_dinode *di;
 	int ret;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
 
 	struct ocfs2_xattr_info xi = {
 		.name_index = name_index,
@@ -2115,15 +2337,21 @@ int ocfs2_xattr_set(struct inode *inode,
 			goto cleanup;
 	}
 
+	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, &xbs, &ctxt);
+	if (ret) {
+		mlog_errno(ret);
+		goto cleanup;
+	}
+
 	if (!value) {
 		/* Remove existing extended attribute */
 		if (!xis.not_found)
-			ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
+			ret = ocfs2_xattr_ibody_set(inode, &xi, &xis, &ctxt);
 		else if (!xbs.not_found)
-			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
+			ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt);
 	} else {
 		/* We always try to set extended attribute into inode first*/
-		ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
+		ret = ocfs2_xattr_ibody_set(inode, &xi, &xis, &ctxt);
 		if (!ret && !xbs.not_found) {
 			/*
 			 * If succeed and that extended attribute existing in
@@ -2131,7 +2359,7 @@ int ocfs2_xattr_set(struct inode *inode,
 			 */
 			xi.value = NULL;
 			xi.value_len = 0;
-			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
+			ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt);
 		} else if (ret == -ENOSPC) {
 			if (di->i_xattr_loc && !xbs.xattr_bh) {
 				ret = ocfs2_xattr_block_find(inode, name_index,
@@ -2143,9 +2371,9 @@ int ocfs2_xattr_set(struct inode *inode,
 			 * If no space in inode, we will set extended attribute
 			 * into external block.
 			 */
-			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
+			ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt);
 			if (ret)
-				goto cleanup;
+				goto free;
 			if (!xis.not_found) {
 				/*
 				 * If succeed and that extended attribute
@@ -2153,10 +2381,19 @@ int ocfs2_xattr_set(struct inode *inode,
 				 */
 				xi.value = NULL;
 				xi.value_len = 0;
-				ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
+				ret = ocfs2_xattr_ibody_set(inode, &xi,
+							    &xis, &ctxt);
 			}
 		}
 	}
+free:
+	if (ctxt.data_ac)
+		ocfs2_free_alloc_context(ctxt.data_ac);
+	if (ctxt.meta_ac)
+		ocfs2_free_alloc_context(ctxt.meta_ac);
+	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
+		ocfs2_schedule_truncate_log_flush(osb, 1);
+	ocfs2_run_deallocs(osb, &ctxt.dealloc);
 cleanup:
 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
 	ocfs2_inode_unlock(inode, 1);
@@ -2734,7 +2971,8 @@ static void ocfs2_xattr_update_xattr_search(struct inode *inode,
 }
 
 static int ocfs2_xattr_create_index_block(struct inode *inode,
-					  struct ocfs2_xattr_search *xs)
+					  struct ocfs2_xattr_search *xs,
+					  struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret, credits = OCFS2_SUBALLOC_ALLOC;
 	u32 bit_off, len;
@@ -2742,7 +2980,6 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 	handle_t *handle;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	struct ocfs2_alloc_context *data_ac;
 	struct buffer_head *xb_bh = xs->xattr_bh;
 	struct ocfs2_xattr_block *xb =
 			(struct ocfs2_xattr_block *)xb_bh->b_data;
@@ -2755,12 +2992,6 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
 	BUG_ON(!xs->bucket);
 
-	ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
 	/*
 	 * XXX:
 	 * We can use this lock for now, and maybe move to a dedicated mutex
@@ -2787,7 +3018,8 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
+	ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
+				     1, 1, &bit_off, &len);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -2850,10 +3082,6 @@ out_commit:
 out_sem:
 	up_write(&oi->ip_alloc_sem);
 
-out:
-	if (data_ac)
-		ocfs2_free_alloc_context(data_ac);
-
 	return ret;
 }
 
@@ -3614,7 +3842,8 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 				       u32 *num_clusters,
 				       u32 prev_cpos,
 				       u64 prev_blkno,
-				       int *extend)
+				       int *extend,
+				       struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret, credits;
 	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
@@ -3622,8 +3851,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
 	u64 block;
 	handle_t *handle = NULL;
-	struct ocfs2_alloc_context *data_ac = NULL;
-	struct ocfs2_alloc_context *meta_ac = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_extent_tree et;
 
@@ -3634,13 +3861,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 
 	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
 
-	ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
-				    &data_ac, &meta_ac);
-	if (ret) {
-		mlog_errno(ret);
-		goto leave;
-	}
-
 	credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
 					    clusters_to_add);
 	handle = ocfs2_start_trans(osb, credits);
@@ -3658,7 +3878,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 		goto leave;
 	}
 
-	ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
+	ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
 				     clusters_to_add, &bit_off, &num_bits);
 	if (ret < 0) {
 		if (ret != -ENOSPC)
@@ -3719,7 +3939,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
 	     num_bits, (unsigned long long)block, v_start);
 	ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
-				  num_bits, 0, meta_ac);
+				  num_bits, 0, ctxt->meta_ac);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto leave;
@@ -3734,10 +3954,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 leave:
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
-	if (data_ac)
-		ocfs2_free_alloc_context(data_ac);
-	if (meta_ac)
-		ocfs2_free_alloc_context(meta_ac);
 
 	return ret;
 }
@@ -3821,7 +4037,8 @@ out:
  */
 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 				      struct buffer_head *xb_bh,
-				      struct buffer_head *header_bh)
+				      struct buffer_head *header_bh,
+				      struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	struct ocfs2_xattr_header *first_xh = NULL;
 	struct buffer_head *first_bh = NULL;
@@ -3872,7 +4089,8 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 						  &num_clusters,
 						  e_cpos,
 						  p_blkno,
-						  &extend);
+						  &extend,
+						  ctxt);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -4147,7 +4365,8 @@ out:
 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 					     struct buffer_head *header_bh,
 					     int xe_off,
-					     int len)
+					     int len,
+					     struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret, offset;
 	u64 value_blk;
@@ -4182,7 +4401,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 
 	mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
 	     xe_off, (unsigned long long)header_bh->b_blocknr, len);
-	ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
+	ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4200,8 +4419,9 @@ out:
 }
 
 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
-						struct ocfs2_xattr_search *xs,
-						int len)
+					struct ocfs2_xattr_search *xs,
+					int len,
+					struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret, offset;
 	struct ocfs2_xattr_entry *xe = xs->here;
@@ -4211,7 +4431,7 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
 
 	offset = xe - xh->xh_entries;
 	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0],
-						offset, len);
+						offset, len, ctxt);
 	if (ret)
 		mlog_errno(ret);
 
@@ -4375,7 +4595,8 @@ out_commit:
  */
 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
 				     struct ocfs2_xattr_info *xi,
-				     struct ocfs2_xattr_search *xs)
+				     struct ocfs2_xattr_search *xs,
+				     struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret, local = 1;
 	size_t value_len;
@@ -4403,7 +4624,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
 			value_len = 0;
 
 		ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
-							   value_len);
+							   value_len,
+							   ctxt);
 		if (ret)
 			goto out;
 
@@ -4434,7 +4656,7 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
 
 	/* allocate the space now for the outside block storage. */
 	ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
-						   value_len);
+						   value_len, ctxt);
 	if (ret) {
 		mlog_errno(ret);
 
@@ -4485,7 +4707,8 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
 
 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
 					     struct ocfs2_xattr_info *xi,
-					     struct ocfs2_xattr_search *xs)
+					     struct ocfs2_xattr_search *xs,
+					     struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	struct ocfs2_xattr_header *xh;
 	struct ocfs2_xattr_entry *xe;
@@ -4603,7 +4826,8 @@ try_again:
 
 		ret = ocfs2_add_new_xattr_bucket(inode,
 						 xs->xattr_bh,
-						 xs->bucket->bu_bhs[0]);
+						 xs->bucket->bu_bhs[0],
+						 ctxt);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -4622,7 +4846,7 @@ try_again:
 	}
 
 xattr_set:
-	ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
+	ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
 out:
 	mlog_exit(ret);
 	return ret;
@@ -4636,6 +4860,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
 	u16 i;
 	struct ocfs2_xattr_entry *xe;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
+
+	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
 		xe = &xh->xh_entries[i];
@@ -4644,13 +4872,16 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 
 		ret = ocfs2_xattr_bucket_value_truncate(inode,
 							bucket->bu_bhs[0],
-							i, 0);
+							i, 0, &ctxt);
 		if (ret) {
 			mlog_errno(ret);
 			break;
 		}
 	}
 
+	ocfs2_schedule_truncate_log_flush(osb, 1);
+	ocfs2_run_deallocs(osb, &ctxt.dealloc);
+
 	return ret;
 }
 
-- 
cgit v0.10.2


From 85db90e77806d48a19fda77dabe8897d369a1710 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 12 Nov 2008 08:27:01 +0800
Subject: ocfs2/xattr: Merge xattr set transaction.

In current ocfs2/xattr, the whole xattr set is divided into
many steps are many transaction are used, this make the
xattr set process isn't like a real transaction, so this
patch try to merge all the transaction into one. Another
benefit is that acl can use it easily now.

I don't merge the transaction of deleting xattr when we
remove an inode. The reason is that if we have a large number
of xattrs and every xattrs has large values(large enough
for outside storage), the whole transaction will be very
huge and it looks like jbd can't handle it(I meet with a
jbd complain once). And the old inode removal is also divided
into many steps, so I'd like to leave as it is.

Note:
In xattr set, I try to avoid ocfs2_extend_trans since if
the credits aren't enough for the extension, it will commit
all the dirty blocks and create a new transaction which may
lead to inconsistency in metadata. All ocfs2_extend_trans
remained are safe now.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 4fd201a..7a90892 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -72,6 +72,7 @@ struct ocfs2_xattr_bucket {
 };
 
 struct ocfs2_xattr_set_ctxt {
+	handle_t *handle;
 	struct ocfs2_alloc_context *meta_ac;
 	struct ocfs2_alloc_context *data_ac;
 	struct ocfs2_cached_dealloc_ctxt dealloc;
@@ -346,9 +347,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 					 struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int status = 0;
-	int restart_func = 0;
-	int credits = 0;
-	handle_t *handle = NULL;
+	handle_t *handle = ctxt->handle;
 	enum ocfs2_alloc_restarted why;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
@@ -358,19 +357,6 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 
 	ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);
 
-restart_all:
-
-	credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
-					    clusters_to_add);
-	handle = ocfs2_start_trans(osb, credits);
-	if (IS_ERR(handle)) {
-		status = PTR_ERR(handle);
-		handle = NULL;
-		mlog_errno(status);
-		goto leave;
-	}
-
-restarted_transaction:
 	status = ocfs2_journal_access(handle, inode, xattr_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
@@ -389,9 +375,8 @@ restarted_transaction:
 					     ctxt->data_ac,
 					     ctxt->meta_ac,
 					     &why);
-	if ((status < 0) && (status != -EAGAIN)) {
-		if (status != -ENOSPC)
-			mlog_errno(status);
+	if (status < 0) {
+		mlog_errno(status);
 		goto leave;
 	}
 
@@ -403,39 +388,13 @@ restarted_transaction:
 
 	clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
 
-	if (why != RESTART_NONE && clusters_to_add) {
-		if (why == RESTART_META) {
-			mlog(0, "restarting function.\n");
-			restart_func = 1;
-		} else {
-			BUG_ON(why != RESTART_TRANS);
-
-			mlog(0, "restarting transaction.\n");
-			/* TODO: This can be more intelligent. */
-			credits = ocfs2_calc_extend_credits(osb->sb,
-							    et.et_root_el,
-							    clusters_to_add);
-			status = ocfs2_extend_trans(handle, credits);
-			if (status < 0) {
-				/* handle still has to be committed at
-				 * this point. */
-				status = -ENOMEM;
-				mlog_errno(status);
-				goto leave;
-			}
-			goto restarted_transaction;
-		}
-	}
+	/*
+	 * We should have already allocated enough space before the transaction,
+	 * so no need to restart.
+	 */
+	BUG_ON(why != RESTART_NONE || clusters_to_add);
 
 leave:
-	if (handle) {
-		ocfs2_commit_trans(osb, handle);
-		handle = NULL;
-	}
-	if ((!status) && restart_func) {
-		restart_func = 0;
-		goto restart_all;
-	}
 
 	return status;
 }
@@ -448,31 +407,23 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 {
 	int ret;
 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	handle_t *handle;
+	handle_t *handle = ctxt->handle;
 	struct ocfs2_extent_tree et;
 
 	ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
 
-	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		mlog_errno(ret);
-		goto out;
-	}
-
 	ret = ocfs2_journal_access(handle, inode, root_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out;
 	}
 
 	ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
 				  &ctxt->dealloc);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out;
 	}
 
 	le32_add_cpu(&xv->xr_clusters, -len);
@@ -480,15 +431,13 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 	ret = ocfs2_journal_dirty(handle, root_bh);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out;
 	}
 
 	ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
 	if (ret)
 		mlog_errno(ret);
 
-out_commit:
-	ocfs2_commit_trans(osb, handle);
 out:
 	return ret;
 }
@@ -975,6 +924,7 @@ static int ocfs2_xattr_get(struct inode *inode,
 }
 
 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
+					   handle_t *handle,
 					   struct ocfs2_xattr_value_root *xv,
 					   const void *value,
 					   int value_len)
@@ -986,14 +936,17 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
 	u64 blkno;
 	struct buffer_head *bh = NULL;
-	handle_t *handle;
 
 	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
 
+	/*
+	 * In __ocfs2_xattr_set_value_outside has already been dirtied,
+	 * so we don't need to worry about whether ocfs2_extend_trans
+	 * will create a new transactio for us or not.
+	 */
 	credits = clusters * bpc;
-	handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
+	ret = ocfs2_extend_trans(handle, credits);
+	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
@@ -1003,7 +956,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 					       &num_clusters, &xv->xr_list);
 		if (ret) {
 			mlog_errno(ret);
-			goto out_commit;
+			goto out;
 		}
 
 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
@@ -1012,7 +965,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 			ret = ocfs2_read_block(inode, blkno, &bh);
 			if (ret) {
 				mlog_errno(ret);
-				goto out_commit;
+				goto out;
 			}
 
 			ret = ocfs2_journal_access(handle,
@@ -1021,7 +974,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 						   OCFS2_JOURNAL_ACCESS_WRITE);
 			if (ret < 0) {
 				mlog_errno(ret);
-				goto out_commit;
+				goto out;
 			}
 
 			cp_len = value_len > blocksize ? blocksize : value_len;
@@ -1035,7 +988,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 			ret = ocfs2_journal_dirty(handle, bh);
 			if (ret < 0) {
 				mlog_errno(ret);
-				goto out_commit;
+				goto out;
 			}
 			brelse(bh);
 			bh = NULL;
@@ -1049,8 +1002,6 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 		}
 		cpos += num_clusters;
 	}
-out_commit:
-	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
 	brelse(bh);
 
@@ -1058,28 +1009,21 @@ out:
 }
 
 static int ocfs2_xattr_cleanup(struct inode *inode,
+			       handle_t *handle,
 			       struct ocfs2_xattr_info *xi,
 			       struct ocfs2_xattr_search *xs,
 			       size_t offs)
 {
-	handle_t *handle = NULL;
 	int ret = 0;
 	size_t name_len = strlen(xi->name);
 	void *val = xs->base + offs;
 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
 
-	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
-				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		mlog_errno(ret);
-		goto out;
-	}
 	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out;
 	}
 	/* Decrease xattr count */
 	le16_add_cpu(&xs->header->xh_count, -1);
@@ -1090,32 +1034,23 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
 	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
 	if (ret < 0)
 		mlog_errno(ret);
-out_commit:
-	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
 	return ret;
 }
 
 static int ocfs2_xattr_update_entry(struct inode *inode,
+				    handle_t *handle,
 				    struct ocfs2_xattr_info *xi,
 				    struct ocfs2_xattr_search *xs,
 				    size_t offs)
 {
-	handle_t *handle = NULL;
-	int ret = 0;
+	int ret;
 
-	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
-				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		mlog_errno(ret);
-		goto out;
-	}
 	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out;
 	}
 
 	xs->here->xe_name_offset = cpu_to_le16(offs);
@@ -1129,8 +1064,6 @@ static int ocfs2_xattr_update_entry(struct inode *inode,
 	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
 	if (ret < 0)
 		mlog_errno(ret);
-out_commit:
-	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
 	return ret;
 }
@@ -1168,13 +1101,13 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
 		mlog_errno(ret);
 		return ret;
 	}
-	ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
-					      xi->value_len);
+	ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, offs);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
 	}
-	ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
+	ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, xv,
+					      xi->value, xi->value_len);
 	if (ret < 0)
 		mlog_errno(ret);
 
@@ -1302,7 +1235,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
 	size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
 	size_t size_l = 0;
-	handle_t *handle = NULL;
+	handle_t *handle = ctxt->handle;
 	int free, i, ret;
 	struct ocfs2_xattr_info xi_l = {
 		.name_index = xi->name_index,
@@ -1391,19 +1324,21 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 					goto out;
 				}
 
-				ret = __ocfs2_xattr_set_value_outside(inode,
-								xv,
-								xi->value,
-								xi->value_len);
+				ret = ocfs2_xattr_update_entry(inode,
+							       handle,
+							       xi,
+							       xs,
+							       offs);
 				if (ret < 0) {
 					mlog_errno(ret);
 					goto out;
 				}
 
-				ret = ocfs2_xattr_update_entry(inode,
-							       xi,
-							       xs,
-							       offs);
+				ret = __ocfs2_xattr_set_value_outside(inode,
+								handle,
+								xv,
+								xi->value,
+								xi->value_len);
 				if (ret < 0)
 					mlog_errno(ret);
 				goto out;
@@ -1413,45 +1348,29 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 				 * just trucate old value to zero.
 				 */
 				 ret = ocfs2_xattr_value_truncate(inode,
-								 xs->xattr_bh,
-								 xv,
-								 0,
-								 ctxt);
+								  xs->xattr_bh,
+								  xv,
+								  0,
+								  ctxt);
 				if (ret < 0)
 					mlog_errno(ret);
 			}
 		}
 	}
 
-	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
-				   OCFS2_INODE_UPDATE_CREDITS);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		mlog_errno(ret);
-		goto out;
-	}
-
 	ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out;
 	}
 
 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
-		/* set extended attribute in external block. */
-		ret = ocfs2_extend_trans(handle,
-					 OCFS2_INODE_UPDATE_CREDITS +
-					 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
-		if (ret) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
 		ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
 					   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
-			goto out_commit;
+			goto out;
 		}
 	}
 
@@ -1465,7 +1384,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 		ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
 		if (ret < 0) {
 			mlog_errno(ret);
-			goto out_commit;
+			goto out;
 		}
 	}
 
@@ -1502,9 +1421,6 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 	if (ret < 0)
 		mlog_errno(ret);
 
-out_commit:
-	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
-
 	if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
 		/*
 		 * Set value outside in B tree.
@@ -1520,14 +1436,14 @@ out_commit:
 			 * If set value outside failed, we have to clean
 			 * the junk tree root we have already set in local.
 			 */
-			ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
+			ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
+						   xi, xs, offs);
 			if (ret2 < 0)
 				mlog_errno(ret2);
 		}
 	}
 out:
 	return ret;
-
 }
 
 static int ocfs2_remove_value_outside(struct inode*inode,
@@ -1540,6 +1456,13 @@ static int ocfs2_remove_value_outside(struct inode*inode,
 
 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
+	ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
+	if (IS_ERR(ctxt.handle)) {
+		ret = PTR_ERR(ctxt.handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
 
@@ -1560,8 +1483,10 @@ static int ocfs2_remove_value_outside(struct inode*inode,
 		}
 	}
 
+	ocfs2_commit_trans(osb, ctxt.handle);
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
+out:
 	return ret;
 }
 
@@ -1920,7 +1845,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 	struct buffer_head *new_bh = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
-	handle_t *handle = NULL;
+	handle_t *handle = ctxt->handle;
 	struct ocfs2_xattr_block *xblk = NULL;
 	u16 suballoc_bit_start;
 	u32 num_got;
@@ -1928,18 +1853,11 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 	int ret;
 
 	if (!xs->xattr_bh) {
-		handle = ocfs2_start_trans(osb,
-					   OCFS2_XATTR_BLOCK_CREATE_CREDITS);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			mlog_errno(ret);
-			goto out;
-		}
 		ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
 					   OCFS2_JOURNAL_ACCESS_CREATE);
 		if (ret < 0) {
 			mlog_errno(ret);
-			goto out_commit;
+			goto end;
 		}
 
 		ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
@@ -1947,7 +1865,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 					   &first_blkno);
 		if (ret < 0) {
 			mlog_errno(ret);
-			goto out_commit;
+			goto end;
 		}
 
 		new_bh = sb_getblk(inode->i_sb, first_blkno);
@@ -1957,7 +1875,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 					   OCFS2_JOURNAL_ACCESS_CREATE);
 		if (ret < 0) {
 			mlog_errno(ret);
-			goto out_commit;
+			goto end;
 		}
 
 		/* Initialize ocfs2_xattr_block */
@@ -1978,17 +1896,10 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 		ret = ocfs2_journal_dirty(handle, new_bh);
 		if (ret < 0) {
 			mlog_errno(ret);
-			goto out_commit;
+			goto end;
 		}
 		di->i_xattr_loc = cpu_to_le64(first_blkno);
-		ret = ocfs2_journal_dirty(handle, xs->inode_bh);
-		if (ret < 0)
-			mlog_errno(ret);
-out_commit:
-		ocfs2_commit_trans(osb, handle);
-out:
-		if (ret < 0)
-			return ret;
+		ocfs2_journal_dirty(handle, xs->inode_bh);
 	} else
 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
 
@@ -2057,10 +1968,11 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 				     struct ocfs2_xattr_search *xis,
 				     struct ocfs2_xattr_search *xbs,
 				     int *clusters_need,
-				     int *meta_need)
+				     int *meta_need,
+				     int *credits_need)
 {
 	int ret = 0, old_in_xb = 0;
-	int clusters_add = 0, meta_add = 0;
+	int clusters_add = 0, meta_add = 0, credits = 0;
 	struct buffer_head *bh = NULL;
 	struct ocfs2_xattr_block *xb = NULL;
 	struct ocfs2_xattr_entry *xe = NULL;
@@ -2071,16 +1983,15 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 						    xi->value_len);
 	u64 value_size;
 
-	/*
-	 * delete a xattr doesn't need metadata and cluster allocation.
-	 * so return.
-	 */
-	if (!xi->value)
-		goto out;
-
 	if (xis->not_found && xbs->not_found) {
-		if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
+		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+		if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
 			clusters_add += new_clusters;
+			credits += ocfs2_calc_extend_credits(inode->i_sb,
+							&def_xv.xv.xr_list,
+							new_clusters);
+		}
 
 		goto meta_guess;
 	}
@@ -2090,6 +2001,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 		name_offset = le16_to_cpu(xe->xe_name_offset);
 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
 		base = xis->base;
+		credits += OCFS2_INODE_UPDATE_CREDITS;
 	} else {
 		int i, block_off;
 		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
@@ -2105,8 +2017,25 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 							i, &block_off,
 							&name_offset);
 			base = bucket_block(xbs->bucket, block_off);
-		} else
+			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+		} else {
 			base = xbs->base;
+			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
+		}
+	}
+
+	/*
+	 * delete a xattr doesn't need metadata and cluster allocation.
+	 * so just calculate the credits and return.
+	 *
+	 * The credits for removing the value tree will be extended
+	 * by ocfs2_remove_extent itself.
+	 */
+	if (!xi->value) {
+		if (!ocfs2_xattr_is_local(xe))
+			credits += OCFS2_REMOVE_EXTENT_CREDITS;
+
+		goto out;
 	}
 
 	/* do cluster allocation guess first. */
@@ -2121,6 +2050,13 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 		 */
 		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
 			clusters_add += new_clusters;
+			credits += OCFS2_REMOVE_EXTENT_CREDITS +
+				    OCFS2_INODE_UPDATE_CREDITS;
+			if (!ocfs2_xattr_is_local(xe))
+				credits += ocfs2_calc_extend_credits(
+							inode->i_sb,
+							&def_xv.xv.xr_list,
+							new_clusters);
 			goto out;
 		}
 	}
@@ -2137,11 +2073,16 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 		} else
 			xv = &def_xv.xv;
 
-		if (old_clusters >= new_clusters)
+		if (old_clusters >= new_clusters) {
+			credits += OCFS2_REMOVE_EXTENT_CREDITS;
 			goto out;
-		else {
+		} else {
 			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
 			clusters_add += new_clusters - old_clusters;
+			credits += ocfs2_calc_extend_credits(inode->i_sb,
+							     &xv->xr_list,
+							     new_clusters -
+							     old_clusters);
 			goto out;
 		}
 	} else {
@@ -2177,6 +2118,8 @@ meta_guess:
 			struct ocfs2_extent_list *el =
 				 &xb->xb_attrs.xb_root.xt_list;
 			meta_add += ocfs2_extend_meta_needed(el);
+			credits += ocfs2_calc_extend_credits(inode->i_sb,
+							     el, 1);
 		}
 
 		/*
@@ -2187,16 +2130,23 @@ meta_guess:
 		 * also.
 		 */
 		clusters_add += 1;
+		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 		if (OCFS2_XATTR_BUCKET_SIZE ==
-			OCFS2_SB(inode->i_sb)->s_clustersize)
+			OCFS2_SB(inode->i_sb)->s_clustersize) {
+			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 			clusters_add += 1;
-	} else
+		}
+	} else {
 		meta_add += 1;
+		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
+	}
 out:
 	if (clusters_need)
 		*clusters_need = clusters_add;
 	if (meta_need)
 		*meta_need = meta_add;
+	if (credits_need)
+		*credits_need = credits;
 	brelse(bh);
 	return ret;
 }
@@ -2206,7 +2156,8 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
 				     struct ocfs2_xattr_info *xi,
 				     struct ocfs2_xattr_search *xis,
 				     struct ocfs2_xattr_search *xbs,
-				     struct ocfs2_xattr_set_ctxt *ctxt)
+				     struct ocfs2_xattr_set_ctxt *ctxt,
+				     int *credits)
 {
 	int clusters_add, meta_add, ret;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -2216,14 +2167,14 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
 	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
 
 	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
-					&clusters_add, &meta_add);
+					&clusters_add, &meta_add, credits);
 	if (ret) {
 		mlog_errno(ret);
 		return ret;
 	}
 
-	mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d\n",
-	     xi->name, meta_add, clusters_add);
+	mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
+	     "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
 
 	if (meta_add) {
 		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
@@ -2254,6 +2205,126 @@ out:
 	return ret;
 }
 
+static int __ocfs2_xattr_set_handle(struct inode *inode,
+				    struct ocfs2_dinode *di,
+				    struct ocfs2_xattr_info *xi,
+				    struct ocfs2_xattr_search *xis,
+				    struct ocfs2_xattr_search *xbs,
+				    struct ocfs2_xattr_set_ctxt *ctxt)
+{
+	int ret = 0, credits;
+
+	if (!xi->value) {
+		/* Remove existing extended attribute */
+		if (!xis->not_found)
+			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
+		else if (!xbs->not_found)
+			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
+	} else {
+		/* We always try to set extended attribute into inode first*/
+		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
+		if (!ret && !xbs->not_found) {
+			/*
+			 * If succeed and that extended attribute existing in
+			 * external block, then we will remove it.
+			 */
+			xi->value = NULL;
+			xi->value_len = 0;
+
+			xis->not_found = -ENODATA;
+			ret = ocfs2_calc_xattr_set_need(inode,
+							di,
+							xi,
+							xis,
+							xbs,
+							NULL,
+							NULL,
+							&credits);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			ret = ocfs2_extend_trans(ctxt->handle, credits +
+					ctxt->handle->h_buffer_credits);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
+		} else if (ret == -ENOSPC) {
+			if (di->i_xattr_loc && !xbs->xattr_bh) {
+				ret = ocfs2_xattr_block_find(inode,
+							     xi->name_index,
+							     xi->name, xbs);
+				if (ret)
+					goto out;
+
+				xis->not_found = -ENODATA;
+				ret = ocfs2_calc_xattr_set_need(inode,
+								di,
+								xi,
+								xis,
+								xbs,
+								NULL,
+								NULL,
+								&credits);
+				if (ret) {
+					mlog_errno(ret);
+					goto out;
+				}
+
+				ret = ocfs2_extend_trans(ctxt->handle, credits +
+					ctxt->handle->h_buffer_credits);
+				if (ret) {
+					mlog_errno(ret);
+					goto out;
+				}
+			}
+			/*
+			 * If no space in inode, we will set extended attribute
+			 * into external block.
+			 */
+			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
+			if (ret)
+				goto out;
+			if (!xis->not_found) {
+				/*
+				 * If succeed and that extended attribute
+				 * existing in inode, we will remove it.
+				 */
+				xi->value = NULL;
+				xi->value_len = 0;
+				xbs->not_found = -ENODATA;
+				ret = ocfs2_calc_xattr_set_need(inode,
+								di,
+								xi,
+								xis,
+								xbs,
+								NULL,
+								NULL,
+								&credits);
+				if (ret) {
+					mlog_errno(ret);
+					goto out;
+				}
+
+				ret = ocfs2_extend_trans(ctxt->handle, credits +
+						ctxt->handle->h_buffer_credits);
+				if (ret) {
+					mlog_errno(ret);
+					goto out;
+				}
+				ret = ocfs2_xattr_ibody_set(inode, xi,
+							    xis, ctxt);
+			}
+		}
+	}
+
+out:
+	return ret;
+}
+
 /*
  * ocfs2_xattr_set()
  *
@@ -2270,8 +2341,9 @@ int ocfs2_xattr_set(struct inode *inode,
 {
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_dinode *di;
-	int ret;
+	int ret, credits;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct inode *tl_inode = osb->osb_tl_inode;
 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
 
 	struct ocfs2_xattr_info xi = {
@@ -2337,56 +2409,37 @@ int ocfs2_xattr_set(struct inode *inode,
 			goto cleanup;
 	}
 
-	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, &xbs, &ctxt);
+
+	mutex_lock(&tl_inode->i_mutex);
+
+	if (ocfs2_truncate_log_needs_flush(osb)) {
+		ret = __ocfs2_flush_truncate_log(osb);
+		if (ret < 0) {
+			mutex_unlock(&tl_inode->i_mutex);
+			mlog_errno(ret);
+			goto cleanup;
+		}
+	}
+	mutex_unlock(&tl_inode->i_mutex);
+
+	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
+					&xbs, &ctxt, &credits);
 	if (ret) {
 		mlog_errno(ret);
 		goto cleanup;
 	}
 
-	if (!value) {
-		/* Remove existing extended attribute */
-		if (!xis.not_found)
-			ret = ocfs2_xattr_ibody_set(inode, &xi, &xis, &ctxt);
-		else if (!xbs.not_found)
-			ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt);
-	} else {
-		/* We always try to set extended attribute into inode first*/
-		ret = ocfs2_xattr_ibody_set(inode, &xi, &xis, &ctxt);
-		if (!ret && !xbs.not_found) {
-			/*
-			 * If succeed and that extended attribute existing in
-			 * external block, then we will remove it.
-			 */
-			xi.value = NULL;
-			xi.value_len = 0;
-			ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt);
-		} else if (ret == -ENOSPC) {
-			if (di->i_xattr_loc && !xbs.xattr_bh) {
-				ret = ocfs2_xattr_block_find(inode, name_index,
-							     name, &xbs);
-				if (ret)
-					goto cleanup;
-			}
-			/*
-			 * If no space in inode, we will set extended attribute
-			 * into external block.
-			 */
-			ret = ocfs2_xattr_block_set(inode, &xi, &xbs, &ctxt);
-			if (ret)
-				goto free;
-			if (!xis.not_found) {
-				/*
-				 * If succeed and that extended attribute
-				 * existing in inode, we will remove it.
-				 */
-				xi.value = NULL;
-				xi.value_len = 0;
-				ret = ocfs2_xattr_ibody_set(inode, &xi,
-							    &xis, &ctxt);
-			}
-		}
+	ctxt.handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(ctxt.handle)) {
+		ret = PTR_ERR(ctxt.handle);
+		mlog_errno(ret);
+		goto cleanup;
 	}
-free:
+
+	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
+
+	ocfs2_commit_trans(osb, ctxt.handle);
+
 	if (ctxt.data_ac)
 		ocfs2_free_alloc_context(ctxt.data_ac);
 	if (ctxt.meta_ac)
@@ -2974,10 +3027,10 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 					  struct ocfs2_xattr_search *xs,
 					  struct ocfs2_xattr_set_ctxt *ctxt)
 {
-	int ret, credits = OCFS2_SUBALLOC_ALLOC;
+	int ret;
 	u32 bit_off, len;
 	u64 blkno;
-	handle_t *handle;
+	handle_t *handle = ctxt->handle;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct buffer_head *xb_bh = xs->xattr_bh;
@@ -2999,30 +3052,18 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 	 */
 	down_write(&oi->ip_alloc_sem);
 
-	/*
-	 * We need more credits.  One for the xattr block update and one
-	 * for each block of the new xattr bucket.
-	 */
-	credits += 1 + ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-	handle = ocfs2_start_trans(osb, credits);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		mlog_errno(ret);
-		goto out_sem;
-	}
-
 	ret = ocfs2_journal_access(handle, inode, xb_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out;
 	}
 
 	ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
 				     1, 1, &bit_off, &len);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out;
 	}
 
 	/*
@@ -3038,14 +3079,14 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 	ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out;
 	}
 
 	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
 						OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out;
 	}
 
 	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
@@ -3070,16 +3111,9 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 
 	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
 
-	ret = ocfs2_journal_dirty(handle, xb_bh);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_commit;
-	}
+	ocfs2_journal_dirty(handle, xb_bh);
 
-out_commit:
-	ocfs2_commit_trans(osb, handle);
-
-out_sem:
+out:
 	up_write(&oi->ip_alloc_sem);
 
 	return ret;
@@ -3105,6 +3139,7 @@ static int cmp_xe_offset(const void *a, const void *b)
  * so that we can spare some space for insertion.
  */
 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
+				     handle_t *handle,
 				     struct ocfs2_xattr_bucket *bucket)
 {
 	int ret, i;
@@ -3114,7 +3149,6 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 	u64 blkno = bucket_blkno(bucket);
 	u16 xh_free_start;
 	size_t blocksize = inode->i_sb->s_blocksize;
-	handle_t *handle;
 	struct ocfs2_xattr_entry *xe;
 
 	/*
@@ -3133,19 +3167,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
 		memcpy(buf, bucket_block(bucket, i), blocksize);
 
-	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), bucket->bu_blocks);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		handle = NULL;
-		mlog_errno(ret);
-		goto out;
-	}
-
 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
 						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
-		goto commit;
+		goto out;
 	}
 
 	xh = (struct ocfs2_xattr_header *)bucket_buf;
@@ -3203,7 +3229,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 			"bucket %llu\n", (unsigned long long)blkno);
 
 	if (xh_free_start == end)
-		goto commit;
+		goto out;
 
 	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
 	xh->xh_free_start = cpu_to_le16(end);
@@ -3218,8 +3244,6 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
 		memcpy(bucket_block(bucket, i), buf, blocksize);
 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
 
-commit:
-	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
 	kfree(bucket_buf);
 	return ret;
@@ -3270,7 +3294,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
 	 * 1 more for the update of the 1st bucket of the previous
 	 * extent record.
 	 */
-	credits = bpc / 2 + 1;
+	credits = bpc / 2 + 1 + handle->h_buffer_credits;
 	ret = ocfs2_extend_trans(handle, credits);
 	if (ret) {
 		mlog_errno(ret);
@@ -3662,7 +3686,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
 	 * We need to update the new cluster and 1 more for the update of
 	 * the 1st bucket of the previous extent rec.
 	 */
-	credits = bpc + 1;
+	credits = bpc + 1 + handle->h_buffer_credits;
 	ret = ocfs2_extend_trans(handle, credits);
 	if (ret) {
 		mlog_errno(ret);
@@ -3732,7 +3756,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
 				      u32 *first_hash)
 {
 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-	int ret, credits = 2 * blk_per_bucket;
+	int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
 
 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
 
@@ -3845,12 +3869,12 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 				       int *extend,
 				       struct ocfs2_xattr_set_ctxt *ctxt)
 {
-	int ret, credits;
+	int ret;
 	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 	u32 prev_clusters = *num_clusters;
 	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
 	u64 block;
-	handle_t *handle = NULL;
+	handle_t *handle = ctxt->handle;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_extent_tree et;
 
@@ -3861,16 +3885,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 
 	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
 
-	credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
-					    clusters_to_add);
-	handle = ocfs2_start_trans(osb, credits);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		handle = NULL;
-		mlog_errno(ret);
-		goto leave;
-	}
-
 	ret = ocfs2_journal_access(handle, inode, root_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
@@ -3924,18 +3938,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 		}
 	}
 
-	if (handle->h_buffer_credits < credits) {
-		/*
-		 * The journal has been restarted before, and don't
-		 * have enough space for the insertion, so extend it
-		 * here.
-		 */
-		ret = ocfs2_extend_trans(handle, credits);
-		if (ret) {
-			mlog_errno(ret);
-			goto leave;
-		}
-	}
 	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
 	     num_bits, (unsigned long long)block, v_start);
 	ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
@@ -3946,15 +3948,10 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 	}
 
 	ret = ocfs2_journal_dirty(handle, root_bh);
-	if (ret < 0) {
+	if (ret < 0)
 		mlog_errno(ret);
-		goto leave;
-	}
 
 leave:
-	if (handle)
-		ocfs2_commit_trans(osb, handle);
-
 	return ret;
 }
 
@@ -3963,6 +3960,7 @@ leave:
  * We meet with start_bh. Only move half of the xattrs to the bucket after it.
  */
 static int ocfs2_extend_xattr_bucket(struct inode *inode,
+				     handle_t *handle,
 				     struct buffer_head *first_bh,
 				     struct buffer_head *start_bh,
 				     u32 num_clusters)
@@ -3972,7 +3970,6 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	u64 start_blk = start_bh->b_blocknr, end_blk;
 	u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
-	handle_t *handle;
 	struct ocfs2_xattr_header *first_xh =
 				(struct ocfs2_xattr_header *)first_bh->b_data;
 	u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
@@ -3989,11 +3986,10 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
 	 * We will touch all the buckets after the start_bh(include it).
 	 * Then we add one more bucket.
 	 */
-	credits = end_blk - start_blk + 3 * blk_per_bucket + 1;
-	handle = ocfs2_start_trans(osb, credits);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		handle = NULL;
+	credits = end_blk - start_blk + 3 * blk_per_bucket + 1 +
+		  handle->h_buffer_credits;
+	ret = ocfs2_extend_trans(handle, credits);
+	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
@@ -4002,14 +3998,14 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
-		goto commit;
+		goto out;
 	}
 
 	while (end_blk != start_blk) {
 		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
 					    end_blk + blk_per_bucket, 0);
 		if (ret)
-			goto commit;
+			goto out;
 		end_blk -= blk_per_bucket;
 	}
 
@@ -4020,8 +4016,6 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
 	le16_add_cpu(&first_xh->xh_num_buckets, 1);
 	ocfs2_journal_dirty(handle, first_bh);
 
-commit:
-	ocfs2_commit_trans(osb, handle);
 out:
 	return ret;
 }
@@ -4099,6 +4093,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 
 	if (extend)
 		ret = ocfs2_extend_xattr_bucket(inode,
+						ctxt->handle,
 						first_bh,
 						header_bh,
 						num_clusters);
@@ -4272,14 +4267,13 @@ set_new_name_value:
  * space for the xattr insertion.
  */
 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
+					   handle_t *handle,
 					   struct ocfs2_xattr_info *xi,
 					   struct ocfs2_xattr_search *xs,
 					   u32 name_hash,
 					   int local)
 {
 	int ret;
-	handle_t *handle = NULL;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	u64 blkno;
 
 	mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
@@ -4296,14 +4290,6 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, xs->bucket->bu_blocks);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		handle = NULL;
-		mlog_errno(ret);
-		goto out;
-	}
-
 	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
 						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
@@ -4315,32 +4301,22 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
 
 out:
-	ocfs2_commit_trans(osb, handle);
-
 	return ret;
 }
 
 static int ocfs2_xattr_value_update_size(struct inode *inode,
+					 handle_t *handle,
 					 struct buffer_head *xe_bh,
 					 struct ocfs2_xattr_entry *xe,
 					 u64 new_size)
 {
 	int ret;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	handle_t *handle = NULL;
-
-	handle = ocfs2_start_trans(osb, 1);
-	if (IS_ERR(handle)) {
-		ret = -ENOMEM;
-		mlog_errno(ret);
-		goto out;
-	}
 
 	ret = ocfs2_journal_access(handle, inode, xe_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out;
 	}
 
 	xe->xe_value_size = cpu_to_le64(new_size);
@@ -4349,8 +4325,6 @@ static int ocfs2_xattr_value_update_size(struct inode *inode,
 	if (ret < 0)
 		mlog_errno(ret);
 
-out_commit:
-	ocfs2_commit_trans(osb, handle);
 out:
 	return ret;
 }
@@ -4407,7 +4381,8 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
+	ret = ocfs2_xattr_value_update_size(inode, ctxt->handle,
+					    header_bh, xe, len);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4439,6 +4414,7 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
 }
 
 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
+						handle_t *handle,
 						struct ocfs2_xattr_search *xs,
 						char *val,
 						int value_len)
@@ -4454,7 +4430,8 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
 
 	xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
 
-	return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
+	return __ocfs2_xattr_set_value_outside(inode, handle,
+					       xv, val, value_len);
 }
 
 static int ocfs2_rm_xattr_cluster(struct inode *inode,
@@ -4547,27 +4524,19 @@ out:
 }
 
 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
+					 handle_t *handle,
 					 struct ocfs2_xattr_search *xs)
 {
-	handle_t *handle = NULL;
 	struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
 	struct ocfs2_xattr_entry *last = &xh->xh_entries[
 						le16_to_cpu(xh->xh_count) - 1];
 	int ret = 0;
 
-	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
-				   ocfs2_blocks_per_xattr_bucket(inode->i_sb));
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		mlog_errno(ret);
-		return;
-	}
-
 	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
 						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		return;
 	}
 
 	/* Remove the old entry. */
@@ -4577,9 +4546,6 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
 	le16_add_cpu(&xh->xh_count, -1);
 
 	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
-
-out_commit:
-	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 }
 
 /*
@@ -4645,7 +4611,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
 		xi->value_len = OCFS2_XATTR_ROOT_SIZE;
 	}
 
-	ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local);
+	ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
+					      name_hash, local);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4666,13 +4633,14 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
 			 * storage and we have allocated xattr already,
 			 * so need to remove it.
 			 */
-			ocfs2_xattr_bucket_remove_xs(inode, xs);
+			ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
 		}
 		goto out;
 	}
 
 set_value_outside:
-	ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
+	ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
+						   xs, val, value_len);
 out:
 	return ret;
 }
@@ -4785,7 +4753,8 @@ try_again:
 			 * name/value will be moved, the xe shouldn't be changed
 			 * in xs.
 			 */
-			ret = ocfs2_defrag_xattr_bucket(inode, xs->bucket);
+			ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
+							xs->bucket);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -4865,6 +4834,13 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 
 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
+	ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
+	if (IS_ERR(ctxt.handle)) {
+		ret = PTR_ERR(ctxt.handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
 		xe = &xh->xh_entries[i];
 		if (ocfs2_xattr_is_local(xe))
@@ -4879,9 +4855,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 		}
 	}
 
+	ret = ocfs2_commit_trans(osb, ctxt.handle);
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
-
+out:
 	return ret;
 }
 
-- 
cgit v0.10.2


From fecc01126d7a244b7e9b563c80663ffdca35343b Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Wed, 12 Nov 2008 15:16:38 -0800
Subject: ocfs2: turn __ocfs2_remove_inode_range() into
 ocfs2_remove_btree_range()

This patch genericizes the high level handling of extent removal.
ocfs2_remove_btree_range() is nearly identical to
__ocfs2_remove_inode_range(), except that extent tree operations have been
used where necessary. We update ocfs2_remove_inode_range() to use the
generic helper. Now extent tree based structures have an easy way to
truncate ranges.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Acked-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 4614614..5592a2f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5255,6 +5255,78 @@ out:
 	return ret;
 }
 
+int ocfs2_remove_btree_range(struct inode *inode,
+			     struct ocfs2_extent_tree *et,
+			     u32 cpos, u32 phys_cpos, u32 len,
+			     struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct inode *tl_inode = osb->osb_tl_inode;
+	handle_t *handle;
+	struct ocfs2_alloc_context *meta_ac = NULL;
+
+	ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	mutex_lock(&tl_inode->i_mutex);
+
+	if (ocfs2_truncate_log_needs_flush(osb)) {
+		ret = __ocfs2_flush_truncate_log(osb);
+		if (ret < 0) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
+				   OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
+				  dealloc);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ocfs2_et_update_clusters(inode, et, -len);
+
+	ret = ocfs2_journal_dirty(handle, et->et_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
+	if (ret)
+		mlog_errno(ret);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+out:
+	mutex_unlock(&tl_inode->i_mutex);
+
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+
+	return ret;
+}
+
 int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
 {
 	struct buffer_head *tl_bh = osb->osb_tl_bh;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 3eb735e..0fbf8fc 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -110,6 +110,11 @@ int ocfs2_remove_extent(struct inode *inode,
 			u32 cpos, u32 len, handle_t *handle,
 			struct ocfs2_alloc_context *meta_ac,
 			struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_remove_btree_range(struct inode *inode,
+			     struct ocfs2_extent_tree *et,
+			     u32 cpos, u32 phys_cpos, u32 len,
+			     struct ocfs2_cached_dealloc_ctxt *dealloc);
+
 int ocfs2_num_free_extents(struct ocfs2_super *osb,
 			   struct inode *inode,
 			   struct ocfs2_extent_tree *et);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e2570a3..3605491 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1226,83 +1226,6 @@ out:
 	return ret;
 }
 
-static int __ocfs2_remove_inode_range(struct inode *inode,
-				      struct buffer_head *di_bh,
-				      u32 cpos, u32 phys_cpos, u32 len,
-				      struct ocfs2_cached_dealloc_ctxt *dealloc)
-{
-	int ret;
-	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct inode *tl_inode = osb->osb_tl_inode;
-	handle_t *handle;
-	struct ocfs2_alloc_context *meta_ac = NULL;
-	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
-	struct ocfs2_extent_tree et;
-
-	ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
-
-	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
-	if (ret) {
-		mlog_errno(ret);
-		return ret;
-	}
-
-	mutex_lock(&tl_inode->i_mutex);
-
-	if (ocfs2_truncate_log_needs_flush(osb)) {
-		ret = __ocfs2_flush_truncate_log(osb);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out;
-		}
-	}
-
-	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		mlog_errno(ret);
-		goto out;
-	}
-
-	ret = ocfs2_journal_access(handle, inode, di_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
-	ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
-				  dealloc);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_commit;
-	}
-
-	OCFS2_I(inode)->ip_clusters -= len;
-	di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
-
-	ret = ocfs2_journal_dirty(handle, di_bh);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_commit;
-	}
-
-	ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
-	if (ret)
-		mlog_errno(ret);
-
-out_commit:
-	ocfs2_commit_trans(osb, handle);
-out:
-	mutex_unlock(&tl_inode->i_mutex);
-
-	if (meta_ac)
-		ocfs2_free_alloc_context(meta_ac);
-
-	return ret;
-}
-
 /*
  * Truncate a byte range, avoiding pages within partial clusters. This
  * preserves those pages for the zeroing code to write to.
@@ -1402,7 +1325,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_cached_dealloc_ctxt dealloc;
 	struct address_space *mapping = inode->i_mapping;
+	struct ocfs2_extent_tree et;
 
+	ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
 	ocfs2_init_dealloc_ctxt(&dealloc);
 
 	if (byte_len == 0)
@@ -1458,9 +1383,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
 
 		/* Only do work for non-holes */
 		if (phys_cpos != 0) {
-			ret = __ocfs2_remove_inode_range(inode, di_bh, cpos,
-							 phys_cpos, alloc_size,
-							 &dealloc);
+			ret = ocfs2_remove_btree_range(inode, &et, cpos,
+						       phys_cpos, alloc_size,
+						       &dealloc);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
-- 
cgit v0.10.2


From f5d362022a947e84b0a3dd656d09c6b2322e234f Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Fri, 14 Nov 2008 11:15:44 +0800
Subject: ocfs2: move new inode allocation out of the transaction

Move out inode allocation from ocfs2_mknod_locked() because
vfs_dq_init() must be called outside of a transaction.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2545e74..e8ff0ba 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -66,12 +66,12 @@
 
 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 			      struct inode *dir,
-			      struct dentry *dentry, int mode,
+			      struct inode *inode,
+			      struct dentry *dentry,
 			      dev_t dev,
 			      struct buffer_head **new_fe_bh,
 			      struct buffer_head *parent_fe_bh,
 			      handle_t *handle,
-			      struct inode **ret_inode,
 			      struct ocfs2_alloc_context *inode_ac);
 
 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
@@ -186,6 +186,34 @@ bail:
 	return ret;
 }
 
+static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
+{
+	struct inode *inode;
+
+	inode = new_inode(dir->i_sb);
+	if (!inode) {
+		mlog(ML_ERROR, "new_inode failed!\n");
+		return NULL;
+	}
+
+	/* populate as many fields early on as possible - many of
+	 * these are used by the support functions here and in
+	 * callers. */
+	if (S_ISDIR(mode))
+		inode->i_nlink = 2;
+	else
+		inode->i_nlink = 1;
+	inode->i_uid = current_fsuid();
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		if (S_ISDIR(mode))
+			mode |= S_ISGID;
+	} else
+		inode->i_gid = current_fsgid();
+	inode->i_mode = mode;
+	return inode;
+}
+
 static int ocfs2_mknod(struct inode *dir,
 		       struct dentry *dentry,
 		       int mode,
@@ -250,6 +278,13 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
+	inode = ocfs2_get_init_inode(dir, mode);
+	if (!inode) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto leave;
+	}
+
 	/* Reserve a cluster if creating an extent based directory. */
 	if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
 		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
@@ -269,9 +304,9 @@ static int ocfs2_mknod(struct inode *dir,
 	}
 
 	/* do the real work now. */
-	status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
+	status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev,
 				    &new_fe_bh, parent_fe_bh, handle,
-				    &inode, inode_ac);
+				    inode_ac);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -332,8 +367,10 @@ leave:
 	brelse(de_bh);
 	brelse(parent_fe_bh);
 
-	if ((status < 0) && inode)
+	if ((status < 0) && inode) {
+		clear_nlink(inode);
 		iput(inode);
+	}
 
 	if (inode_ac)
 		ocfs2_free_alloc_context(inode_ac);
@@ -348,12 +385,12 @@ leave:
 
 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 			      struct inode *dir,
-			      struct dentry *dentry, int mode,
+			      struct inode *inode,
+			      struct dentry *dentry,
 			      dev_t dev,
 			      struct buffer_head **new_fe_bh,
 			      struct buffer_head *parent_fe_bh,
 			      handle_t *handle,
-			      struct inode **ret_inode,
 			      struct ocfs2_alloc_context *inode_ac)
 {
 	int status = 0;
@@ -361,14 +398,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	struct ocfs2_extent_list *fel;
 	u64 fe_blkno = 0;
 	u16 suballoc_bit;
-	struct inode *inode = NULL;
 
-	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
-		   (unsigned long)dev, dentry->d_name.len,
+	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
+		   inode->i_mode, (unsigned long)dev, dentry->d_name.len,
 		   dentry->d_name.name);
 
 	*new_fe_bh = NULL;
-	*ret_inode = NULL;
 
 	status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
 				       &fe_blkno);
@@ -377,23 +412,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	inode = new_inode(dir->i_sb);
-	if (!inode) {
-		status = -ENOMEM;
-		mlog(ML_ERROR, "new_inode failed!\n");
-		goto leave;
-	}
-
 	/* populate as many fields early on as possible - many of
 	 * these are used by the support functions here and in
 	 * callers. */
 	inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
 	OCFS2_I(inode)->ip_blkno = fe_blkno;
-	if (S_ISDIR(mode))
-		inode->i_nlink = 2;
-	else
-		inode->i_nlink = 1;
-	inode->i_mode = mode;
 	spin_lock(&osb->osb_lock);
 	inode->i_generation = osb->s_next_generation++;
 	spin_unlock(&osb->osb_lock);
@@ -421,17 +444,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	fe->i_blkno = cpu_to_le64(fe_blkno);
 	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
 	fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
-	fe->i_uid = cpu_to_le32(current_fsuid());
-	if (dir->i_mode & S_ISGID) {
-		fe->i_gid = cpu_to_le32(dir->i_gid);
-		if (S_ISDIR(mode))
-			mode |= S_ISGID;
-	} else
-		fe->i_gid = cpu_to_le32(current_fsgid());
-	fe->i_mode = cpu_to_le16(mode);
-	if (S_ISCHR(mode) || S_ISBLK(mode))
+	fe->i_uid = cpu_to_le32(inode->i_uid);
+	fe->i_gid = cpu_to_le32(inode->i_gid);
+	fe->i_mode = cpu_to_le16(inode->i_mode);
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
 		fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
-
 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
 
 	fe->i_last_eb_blk = 0;
@@ -446,7 +463,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	/*
 	 * If supported, directories start with inline data.
 	 */
-	if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) {
+	if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) {
 		u16 feat = le16_to_cpu(fe->i_dyn_features);
 
 		fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
@@ -484,17 +501,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	status = 0; /* error in ocfs2_create_new_inode_locks is not
 		     * critical */
 
-	*ret_inode = inode;
 leave:
 	if (status < 0) {
 		if (*new_fe_bh) {
 			brelse(*new_fe_bh);
 			*new_fe_bh = NULL;
 		}
-		if (inode) {
-			clear_nlink(inode);
-			iput(inode);
-		}
 	}
 
 	mlog_exit(status);
@@ -1542,6 +1554,13 @@ static int ocfs2_symlink(struct inode *dir,
 		goto bail;
 	}
 
+	inode = ocfs2_get_init_inode(dir, S_IFLNK | S_IRWXUGO);
+	if (!inode) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto bail;
+	}
+
 	/* don't reserve bitmap space for fast symlinks. */
 	if (l > ocfs2_fast_symlink_chars(sb)) {
 		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
@@ -1560,10 +1579,9 @@ static int ocfs2_symlink(struct inode *dir,
 		goto bail;
 	}
 
-	status = ocfs2_mknod_locked(osb, dir, dentry,
-				    S_IFLNK | S_IRWXUGO, 0,
-				    &new_fe_bh, parent_fe_bh, handle,
-				    &inode, inode_ac);
+	status = ocfs2_mknod_locked(osb, dir, inode, dentry,
+				    0, &new_fe_bh, parent_fe_bh, handle,
+				    inode_ac);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1644,8 +1662,10 @@ bail:
 		ocfs2_free_alloc_context(inode_ac);
 	if (data_ac)
 		ocfs2_free_alloc_context(data_ac);
-	if ((status < 0) && inode)
+	if ((status < 0) && inode) {
+		clear_nlink(inode);
 		iput(inode);
+	}
 
 	mlog_exit(status);
 
-- 
cgit v0.10.2


From 6c3faba4421e230d77a181c260972229c542dec9 Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Fri, 14 Nov 2008 11:16:03 +0800
Subject: ocfs2: add ocfs2_xattr_set_handle

This function is used to set xattr's in a started transaction. It is only
called during inode creation inode for initial security/acl xattrs of the
new inode. These xattrs could be put into ibody or extent block, so xattr
bucket would not be use in this case.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 7a90892..6480254 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2326,6 +2326,74 @@ out:
 }
 
 /*
+ * This function only called duing creating inode
+ * for init security/acl xattrs of the new inode.
+ * The xattrs could be put into ibody or extent block,
+ * xattr bucket would not be use in this case.
+ * transanction credits also be reserved in here.
+ */
+int ocfs2_xattr_set_handle(handle_t *handle,
+			   struct inode *inode,
+			   struct buffer_head *di_bh,
+			   int name_index,
+			   const char *name,
+			   const void *value,
+			   size_t value_len,
+			   int flags,
+			   struct ocfs2_alloc_context *meta_ac,
+			   struct ocfs2_alloc_context *data_ac)
+{
+	struct ocfs2_dinode *di;
+	int ret;
+
+	struct ocfs2_xattr_info xi = {
+		.name_index = name_index,
+		.name = name,
+		.value = value,
+		.value_len = value_len,
+	};
+
+	struct ocfs2_xattr_search xis = {
+		.not_found = -ENODATA,
+	};
+
+	struct ocfs2_xattr_search xbs = {
+		.not_found = -ENODATA,
+	};
+
+	struct ocfs2_xattr_set_ctxt ctxt = {
+		.handle = handle,
+		.meta_ac = meta_ac,
+		.data_ac = data_ac,
+	};
+
+	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
+		return -EOPNOTSUPP;
+
+	xis.inode_bh = xbs.inode_bh = di_bh;
+	di = (struct ocfs2_dinode *)di_bh->b_data;
+
+	down_write(&OCFS2_I(inode)->ip_xattr_sem);
+
+	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
+	if (ret)
+		goto cleanup;
+	if (xis.not_found) {
+		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
+		if (ret)
+			goto cleanup;
+	}
+
+	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
+
+cleanup:
+	up_write(&OCFS2_I(inode)->ip_xattr_sem);
+	brelse(xbs.xattr_bh);
+
+	return ret;
+}
+
+/*
  * ocfs2_xattr_set()
  *
  * Set, replace or remove an extended attribute for this inode.
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 1d8314c..8fbdc16 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -37,6 +37,10 @@ extern struct xattr_handler *ocfs2_xattr_handlers[];
 ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
 int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
 		    size_t, int);
+int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
+			   int, const char *, const void *, size_t, int,
+			   struct ocfs2_alloc_context *,
+			   struct ocfs2_alloc_context *);
 int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
 
 #endif /* OCFS2_XATTR_H */
-- 
cgit v0.10.2


From 923f7f3102b80403152e05aee3d55ecfce240440 Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Fri, 14 Nov 2008 11:16:27 +0800
Subject: ocfs2: add security xattr API

This patch add security xattr set/get/list APIs to
support security attributes in Ocfs2.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 6480254..db03162 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/string.h>
+#include <linux/security.h>
 
 #define MLOG_MASK_PREFIX ML_XATTR
 #include <cluster/masklog.h>
@@ -88,12 +89,14 @@ static struct ocfs2_xattr_def_value_root def_xv = {
 struct xattr_handler *ocfs2_xattr_handlers[] = {
 	&ocfs2_xattr_user_handler,
 	&ocfs2_xattr_trusted_handler,
+	&ocfs2_xattr_security_handler,
 	NULL
 };
 
 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
 	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
 	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
+	[OCFS2_XATTR_INDEX_SECURITY]	= &ocfs2_xattr_security_handler,
 };
 
 struct ocfs2_xattr_info {
@@ -4977,6 +4980,50 @@ out:
 }
 
 /*
+ * 'security' attributes support
+ */
+static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
+					size_t list_size, const char *name,
+					size_t name_len)
+{
+	const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
+	const size_t total_len = prefix_len + name_len + 1;
+
+	if (list && total_len <= list_size) {
+		memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
+		memcpy(list + prefix_len, name, name_len);
+		list[prefix_len + name_len] = '\0';
+	}
+	return total_len;
+}
+
+static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
+				    void *buffer, size_t size)
+{
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
+			       buffer, size);
+}
+
+static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
+				    const void *value, size_t size, int flags)
+{
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+
+	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
+			       size, flags);
+}
+
+struct xattr_handler ocfs2_xattr_security_handler = {
+	.prefix	= XATTR_SECURITY_PREFIX,
+	.list	= ocfs2_xattr_security_list,
+	.get	= ocfs2_xattr_security_get,
+	.set	= ocfs2_xattr_security_set,
+};
+
+/*
  * 'trusted' attributes support
  */
 static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 8fbdc16..55c5256 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -32,6 +32,7 @@ enum ocfs2_xattr_type {
 
 extern struct xattr_handler ocfs2_xattr_user_handler;
 extern struct xattr_handler ocfs2_xattr_trusted_handler;
+extern struct xattr_handler ocfs2_xattr_security_handler;
 extern struct xattr_handler *ocfs2_xattr_handlers[];
 
 ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
-- 
cgit v0.10.2


From 534eadddc1de8754a227202c0e747af4973f82ce Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Fri, 14 Nov 2008 11:16:41 +0800
Subject: ocfs2: add ocfs2_init_security in during file create

Security attributes must be set when creating a new inode.

We do this in three steps.

- First, get security xattr's name and value by security_operation

- Calculate and reserve the meta data and clusters needed by this security
  xattr before starting transaction

- Finally, we set it before add_entry

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index e8ff0ba..40da46b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -229,6 +229,12 @@ static int ocfs2_mknod(struct inode *dir,
 	struct inode *inode = NULL;
 	struct ocfs2_alloc_context *inode_ac = NULL;
 	struct ocfs2_alloc_context *data_ac = NULL;
+	struct ocfs2_alloc_context *xattr_ac = NULL;
+	int want_clusters = 0;
+	int xattr_credits = 0;
+	struct ocfs2_security_xattr_info si = {
+		.enable = 1,
+	};
 
 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
 		   (unsigned long)dev, dentry->d_name.len,
@@ -285,17 +291,39 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
-	/* Reserve a cluster if creating an extent based directory. */
-	if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
-		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
+	/* get security xattr */
+	status = ocfs2_init_security_get(inode, dir, &si);
+	if (status) {
+		if (status == -EOPNOTSUPP)
+			si.enable = 0;
+		else {
+			mlog_errno(status);
+			goto leave;
+		}
+	}
+
+	/* calculate meta data/clusters for setting security xattr */
+	if (si.enable) {
+		status = ocfs2_calc_security_init(dir, &si, &want_clusters,
+						  &xattr_credits, &xattr_ac);
 		if (status < 0) {
-			if (status != -ENOSPC)
-				mlog_errno(status);
+			mlog_errno(status);
 			goto leave;
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS);
+	/* Reserve a cluster if creating an extent based directory. */
+	if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb))
+		want_clusters += 1;
+
+	status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
+	if (status < 0) {
+		if (status != -ENOSPC)
+			mlog_errno(status);
+		goto leave;
+	}
+
+	handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS + xattr_credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -335,6 +363,15 @@ static int ocfs2_mknod(struct inode *dir,
 		inc_nlink(dir);
 	}
 
+	if (si.enable) {
+		status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
+						 xattr_ac, data_ac);
+		if (status < 0) {
+			mlog_errno(status);
+			goto leave;
+		}
+	}
+
 	status = ocfs2_add_entry(handle, dentry, inode,
 				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
 				 de_bh);
@@ -366,6 +403,8 @@ leave:
 	brelse(new_fe_bh);
 	brelse(de_bh);
 	brelse(parent_fe_bh);
+	kfree(si.name);
+	kfree(si.value);
 
 	if ((status < 0) && inode) {
 		clear_nlink(inode);
@@ -378,6 +417,9 @@ leave:
 	if (data_ac)
 		ocfs2_free_alloc_context(data_ac);
 
+	if (xattr_ac)
+		ocfs2_free_alloc_context(xattr_ac);
+
 	mlog_exit(status);
 
 	return status;
@@ -1508,6 +1550,12 @@ static int ocfs2_symlink(struct inode *dir,
 	handle_t *handle = NULL;
 	struct ocfs2_alloc_context *inode_ac = NULL;
 	struct ocfs2_alloc_context *data_ac = NULL;
+	struct ocfs2_alloc_context *xattr_ac = NULL;
+	int want_clusters = 0;
+	int xattr_credits = 0;
+	struct ocfs2_security_xattr_info si = {
+		.enable = 1,
+	};
 
 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
@@ -1561,17 +1609,39 @@ static int ocfs2_symlink(struct inode *dir,
 		goto bail;
 	}
 
-	/* don't reserve bitmap space for fast symlinks. */
-	if (l > ocfs2_fast_symlink_chars(sb)) {
-		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
+	/* get security xattr */
+	status = ocfs2_init_security_get(inode, dir, &si);
+	if (status) {
+		if (status == -EOPNOTSUPP)
+			si.enable = 0;
+		else {
+			mlog_errno(status);
+			goto bail;
+		}
+	}
+
+	/* calculate meta data/clusters for setting security xattr */
+	if (si.enable) {
+		status = ocfs2_calc_security_init(dir, &si, &want_clusters,
+						  &xattr_credits, &xattr_ac);
 		if (status < 0) {
-			if (status != -ENOSPC)
-				mlog_errno(status);
+			mlog_errno(status);
 			goto bail;
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, credits);
+	/* don't reserve bitmap space for fast symlinks. */
+	if (l > ocfs2_fast_symlink_chars(sb))
+		want_clusters += 1;
+
+	status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
+	if (status < 0) {
+		if (status != -ENOSPC)
+			mlog_errno(status);
+		goto bail;
+	}
+
+	handle = ocfs2_start_trans(osb, credits + xattr_credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -1632,6 +1702,15 @@ static int ocfs2_symlink(struct inode *dir,
 		}
 	}
 
+	if (si.enable) {
+		status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
+						 xattr_ac, data_ac);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+	}
+
 	status = ocfs2_add_entry(handle, dentry, inode,
 				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
 				 de_bh);
@@ -1658,10 +1737,14 @@ bail:
 	brelse(new_fe_bh);
 	brelse(parent_fe_bh);
 	brelse(de_bh);
+	kfree(si.name);
+	kfree(si.value);
 	if (inode_ac)
 		ocfs2_free_alloc_context(inode_ac);
 	if (data_ac)
 		ocfs2_free_alloc_context(data_ac);
+	if (xattr_ac)
+		ocfs2_free_alloc_context(xattr_ac);
 	if ((status < 0) && inode) {
 		clear_nlink(inode);
 		iput(inode);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index db03162..2cab0d6 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -81,6 +81,9 @@ struct ocfs2_xattr_set_ctxt {
 
 #define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
 #define OCFS2_XATTR_INLINE_SIZE	80
+#define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
+					 - sizeof(struct ocfs2_xattr_header) \
+					 - sizeof(__u32))
 
 static struct ocfs2_xattr_def_value_root def_xv = {
 	.xv.xr_list.l_count = cpu_to_le16(1),
@@ -343,6 +346,52 @@ static void ocfs2_xattr_hash_entry(struct inode *inode,
 	return;
 }
 
+static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
+{
+	int size = 0;
+
+	if (value_len <= OCFS2_XATTR_INLINE_SIZE)
+		size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
+	else
+		size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
+	size += sizeof(struct ocfs2_xattr_entry);
+
+	return size;
+}
+
+int ocfs2_calc_security_init(struct inode *dir,
+			     struct ocfs2_security_xattr_info *si,
+			     int *want_clusters,
+			     int *xattr_credits,
+			     struct ocfs2_alloc_context **xattr_ac)
+{
+	int ret = 0;
+	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+	int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
+						 si->value_len);
+
+	/*
+	 * The max space of security xattr taken inline is
+	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
+	 * So reserve one metadata block for it is ok.
+	 */
+	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
+	    s_size > OCFS2_XATTR_FREE_IN_IBODY) {
+		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
+		if (ret) {
+			mlog_errno(ret);
+			return ret;
+		}
+		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
+	}
+
+	/* reserve clusters for xattr value which will be set in B tree*/
+	if (si->value_len > OCFS2_XATTR_INLINE_SIZE)
+		*want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
+							   si->value_len);
+	return ret;
+}
+
 static int ocfs2_xattr_extend_allocation(struct inode *inode,
 					 u32 clusters_to_add,
 					 struct buffer_head *xattr_bh,
@@ -5016,6 +5065,27 @@ static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
 			       size, flags);
 }
 
+int ocfs2_init_security_get(struct inode *inode,
+			    struct inode *dir,
+			    struct ocfs2_security_xattr_info *si)
+{
+	return security_inode_init_security(inode, dir, &si->name, &si->value,
+					    &si->value_len);
+}
+
+int ocfs2_init_security_set(handle_t *handle,
+			    struct inode *inode,
+			    struct buffer_head *di_bh,
+			    struct ocfs2_security_xattr_info *si,
+			    struct ocfs2_alloc_context *xattr_ac,
+			    struct ocfs2_alloc_context *data_ac)
+{
+	return ocfs2_xattr_set_handle(handle, inode, di_bh,
+				     OCFS2_XATTR_INDEX_SECURITY,
+				     si->name, si->value, si->value_len, 0,
+				     xattr_ac, data_ac);
+}
+
 struct xattr_handler ocfs2_xattr_security_handler = {
 	.prefix	= XATTR_SECURITY_PREFIX,
 	.list	= ocfs2_xattr_security_list,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 55c5256..188ef6b 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -30,6 +30,13 @@ enum ocfs2_xattr_type {
 	OCFS2_XATTR_MAX
 };
 
+struct ocfs2_security_xattr_info {
+	int enable;
+	char *name;
+	void *value;
+	size_t value_len;
+};
+
 extern struct xattr_handler ocfs2_xattr_user_handler;
 extern struct xattr_handler ocfs2_xattr_trusted_handler;
 extern struct xattr_handler ocfs2_xattr_security_handler;
@@ -43,5 +50,15 @@ int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
 			   struct ocfs2_alloc_context *,
 			   struct ocfs2_alloc_context *);
 int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
+int ocfs2_init_security_get(struct inode *, struct inode *,
+			    struct ocfs2_security_xattr_info *);
+int ocfs2_init_security_set(handle_t *, struct inode *,
+			    struct buffer_head *,
+			    struct ocfs2_security_xattr_info *,
+			    struct ocfs2_alloc_context *,
+			    struct ocfs2_alloc_context *);
+int ocfs2_calc_security_init(struct inode *,
+			     struct ocfs2_security_xattr_info *,
+			     int *, int *, struct ocfs2_alloc_context **);
 
 #endif /* OCFS2_XATTR_H */
-- 
cgit v0.10.2


From 4e3e9d027f63488e676bf7700ec515a192e54f69 Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Fri, 14 Nov 2008 11:16:53 +0800
Subject: ocfs2: add ocfs2_xattr_get_nolock

This function does the work of ocfs2_xattr_get under an open lock.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 2cab0d6..ba9b870 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -925,12 +925,8 @@ cleanup:
 	return ret;
 }
 
-/* ocfs2_xattr_get()
- *
- * Copy an extended attribute into the buffer provided.
- * Buffer is NULL to compute the size of buffer required.
- */
-static int ocfs2_xattr_get(struct inode *inode,
+int ocfs2_xattr_get_nolock(struct inode *inode,
+			   struct buffer_head *di_bh,
 			   int name_index,
 			   const char *name,
 			   void *buffer,
@@ -938,7 +934,6 @@ static int ocfs2_xattr_get(struct inode *inode,
 {
 	int ret;
 	struct ocfs2_dinode *di = NULL;
-	struct buffer_head *di_bh = NULL;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_xattr_search xis = {
 		.not_found = -ENODATA,
@@ -953,11 +948,6 @@ static int ocfs2_xattr_get(struct inode *inode,
 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
 		ret = -ENODATA;
 
-	ret = ocfs2_inode_lock(inode, &di_bh, 0);
-	if (ret < 0) {
-		mlog_errno(ret);
-		return ret;
-	}
 	xis.inode_bh = xbs.inode_bh = di_bh;
 	di = (struct ocfs2_dinode *)di_bh->b_data;
 
@@ -968,6 +958,32 @@ static int ocfs2_xattr_get(struct inode *inode,
 		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
 					    buffer_size, &xbs);
 	up_read(&oi->ip_xattr_sem);
+
+	return ret;
+}
+
+/* ocfs2_xattr_get()
+ *
+ * Copy an extended attribute into the buffer provided.
+ * Buffer is NULL to compute the size of buffer required.
+ */
+static int ocfs2_xattr_get(struct inode *inode,
+			   int name_index,
+			   const char *name,
+			   void *buffer,
+			   size_t buffer_size)
+{
+	int ret;
+	struct buffer_head *di_bh = NULL;
+
+	ret = ocfs2_inode_lock(inode, &di_bh, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
+				     name, buffer, buffer_size);
+
 	ocfs2_inode_unlock(inode, 0);
 
 	brelse(di_bh);
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 188ef6b..86aa10f 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -43,6 +43,8 @@ extern struct xattr_handler ocfs2_xattr_security_handler;
 extern struct xattr_handler *ocfs2_xattr_handlers[];
 
 ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
+int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int,
+			   const char *, void *, size_t);
 int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
 		    size_t, int);
 int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
-- 
cgit v0.10.2


From 929fb014e041c6572c5e8c3686f1e32742b5b953 Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Fri, 14 Nov 2008 11:17:04 +0800
Subject: ocfs2: add POSIX ACL API

This patch adds POSIX ACL(access control lists) APIs in ocfs2. We convert
struct posix_acl to many ocfs2_acl_entry and regard them as an extended
attribute entry.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 589dcdf..e9ef5d1 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -37,6 +37,10 @@ ocfs2-objs := \
 	ver.o			\
 	xattr.o
 
+ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
+ocfs2-objs += acl.o
+endif
+
 ocfs2_stackglue-objs := stackglue.o
 ocfs2_stack_o2cb-objs := stack_o2cb.o
 ocfs2_stack_user-objs := stack_user.o
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
new file mode 100644
index 0000000..62d0faa
--- /dev/null
+++ b/fs/ocfs2/acl.c
@@ -0,0 +1,378 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * acl.c
+ *
+ * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
+ *
+ * CREDITS:
+ * Lots of code in this file is copy from linux/fs/ext3/acl.c.
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#define MLOG_MASK_PREFIX ML_INODE
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "alloc.h"
+#include "dlmglue.h"
+#include "file.h"
+#include "ocfs2_fs.h"
+
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * Convert from xattr value to acl struct.
+ */
+static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
+{
+	int n, count;
+	struct posix_acl *acl;
+
+	if (!value)
+		return NULL;
+	if (size < sizeof(struct posix_acl_entry))
+		return ERR_PTR(-EINVAL);
+
+	count = size / sizeof(struct posix_acl_entry);
+	if (count < 0)
+		return ERR_PTR(-EINVAL);
+	if (count == 0)
+		return NULL;
+
+	acl = posix_acl_alloc(count, GFP_NOFS);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+	for (n = 0; n < count; n++) {
+		struct ocfs2_acl_entry *entry =
+			(struct ocfs2_acl_entry *)value;
+
+		acl->a_entries[n].e_tag  = le16_to_cpu(entry->e_tag);
+		acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
+		acl->a_entries[n].e_id   = le32_to_cpu(entry->e_id);
+		value += sizeof(struct posix_acl_entry);
+
+	}
+	return acl;
+}
+
+/*
+ * Convert acl struct to xattr value.
+ */
+static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size)
+{
+	struct ocfs2_acl_entry *entry = NULL;
+	char *ocfs2_acl;
+	size_t n;
+
+	*size = acl->a_count * sizeof(struct posix_acl_entry);
+
+	ocfs2_acl = kmalloc(*size, GFP_NOFS);
+	if (!ocfs2_acl)
+		return ERR_PTR(-ENOMEM);
+
+	entry = (struct ocfs2_acl_entry *)ocfs2_acl;
+	for (n = 0; n < acl->a_count; n++, entry++) {
+		entry->e_tag  = cpu_to_le16(acl->a_entries[n].e_tag);
+		entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
+		entry->e_id   = cpu_to_le32(acl->a_entries[n].e_id);
+	}
+	return ocfs2_acl;
+}
+
+static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
+					      int type,
+					      struct buffer_head *di_bh)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	int name_index;
+	char *value = NULL;
+	struct posix_acl *acl;
+	int retval;
+
+	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+		return NULL;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, "", NULL, 0);
+	if (retval > 0) {
+		value = kmalloc(retval, GFP_NOFS);
+		if (!value)
+			return ERR_PTR(-ENOMEM);
+		retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
+						"", value, retval);
+	}
+
+	if (retval > 0)
+		acl = ocfs2_acl_from_xattr(value, retval);
+	else if (retval == -ENODATA || retval == 0)
+		acl = NULL;
+	else
+		acl = ERR_PTR(retval);
+
+	kfree(value);
+
+	return acl;
+}
+
+
+/*
+ * Get posix acl.
+ */
+static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct buffer_head *di_bh = NULL;
+	struct posix_acl *acl;
+	int ret;
+
+	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+		return NULL;
+
+	ret = ocfs2_inode_lock(inode, &di_bh, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		acl = ERR_PTR(ret);
+		return acl;
+	}
+
+	acl = ocfs2_get_acl_nolock(inode, type, di_bh);
+
+	ocfs2_inode_unlock(inode, 0);
+
+	brelse(di_bh);
+
+	return acl;
+}
+
+/*
+ * Set the access or default ACL of an inode.
+ */
+static int ocfs2_set_acl(handle_t *handle,
+			 struct inode *inode,
+			 struct buffer_head *di_bh,
+			 int type,
+			 struct posix_acl *acl,
+			 struct ocfs2_alloc_context *meta_ac,
+			 struct ocfs2_alloc_context *data_ac)
+{
+	int name_index;
+	void *value = NULL;
+	size_t size = 0;
+	int ret;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
+		if (acl) {
+			mode_t mode = inode->i_mode;
+			ret = posix_acl_equiv_mode(acl, &mode);
+			if (ret < 0)
+				return ret;
+			else {
+				inode->i_mode = mode;
+				if (ret == 0)
+					acl = NULL;
+			}
+		}
+		break;
+	case ACL_TYPE_DEFAULT:
+		name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EACCES : 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (acl) {
+		value = ocfs2_acl_to_xattr(acl, &size);
+		if (IS_ERR(value))
+			return (int)PTR_ERR(value);
+	}
+
+	if (handle)
+		ret = ocfs2_xattr_set_handle(handle, inode, di_bh, name_index,
+					     "", value, size, 0,
+					     meta_ac, data_ac);
+	else
+		ret = ocfs2_xattr_set(inode, name_index, "", value, size, 0);
+
+	kfree(value);
+
+	return ret;
+}
+
+static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
+					  char *list,
+					  size_t list_len,
+					  const char *name,
+					  size_t name_len)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
+
+	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+		return 0;
+
+	if (list && size <= list_len)
+		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
+	return size;
+}
+
+static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
+					   char *list,
+					   size_t list_len,
+					   const char *name,
+					   size_t name_len)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
+
+	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+		return 0;
+
+	if (list && size <= list_len)
+		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
+	return size;
+}
+
+static int ocfs2_xattr_get_acl(struct inode *inode,
+			       int type,
+			       void *buffer,
+			       size_t size)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct posix_acl *acl;
+	int ret;
+
+	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+		return -EOPNOTSUPP;
+
+	acl = ocfs2_get_acl(inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl == NULL)
+		return -ENODATA;
+	ret = posix_acl_to_xattr(acl, buffer, size);
+	posix_acl_release(acl);
+
+	return ret;
+}
+
+static int ocfs2_xattr_get_acl_access(struct inode *inode,
+				      const char *name,
+				      void *buffer,
+				      size_t size)
+{
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
+}
+
+static int ocfs2_xattr_get_acl_default(struct inode *inode,
+				       const char *name,
+				       void *buffer,
+				       size_t size)
+{
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
+}
+
+static int ocfs2_xattr_set_acl(struct inode *inode,
+			       int type,
+			       const void *value,
+			       size_t size)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct posix_acl *acl;
+	int ret = 0;
+
+	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+		return -EOPNOTSUPP;
+
+	if (!is_owner_or_cap(inode))
+		return -EPERM;
+
+	if (value) {
+		acl = posix_acl_from_xattr(value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		else if (acl) {
+			ret = posix_acl_valid(acl);
+			if (ret)
+				goto cleanup;
+		}
+	} else
+		acl = NULL;
+
+	ret = ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL);
+
+cleanup:
+	posix_acl_release(acl);
+	return ret;
+}
+
+static int ocfs2_xattr_set_acl_access(struct inode *inode,
+				      const char *name,
+				      const void *value,
+				      size_t size,
+				      int flags)
+{
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int ocfs2_xattr_set_acl_default(struct inode *inode,
+				       const char *name,
+				       const void *value,
+				       size_t size,
+				       int flags)
+{
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+struct xattr_handler ocfs2_xattr_acl_access_handler = {
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.list	= ocfs2_xattr_list_acl_access,
+	.get	= ocfs2_xattr_get_acl_access,
+	.set	= ocfs2_xattr_set_acl_access,
+};
+
+struct xattr_handler ocfs2_xattr_acl_default_handler = {
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.list	= ocfs2_xattr_list_acl_default,
+	.get	= ocfs2_xattr_get_acl_default,
+	.set	= ocfs2_xattr_set_acl_default,
+};
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
new file mode 100644
index 0000000..1b39f3e
--- /dev/null
+++ b/fs/ocfs2/acl.h
@@ -0,0 +1,29 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * acl.h
+ *
+ * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef OCFS2_ACL_H
+#define OCFS2_ACL_H
+
+#include <linux/posix_acl_xattr.h>
+
+struct ocfs2_acl_entry {
+	__le16 e_tag;
+	__le16 e_perm;
+	__le32 e_id;
+};
+
+#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 3fed9e3..25d07ff 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -195,6 +195,7 @@ enum ocfs2_mount_options
 	OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
 	OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
 	OCFS2_MOUNT_INODE64 = 1 << 7,	/* Allow inode numbers > 2^32 */
+	OCFS2_MOUNT_POSIX_ACL = 1 << 8,	/* POSIX access control lists */
 };
 
 #define OCFS2_OSB_SOFT_RO	0x0001
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index ba9b870..2e273c2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -91,6 +91,10 @@ static struct ocfs2_xattr_def_value_root def_xv = {
 
 struct xattr_handler *ocfs2_xattr_handlers[] = {
 	&ocfs2_xattr_user_handler,
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+	&ocfs2_xattr_acl_access_handler,
+	&ocfs2_xattr_acl_default_handler,
+#endif
 	&ocfs2_xattr_trusted_handler,
 	&ocfs2_xattr_security_handler,
 	NULL
@@ -98,6 +102,12 @@ struct xattr_handler *ocfs2_xattr_handlers[] = {
 
 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
 	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+	[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
+					= &ocfs2_xattr_acl_access_handler,
+	[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
+					= &ocfs2_xattr_acl_default_handler,
+#endif
 	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
 	[OCFS2_XATTR_INDEX_SECURITY]	= &ocfs2_xattr_security_handler,
 };
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 86aa10f..6163df3 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -40,6 +40,10 @@ struct ocfs2_security_xattr_info {
 extern struct xattr_handler ocfs2_xattr_user_handler;
 extern struct xattr_handler ocfs2_xattr_trusted_handler;
 extern struct xattr_handler ocfs2_xattr_security_handler;
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+extern struct xattr_handler ocfs2_xattr_acl_access_handler;
+extern struct xattr_handler ocfs2_xattr_acl_default_handler;
+#endif
 extern struct xattr_handler *ocfs2_xattr_handlers[];
 
 ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
-- 
cgit v0.10.2


From 23fc2702bea686569281708ad519b41a11d0a2f4 Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Fri, 14 Nov 2008 11:17:18 +0800
Subject: ocfs2: add ocfs2_check_acl

This function is used to enhance permission checking with POSIX ACLs.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 62d0faa..a6a2bf6 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -230,6 +230,21 @@ static int ocfs2_set_acl(handle_t *handle,
 	return ret;
 }
 
+int ocfs2_check_acl(struct inode *inode, int mask)
+{
+	struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
+
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl) {
+		int ret = posix_acl_permission(inode, acl, mask);
+		posix_acl_release(acl);
+		return ret;
+	}
+
+	return -EAGAIN;
+}
+
 static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
 					  char *list,
 					  size_t list_len,
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 1b39f3e..fef10f1 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,4 +26,14 @@ struct ocfs2_acl_entry {
 	__le32 e_id;
 };
 
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+
+extern int ocfs2_check_acl(struct inode *, int);
+
+#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
+
+#define ocfs2_check_acl NULL
+
+#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
+
 #endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3605491..7bad7d9 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -56,6 +56,7 @@
 #include "suballoc.h"
 #include "super.h"
 #include "xattr.h"
+#include "acl.h"
 
 #include "buffer_head_io.h"
 
@@ -1035,7 +1036,7 @@ int ocfs2_permission(struct inode *inode, int mask)
 		goto out;
 	}
 
-	ret = generic_permission(inode, mask, NULL);
+	ret = generic_permission(inode, mask, ocfs2_check_acl);
 
 	ocfs2_inode_unlock(inode, 0);
 out:
-- 
cgit v0.10.2


From 060bc66dd5017460076d9e808e2198cd532c943d Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Fri, 14 Nov 2008 11:17:29 +0800
Subject: ocfs2: add ocfs2_acl_chmod

This function is used to update acl xattrs during file mode changes.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index a6a2bf6..df72256 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -245,6 +245,33 @@ int ocfs2_check_acl(struct inode *inode, int mask)
 	return -EAGAIN;
 }
 
+int ocfs2_acl_chmod(struct inode *inode)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct posix_acl *acl, *clone;
+	int ret;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+		return 0;
+
+	acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl) || !acl)
+		return PTR_ERR(acl);
+	clone = posix_acl_clone(acl, GFP_KERNEL);
+	posix_acl_release(acl);
+	if (!clone)
+		return -ENOMEM;
+	ret = posix_acl_chmod_masq(clone, inode->i_mode);
+	if (!ret)
+		ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
+				    clone, NULL, NULL);
+	posix_acl_release(clone);
+	return ret;
+}
+
 static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
 					  char *list,
 					  size_t list_len,
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index fef10f1..68ffd64 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -29,10 +29,15 @@ struct ocfs2_acl_entry {
 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
 
 extern int ocfs2_check_acl(struct inode *, int);
+extern int ocfs2_acl_chmod(struct inode *);
 
 #else /* CONFIG_OCFS2_FS_POSIX_ACL*/
 
 #define ocfs2_check_acl NULL
+static inline int ocfs2_acl_chmod(struct inode *inode)
+{
+	return 0;
+}
 
 #endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
 
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7bad7d9..4636aa6 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -990,6 +990,12 @@ bail_unlock_rw:
 bail:
 	brelse(bh);
 
+	if (!status && attr->ia_valid & ATTR_MODE) {
+		status = ocfs2_acl_chmod(inode);
+		if (status < 0)
+			mlog_errno(status);
+	}
+
 	mlog_exit(status);
 	return status;
 }
-- 
cgit v0.10.2


From 89c38bd0ade3c567707ed8fce088b253b0369c50 Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Fri, 14 Nov 2008 11:17:41 +0800
Subject: ocfs2: add ocfs2_init_acl in mknod

We need to get the parent directories acls and let the new child inherit it.
To this, we add additional calculations for data/metadata allocation.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index df72256..12dfb44 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -272,6 +272,65 @@ int ocfs2_acl_chmod(struct inode *inode)
 	return ret;
 }
 
+/*
+ * Initialize the ACLs of a new inode. If parent directory has default ACL,
+ * then clone to new inode. Called from ocfs2_mknod.
+ */
+int ocfs2_init_acl(handle_t *handle,
+		   struct inode *inode,
+		   struct inode *dir,
+		   struct buffer_head *di_bh,
+		   struct buffer_head *dir_bh,
+		   struct ocfs2_alloc_context *meta_ac,
+		   struct ocfs2_alloc_context *data_ac)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct posix_acl *acl = NULL;
+	int ret = 0;
+
+	if (!S_ISLNK(inode->i_mode)) {
+		if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+			acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
+						   dir_bh);
+			if (IS_ERR(acl))
+				return PTR_ERR(acl);
+		}
+		if (!acl)
+			inode->i_mode &= ~current->fs->umask;
+	}
+	if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
+		struct posix_acl *clone;
+		mode_t mode;
+
+		if (S_ISDIR(inode->i_mode)) {
+			ret = ocfs2_set_acl(handle, inode, di_bh,
+					    ACL_TYPE_DEFAULT, acl,
+					    meta_ac, data_ac);
+			if (ret)
+				goto cleanup;
+		}
+		clone = posix_acl_clone(acl, GFP_NOFS);
+		ret = -ENOMEM;
+		if (!clone)
+			goto cleanup;
+
+		mode = inode->i_mode;
+		ret = posix_acl_create_masq(clone, &mode);
+		if (ret >= 0) {
+			inode->i_mode = mode;
+			if (ret > 0) {
+				ret = ocfs2_set_acl(handle, inode,
+						    di_bh, ACL_TYPE_ACCESS,
+						    clone, meta_ac, data_ac);
+			}
+		}
+		posix_acl_release(clone);
+	}
+cleanup:
+	posix_acl_release(acl);
+	return ret;
+}
+
 static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
 					  char *list,
 					  size_t list_len,
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 68ffd64..8f6389e 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -30,6 +30,10 @@ struct ocfs2_acl_entry {
 
 extern int ocfs2_check_acl(struct inode *, int);
 extern int ocfs2_acl_chmod(struct inode *);
+extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
+			  struct buffer_head *, struct buffer_head *,
+			  struct ocfs2_alloc_context *,
+			  struct ocfs2_alloc_context *);
 
 #else /* CONFIG_OCFS2_FS_POSIX_ACL*/
 
@@ -38,6 +42,16 @@ static inline int ocfs2_acl_chmod(struct inode *inode)
 {
 	return 0;
 }
+static inline int ocfs2_init_acl(handle_t *handle,
+				 struct inode *inode,
+				 struct inode *dir,
+				 struct buffer_head *di_bh,
+				 struct buffer_head *dir_bh,
+				 struct ocfs2_alloc_context *meta_ac,
+				 struct ocfs2_alloc_context *data_ac)
+{
+	return 0;
+}
 
 #endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
 
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 40da46b..7655145 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -61,6 +61,7 @@
 #include "sysfile.h"
 #include "uptodate.h"
 #include "xattr.h"
+#include "acl.h"
 
 #include "buffer_head_io.h"
 
@@ -302,14 +303,13 @@ static int ocfs2_mknod(struct inode *dir,
 		}
 	}
 
-	/* calculate meta data/clusters for setting security xattr */
-	if (si.enable) {
-		status = ocfs2_calc_security_init(dir, &si, &want_clusters,
-						  &xattr_credits, &xattr_ac);
-		if (status < 0) {
-			mlog_errno(status);
-			goto leave;
-		}
+	/* calculate meta data/clusters for setting security and acl xattr */
+	status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode,
+					&si, &want_clusters,
+					&xattr_credits, &xattr_ac);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
 	}
 
 	/* Reserve a cluster if creating an extent based directory. */
@@ -363,6 +363,13 @@ static int ocfs2_mknod(struct inode *dir,
 		inc_nlink(dir);
 	}
 
+	status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
+				xattr_ac, data_ac);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
 	if (si.enable) {
 		status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
 						 xattr_ac, data_ac);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 2e273c2..3cc8385 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -84,6 +84,10 @@ struct ocfs2_xattr_set_ctxt {
 #define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
 					 - sizeof(struct ocfs2_xattr_header) \
 					 - sizeof(__u32))
+#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
+					 - sizeof(struct ocfs2_xattr_block) \
+					 - sizeof(struct ocfs2_xattr_header) \
+					 - sizeof(__u32))
 
 static struct ocfs2_xattr_def_value_root def_xv = {
 	.xv.xr_list.l_count = cpu_to_le16(1),
@@ -402,6 +406,81 @@ int ocfs2_calc_security_init(struct inode *dir,
 	return ret;
 }
 
+int ocfs2_calc_xattr_init(struct inode *dir,
+			  struct buffer_head *dir_bh,
+			  int mode,
+			  struct ocfs2_security_xattr_info *si,
+			  int *want_clusters,
+			  int *xattr_credits,
+			  struct ocfs2_alloc_context **xattr_ac)
+{
+	int ret = 0;
+	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+	int s_size = 0;
+	int a_size = 0;
+	int acl_len = 0;
+
+	if (si->enable)
+		s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
+						     si->value_len);
+
+	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+		acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
+					OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
+					"", NULL, 0);
+		if (acl_len > 0) {
+			a_size = ocfs2_xattr_entry_real_size(0, acl_len);
+			if (S_ISDIR(mode))
+				a_size <<= 1;
+		} else if (acl_len != 0 && acl_len != -ENODATA) {
+			mlog_errno(ret);
+			return ret;
+		}
+	}
+
+	if (!(s_size + a_size))
+		return ret;
+
+	/*
+	 * The max space of security xattr taken inline is
+	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
+	 * The max space of acl xattr taken inline is
+	 * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
+	 * when blocksize = 512, may reserve one more cluser for
+	 * xattr bucket, otherwise reserve one metadata block
+	 * for them is ok.
+	 */
+	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
+	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
+		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
+		if (ret) {
+			mlog_errno(ret);
+			return ret;
+		}
+		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
+	}
+
+	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
+	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
+		*want_clusters += 1;
+		*xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
+	}
+
+	/* reserve clusters for xattr value which will be set in B tree*/
+	if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE)
+		*want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
+							   si->value_len);
+	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
+	    acl_len > OCFS2_XATTR_INLINE_SIZE) {
+		*want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
+		if (S_ISDIR(mode))
+			*want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
+								   acl_len);
+	}
+
+	return ret;
+}
+
 static int ocfs2_xattr_extend_allocation(struct inode *inode,
 					 u32 clusters_to_add,
 					 struct buffer_head *xattr_bh,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 6163df3..9a67e7d 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -66,5 +66,8 @@ int ocfs2_init_security_set(handle_t *, struct inode *,
 int ocfs2_calc_security_init(struct inode *,
 			     struct ocfs2_security_xattr_info *,
 			     int *, int *, struct ocfs2_alloc_context **);
+int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
+			  int, struct ocfs2_security_xattr_info *,
+			  int *, int *, struct ocfs2_alloc_context **);
 
 #endif /* OCFS2_XATTR_H */
-- 
cgit v0.10.2


From a68979b857283daf4acc405e476dcc8812a3ff2b Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Fri, 14 Nov 2008 11:17:52 +0800
Subject: ocfs2: add mount option and Kconfig option for acl

This patch adds the Kconfig option "CONFIG_OCFS2_FS_POSIX_ACL"
and mount options "acl" to enable acls in Ocfs2.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt
index 67310fb..c2a0871 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -31,7 +31,6 @@ Features which OCFS2 does not support yet:
 	- quotas
 	- Directory change notification (F_NOTIFY)
 	- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
-	- POSIX ACLs
 
 Mount options
 =============
@@ -79,3 +78,5 @@ inode64			Indicates that Ocfs2 is allowed to create inodes at
 			bits of significance.
 user_xattr	(*)	Enables Extended User Attributes.
 nouser_xattr		Disables Extended User Attributes.
+acl			Enables POSIX Access Control Lists support.
+noacl		(*)	Disables POSIX Access Control Lists support.
diff --git a/fs/Kconfig b/fs/Kconfig
index ff0e819..e8a47f7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -268,6 +268,15 @@ config OCFS2_COMPAT_JBD
 	  is backwards compatible with JBD.  It is safe to say N here.
 	  However, if you really want to use the original JBD, say Y here.
 
+config OCFS2_FS_POSIX_ACL
+	bool "OCFS2 POSIX Access Control Lists"
+	depends on OCFS2_FS
+	select FS_POSIX_ACL
+	default n
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
 endif # BLOCK
 
 source "fs/notify/Kconfig"
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 304b63a..9e7accc 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -158,6 +158,8 @@ enum {
 	Opt_user_xattr,
 	Opt_nouser_xattr,
 	Opt_inode64,
+	Opt_acl,
+	Opt_noacl,
 	Opt_err,
 };
 
@@ -180,6 +182,8 @@ static const match_table_t tokens = {
 	{Opt_user_xattr, "user_xattr"},
 	{Opt_nouser_xattr, "nouser_xattr"},
 	{Opt_inode64, "inode64"},
+	{Opt_acl, "acl"},
+	{Opt_noacl, "noacl"},
 	{Opt_err, NULL}
 };
 
@@ -466,6 +470,8 @@ unlock_osb:
 	if (!ret) {
 		/* Only save off the new mount options in case of a successful
 		 * remount. */
+		if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
+			parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
 		osb->s_mount_opt = parsed_options.mount_opt;
 		osb->s_atime_quantum = parsed_options.atime_quantum;
 		osb->preferred_slot = parsed_options.slot;
@@ -651,6 +657,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	brelse(bh);
 	bh = NULL;
+
+	if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
+		parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
+
 	osb->s_mount_opt = parsed_options.mount_opt;
 	osb->s_atime_quantum = parsed_options.atime_quantum;
 	osb->preferred_slot = parsed_options.slot;
@@ -664,6 +674,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 
 	sb->s_magic = OCFS2_SUPER_MAGIC;
 
+	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+		((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+
 	/* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
 	 * heartbeat=none */
 	if (bdev_read_only(sb->s_bdev)) {
@@ -945,6 +958,19 @@ static int ocfs2_parse_options(struct super_block *sb,
 		case Opt_inode64:
 			mopt->mount_opt |= OCFS2_MOUNT_INODE64;
 			break;
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+		case Opt_acl:
+			mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
+			break;
+		case Opt_noacl:
+			mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
+			break;
+#else
+		case Opt_acl:
+		case Opt_noacl:
+			printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
+			break;
+#endif
 		default:
 			mlog(ML_ERROR,
 			     "Unrecognized mount option \"%s\" "
@@ -1017,6 +1043,13 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 	if (opts & OCFS2_MOUNT_INODE64)
 		seq_printf(s, ",inode64");
 
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+	if (opts & OCFS2_MOUNT_POSIX_ACL)
+		seq_printf(s, ",acl");
+	else
+		seq_printf(s, ",noacl");
+#endif
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From b657c95c11088d77fc1bfc9c84d940f778bf9d12 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 13 Nov 2008 14:49:11 -0800
Subject: ocfs2: Wrap inode block reads in a dedicated function.

The ocfs2 code currently reads inodes off disk with a simple
ocfs2_read_block() call.  Each place that does this has a different set
of sanity checks it performs.  Some check only the signature.  A couple
validate the block number (the block read vs di->i_blkno).  A couple
others check for VALID_FL.  Only one place validates i_fs_generation.  A
couple check nothing.  Even when an error is found, they don't all do
the same thing.

We wrap inode reading into ocfs2_read_inode_block().  This will validate
all the above fields, going readonly if they are invalid (they never
should be).  ocfs2_read_inode_block_full() is provided for the places
that want to pass read_block flags.  Every caller is passing a struct
inode with a valid ip_blkno, so we don't need a separate blkno argument
either.

We will remove the validation checks from the rest of the code in a
later commit, as they are no longer necessary.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 5592a2f..9c598ad 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5658,7 +5658,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
+	status = ocfs2_read_inode_block(inode, &bh);
 	if (status < 0) {
 		iput(inode);
 		mlog_errno(status);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c22543b..e219f8b 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -68,20 +68,13 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
 		goto bail;
 	}
 
-	status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
+	status = ocfs2_read_inode_block(inode, &bh);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 	fe = (struct ocfs2_dinode *) bh->b_data;
 
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
-		     (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
-		     fe->i_signature);
-		goto bail;
-	}
-
 	if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
 						    le32_to_cpu(fe->i_clusters))) {
 		mlog(ML_ERROR, "block offset is outside the allocated size: "
@@ -262,7 +255,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
 	BUG_ON(!PageLocked(page));
 	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
 
-	ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
+	ret = ocfs2_read_inode_block(inode, &di_bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 026e6eb..5777045 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -231,7 +231,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name,
 	struct ocfs2_dinode *di;
 	struct ocfs2_inline_data *data;
 
-	ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
+	ret = ocfs2_read_inode_block(dir, &di_bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -458,7 +458,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle,
 	struct ocfs2_dinode *di;
 	struct ocfs2_inline_data *data;
 
-	ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
+	ret = ocfs2_read_inode_block(dir, &di_bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -636,7 +636,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
 	struct ocfs2_inline_data *data;
 	struct ocfs2_dir_entry *de;
 
-	ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
+	ret = ocfs2_read_inode_block(inode, &di_bh);
 	if (ret) {
 		mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 6e6cc0a..9f2a7f7 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2024,7 +2024,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
 	} else {
 		/* Boo, we have to go to disk. */
 		/* read bh, cast, ocfs2_refresh_inode */
-		status = ocfs2_read_block(inode, oi->ip_blkno, bh);
+		status = ocfs2_read_inode_block(inode, bh);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail_refresh;
@@ -2032,18 +2032,14 @@ static int ocfs2_inode_lock_update(struct inode *inode,
 		fe = (struct ocfs2_dinode *) (*bh)->b_data;
 
 		/* This is a good chance to make sure we're not
-		 * locking an invalid object.
+		 * locking an invalid object.  ocfs2_read_inode_block()
+		 * already checked that the inode block is sane.
 		 *
 		 * We bug on a stale inode here because we checked
 		 * above whether it was wiped from disk. The wiping
 		 * node provides a guarantee that we receive that
 		 * message and can mark the inode before dropping any
 		 * locks associated with it. */
-		if (!OCFS2_IS_VALID_DINODE(fe)) {
-			OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
-			status = -EIO;
-			goto bail_refresh;
-		}
 		mlog_bug_on_msg(inode->i_generation !=
 				le32_to_cpu(fe->i_generation),
 				"Invalid dinode %llu disk generation: %u "
@@ -2085,7 +2081,7 @@ static int ocfs2_assign_bh(struct inode *inode,
 		return 0;
 	}
 
-	status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh);
+	status = ocfs2_read_inode_block(inode, ret_bh);
 	if (status < 0)
 		mlog_errno(status);
 
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 2baedac..b686b31 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -630,7 +630,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
 	if (ret == 0)
 		goto out;
 
-	ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
+	ret = ocfs2_read_inode_block(inode, &di_bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4636aa6..41001d5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -402,12 +402,9 @@ static int ocfs2_truncate_file(struct inode *inode,
 		   (unsigned long long)OCFS2_I(inode)->ip_blkno,
 		   (unsigned long long)new_i_size);
 
+	/* We trust di_bh because it comes from ocfs2_inode_lock(), which
+	 * already validated it */
 	fe = (struct ocfs2_dinode *) di_bh->b_data;
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
-		status = -EIO;
-		goto bail;
-	}
 
 	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
 			"Inode %llu, inode i_size = %lld != di "
@@ -546,18 +543,12 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
 	 */
 	BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
 
-	status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
+	status = ocfs2_read_inode_block(inode, &bh);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}
-
 	fe = (struct ocfs2_dinode *) bh->b_data;
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
-		status = -EIO;
-		goto leave;
-	}
 
 restart_all:
 	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
@@ -1135,9 +1126,8 @@ static int ocfs2_write_remove_suid(struct inode *inode)
 {
 	int ret;
 	struct buffer_head *bh = NULL;
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
-	ret = ocfs2_read_block(inode, oi->ip_blkno, &bh);
+	ret = ocfs2_read_inode_block(inode, &bh);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -1163,8 +1153,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
 	struct buffer_head *di_bh = NULL;
 
 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
-		ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno,
-				       &di_bh);
+		ret = ocfs2_read_inode_block(inode, &di_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7aa00d5..9eb701b8 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -214,12 +214,11 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 	return 0;
 }
 
-int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
-		     	 int create_ino)
+void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
+			  int create_ino)
 {
 	struct super_block *sb;
 	struct ocfs2_super *osb;
-	int status = -EINVAL;
 	int use_plocks = 1;
 
 	mlog_entry("(0x%p, size:%llu)\n", inode,
@@ -232,25 +231,17 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	    ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
 		use_plocks = 0;
 
-	/* this means that read_inode cannot create a superblock inode
-	 * today.  change if needed. */
-	if (!OCFS2_IS_VALID_DINODE(fe) ||
-	    !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
-		mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, "
-		     "signature = %.*s, flags = 0x%x\n",
-		     inode->i_ino,
-		     (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
-		     fe->i_signature, le32_to_cpu(fe->i_flags));
-		goto bail;
-	}
+	/*
+	 * These have all been checked by ocfs2_read_inode_block() or set
+	 * by ocfs2_mknod_locked(), so a failure is a code bug.
+	 */
+	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));  /* This means that read_inode
+						cannot create a superblock
+						inode today.  change if
+						that is needed. */
+	BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)));
+	BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation);
 
-	if (le32_to_cpu(fe->i_fs_generation) != osb->fs_generation) {
-		mlog(ML_ERROR, "file entry generation does not match "
-		     "superblock! osb->fs_generation=%x, "
-		     "fe->i_fs_generation=%x\n",
-		     osb->fs_generation, le32_to_cpu(fe->i_fs_generation));
-		goto bail;
-	}
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
 	OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
@@ -354,10 +345,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
 	ocfs2_set_inode_flags(inode);
 
-	status = 0;
-bail:
-	mlog_exit(status);
-	return status;
+	mlog_exit_void();
 }
 
 static int ocfs2_read_locked_inode(struct inode *inode,
@@ -460,11 +448,14 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 		}
 	}
 
-	if (can_lock)
-		status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh,
-					   OCFS2_BH_IGNORE_CACHE);
-	else
+	if (can_lock) {
+		status = ocfs2_read_inode_block_full(inode, &bh,
+						     OCFS2_BH_IGNORE_CACHE);
+	} else {
 		status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
+		if (!status)
+			status = ocfs2_validate_inode_block(osb->sb, bh);
+	}
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -472,12 +463,6 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 
 	status = -EINVAL;
 	fe = (struct ocfs2_dinode *) bh->b_data;
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		mlog(0, "Invalid dinode #%llu: signature = %.*s\n",
-		     (unsigned long long)args->fi_blkno, 7,
-		     fe->i_signature);
-		goto bail;
-	}
 
 	/*
 	 * This is a code bug. Right now the caller needs to
@@ -491,10 +476,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 
 	if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
 	    S_ISBLK(le16_to_cpu(fe->i_mode)))
-    		inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
+		inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
 
-	if (ocfs2_populate_inode(inode, fe, 0) < 0)
-		goto bail;
+	ocfs2_populate_inode(inode, fe, 0);
 
 	BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
 
@@ -1264,3 +1248,79 @@ void ocfs2_refresh_inode(struct inode *inode,
 
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 }
+
+int ocfs2_validate_inode_block(struct super_block *sb,
+			       struct buffer_head *bh)
+{
+	int rc = -EINVAL;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
+
+	BUG_ON(!buffer_uptodate(bh));
+
+	if (!OCFS2_IS_VALID_DINODE(di)) {
+		ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n",
+			    (unsigned long long)bh->b_blocknr, 7,
+			    di->i_signature);
+		goto bail;
+	}
+
+	if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
+		ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n",
+			    (unsigned long long)bh->b_blocknr,
+			    (unsigned long long)le64_to_cpu(di->i_blkno));
+		goto bail;
+	}
+
+	if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
+		ocfs2_error(sb,
+			    "Invalid dinode #%llu: OCFS2_VALID_FL not set\n",
+			    (unsigned long long)bh->b_blocknr);
+		goto bail;
+	}
+
+	if (le32_to_cpu(di->i_fs_generation) !=
+	    OCFS2_SB(sb)->fs_generation) {
+		ocfs2_error(sb,
+			    "Invalid dinode #%llu: fs_generation is %u\n",
+			    (unsigned long long)bh->b_blocknr,
+			    le32_to_cpu(di->i_fs_generation));
+		goto bail;
+	}
+
+	rc = 0;
+
+bail:
+	return rc;
+}
+
+int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
+				int flags)
+{
+	int rc;
+	struct buffer_head *tmp = *bh;
+
+	rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
+			       flags);
+	if (rc)
+		goto out;
+
+	if (!(flags & OCFS2_BH_READAHEAD)) {
+		rc = ocfs2_validate_inode_block(inode->i_sb, tmp);
+		if (rc) {
+			brelse(tmp);
+			goto out;
+		}
+	}
+
+	/* If ocfs2_read_blocks() got us a new bh, pass it up. */
+	if (!*bh)
+		*bh = tmp;
+
+out:
+	return rc;
+}
+
+int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
+{
+	return ocfs2_read_inode_block_full(inode, bh, 0);
+}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 2f37af9..b79c371 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -128,8 +128,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
 			 int sysfile_type);
 int ocfs2_inode_init_private(struct inode *inode);
 int ocfs2_inode_revalidate(struct dentry *dentry);
-int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
-			 int create_ino);
+void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
+			  int create_ino);
 void ocfs2_read_inode(struct inode *inode);
 void ocfs2_read_inode2(struct inode *inode, void *opaque);
 ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf,
@@ -153,4 +153,16 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
 	return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits);
 }
 
+/* Validate that a bh contains a valid inode */
+int ocfs2_validate_inode_block(struct super_block *sb,
+			       struct buffer_head *bh);
+/*
+ * Read an inode block into *bh.  If *bh is NULL, a bh will be allocated.
+ * This is a cached read.  The inode will be validated with
+ * ocfs2_validate_inode_block().
+ */
+int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh);
+/* The same, but can be passed OCFS2_BH_* flags */
+int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
+				int flags);
 #endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 99fe9d5..877aaa0 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1135,8 +1135,7 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
 	}
 	SET_INODE_JOURNAL(inode);
 
-	status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh,
-				   OCFS2_BH_IGNORE_CACHE);
+	status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 687b287..19cfb1b 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -248,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 		goto bail;
 	}
 
-	status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
-				   &alloc_bh, OCFS2_BH_IGNORE_CACHE);
+	status = ocfs2_read_inode_block_full(inode, &alloc_bh,
+					     OCFS2_BH_IGNORE_CACHE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -459,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
 
 	mutex_lock(&inode->i_mutex);
 
-	status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
-				   &alloc_bh, OCFS2_BH_IGNORE_CACHE);
+	status = ocfs2_read_inode_block_full(inode, &alloc_bh,
+					     OCFS2_BH_IGNORE_CACHE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 7655145..0134baf 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -531,15 +531,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
-		mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
-		     "i_blkno=%llu, i_ino=%lu\n",
-		     (unsigned long long)(*new_fe_bh)->b_blocknr,
-		     (unsigned long long)le64_to_cpu(fe->i_blkno),
-		     inode->i_ino);
-		BUG();
-	}
-
+	ocfs2_populate_inode(inode, fe, 1);
 	ocfs2_inode_set_new(osb, inode);
 	if (!ocfs2_mount_local(osb)) {
 		status = ocfs2_create_new_inode_locks(inode);
@@ -1864,9 +1856,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 
 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
 
-	status = ocfs2_read_block(orphan_dir_inode,
-				  OCFS2_I(orphan_dir_inode)->ip_blkno,
-				  &orphan_dir_bh);
+	status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index cbd03df..ed0a0cf 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -84,7 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
 
 	mlog_entry_void();
 
-	status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh);
+	status = ocfs2_read_inode_block(inode, bh);
 	if (status < 0) {
 		mlog_errno(status);
 		link = ERR_PTR(status);
-- 
cgit v0.10.2


From 10995aa2451afa20b721cc7de856cae1a13dba57 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 13 Nov 2008 14:49:12 -0800
Subject: ocfs2: Morph the haphazard OCFS2_IS_VALID_DINODE() checks.

Random places in the code would check a dinode bh to see if it was
valid.  Not only did they do different levels of validation, they
handled errors in different ways.

The previous commit unified inode block reads, validating all block
reads in the same place.  Thus, these haphazard checks are no longer
necessary.  Rather than eliminate them, however, we change them to
BUG_ON() checks.  This ensures the assumptions remain true.  All of the
code paths to these checks have been audited to ensure they come from a
validated inode read.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9c598ad..320545b 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -187,20 +187,12 @@ static int ocfs2_dinode_insert_check(struct inode *inode,
 static int ocfs2_dinode_sanity_check(struct inode *inode,
 				     struct ocfs2_extent_tree *et)
 {
-	int ret = 0;
-	struct ocfs2_dinode *di;
+	struct ocfs2_dinode *di = et->et_object;
 
 	BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
+	BUG_ON(!OCFS2_IS_VALID_DINODE(di));
 
-	di = et->et_object;
-	if (!OCFS2_IS_VALID_DINODE(di)) {
-		ret = -EIO;
-		ocfs2_error(inode->i_sb,
-			"Inode %llu has invalid path root",
-			(unsigned long long)OCFS2_I(inode)->ip_blkno);
-	}
-
-	return ret;
+	return 0;
 }
 
 static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
@@ -5380,13 +5372,13 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 	start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
 
 	di = (struct ocfs2_dinode *) tl_bh->b_data;
-	tl = &di->id2.i_dealloc;
-	if (!OCFS2_IS_VALID_DINODE(di)) {
-		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
-		status = -EIO;
-		goto bail;
-	}
 
+	/* tl_bh is loaded from ocfs2_truncate_log_init().  It's validated
+	 * by the underlying call to ocfs2_read_inode_block(), so any
+	 * corruption is a code bug */
+	BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+	tl = &di->id2.i_dealloc;
 	tl_count = le16_to_cpu(tl->tl_count);
 	mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
 			tl_count == 0,
@@ -5536,13 +5528,13 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	BUG_ON(mutex_trylock(&tl_inode->i_mutex));
 
 	di = (struct ocfs2_dinode *) tl_bh->b_data;
-	tl = &di->id2.i_dealloc;
-	if (!OCFS2_IS_VALID_DINODE(di)) {
-		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
-		status = -EIO;
-		goto out;
-	}
 
+	/* tl_bh is loaded from ocfs2_truncate_log_init().  It's validated
+	 * by the underlying call to ocfs2_read_inode_block(), so any
+	 * corruption is a code bug */
+	BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+	tl = &di->id2.i_dealloc;
 	num_to_flush = le16_to_cpu(tl->tl_used);
 	mlog(0, "Flush %u records from truncate log #%llu\n",
 	     num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
@@ -5697,13 +5689,13 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
 	}
 
 	di = (struct ocfs2_dinode *) tl_bh->b_data;
-	tl = &di->id2.i_dealloc;
-	if (!OCFS2_IS_VALID_DINODE(di)) {
-		OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
-		status = -EIO;
-		goto bail;
-	}
 
+	/* tl_bh is loaded from ocfs2_get_truncate_log_info().  It's
+	 * validated by the underlying call to ocfs2_read_inode_block(),
+	 * so any corruption is a code bug */
+	BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+	tl = &di->id2.i_dealloc;
 	if (le16_to_cpu(tl->tl_used)) {
 		mlog(0, "We'll have %u logs to recover\n",
 		     le16_to_cpu(tl->tl_used));
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 877aaa0..9223bfc 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -587,17 +587,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 	mlog_entry_void();
 
 	fe = (struct ocfs2_dinode *)bh->b_data;
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		/* This is called from startup/shutdown which will
-		 * handle the errors in a specific manner, so no need
-		 * to call ocfs2_error() here. */
-		mlog(ML_ERROR, "Journal dinode %llu  has invalid "
-		     "signature: %.*s",
-		     (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
-		     fe->i_signature);
-		status = -EIO;
-		goto out;
-	}
+
+	/* The journal bh on the osb always comes from ocfs2_journal_init()
+	 * and was validated there inside ocfs2_inode_lock_full().  It's a
+	 * code bug if we mess it up. */
+	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
 
 	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
 	if (dirty)
@@ -613,7 +607,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 	if (status < 0)
 		mlog_errno(status);
 
-out:
 	mlog_exit(status);
 	return status;
 }
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 25d07ff..467bdb6 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -444,14 +444,6 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
 #define OCFS2_IS_VALID_DINODE(ptr)					\
 	(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
 
-#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di)	do {			\
-	typeof(__di) ____di = (__di);					\
-	ocfs2_error((__sb), 						\
-		"Dinode # %llu has bad signature %.*s",			\
-		(unsigned long long)le64_to_cpu((____di)->i_blkno), 7, 	\
-		(____di)->i_signature);					\
-} while (0)
-
 #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr)				\
 	(!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))
 
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index ffd48db..739d452 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -314,6 +314,10 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
 
 	fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
 
+	/* main_bm_bh is validated by inode read inside ocfs2_inode_lock(),
+	 * so any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
+
 	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
 				 ocfs2_group_bitmap_size(osb->sb) * 8) {
 		mlog(ML_ERROR, "The disk is too old and small. "
@@ -322,12 +326,6 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
 		goto out_unlock;
 	}
 
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe);
-		ret = -EIO;
-		goto out_unlock;
-	}
-
 	first_new_cluster = le32_to_cpu(fe->i_clusters);
 	lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
 					      first_new_cluster - 1);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index c5ff18b..95d432b 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -441,11 +441,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 	ac->ac_alloc_slot = slot;
 
 	fe = (struct ocfs2_dinode *) bh->b_data;
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
-		status = -EIO;
-		goto bail;
-	}
+
+	/* The bh was validated by the inode read inside
+	 * ocfs2_inode_lock().  Any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
+
 	if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
 		ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
 			    (unsigned long long)le64_to_cpu(fe->i_blkno));
@@ -931,11 +931,6 @@ static int ocfs2_relink_block_group(handle_t *handle,
 	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
 	struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
 
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
-		status = -EIO;
-		goto out;
-	}
 	if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
 		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
 		status = -EIO;
@@ -1392,11 +1387,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 	BUG_ON(!ac->ac_bh);
 
 	fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe);
-		status = -EIO;
-		goto bail;
-	}
+
+	/* The bh was validated by the inode read during
+	 * ocfs2_reserve_suballoc_bits().  Any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
+
 	if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
 	    le32_to_cpu(fe->id1.bitmap1.i_total)) {
 		ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
@@ -1782,11 +1777,12 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
 
 	mlog_entry_void();
 
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
-		status = -EIO;
-		goto bail;
-	}
+	/* The alloc_bh comes from ocfs2_free_dinode() or
+	 * ocfs2_free_clusters().  The callers have all locked the
+	 * allocator and gotten alloc_bh from the lock call.  This
+	 * validates the dinode buffer.  Any corruption that has happended
+	 * is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
 	BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
 
 	mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
-- 
cgit v0.10.2


From 57e3e7971136003c96766346049aa73b82cab079 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 13 Nov 2008 14:49:13 -0800
Subject: ocfs2: Consolidate validation of group descriptors.

Currently the validation of group descriptors is directly duplicated so
that one version can error the filesystem and the other (resize) can
just report the problem.  Consolidate to one function that takes a
boolean.  Wrap that function with the old call for the old users.

This is in preparation for lifting the read+validate step into a
single function.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 739d452..a2de32a 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -396,41 +396,16 @@ static int ocfs2_check_new_group(struct inode *inode,
 				 struct buffer_head *group_bh)
 {
 	int ret;
-	struct ocfs2_group_desc *gd;
+	struct ocfs2_group_desc *gd =
+		(struct ocfs2_group_desc *)group_bh->b_data;
 	u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
-	unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) *
-				le16_to_cpu(di->id2.i_chain.cl_bpc);
 
+	ret = ocfs2_validate_group_descriptor(inode->i_sb, di, gd, 1);
+	if (ret)
+		goto out;
 
-	gd = (struct ocfs2_group_desc *)group_bh->b_data;
-
-	ret = -EIO;
-	if (!OCFS2_IS_VALID_GROUP_DESC(gd))
-		mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n",
-		     (unsigned long long)le64_to_cpu(gd->bg_blkno));
-	else if (di->i_blkno != gd->bg_parent_dinode)
-		mlog(ML_ERROR, "Group descriptor # %llu has bad parent "
-		     "pointer (%llu, expected %llu)\n",
-		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
-		     (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
-		     (unsigned long long)le64_to_cpu(di->i_blkno));
-	else if (le16_to_cpu(gd->bg_bits) > max_bits)
-		mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n",
-		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
-		     le16_to_cpu(gd->bg_bits));
-	else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits))
-		mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
-		     "claims that %u are free\n",
-		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
-		     le16_to_cpu(gd->bg_bits),
-		     le16_to_cpu(gd->bg_free_bits_count));
-	else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size)))
-		mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
-		     "max bitmap bits of %u\n",
-		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
-		     le16_to_cpu(gd->bg_bits),
-		     8 * le16_to_cpu(gd->bg_size));
-	else if (le16_to_cpu(gd->bg_chain) != input->chain)
+	ret = -EINVAL;
+	if (le16_to_cpu(gd->bg_chain) != input->chain)
 		mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u "
 		     "while input has %u set.\n",
 		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
@@ -449,6 +424,7 @@ static int ocfs2_check_new_group(struct inode *inode,
 	else
 		ret = 0;
 
+out:
 	return ret;
 }
 
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 95d432b..ddba97d 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -146,59 +146,71 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
 }
 
 /* somewhat more expensive than our other checks, so use sparingly. */
-int ocfs2_check_group_descriptor(struct super_block *sb,
-				 struct ocfs2_dinode *di,
-				 struct ocfs2_group_desc *gd)
+int ocfs2_validate_group_descriptor(struct super_block *sb,
+				    struct ocfs2_dinode *di,
+				    struct ocfs2_group_desc *gd,
+				    int clean_error)
 {
 	unsigned int max_bits;
 
+#define do_error(fmt, ...)						\
+	do{								\
+		if (clean_error)					\
+			mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__);	\
+		else							\
+			ocfs2_error(sb, fmt, ##__VA_ARGS__);		\
+	} while (0)
+
 	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
-		return -EIO;
+		do_error("Group Descriptor #%llu has bad signature %.*s",
+			 (unsigned long long)le64_to_cpu(gd->bg_blkno), 7,
+			 gd->bg_signature);
+		return -EINVAL;
 	}
 
 	if (di->i_blkno != gd->bg_parent_dinode) {
-		ocfs2_error(sb, "Group descriptor # %llu has bad parent "
-			    "pointer (%llu, expected %llu)",
-			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
-			    (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
-			    (unsigned long long)le64_to_cpu(di->i_blkno));
-		return -EIO;
+		do_error("Group descriptor # %llu has bad parent "
+			 "pointer (%llu, expected %llu)",
+			 (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
+			 (unsigned long long)le64_to_cpu(di->i_blkno));
+		return -EINVAL;
 	}
 
 	max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
 	if (le16_to_cpu(gd->bg_bits) > max_bits) {
-		ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
-			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
-			    le16_to_cpu(gd->bg_bits));
-		return -EIO;
+		do_error("Group descriptor # %llu has bit count of %u",
+			 (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			 le16_to_cpu(gd->bg_bits));
+		return -EINVAL;
 	}
 
 	if (le16_to_cpu(gd->bg_chain) >=
 	    le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
-		ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
-			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
-			    le16_to_cpu(gd->bg_chain));
-		return -EIO;
+		do_error("Group descriptor # %llu has bad chain %u",
+			 (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			 le16_to_cpu(gd->bg_chain));
+		return -EINVAL;
 	}
 
 	if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
-		ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
-			    "claims that %u are free",
-			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
-			    le16_to_cpu(gd->bg_bits),
-			    le16_to_cpu(gd->bg_free_bits_count));
-		return -EIO;
+		do_error("Group descriptor # %llu has bit count %u but "
+			 "claims that %u are free",
+			 (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			 le16_to_cpu(gd->bg_bits),
+			 le16_to_cpu(gd->bg_free_bits_count));
+		return -EINVAL;
 	}
 
 	if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
-		ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
-			    "max bitmap bits of %u",
-			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
-			    le16_to_cpu(gd->bg_bits),
-			    8 * le16_to_cpu(gd->bg_size));
-		return -EIO;
+		do_error("Group descriptor # %llu has bit count %u but "
+			 "max bitmap bits of %u",
+			 (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			 le16_to_cpu(gd->bg_bits),
+			 8 * le16_to_cpu(gd->bg_size));
+		return -EINVAL;
 	}
+#undef do_error
 
 	return 0;
 }
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 4df159d..7adfcc4 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -165,9 +165,23 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
 u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
 
 /* somewhat more expensive than our other checks, so use sparingly. */
-int ocfs2_check_group_descriptor(struct super_block *sb,
-				 struct ocfs2_dinode *di,
-				 struct ocfs2_group_desc *gd);
+/*
+ * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it
+ * finds a problem.  A caller that wants to check a group descriptor
+ * without going readonly passes a nonzero clean_error.  This is only
+ * resize, really.
+ */
+int ocfs2_validate_group_descriptor(struct super_block *sb,
+				    struct ocfs2_dinode *di,
+				    struct ocfs2_group_desc *gd,
+				    int clean_error);
+static inline int ocfs2_check_group_descriptor(struct super_block *sb,
+					       struct ocfs2_dinode *di,
+					       struct ocfs2_group_desc *gd)
+{
+	return ocfs2_validate_group_descriptor(sb, di, gd, 0);
+}
+
 int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
 			  u32 clusters_to_add, u32 extents_to_split,
 			  struct ocfs2_alloc_context **data_ac,
-- 
cgit v0.10.2


From 68f64d471be38631d7196b938d9809802dd467fa Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 13 Nov 2008 14:49:14 -0800
Subject: ocfs2: Wrap group descriptor reads in a dedicated function.

We have a clean call for validating group descriptors, but every place
that wants the always does a read_block()+validate() call pair.  Create
a toplevel ocfs2_read_group_descriptor() that does the right
thing.  This allows us to leverage the single call point later for
fancier handling.  We also add validation of gd->bg_generation against
the superblock and gd->bg_blkno against the block we thought we read.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index a2de32a..252baff 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -330,20 +330,14 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
 	lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
 					      first_new_cluster - 1);
 
-	ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh);
+	ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno,
+					  &group_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_unlock;
 	}
-
 	group = (struct ocfs2_group_desc *)group_bh->b_data;
 
-	ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_unlock;
-	}
-
 	cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
 	if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters >
 		le16_to_cpu(fe->id2.i_chain.cl_cpg)) {
@@ -400,7 +394,7 @@ static int ocfs2_check_new_group(struct inode *inode,
 		(struct ocfs2_group_desc *)group_bh->b_data;
 	u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
 
-	ret = ocfs2_validate_group_descriptor(inode->i_sb, di, gd, 1);
+	ret = ocfs2_validate_group_descriptor(inode->i_sb, di, group_bh, 1);
 	if (ret)
 		goto out;
 
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ddba97d..797f509 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -145,13 +145,13 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
 	return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
 }
 
-/* somewhat more expensive than our other checks, so use sparingly. */
 int ocfs2_validate_group_descriptor(struct super_block *sb,
 				    struct ocfs2_dinode *di,
-				    struct ocfs2_group_desc *gd,
+				    struct buffer_head *bh,
 				    int clean_error)
 {
 	unsigned int max_bits;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
 
 #define do_error(fmt, ...)						\
 	do{								\
@@ -162,16 +162,32 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
 	} while (0)
 
 	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
-		do_error("Group Descriptor #%llu has bad signature %.*s",
-			 (unsigned long long)le64_to_cpu(gd->bg_blkno), 7,
+		do_error("Group descriptor #%llu has bad signature %.*s",
+			 (unsigned long long)bh->b_blocknr, 7,
 			 gd->bg_signature);
 		return -EINVAL;
 	}
 
+	if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
+		do_error("Group descriptor #%llu has an invalid bg_blkno "
+			 "of %llu",
+			 (unsigned long long)bh->b_blocknr,
+			 (unsigned long long)le64_to_cpu(gd->bg_blkno));
+		return -EINVAL;
+	}
+
+	if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
+		do_error("Group descriptor #%llu has an invalid "
+			 "fs_generation of #%u",
+			 (unsigned long long)bh->b_blocknr,
+			 le32_to_cpu(gd->bg_generation));
+		return -EINVAL;
+	}
+
 	if (di->i_blkno != gd->bg_parent_dinode) {
-		do_error("Group descriptor # %llu has bad parent "
+		do_error("Group descriptor #%llu has bad parent "
 			 "pointer (%llu, expected %llu)",
-			 (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			 (unsigned long long)bh->b_blocknr,
 			 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
 			 (unsigned long long)le64_to_cpu(di->i_blkno));
 		return -EINVAL;
@@ -179,33 +195,33 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
 
 	max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
 	if (le16_to_cpu(gd->bg_bits) > max_bits) {
-		do_error("Group descriptor # %llu has bit count of %u",
-			 (unsigned long long)le64_to_cpu(gd->bg_blkno),
+		do_error("Group descriptor #%llu has bit count of %u",
+			 (unsigned long long)bh->b_blocknr,
 			 le16_to_cpu(gd->bg_bits));
 		return -EINVAL;
 	}
 
 	if (le16_to_cpu(gd->bg_chain) >=
 	    le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
-		do_error("Group descriptor # %llu has bad chain %u",
-			 (unsigned long long)le64_to_cpu(gd->bg_blkno),
+		do_error("Group descriptor #%llu has bad chain %u",
+			 (unsigned long long)bh->b_blocknr,
 			 le16_to_cpu(gd->bg_chain));
 		return -EINVAL;
 	}
 
 	if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
-		do_error("Group descriptor # %llu has bit count %u but "
+		do_error("Group descriptor #%llu has bit count %u but "
 			 "claims that %u are free",
-			 (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			 (unsigned long long)bh->b_blocknr,
 			 le16_to_cpu(gd->bg_bits),
 			 le16_to_cpu(gd->bg_free_bits_count));
 		return -EINVAL;
 	}
 
 	if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
-		do_error("Group descriptor # %llu has bit count %u but "
+		do_error("Group descriptor #%llu has bit count %u but "
 			 "max bitmap bits of %u",
-			 (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			 (unsigned long long)bh->b_blocknr,
 			 le16_to_cpu(gd->bg_bits),
 			 8 * le16_to_cpu(gd->bg_size));
 		return -EINVAL;
@@ -215,6 +231,30 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
 	return 0;
 }
 
+int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
+				u64 gd_blkno, struct buffer_head **bh)
+{
+	int rc;
+	struct buffer_head *tmp = *bh;
+
+	rc = ocfs2_read_block(inode, gd_blkno, &tmp);
+	if (rc)
+		goto out;
+
+	rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0);
+	if (rc) {
+		brelse(tmp);
+		goto out;
+	}
+
+	/* If ocfs2_read_block() got us a new bh, pass it up. */
+	if (!*bh)
+		*bh = tmp;
+
+out:
+	return rc;
+}
+
 static int ocfs2_block_group_fill(handle_t *handle,
 				  struct inode *alloc_inode,
 				  struct buffer_head *bg_bh,
@@ -1177,21 +1217,17 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
 	u16 found;
 	struct buffer_head *group_bh = NULL;
 	struct ocfs2_group_desc *gd;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
 	struct inode *alloc_inode = ac->ac_inode;
 
-	ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh);
+	ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno,
+					  &group_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
 	}
 
 	gd = (struct ocfs2_group_desc *) group_bh->b_data;
-	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
-		ret = -EIO;
-		goto out;
-	}
-
 	ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
 				  ac->ac_max_block, bit_off, &found);
 	if (ret < 0) {
@@ -1248,19 +1284,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 	     bits_wanted, chain,
 	     (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
 
-	status = ocfs2_read_block(alloc_inode,
-				  le64_to_cpu(cl->cl_recs[chain].c_blkno),
-				  &group_bh);
+	status = ocfs2_read_group_descriptor(alloc_inode, fe,
+					     le64_to_cpu(cl->cl_recs[chain].c_blkno),
+					     &group_bh);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 	bg = (struct ocfs2_group_desc *) group_bh->b_data;
-	status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
-	if (status) {
-		mlog_errno(status);
-		goto bail;
-	}
 
 	status = -ENOSPC;
 	/* for now, the chain search is a bit simplistic. We just use
@@ -1278,18 +1309,13 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 		next_group = le64_to_cpu(bg->bg_next_group);
 		prev_group_bh = group_bh;
 		group_bh = NULL;
-		status = ocfs2_read_block(alloc_inode,
-					  next_group, &group_bh);
+		status = ocfs2_read_group_descriptor(alloc_inode, fe,
+						     next_group, &group_bh);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
 		}
 		bg = (struct ocfs2_group_desc *) group_bh->b_data;
-		status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
-		if (status) {
-			mlog_errno(status);
-			goto bail;
-		}
 	}
 	if (status < 0) {
 		if (status != -ENOSPC)
@@ -1801,18 +1827,14 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
 	     (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
 	     (unsigned long long)bg_blkno, start_bit);
 
-	status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh);
+	status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
+					     &group_bh);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
-
 	group = (struct ocfs2_group_desc *) group_bh->b_data;
-	status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
-	if (status) {
-		mlog_errno(status);
-		goto bail;
-	}
+
 	BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
 
 	status = ocfs2_block_group_clear_bits(handle, alloc_inode,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 7adfcc4..43de4fd 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -164,23 +164,24 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
  * and return that block offset. */
 u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
 
-/* somewhat more expensive than our other checks, so use sparingly. */
 /*
  * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it
  * finds a problem.  A caller that wants to check a group descriptor
  * without going readonly passes a nonzero clean_error.  This is only
- * resize, really.
+ * resize, really.  Everyone else should be using
+ * ocfs2_read_group_descriptor().
  */
 int ocfs2_validate_group_descriptor(struct super_block *sb,
 				    struct ocfs2_dinode *di,
-				    struct ocfs2_group_desc *gd,
+				    struct buffer_head *bh,
 				    int clean_error);
-static inline int ocfs2_check_group_descriptor(struct super_block *sb,
-					       struct ocfs2_dinode *di,
-					       struct ocfs2_group_desc *gd)
-{
-	return ocfs2_validate_group_descriptor(sb, di, gd, 0);
-}
+/*
+ * Read a group descriptor block into *bh.  If *bh is NULL, a bh will be
+ * allocated.  This is a cached read.  The descriptor will be validated with
+ * ocfs2_validate_group_descriptor().
+ */
+int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
+				u64 gd_blkno, struct buffer_head **bh);
 
 int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
 			  u32 clusters_to_add, u32 extents_to_split,
-- 
cgit v0.10.2


From 4203530613280281868b3ca36c817530bca3825c Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 13 Nov 2008 14:49:15 -0800
Subject: ocfs2: Morph the haphazard OCFS2_IS_VALID_GROUP_DESC() checks.

Random places in the code would check a group descriptor bh to see if it
was valid. The previous commit unified descriptor block reads,
validating all block reads in the same place.  Thus, these checks are no
longer necessary.  Rather than eliminate them, however, we change them
to BUG_ON() checks.  This ensures the assumptions remain true.  All of
the code paths to these checks have been audited to ensure they come
from a validated descriptor read.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 467bdb6..82ba887 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -458,13 +458,6 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
 #define OCFS2_IS_VALID_GROUP_DESC(ptr)					\
 	(!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))
 
-#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd)	do {		\
-	typeof(__gd) ____gd = (__gd);					\
-		ocfs2_error((__sb),					\
-		"Group Descriptor # %llu has bad signature %.*s",	\
-		(unsigned long long)le64_to_cpu((____gd)->bg_blkno), 7, \
-		(____gd)->bg_signature);				\
-} while (0)
 
 #define OCFS2_IS_VALID_XATTR_BLOCK(ptr)					\
 	(!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 797f509..766a00b 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -842,10 +842,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
 	int offset, start, found, status = 0;
 	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
 
-	if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg);
-		return -EIO;
-	}
+	/* Callers got this descriptor from
+	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
 
 	found = start = best_offset = best_size = 0;
 	bitmap = bg->bg_bitmap;
@@ -910,11 +909,9 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
 
 	mlog_entry_void();
 
-	if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
-		status = -EIO;
-		goto bail;
-	}
+	/* All callers get the descriptor via
+	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
 	BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
 
 	mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
@@ -983,16 +980,10 @@ static int ocfs2_relink_block_group(handle_t *handle,
 	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
 	struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
 
-	if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
-		status = -EIO;
-		goto out;
-	}
-	if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg);
-		status = -EIO;
-		goto out;
-	}
+	/* The caller got these descriptors from
+	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
+	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
 
 	mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
 	     (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
@@ -1055,7 +1046,7 @@ out_rollback:
 		bg->bg_next_group = cpu_to_le64(bg_ptr);
 		prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
 	}
-out:
+
 	mlog_exit(status);
 	return status;
 }
@@ -1758,11 +1749,9 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 
 	mlog_entry_void();
 
-	if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
-		status = -EIO;
-		goto bail;
-	}
+	/* The caller got this descriptor from
+	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
 
 	mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
 
-- 
cgit v0.10.2


From 5e96581a377fc6bd76e9b112da9aeb8a7ae8bf22 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 13 Nov 2008 14:49:16 -0800
Subject: ocfs2: Wrap extent block reads in a dedicated function.

We weren't consistently checking extent blocks after we read them.
Most places checked the signature, but none checked h_blkno or
h_fs_signature.  Create a toplevel ocfs2_read_extent_block() that does
the read and the validation.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 320545b..f430cc6 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -678,6 +678,66 @@ struct ocfs2_merge_ctxt {
 	int			c_split_covers_rec;
 };
 
+static int ocfs2_validate_extent_block(struct super_block *sb,
+				       struct buffer_head *bh)
+{
+	struct ocfs2_extent_block *eb =
+		(struct ocfs2_extent_block *)bh->b_data;
+
+	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
+		ocfs2_error(sb,
+			    "Extent block #%llu has bad signature %.*s",
+			    (unsigned long long)bh->b_blocknr, 7,
+			    eb->h_signature);
+		return -EINVAL;
+	}
+
+	if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) {
+		ocfs2_error(sb,
+			    "Extent block #%llu has an invalid h_blkno "
+			    "of %llu",
+			    (unsigned long long)bh->b_blocknr,
+			    (unsigned long long)le64_to_cpu(eb->h_blkno));
+		return -EINVAL;
+	}
+
+	if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) {
+		ocfs2_error(sb,
+			    "Extent block #%llu has an invalid "
+			    "h_fs_generation of #%u",
+			    (unsigned long long)bh->b_blocknr,
+			    le32_to_cpu(eb->h_fs_generation));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
+			    struct buffer_head **bh)
+{
+	int rc;
+	struct buffer_head *tmp = *bh;
+
+	rc = ocfs2_read_block(inode, eb_blkno, &tmp);
+	if (rc)
+		goto out;
+
+	rc = ocfs2_validate_extent_block(inode->i_sb, tmp);
+	if (rc) {
+		brelse(tmp);
+		goto out;
+	}
+
+	/* If ocfs2_read_block() got us a new bh, pass it up. */
+	if (!*bh)
+		*bh = tmp;
+
+out:
+	return rc;
+}
+
+
 /*
  * How many free extents have we got before we need more meta data?
  */
@@ -697,8 +757,7 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
 	last_eb_blk = ocfs2_et_get_last_eb_blk(et);
 
 	if (last_eb_blk) {
-		retval = ocfs2_read_block(inode, last_eb_blk,
-					  &eb_bh);
+		retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
 		if (retval < 0) {
 			mlog_errno(retval);
 			goto bail;
@@ -900,11 +959,8 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 	for(i = 0; i < new_blocks; i++) {
 		bh = new_eb_bhs[i];
 		eb = (struct ocfs2_extent_block *) bh->b_data;
-		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-			status = -EIO;
-			goto bail;
-		}
+		/* ocfs2_create_new_meta_bhs() should create it right! */
+		BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
 		eb_el = &eb->h_list;
 
 		status = ocfs2_journal_access(handle, inode, bh,
@@ -1044,11 +1100,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 	}
 
 	eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
-	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-		OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-		status = -EIO;
-		goto bail;
-	}
+	/* ocfs2_create_new_meta_bhs() should create it right! */
+	BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
 
 	eb_el = &eb->h_list;
 	root_el = et->et_root_el;
@@ -1168,18 +1221,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
 		brelse(bh);
 		bh = NULL;
 
-		status = ocfs2_read_block(inode, blkno, &bh);
+		status = ocfs2_read_extent_block(inode, blkno, &bh);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
 		}
 
 		eb = (struct ocfs2_extent_block *) bh->b_data;
-		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-			status = -EIO;
-			goto bail;
-		}
 		el = &eb->h_list;
 
 		if (le16_to_cpu(el->l_next_free_rec) <
@@ -1532,7 +1580,7 @@ static int __ocfs2_find_path(struct inode *inode,
 
 		brelse(bh);
 		bh = NULL;
-		ret = ocfs2_read_block(inode, blkno, &bh);
+		ret = ocfs2_read_extent_block(inode, blkno, &bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -1540,11 +1588,6 @@ static int __ocfs2_find_path(struct inode *inode,
 
 		eb = (struct ocfs2_extent_block *) bh->b_data;
 		el = &eb->h_list;
-		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-			ret = -EIO;
-			goto out;
-		}
 
 		if (le16_to_cpu(el->l_next_free_rec) >
 		    le16_to_cpu(el->l_count)) {
@@ -4089,8 +4132,15 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			    le16_to_cpu(new_el->l_count)) {
 				bh = path_leaf_bh(left_path);
 				eb = (struct ocfs2_extent_block *)bh->b_data;
-				OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
-								 eb);
+				ocfs2_error(inode->i_sb,
+					    "Extent block #%llu has an "
+					    "invalid l_next_free_rec of "
+					    "%d.  It should have "
+					    "matched the l_count of %d",
+					    (unsigned long long)le64_to_cpu(eb->h_blkno),
+					    le16_to_cpu(new_el->l_next_free_rec),
+					    le16_to_cpu(new_el->l_count));
+				status = -EINVAL;
 				goto out;
 			}
 			rec = &new_el->l_recs[
@@ -4139,8 +4189,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
 				bh = path_leaf_bh(right_path);
 				eb = (struct ocfs2_extent_block *)bh->b_data;
-				OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
-								 eb);
+				ocfs2_error(inode->i_sb,
+					    "Extent block #%llu has an "
+					    "invalid l_next_free_rec of %d",
+					    (unsigned long long)le64_to_cpu(eb->h_blkno),
+					    le16_to_cpu(new_el->l_next_free_rec));
+				status = -EINVAL;
 				goto out;
 			}
 			rec = &new_el->l_recs[1];
@@ -4286,7 +4340,9 @@ static int ocfs2_figure_insert_type(struct inode *inode,
 		 * ocfs2_figure_insert_type() and ocfs2_add_branch()
 		 * may want it later.
 		 */
-		ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh);
+		ret = ocfs2_read_extent_block(inode,
+					      ocfs2_et_get_last_eb_blk(et),
+					      &bh);
 		if (ret) {
 			mlog_exit(ret);
 			goto out;
@@ -4752,20 +4808,15 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 	if (path->p_tree_depth) {
 		struct ocfs2_extent_block *eb;
 
-		ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
-				       &last_eb_bh);
+		ret = ocfs2_read_extent_block(inode,
+					      ocfs2_et_get_last_eb_blk(et),
+					      &last_eb_bh);
 		if (ret) {
 			mlog_exit(ret);
 			goto out;
 		}
 
 		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
-		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-			ret = -EROFS;
-			goto out;
-		}
-
 		rightmost_el = &eb->h_list;
 	} else
 		rightmost_el = path_root_el(path);
@@ -4910,8 +4961,9 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
 
 	depth = path->p_tree_depth;
 	if (depth > 0) {
-		ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
-				       &last_eb_bh);
+		ret = ocfs2_read_extent_block(inode,
+					      ocfs2_et_get_last_eb_blk(et),
+					      &last_eb_bh);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -6231,11 +6283,10 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode,
 
 	eb = (struct ocfs2_extent_block *) bh->b_data;
 	el = &eb->h_list;
-	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-		OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-		ret = -EROFS;
-		goto out;
-	}
+
+	/* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block().
+	 * Any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
 
 	*new_last_eb = bh;
 	get_bh(*new_last_eb);
@@ -7140,20 +7191,14 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
 	ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
 
 	if (fe->id2.i_list.l_tree_depth) {
-		status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk),
-					  &last_eb_bh);
+		status = ocfs2_read_extent_block(inode,
+						 le64_to_cpu(fe->i_last_eb_blk),
+						 &last_eb_bh);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
 		}
 		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
-		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-
-			brelse(last_eb_bh);
-			status = -EIO;
-			goto bail;
-		}
 	}
 
 	(*tc)->tc_last_eb_bh = last_eb_bh;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 0fbf8fc..59d37d1 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -73,6 +73,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
 					struct buffer_head *bh,
 					struct ocfs2_xattr_value_root *xv);
 
+/*
+ * Read an extent block into *bh.  If *bh is NULL, a bh will be
+ * allocated.  This is a cached read.  The extent block will be validated
+ * with ocfs2_validate_extent_block().
+ */
+int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
+			    struct buffer_head **bh);
+
 struct ocfs2_alloc_context;
 int ocfs2_insert_extent(struct ocfs2_super *osb,
 			handle_t *handle,
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index b686b31..0bd9d96 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
 	struct ocfs2_extent_block *eb;
 	struct ocfs2_extent_list *el;
 
-	ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh);
+	ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -302,12 +302,6 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
 	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 	el = &eb->h_list;
 
-	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-		ret = -EROFS;
-		OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-		goto out;
-	}
-
 	if (el->l_tree_depth) {
 		ocfs2_error(inode->i_sb,
 			    "Inode %lu has non zero tree depth in "
@@ -381,23 +375,16 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
 		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
 			goto no_more_extents;
 
-		ret = ocfs2_read_block(inode,
-				       le64_to_cpu(eb->h_next_leaf_blk),
-				       &next_eb_bh);
+		ret = ocfs2_read_extent_block(inode,
+					      le64_to_cpu(eb->h_next_leaf_blk),
+					      &next_eb_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
-		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
-
-		if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) {
-			ret = -EROFS;
-			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb);
-			goto out;
-		}
 
+		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
 		el = &next_eb->h_list;
-
 		i = ocfs2_search_for_hole_index(el, v_cluster);
 	}
 
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 82ba887..f04b229 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -447,14 +447,6 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
 #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr)				\
 	(!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))
 
-#define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb)	do {		\
-	typeof(__eb) ____eb = (__eb);					\
-	ocfs2_error((__sb), 						\
-		"Extent Block # %llu has bad signature %.*s",		\
-		(unsigned long long)le64_to_cpu((____eb)->h_blkno), 7,	\
-		(____eb)->h_signature);					\
-} while (0)
-
 #define OCFS2_IS_VALID_GROUP_DESC(ptr)					\
 	(!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))
 
-- 
cgit v0.10.2


From a22305cc693254a2aa651e797875669112ef8635 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 13 Nov 2008 14:49:17 -0800
Subject: ocfs2: Wrap dirblock reads in a dedicated function.

We have ocfs2_bread() as a vestige of the original ext-based dir code.
It's only used by directories, though.  Turn it into
ocfs2_read_dir_block(), with a prototype matching the other metadata
read functions.  It's set up to validate dirblocks when the time comes.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 5777045..c2f3fd9 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -82,49 +82,6 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
 			       struct ocfs2_alloc_context *meta_ac,
 			       struct buffer_head **new_bh);
 
-static struct buffer_head *ocfs2_bread(struct inode *inode,
-				       int block, int *err, int reada)
-{
-	struct buffer_head *bh = NULL;
-	int tmperr;
-	u64 p_blkno;
-	int readflags = 0;
-
-	if (reada)
-		readflags |= OCFS2_BH_READAHEAD;
-
-	if (((u64)block << inode->i_sb->s_blocksize_bits) >=
-	    i_size_read(inode)) {
-		BUG_ON(!reada);
-		return NULL;
-	}
-
-	down_read(&OCFS2_I(inode)->ip_alloc_sem);
-	tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
-					     NULL);
-	up_read(&OCFS2_I(inode)->ip_alloc_sem);
-	if (tmperr < 0) {
-		mlog_errno(tmperr);
-		goto fail;
-	}
-
-	tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags);
-	if (tmperr < 0)
-		goto fail;
-
-	tmperr = 0;
-
-	*err = 0;
-	return bh;
-
-fail:
-	brelse(bh);
-	bh = NULL;
-
-	*err = -EIO;
-	return NULL;
-}
-
 /*
  * bh passed here can be an inode block or a dir data block, depending
  * on the inode inline data flag.
@@ -250,6 +207,76 @@ out:
 	return NULL;
 }
 
+static int ocfs2_validate_dir_block(struct super_block *sb,
+				    struct buffer_head *bh)
+{
+	/*
+	 * Nothing yet.  We don't validate dirents here, that's handled
+	 * in-place when the code walks them.
+	 */
+
+	return 0;
+}
+
+/*
+ * This function forces all errors to -EIO for consistency with its
+ * predecessor, ocfs2_bread().  We haven't audited what returning the
+ * real error codes would do to callers.  We log the real codes with
+ * mlog_errno() before we squash them.
+ */
+static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
+				struct buffer_head **bh, int flags)
+{
+	int rc = 0;
+	struct buffer_head *tmp = *bh;
+	u64 p_blkno;
+
+	if (((u64)v_block << inode->i_sb->s_blocksize_bits) >=
+	    i_size_read(inode)) {
+		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
+		goto out;
+	}
+
+	down_read(&OCFS2_I(inode)->ip_alloc_sem);
+	rc = ocfs2_extent_map_get_blocks(inode, v_block, &p_blkno, NULL,
+					 NULL);
+	up_read(&OCFS2_I(inode)->ip_alloc_sem);
+	if (rc) {
+		mlog_errno(rc);
+		goto out;
+	}
+
+	if (!p_blkno) {
+		rc = -EIO;
+		mlog(ML_ERROR,
+		     "Directory #%llu contains a hole at offset %llu\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		     (unsigned long long)v_block << inode->i_sb->s_blocksize_bits);
+		goto out;
+	}
+
+	rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags);
+	if (rc) {
+		mlog_errno(rc);
+		goto out;
+	}
+
+	if (!(flags & OCFS2_BH_READAHEAD)) {
+		rc = ocfs2_validate_dir_block(inode->i_sb, tmp);
+		if (rc) {
+			brelse(tmp);
+			goto out;
+		}
+	}
+
+	/* If ocfs2_read_blocks() got us a new bh, pass it up.  */
+	if (!*bh)
+		*bh = tmp;
+
+out:
+	return rc ? -EIO : 0;
+}
+
 static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
 					       struct inode *dir,
 					       struct ocfs2_dir_entry **res_dir)
@@ -296,15 +323,17 @@ restart:
 				}
 				num++;
 
-				bh = ocfs2_bread(dir, b++, &err, 1);
+				bh = NULL;
+				err = ocfs2_read_dir_block(dir, b++, &bh,
+							   OCFS2_BH_READAHEAD);
 				bh_use[ra_max] = bh;
 			}
 		}
 		if ((bh = bh_use[ra_ptr++]) == NULL)
 			goto next;
-		if (ocfs2_read_block(dir, block, &bh)) {
+		if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
 			/* read error, skip block & hope for the best.
-			 * ocfs2_read_block() has released the bh. */
+			 * ocfs2_read_dir_block() has released the bh. */
 			ocfs2_error(dir->i_sb, "reading directory %llu, "
 				    "offset %lu\n",
 				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
@@ -724,7 +753,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
 	int i, stored;
 	struct buffer_head * bh, * tmp;
 	struct ocfs2_dir_entry * de;
-	int err;
 	struct super_block * sb = inode->i_sb;
 	unsigned int ra_sectors = 16;
 
@@ -735,12 +763,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
 
 	while (!error && !stored && *f_pos < i_size_read(inode)) {
 		blk = (*f_pos) >> sb->s_blocksize_bits;
-		bh = ocfs2_bread(inode, blk, &err, 0);
-		if (!bh) {
-			mlog(ML_ERROR,
-			     "directory #%llu contains a hole at offset %lld\n",
-			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
-			     *f_pos);
+		if (ocfs2_read_dir_block(inode, blk, &bh, 0)) {
+			/* Skip the corrupt dirblock and keep trying */
 			*f_pos += sb->s_blocksize - offset;
 			continue;
 		}
@@ -754,8 +778,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
 		    || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
 			for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
 			     i > 0; i--) {
-				tmp = ocfs2_bread(inode, ++blk, &err, 1);
-				brelse(tmp);
+				tmp = NULL;
+				if (!ocfs2_read_dir_block(inode, ++blk, &tmp,
+							  OCFS2_BH_READAHEAD))
+					brelse(tmp);
 			}
 			last_ra_blk = blk;
 			ra_sectors = 8;
@@ -828,6 +854,7 @@ revalidate:
 		}
 		offset = 0;
 		brelse(bh);
+		bh = NULL;
 	}
 
 	stored = 0;
@@ -1680,8 +1707,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
 	struct super_block *sb = dir->i_sb;
 	int status;
 
-	bh = ocfs2_bread(dir, 0, &status, 0);
-	if (!bh) {
+	status = ocfs2_read_dir_block(dir, 0, &bh, 0);
+	if (status) {
 		mlog_errno(status);
 		goto bail;
 	}
@@ -1702,11 +1729,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
 				status = -ENOSPC;
 				goto bail;
 			}
-			bh = ocfs2_bread(dir,
-					 offset >> sb->s_blocksize_bits,
-					 &status,
-					 0);
-			if (!bh) {
+			status = ocfs2_read_dir_block(dir,
+					     offset >> sb->s_blocksize_bits,
+					     &bh, 0);
+			if (status) {
 				mlog_errno(status);
 				goto bail;
 			}
-- 
cgit v0.10.2


From 4ae1d69bedc8d174cb8a558694607e013157cde1 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 13 Nov 2008 14:49:18 -0800
Subject: ocfs2: Wrap xattr block reads in a dedicated function

We weren't consistently checking xattr blocks after we read them.
Most places checked the signature, but none checked xb_blkno or
xb_fs_signature.  Create a toplevel ocfs2_read_xattr_block() that does
the read and the validation.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3cc8385..ef4aa54 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -314,6 +314,65 @@ static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
 	}
 }
 
+static int ocfs2_validate_xattr_block(struct super_block *sb,
+				      struct buffer_head *bh)
+{
+	struct ocfs2_xattr_block *xb =
+		(struct ocfs2_xattr_block *)bh->b_data;
+
+	mlog(0, "Validating xattr block %llu\n",
+	     (unsigned long long)bh->b_blocknr);
+
+	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
+		ocfs2_error(sb,
+			    "Extended attribute block #%llu has bad "
+			    "signature %.*s",
+			    (unsigned long long)bh->b_blocknr, 7,
+			    xb->xb_signature);
+		return -EINVAL;
+	}
+
+	if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
+		ocfs2_error(sb,
+			    "Extended attribute block #%llu has an "
+			    "invalid xb_blkno of %llu",
+			    (unsigned long long)bh->b_blocknr,
+			    (unsigned long long)le64_to_cpu(xb->xb_blkno));
+		return -EINVAL;
+	}
+
+	if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
+		ocfs2_error(sb,
+			    "Extended attribute block #%llu has an invalid "
+			    "xb_fs_generation of #%u",
+			    (unsigned long long)bh->b_blocknr,
+			    le32_to_cpu(xb->xb_fs_generation));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
+				  struct buffer_head **bh)
+{
+	int rc;
+	struct buffer_head *tmp = *bh;
+
+	rc = ocfs2_read_block(inode, xb_blkno, &tmp);
+	if (!rc) {
+		rc = ocfs2_validate_xattr_block(inode->i_sb, tmp);
+		if (rc)
+			brelse(tmp);
+	}
+
+	/* If ocfs2_read_block() got us a new bh, pass it up. */
+	if (!rc && !*bh)
+		*bh = tmp;
+
+	return rc;
+}
+
 static inline const char *ocfs2_xattr_prefix(int name_index)
 {
 	struct xattr_handler *handler = NULL;
@@ -739,18 +798,14 @@ static int ocfs2_xattr_block_list(struct inode *inode,
 	if (!di->i_xattr_loc)
 		return ret;
 
-	ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
+	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
+				     &blk_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
 	}
 
 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
-	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
-		ret = -EIO;
-		goto cleanup;
-	}
-
 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
 		ret = ocfs2_xattr_list_entries(inode, header,
@@ -760,7 +815,7 @@ static int ocfs2_xattr_block_list(struct inode *inode,
 		ret = ocfs2_xattr_tree_list_index_block(inode, xt,
 						   buffer, buffer_size);
 	}
-cleanup:
+
 	brelse(blk_bh);
 
 	return ret;
@@ -1693,24 +1748,19 @@ static int ocfs2_xattr_free_block(struct inode *inode,
 	u64 blk, bg_blkno;
 	u16 bit;
 
-	ret = ocfs2_read_block(inode, block, &blk_bh);
+	ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
-	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
-		ret = -EIO;
-		goto out;
-	}
-
 	ret = ocfs2_xattr_block_remove(inode, blk_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
 	}
 
+	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
 	blk = le64_to_cpu(xb->xb_blkno);
 	bit = le16_to_cpu(xb->xb_suballoc_bit);
 	bg_blkno = ocfs2_which_suballoc_group(blk, bit);
@@ -1950,19 +2000,15 @@ static int ocfs2_xattr_block_find(struct inode *inode,
 	if (!di->i_xattr_loc)
 		return ret;
 
-	ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
+	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
+				     &blk_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
 	}
 
-	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
-	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
-		ret = -EIO;
-		goto cleanup;
-	}
-
 	xs->xattr_bh = blk_bh;
+	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
 
 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
 		xs->header = &xb->xb_attrs.xb_header;
@@ -2259,9 +2305,9 @@ meta_guess:
 	/* calculate metadata allocation. */
 	if (di->i_xattr_loc) {
 		if (!xbs->xattr_bh) {
-			ret = ocfs2_read_block(inode,
-					       le64_to_cpu(di->i_xattr_loc),
-					       &bh);
+			ret = ocfs2_read_xattr_block(inode,
+						     le64_to_cpu(di->i_xattr_loc),
+						     &bh);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
-- 
cgit v0.10.2


From 970e4936d7d15f35d00fd15a14f5343ba78b2fc8 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 13 Nov 2008 14:49:19 -0800
Subject: ocfs2: Validate metadata only when it's read from disk.

Add an optional validation hook to ocfs2_read_blocks().  Now the
validation function is only called when a block was actually read off of
disk.  It is not called when the buffer was in cache.

We add a buffer state bit BH_NeedsValidate to flag these buffers.  It
must always be one higher than the last JBD2 buffer state bit.

The dinode, dirblock, extent_block, and xattr_block validators are
lifted to this scheme directly.  The group_descriptor validator needs to
be split into two pieces.  The first part only needs the gd buffer and
is passed to ocfs2_read_block().  The second part requires the dinode as
well, and is called every time.  It's only 3 compares, so it's tiny.
This also allows us to clean up the non-fatal gd check used by resize.c.
It now has no magic argument.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f430cc6..e823a27 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -684,6 +684,9 @@ static int ocfs2_validate_extent_block(struct super_block *sb,
 	struct ocfs2_extent_block *eb =
 		(struct ocfs2_extent_block *)bh->b_data;
 
+	mlog(0, "Validating extent block %llu\n",
+	     (unsigned long long)bh->b_blocknr);
+
 	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
 		ocfs2_error(sb,
 			    "Extent block #%llu has bad signature %.*s",
@@ -719,21 +722,13 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_block(inode, eb_blkno, &tmp);
-	if (rc)
-		goto out;
-
-	rc = ocfs2_validate_extent_block(inode->i_sb, tmp);
-	if (rc) {
-		brelse(tmp);
-		goto out;
-	}
+	rc = ocfs2_read_block(inode, eb_blkno, &tmp,
+			      ocfs2_validate_extent_block);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
-	if (!*bh)
+	if (!rc && !*bh)
 		*bh = tmp;
 
-out:
 	return rc;
 }
 
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 3a178ec..0e9eed0 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -39,6 +39,19 @@
 
 #include "buffer_head_io.h"
 
+/*
+ * Bits on bh->b_state used by ocfs2.
+ *
+ * These MUST be after the JBD2 bits.  Currently BH_Unshadow is the last
+ * JBD2 bit.
+ */
+enum ocfs2_state_bits {
+	BH_NeedsValidate = BH_Unshadow + 1,
+};
+
+/* Expand the magic b_state functions */
+BUFFER_FNS(NeedsValidate, needs_validate);
+
 int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
 		      struct inode *inode)
 {
@@ -166,7 +179,9 @@ bail:
 }
 
 int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
-		      struct buffer_head *bhs[], int flags)
+		      struct buffer_head *bhs[], int flags,
+		      int (*validate)(struct super_block *sb,
+				      struct buffer_head *bh))
 {
 	int status = 0;
 	int i, ignore_cache = 0;
@@ -298,6 +313,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 
 			clear_buffer_uptodate(bh);
 			get_bh(bh); /* for end_buffer_read_sync() */
+			if (validate)
+				set_buffer_needs_validate(bh);
 			bh->b_end_io = end_buffer_read_sync;
 			submit_bh(READ, bh);
 			continue;
@@ -328,6 +345,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 				bhs[i] = NULL;
 				continue;
 			}
+
+			if (buffer_needs_validate(bh)) {
+				/* We never set NeedsValidate if the
+				 * buffer was held by the journal, so
+				 * that better not have changed */
+				BUG_ON(buffer_jbd(bh));
+				clear_buffer_needs_validate(bh);
+				status = validate(inode->i_sb, bh);
+				if (status) {
+					put_bh(bh);
+					bhs[i] = NULL;
+					continue;
+				}
+			}
 		}
 
 		/* Always set the buffer in the cache, even if it was
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 75e1dcb..c75d682 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -31,21 +31,24 @@
 void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
 			     int uptodate);
 
-static inline int ocfs2_read_block(struct inode	       *inode,
-				   u64                  off,
-				   struct buffer_head **bh);
-
 int ocfs2_write_block(struct ocfs2_super          *osb,
 		      struct buffer_head  *bh,
 		      struct inode        *inode);
-int ocfs2_read_blocks(struct inode	  *inode,
-		      u64                  block,
-		      int                  nr,
-		      struct buffer_head  *bhs[],
-		      int                  flags);
 int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
 			   unsigned int nr, struct buffer_head *bhs[]);
 
+/*
+ * If not NULL, validate() will be called on a buffer that is freshly
+ * read from disk.  It will not be called if the buffer was in cache.
+ * Note that if validate() is being used for this buffer, it needs to
+ * be set even for a READAHEAD call, as it marks the buffer for later
+ * validation.
+ */
+int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
+		      struct buffer_head *bhs[], int flags,
+		      int (*validate)(struct super_block *sb,
+				      struct buffer_head *bh));
+
 int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
 				struct buffer_head *bh);
 
@@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
 #define OCFS2_BH_READAHEAD         8
 
 static inline int ocfs2_read_block(struct inode *inode, u64 off,
-				   struct buffer_head **bh)
+				   struct buffer_head **bh,
+				   int (*validate)(struct super_block *sb,
+						   struct buffer_head *bh))
 {
 	int status = 0;
 
@@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
 		goto bail;
 	}
 
-	status = ocfs2_read_blocks(inode, off, 1, bh, 0);
+	status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate);
 
 bail:
 	return status;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c2f3fd9..7e863d4 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -214,6 +214,8 @@ static int ocfs2_validate_dir_block(struct super_block *sb,
 	 * Nothing yet.  We don't validate dirents here, that's handled
 	 * in-place when the code walks them.
 	 */
+	mlog(0, "Validating dirblock %llu\n",
+	     (unsigned long long)bh->b_blocknr);
 
 	return 0;
 }
@@ -255,20 +257,13 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
 		goto out;
 	}
 
-	rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags);
+	rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags,
+			       ocfs2_validate_dir_block);
 	if (rc) {
 		mlog_errno(rc);
 		goto out;
 	}
 
-	if (!(flags & OCFS2_BH_READAHEAD)) {
-		rc = ocfs2_validate_dir_block(inode->i_sb, tmp);
-		if (rc) {
-			brelse(tmp);
-			goto out;
-		}
-	}
-
 	/* If ocfs2_read_blocks() got us a new bh, pass it up.  */
 	if (!*bh)
 		*bh = tmp;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 9eb701b8..ec3497b 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1255,6 +1255,9 @@ int ocfs2_validate_inode_block(struct super_block *sb,
 	int rc = -EINVAL;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
 
+	mlog(0, "Validating dinode %llu\n",
+	     (unsigned long long)bh->b_blocknr);
+
 	BUG_ON(!buffer_uptodate(bh));
 
 	if (!OCFS2_IS_VALID_DINODE(di)) {
@@ -1300,23 +1303,12 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
 	struct buffer_head *tmp = *bh;
 
 	rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
-			       flags);
-	if (rc)
-		goto out;
-
-	if (!(flags & OCFS2_BH_READAHEAD)) {
-		rc = ocfs2_validate_inode_block(inode->i_sb, tmp);
-		if (rc) {
-			brelse(tmp);
-			goto out;
-		}
-	}
+			       flags, ocfs2_validate_inode_block);
 
 	/* If ocfs2_read_blocks() got us a new bh, pass it up. */
-	if (!*bh)
+	if (!rc && !*bh)
 		*bh = tmp;
 
-out:
 	return rc;
 }
 
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 252baff..867de3e 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -394,7 +394,7 @@ static int ocfs2_check_new_group(struct inode *inode,
 		(struct ocfs2_group_desc *)group_bh->b_data;
 	u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
 
-	ret = ocfs2_validate_group_descriptor(inode->i_sb, di, group_bh, 1);
+	ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh);
 	if (ret)
 		goto out;
 
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index bdda2d8..40661e7 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 	 * this is not true, the read of -1 (UINT64_MAX) will fail.
 	 */
 	ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
-				OCFS2_BH_IGNORE_CACHE);
+				OCFS2_BH_IGNORE_CACHE, NULL);
 	if (ret == 0) {
 		spin_lock(&osb->osb_lock);
 		ocfs2_update_slot_info(si);
@@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 
 		bh = NULL;  /* Acquire a fresh bh */
 		status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
-					   OCFS2_BH_IGNORE_CACHE);
+					   OCFS2_BH_IGNORE_CACHE, NULL);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 766a00b..226fe21 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -145,14 +145,6 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
 	return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
 }
 
-int ocfs2_validate_group_descriptor(struct super_block *sb,
-				    struct ocfs2_dinode *di,
-				    struct buffer_head *bh,
-				    int clean_error)
-{
-	unsigned int max_bits;
-	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
-
 #define do_error(fmt, ...)						\
 	do{								\
 		if (clean_error)					\
@@ -161,6 +153,12 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
 			ocfs2_error(sb, fmt, ##__VA_ARGS__);		\
 	} while (0)
 
+static int ocfs2_validate_gd_self(struct super_block *sb,
+				  struct buffer_head *bh,
+				  int clean_error)
+{
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
+
 	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
 		do_error("Group descriptor #%llu has bad signature %.*s",
 			 (unsigned long long)bh->b_blocknr, 7,
@@ -184,6 +182,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
 		return -EINVAL;
 	}
 
+	if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
+		do_error("Group descriptor #%llu has bit count %u but "
+			 "claims that %u are free",
+			 (unsigned long long)bh->b_blocknr,
+			 le16_to_cpu(gd->bg_bits),
+			 le16_to_cpu(gd->bg_free_bits_count));
+		return -EINVAL;
+	}
+
+	if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
+		do_error("Group descriptor #%llu has bit count %u but "
+			 "max bitmap bits of %u",
+			 (unsigned long long)bh->b_blocknr,
+			 le16_to_cpu(gd->bg_bits),
+			 8 * le16_to_cpu(gd->bg_size));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ocfs2_validate_gd_parent(struct super_block *sb,
+				    struct ocfs2_dinode *di,
+				    struct buffer_head *bh,
+				    int clean_error)
+{
+	unsigned int max_bits;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
+
 	if (di->i_blkno != gd->bg_parent_dinode) {
 		do_error("Group descriptor #%llu has bad parent "
 			 "pointer (%llu, expected %llu)",
@@ -209,26 +236,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
 		return -EINVAL;
 	}
 
-	if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
-		do_error("Group descriptor #%llu has bit count %u but "
-			 "claims that %u are free",
-			 (unsigned long long)bh->b_blocknr,
-			 le16_to_cpu(gd->bg_bits),
-			 le16_to_cpu(gd->bg_free_bits_count));
-		return -EINVAL;
-	}
+	return 0;
+}
 
-	if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
-		do_error("Group descriptor #%llu has bit count %u but "
-			 "max bitmap bits of %u",
-			 (unsigned long long)bh->b_blocknr,
-			 le16_to_cpu(gd->bg_bits),
-			 8 * le16_to_cpu(gd->bg_size));
-		return -EINVAL;
-	}
 #undef do_error
 
-	return 0;
+/*
+ * This version only prints errors.  It does not fail the filesystem, and
+ * exists only for resize.
+ */
+int ocfs2_check_group_descriptor(struct super_block *sb,
+				 struct ocfs2_dinode *di,
+				 struct buffer_head *bh)
+{
+	int rc;
+
+	rc = ocfs2_validate_gd_self(sb, bh, 1);
+	if (!rc)
+		rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
+
+	return rc;
+}
+
+static int ocfs2_validate_group_descriptor(struct super_block *sb,
+					   struct buffer_head *bh)
+{
+	mlog(0, "Validating group descriptor %llu\n",
+	     (unsigned long long)bh->b_blocknr);
+
+	return ocfs2_validate_gd_self(sb, bh, 0);
 }
 
 int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
@@ -237,11 +273,12 @@ int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_block(inode, gd_blkno, &tmp);
+	rc = ocfs2_read_block(inode, gd_blkno, &tmp,
+			      ocfs2_validate_group_descriptor);
 	if (rc)
 		goto out;
 
-	rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0);
+	rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
 	if (rc) {
 		brelse(tmp);
 		goto out;
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 43de4fd..e3c13c7 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -165,16 +165,15 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
 u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
 
 /*
- * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it
+ * By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it
  * finds a problem.  A caller that wants to check a group descriptor
- * without going readonly passes a nonzero clean_error.  This is only
- * resize, really.  Everyone else should be using
- * ocfs2_read_group_descriptor().
+ * without going readonly should read the block with ocfs2_read_block[s]()
+ * and then checking it with this function.  This is only resize, really.
+ * Everyone else should be using ocfs2_read_group_descriptor().
  */
-int ocfs2_validate_group_descriptor(struct super_block *sb,
-				    struct ocfs2_dinode *di,
-				    struct buffer_head *bh,
-				    int clean_error);
+int ocfs2_check_group_descriptor(struct super_block *sb,
+				 struct ocfs2_dinode *di,
+				 struct buffer_head *bh);
 /*
  * Read a group descriptor block into *bh.  If *bh is NULL, a bh will be
  * allocated.  This is a cached read.  The descriptor will be validated with
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index ef4aa54..8af29b3 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -266,7 +266,8 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 	int rc;
 
 	rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
-			       bucket->bu_blocks, bucket->bu_bhs, 0);
+			       bucket->bu_blocks, bucket->bu_bhs, 0,
+			       NULL);
 	if (rc)
 		ocfs2_xattr_bucket_relse(bucket);
 	return rc;
@@ -359,12 +360,8 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_block(inode, xb_blkno, &tmp);
-	if (!rc) {
-		rc = ocfs2_validate_xattr_block(inode->i_sb, tmp);
-		if (rc)
-			brelse(tmp);
-	}
+	rc = ocfs2_read_block(inode, xb_blkno, &tmp,
+			      ocfs2_validate_xattr_block);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
 	if (!rc && !*bh)
@@ -925,7 +922,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 		/* Copy ocfs2_xattr_value */
 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
-			ret = ocfs2_read_block(inode, blkno, &bh);
+			ret = ocfs2_read_block(inode, blkno, &bh, NULL);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -1174,7 +1171,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 
 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
-			ret = ocfs2_read_block(inode, blkno, &bh);
+			ret = ocfs2_read_block(inode, blkno, &bh, NULL);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -2206,7 +2203,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 		base = xis->base;
 		credits += OCFS2_INODE_UPDATE_CREDITS;
 	} else {
-		int i, block_off;
+		int i, block_off = 0;
 		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
 		xe = xbs->here;
 		name_offset = le16_to_cpu(xe->xe_name_offset);
@@ -2840,6 +2837,7 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
 			break;
 		}
 
+
 		xe_name = bucket_block(bucket, block_off) + new_offset;
 		if (!memcmp(name, xe_name, name_len)) {
 			*xe_index = i;
@@ -3598,7 +3596,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
+		ret = ocfs2_read_block(inode, prev_blkno, &old_bh, NULL);
 		if (ret < 0) {
 			mlog_errno(ret);
 			brelse(new_bh);
@@ -3990,7 +3988,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
 	ocfs2_journal_dirty(handle, first_bh);
 
 	/* update the new bucket header. */
-	ret = ocfs2_read_block(inode, to_blk_start, &bh);
+	ret = ocfs2_read_block(inode, to_blk_start, &bh, NULL);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -4337,7 +4335,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_read_block(inode, p_blkno, &first_bh);
+	ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4635,7 +4633,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
 	value_blk += header_bh->b_blocknr;
 
-	ret = ocfs2_read_block(inode, value_blk, &value_bh);
+	ret = ocfs2_read_block(inode, value_blk, &value_bh, NULL);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
-- 
cgit v0.10.2


From a8549fb5abb2b372e46d5de0d23ff8b24f4a61af Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 13 Nov 2008 14:49:20 -0800
Subject: ocfs2: Wrap virtual block reads in ocfs2_read_virt_blocks()

The ocfs2_read_dir_block() function really maps an inode's virtual
blocks to physical ones before calling ocfs2_read_blocks().  Let's
extract that to common code, because other places might want to do that.

Other than the block number being virtual, ocfs2_read_virt_blocks()
takes the same arguments as ocfs2_read_blocks().  It converts those
virtual block numbers to physical before calling ocfs2_read_blocks()
directly.  If the blocks asked for are discontiguous, this can mean
multiple calls to ocfs2_read_blocks(), but this is mostly hidden from
the caller.

Like ocfs2_read_blocks(), the caller can pass in an existing
buffer_head.  This is usually done to pick up some readahead I/O.
ocfs2_read_virt_blocks() checks the buffer_head's block number
against the extent map - it must match.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 0bd9d96..f2bb1a0 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -806,3 +806,74 @@ out:
 
 	return ret;
 }
+
+int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
+			   struct buffer_head *bhs[], int flags,
+			   int (*validate)(struct super_block *sb,
+					   struct buffer_head *bh))
+{
+	int rc = 0;
+	u64 p_block, p_count;
+	int i, count, done = 0;
+
+	mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, "
+		   "flags = %x, validate = %p)\n",
+		   inode, (unsigned long long)v_block, nr, bhs, flags,
+		   validate);
+
+	if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
+	    i_size_read(inode)) {
+		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
+		goto out;
+	}
+
+	while (done < nr) {
+		down_read(&OCFS2_I(inode)->ip_alloc_sem);
+		rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
+						 &p_block, &p_count, NULL);
+		up_read(&OCFS2_I(inode)->ip_alloc_sem);
+		if (rc) {
+			mlog_errno(rc);
+			break;
+		}
+
+		if (!p_block) {
+			rc = -EIO;
+			mlog(ML_ERROR,
+			     "Inode #%llu contains a hole at offset %llu\n",
+			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			     (unsigned long long)(v_block + done) <<
+			     inode->i_sb->s_blocksize_bits);
+			break;
+		}
+
+		count = nr - done;
+		if (p_count < count)
+			count = p_count;
+
+		/*
+		 * If the caller passed us bhs, they should have come
+		 * from a previous readahead call to this function.  Thus,
+		 * they should have the right b_blocknr.
+		 */
+		for (i = 0; i < count; i++) {
+			if (!bhs[done + i])
+				continue;
+			BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
+		}
+
+		rc = ocfs2_read_blocks(inode, p_block, count, bhs + done,
+				       flags, validate);
+		if (rc) {
+			mlog_errno(rc);
+			break;
+		}
+		done += count;
+	}
+
+out:
+	mlog_exit(rc);
+	return rc;
+}
+
+
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 1c4aa8b..b7dd973 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -57,4 +57,28 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 			     u32 *p_cluster, u32 *num_clusters,
 			     struct ocfs2_extent_list *el);
 
+int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
+			   struct buffer_head *bhs[], int flags,
+			   int (*validate)(struct super_block *sb,
+					   struct buffer_head *bh));
+static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block,
+					struct buffer_head **bh,
+					int (*validate)(struct super_block *sb,
+							struct buffer_head *bh))
+{
+	int status = 0;
+
+	if (bh == NULL) {
+		printk("ocfs2: bh == NULL\n");
+		status = -EINVAL;
+		goto bail;
+	}
+
+	status = ocfs2_read_virt_blocks(inode, v_block, 1, bh, 0, validate);
+
+bail:
+	return status;
+}
+
+
 #endif  /* _EXTENT_MAP_H */
-- 
cgit v0.10.2


From 511308d90b53479b194cd067715f44dc99d39b08 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 13 Nov 2008 14:49:21 -0800
Subject: ocfs2: Convert ocfs2_read_dir_block() to ocfs2_read_virt_blocks()

Now that we've centralized the ocfs2_read_virt_blocks() code, let's use
it in ocfs2_read_dir_block().

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 7e863d4..d83cff9 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -231,44 +231,16 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
 {
 	int rc = 0;
 	struct buffer_head *tmp = *bh;
-	u64 p_blkno;
 
-	if (((u64)v_block << inode->i_sb->s_blocksize_bits) >=
-	    i_size_read(inode)) {
-		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
-		goto out;
-	}
-
-	down_read(&OCFS2_I(inode)->ip_alloc_sem);
-	rc = ocfs2_extent_map_get_blocks(inode, v_block, &p_blkno, NULL,
-					 NULL);
-	up_read(&OCFS2_I(inode)->ip_alloc_sem);
-	if (rc) {
+	rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
+				    ocfs2_validate_dir_block);
+	if (rc)
 		mlog_errno(rc);
-		goto out;
-	}
 
-	if (!p_blkno) {
-		rc = -EIO;
-		mlog(ML_ERROR,
-		     "Directory #%llu contains a hole at offset %llu\n",
-		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
-		     (unsigned long long)v_block << inode->i_sb->s_blocksize_bits);
-		goto out;
-	}
-
-	rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags,
-			       ocfs2_validate_dir_block);
-	if (rc) {
-		mlog_errno(rc);
-		goto out;
-	}
-
-	/* If ocfs2_read_blocks() got us a new bh, pass it up.  */
-	if (!*bh)
+	/* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
+	if (!rc && !*bh)
 		*bh = tmp;
 
-out:
 	return rc ? -EIO : 0;
 }
 
-- 
cgit v0.10.2


From 53ef99cad9878f02f27bb30bc304fc42af8bdd6e Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Tue, 18 Nov 2008 16:53:43 -0800
Subject: ocfs2: Remove JBD compatibility layer

JBD2 is fully backwards compatible with JBD and it's been tested enough with
Ocfs2 that we can clean this code up now.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/Kconfig b/fs/Kconfig
index e8a47f7..b93425a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -258,16 +258,6 @@ config OCFS2_DEBUG_FS
 	  this option for debugging only as it is likely to decrease
 	  performance of the filesystem.
 
-config OCFS2_COMPAT_JBD
-	bool "Use JBD for compatibility"
-	depends on OCFS2_FS
-	default n
-	select JBD
-	help
-	  The ocfs2 filesystem now uses JBD2 for its journalling.  JBD2
-	  is backwards compatible with JBD.  It is safe to say N here.
-	  However, if you really want to use the original JBD, say Y here.
-
 config OCFS2_FS_POSIX_ACL
 	bool "OCFS2 POSIX Access Control Lists"
 	depends on OCFS2_FS
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index e823a27..69d67ab 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6638,11 +6638,6 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
 		mlog_errno(ret);
 	else if (ocfs2_should_order_data(inode)) {
 		ret = ocfs2_jbd2_file_inode(handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
-		ret = walk_page_buffers(handle, page_buffers(page),
-					from, to, &partial,
-					ocfs2_journal_dirty_data);
-#endif
 		if (ret < 0)
 			mlog_errno(ret);
 	}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index e219f8b..6af79ad 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -474,12 +474,6 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
 
 	if (ocfs2_should_order_data(inode)) {
 		ret = ocfs2_jbd2_file_inode(handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
-		ret = walk_page_buffers(handle,
-					page_buffers(page),
-					from, to, NULL,
-					ocfs2_journal_dirty_data);
-#endif
 		if (ret < 0)
 			mlog_errno(ret);
 	}
@@ -1065,15 +1059,8 @@ static void ocfs2_write_failure(struct inode *inode,
 		tmppage = wc->w_pages[i];
 
 		if (page_has_buffers(tmppage)) {
-			if (ocfs2_should_order_data(inode)) {
+			if (ocfs2_should_order_data(inode))
 				ocfs2_jbd2_file_inode(wc->w_handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
-				walk_page_buffers(wc->w_handle,
-						  page_buffers(tmppage),
-						  from, to, NULL,
-						  ocfs2_journal_dirty_data);
-#endif
-			}
 
 			block_commit_write(tmppage, from, to);
 		}
@@ -1912,15 +1899,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
 		}
 
 		if (page_has_buffers(tmppage)) {
-			if (ocfs2_should_order_data(inode)) {
+			if (ocfs2_should_order_data(inode))
 				ocfs2_jbd2_file_inode(wc->w_handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
-				walk_page_buffers(wc->w_handle,
-						  page_buffers(tmppage),
-						  from, to, NULL,
-						  ocfs2_journal_dirty_data);
-#endif
-			}
 			block_commit_write(tmppage, from, to);
 		}
 	}
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 9223bfc..12b62a3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -434,20 +434,6 @@ int ocfs2_journal_dirty(handle_t *handle,
 	return status;
 }
 
-#ifdef CONFIG_OCFS2_COMPAT_JBD
-int ocfs2_journal_dirty_data(handle_t *handle,
-			     struct buffer_head *bh)
-{
-	int err = journal_dirty_data(handle, bh);
-	if (err)
-		mlog_errno(err);
-	/* TODO: When we can handle it, abort the handle and go RO on
-	 * error here. */
-
-	return err;
-}
-#endif
-
 #define OCFS2_DEFAULT_COMMIT_INTERVAL	(HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
 
 void ocfs2_set_journal_params(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index d4d14e9..8203980 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -27,12 +27,7 @@
 #define OCFS2_JOURNAL_H
 
 #include <linux/fs.h>
-#ifndef CONFIG_OCFS2_COMPAT_JBD
-# include <linux/jbd2.h>
-#else
-# include <linux/jbd.h>
-# include "ocfs2_jbd_compat.h"
-#endif
+#include <linux/jbd2.h>
 
 enum ocfs2_journal_state {
 	OCFS2_JOURNAL_FREE = 0,
@@ -273,10 +268,6 @@ int                  ocfs2_journal_access(handle_t *handle,
  */
 int                  ocfs2_journal_dirty(handle_t *handle,
 					 struct buffer_head *bh);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
-int                  ocfs2_journal_dirty_data(handle_t *handle,
-					      struct buffer_head *bh);
-#endif
 
 /*
  *  Credit Macros:
diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h
deleted file mode 100644
index b91c78f..0000000
--- a/fs/ocfs2/ocfs2_jbd_compat.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ocfs2_jbd_compat.h
- *
- * Compatibility defines for JBD.
- *
- * Copyright (C) 2008 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#ifndef OCFS2_JBD_COMPAT_H
-#define OCFS2_JBD_COMPAT_H
-
-#ifndef CONFIG_OCFS2_COMPAT_JBD
-# error Should not have been included
-#endif
-
-struct jbd2_inode {
-	unsigned int dummy;
-};
-
-#define JBD2_BARRIER			JFS_BARRIER
-#define JBD2_DEFAULT_MAX_COMMIT_AGE	JBD_DEFAULT_MAX_COMMIT_AGE
-
-#define jbd2_journal_ack_err			journal_ack_err
-#define jbd2_journal_clear_err			journal_clear_err
-#define jbd2_journal_destroy			journal_destroy
-#define jbd2_journal_dirty_metadata		journal_dirty_metadata
-#define jbd2_journal_errno			journal_errno
-#define jbd2_journal_extend			journal_extend
-#define jbd2_journal_flush			journal_flush
-#define jbd2_journal_force_commit		journal_force_commit
-#define jbd2_journal_get_write_access		journal_get_write_access
-#define jbd2_journal_get_undo_access		journal_get_undo_access
-#define jbd2_journal_init_inode			journal_init_inode
-#define jbd2_journal_invalidatepage		journal_invalidatepage
-#define jbd2_journal_load			journal_load
-#define jbd2_journal_lock_updates		journal_lock_updates
-#define jbd2_journal_restart			journal_restart
-#define jbd2_journal_start			journal_start
-#define jbd2_journal_start_commit		journal_start_commit
-#define jbd2_journal_stop			journal_stop
-#define jbd2_journal_try_to_free_buffers	journal_try_to_free_buffers
-#define jbd2_journal_unlock_updates		journal_unlock_updates
-#define jbd2_journal_wipe			journal_wipe
-#define jbd2_log_wait_commit			log_wait_commit
-
-static inline int jbd2_journal_file_inode(handle_t *handle,
-					  struct jbd2_inode *inode)
-{
-	return 0;
-}
-
-static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
-						      loff_t new_size)
-{
-	return 0;
-}
-
-static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode,
-					       struct inode *inode)
-{
-	return;
-}
-
-static inline void jbd2_journal_release_jbd_inode(journal_t *journal,
-						  struct jbd2_inode *jinode)
-{
-	return;
-}
-
-
-#endif  /* OCFS2_JBD_COMPAT_H */
-- 
cgit v0.10.2


From 97aff52ae13d3c11a074bbbfc80ad0b59cb8cdeb Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 19 Nov 2008 16:48:41 +0800
Subject: ocfs2/xattr: Fix a bug in xattr allocation estimation

When we extend one xattr's value to a large size, the old value size might
be smaller than the size of a value root. In those cases, we still need to
guess the metadata allocation.

Reported-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 8af29b3..d0b94ed 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2270,6 +2270,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 								 value_size);
 			xv = (struct ocfs2_xattr_value_root *)
 			     (base + name_offset + name_len);
+			value_size = OCFS2_XATTR_ROOT_SIZE;
 		} else
 			xv = &def_xv.xv;
 
@@ -2283,7 +2284,8 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 							     &xv->xr_list,
 							     new_clusters -
 							     old_clusters);
-			goto out;
+			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
+				goto out;
 		}
 	} else {
 		/*
-- 
cgit v0.10.2


From 9f868f16e40e9ad8e39aebff94a4be0d96520734 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 19 Nov 2008 16:48:42 +0800
Subject: ocfs2/xattr: Restore not_found in xis

During an xattr set, when we move a xattr which was stored in inode to the
outside bucket, we have to delete it and it will use the old value of
xis->not_found. xis->not_found is removed by ocfs2_calc_xattr_set_need
though, so we must restore it.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d0b94ed..9cb71e1 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2414,7 +2414,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 				    struct ocfs2_xattr_search *xbs,
 				    struct ocfs2_xattr_set_ctxt *ctxt)
 {
-	int ret = 0, credits;
+	int ret = 0, credits, old_found;
 
 	if (!xi->value) {
 		/* Remove existing extended attribute */
@@ -2433,6 +2433,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 			xi->value = NULL;
 			xi->value_len = 0;
 
+			old_found = xis->not_found;
 			xis->not_found = -ENODATA;
 			ret = ocfs2_calc_xattr_set_need(inode,
 							di,
@@ -2442,6 +2443,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 							NULL,
 							NULL,
 							&credits);
+			xis->not_found = old_found;
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -2462,6 +2464,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 				if (ret)
 					goto out;
 
+				old_found = xis->not_found;
 				xis->not_found = -ENODATA;
 				ret = ocfs2_calc_xattr_set_need(inode,
 								di,
@@ -2471,6 +2474,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 								NULL,
 								NULL,
 								&credits);
+				xis->not_found = old_found;
 				if (ret) {
 					mlog_errno(ret);
 					goto out;
-- 
cgit v0.10.2


From 74f783af95c982aef6d3a1415275650dcf511666 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 19 Aug 2008 14:51:22 +0200
Subject: quota: Add callbacks for allocating and destroying dquot structures

Some filesystems would like to keep private information together with each
dquot. Add callbacks alloc_dquot and destroy_dquot allowing filesystem to
allocate larger dquots from their private slab in a similar fashion we
currently allocate inodes.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/dquot.c b/fs/dquot.c
index c237ccc..1b5fc4b 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -415,6 +415,16 @@ out_dqlock:
 	return ret;
 }
 
+static void dquot_destroy(struct dquot *dquot)
+{
+	kmem_cache_free(dquot_cachep, dquot);
+}
+
+static inline void do_destroy_dquot(struct dquot *dquot)
+{
+	dquot->dq_sb->dq_op->destroy_dquot(dquot);
+}
+
 /* Invalidate all dquots on the list. Note that this function is called after
  * quota is disabled and pointers from inodes removed so there cannot be new
  * quota users. There can still be some users of quotas due to inodes being
@@ -463,7 +473,7 @@ restart:
 		remove_dquot_hash(dquot);
 		remove_free_dquot(dquot);
 		remove_inuse(dquot);
-		kmem_cache_free(dquot_cachep, dquot);
+		do_destroy_dquot(dquot);
 	}
 	spin_unlock(&dq_list_lock);
 }
@@ -527,7 +537,7 @@ static void prune_dqcache(int count)
 		remove_dquot_hash(dquot);
 		remove_free_dquot(dquot);
 		remove_inuse(dquot);
-		kmem_cache_free(dquot_cachep, dquot);
+		do_destroy_dquot(dquot);
 		count--;
 		head = free_dquots.prev;
 	}
@@ -625,11 +635,16 @@ we_slept:
 	spin_unlock(&dq_list_lock);
 }
 
+static struct dquot *dquot_alloc(struct super_block *sb, int type)
+{
+	return kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
+}
+
 static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 {
 	struct dquot *dquot;
 
-	dquot = kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
+	dquot = sb->dq_op->alloc_dquot(sb, type);
 	if(!dquot)
 		return NODQUOT;
 
@@ -682,7 +697,7 @@ we_slept:
 		dqstats.lookups++;
 		spin_unlock(&dq_list_lock);
 		if (empty)
-			kmem_cache_free(dquot_cachep, empty);
+			do_destroy_dquot(empty);
 	}
 	/* Wait for dq_lock - after this we know that either dquot_release() is already
 	 * finished or it will be canceled due to dq_count > 1 test */
@@ -1533,7 +1548,9 @@ struct dquot_operations dquot_operations = {
 	.acquire_dquot	= dquot_acquire,
 	.release_dquot	= dquot_release,
 	.mark_dirty	= dquot_mark_dquot_dirty,
-	.write_info	= dquot_commit_info
+	.write_info	= dquot_commit_info,
+	.alloc_dquot	= dquot_alloc,
+	.destroy_dquot	= dquot_destroy,
 };
 
 static inline void set_enable_flags(struct quota_info *dqopt, int type)
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 40401b5..3ce708c 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -292,6 +292,8 @@ struct dquot_operations {
 	int (*free_inode) (const struct inode *, unsigned long);
 	int (*transfer) (struct inode *, struct iattr *);
 	int (*write_dquot) (struct dquot *);		/* Ordinary dquot write */
+	struct dquot *(*alloc_dquot)(struct super_block *, int);	/* Allocate memory for new dquot */
+	void (*destroy_dquot)(struct dquot *);		/* Free memory for dquot */
 	int (*acquire_dquot) (struct dquot *);		/* Quota is going to be created on disk */
 	int (*release_dquot) (struct dquot *);		/* Quota is going to be deleted from disk */
 	int (*mark_dirty) (struct dquot *);		/* Dquot is marked dirty */
-- 
cgit v0.10.2


From 12095460f7f315f8ef67a55b2194195d325d48d7 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 14:45:12 +0200
Subject: quota: Increase size of variables for limits and inode usage

So far quota was fine with quota block limits and inode limits/numbers in
a 32-bit type. Now with rapid increase in storage sizes there are coming
requests to be able to handle quota limits above 4TB / more that 2^32 inodes.
So bump up sizes of types in mem_dqblk structure to 64-bits to be able to
handle this. Also update inode allocation / checking functions to use qsize_t
and make global structure keep quota limits in bytes so that things are
consistent.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/dquot.c b/fs/dquot.c
index 1b5fc4b..c02223b 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -835,7 +835,7 @@ static void drop_dquot_ref(struct super_block *sb, int type)
 	}
 }
 
-static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
+static inline void dquot_incr_inodes(struct dquot *dquot, qsize_t number)
 {
 	dquot->dq_dqb.dqb_curinodes += number;
 }
@@ -845,7 +845,7 @@ static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
 	dquot->dq_dqb.dqb_curspace += number;
 }
 
-static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number)
+static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
 {
 	if (dquot->dq_dqb.dqb_curinodes > number)
 		dquot->dq_dqb.dqb_curinodes -= number;
@@ -862,7 +862,7 @@ static inline void dquot_decr_space(struct dquot *dquot, qsize_t number)
 		dquot->dq_dqb.dqb_curspace -= number;
 	else
 		dquot->dq_dqb.dqb_curspace = 0;
-	if (toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+	if (dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
 		dquot->dq_dqb.dqb_btime = (time_t) 0;
 	clear_bit(DQ_BLKS_B, &dquot->dq_flags);
 }
@@ -1038,7 +1038,7 @@ static inline char ignore_hardlimit(struct dquot *dquot)
 }
 
 /* needs dq_data_lock */
-static int check_idq(struct dquot *dquot, ulong inodes, char *warntype)
+static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 {
 	*warntype = QUOTA_NL_NOWARN;
 	if (inodes <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
@@ -1077,7 +1077,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 		return QUOTA_OK;
 
 	if (dquot->dq_dqb.dqb_bhardlimit &&
-	   toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bhardlimit &&
+	    dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bhardlimit &&
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
 			*warntype = QUOTA_NL_BHARDWARN;
@@ -1085,7 +1085,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	}
 
 	if (dquot->dq_dqb.dqb_bsoftlimit &&
-	   toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit &&
+	    dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
 	    dquot->dq_dqb.dqb_btime && get_seconds() >= dquot->dq_dqb.dqb_btime &&
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
@@ -1094,7 +1094,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	}
 
 	if (dquot->dq_dqb.dqb_bsoftlimit &&
-	   toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit &&
+	    dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
 	    dquot->dq_dqb.dqb_btime == 0) {
 		if (!prealloc) {
 			*warntype = QUOTA_NL_BSOFTWARN;
@@ -1111,7 +1111,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	return QUOTA_OK;
 }
 
-static int info_idq_free(struct dquot *dquot, ulong inodes)
+static int info_idq_free(struct dquot *dquot, qsize_t inodes)
 {
 	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
 	    dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
@@ -1128,15 +1128,13 @@ static int info_idq_free(struct dquot *dquot, ulong inodes)
 static int info_bdq_free(struct dquot *dquot, qsize_t space)
 {
 	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
-	    toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+	    dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
 		return QUOTA_NL_NOWARN;
 
-	if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
-	    dquot->dq_dqb.dqb_bsoftlimit)
+	if (dquot->dq_dqb.dqb_curspace - space <= dquot->dq_dqb.dqb_bsoftlimit)
 		return QUOTA_NL_BSOFTBELOW;
-	if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
-	    toqb(dquot->dq_dqb.dqb_curspace - space) <
-						dquot->dq_dqb.dqb_bhardlimit)
+	if (dquot->dq_dqb.dqb_curspace >= dquot->dq_dqb.dqb_bhardlimit &&
+	    dquot->dq_dqb.dqb_curspace - space < dquot->dq_dqb.dqb_bhardlimit)
 		return QUOTA_NL_BHARDBELOW;
 	return QUOTA_NL_NOWARN;
 }
@@ -1279,7 +1277,7 @@ warn_put_all:
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_alloc_inode(const struct inode *inode, unsigned long number)
+int dquot_alloc_inode(const struct inode *inode, qsize_t number)
 {
 	int cnt, ret = NO_QUOTA;
 	char warntype[MAXQUOTAS];
@@ -1364,7 +1362,7 @@ out_sub:
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_free_inode(const struct inode *inode, unsigned long number)
+int dquot_free_inode(const struct inode *inode, qsize_t number)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
@@ -1883,14 +1881,24 @@ int vfs_dq_quota_on_remount(struct super_block *sb)
 	return ret;
 }
 
+static inline qsize_t qbtos(qsize_t blocks)
+{
+	return blocks << QIF_DQBLKSIZE_BITS;
+}
+
+static inline qsize_t stoqb(qsize_t space)
+{
+	return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS;
+}
+
 /* Generic routine for getting common part of quota structure */
 static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
 {
 	struct mem_dqblk *dm = &dquot->dq_dqb;
 
 	spin_lock(&dq_data_lock);
-	di->dqb_bhardlimit = dm->dqb_bhardlimit;
-	di->dqb_bsoftlimit = dm->dqb_bsoftlimit;
+	di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit);
+	di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit);
 	di->dqb_curspace = dm->dqb_curspace;
 	di->dqb_ihardlimit = dm->dqb_ihardlimit;
 	di->dqb_isoftlimit = dm->dqb_isoftlimit;
@@ -1937,8 +1945,8 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 		check_blim = 1;
 	}
 	if (di->dqb_valid & QIF_BLIMITS) {
-		dm->dqb_bsoftlimit = di->dqb_bsoftlimit;
-		dm->dqb_bhardlimit = di->dqb_bhardlimit;
+		dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
+		dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
 		check_blim = 1;
 	}
 	if (di->dqb_valid & QIF_INODES) {
@@ -1956,7 +1964,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 		dm->dqb_itime = di->dqb_itime;
 
 	if (check_blim) {
-		if (!dm->dqb_bsoftlimit || toqb(dm->dqb_curspace) < dm->dqb_bsoftlimit) {
+		if (!dm->dqb_bsoftlimit || dm->dqb_curspace < dm->dqb_bsoftlimit) {
 			dm->dqb_btime = 0;
 			clear_bit(DQ_BLKS_B, &dquot->dq_flags);
 		}
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index 5ae15b1..3e078ee 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -14,14 +14,27 @@ MODULE_AUTHOR("Jan Kara");
 MODULE_DESCRIPTION("Old quota format support");
 MODULE_LICENSE("GPL");
 
+#define QUOTABLOCK_BITS 10
+#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
+
+static inline qsize_t v1_stoqb(qsize_t space)
+{
+	return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS;
+}
+
+static inline qsize_t v1_qbtos(qsize_t blocks)
+{
+	return blocks << QUOTABLOCK_BITS;
+}
+
 static void v1_disk2mem_dqblk(struct mem_dqblk *m, struct v1_disk_dqblk *d)
 {
 	m->dqb_ihardlimit = d->dqb_ihardlimit;
 	m->dqb_isoftlimit = d->dqb_isoftlimit;
 	m->dqb_curinodes = d->dqb_curinodes;
-	m->dqb_bhardlimit = d->dqb_bhardlimit;
-	m->dqb_bsoftlimit = d->dqb_bsoftlimit;
-	m->dqb_curspace = ((qsize_t)d->dqb_curblocks) << QUOTABLOCK_BITS;
+	m->dqb_bhardlimit = v1_qbtos(d->dqb_bhardlimit);
+	m->dqb_bsoftlimit = v1_qbtos(d->dqb_bsoftlimit);
+	m->dqb_curspace = v1_qbtos(d->dqb_curblocks);
 	m->dqb_itime = d->dqb_itime;
 	m->dqb_btime = d->dqb_btime;
 }
@@ -31,9 +44,9 @@ static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m)
 	d->dqb_ihardlimit = m->dqb_ihardlimit;
 	d->dqb_isoftlimit = m->dqb_isoftlimit;
 	d->dqb_curinodes = m->dqb_curinodes;
-	d->dqb_bhardlimit = m->dqb_bhardlimit;
-	d->dqb_bsoftlimit = m->dqb_bsoftlimit;
-	d->dqb_curblocks = toqb(m->dqb_curspace);
+	d->dqb_bhardlimit = v1_stoqb(m->dqb_bhardlimit);
+	d->dqb_bsoftlimit = v1_stoqb(m->dqb_bsoftlimit);
+	d->dqb_curblocks = v1_stoqb(m->dqb_curspace);
 	d->dqb_itime = m->dqb_itime;
 	d->dqb_btime = m->dqb_btime;
 }
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index b53827d..51c4717 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -26,6 +26,19 @@ typedef char *dqbuf_t;
 #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
 #define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
 
+#define QUOTABLOCK_BITS 10
+#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
+
+static inline qsize_t v2_stoqb(qsize_t space)
+{
+	return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS;
+}
+
+static inline qsize_t v2_qbtos(qsize_t blocks)
+{
+	return blocks << QUOTABLOCK_BITS;
+}
+
 /* Check whether given file is really vfsv0 quotafile */
 static int v2_check_quota_file(struct super_block *sb, int type)
 {
@@ -104,8 +117,8 @@ static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
 	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
 	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
 	m->dqb_itime = le64_to_cpu(d->dqb_itime);
-	m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
-	m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
+	m->dqb_bhardlimit = v2_qbtos(le32_to_cpu(d->dqb_bhardlimit));
+	m->dqb_bsoftlimit = v2_qbtos(le32_to_cpu(d->dqb_bsoftlimit));
 	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
 	m->dqb_btime = le64_to_cpu(d->dqb_btime);
 }
@@ -116,8 +129,8 @@ static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
 	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
 	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
 	d->dqb_itime = cpu_to_le64(m->dqb_itime);
-	d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
-	d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
+	d->dqb_bhardlimit = cpu_to_le32(v2_qbtos(m->dqb_bhardlimit));
+	d->dqb_bsoftlimit = cpu_to_le32(v2_qbtos(m->dqb_bsoftlimit));
 	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
 	d->dqb_btime = cpu_to_le64(m->dqb_btime);
 	d->dqb_id = cpu_to_le32(id);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 3ce708c..9ea4683 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -39,15 +39,6 @@
 #define __DQUOT_VERSION__	"dquot_6.5.1"
 #define __DQUOT_NUM_VERSION__	6*10000+5*100+1
 
-/* Size of blocks in which are counted size limits */
-#define QUOTABLOCK_BITS 10
-#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
-
-/* Conversion routines from and to quota blocks */
-#define qb2kb(x) ((x) << (QUOTABLOCK_BITS-10))
-#define kb2qb(x) ((x) >> (QUOTABLOCK_BITS-10))
-#define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS)
-
 #define MAXQUOTAS 2
 #define USRQUOTA  0		/* element used for user quotas */
 #define GRPQUOTA  1		/* element used for group quotas */
@@ -80,6 +71,11 @@
 #define Q_GETQUOTA 0x800007	/* get user quota structure */
 #define Q_SETQUOTA 0x800008	/* set user quota structure */
 
+/* Size of block in which space limits are passed through the quota
+ * interface */
+#define QIF_DQBLKSIZE_BITS 10
+#define QIF_DQBLKSIZE (1 << QIF_DQBLKSIZE_BITS)
+
 /*
  * Quota structure used for communication with userspace via quotactl
  * Following flags are used to specify which fields are valid
@@ -187,12 +183,12 @@ extern spinlock_t dq_data_lock;
  * Data for one user/group kept in memory
  */
 struct mem_dqblk {
-	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
-	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
+	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
 	qsize_t dqb_curspace;	/* current used space */
-	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__u32 dqb_isoftlimit;	/* preferred inode limit */
-	__u32 dqb_curinodes;	/* current # allocated inodes */
+	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	qsize_t dqb_isoftlimit;	/* preferred inode limit */
+	qsize_t dqb_curinodes;	/* current # allocated inodes */
 	time_t dqb_btime;	/* time limit for excessive disk use */
 	time_t dqb_itime;	/* time limit for excessive inode use */
 };
@@ -287,9 +283,9 @@ struct dquot_operations {
 	int (*initialize) (struct inode *, int);
 	int (*drop) (struct inode *);
 	int (*alloc_space) (struct inode *, qsize_t, int);
-	int (*alloc_inode) (const struct inode *, unsigned long);
+	int (*alloc_inode) (const struct inode *, qsize_t);
 	int (*free_space) (struct inode *, qsize_t);
-	int (*free_inode) (const struct inode *, unsigned long);
+	int (*free_inode) (const struct inode *, qsize_t);
 	int (*transfer) (struct inode *, struct iattr *);
 	int (*write_dquot) (struct dquot *);		/* Ordinary dquot write */
 	struct dquot *(*alloc_dquot)(struct super_block *, int);	/* Allocate memory for new dquot */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index a558a4c..adcc7ba 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -26,10 +26,10 @@ int dquot_initialize(struct inode *inode, int type);
 int dquot_drop(struct inode *inode);
 
 int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
-int dquot_alloc_inode(const struct inode *inode, unsigned long number);
+int dquot_alloc_inode(const struct inode *inode, qsize_t number);
 
 int dquot_free_space(struct inode *inode, qsize_t number);
-int dquot_free_inode(const struct inode *inode, unsigned long number);
+int dquot_free_inode(const struct inode *inode, qsize_t number);
 
 int dquot_transfer(struct inode *inode, struct iattr *iattr);
 int dquot_commit(struct dquot *dquot);
-- 
cgit v0.10.2


From 1497d3ad487b64eeea83ac203263802755438949 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 15:49:59 +0200
Subject: quota: Remove bogus 'optimization' in check_idq() and check_bdq()

Checks like <= 0 for an unsigned type do not make much sence. The value
could be only 0 and that does not happen often enough for the check
to be worth it.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/dquot.c b/fs/dquot.c
index c02223b..c883306 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1041,7 +1041,7 @@ static inline char ignore_hardlimit(struct dquot *dquot)
 static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 {
 	*warntype = QUOTA_NL_NOWARN;
-	if (inodes <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
+	if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return QUOTA_OK;
 
 	if (dquot->dq_dqb.dqb_ihardlimit &&
@@ -1073,7 +1073,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
 {
 	*warntype = QUOTA_NL_NOWARN;
-	if (space <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
+	if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return QUOTA_OK;
 
 	if (dquot->dq_dqb.dqb_bhardlimit &&
-- 
cgit v0.10.2


From e4bc7b4b7ff783779b6928d55a9308910bf180a3 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 16:21:01 +0200
Subject: quota: Make _SUSPENDED just a flag

Upto now, DQUOT_USR_SUSPENDED behaved like a state - i.e., either quota
was enabled or suspended or none. Now allowed states are 0, ENABLED,
ENABLED | SUSPENDED. This will be useful later when we implement separate
enabling of quota usage tracking and limits enforcement because we need to
keep track of a state which has been suspended.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/dquot.c b/fs/dquot.c
index c883306..22340c6 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1570,18 +1570,20 @@ static inline void reset_enable_flags(struct quota_info *dqopt, int type,
 {
 	switch (type) {
 		case USRQUOTA:
-			dqopt->flags &= ~DQUOT_USR_ENABLED;
 			if (remount)
 				dqopt->flags |= DQUOT_USR_SUSPENDED;
-			else
+			else {
+				dqopt->flags &= ~DQUOT_USR_ENABLED;
 				dqopt->flags &= ~DQUOT_USR_SUSPENDED;
+			}
 			break;
 		case GRPQUOTA:
-			dqopt->flags &= ~DQUOT_GRP_ENABLED;
 			if (remount)
 				dqopt->flags |= DQUOT_GRP_SUSPENDED;
-			else
+			else {
+				dqopt->flags &= ~DQUOT_GRP_ENABLED;
 				dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
+			}
 			break;
 	}
 }
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index adcc7ba..ffd9707 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -67,8 +67,10 @@ static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
 static inline int sb_has_quota_enabled(struct super_block *sb, int type)
 {
 	if (type == USRQUOTA)
-		return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED;
-	return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED;
+		return (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED)
+			&& !(sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED);
+	return (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)
+		&& !(sb_dqopt(sb)->flags & DQUOT_GROUP_SUSPENDED);
 }
 
 static inline int sb_any_quota_enabled(struct super_block *sb)
-- 
cgit v0.10.2


From f55abc0fb9c3189de3da829adf3220322c0da43e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 17:50:32 +0200
Subject: quota: Allow to separately enable quota accounting and enforcing
 limits

Split DQUOT_USR_ENABLED (and DQUOT_GRP_ENABLED) into DQUOT_USR_USAGE_ENABLED
and DQUOT_USR_LIMITS_ENABLED. This way we are able to separately enable /
disable whether we should:
1) ignore quotas completely
2) just keep uptodate information about usage
3) actually enforce quota limits

This is going to be useful when quota is treated as filesystem metadata - we
then want to keep quota information uptodate all the time and just enable /
disable limits enforcement.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/dquot.c b/fs/dquot.c
index 22340c6..7569633 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -489,7 +489,7 @@ int vfs_quota_sync(struct super_block *sb, int type)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (type != -1 && cnt != type)
 			continue;
-		if (!sb_has_quota_enabled(sb, cnt))
+		if (!sb_has_quota_active(sb, cnt))
 			continue;
 		spin_lock(&dq_list_lock);
 		dirty = &dqopt->info[cnt].dqi_dirty_list;
@@ -514,8 +514,8 @@ int vfs_quota_sync(struct super_block *sb, int type)
 	}
 
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)
-			&& info_dirty(&dqopt->info[cnt]))
+		if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
+		    && info_dirty(&dqopt->info[cnt]))
 			sb->dq_op->write_info(sb, cnt);
 	spin_lock(&dq_list_lock);
 	dqstats.syncs++;
@@ -594,7 +594,7 @@ we_slept:
 		/* We have more than one user... nothing to do */
 		atomic_dec(&dquot->dq_count);
 		/* Releasing dquot during quotaoff phase? */
-		if (!sb_has_quota_enabled(dquot->dq_sb, dquot->dq_type) &&
+		if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_type) &&
 		    atomic_read(&dquot->dq_count) == 1)
 			wake_up(&dquot->dq_wait_unused);
 		spin_unlock(&dq_list_lock);
@@ -670,7 +670,7 @@ static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
 	unsigned int hashent = hashfn(sb, id, type);
 	struct dquot *dquot, *empty = NODQUOT;
 
-        if (!sb_has_quota_enabled(sb, type))
+        if (!sb_has_quota_active(sb, type))
 		return NODQUOT;
 we_slept:
 	spin_lock(&dq_list_lock);
@@ -1041,7 +1041,8 @@ static inline char ignore_hardlimit(struct dquot *dquot)
 static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 {
 	*warntype = QUOTA_NL_NOWARN;
-	if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
+	if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
+	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return QUOTA_OK;
 
 	if (dquot->dq_dqb.dqb_ihardlimit &&
@@ -1073,7 +1074,8 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
 {
 	*warntype = QUOTA_NL_NOWARN;
-	if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
+	if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
+	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return QUOTA_OK;
 
 	if (dquot->dq_dqb.dqb_bhardlimit &&
@@ -1114,7 +1116,8 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 static int info_idq_free(struct dquot *dquot, qsize_t inodes)
 {
 	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
-	    dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
+	    dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit ||
+	    !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type))
 		return QUOTA_NL_NOWARN;
 
 	if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
@@ -1508,7 +1511,7 @@ warn_put_all:
 /* Wrapper for transferring ownership of an inode */
 int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
 {
-	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
+	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
 		vfs_dq_init(inode);
 		if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
 			return 1;
@@ -1551,53 +1554,22 @@ struct dquot_operations dquot_operations = {
 	.destroy_dquot	= dquot_destroy,
 };
 
-static inline void set_enable_flags(struct quota_info *dqopt, int type)
-{
-	switch (type) {
-		case USRQUOTA:
-			dqopt->flags |= DQUOT_USR_ENABLED;
-			dqopt->flags &= ~DQUOT_USR_SUSPENDED;
-			break;
-		case GRPQUOTA:
-			dqopt->flags |= DQUOT_GRP_ENABLED;
-			dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
-			break;
-	}
-}
-
-static inline void reset_enable_flags(struct quota_info *dqopt, int type,
-				      int remount)
-{
-	switch (type) {
-		case USRQUOTA:
-			if (remount)
-				dqopt->flags |= DQUOT_USR_SUSPENDED;
-			else {
-				dqopt->flags &= ~DQUOT_USR_ENABLED;
-				dqopt->flags &= ~DQUOT_USR_SUSPENDED;
-			}
-			break;
-		case GRPQUOTA:
-			if (remount)
-				dqopt->flags |= DQUOT_GRP_SUSPENDED;
-			else {
-				dqopt->flags &= ~DQUOT_GRP_ENABLED;
-				dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
-			}
-			break;
-	}
-}
-
-
 /*
  * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
  */
-int vfs_quota_off(struct super_block *sb, int type, int remount)
+int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
 {
 	int cnt, ret = 0;
 	struct quota_info *dqopt = sb_dqopt(sb);
 	struct inode *toputinode[MAXQUOTAS];
 
+	/* Cannot turn off usage accounting without turning off limits, or
+	 * suspend quotas and simultaneously turn quotas off. */
+	if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED))
+	    || (flags & DQUOT_SUSPENDED && flags & (DQUOT_LIMITS_ENABLED |
+	    DQUOT_USAGE_ENABLED)))
+		return -EINVAL;
+
 	/* We need to serialize quota_off() for device */
 	mutex_lock(&dqopt->dqonoff_mutex);
 
@@ -1606,7 +1578,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 	 * sometimes we are called when fill_super() failed and calling
 	 * sync_fs() in such cases does no good.
 	 */
-	if (!sb_any_quota_enabled(sb) && !sb_any_quota_suspended(sb)) {
+	if (!sb_any_quota_loaded(sb)) {
 		mutex_unlock(&dqopt->dqonoff_mutex);
 		return 0;
 	}
@@ -1614,17 +1586,28 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 		toputinode[cnt] = NULL;
 		if (type != -1 && cnt != type)
 			continue;
-		/* If we keep inodes of quota files after remount and quotaoff
-		 * is called, drop kept inodes. */
-		if (!remount && sb_has_quota_suspended(sb, cnt)) {
-			iput(dqopt->files[cnt]);
-			dqopt->files[cnt] = NULL;
-			reset_enable_flags(dqopt, cnt, 0);
+		if (!sb_has_quota_loaded(sb, cnt))
 			continue;
+
+		if (flags & DQUOT_SUSPENDED) {
+			dqopt->flags |=
+				dquot_state_flag(DQUOT_SUSPENDED, cnt);
+		} else {
+			dqopt->flags &= ~dquot_state_flag(flags, cnt);
+			/* Turning off suspended quotas? */
+			if (!sb_has_quota_loaded(sb, cnt) &&
+			    sb_has_quota_suspended(sb, cnt)) {
+				dqopt->flags &=	~dquot_state_flag(
+							DQUOT_SUSPENDED, cnt);
+				iput(dqopt->files[cnt]);
+				dqopt->files[cnt] = NULL;
+				continue;
+			}
 		}
-		if (!sb_has_quota_enabled(sb, cnt))
+
+		/* We still have to keep quota loaded? */
+		if (sb_has_quota_loaded(sb, cnt) && !(flags & DQUOT_SUSPENDED))
 			continue;
-		reset_enable_flags(dqopt, cnt, remount);
 
 		/* Note: these are blocking operations */
 		drop_dquot_ref(sb, cnt);
@@ -1640,7 +1623,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 		put_quota_format(dqopt->info[cnt].dqi_format);
 
 		toputinode[cnt] = dqopt->files[cnt];
-		if (!remount)
+		if (!sb_has_quota_loaded(sb, cnt))
 			dqopt->files[cnt] = NULL;
 		dqopt->info[cnt].dqi_flags = 0;
 		dqopt->info[cnt].dqi_igrace = 0;
@@ -1663,7 +1646,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 			mutex_lock(&dqopt->dqonoff_mutex);
 			/* If quota was reenabled in the meantime, we have
 			 * nothing to do */
-			if (!sb_has_quota_enabled(sb, cnt)) {
+			if (!sb_has_quota_loaded(sb, cnt)) {
 				mutex_lock_nested(&toputinode[cnt]->i_mutex, I_MUTEX_QUOTA);
 				toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
 				  S_NOATIME | S_NOQUOTA);
@@ -1673,10 +1656,13 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 			}
 			mutex_unlock(&dqopt->dqonoff_mutex);
 			/* On remount RO, we keep the inode pointer so that we
-			 * can reenable quota on the subsequent remount RW.
-			 * But we have better not keep inode pointer when there
-			 * is pending delete on the quota file... */
-			if (!remount)
+			 * can reenable quota on the subsequent remount RW. We
+			 * have to check 'flags' variable and not use sb_has_
+			 * function because another quotaon / quotaoff could
+			 * change global state before we got here. We refuse
+			 * to suspend quotas when there is pending delete on
+			 * the quota file... */
+			if (!(flags & DQUOT_SUSPENDED))
 				iput(toputinode[cnt]);
 			else if (!toputinode[cnt]->i_nlink)
 				ret = -EBUSY;
@@ -1686,12 +1672,22 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 	return ret;
 }
 
+int vfs_quota_off(struct super_block *sb, int type, int remount)
+{
+	return vfs_quota_disable(sb, type, remount ? DQUOT_SUSPENDED :
+				 (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED));
+}
+
 /*
  *	Turn quotas on on a device
  */
 
-/* Helper function when we already have the inode */
-static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
+/*
+ * Helper function to turn quotas on when we already have the inode of
+ * quota file and no quota information is loaded.
+ */
+static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
+	unsigned int flags)
 {
 	struct quota_format_type *fmt = find_quota_format(format_id);
 	struct super_block *sb = inode->i_sb;
@@ -1713,6 +1709,11 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
 		error = -EINVAL;
 		goto out_fmt;
 	}
+	/* Usage always has to be set... */
+	if (!(flags & DQUOT_USAGE_ENABLED)) {
+		error = -EINVAL;
+		goto out_fmt;
+	}
 
 	/* As we bypass the pagecache we must now flush the inode so that
 	 * we see all the changes from userspace... */
@@ -1721,8 +1722,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
 	invalidate_bdev(sb->s_bdev);
 	mutex_lock(&inode->i_mutex);
 	mutex_lock(&dqopt->dqonoff_mutex);
-	if (sb_has_quota_enabled(sb, type) ||
-			sb_has_quota_suspended(sb, type)) {
+	if (sb_has_quota_loaded(sb, type)) {
 		error = -EBUSY;
 		goto out_lock;
 	}
@@ -1754,7 +1754,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
 	}
 	mutex_unlock(&dqopt->dqio_mutex);
 	mutex_unlock(&inode->i_mutex);
-	set_enable_flags(dqopt, type);
+	dqopt->flags |= dquot_state_flag(flags, type);
 
 	add_dquot_ref(sb, type);
 	mutex_unlock(&dqopt->dqonoff_mutex);
@@ -1787,20 +1787,23 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
 	struct quota_info *dqopt = sb_dqopt(sb);
 	struct inode *inode;
 	int ret;
+	unsigned int flags;
 
 	mutex_lock(&dqopt->dqonoff_mutex);
 	if (!sb_has_quota_suspended(sb, type)) {
 		mutex_unlock(&dqopt->dqonoff_mutex);
 		return 0;
 	}
-	BUG_ON(sb_has_quota_enabled(sb, type));
-
 	inode = dqopt->files[type];
 	dqopt->files[type] = NULL;
-	reset_enable_flags(dqopt, type, 0);
+	flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED |
+						DQUOT_LIMITS_ENABLED, type);
+	dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type);
 	mutex_unlock(&dqopt->dqonoff_mutex);
 
-	ret = vfs_quota_on_inode(inode, type, dqopt->info[type].dqi_fmt_id);
+	flags = dquot_generic_flag(flags, type);
+	ret = vfs_load_quota_inode(inode, type, dqopt->info[type].dqi_fmt_id,
+				   flags);
 	iput(inode);
 
 	return ret;
@@ -1816,12 +1819,12 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
 	if (path->mnt->mnt_sb != sb)
 		error = -EXDEV;
 	else
-		error = vfs_quota_on_inode(path->dentry->d_inode, type,
-					   format_id);
+		error = vfs_load_quota_inode(path->dentry->d_inode, type,
+					     format_id, DQUOT_USAGE_ENABLED |
+					     DQUOT_LIMITS_ENABLED);
 	return error;
 }
 
-/* Actual function called from quotactl() */
 int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
 		 int remount)
 {
@@ -1840,6 +1843,50 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
 }
 
 /*
+ * More powerful function for turning on quotas allowing setting
+ * of individual quota flags
+ */
+int vfs_quota_enable(struct inode *inode, int type, int format_id,
+		unsigned int flags)
+{
+	int ret = 0;
+	struct super_block *sb = inode->i_sb;
+	struct quota_info *dqopt = sb_dqopt(sb);
+
+	/* Just unsuspend quotas? */
+	if (flags & DQUOT_SUSPENDED)
+		return vfs_quota_on_remount(sb, type);
+	if (!flags)
+		return 0;
+	/* Just updating flags needed? */
+	if (sb_has_quota_loaded(sb, type)) {
+		mutex_lock(&dqopt->dqonoff_mutex);
+		/* Now do a reliable test... */
+		if (!sb_has_quota_loaded(sb, type)) {
+			mutex_unlock(&dqopt->dqonoff_mutex);
+			goto load_quota;
+		}
+		if (flags & DQUOT_USAGE_ENABLED &&
+		    sb_has_quota_usage_enabled(sb, type)) {
+			ret = -EBUSY;
+			goto out_lock;
+		}
+		if (flags & DQUOT_LIMITS_ENABLED &&
+		    sb_has_quota_limits_enabled(sb, type)) {
+			ret = -EBUSY;
+			goto out_lock;
+		}
+		sb_dqopt(sb)->flags |= dquot_state_flag(flags, type);
+out_lock:
+		mutex_unlock(&dqopt->dqonoff_mutex);
+		return ret;
+	}
+
+load_quota:
+	return vfs_load_quota_inode(inode, type, format_id, flags);
+}
+
+/*
  * This function is used when filesystem needs to initialize quotas
  * during mount time.
  */
@@ -1860,7 +1907,8 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
 
 	error = security_quota_on(dentry);
 	if (!error)
-		error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
+		error = vfs_load_quota_inode(dentry->d_inode, type, format_id,
+				DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 
 out:
 	dput(dentry);
@@ -1997,12 +2045,14 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
 	int rc;
 
 	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	if (!(dquot = dqget(sb, id, type))) {
-		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
-		return -ESRCH;
+	dquot = dqget(sb, id, type);
+	if (!dquot) {
+		rc = -ESRCH;
+		goto out;
 	}
 	rc = do_set_dqblk(dquot, di);
 	dqput(dquot);
+out:
 	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	return rc;
 }
@@ -2013,7 +2063,7 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	struct mem_dqinfo *mi;
   
 	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	if (!sb_has_quota_enabled(sb, type)) {
+	if (!sb_has_quota_active(sb, type)) {
 		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 		return -ESRCH;
 	}
@@ -2032,11 +2082,12 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 {
 	struct mem_dqinfo *mi;
+	int err = 0;
 
 	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	if (!sb_has_quota_enabled(sb, type)) {
-		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
-		return -ESRCH;
+	if (!sb_has_quota_active(sb, type)) {
+		err = -ESRCH;
+		goto out;
 	}
 	mi = sb_dqopt(sb)->info + type;
 	spin_lock(&dq_data_lock);
@@ -2050,8 +2101,9 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	mark_info_dirty(sb, type);
 	/* Force write to disk */
 	sb->dq_op->write_info(sb, type);
+out:
 	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
-	return 0;
+	return err;
 }
 
 struct quotactl_ops vfs_quotactl_ops = {
@@ -2213,9 +2265,11 @@ EXPORT_SYMBOL(register_quota_format);
 EXPORT_SYMBOL(unregister_quota_format);
 EXPORT_SYMBOL(dqstats);
 EXPORT_SYMBOL(dq_data_lock);
+EXPORT_SYMBOL(vfs_quota_enable);
 EXPORT_SYMBOL(vfs_quota_on);
 EXPORT_SYMBOL(vfs_quota_on_path);
 EXPORT_SYMBOL(vfs_quota_on_mount);
+EXPORT_SYMBOL(vfs_quota_disable);
 EXPORT_SYMBOL(vfs_quota_off);
 EXPORT_SYMBOL(vfs_quota_sync);
 EXPORT_SYMBOL(vfs_get_dqinfo);
diff --git a/fs/quota.c b/fs/quota.c
index b7fe44e..8678d9f 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -73,7 +73,7 @@ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid
 		case Q_SETQUOTA:
 		case Q_GETQUOTA:
 			/* This is just informative test so we are satisfied without a lock */
-			if (!sb_has_quota_enabled(sb, type))
+			if (!sb_has_quota_active(sb, type))
 				return -ESRCH;
 	}
 
@@ -175,7 +175,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (type != -1 && cnt != type)
 			continue;
-		if (!sb_has_quota_enabled(sb, cnt))
+		if (!sb_has_quota_active(sb, cnt))
 			continue;
 		mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, I_MUTEX_QUOTA);
 		truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
@@ -201,7 +201,7 @@ restart:
 		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 			if (type != -1 && type != cnt)
 				continue;
-			if (!sb_has_quota_enabled(sb, cnt))
+			if (!sb_has_quota_active(sb, cnt))
 				continue;
 			if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
 			    list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
@@ -245,7 +245,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
 			__u32 fmt;
 
 			down_read(&sb_dqopt(sb)->dqptr_sem);
-			if (!sb_has_quota_enabled(sb, type)) {
+			if (!sb_has_quota_active(sb, type)) {
 				up_read(&sb_dqopt(sb)->dqptr_sem);
 				return -ESRCH;
 			}
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 9ea4683..93717ab 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -318,12 +318,34 @@ struct quota_format_type {
 	struct quota_format_type *qf_next;
 };
 
-#define DQUOT_USR_ENABLED	0x01		/* User diskquotas enabled */
-#define DQUOT_GRP_ENABLED	0x02		/* Group diskquotas enabled */
-#define DQUOT_USR_SUSPENDED	0x04		/* User diskquotas are off, but
+/* Quota state flags - they actually come in two flavors - for users and groups */
+enum {
+	_DQUOT_USAGE_ENABLED = 0,		/* Track disk usage for users */
+	_DQUOT_LIMITS_ENABLED,			/* Enforce quota limits for users */
+	_DQUOT_SUSPENDED,			/* User diskquotas are off, but
 						 * we have necessary info in
 						 * memory to turn them on */
-#define DQUOT_GRP_SUSPENDED	0x08		/* The same for group quotas */
+	_DQUOT_STATE_FLAGS
+};
+#define DQUOT_USAGE_ENABLED	(1 << _DQUOT_USAGE_ENABLED)
+#define DQUOT_LIMITS_ENABLED	(1 << _DQUOT_LIMITS_ENABLED)
+#define DQUOT_SUSPENDED		(1 << _DQUOT_SUSPENDED)
+#define DQUOT_STATE_FLAGS	(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \
+				 DQUOT_SUSPENDED)
+
+static inline unsigned int dquot_state_flag(unsigned int flags, int type)
+{
+	if (type == USRQUOTA)
+		return flags;
+	return flags << _DQUOT_STATE_FLAGS;
+}
+
+static inline unsigned int dquot_generic_flag(unsigned int flags, int type)
+{
+	if (type == USRQUOTA)
+		return flags;
+	return flags >> _DQUOT_STATE_FLAGS;
+}
 
 struct quota_info {
 	unsigned int flags;			/* Flags for diskquotas on this device */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index ffd9707..3b3346f 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -40,11 +40,14 @@ int dquot_mark_dquot_dirty(struct dquot *dquot);
 
 int vfs_quota_on(struct super_block *sb, int type, int format_id,
  	char *path, int remount);
+int vfs_quota_enable(struct inode *inode, int type, int format_id,
+	unsigned int flags);
 int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
  	struct path *path);
 int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
  	int format_id, int type);
 int vfs_quota_off(struct super_block *sb, int type, int remount);
+int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags);
 int vfs_quota_sync(struct super_block *sb, int type);
 int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
@@ -64,26 +67,22 @@ static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
  * Functions for checking status of quota
  */
 
-static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
 {
-	if (type == USRQUOTA)
-		return (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED)
-			&& !(sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED);
-	return (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)
-		&& !(sb_dqopt(sb)->flags & DQUOT_GROUP_SUSPENDED);
+	return sb_dqopt(sb)->flags &
+				dquot_state_flag(DQUOT_USAGE_ENABLED, type);
 }
 
-static inline int sb_any_quota_enabled(struct super_block *sb)
+static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type)
 {
-	return sb_has_quota_enabled(sb, USRQUOTA) ||
-		sb_has_quota_enabled(sb, GRPQUOTA);
+	return sb_dqopt(sb)->flags &
+				dquot_state_flag(DQUOT_LIMITS_ENABLED, type);
 }
 
 static inline int sb_has_quota_suspended(struct super_block *sb, int type)
 {
-	if (type == USRQUOTA)
-		return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED;
-	return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED;
+	return sb_dqopt(sb)->flags &
+				dquot_state_flag(DQUOT_SUSPENDED, type);
 }
 
 static inline int sb_any_quota_suspended(struct super_block *sb)
@@ -92,6 +91,34 @@ static inline int sb_any_quota_suspended(struct super_block *sb)
 		sb_has_quota_suspended(sb, GRPQUOTA);
 }
 
+/* Does kernel know about any quota information for given sb + type? */
+static inline int sb_has_quota_loaded(struct super_block *sb, int type)
+{
+	/* Currently if anything is on, then quota usage is on as well */
+	return sb_has_quota_usage_enabled(sb, type);
+}
+
+static inline int sb_any_quota_loaded(struct super_block *sb)
+{
+	return sb_has_quota_loaded(sb, USRQUOTA) ||
+		sb_has_quota_loaded(sb, GRPQUOTA);
+}
+
+static inline int sb_has_quota_active(struct super_block *sb, int type)
+{
+	return sb_has_quota_loaded(sb, type) &&
+	       !sb_has_quota_suspended(sb, type);
+}
+
+static inline int sb_any_quota_active(struct super_block *sb)
+{
+	return sb_has_quota_active(sb, USRQUOTA) ||
+	       sb_has_quota_active(sb, GRPQUOTA);
+}
+
+/* For backward compatibility until we remove all users */
+#define sb_any_quota_enabled(sb) sb_any_quota_active(sb)
+
 /*
  * Operations supported for diskquotas.
  */
@@ -106,7 +133,7 @@ extern struct quotactl_ops vfs_quotactl_ops;
 static inline void vfs_dq_init(struct inode *inode)
 {
 	BUG_ON(!inode->i_sb);
-	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
+	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode))
 		inode->i_sb->dq_op->initialize(inode, -1);
 }
 
@@ -114,7 +141,7 @@ static inline void vfs_dq_init(struct inode *inode)
  * a transaction (deadlocks possible otherwise) */
 static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
-	if (sb_any_quota_enabled(inode->i_sb)) {
+	if (sb_any_quota_active(inode->i_sb)) {
 		/* Used space is updated in alloc_space() */
 		if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
 			return 1;
@@ -134,7 +161,7 @@ static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
 
 static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
-	if (sb_any_quota_enabled(inode->i_sb)) {
+	if (sb_any_quota_active(inode->i_sb)) {
 		/* Used space is updated in alloc_space() */
 		if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA)
 			return 1;
@@ -154,7 +181,7 @@ static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
 
 static inline int vfs_dq_alloc_inode(struct inode *inode)
 {
-	if (sb_any_quota_enabled(inode->i_sb)) {
+	if (sb_any_quota_active(inode->i_sb)) {
 		vfs_dq_init(inode);
 		if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
 			return 1;
@@ -164,7 +191,7 @@ static inline int vfs_dq_alloc_inode(struct inode *inode)
 
 static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
 {
-	if (sb_any_quota_enabled(inode->i_sb))
+	if (sb_any_quota_active(inode->i_sb))
 		inode->i_sb->dq_op->free_space(inode, nr);
 	else
 		inode_sub_bytes(inode, nr);
@@ -178,7 +205,7 @@ static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
 
 static inline void vfs_dq_free_inode(struct inode *inode)
 {
-	if (sb_any_quota_enabled(inode->i_sb))
+	if (sb_any_quota_active(inode->i_sb))
 		inode->i_sb->dq_op->free_inode(inode, 1);
 }
 
@@ -199,12 +226,12 @@ static inline int vfs_dq_off(struct super_block *sb, int remount)
 
 #else
 
-static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
 {
 	return 0;
 }
 
-static inline int sb_any_quota_enabled(struct super_block *sb)
+static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type)
 {
 	return 0;
 }
@@ -219,6 +246,30 @@ static inline int sb_any_quota_suspended(struct super_block *sb)
 	return 0;
 }
 
+/* Does kernel know about any quota information for given sb + type? */
+static inline int sb_has_quota_loaded(struct super_block *sb, int type)
+{
+	return 0;
+}
+
+static inline int sb_any_quota_loaded(struct super_block *sb)
+{
+	return 0;
+}
+
+static inline int sb_has_quota_active(struct super_block *sb, int type)
+{
+	return 0;
+}
+
+static inline int sb_any_quota_active(struct super_block *sb)
+{
+	return 0;
+}
+
+/* For backward compatibility until we remove all users */
+#define sb_any_quota_enabled(sb) sb_any_quota_active(sb)
+
 /*
  * NO-OP when quota not configured.
  */
-- 
cgit v0.10.2


From ee0d5ffe0da2aa992004447113e28622621a983f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 18:11:50 +0200
Subject: ext3: Use sb_any_quota_loaded() instead of sb_any_quota_enabled()

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f6c94f2..250ec53 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1035,8 +1035,7 @@ static int parse_options (char *options, struct super_block *sb,
 		case Opt_grpjquota:
 			qtype = GRPQUOTA;
 set_qf_name:
-			if ((sb_any_quota_enabled(sb) ||
-			     sb_any_quota_suspended(sb)) &&
+			if (sb_any_quota_loaded(sb) &&
 			    !sbi->s_qf_names[qtype]) {
 				printk(KERN_ERR
 					"EXT3-fs: Cannot change journaled "
@@ -1075,8 +1074,7 @@ set_qf_name:
 		case Opt_offgrpjquota:
 			qtype = GRPQUOTA;
 clear_qf_name:
-			if ((sb_any_quota_enabled(sb) ||
-			     sb_any_quota_suspended(sb)) &&
+			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_qf_names[qtype]) {
 				printk(KERN_ERR "EXT3-fs: Cannot change "
 					"journaled quota options when "
@@ -1095,8 +1093,7 @@ clear_qf_name:
 		case Opt_jqfmt_vfsv0:
 			qfmt = QFMT_VFS_V0;
 set_qf_format:
-			if ((sb_any_quota_enabled(sb) ||
-			     sb_any_quota_suspended(sb)) &&
+			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_jquota_fmt != qfmt) {
 				printk(KERN_ERR "EXT3-fs: Cannot change "
 					"journaled quota options when "
@@ -1115,8 +1112,7 @@ set_qf_format:
 			set_opt(sbi->s_mount_opt, GRPQUOTA);
 			break;
 		case Opt_noquota:
-			if (sb_any_quota_enabled(sb) ||
-			    sb_any_quota_suspended(sb)) {
+			if (sb_any_quota_loaded(sb)) {
 				printk(KERN_ERR "EXT3-fs: Cannot change quota "
 					"options when quota turned on.\n");
 				return 0;
-- 
cgit v0.10.2


From 17bd13b31ce4fe7f789d8848e8cbc8cb42b10544 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 18:14:35 +0200
Subject: ext4: Use sb_any_quota_loaded() instead of sb_any_quota_enabled()

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 04158ad..49fcf88 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1142,8 +1142,7 @@ static int parse_options(char *options, struct super_block *sb,
 		case Opt_grpjquota:
 			qtype = GRPQUOTA;
 set_qf_name:
-			if ((sb_any_quota_enabled(sb) ||
-			     sb_any_quota_suspended(sb)) &&
+			if (sb_any_quota_loaded(sb) &&
 			    !sbi->s_qf_names[qtype]) {
 				printk(KERN_ERR
 				       "EXT4-fs: Cannot change journaled "
@@ -1182,8 +1181,7 @@ set_qf_name:
 		case Opt_offgrpjquota:
 			qtype = GRPQUOTA;
 clear_qf_name:
-			if ((sb_any_quota_enabled(sb) ||
-			     sb_any_quota_suspended(sb)) &&
+			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_qf_names[qtype]) {
 				printk(KERN_ERR "EXT4-fs: Cannot change "
 					"journaled quota options when "
@@ -1202,8 +1200,7 @@ clear_qf_name:
 		case Opt_jqfmt_vfsv0:
 			qfmt = QFMT_VFS_V0;
 set_qf_format:
-			if ((sb_any_quota_enabled(sb) ||
-			     sb_any_quota_suspended(sb)) &&
+			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_jquota_fmt != qfmt) {
 				printk(KERN_ERR "EXT4-fs: Cannot change "
 					"journaled quota options when "
@@ -1222,7 +1219,7 @@ set_qf_format:
 			set_opt(sbi->s_mount_opt, GRPQUOTA);
 			break;
 		case Opt_noquota:
-			if (sb_any_quota_enabled(sb)) {
+			if (sb_any_quota_loaded(sb)) {
 				printk(KERN_ERR "EXT4-fs: Cannot change quota "
 					"options when quota turned on.\n");
 				return 0;
-- 
cgit v0.10.2


From 6929f891241d3fe3af01d28503b645e63241e49a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 18:16:36 +0200
Subject: reiserfs: Use sb_any_quota_loaded() instead of
 sb_any_quota_enabled().

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 663a91f..a9b393a 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -994,8 +994,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 		if (c == 'u' || c == 'g') {
 			int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
 
-			if ((sb_any_quota_enabled(s) ||
-			     sb_any_quota_suspended(s)) &&
+			if (sb_any_quota_loaded(s) &&
 			    (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
 				reiserfs_warning(s,
 						 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
@@ -1041,8 +1040,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 						 "reiserfs_parse_options: unknown quota format specified.");
 				return 0;
 			}
-			if ((sb_any_quota_enabled(s) ||
-			     sb_any_quota_suspended(s)) &&
+			if (sb_any_quota_loaded(s) &&
 			    *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
 				reiserfs_warning(s,
 						 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
@@ -1067,7 +1065,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 	}
 	/* This checking is not precise wrt the quota type but for our purposes it is sufficient */
 	if (!(*mount_options & (1 << REISERFS_QUOTA))
-	    && sb_any_quota_enabled(s)) {
+	    && sb_any_quota_loaded(s)) {
 		reiserfs_warning(s,
 				 "reiserfs_parse_options: quota options must be present when quota is turned on.");
 		return 0;
-- 
cgit v0.10.2


From dcb30695f2cac86b71417629a6fe8042b4fe2ab2 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 18:30:40 +0200
Subject: quota: Remove compatibility function sb_any_quota_enabled()

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 3b3346f..e840ca5 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -116,9 +116,6 @@ static inline int sb_any_quota_active(struct super_block *sb)
 	       sb_has_quota_active(sb, GRPQUOTA);
 }
 
-/* For backward compatibility until we remove all users */
-#define sb_any_quota_enabled(sb) sb_any_quota_active(sb)
-
 /*
  * Operations supported for diskquotas.
  */
@@ -267,9 +264,6 @@ static inline int sb_any_quota_active(struct super_block *sb)
 	return 0;
 }
 
-/* For backward compatibility until we remove all users */
-#define sb_any_quota_enabled(sb) sb_any_quota_active(sb)
-
 /*
  * NO-OP when quota not configured.
  */
-- 
cgit v0.10.2


From ca785ec66b991e9ca74dd9840fc014487ad095e1 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 30 Sep 2008 17:53:37 +0200
Subject: quota: Introduce DQUOT_QUOTA_SYS_FILE flag

If filesystem can handle quota files as system files hidden from users, we can
skip a lot of cache invalidation, syncing, inode flags setting etc. when
turning quotas on, off and quota_sync. Allow filesystem to indicate that it is
hiding quota files from users by DQUOT_QUOTA_SYS_FILE flag.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/dquot.c b/fs/dquot.c
index 7569633..74185c3 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1631,6 +1631,11 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
 		dqopt->ops[cnt] = NULL;
 	}
 	mutex_unlock(&dqopt->dqonoff_mutex);
+
+	/* Skip syncing and setting flags if quota files are hidden */
+	if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
+		goto put_inodes;
+
 	/* Sync the superblock so that buffers with quota data are written to
 	 * disk (and so userspace sees correct data afterwards). */
 	if (sb->s_op->sync_fs)
@@ -1655,6 +1660,12 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
 				mark_inode_dirty(toputinode[cnt]);
 			}
 			mutex_unlock(&dqopt->dqonoff_mutex);
+		}
+	if (sb->s_bdev)
+		invalidate_bdev(sb->s_bdev);
+put_inodes:
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		if (toputinode[cnt]) {
 			/* On remount RO, we keep the inode pointer so that we
 			 * can reenable quota on the subsequent remount RW. We
 			 * have to check 'flags' variable and not use sb_has_
@@ -1667,8 +1678,6 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
 			else if (!toputinode[cnt]->i_nlink)
 				ret = -EBUSY;
 		}
-	if (sb->s_bdev)
-		invalidate_bdev(sb->s_bdev);
 	return ret;
 }
 
@@ -1715,25 +1724,31 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		goto out_fmt;
 	}
 
-	/* As we bypass the pagecache we must now flush the inode so that
-	 * we see all the changes from userspace... */
-	write_inode_now(inode, 1);
-	/* And now flush the block cache so that kernel sees the changes */
-	invalidate_bdev(sb->s_bdev);
+	if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
+		/* As we bypass the pagecache we must now flush the inode so
+		 * that we see all the changes from userspace... */
+		write_inode_now(inode, 1);
+		/* And now flush the block cache so that kernel sees the
+		 * changes */
+		invalidate_bdev(sb->s_bdev);
+	}
 	mutex_lock(&inode->i_mutex);
 	mutex_lock(&dqopt->dqonoff_mutex);
 	if (sb_has_quota_loaded(sb, type)) {
 		error = -EBUSY;
 		goto out_lock;
 	}
-	/* We don't want quota and atime on quota files (deadlocks possible)
-	 * Also nobody should write to the file - we use special IO operations
-	 * which ignore the immutable bit. */
-	down_write(&dqopt->dqptr_sem);
-	oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
-	inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
-	up_write(&dqopt->dqptr_sem);
-	sb->dq_op->drop(inode);
+
+	if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
+		/* We don't want quota and atime on quota files (deadlocks
+		 * possible) Also nobody should write to the file - we use
+		 * special IO operations which ignore the immutable bit. */
+		down_write(&dqopt->dqptr_sem);
+		oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
+		inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
+		up_write(&dqopt->dqptr_sem);
+		sb->dq_op->drop(inode);
+	}
 
 	error = -EIO;
 	dqopt->files[type] = igrab(inode);
diff --git a/fs/quota.c b/fs/quota.c
index 8678d9f..4a8c94f 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -160,6 +160,9 @@ static void quota_sync_sb(struct super_block *sb, int type)
 	int cnt;
 
 	sb->s_qcop->quota_sync(sb, type);
+
+	if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)
+		return;
 	/* This is not very clever (and fast) but currently I don't know about
 	 * any other simple way of getting quota data to disk and we must get
 	 * them there for userspace to be visible... */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 93717ab..80b8807 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -332,6 +332,13 @@ enum {
 #define DQUOT_SUSPENDED		(1 << _DQUOT_SUSPENDED)
 #define DQUOT_STATE_FLAGS	(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \
 				 DQUOT_SUSPENDED)
+/* Other quota flags */
+#define DQUOT_QUOTA_SYS_FILE	(1 << 6)	/* Quota file is a special
+						 * system file and user cannot
+						 * touch it. Filesystem is
+						 * responsible for setting
+						 * S_NOQUOTA, S_NOATIME flags
+						 */
 
 static inline unsigned int dquot_state_flag(unsigned int flags, int type)
 {
-- 
cgit v0.10.2


From cf770c137122b78470a67ebd5498947869a09197 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sun, 21 Sep 2008 23:17:53 +0200
Subject: quota: Move quotaio_v[12].h from include/linux/ to fs/

Since these include files are used only by implementation of quota formats,
there's no need to have them in include/linux/.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index 3e078ee..b4af1c6 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -3,13 +3,14 @@
 #include <linux/quota.h>
 #include <linux/quotaops.h>
 #include <linux/dqblk_v1.h>
-#include <linux/quotaio_v1.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
 
 #include <asm/byteorder.h>
 
+#include "quotaio_v1.h"
+
 MODULE_AUTHOR("Jan Kara");
 MODULE_DESCRIPTION("Old quota format support");
 MODULE_LICENSE("GPL");
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 51c4717..a21d1a7 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -6,7 +6,6 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/dqblk_v2.h>
-#include <linux/quotaio_v2.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -15,6 +14,8 @@
 
 #include <asm/byteorder.h>
 
+#include "quotaio_v2.h"
+
 MODULE_AUTHOR("Jan Kara");
 MODULE_DESCRIPTION("Quota format v2 support");
 MODULE_LICENSE("GPL");
@@ -129,8 +130,8 @@ static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
 	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
 	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
 	d->dqb_itime = cpu_to_le64(m->dqb_itime);
-	d->dqb_bhardlimit = cpu_to_le32(v2_qbtos(m->dqb_bhardlimit));
-	d->dqb_bsoftlimit = cpu_to_le32(v2_qbtos(m->dqb_bsoftlimit));
+	d->dqb_bhardlimit = cpu_to_le32(v2_stoqb(m->dqb_bhardlimit));
+	d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit));
 	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
 	d->dqb_btime = cpu_to_le64(m->dqb_btime);
 	d->dqb_id = cpu_to_le32(id);
diff --git a/fs/quotaio_v1.h b/fs/quotaio_v1.h
new file mode 100644
index 0000000..746654b
--- /dev/null
+++ b/fs/quotaio_v1.h
@@ -0,0 +1,33 @@
+#ifndef _LINUX_QUOTAIO_V1_H
+#define _LINUX_QUOTAIO_V1_H
+
+#include <linux/types.h>
+
+/*
+ * The following constants define the amount of time given a user
+ * before the soft limits are treated as hard limits (usually resulting
+ * in an allocation failure). The timer is started when the user crosses
+ * their soft limit, it is reset when they go below their soft limit.
+ */
+#define MAX_IQ_TIME  604800	/* (7*24*60*60) 1 week */
+#define MAX_DQ_TIME  604800	/* (7*24*60*60) 1 week */
+
+/*
+ * The following structure defines the format of the disk quota file
+ * (as it appears on disk) - the file is an array of these structures
+ * indexed by user or group number.
+ */
+struct v1_disk_dqblk {
+	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
+	__u32 dqb_curblocks;	/* current block count */
+	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__u32 dqb_isoftlimit;	/* preferred inode limit */
+	__u32 dqb_curinodes;	/* current # allocated inodes */
+	time_t dqb_btime;	/* time limit for excessive disk use */
+	time_t dqb_itime;	/* time limit for excessive inode use */
+};
+
+#define v1_dqoff(UID)      ((loff_t)((UID) * sizeof (struct v1_disk_dqblk)))
+
+#endif	/* _LINUX_QUOTAIO_V1_H */
diff --git a/fs/quotaio_v2.h b/fs/quotaio_v2.h
new file mode 100644
index 0000000..303d7cb
--- /dev/null
+++ b/fs/quotaio_v2.h
@@ -0,0 +1,79 @@
+/*
+ *	Definitions of structures for vfsv0 quota format
+ */
+
+#ifndef _LINUX_QUOTAIO_V2_H
+#define _LINUX_QUOTAIO_V2_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/*
+ * Definitions of magics and versions of current quota files
+ */
+#define V2_INITQMAGICS {\
+	0xd9c01f11,	/* USRQUOTA */\
+	0xd9c01927	/* GRPQUOTA */\
+}
+
+#define V2_INITQVERSIONS {\
+	0,		/* USRQUOTA */\
+	0		/* GRPQUOTA */\
+}
+
+/*
+ * The following structure defines the format of the disk quota file
+ * (as it appears on disk) - the file is a radix tree whose leaves point
+ * to blocks of these structures.
+ */
+struct v2_disk_dqblk {
+	__le32 dqb_id;		/* id this quota applies to */
+	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__le32 dqb_isoftlimit;	/* preferred inode limit */
+	__le32 dqb_curinodes;	/* current # allocated inodes */
+	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
+	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
+	__le64 dqb_curspace;	/* current space occupied (in bytes) */
+	__le64 dqb_btime;	/* time limit for excessive disk use */
+	__le64 dqb_itime;	/* time limit for excessive inode use */
+};
+
+/*
+ * Here are header structures as written on disk and their in-memory copies
+ */
+/* First generic header */
+struct v2_disk_dqheader {
+	__le32 dqh_magic;	/* Magic number identifying file */
+	__le32 dqh_version;	/* File version */
+};
+
+/* Header with type and version specific information */
+struct v2_disk_dqinfo {
+	__le32 dqi_bgrace;	/* Time before block soft limit becomes hard limit */
+	__le32 dqi_igrace;	/* Time before inode soft limit becomes hard limit */
+	__le32 dqi_flags;	/* Flags for quotafile (DQF_*) */
+	__le32 dqi_blocks;	/* Number of blocks in file */
+	__le32 dqi_free_blk;	/* Number of first free block in the list */
+	__le32 dqi_free_entry;	/* Number of block with at least one free entry */
+};
+
+/*
+ *  Structure of header of block with quota structures. It is padded to 16 bytes so
+ *  there will be space for exactly 21 quota-entries in a block
+ */
+struct v2_disk_dqdbheader {
+	__le32 dqdh_next_free;	/* Number of next block with free entry */
+	__le32 dqdh_prev_free;	/* Number of previous block with free entry */
+	__le16 dqdh_entries;	/* Number of valid entries in block */
+	__le16 dqdh_pad1;
+	__le32 dqdh_pad2;
+};
+
+#define V2_DQINFOOFF	sizeof(struct v2_disk_dqheader)	/* Offset of info header in file */
+#define V2_DQBLKSIZE_BITS	10
+#define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
+#define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
+#define V2_DQTREEDEPTH	4		/* Depth of quota tree */
+#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
+
+#endif /* _LINUX_QUOTAIO_V2_H */
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 95ac8234..900a787 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -134,8 +134,6 @@ header-y += posix_types.h
 header-y += ppdev.h
 header-y += prctl.h
 header-y += qnxtypes.h
-header-y += quotaio_v1.h
-header-y += quotaio_v2.h
 header-y += radeonfb.h
 header-y += raw.h
 header-y += resource.h
diff --git a/include/linux/quotaio_v1.h b/include/linux/quotaio_v1.h
deleted file mode 100644
index 746654b..0000000
--- a/include/linux/quotaio_v1.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef _LINUX_QUOTAIO_V1_H
-#define _LINUX_QUOTAIO_V1_H
-
-#include <linux/types.h>
-
-/*
- * The following constants define the amount of time given a user
- * before the soft limits are treated as hard limits (usually resulting
- * in an allocation failure). The timer is started when the user crosses
- * their soft limit, it is reset when they go below their soft limit.
- */
-#define MAX_IQ_TIME  604800	/* (7*24*60*60) 1 week */
-#define MAX_DQ_TIME  604800	/* (7*24*60*60) 1 week */
-
-/*
- * The following structure defines the format of the disk quota file
- * (as it appears on disk) - the file is an array of these structures
- * indexed by user or group number.
- */
-struct v1_disk_dqblk {
-	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
-	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
-	__u32 dqb_curblocks;	/* current block count */
-	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__u32 dqb_isoftlimit;	/* preferred inode limit */
-	__u32 dqb_curinodes;	/* current # allocated inodes */
-	time_t dqb_btime;	/* time limit for excessive disk use */
-	time_t dqb_itime;	/* time limit for excessive inode use */
-};
-
-#define v1_dqoff(UID)      ((loff_t)((UID) * sizeof (struct v1_disk_dqblk)))
-
-#endif	/* _LINUX_QUOTAIO_V1_H */
diff --git a/include/linux/quotaio_v2.h b/include/linux/quotaio_v2.h
deleted file mode 100644
index 303d7cb..0000000
--- a/include/linux/quotaio_v2.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- *	Definitions of structures for vfsv0 quota format
- */
-
-#ifndef _LINUX_QUOTAIO_V2_H
-#define _LINUX_QUOTAIO_V2_H
-
-#include <linux/types.h>
-#include <linux/quota.h>
-
-/*
- * Definitions of magics and versions of current quota files
- */
-#define V2_INITQMAGICS {\
-	0xd9c01f11,	/* USRQUOTA */\
-	0xd9c01927	/* GRPQUOTA */\
-}
-
-#define V2_INITQVERSIONS {\
-	0,		/* USRQUOTA */\
-	0		/* GRPQUOTA */\
-}
-
-/*
- * The following structure defines the format of the disk quota file
- * (as it appears on disk) - the file is a radix tree whose leaves point
- * to blocks of these structures.
- */
-struct v2_disk_dqblk {
-	__le32 dqb_id;		/* id this quota applies to */
-	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__le32 dqb_isoftlimit;	/* preferred inode limit */
-	__le32 dqb_curinodes;	/* current # allocated inodes */
-	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
-	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
-	__le64 dqb_curspace;	/* current space occupied (in bytes) */
-	__le64 dqb_btime;	/* time limit for excessive disk use */
-	__le64 dqb_itime;	/* time limit for excessive inode use */
-};
-
-/*
- * Here are header structures as written on disk and their in-memory copies
- */
-/* First generic header */
-struct v2_disk_dqheader {
-	__le32 dqh_magic;	/* Magic number identifying file */
-	__le32 dqh_version;	/* File version */
-};
-
-/* Header with type and version specific information */
-struct v2_disk_dqinfo {
-	__le32 dqi_bgrace;	/* Time before block soft limit becomes hard limit */
-	__le32 dqi_igrace;	/* Time before inode soft limit becomes hard limit */
-	__le32 dqi_flags;	/* Flags for quotafile (DQF_*) */
-	__le32 dqi_blocks;	/* Number of blocks in file */
-	__le32 dqi_free_blk;	/* Number of first free block in the list */
-	__le32 dqi_free_entry;	/* Number of block with at least one free entry */
-};
-
-/*
- *  Structure of header of block with quota structures. It is padded to 16 bytes so
- *  there will be space for exactly 21 quota-entries in a block
- */
-struct v2_disk_dqdbheader {
-	__le32 dqdh_next_free;	/* Number of next block with free entry */
-	__le32 dqdh_prev_free;	/* Number of previous block with free entry */
-	__le16 dqdh_entries;	/* Number of valid entries in block */
-	__le16 dqdh_pad1;
-	__le32 dqdh_pad2;
-};
-
-#define V2_DQINFOOFF	sizeof(struct v2_disk_dqheader)	/* Offset of info header in file */
-#define V2_DQBLKSIZE_BITS	10
-#define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
-#define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
-#define V2_DQTREEDEPTH	4		/* Depth of quota tree */
-#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
-
-#endif /* _LINUX_QUOTAIO_V2_H */
-- 
cgit v0.10.2


From 1ccd14b9c271c1ac6eec5c5ec5def433100e7248 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 22 Sep 2008 05:54:49 +0200
Subject: quota: Split off quota tree handling into a separate file

There is going to be a new version of quota format having 64-bit
quota limits and a new quota format for OCFS2. They are both
going to use the same tree structure as VFSv0 quota format. So
split out tree handling into a separate file and make size of
leaf blocks, amount of space usable in each block (needed for
checksumming) and structures contained in them configurable
so that the code can be shared.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/Kconfig b/fs/Kconfig
index b93425a..c1ce3d8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -302,6 +302,10 @@ config PRINT_QUOTA_WARNING
 	  Note that this behavior is currently deprecated and may go away in
 	  future. Please use notification via netlink socket instead.
 
+# Generic support for tree structured quota files. Seleted when needed.
+config QUOTA_TREE
+	 tristate
+
 config QFMT_V1
 	tristate "Old quota format support"
 	depends on QUOTA
@@ -313,6 +317,7 @@ config QFMT_V1
 config QFMT_V2
 	tristate "Quota format v2 support"
 	depends on QUOTA
+	select QUOTA_TREE
 	help
 	  This quota format allows using quotas with 32-bit UIDs/GIDs. If you
 	  need this functionality say Y here.
diff --git a/fs/Makefile b/fs/Makefile
index e6f423d..c830611 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_GENERIC_ACL)	+= generic_acl.o
 obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
 obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
+obj-$(CONFIG_QUOTA_TREE)	+= quota_tree.o
 obj-$(CONFIG_QUOTACTL)		+= quota.o
 
 obj-$(CONFIG_PROC_FS)		+= proc/
diff --git a/fs/quota_tree.c b/fs/quota_tree.c
new file mode 100644
index 0000000..953404c
--- /dev/null
+++ b/fs/quota_tree.c
@@ -0,0 +1,645 @@
+/*
+ *	vfsv0 quota IO operations on file
+ */
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/dqblk_v2.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/quotaops.h>
+
+#include <asm/byteorder.h>
+
+#include "quota_tree.h"
+
+MODULE_AUTHOR("Jan Kara");
+MODULE_DESCRIPTION("Quota trie support");
+MODULE_LICENSE("GPL");
+
+#define __QUOTA_QT_PARANOIA
+
+typedef char *dqbuf_t;
+
+static int get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth)
+{
+	unsigned int epb = info->dqi_usable_bs >> 2;
+
+	depth = info->dqi_qtree_depth - depth - 1;
+	while (depth--)
+		id /= epb;
+	return id % epb;
+}
+
+/* Number of entries in one blocks */
+static inline int qtree_dqstr_in_blk(struct qtree_mem_dqinfo *info)
+{
+	return (info->dqi_usable_bs - sizeof(struct qt_disk_dqdbheader))
+	       / info->dqi_entry_size;
+}
+
+static dqbuf_t getdqbuf(size_t size)
+{
+	dqbuf_t buf = kmalloc(size, GFP_NOFS);
+	if (!buf)
+		printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n");
+	return buf;
+}
+
+static inline void freedqbuf(dqbuf_t buf)
+{
+	kfree(buf);
+}
+
+static inline ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf)
+{
+	struct super_block *sb = info->dqi_sb;
+
+	memset(buf, 0, info->dqi_usable_bs);
+	return sb->s_op->quota_read(sb, info->dqi_type, (char *)buf,
+	       info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+}
+
+static inline ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf)
+{
+	struct super_block *sb = info->dqi_sb;
+
+	return sb->s_op->quota_write(sb, info->dqi_type, (char *)buf,
+	       info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+}
+
+/* Remove empty block from list and return it */
+static int get_free_dqblk(struct qtree_mem_dqinfo *info)
+{
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+	int ret, blk;
+
+	if (!buf)
+		return -ENOMEM;
+	if (info->dqi_free_blk) {
+		blk = info->dqi_free_blk;
+		ret = read_blk(info, blk, buf);
+		if (ret < 0)
+			goto out_buf;
+		info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
+	}
+	else {
+		memset(buf, 0, info->dqi_usable_bs);
+		/* Assure block allocation... */
+		ret = write_blk(info, info->dqi_blocks, buf);
+		if (ret < 0)
+			goto out_buf;
+		blk = info->dqi_blocks++;
+	}
+	mark_info_dirty(info->dqi_sb, info->dqi_type);
+	ret = blk;
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Insert empty block to the list */
+static int put_free_dqblk(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+	struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+	int err;
+
+	dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
+	dh->dqdh_prev_free = cpu_to_le32(0);
+	dh->dqdh_entries = cpu_to_le16(0);
+	err = write_blk(info, blk, buf);
+	if (err < 0)
+		return err;
+	info->dqi_free_blk = blk;
+	mark_info_dirty(info->dqi_sb, info->dqi_type);
+	return 0;
+}
+
+/* Remove given block from the list of blocks with free entries */
+static int remove_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+	dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs);
+	struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+	uint nextblk = le32_to_cpu(dh->dqdh_next_free);
+	uint prevblk = le32_to_cpu(dh->dqdh_prev_free);
+	int err;
+
+	if (!tmpbuf)
+		return -ENOMEM;
+	if (nextblk) {
+		err = read_blk(info, nextblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+		((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
+							dh->dqdh_prev_free;
+		err = write_blk(info, nextblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+	}
+	if (prevblk) {
+		err = read_blk(info, prevblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+		((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
+							dh->dqdh_next_free;
+		err = write_blk(info, prevblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+	} else {
+		info->dqi_free_entry = nextblk;
+		mark_info_dirty(info->dqi_sb, info->dqi_type);
+	}
+	freedqbuf(tmpbuf);
+	dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
+	/* No matter whether write succeeds block is out of list */
+	if (write_blk(info, blk, buf) < 0)
+		printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
+	return 0;
+out_buf:
+	freedqbuf(tmpbuf);
+	return err;
+}
+
+/* Insert given block to the beginning of list with free entries */
+static int insert_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+	dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs);
+	struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+	int err;
+
+	if (!tmpbuf)
+		return -ENOMEM;
+	dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
+	dh->dqdh_prev_free = cpu_to_le32(0);
+	err = write_blk(info, blk, buf);
+	if (err < 0)
+		goto out_buf;
+	if (info->dqi_free_entry) {
+		err = read_blk(info, info->dqi_free_entry, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+		((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
+							cpu_to_le32(blk);
+		err = write_blk(info, info->dqi_free_entry, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+	}
+	freedqbuf(tmpbuf);
+	info->dqi_free_entry = blk;
+	mark_info_dirty(info->dqi_sb, info->dqi_type);
+	return 0;
+out_buf:
+	freedqbuf(tmpbuf);
+	return err;
+}
+
+/* Is the entry in the block free? */
+int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk)
+{
+	int i;
+
+	for (i = 0; i < info->dqi_entry_size; i++)
+		if (disk[i])
+			return 0;
+	return 1;
+}
+EXPORT_SYMBOL(qtree_entry_unused);
+
+/* Find space for dquot */
+static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
+			      struct dquot *dquot, int *err)
+{
+	uint blk, i;
+	struct qt_disk_dqdbheader *dh;
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	char *ddquot;
+
+	*err = 0;
+	if (!buf) {
+		*err = -ENOMEM;
+		return 0;
+	}
+	dh = (struct qt_disk_dqdbheader *)buf;
+	if (info->dqi_free_entry) {
+		blk = info->dqi_free_entry;
+		*err = read_blk(info, blk, buf);
+		if (*err < 0)
+			goto out_buf;
+	} else {
+		blk = get_free_dqblk(info);
+		if ((int)blk < 0) {
+			*err = blk;
+			freedqbuf(buf);
+			return 0;
+		}
+		memset(buf, 0, info->dqi_usable_bs);
+		/* This is enough as block is already zeroed and entry list is empty... */
+		info->dqi_free_entry = blk;
+		mark_info_dirty(dquot->dq_sb, dquot->dq_type);
+	}
+	/* Block will be full? */
+	if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) {
+		*err = remove_free_dqentry(info, buf, blk);
+		if (*err < 0) {
+			printk(KERN_ERR "VFS: find_free_dqentry(): Can't "
+			       "remove block (%u) from entry free list.\n",
+			       blk);
+			goto out_buf;
+		}
+	}
+	le16_add_cpu(&dh->dqdh_entries, 1);
+	/* Find free structure in block */
+	for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader);
+	     i < qtree_dqstr_in_blk(info) && !qtree_entry_unused(info, ddquot);
+	     i++, ddquot += info->dqi_entry_size);
+#ifdef __QUOTA_QT_PARANOIA
+	if (i == qtree_dqstr_in_blk(info)) {
+		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full "
+				"but it shouldn't.\n");
+		*err = -EIO;
+		goto out_buf;
+	}
+#endif
+	*err = write_blk(info, blk, buf);
+	if (*err < 0) {
+		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota "
+				"data block %u.\n", blk);
+		goto out_buf;
+	}
+	dquot->dq_off = (blk << info->dqi_blocksize_bits) +
+			sizeof(struct qt_disk_dqdbheader) +
+			i * info->dqi_entry_size;
+	freedqbuf(buf);
+	return blk;
+out_buf:
+	freedqbuf(buf);
+	return 0;
+}
+
+/* Insert reference to structure into the trie */
+static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+			  uint *treeblk, int depth)
+{
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	int ret = 0, newson = 0, newact = 0;
+	__le32 *ref;
+	uint newblk;
+
+	if (!buf)
+		return -ENOMEM;
+	if (!*treeblk) {
+		ret = get_free_dqblk(info);
+		if (ret < 0)
+			goto out_buf;
+		*treeblk = ret;
+		memset(buf, 0, info->dqi_usable_bs);
+		newact = 1;
+	} else {
+		ret = read_blk(info, *treeblk, buf);
+		if (ret < 0) {
+			printk(KERN_ERR "VFS: Can't read tree quota block "
+					"%u.\n", *treeblk);
+			goto out_buf;
+		}
+	}
+	ref = (__le32 *)buf;
+	newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+	if (!newblk)
+		newson = 1;
+	if (depth == info->dqi_qtree_depth - 1) {
+#ifdef __QUOTA_QT_PARANOIA
+		if (newblk) {
+			printk(KERN_ERR "VFS: Inserting already present quota "
+					"entry (block %u).\n",
+			       le32_to_cpu(ref[get_index(info,
+						dquot->dq_id, depth)]));
+			ret = -EIO;
+			goto out_buf;
+		}
+#endif
+		newblk = find_free_dqentry(info, dquot, &ret);
+	} else {
+		ret = do_insert_tree(info, dquot, &newblk, depth+1);
+	}
+	if (newson && ret >= 0) {
+		ref[get_index(info, dquot->dq_id, depth)] =
+							cpu_to_le32(newblk);
+		ret = write_blk(info, *treeblk, buf);
+	} else if (newact && ret < 0) {
+		put_free_dqblk(info, buf, *treeblk);
+	}
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Wrapper for inserting quota structure into tree */
+static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
+				 struct dquot *dquot)
+{
+	int tmp = QT_TREEOFF;
+	return do_insert_tree(info, dquot, &tmp, 0);
+}
+
+/*
+ *	We don't have to be afraid of deadlocks as we never have quotas on quota files...
+ */
+int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+	int type = dquot->dq_type;
+	struct super_block *sb = dquot->dq_sb;
+	ssize_t ret;
+	dqbuf_t ddquot = getdqbuf(info->dqi_entry_size);
+
+	if (!ddquot)
+		return -ENOMEM;
+
+	/* dq_off is guarded by dqio_mutex */
+	if (!dquot->dq_off) {
+		ret = dq_insert_tree(info, dquot);
+		if (ret < 0) {
+			printk(KERN_ERR "VFS: Error %zd occurred while "
+					"creating quota.\n", ret);
+			freedqbuf(ddquot);
+			return ret;
+		}
+	}
+	spin_lock(&dq_data_lock);
+	info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
+	spin_unlock(&dq_data_lock);
+	ret = sb->s_op->quota_write(sb, type, (char *)ddquot,
+					info->dqi_entry_size, dquot->dq_off);
+	if (ret != info->dqi_entry_size) {
+		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
+		       sb->s_id);
+		if (ret >= 0)
+			ret = -ENOSPC;
+	} else {
+		ret = 0;
+	}
+	dqstats.writes++;
+	freedqbuf(ddquot);
+
+	return ret;
+}
+EXPORT_SYMBOL(qtree_write_dquot);
+
+/* Free dquot entry in data block */
+static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+			uint blk)
+{
+	struct qt_disk_dqdbheader *dh;
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	int ret = 0;
+
+	if (!buf)
+		return -ENOMEM;
+	if (dquot->dq_off >> info->dqi_blocksize_bits != blk) {
+		printk(KERN_ERR "VFS: Quota structure has offset to other "
+		  "block (%u) than it should (%u).\n", blk,
+		  (uint)(dquot->dq_off >> info->dqi_blocksize_bits));
+		goto out_buf;
+	}
+	ret = read_blk(info, blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
+		goto out_buf;
+	}
+	dh = (struct qt_disk_dqdbheader *)buf;
+	le16_add_cpu(&dh->dqdh_entries, -1);
+	if (!le16_to_cpu(dh->dqdh_entries)) {	/* Block got free? */
+		ret = remove_free_dqentry(info, buf, blk);
+		if (ret >= 0)
+			ret = put_free_dqblk(info, buf, blk);
+		if (ret < 0) {
+			printk(KERN_ERR "VFS: Can't move quota data block (%u) "
+			  "to free list.\n", blk);
+			goto out_buf;
+		}
+	} else {
+		memset(buf +
+		       (dquot->dq_off & ((1 << info->dqi_blocksize_bits) - 1)),
+		       0, info->dqi_entry_size);
+		if (le16_to_cpu(dh->dqdh_entries) ==
+		    qtree_dqstr_in_blk(info) - 1) {
+			/* Insert will write block itself */
+			ret = insert_free_dqentry(info, buf, blk);
+			if (ret < 0) {
+				printk(KERN_ERR "VFS: Can't insert quota data "
+				       "block (%u) to free entry list.\n", blk);
+				goto out_buf;
+			}
+		} else {
+			ret = write_blk(info, blk, buf);
+			if (ret < 0) {
+				printk(KERN_ERR "VFS: Can't write quota data "
+				  "block %u\n", blk);
+				goto out_buf;
+			}
+		}
+	}
+	dquot->dq_off = 0;	/* Quota is now unattached */
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Remove reference to dquot from tree */
+static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+		       uint *blk, int depth)
+{
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	int ret = 0;
+	uint newblk;
+	__le32 *ref = (__le32 *)buf;
+
+	if (!buf)
+		return -ENOMEM;
+	ret = read_blk(info, *blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
+		goto out_buf;
+	}
+	newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+	if (depth == info->dqi_qtree_depth - 1) {
+		ret = free_dqentry(info, dquot, newblk);
+		newblk = 0;
+	} else {
+		ret = remove_tree(info, dquot, &newblk, depth+1);
+	}
+	if (ret >= 0 && !newblk) {
+		int i;
+		ref[get_index(info, dquot->dq_id, depth)] = cpu_to_le32(0);
+		/* Block got empty? */
+		for (i = 0;
+		     i < (info->dqi_usable_bs >> 2) && !ref[i];
+		     i++);
+		/* Don't put the root block into the free block list */
+		if (i == (info->dqi_usable_bs >> 2)
+		    && *blk != QT_TREEOFF) {
+			put_free_dqblk(info, buf, *blk);
+			*blk = 0;
+		} else {
+			ret = write_blk(info, *blk, buf);
+			if (ret < 0)
+				printk(KERN_ERR "VFS: Can't write quota tree "
+				  "block %u.\n", *blk);
+		}
+	}
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Delete dquot from tree */
+int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+	uint tmp = QT_TREEOFF;
+
+	if (!dquot->dq_off)	/* Even not allocated? */
+		return 0;
+	return remove_tree(info, dquot, &tmp, 0);
+}
+EXPORT_SYMBOL(qtree_delete_dquot);
+
+/* Find entry in block */
+static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
+				 struct dquot *dquot, uint blk)
+{
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	loff_t ret = 0;
+	int i;
+	char *ddquot;
+
+	if (!buf)
+		return -ENOMEM;
+	ret = read_blk(info, blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+		goto out_buf;
+	}
+	for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader);
+	     i < qtree_dqstr_in_blk(info) && !info->dqi_ops->is_id(ddquot, dquot);
+	     i++, ddquot += info->dqi_entry_size);
+	if (i == qtree_dqstr_in_blk(info)) {
+		printk(KERN_ERR "VFS: Quota for id %u referenced "
+		  "but not present.\n", dquot->dq_id);
+		ret = -EIO;
+		goto out_buf;
+	} else {
+		ret = (blk << info->dqi_blocksize_bits) + sizeof(struct
+		  qt_disk_dqdbheader) + i * info->dqi_entry_size;
+	}
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Find entry for given id in the tree */
+static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
+				struct dquot *dquot, uint blk, int depth)
+{
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	loff_t ret = 0;
+	__le32 *ref = (__le32 *)buf;
+
+	if (!buf)
+		return -ENOMEM;
+	ret = read_blk(info, blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+		goto out_buf;
+	}
+	ret = 0;
+	blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+	if (!blk)	/* No reference? */
+		goto out_buf;
+	if (depth < info->dqi_qtree_depth - 1)
+		ret = find_tree_dqentry(info, dquot, blk, depth+1);
+	else
+		ret = find_block_dqentry(info, dquot, blk);
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Find entry for given id in the tree - wrapper function */
+static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info,
+				  struct dquot *dquot)
+{
+	return find_tree_dqentry(info, dquot, QT_TREEOFF, 0);
+}
+
+int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+	int type = dquot->dq_type;
+	struct super_block *sb = dquot->dq_sb;
+	loff_t offset;
+	dqbuf_t ddquot;
+	int ret = 0;
+
+#ifdef __QUOTA_QT_PARANOIA
+	/* Invalidated quota? */
+	if (!sb_dqopt(dquot->dq_sb)->files[type]) {
+		printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
+		return -EIO;
+	}
+#endif
+	/* Do we know offset of the dquot entry in the quota file? */
+	if (!dquot->dq_off) {
+		offset = find_dqentry(info, dquot);
+		if (offset <= 0) {	/* Entry not present? */
+			if (offset < 0)
+				printk(KERN_ERR "VFS: Can't read quota "
+				  "structure for id %u.\n", dquot->dq_id);
+			dquot->dq_off = 0;
+			set_bit(DQ_FAKE_B, &dquot->dq_flags);
+			memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
+			ret = offset;
+			goto out;
+		}
+		dquot->dq_off = offset;
+	}
+	ddquot = getdqbuf(info->dqi_entry_size);
+	if (!ddquot)
+		return -ENOMEM;
+	ret = sb->s_op->quota_read(sb, type, (char *)ddquot,
+				   info->dqi_entry_size, dquot->dq_off);
+	if (ret != info->dqi_entry_size) {
+		if (ret >= 0)
+			ret = -EIO;
+		printk(KERN_ERR "VFS: Error while reading quota "
+				"structure for id %u.\n", dquot->dq_id);
+		set_bit(DQ_FAKE_B, &dquot->dq_flags);
+		memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
+		freedqbuf(ddquot);
+		goto out;
+	}
+	spin_lock(&dq_data_lock);
+	info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
+	if (!dquot->dq_dqb.dqb_bhardlimit &&
+	    !dquot->dq_dqb.dqb_bsoftlimit &&
+	    !dquot->dq_dqb.dqb_ihardlimit &&
+	    !dquot->dq_dqb.dqb_isoftlimit)
+		set_bit(DQ_FAKE_B, &dquot->dq_flags);
+	spin_unlock(&dq_data_lock);
+	freedqbuf(ddquot);
+out:
+	dqstats.reads++;
+	return ret;
+}
+EXPORT_SYMBOL(qtree_read_dquot);
+
+/* Check whether dquot should not be deleted. We know we are
+ * the only one operating on dquot (thanks to dq_lock) */
+int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace))
+		return qtree_delete_dquot(info, dquot);
+	return 0;
+}
+EXPORT_SYMBOL(qtree_release_dquot);
diff --git a/fs/quota_tree.h b/fs/quota_tree.h
new file mode 100644
index 0000000..a1ab8db
--- /dev/null
+++ b/fs/quota_tree.h
@@ -0,0 +1,25 @@
+/*
+ *	Definitions of structures for vfsv0 quota format
+ */
+
+#ifndef _LINUX_QUOTA_TREE_H
+#define _LINUX_QUOTA_TREE_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/*
+ *  Structure of header of block with quota structures. It is padded to 16 bytes so
+ *  there will be space for exactly 21 quota-entries in a block
+ */
+struct qt_disk_dqdbheader {
+	__le32 dqdh_next_free;	/* Number of next block with free entry */
+	__le32 dqdh_prev_free;	/* Number of previous block with free entry */
+	__le16 dqdh_entries;	/* Number of valid entries in block */
+	__le16 dqdh_pad1;
+	__le32 dqdh_pad2;
+};
+
+#define QT_TREEOFF	1		/* Offset of tree in file in blocks */
+
+#endif /* _LINUX_QUOTAIO_TREE_H */
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index a21d1a7..a87f102 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -14,6 +14,7 @@
 
 #include <asm/byteorder.h>
 
+#include "quota_tree.h"
 #include "quotaio_v2.h"
 
 MODULE_AUTHOR("Jan Kara");
@@ -22,10 +23,15 @@ MODULE_LICENSE("GPL");
 
 #define __QUOTA_V2_PARANOIA
 
-typedef char *dqbuf_t;
+static void v2_mem2diskdqb(void *dp, struct dquot *dquot);
+static void v2_disk2memdqb(struct dquot *dquot, void *dp);
+static int v2_is_id(void *dp, struct dquot *dquot);
 
-#define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
+static struct qtree_fmt_operations v2_qtree_ops = {
+	.mem2disk_dqblk = v2_mem2diskdqb,
+	.disk2mem_dqblk = v2_disk2memdqb,
+	.is_id = v2_is_id,
+};
 
 #define QUOTABLOCK_BITS 10
 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -64,7 +70,7 @@ static int v2_check_quota_file(struct super_block *sb, int type)
 static int v2_read_file_info(struct super_block *sb, int type)
 {
 	struct v2_disk_dqinfo dinfo;
-	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
 	ssize_t size;
 
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
@@ -80,9 +86,16 @@ static int v2_read_file_info(struct super_block *sb, int type)
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
-	info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
-	info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
-	info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+	info->u.v2_i.i.dqi_sb = sb;
+	info->u.v2_i.i.dqi_type = type;
+	info->u.v2_i.i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+	info->u.v2_i.i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+	info->u.v2_i.i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+	info->u.v2_i.i.dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
+	info->u.v2_i.i.dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
+	info->u.v2_i.i.dqi_qtree_depth = qtree_depth(&info->u.v2_i.i);
+	info->u.v2_i.i.dqi_entry_size = sizeof(struct v2_disk_dqblk);
+	info->u.v2_i.i.dqi_ops = &v2_qtree_ops;
 	return 0;
 }
 
@@ -90,7 +103,7 @@ static int v2_read_file_info(struct super_block *sb, int type)
 static int v2_write_file_info(struct super_block *sb, int type)
 {
 	struct v2_disk_dqinfo dinfo;
-	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
 	ssize_t size;
 
 	spin_lock(&dq_data_lock);
@@ -99,9 +112,9 @@ static int v2_write_file_info(struct super_block *sb, int type)
 	dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
 	dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
 	spin_unlock(&dq_data_lock);
-	dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks);
-	dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk);
-	dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry);
+	dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.i.dqi_blocks);
+	dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.i.dqi_free_blk);
+	dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.i.dqi_free_entry);
 	size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
 	if (size != sizeof(struct v2_disk_dqinfo)) {
@@ -112,8 +125,11 @@ static int v2_write_file_info(struct super_block *sb, int type)
 	return 0;
 }
 
-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
+static void v2_disk2memdqb(struct dquot *dquot, void *dp)
 {
+	struct v2_disk_dqblk *d = dp, empty;
+	struct mem_dqblk *m = &dquot->dq_dqb;
+
 	m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
 	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
 	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
@@ -122,10 +138,20 @@ static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
 	m->dqb_bsoftlimit = v2_qbtos(le32_to_cpu(d->dqb_bsoftlimit));
 	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
 	m->dqb_btime = le64_to_cpu(d->dqb_btime);
+	/* We need to escape back all-zero structure */
+	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
+	empty.dqb_itime = cpu_to_le64(1);
+	if (!memcmp(&empty, dp, sizeof(struct v2_disk_dqblk)))
+		m->dqb_itime = 0;
 }
 
-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
+static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
 {
+	struct v2_disk_dqblk *d = dp;
+	struct mem_dqblk *m = &dquot->dq_dqb;
+	struct qtree_mem_dqinfo *info =
+			&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i;
+
 	d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
 	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
 	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
@@ -134,553 +160,35 @@ static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
 	d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit));
 	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
 	d->dqb_btime = cpu_to_le64(m->dqb_btime);
-	d->dqb_id = cpu_to_le32(id);
-}
-
-static dqbuf_t getdqbuf(void)
-{
-	dqbuf_t buf = kmalloc(V2_DQBLKSIZE, GFP_NOFS);
-	if (!buf)
-		printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n");
-	return buf;
-}
-
-static inline void freedqbuf(dqbuf_t buf)
-{
-	kfree(buf);
-}
-
-static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
-{
-	memset(buf, 0, V2_DQBLKSIZE);
-	return sb->s_op->quota_read(sb, type, (char *)buf,
-	       V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
-}
-
-static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
-{
-	return sb->s_op->quota_write(sb, type, (char *)buf,
-	       V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
-}
-
-/* Remove empty block from list and return it */
-static int get_free_dqblk(struct super_block *sb, int type)
-{
-	dqbuf_t buf = getdqbuf();
-	struct mem_dqinfo *info = sb_dqinfo(sb, type);
-	struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
-	int ret, blk;
-
-	if (!buf)
-		return -ENOMEM;
-	if (info->u.v2_i.dqi_free_blk) {
-		blk = info->u.v2_i.dqi_free_blk;
-		if ((ret = read_blk(sb, type, blk, buf)) < 0)
-			goto out_buf;
-		info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
-	}
-	else {
-		memset(buf, 0, V2_DQBLKSIZE);
-		/* Assure block allocation... */
-		if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0)
-			goto out_buf;
-		blk = info->u.v2_i.dqi_blocks++;
-	}
-	mark_info_dirty(sb, type);
-	ret = blk;
-out_buf:
-	freedqbuf(buf);
-	return ret;
-}
-
-/* Insert empty block to the list */
-static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk)
-{
-	struct mem_dqinfo *info = sb_dqinfo(sb, type);
-	struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
-	int err;
-
-	dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_blk);
-	dh->dqdh_prev_free = cpu_to_le32(0);
-	dh->dqdh_entries = cpu_to_le16(0);
-	info->u.v2_i.dqi_free_blk = blk;
-	mark_info_dirty(sb, type);
-	/* Some strange block. We had better leave it... */
-	if ((err = write_blk(sb, type, blk, buf)) < 0)
-		return err;
-	return 0;
+	d->dqb_id = cpu_to_le32(dquot->dq_id);
+	if (qtree_entry_unused(info, dp))
+		d->dqb_itime = cpu_to_le64(1);
 }
 
-/* Remove given block from the list of blocks with free entries */
-static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
+static int v2_is_id(void *dp, struct dquot *dquot)
 {
-	dqbuf_t tmpbuf = getdqbuf();
-	struct mem_dqinfo *info = sb_dqinfo(sb, type);
-	struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
-	uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free);
-	int err;
+	struct v2_disk_dqblk *d = dp;
+	struct qtree_mem_dqinfo *info =
+			&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i;
 
-	if (!tmpbuf)
-		return -ENOMEM;
-	if (nextblk) {
-		if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0)
-			goto out_buf;
-		((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free;
-		if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0)
-			goto out_buf;
-	}
-	if (prevblk) {
-		if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0)
-			goto out_buf;
-		((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free;
-		if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0)
-			goto out_buf;
-	}
-	else {
-		info->u.v2_i.dqi_free_entry = nextblk;
-		mark_info_dirty(sb, type);
-	}
-	freedqbuf(tmpbuf);
-	dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
-	/* No matter whether write succeeds block is out of list */
-	if (write_blk(sb, type, blk, buf) < 0)
-		printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
-	return 0;
-out_buf:
-	freedqbuf(tmpbuf);
-	return err;
-}
-
-/* Insert given block to the beginning of list with free entries */
-static int insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
-{
-	dqbuf_t tmpbuf = getdqbuf();
-	struct mem_dqinfo *info = sb_dqinfo(sb, type);
-	struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
-	int err;
-
-	if (!tmpbuf)
-		return -ENOMEM;
-	dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry);
-	dh->dqdh_prev_free = cpu_to_le32(0);
-	if ((err = write_blk(sb, type, blk, buf)) < 0)
-		goto out_buf;
-	if (info->u.v2_i.dqi_free_entry) {
-		if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
-			goto out_buf;
-		((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk);
-		if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
-			goto out_buf;
-	}
-	freedqbuf(tmpbuf);
-	info->u.v2_i.dqi_free_entry = blk;
-	mark_info_dirty(sb, type);
-	return 0;
-out_buf:
-	freedqbuf(tmpbuf);
-	return err;
-}
-
-/* Find space for dquot */
-static uint find_free_dqentry(struct dquot *dquot, int *err)
-{
-	struct super_block *sb = dquot->dq_sb;
-	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
-	uint blk, i;
-	struct v2_disk_dqdbheader *dh;
-	struct v2_disk_dqblk *ddquot;
-	struct v2_disk_dqblk fakedquot;
-	dqbuf_t buf;
-
-	*err = 0;
-	if (!(buf = getdqbuf())) {
-		*err = -ENOMEM;
+	if (qtree_entry_unused(info, dp))
 		return 0;
-	}
-	dh = (struct v2_disk_dqdbheader *)buf;
-	ddquot = GETENTRIES(buf);
-	if (info->u.v2_i.dqi_free_entry) {
-		blk = info->u.v2_i.dqi_free_entry;
-		if ((*err = read_blk(sb, dquot->dq_type, blk, buf)) < 0)
-			goto out_buf;
-	}
-	else {
-		blk = get_free_dqblk(sb, dquot->dq_type);
-		if ((int)blk < 0) {
-			*err = blk;
-			freedqbuf(buf);
-			return 0;
-		}
-		memset(buf, 0, V2_DQBLKSIZE);
-		/* This is enough as block is already zeroed and entry list is empty... */
-		info->u.v2_i.dqi_free_entry = blk;
-		mark_info_dirty(sb, dquot->dq_type);
-	}
-	if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)	/* Block will be full? */
-		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
-			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
-			goto out_buf;
-		}
-	le16_add_cpu(&dh->dqdh_entries, 1);
-	memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
-	/* Find free structure in block */
-	for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
-#ifdef __QUOTA_V2_PARANOIA
-	if (i == V2_DQSTRINBLK) {
-		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
-		*err = -EIO;
-		goto out_buf;
-	}
-#endif
-	if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) {
-		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
-		goto out_buf;
-	}
-	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
-	freedqbuf(buf);
-	return blk;
-out_buf:
-	freedqbuf(buf);
-	return 0;
+	return le32_to_cpu(d->dqb_id) == dquot->dq_id;
 }
 
-/* Insert reference to structure into the trie */
-static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth)
-{
-	struct super_block *sb = dquot->dq_sb;
-	dqbuf_t buf;
-	int ret = 0, newson = 0, newact = 0;
-	__le32 *ref;
-	uint newblk;
-
-	if (!(buf = getdqbuf()))
-		return -ENOMEM;
-	if (!*treeblk) {
-		ret = get_free_dqblk(sb, dquot->dq_type);
-		if (ret < 0)
-			goto out_buf;
-		*treeblk = ret;
-		memset(buf, 0, V2_DQBLKSIZE);
-		newact = 1;
-	}
-	else {
-		if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) {
-			printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk);
-			goto out_buf;
-		}
-	}
-	ref = (__le32 *)buf;
-	newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
-	if (!newblk)
-		newson = 1;
-	if (depth == V2_DQTREEDEPTH-1) {
-#ifdef __QUOTA_V2_PARANOIA
-		if (newblk) {
-			printk(KERN_ERR "VFS: Inserting already present quota entry (block %u).\n", le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]));
-			ret = -EIO;
-			goto out_buf;
-		}
-#endif
-		newblk = find_free_dqentry(dquot, &ret);
-	}
-	else
-		ret = do_insert_tree(dquot, &newblk, depth+1);
-	if (newson && ret >= 0) {
-		ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
-		ret = write_blk(sb, dquot->dq_type, *treeblk, buf);
-	}
-	else if (newact && ret < 0)
-		put_free_dqblk(sb, dquot->dq_type, buf, *treeblk);
-out_buf:
-	freedqbuf(buf);
-	return ret;
-}
-
-/* Wrapper for inserting quota structure into tree */
-static inline int dq_insert_tree(struct dquot *dquot)
+static int v2_read_dquot(struct dquot *dquot)
 {
-	int tmp = V2_DQTREEOFF;
-	return do_insert_tree(dquot, &tmp, 0);
+	return qtree_read_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
 }
 
-/*
- *	We don't have to be afraid of deadlocks as we never have quotas on quota files...
- */
 static int v2_write_dquot(struct dquot *dquot)
 {
-	int type = dquot->dq_type;
-	ssize_t ret;
-	struct v2_disk_dqblk ddquot, empty;
-
-	/* dq_off is guarded by dqio_mutex */
-	if (!dquot->dq_off)
-		if ((ret = dq_insert_tree(dquot)) < 0) {
-			printk(KERN_ERR "VFS: Error %zd occurred while creating quota.\n", ret);
-			return ret;
-		}
-	spin_lock(&dq_data_lock);
-	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
-	/* Argh... We may need to write structure full of zeroes but that would be
-	 * treated as an empty place by the rest of the code. Format change would
-	 * be definitely cleaner but the problems probably are not worth it */
-	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-		ddquot.dqb_itime = cpu_to_le64(1);
-	spin_unlock(&dq_data_lock);
-	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
-	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
-	if (ret != sizeof(struct v2_disk_dqblk)) {
-		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
-		if (ret >= 0)
-			ret = -ENOSPC;
-	}
-	else
-		ret = 0;
-	dqstats.writes++;
-
-	return ret;
+	return qtree_write_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
 }
 
-/* Free dquot entry in data block */
-static int free_dqentry(struct dquot *dquot, uint blk)
-{
-	struct super_block *sb = dquot->dq_sb;
-	int type = dquot->dq_type;
-	struct v2_disk_dqdbheader *dh;
-	dqbuf_t buf = getdqbuf();
-	int ret = 0;
-
-	if (!buf)
-		return -ENOMEM;
-	if (dquot->dq_off >> V2_DQBLKSIZE_BITS != blk) {
-		printk(KERN_ERR "VFS: Quota structure has offset to other "
-		  "block (%u) than it should (%u).\n", blk,
-		  (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS));
-		goto out_buf;
-	}
-	if ((ret = read_blk(sb, type, blk, buf)) < 0) {
-		printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
-		goto out_buf;
-	}
-	dh = (struct v2_disk_dqdbheader *)buf;
-	le16_add_cpu(&dh->dqdh_entries, -1);
-	if (!le16_to_cpu(dh->dqdh_entries)) {	/* Block got free? */
-		if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
-		    (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
-			printk(KERN_ERR "VFS: Can't move quota data block (%u) "
-			  "to free list.\n", blk);
-			goto out_buf;
-		}
-	}
-	else {
-		memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
-		  sizeof(struct v2_disk_dqblk));
-		if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
-			/* Insert will write block itself */
-			if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
-				printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
-				goto out_buf;
-			}
-		}
-		else
-			if ((ret = write_blk(sb, type, blk, buf)) < 0) {
-				printk(KERN_ERR "VFS: Can't write quota data "
-				  "block %u\n", blk);
-				goto out_buf;
-			}
-	}
-	dquot->dq_off = 0;	/* Quota is now unattached */
-out_buf:
-	freedqbuf(buf);
-	return ret;
-}
-
-/* Remove reference to dquot from tree */
-static int remove_tree(struct dquot *dquot, uint *blk, int depth)
-{
-	struct super_block *sb = dquot->dq_sb;
-	int type = dquot->dq_type;
-	dqbuf_t buf = getdqbuf();
-	int ret = 0;
-	uint newblk;
-	__le32 *ref = (__le32 *)buf;
-	
-	if (!buf)
-		return -ENOMEM;
-	if ((ret = read_blk(sb, type, *blk, buf)) < 0) {
-		printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
-		goto out_buf;
-	}
-	newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
-	if (depth == V2_DQTREEDEPTH-1) {
-		ret = free_dqentry(dquot, newblk);
-		newblk = 0;
-	}
-	else
-		ret = remove_tree(dquot, &newblk, depth+1);
-	if (ret >= 0 && !newblk) {
-		int i;
-		ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
-		for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++);	/* Block got empty? */
-		/* Don't put the root block into the free block list */
-		if (i == V2_DQBLKSIZE && *blk != V2_DQTREEOFF) {
-			put_free_dqblk(sb, type, buf, *blk);
-			*blk = 0;
-		}
-		else
-			if ((ret = write_blk(sb, type, *blk, buf)) < 0)
-				printk(KERN_ERR "VFS: Can't write quota tree "
-				  "block %u.\n", *blk);
-	}
-out_buf:
-	freedqbuf(buf);
-	return ret;	
-}
-
-/* Delete dquot from tree */
-static int v2_delete_dquot(struct dquot *dquot)
-{
-	uint tmp = V2_DQTREEOFF;
-
-	if (!dquot->dq_off)	/* Even not allocated? */
-		return 0;
-	return remove_tree(dquot, &tmp, 0);
-}
-
-/* Find entry in block */
-static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
-{
-	dqbuf_t buf = getdqbuf();
-	loff_t ret = 0;
-	int i;
-	struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
-
-	if (!buf)
-		return -ENOMEM;
-	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
-		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
-		goto out_buf;
-	}
-	if (dquot->dq_id)
-		for (i = 0; i < V2_DQSTRINBLK &&
-		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
-	else {	/* ID 0 as a bit more complicated searching... */
-		struct v2_disk_dqblk fakedquot;
-
-		memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
-		for (i = 0; i < V2_DQSTRINBLK; i++)
-			if (!le32_to_cpu(ddquot[i].dqb_id) &&
-			    memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
-				break;
-	}
-	if (i == V2_DQSTRINBLK) {
-		printk(KERN_ERR "VFS: Quota for id %u referenced "
-		  "but not present.\n", dquot->dq_id);
-		ret = -EIO;
-		goto out_buf;
-	}
-	else
-		ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
-		  v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
-out_buf:
-	freedqbuf(buf);
-	return ret;
-}
-
-/* Find entry for given id in the tree */
-static loff_t find_tree_dqentry(struct dquot *dquot, uint blk, int depth)
-{
-	dqbuf_t buf = getdqbuf();
-	loff_t ret = 0;
-	__le32 *ref = (__le32 *)buf;
-
-	if (!buf)
-		return -ENOMEM;
-	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
-		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
-		goto out_buf;
-	}
-	ret = 0;
-	blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
-	if (!blk)	/* No reference? */
-		goto out_buf;
-	if (depth < V2_DQTREEDEPTH-1)
-		ret = find_tree_dqentry(dquot, blk, depth+1);
-	else
-		ret = find_block_dqentry(dquot, blk);
-out_buf:
-	freedqbuf(buf);
-	return ret;
-}
-
-/* Find entry for given id in the tree - wrapper function */
-static inline loff_t find_dqentry(struct dquot *dquot)
-{
-	return find_tree_dqentry(dquot, V2_DQTREEOFF, 0);
-}
-
-static int v2_read_dquot(struct dquot *dquot)
-{
-	int type = dquot->dq_type;
-	loff_t offset;
-	struct v2_disk_dqblk ddquot, empty;
-	int ret = 0;
-
-#ifdef __QUOTA_V2_PARANOIA
-	/* Invalidated quota? */
-	if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
-		printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
-		return -EIO;
-	}
-#endif
-	offset = find_dqentry(dquot);
-	if (offset <= 0) {	/* Entry not present? */
-		if (offset < 0)
-			printk(KERN_ERR "VFS: Can't read quota "
-			  "structure for id %u.\n", dquot->dq_id);
-		dquot->dq_off = 0;
-		set_bit(DQ_FAKE_B, &dquot->dq_flags);
-		memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
-		ret = offset;
-	}
-	else {
-		dquot->dq_off = offset;
-		if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
-		    (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
-		    != sizeof(struct v2_disk_dqblk)) {
-			if (ret >= 0)
-				ret = -EIO;
-			printk(KERN_ERR "VFS: Error while reading quota "
-			  "structure for id %u.\n", dquot->dq_id);
-			memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
-		}
-		else {
-			ret = 0;
-			/* We need to escape back all-zero structure */
-			memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-			empty.dqb_itime = cpu_to_le64(1);
-			if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-				ddquot.dqb_itime = 0;
-		}
-		disk2memdqb(&dquot->dq_dqb, &ddquot);
-		if (!dquot->dq_dqb.dqb_bhardlimit &&
-			!dquot->dq_dqb.dqb_bsoftlimit &&
-			!dquot->dq_dqb.dqb_ihardlimit &&
-			!dquot->dq_dqb.dqb_isoftlimit)
-			set_bit(DQ_FAKE_B, &dquot->dq_flags);
-	}
-	dqstats.reads++;
-
-	return ret;
-}
-
-/* Check whether dquot should not be deleted. We know we are
- * the only one operating on dquot (thanks to dq_lock) */
 static int v2_release_dquot(struct dquot *dquot)
 {
-	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace))
-		return v2_delete_dquot(dquot);
-	return 0;
+	return qtree_release_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
 }
 
 static struct quota_format_ops v2_format_ops = {
diff --git a/fs/quotaio_v2.h b/fs/quotaio_v2.h
index 303d7cb..530fe58 100644
--- a/fs/quotaio_v2.h
+++ b/fs/quotaio_v2.h
@@ -21,6 +21,12 @@
 	0		/* GRPQUOTA */\
 }
 
+/* First generic header */
+struct v2_disk_dqheader {
+	__le32 dqh_magic;	/* Magic number identifying file */
+	__le32 dqh_version;	/* File version */
+};
+
 /*
  * The following structure defines the format of the disk quota file
  * (as it appears on disk) - the file is a radix tree whose leaves point
@@ -38,15 +44,6 @@ struct v2_disk_dqblk {
 	__le64 dqb_itime;	/* time limit for excessive inode use */
 };
 
-/*
- * Here are header structures as written on disk and their in-memory copies
- */
-/* First generic header */
-struct v2_disk_dqheader {
-	__le32 dqh_magic;	/* Magic number identifying file */
-	__le32 dqh_version;	/* File version */
-};
-
 /* Header with type and version specific information */
 struct v2_disk_dqinfo {
 	__le32 dqi_bgrace;	/* Time before block soft limit becomes hard limit */
@@ -57,23 +54,7 @@ struct v2_disk_dqinfo {
 	__le32 dqi_free_entry;	/* Number of block with at least one free entry */
 };
 
-/*
- *  Structure of header of block with quota structures. It is padded to 16 bytes so
- *  there will be space for exactly 21 quota-entries in a block
- */
-struct v2_disk_dqdbheader {
-	__le32 dqdh_next_free;	/* Number of next block with free entry */
-	__le32 dqdh_prev_free;	/* Number of previous block with free entry */
-	__le16 dqdh_entries;	/* Number of valid entries in block */
-	__le16 dqdh_pad1;
-	__le32 dqdh_pad2;
-};
-
 #define V2_DQINFOOFF	sizeof(struct v2_disk_dqheader)	/* Offset of info header in file */
-#define V2_DQBLKSIZE_BITS	10
-#define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
-#define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
-#define V2_DQTREEDEPTH	4		/* Depth of quota tree */
-#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
+#define V2_DQBLKSIZE_BITS 10				/* Size of leaf block in tree */
 
 #endif /* _LINUX_QUOTAIO_V2_H */
diff --git a/include/linux/dqblk_qtree.h b/include/linux/dqblk_qtree.h
new file mode 100644
index 0000000..82a1652
--- /dev/null
+++ b/include/linux/dqblk_qtree.h
@@ -0,0 +1,56 @@
+/*
+ *	Definitions of structures and functions for quota formats using trie
+ */
+
+#ifndef _LINUX_DQBLK_QTREE_H
+#define _LINUX_DQBLK_QTREE_H
+
+#include <linux/types.h>
+
+/* Numbers of blocks needed for updates - we count with the smallest
+ * possible block size (1024) */
+#define QTREE_INIT_ALLOC 4
+#define QTREE_INIT_REWRITE 2
+#define QTREE_DEL_ALLOC 0
+#define QTREE_DEL_REWRITE 6
+
+struct dquot;
+
+/* Operations */
+struct qtree_fmt_operations {
+	void (*mem2disk_dqblk)(void *disk, struct dquot *dquot);	/* Convert given entry from in memory format to disk one */
+	void (*disk2mem_dqblk)(struct dquot *dquot, void *disk);	/* Convert given entry from disk format to in memory one */
+	int (*is_id)(void *disk, struct dquot *dquot);	/* Is this structure for given id? */
+};
+
+/* Inmemory copy of version specific information */
+struct qtree_mem_dqinfo {
+	struct super_block *dqi_sb;	/* Sb quota is on */
+	int dqi_type;			/* Quota type */
+	unsigned int dqi_blocks;	/* # of blocks in quota file */
+	unsigned int dqi_free_blk;	/* First block in list of free blocks */
+	unsigned int dqi_free_entry;	/* First block with free entry */
+	unsigned int dqi_blocksize_bits;	/* Block size of quota file */
+	unsigned int dqi_entry_size;	/* Size of quota entry in quota file */
+	unsigned int dqi_usable_bs;	/* Space usable in block for quota data */
+	unsigned int dqi_qtree_depth;	/* Precomputed depth of quota tree */
+	struct qtree_fmt_operations *dqi_ops;	/* Operations for entry manipulation */
+};
+
+int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk);
+static inline int qtree_depth(struct qtree_mem_dqinfo *info)
+{
+	unsigned int epb = info->dqi_usable_bs >> 2;
+	unsigned long long entries = epb;
+	int i;
+
+	for (i = 1; entries < (1ULL << 32); i++)
+		entries *= epb;
+	return i;
+}
+
+#endif /* _LINUX_DQBLK_QTREE_H */
diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h
index 4f85332..e5e22a7 100644
--- a/include/linux/dqblk_v2.h
+++ b/include/linux/dqblk_v2.h
@@ -1,26 +1,23 @@
 /*
- *	Definitions of structures for vfsv0 quota format
+ *  Definitions for vfsv0 quota format
  */
 
 #ifndef _LINUX_DQBLK_V2_H
 #define _LINUX_DQBLK_V2_H
 
-#include <linux/types.h>
+#include <linux/dqblk_qtree.h>
 
-/* id numbers of quota format */
+/* Id number of quota format */
 #define QFMT_VFS_V0 2
 
 /* Numbers of blocks needed for updates */
-#define V2_INIT_ALLOC 4
-#define V2_INIT_REWRITE 2
-#define V2_DEL_ALLOC 0
-#define V2_DEL_REWRITE 6
+#define V2_INIT_ALLOC QTREE_INIT_ALLOC
+#define V2_INIT_REWRITE QTREE_INIT_REWRITE
+#define V2_DEL_ALLOC QTREE_DEL_ALLOC
+#define V2_DEL_REWRITE QTREE_DEL_REWRITE
 
-/* Inmemory copy of version specific information */
 struct v2_mem_dqinfo {
-	unsigned int dqi_blocks;
-	unsigned int dqi_free_blk;
-	unsigned int dqi_free_entry;
+	struct qtree_mem_dqinfo i;
 };
 
 #endif /* _LINUX_DQBLK_V2_H */
-- 
cgit v0.10.2


From e3d4d56b9715e40ded2a84d0d4fa7f3b6c58983c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 2 Oct 2008 18:44:14 +0200
Subject: quota: Convert union in mem_dqinfo to a pointer

Coming quota support for OCFS2 is going to need quite a bit
of additional per-sb quota information. Moreover having fs.h
include all the types needed for this structure would be a
pain in the a**. So remove the union from mem_dqinfo and add
a private pointer for filesystem's use.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index a87f102..b618b56 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -71,6 +71,7 @@ static int v2_read_file_info(struct super_block *sb, int type)
 {
 	struct v2_disk_dqinfo dinfo;
 	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct qtree_mem_dqinfo *qinfo;
 	ssize_t size;
 
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
@@ -80,22 +81,29 @@ static int v2_read_file_info(struct super_block *sb, int type)
 			sb->s_id);
 		return -1;
 	}
+	info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS);
+	if (!info->dqi_priv) {
+		printk(KERN_WARNING
+		       "Not enough memory for quota information structure.\n");
+		return -1;
+	}
+	qinfo = info->dqi_priv;
 	/* limits are stored as unsigned 32-bit data */
 	info->dqi_maxblimit = 0xffffffff;
 	info->dqi_maxilimit = 0xffffffff;
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
-	info->u.v2_i.i.dqi_sb = sb;
-	info->u.v2_i.i.dqi_type = type;
-	info->u.v2_i.i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
-	info->u.v2_i.i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
-	info->u.v2_i.i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
-	info->u.v2_i.i.dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
-	info->u.v2_i.i.dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
-	info->u.v2_i.i.dqi_qtree_depth = qtree_depth(&info->u.v2_i.i);
-	info->u.v2_i.i.dqi_entry_size = sizeof(struct v2_disk_dqblk);
-	info->u.v2_i.i.dqi_ops = &v2_qtree_ops;
+	qinfo->dqi_sb = sb;
+	qinfo->dqi_type = type;
+	qinfo->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+	qinfo->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+	qinfo->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+	qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
+	qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
+	qinfo->dqi_qtree_depth = qtree_depth(qinfo);
+	qinfo->dqi_entry_size = sizeof(struct v2_disk_dqblk);
+	qinfo->dqi_ops = &v2_qtree_ops;
 	return 0;
 }
 
@@ -104,6 +112,7 @@ static int v2_write_file_info(struct super_block *sb, int type)
 {
 	struct v2_disk_dqinfo dinfo;
 	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct qtree_mem_dqinfo *qinfo = info->dqi_priv;
 	ssize_t size;
 
 	spin_lock(&dq_data_lock);
@@ -112,9 +121,9 @@ static int v2_write_file_info(struct super_block *sb, int type)
 	dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
 	dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
 	spin_unlock(&dq_data_lock);
-	dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.i.dqi_blocks);
-	dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.i.dqi_free_blk);
-	dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.i.dqi_free_entry);
+	dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks);
+	dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk);
+	dinfo.dqi_free_entry = cpu_to_le32(qinfo->dqi_free_entry);
 	size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
 	if (size != sizeof(struct v2_disk_dqinfo)) {
@@ -150,7 +159,7 @@ static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
 	struct v2_disk_dqblk *d = dp;
 	struct mem_dqblk *m = &dquot->dq_dqb;
 	struct qtree_mem_dqinfo *info =
-			&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i;
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
 
 	d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
 	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
@@ -169,7 +178,7 @@ static int v2_is_id(void *dp, struct dquot *dquot)
 {
 	struct v2_disk_dqblk *d = dp;
 	struct qtree_mem_dqinfo *info =
-			&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i;
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
 
 	if (qtree_entry_unused(info, dp))
 		return 0;
@@ -178,24 +187,30 @@ static int v2_is_id(void *dp, struct dquot *dquot)
 
 static int v2_read_dquot(struct dquot *dquot)
 {
-	return qtree_read_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
+	return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
 }
 
 static int v2_write_dquot(struct dquot *dquot)
 {
-	return qtree_write_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
+	return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
 }
 
 static int v2_release_dquot(struct dquot *dquot)
 {
-	return qtree_release_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
+	return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
+}
+
+static int v2_free_file_info(struct super_block *sb, int type)
+{
+	kfree(sb_dqinfo(sb, type)->dqi_priv);
+	return 0;
 }
 
 static struct quota_format_ops v2_format_ops = {
 	.check_quota_file	= v2_check_quota_file,
 	.read_file_info		= v2_read_file_info,
 	.write_file_info	= v2_write_file_info,
-	.free_file_info		= NULL,
+	.free_file_info		= v2_free_file_info,
 	.read_dqblk		= v2_read_dquot,
 	.commit_dqblk		= v2_write_dquot,
 	.release_dqblk		= v2_release_dquot,
diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h
index 57f1250..9cea901 100644
--- a/include/linux/dqblk_v1.h
+++ b/include/linux/dqblk_v1.h
@@ -17,8 +17,4 @@
 #define V1_DEL_ALLOC 0
 #define V1_DEL_REWRITE 2
 
-/* Special information about quotafile */
-struct v1_mem_dqinfo {
-};
-
 #endif	/* _LINUX_DQBLK_V1_H */
diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h
index e5e22a7..ff8af1b 100644
--- a/include/linux/dqblk_v2.h
+++ b/include/linux/dqblk_v2.h
@@ -16,8 +16,4 @@
 #define V2_DEL_ALLOC QTREE_DEL_ALLOC
 #define V2_DEL_REWRITE QTREE_DEL_REWRITE
 
-struct v2_mem_dqinfo {
-	struct qtree_mem_dqinfo i;
-};
-
 #endif /* _LINUX_DQBLK_V2_H */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 80b8807..e51dfdc 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -208,10 +208,7 @@ struct mem_dqinfo {
 	unsigned int dqi_igrace;
 	qsize_t dqi_maxblimit;
 	qsize_t dqi_maxilimit;
-	union {
-		struct v1_mem_dqinfo v1_i;
-		struct v2_mem_dqinfo v2_i;
-	} u;
+	void *dqi_priv;
 };
 
 struct super_block;
-- 
cgit v0.10.2


From db49d2df489f727096438706a5428115e84a3f0d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 1 Oct 2008 18:21:39 +0200
Subject: quota: Allow negative usage of space and inodes

For clustered filesystems, it can happen that space / inode usage goes
negative temporarily (because some node is allocating another node
is freeing and they are not completely in sync). So let quota code
allow this and change qsize_t so a signed type so that we don't
underflow the variables.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/dquot.c b/fs/dquot.c
index 74185c3..9c78ffe 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -847,7 +847,8 @@ static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
 
 static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
 {
-	if (dquot->dq_dqb.dqb_curinodes > number)
+	if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
+	    dquot->dq_dqb.dqb_curinodes >= number)
 		dquot->dq_dqb.dqb_curinodes -= number;
 	else
 		dquot->dq_dqb.dqb_curinodes = 0;
@@ -858,7 +859,8 @@ static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
 
 static inline void dquot_decr_space(struct dquot *dquot, qsize_t number)
 {
-	if (dquot->dq_dqb.dqb_curspace > number)
+	if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
+	    dquot->dq_dqb.dqb_curspace >= number)
 		dquot->dq_dqb.dqb_curspace -= number;
 	else
 		dquot->dq_dqb.dqb_curspace = 0;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index e51dfdc..75bf761 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -168,7 +168,7 @@ enum {
 #include <asm/atomic.h>
 
 typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
-typedef __u64 qsize_t;          /* Type in which we store sizes */
+typedef long long qsize_t;	/* Type in which we store sizes */
 
 extern spinlock_t dq_data_lock;
 
@@ -336,6 +336,7 @@ enum {
 						 * responsible for setting
 						 * S_NOQUOTA, S_NOATIME flags
 						 */
+#define DQUOT_NEGATIVE_USAGE	(1 << 7)	/* Allow negative quota usage */
 
 static inline unsigned int dquot_state_flag(unsigned int flags, int type)
 {
-- 
cgit v0.10.2


From 4d59bce4f9eaf26d6d9046b56a2f1c0c7f20981d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 2 Oct 2008 16:48:10 +0200
Subject: quota: Keep which entries were set by SETQUOTA quotactl

Quota in a clustered environment needs to synchronize quota information
among cluster nodes. This means we have to occasionally update some
information in dquot from disk / network. On the other hand we have to
be careful not to overwrite changes administrator did via SETQUOTA.
So indicate in dquot->dq_flags which entries have been set by SETQUOTA
and quota format can clear these flags when it properly propagated
the changes.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/dquot.c b/fs/dquot.c
index 9c78ffe..8922672 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -2010,25 +2010,33 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 	if (di->dqb_valid & QIF_SPACE) {
 		dm->dqb_curspace = di->dqb_curspace;
 		check_blim = 1;
+		__set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_BLIMITS) {
 		dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
 		dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
 		check_blim = 1;
+		__set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_INODES) {
 		dm->dqb_curinodes = di->dqb_curinodes;
 		check_ilim = 1;
+		__set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_ILIMITS) {
 		dm->dqb_isoftlimit = di->dqb_isoftlimit;
 		dm->dqb_ihardlimit = di->dqb_ihardlimit;
 		check_ilim = 1;
+		__set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_BTIME)
+	if (di->dqb_valid & QIF_BTIME) {
 		dm->dqb_btime = di->dqb_btime;
-	if (di->dqb_valid & QIF_ITIME)
+		__set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
+	}
+	if (di->dqb_valid & QIF_ITIME) {
 		dm->dqb_itime = di->dqb_itime;
+		__set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
+	}
 
 	if (check_blim) {
 		if (!dm->dqb_bsoftlimit || dm->dqb_curspace < dm->dqb_bsoftlimit) {
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 75bf761..6d98885 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -80,12 +80,21 @@
  * Quota structure used for communication with userspace via quotactl
  * Following flags are used to specify which fields are valid
  */
-#define QIF_BLIMITS	1
-#define QIF_SPACE	2
-#define QIF_ILIMITS	4
-#define QIF_INODES	8
-#define QIF_BTIME	16
-#define QIF_ITIME	32
+enum {
+	QIF_BLIMITS_B = 0,
+	QIF_SPACE_B,
+	QIF_ILIMITS_B,
+	QIF_INODES_B,
+	QIF_BTIME_B,
+	QIF_ITIME_B,
+};
+
+#define QIF_BLIMITS	(1 << QIF_BLIMITS_B)
+#define QIF_SPACE	(1 << QIF_SPACE_B)
+#define QIF_ILIMITS	(1 << QIF_ILIMITS_B)
+#define QIF_INODES	(1 << QIF_INODES_B)
+#define QIF_BTIME	(1 << QIF_BTIME_B)
+#define QIF_ITIME	(1 << QIF_ITIME_B)
 #define QIF_LIMITS	(QIF_BLIMITS | QIF_ILIMITS)
 #define QIF_USAGE	(QIF_SPACE | QIF_INODES)
 #define QIF_TIMES	(QIF_BTIME | QIF_ITIME)
@@ -242,6 +251,11 @@ extern struct dqstats dqstats;
 #define DQ_FAKE_B	3	/* no limits only usage */
 #define DQ_READ_B	4	/* dquot was read into memory */
 #define DQ_ACTIVE_B	5	/* dquot is active (dquot_release not called) */
+#define DQ_LASTSET_B	6	/* Following 6 bits (see QIF_) are reserved\
+				 * for the mask of entries set via SETQUOTA\
+				 * quotactl. They are set under dq_data_lock\
+				 * and the quota format handling dquot can\
+				 * clear them when it sees fit. */
 
 struct dquot {
 	struct hlist_node dq_hash;	/* Hash list in memory */
-- 
cgit v0.10.2


From 571b46e40bebb0d57130ca24c4a84dfd553adb91 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 30 Oct 2008 09:17:52 +0100
Subject: quota: Update version number

Increase reported version number of quota support since quota core has changed
significantly. Also remove __DQUOT_NUM_VERSION__ since nobody uses it.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/include/linux/quota.h b/include/linux/quota.h
index 6d98885..ec82beb 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -36,8 +36,7 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 
-#define __DQUOT_VERSION__	"dquot_6.5.1"
-#define __DQUOT_NUM_VERSION__	6*10000+5*100+1
+#define __DQUOT_VERSION__	"dquot_6.5.2"
 
 #define MAXQUOTAS 2
 #define USRQUOTA  0		/* element used for user quotas */
-- 
cgit v0.10.2


From 3d9ea253a0e73dccaa869888ec2ceb17ea76c810 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 10 Oct 2008 16:12:23 +0200
Subject: quota: Add helpers to allow ocfs2 specific quota initialization,
 freeing and recovery

OCFS2 needs to peek whether quota structure is already in memory so
that it can avoid expensive cluster locking in that case. Similarly
when freeing dquots, it checks whether it is the last quota structure
user or not. Finally, it needs to get reference to dquot structure for
specified id and quota type when recovering quota file after crash.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/dquot.c b/fs/dquot.c
index 8922672..ae8fd9e 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -211,8 +211,6 @@ static struct hlist_head *dquot_hash;
 
 struct dqstats dqstats;
 
-static void dqput(struct dquot *dquot);
-
 static inline unsigned int
 hashfn(const struct super_block *sb, unsigned int id, int type)
 {
@@ -568,7 +566,7 @@ static struct shrinker dqcache_shrinker = {
  * NOTE: If you change this function please check whether dqput_blocks() works right...
  * MUST be called with either dqptr_sem or dqonoff_mutex held
  */
-static void dqput(struct dquot *dquot)
+void dqput(struct dquot *dquot)
 {
 	int ret;
 
@@ -662,10 +660,28 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 }
 
 /*
+ * Check whether dquot is in memory.
+ * MUST be called with either dqptr_sem or dqonoff_mutex held
+ */
+int dquot_is_cached(struct super_block *sb, unsigned int id, int type)
+{
+	unsigned int hashent = hashfn(sb, id, type);
+	int ret = 0;
+
+        if (!sb_has_quota_active(sb, type))
+		return 0;
+	spin_lock(&dq_list_lock);
+	if (find_dquot(hashent, sb, id, type) != NODQUOT)
+		ret = 1;
+	spin_unlock(&dq_list_lock);
+	return ret;
+}
+
+/*
  * Get reference to dquot
  * MUST be called with either dqptr_sem or dqonoff_mutex held
  */
-static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
+struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
 {
 	unsigned int hashent = hashfn(sb, id, type);
 	struct dquot *dquot, *empty = NODQUOT;
@@ -1184,17 +1200,23 @@ out_err:
  * 	Release all quotas referenced by inode
  *	Transaction must be started at an entry
  */
-int dquot_drop(struct inode *inode)
+int dquot_drop_locked(struct inode *inode)
 {
 	int cnt;
 
-	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (inode->i_dquot[cnt] != NODQUOT) {
 			dqput(inode->i_dquot[cnt]);
 			inode->i_dquot[cnt] = NODQUOT;
 		}
 	}
+	return 0;
+}
+
+int dquot_drop(struct inode *inode)
+{
+	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	dquot_drop_locked(inode);
 	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return 0;
 }
@@ -2308,7 +2330,11 @@ EXPORT_SYMBOL(dquot_release);
 EXPORT_SYMBOL(dquot_mark_dquot_dirty);
 EXPORT_SYMBOL(dquot_initialize);
 EXPORT_SYMBOL(dquot_drop);
+EXPORT_SYMBOL(dquot_drop_locked);
 EXPORT_SYMBOL(vfs_dq_drop);
+EXPORT_SYMBOL(dqget);
+EXPORT_SYMBOL(dqput);
+EXPORT_SYMBOL(dquot_is_cached);
 EXPORT_SYMBOL(dquot_alloc_space);
 EXPORT_SYMBOL(dquot_alloc_inode);
 EXPORT_SYMBOL(dquot_free_space);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index e840ca5..e3a1027 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -24,6 +24,10 @@ void sync_dquots(struct super_block *sb, int type);
 
 int dquot_initialize(struct inode *inode, int type);
 int dquot_drop(struct inode *inode);
+int dquot_drop_locked(struct inode *inode);
+struct dquot *dqget(struct super_block *sb, unsigned int id, int type);
+void dqput(struct dquot *dquot);
+int dquot_is_cached(struct super_block *sb, unsigned int id, int type);
 
 int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
 int dquot_alloc_inode(const struct inode *inode, qsize_t number);
-- 
cgit v0.10.2


From 12c77527e4138bc3b17d17b0e0c909e4fc84924f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 20 Oct 2008 17:05:00 +0200
Subject: quota: Implement function for scanning active dquots

OCFS2 needs to scan all active dquots once in a while and sync quota
information among cluster nodes. Provide a helper function for it so
that it does not have to reimplement internally a list which VFS
already has. Moreover this function is probably going to be useful
for other clustered filesystems if they decide to use VFS quotas.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/dquot.c b/fs/dquot.c
index ae8fd9e..075dc76 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -476,6 +476,41 @@ restart:
 	spin_unlock(&dq_list_lock);
 }
 
+/* Call callback for every active dquot on given filesystem */
+int dquot_scan_active(struct super_block *sb,
+		      int (*fn)(struct dquot *dquot, unsigned long priv),
+		      unsigned long priv)
+{
+	struct dquot *dquot, *old_dquot = NULL;
+	int ret = 0;
+
+	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+	spin_lock(&dq_list_lock);
+	list_for_each_entry(dquot, &inuse_list, dq_inuse) {
+		if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+			continue;
+		if (dquot->dq_sb != sb)
+			continue;
+		/* Now we have active dquot so we can just increase use count */
+		atomic_inc(&dquot->dq_count);
+		dqstats.lookups++;
+		spin_unlock(&dq_list_lock);
+		dqput(old_dquot);
+		old_dquot = dquot;
+		ret = fn(dquot, priv);
+		if (ret < 0)
+			goto out;
+		spin_lock(&dq_list_lock);
+		/* We are safe to continue now because our dquot could not
+		 * be moved out of the inuse list while we hold the reference */
+	}
+	spin_unlock(&dq_list_lock);
+out:
+	dqput(old_dquot);
+	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+	return ret;
+}
+
 int vfs_quota_sync(struct super_block *sb, int type)
 {
 	struct list_head *dirty;
@@ -2318,6 +2353,7 @@ EXPORT_SYMBOL(vfs_quota_on_path);
 EXPORT_SYMBOL(vfs_quota_on_mount);
 EXPORT_SYMBOL(vfs_quota_disable);
 EXPORT_SYMBOL(vfs_quota_off);
+EXPORT_SYMBOL(dquot_scan_active);
 EXPORT_SYMBOL(vfs_quota_sync);
 EXPORT_SYMBOL(vfs_get_dqinfo);
 EXPORT_SYMBOL(vfs_set_dqinfo);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index e3a1027..f491394 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -28,6 +28,9 @@ int dquot_drop_locked(struct inode *inode);
 struct dquot *dqget(struct super_block *sb, unsigned int id, int type);
 void dqput(struct dquot *dquot);
 int dquot_is_cached(struct super_block *sb, unsigned int id, int type);
+int dquot_scan_active(struct super_block *sb,
+		      int (*fn)(struct dquot *dquot, unsigned long priv),
+		      unsigned long priv);
 
 int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
 int dquot_alloc_inode(const struct inode *inode, qsize_t number);
-- 
cgit v0.10.2


From 90e86a63eadf1a3b2f19b68d82150dc63fe01443 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 27 Aug 2008 22:30:28 +0200
Subject: ocfs2: Support nested transactions

OCFS2 can easily support nested transactions. We just have to
take care and not spoil statistics acquire semaphore unnecessarily.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 12b62a3..11a1178 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -256,11 +256,9 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
 	BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
 	BUG_ON(max_buffs <= 0);
 
-	/* JBD might support this, but our journalling code doesn't yet. */
-	if (journal_current_handle()) {
-		mlog(ML_ERROR, "Recursive transaction attempted!\n");
-		BUG();
-	}
+	/* Nested transaction? Just return the handle... */
+	if (journal_current_handle())
+		return jbd2_journal_start(journal, max_buffs);
 
 	down_read(&osb->journal->j_trans_barrier);
 
@@ -285,16 +283,18 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
 int ocfs2_commit_trans(struct ocfs2_super *osb,
 		       handle_t *handle)
 {
-	int ret;
+	int ret, nested;
 	struct ocfs2_journal *journal = osb->journal;
 
 	BUG_ON(!handle);
 
+	nested = handle->h_ref > 1;
 	ret = jbd2_journal_stop(handle);
 	if (ret < 0)
 		mlog_errno(ret);
 
-	up_read(&journal->j_trans_barrier);
+	if (!nested)
+		up_read(&journal->j_trans_barrier);
 
 	return ret;
 }
-- 
cgit v0.10.2


From 1a224ad11eeb190da4a123e156601aad1bb67f24 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 15:43:36 +0200
Subject: ocfs2: Assign feature bits and system inodes to quota feature and
 quota files

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/Kconfig b/fs/Kconfig
index c1ce3d8..f9b6e29 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -189,6 +189,8 @@ config OCFS2_FS
 	select CONFIGFS_FS
 	select JBD2
 	select CRC32
+	select QUOTA
+	select QUOTA_TREE
 	help
 	  OCFS2 is a general purpose extent based shared disk cluster file
 	  system with many similarities to ext3. It supports 64 bit inode
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index ec3497b..ec25d99 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -283,6 +283,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 		mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
 	} else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) {
 		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
+	} else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) {
+		inode->i_flags |= S_NOQUOTA;
 	} else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) {
 		mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino);
 		/* we can't actually hit this as read_inode can't
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 5e0c0d0..06e3bd6 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -94,7 +94,7 @@
 					 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
 					 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
 					 | OCFS2_FEATURE_INCOMPAT_XATTR)
-#define OCFS2_FEATURE_RO_COMPAT_SUPP	OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
+#define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN)
 
 /*
  * Heartbeat-only devices are missing journals and other files.  The
@@ -163,6 +163,12 @@
  */
 #define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN	0x0001
 
+/*
+ * Maintain quota information for this filesystem
+ */
+#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA	0x0002
+#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA	0x0004
+
 /* The byte offset of the first backup block will be 1G.
  * The following will be 4G, 16G, 64G, 256G and 1T.
  */
@@ -192,6 +198,7 @@
 #define OCFS2_HEARTBEAT_FL	(0x00000200)	/* Heartbeat area */
 #define OCFS2_CHAIN_FL		(0x00000400)	/* Chain allocator */
 #define OCFS2_DEALLOC_FL	(0x00000800)	/* Truncate log */
+#define OCFS2_QUOTA_FL		(0x00001000)	/* Quota file */
 
 /*
  * Flags on ocfs2_dinode.i_dyn_features
@@ -329,13 +336,17 @@ enum {
 #define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
 	HEARTBEAT_SYSTEM_INODE,
 	GLOBAL_BITMAP_SYSTEM_INODE,
-#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GLOBAL_BITMAP_SYSTEM_INODE
+	USER_QUOTA_SYSTEM_INODE,
+	GROUP_QUOTA_SYSTEM_INODE,
+#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE
 	ORPHAN_DIR_SYSTEM_INODE,
 	EXTENT_ALLOC_SYSTEM_INODE,
 	INODE_ALLOC_SYSTEM_INODE,
 	JOURNAL_SYSTEM_INODE,
 	LOCAL_ALLOC_SYSTEM_INODE,
 	TRUNCATE_LOG_SYSTEM_INODE,
+	LOCAL_USER_QUOTA_SYSTEM_INODE,
+	LOCAL_GROUP_QUOTA_SYSTEM_INODE,
 	NUM_SYSTEM_INODES
 };
 
@@ -349,6 +360,8 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
 	[SLOT_MAP_SYSTEM_INODE]			= { "slot_map", 0, S_IFREG | 0644 },
 	[HEARTBEAT_SYSTEM_INODE]		= { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 },
 	[GLOBAL_BITMAP_SYSTEM_INODE]		= { "global_bitmap", 0, S_IFREG | 0644 },
+	[USER_QUOTA_SYSTEM_INODE]		= { "aquota.user", OCFS2_QUOTA_FL, S_IFREG | 0644 },
+	[GROUP_QUOTA_SYSTEM_INODE]		= { "aquota.group", OCFS2_QUOTA_FL, S_IFREG | 0644 },
 
 	/* Slot-specific system inodes (one copy per slot) */
 	[ORPHAN_DIR_SYSTEM_INODE]		= { "orphan_dir:%04d", 0, S_IFDIR | 0755 },
@@ -356,7 +369,9 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
 	[INODE_ALLOC_SYSTEM_INODE]		= { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
 	[JOURNAL_SYSTEM_INODE]			= { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 },
 	[LOCAL_ALLOC_SYSTEM_INODE]		= { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 },
-	[TRUNCATE_LOG_SYSTEM_INODE]		= { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 }
+	[TRUNCATE_LOG_SYSTEM_INODE]		= { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 },
+	[LOCAL_USER_QUOTA_SYSTEM_INODE]		= { "aquota.user:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
+	[LOCAL_GROUP_QUOTA_SYSTEM_INODE]	= { "aquota.group:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
 };
 
 /* Parameter passed from mount.ocfs2 to module */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 9e7accc..41bb019 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -225,6 +225,19 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait)
 	return 0;
 }
 
+static int ocfs2_need_system_inode(struct ocfs2_super *osb, int ino)
+{
+	if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
+	    && (ino == USER_QUOTA_SYSTEM_INODE
+		|| ino == LOCAL_USER_QUOTA_SYSTEM_INODE))
+		return 0;
+	if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
+	    && (ino == GROUP_QUOTA_SYSTEM_INODE
+		|| ino == LOCAL_GROUP_QUOTA_SYSTEM_INODE))
+		return 0;
+	return 1;
+}
+
 static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
 {
 	struct inode *new = NULL;
@@ -251,6 +264,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
 
 	for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE;
 	     i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
+		if (!ocfs2_need_system_inode(osb, i))
+			continue;
 		new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
 		if (!new) {
 			ocfs2_release_system_inodes(osb);
@@ -281,6 +296,8 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
 	for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
 	     i < NUM_SYSTEM_INODES;
 	     i++) {
+		if (!ocfs2_need_system_inode(osb, i))
+			continue;
 		new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
 		if (!new) {
 			ocfs2_release_system_inodes(osb);
-- 
cgit v0.10.2


From bbbd0eb34bf801dee01e345785959a75258f6567 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 21 Aug 2008 18:22:30 +0200
Subject: ocfs2: Mark system files as not subject to quota accounting

Mark system files as not subject to quota accounting. This prevents
possible recursions into quota code and thus deadlocks.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index ec25d99..50dbc48 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -275,8 +275,10 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
 	inode->i_nlink = le16_to_cpu(fe->i_links_count);
 
-	if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
+	if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
 		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
+		inode->i_flags |= S_NOQUOTA;
+	}
 
 	if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
 		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
-- 
cgit v0.10.2


From 9e33d69f553aaf11377307e8d6f82deb3385e351 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 25 Aug 2008 19:56:50 +0200
Subject: ocfs2: Implementation of local and global quota file handling

For each quota type each node has local quota file. In this file it stores
changes users have made to disk usage via this node. Once in a while this
information is synced to global file (and thus with other nodes) so that
limits enforcement at least aproximately works.

Global quota files contain all the information about usage and limits. It's
mostly handled by the generic VFS code (which implements a trie of structures
inside a quota file). We only have to provide functions to convert structures
from on-disk format to in-memory one. We also have to provide wrappers for
various quota functions starting transactions and acquiring necessary cluster
locks before the actual IO is really started.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index e9ef5d1..7e4b361 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -35,6 +35,8 @@ ocfs2-objs := \
 	sysfile.o 		\
 	uptodate.o		\
 	ver.o			\
+	quota_local.o		\
+	quota_global.o		\
 	xattr.o
 
 ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 57670c6..7e72a81 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -113,6 +113,7 @@
 #define ML_QUORUM	0x0000000008000000ULL /* net connection quorum */
 #define ML_EXPORT	0x0000000010000000ULL /* ocfs2 export operations */
 #define ML_XATTR	0x0000000020000000ULL /* ocfs2 extended attributes */
+#define ML_QUOTA	0x0000000040000000ULL /* ocfs2 quota operations */
 /* bits that are infrequently given and frequently matched in the high word */
 #define ML_ERROR	0x0000000100000000ULL /* sent to KERN_ERR */
 #define ML_NOTICE	0x0000000200000000ULL /* setn to KERN_NOTICE */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 9f2a7f7..058aa86 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -32,6 +32,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/time.h>
+#include <linux/quotaops.h>
 
 #define MLOG_MASK_PREFIX ML_DLM_GLUE
 #include <cluster/masklog.h>
@@ -51,6 +52,7 @@
 #include "slot_map.h"
 #include "super.h"
 #include "uptodate.h"
+#include "quota.h"
 
 #include "buffer_head_io.h"
 
@@ -68,6 +70,7 @@ struct ocfs2_mask_waiter {
 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
+static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
 
 /*
  * Return value from ->downconvert_worker functions.
@@ -102,6 +105,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
 static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
 				     struct ocfs2_lock_res *lockres);
 
+static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
 
 #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
 
@@ -258,6 +262,12 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
 	.flags		= 0,
 };
 
+static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
+	.set_lvb	= ocfs2_set_qinfo_lvb,
+	.get_osb	= ocfs2_get_qinfo_osb,
+	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
+};
+
 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
 {
 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@@ -279,6 +289,13 @@ static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res
 	return (struct ocfs2_dentry_lock *)lockres->l_priv;
 }
 
+static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
+{
+	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
+
+	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
+}
+
 static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
 {
 	if (lockres->l_ops->get_osb)
@@ -507,6 +524,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
 	return OCFS2_SB(inode->i_sb);
 }
 
+static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
+{
+	struct ocfs2_mem_dqinfo *info = lockres->l_priv;
+
+	return OCFS2_SB(info->dqi_gi.dqi_sb);
+}
+
 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
 {
 	struct ocfs2_file_private *fp = lockres->l_priv;
@@ -609,6 +633,17 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
 	lockres->l_flags |= OCFS2_LOCK_NOCACHE;
 }
 
+void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
+			       struct ocfs2_mem_dqinfo *info)
+{
+	ocfs2_lock_res_init_once(lockres);
+	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
+			      0, lockres->l_name);
+	ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
+				   OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
+				   info);
+}
+
 void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
 {
 	mlog_entry_void();
@@ -3445,6 +3480,117 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
 	return UNBLOCK_CONTINUE_POST;
 }
 
+static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
+{
+	struct ocfs2_qinfo_lvb *lvb;
+	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
+	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
+					    oinfo->dqi_gi.dqi_type);
+
+	mlog_entry_void();
+
+	lvb = (struct ocfs2_qinfo_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
+	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
+	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
+	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
+	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
+	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
+	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
+	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
+
+	mlog_exit_void();
+}
+
+void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
+	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
+	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+
+	mlog_entry_void();
+	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
+		ocfs2_cluster_unlock(osb, lockres, level);
+	mlog_exit_void();
+}
+
+static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
+{
+	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
+					    oinfo->dqi_gi.dqi_type);
+	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
+	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
+	struct buffer_head *bh;
+	struct ocfs2_global_disk_dqinfo *gdinfo;
+	int status = 0;
+
+	if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
+		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
+		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
+		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
+		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
+		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
+		oinfo->dqi_gi.dqi_free_entry =
+					be32_to_cpu(lvb->lvb_free_entry);
+	} else {
+		bh = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &status);
+		if (!bh) {
+			mlog_errno(status);
+			goto bail;
+		}
+		gdinfo = (struct ocfs2_global_disk_dqinfo *)
+					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
+		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
+		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
+		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
+		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
+		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
+		oinfo->dqi_gi.dqi_free_entry =
+					le32_to_cpu(gdinfo->dqi_free_entry);
+		brelse(bh);
+		ocfs2_track_lock_refresh(lockres);
+	}
+
+bail:
+	return status;
+}
+
+/* Lock quota info, this function expects at least shared lock on the quota file
+ * so that we can safely refresh quota info from disk. */
+int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
+	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
+	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+	int status = 0;
+
+	mlog_entry_void();
+
+	/* On RO devices, locking really isn't needed... */
+	if (ocfs2_is_hard_readonly(osb)) {
+		if (ex)
+			status = -EROFS;
+		goto bail;
+	}
+	if (ocfs2_mount_local(osb))
+		goto bail;
+
+	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+	if (!ocfs2_should_refresh_lock_res(lockres))
+		goto bail;
+	/* OK, we have the lock but we need to refresh the quota info */
+	status = ocfs2_refresh_qinfo(oinfo);
+	if (status)
+		ocfs2_qinfo_unlock(oinfo, ex);
+	ocfs2_complete_lock_res_refresh(lockres, status);
+bail:
+	mlog_exit(status);
+	return status;
+}
+
 /*
  * This is the filesystem locking protocol.  It provides the lock handling
  * hooks for the underlying DLM.  It has a maximum version number.
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 2bb01f0..3f8d998 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -49,6 +49,19 @@ struct ocfs2_meta_lvb {
 	__be32       lvb_reserved2;
 };
 
+#define OCFS2_QINFO_LVB_VERSION 1
+
+struct ocfs2_qinfo_lvb {
+	__u8	lvb_version;
+	__u8	lvb_reserved[3];
+	__be32	lvb_bgrace;
+	__be32	lvb_igrace;
+	__be32	lvb_syncms;
+	__be32	lvb_blocks;
+	__be32	lvb_free_blk;
+	__be32	lvb_free_entry;
+};
+
 /* ocfs2_inode_lock_full() 'arg_flags' flags */
 /* don't wait on recovery. */
 #define OCFS2_META_LOCK_RECOVERY	(0x01)
@@ -69,6 +82,9 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
 struct ocfs2_file_private;
 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
 			      struct ocfs2_file_private *fp);
+struct ocfs2_mem_dqinfo;
+void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
+                               struct ocfs2_mem_dqinfo *info);
 void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
 int ocfs2_create_new_inode_locks(struct inode *inode);
 int ocfs2_drop_inode_locks(struct inode *inode);
@@ -103,6 +119,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex);
 void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
 int ocfs2_file_lock(struct file *file, int ex, int trylock);
 void ocfs2_file_unlock(struct file *file);
+int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex);
+void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex);
+
 
 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
 void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 41001d5..372d965 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -304,9 +304,9 @@ bail:
 	return status;
 }
 
-static int ocfs2_simple_size_update(struct inode *inode,
-				    struct buffer_head *di_bh,
-				    u64 new_i_size)
+int ocfs2_simple_size_update(struct inode *inode,
+			     struct buffer_head *di_bh,
+			     u64 new_i_size)
 {
 	int ret;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index e92382c..172f9fb 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -51,6 +51,9 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
 			 struct ocfs2_alloc_context *data_ac,
 			 struct ocfs2_alloc_context *meta_ac,
 			 enum ocfs2_alloc_restarted *reason_ret);
+int ocfs2_simple_size_update(struct inode *inode,
+			     struct buffer_head *di_bh,
+			     u64 new_i_size);
 int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
 			  u64 zero_to);
 int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index b79c371..eb3c302 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -142,6 +142,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
 			   struct buffer_head *bh);
 int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
 int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
+struct buffer_head *ocfs2_bread(struct inode *inode,
+				int block, int *err, int reada);
 
 void ocfs2_set_inode_flags(struct inode *inode);
 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 06e3bd6..0a5ac79 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -883,6 +883,109 @@ static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe)
 	return xe->xe_type & OCFS2_XATTR_TYPE_MASK;
 }
 
+/*
+ *  On disk structures for global quota file
+ */
+
+/* Magic numbers and known versions for global quota files */
+#define OCFS2_GLOBAL_QMAGICS {\
+	0x0cf52470, /* USRQUOTA */ \
+	0x0cf52471  /* GRPQUOTA */ \
+}
+
+#define OCFS2_GLOBAL_QVERSIONS {\
+	0, \
+	0, \
+}
+
+
+/* Each block of each quota file has a certain fixed number of bytes reserved
+ * for OCFS2 internal use at its end. OCFS2 can use it for things like
+ * checksums, etc. */
+#define OCFS2_QBLK_RESERVED_SPACE 8
+
+/* Generic header of all quota files */
+struct ocfs2_disk_dqheader {
+	__le32 dqh_magic;	/* Magic number identifying file */
+	__le32 dqh_version;	/* Quota format version */
+};
+
+#define OCFS2_GLOBAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
+
+/* Information header of global quota file (immediately follows the generic
+ * header) */
+struct ocfs2_global_disk_dqinfo {
+/*00*/	__le32 dqi_bgrace;	/* Grace time for space softlimit excess */
+	__le32 dqi_igrace;	/* Grace time for inode softlimit excess */
+	__le32 dqi_syncms;	/* Time after which we sync local changes to
+				 * global quota file */
+	__le32 dqi_blocks;	/* Number of blocks in quota file */
+/*10*/	__le32 dqi_free_blk;	/* First free block in quota file */
+	__le32 dqi_free_entry;	/* First block with free dquot entry in quota
+				 * file */
+};
+
+/* Structure with global user / group information. We reserve some space
+ * for future use. */
+struct ocfs2_global_disk_dqblk {
+/*00*/	__le32 dqb_id;          /* ID the structure belongs to */
+	__le32 dqb_use_count;   /* Number of nodes having reference to this structure */
+	__le64 dqb_ihardlimit;  /* absolute limit on allocated inodes */
+/*10*/	__le64 dqb_isoftlimit;  /* preferred inode limit */
+	__le64 dqb_curinodes;   /* current # allocated inodes */
+/*20*/	__le64 dqb_bhardlimit;  /* absolute limit on disk space */
+	__le64 dqb_bsoftlimit;  /* preferred limit on disk space */
+/*30*/	__le64 dqb_curspace;    /* current space occupied */
+	__le64 dqb_btime;       /* time limit for excessive disk use */
+/*40*/	__le64 dqb_itime;       /* time limit for excessive inode use */
+	__le64 dqb_pad1;
+/*50*/	__le64 dqb_pad2;
+};
+
+/*
+ *  On-disk structures for local quota file
+ */
+
+/* Magic numbers and known versions for local quota files */
+#define OCFS2_LOCAL_QMAGICS {\
+	0x0cf524c0, /* USRQUOTA */ \
+	0x0cf524c1  /* GRPQUOTA */ \
+}
+
+#define OCFS2_LOCAL_QVERSIONS {\
+	0, \
+	0, \
+}
+
+/* Quota flags in dqinfo header */
+#define OLQF_CLEAN	0x0001	/* Quota file is empty (this should be after\
+				 * quota has been cleanly turned off) */
+
+#define OCFS2_LOCAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
+
+/* Information header of local quota file (immediately follows the generic
+ * header) */
+struct ocfs2_local_disk_dqinfo {
+	__le32 dqi_flags;	/* Flags for quota file */
+	__le32 dqi_chunks;	/* Number of chunks of quota structures
+				 * with a bitmap */
+	__le32 dqi_blocks;	/* Number of blocks allocated for quota file */
+};
+
+/* Header of one chunk of a quota file */
+struct ocfs2_local_disk_chunk {
+	__le32 dqc_free;	/* Number of free entries in the bitmap */
+	u8 dqc_bitmap[0];	/* Bitmap of entries in the corresponding
+				 * chunk of quota file */
+};
+
+/* One entry in local quota file */
+struct ocfs2_local_disk_dqblk {
+/*00*/	__le64 dqb_id;		/* id this quota applies to */
+	__le64 dqb_spacemod;	/* Change in the amount of used space */
+/*10*/	__le64 dqb_inodemod;	/* Change in the amount of used inodes */
+};
+
 #ifdef __KERNEL__
 static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
 {
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 82c200f..eb6f50c 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -46,6 +46,7 @@ enum ocfs2_lock_type {
 	OCFS2_LOCK_TYPE_DENTRY,
 	OCFS2_LOCK_TYPE_OPEN,
 	OCFS2_LOCK_TYPE_FLOCK,
+	OCFS2_LOCK_TYPE_QINFO,
 	OCFS2_NUM_LOCK_TYPES
 };
 
@@ -77,6 +78,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
 		case OCFS2_LOCK_TYPE_FLOCK:
 			c = 'F';
 			break;
+		case OCFS2_LOCK_TYPE_QINFO:
+			c = 'Q';
+			break;
 		default:
 			c = '\0';
 	}
@@ -95,6 +99,7 @@ static char *ocfs2_lock_type_strings[] = {
 	[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
 	[OCFS2_LOCK_TYPE_OPEN] = "Open",
 	[OCFS2_LOCK_TYPE_FLOCK] = "Flock",
+	[OCFS2_LOCK_TYPE_QINFO] = "Quota",
 };
 
 static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
new file mode 100644
index 0000000..1f1c863
--- /dev/null
+++ b/fs/ocfs2/quota.h
@@ -0,0 +1,93 @@
+/*
+ * quota.h for OCFS2
+ *
+ * On disk quota structures for local and global quota file, in-memory
+ * structures.
+ *
+ */
+
+#ifndef _OCFS2_QUOTA_H
+#define _OCFS2_QUOTA_H
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/quota.h>
+#include <linux/list.h>
+#include <linux/dqblk_qtree.h>
+
+#include "ocfs2.h"
+
+/* Common stuff */
+/* id number of quota format */
+#define QFMT_OCFS2 3
+
+/*
+ * In-memory structures
+ */
+struct ocfs2_dquot {
+	struct dquot dq_dquot;	/* Generic VFS dquot */
+	loff_t dq_local_off;	/* Offset in the local quota file */
+	struct ocfs2_quota_chunk *dq_chunk;	/* Chunk dquot is in */
+	unsigned int dq_use_count;	/* Number of nodes having reference to this entry in global quota file */
+	s64 dq_origspace;	/* Last globally synced space usage */
+	s64 dq_originodes;	/* Last globally synced inode usage */
+};
+
+/* In-memory structure with quota header information */
+struct ocfs2_mem_dqinfo {
+	unsigned int dqi_type;		/* Quota type this structure describes */
+	unsigned int dqi_chunks;	/* Number of chunks in local quota file */
+	unsigned int dqi_blocks;	/* Number of blocks allocated for local quota file */
+	unsigned int dqi_syncms;	/* How often should we sync with other nodes */
+	struct list_head dqi_chunk;	/* List of chunks */
+	struct inode *dqi_gqinode;	/* Global quota file inode */
+	struct ocfs2_lock_res dqi_gqlock;	/* Lock protecting quota information structure */
+	struct buffer_head *dqi_gqi_bh;	/* Buffer head with global quota file inode - set only if inode lock is obtained */
+	int dqi_gqi_count;		/* Number of holders of dqi_gqi_bh */
+	struct buffer_head *dqi_lqi_bh;	/* Buffer head with local quota file inode */
+	struct buffer_head *dqi_ibh;	/* Buffer with information header */
+	struct qtree_mem_dqinfo dqi_gi;	/* Info about global file */
+};
+
+static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot)
+{
+	return container_of(dquot, struct ocfs2_dquot, dq_dquot);
+}
+
+struct ocfs2_quota_chunk {
+	struct list_head qc_chunk;	/* List of quotafile chunks */
+	int qc_num;			/* Number of quota chunk */
+	struct buffer_head *qc_headerbh;	/* Buffer head with chunk header */
+};
+
+extern struct kmem_cache *ocfs2_dquot_cachep;
+extern struct kmem_cache *ocfs2_qf_chunk_cachep;
+
+extern struct qtree_fmt_operations ocfs2_global_ops;
+
+ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
+			 size_t len, loff_t off);
+ssize_t ocfs2_quota_write(struct super_block *sb, int type,
+			  const char *data, size_t len, loff_t off);
+int ocfs2_global_read_info(struct super_block *sb, int type);
+int ocfs2_global_write_info(struct super_block *sb, int type);
+int ocfs2_global_read_dquot(struct dquot *dquot);
+int __ocfs2_sync_dquot(struct dquot *dquot, int freeing);
+static inline int ocfs2_sync_dquot(struct dquot *dquot)
+{
+	return __ocfs2_sync_dquot(dquot, 0);
+}
+static inline int ocfs2_global_release_dquot(struct dquot *dquot)
+{
+	return __ocfs2_sync_dquot(dquot, 1);
+}
+
+int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
+void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
+struct buffer_head *ocfs2_read_quota_block(struct inode *inode,
+					   int block, int *err);
+
+extern struct dquot_operations ocfs2_quota_operations;
+extern struct quota_format_type ocfs2_quota_format;
+
+#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
new file mode 100644
index 0000000..af8340c
--- /dev/null
+++ b/fs/ocfs2/quota_global.c
@@ -0,0 +1,919 @@
+/*
+ *  Implementation of operations over global quota file
+ */
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/quotaops.h>
+#include <linux/dqblk_qtree.h>
+
+#define MLOG_MASK_PREFIX ML_QUOTA
+#include <cluster/masklog.h>
+
+#include "ocfs2_fs.h"
+#include "ocfs2.h"
+#include "alloc.h"
+#include "inode.h"
+#include "journal.h"
+#include "file.h"
+#include "sysfile.h"
+#include "dlmglue.h"
+#include "uptodate.h"
+#include "quota.h"
+
+static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
+{
+	struct ocfs2_global_disk_dqblk *d = dp;
+	struct mem_dqblk *m = &dquot->dq_dqb;
+
+	/* Update from disk only entries not set by the admin */
+	if (!test_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags)) {
+		m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit);
+		m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit);
+	}
+	if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
+		m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes);
+	if (!test_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags)) {
+		m->dqb_bhardlimit = le64_to_cpu(d->dqb_bhardlimit);
+		m->dqb_bsoftlimit = le64_to_cpu(d->dqb_bsoftlimit);
+	}
+	if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
+		m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
+	if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags))
+		m->dqb_btime = le64_to_cpu(d->dqb_btime);
+	if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags))
+		m->dqb_itime = le64_to_cpu(d->dqb_itime);
+	OCFS2_DQUOT(dquot)->dq_use_count = le32_to_cpu(d->dqb_use_count);
+}
+
+static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
+{
+	struct ocfs2_global_disk_dqblk *d = dp;
+	struct mem_dqblk *m = &dquot->dq_dqb;
+
+	d->dqb_id = cpu_to_le32(dquot->dq_id);
+	d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count);
+	d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
+	d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
+	d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
+	d->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
+	d->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
+	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+	d->dqb_btime = cpu_to_le64(m->dqb_btime);
+	d->dqb_itime = cpu_to_le64(m->dqb_itime);
+}
+
+static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
+{
+	struct ocfs2_global_disk_dqblk *d = dp;
+	struct ocfs2_mem_dqinfo *oinfo =
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
+	if (qtree_entry_unused(&oinfo->dqi_gi, dp))
+		return 0;
+	return le32_to_cpu(d->dqb_id) == dquot->dq_id;
+}
+
+struct qtree_fmt_operations ocfs2_global_ops = {
+	.mem2disk_dqblk = ocfs2_global_mem2diskdqb,
+	.disk2mem_dqblk = ocfs2_global_disk2memdqb,
+	.is_id = ocfs2_global_is_id,
+};
+
+
+struct buffer_head *ocfs2_read_quota_block(struct inode *inode,
+					   int block, int *err)
+{
+	struct buffer_head *tmp = NULL;
+
+	*err = ocfs2_read_virt_blocks(inode, block, 1, &tmp, 0, NULL);
+	if (*err)
+		mlog_errno(*err);
+
+	return tmp;
+}
+
+static struct buffer_head *ocfs2_get_quota_block(struct inode *inode,
+						 int block, int *err)
+{
+	u64 pblock, pcount;
+	struct buffer_head *bh;
+
+	down_read(&OCFS2_I(inode)->ip_alloc_sem);
+	*err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount,
+					   NULL);
+	up_read(&OCFS2_I(inode)->ip_alloc_sem);
+	if (*err) {
+		mlog_errno(*err);
+		return NULL;
+	}
+	bh = sb_getblk(inode->i_sb, pblock);
+	if (!bh) {
+		*err = -EIO;
+		mlog_errno(*err);
+	}
+	return bh;
+}
+
+/* Read data from global quotafile - avoid pagecache and such because we cannot
+ * afford acquiring the locks... We use quota cluster lock to serialize
+ * operations. Caller is responsible for acquiring it. */
+ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
+			 size_t len, loff_t off)
+{
+	struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+	struct inode *gqinode = oinfo->dqi_gqinode;
+	loff_t i_size = i_size_read(gqinode);
+	int offset = off & (sb->s_blocksize - 1);
+	sector_t blk = off >> sb->s_blocksize_bits;
+	int err = 0;
+	struct buffer_head *bh;
+	size_t toread, tocopy;
+
+	if (off > i_size)
+		return 0;
+	if (off + len > i_size)
+		len = i_size - off;
+	toread = len;
+	while (toread > 0) {
+		tocopy = min((size_t)(sb->s_blocksize - offset), toread);
+		bh = ocfs2_read_quota_block(gqinode, blk, &err);
+		if (!bh) {
+			mlog_errno(err);
+			return err;
+		}
+		memcpy(data, bh->b_data + offset, tocopy);
+		brelse(bh);
+		offset = 0;
+		toread -= tocopy;
+		data += tocopy;
+		blk++;
+	}
+	return len;
+}
+
+/* Write to quotafile (we know the transaction is already started and has
+ * enough credits) */
+ssize_t ocfs2_quota_write(struct super_block *sb, int type,
+			  const char *data, size_t len, loff_t off)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+	struct inode *gqinode = oinfo->dqi_gqinode;
+	int offset = off & (sb->s_blocksize - 1);
+	sector_t blk = off >> sb->s_blocksize_bits;
+	int err = 0, new = 0;
+	struct buffer_head *bh;
+	handle_t *handle = journal_current_handle();
+
+	if (!handle) {
+		mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled "
+		     "because transaction was not started.\n",
+		     (unsigned long long)off, (unsigned long long)len);
+		return -EIO;
+	}
+	if (len > sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset) {
+		WARN_ON(1);
+		len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset;
+	}
+
+	mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
+	if (gqinode->i_size < off + len) {
+		down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+		err = ocfs2_extend_no_holes(gqinode, off + len, off);
+		up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+		if (err < 0)
+			goto out;
+		err = ocfs2_simple_size_update(gqinode,
+					       oinfo->dqi_gqi_bh,
+					       off + len);
+		if (err < 0)
+			goto out;
+		new = 1;
+	}
+	/* Not rewriting whole block? */
+	if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) &&
+	    !new) {
+		bh = ocfs2_read_quota_block(gqinode, blk, &err);
+		if (!bh) {
+			mlog_errno(err);
+			return err;
+		}
+		err = ocfs2_journal_access(handle, gqinode, bh,
+						OCFS2_JOURNAL_ACCESS_WRITE);
+	} else {
+		bh = ocfs2_get_quota_block(gqinode, blk, &err);
+		if (!bh) {
+			mlog_errno(err);
+			return err;
+		}
+		err = ocfs2_journal_access(handle, gqinode, bh,
+						OCFS2_JOURNAL_ACCESS_CREATE);
+	}
+	if (err < 0) {
+		brelse(bh);
+		goto out;
+	}
+	lock_buffer(bh);
+	if (new)
+		memset(bh->b_data, 0, sb->s_blocksize);
+	memcpy(bh->b_data + offset, data, len);
+	flush_dcache_page(bh->b_page);
+	unlock_buffer(bh);
+	ocfs2_set_buffer_uptodate(gqinode, bh);
+	err = ocfs2_journal_dirty(handle, bh);
+	brelse(bh);
+	if (err < 0)
+		goto out;
+out:
+	if (err) {
+		mutex_unlock(&gqinode->i_mutex);
+		mlog_errno(err);
+		return err;
+	}
+	gqinode->i_version++;
+	ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh);
+	mutex_unlock(&gqinode->i_mutex);
+	return len;
+}
+
+int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+	int status;
+	struct buffer_head *bh = NULL;
+
+	status = ocfs2_inode_lock(oinfo->dqi_gqinode, &bh, ex);
+	if (status < 0)
+		return status;
+	spin_lock(&dq_data_lock);
+	if (!oinfo->dqi_gqi_count++)
+		oinfo->dqi_gqi_bh = bh;
+	else
+		WARN_ON(bh != oinfo->dqi_gqi_bh);
+	spin_unlock(&dq_data_lock);
+	return 0;
+}
+
+void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+	ocfs2_inode_unlock(oinfo->dqi_gqinode, ex);
+	brelse(oinfo->dqi_gqi_bh);
+	spin_lock(&dq_data_lock);
+	if (!--oinfo->dqi_gqi_count)
+		oinfo->dqi_gqi_bh = NULL;
+	spin_unlock(&dq_data_lock);
+}
+
+/* Read information header from global quota file */
+int ocfs2_global_read_info(struct super_block *sb, int type)
+{
+	struct inode *gqinode = NULL;
+	unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
+					GROUP_QUOTA_SYSTEM_INODE };
+	struct ocfs2_global_disk_dqinfo dinfo;
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+	int status;
+
+	mlog_entry_void();
+
+	/* Read global header */
+	gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
+			OCFS2_INVALID_SLOT);
+	if (!gqinode) {
+		mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n",
+			type);
+		status = -EINVAL;
+		goto out_err;
+	}
+	oinfo->dqi_gi.dqi_sb = sb;
+	oinfo->dqi_gi.dqi_type = type;
+	ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo);
+	oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk);
+	oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops;
+	oinfo->dqi_gqi_bh = NULL;
+	oinfo->dqi_gqi_count = 0;
+	oinfo->dqi_gqinode = gqinode;
+	status = ocfs2_lock_global_qf(oinfo, 0);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_err;
+	}
+	status = sb->s_op->quota_read(sb, type, (char *)&dinfo,
+				      sizeof(struct ocfs2_global_disk_dqinfo),
+				      OCFS2_GLOBAL_INFO_OFF);
+	ocfs2_unlock_global_qf(oinfo, 0);
+	if (status != sizeof(struct ocfs2_global_disk_dqinfo)) {
+		mlog(ML_ERROR, "Cannot read global quota info (%d).\n",
+		     status);
+		if (status >= 0)
+			status = -EIO;
+		mlog_errno(status);
+		goto out_err;
+	}
+	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
+	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
+	oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
+	oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+	oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+	oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+	oinfo->dqi_gi.dqi_blocksize_bits = sb->s_blocksize_bits;
+	oinfo->dqi_gi.dqi_usable_bs = sb->s_blocksize -
+						OCFS2_QBLK_RESERVED_SPACE;
+	oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
+out_err:
+	mlog_exit(status);
+	return status;
+}
+
+/* Write information to global quota file. Expects exlusive lock on quota
+ * file inode and quota info */
+static int __ocfs2_global_write_info(struct super_block *sb, int type)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+	struct ocfs2_global_disk_dqinfo dinfo;
+	ssize_t size;
+
+	spin_lock(&dq_data_lock);
+	info->dqi_flags &= ~DQF_INFO_DIRTY;
+	dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
+	dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
+	spin_unlock(&dq_data_lock);
+	dinfo.dqi_syncms = cpu_to_le32(oinfo->dqi_syncms);
+	dinfo.dqi_blocks = cpu_to_le32(oinfo->dqi_gi.dqi_blocks);
+	dinfo.dqi_free_blk = cpu_to_le32(oinfo->dqi_gi.dqi_free_blk);
+	dinfo.dqi_free_entry = cpu_to_le32(oinfo->dqi_gi.dqi_free_entry);
+	size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
+				     sizeof(struct ocfs2_global_disk_dqinfo),
+				     OCFS2_GLOBAL_INFO_OFF);
+	if (size != sizeof(struct ocfs2_global_disk_dqinfo)) {
+		mlog(ML_ERROR, "Cannot write global quota info structure\n");
+		if (size >= 0)
+			size = -EIO;
+		return size;
+	}
+	return 0;
+}
+
+int ocfs2_global_write_info(struct super_block *sb, int type)
+{
+	int err;
+	struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+
+	err = ocfs2_qinfo_lock(info, 1);
+	if (err < 0)
+		return err;
+	err = __ocfs2_global_write_info(sb, type);
+	ocfs2_qinfo_unlock(info, 1);
+	return err;
+}
+
+/* Read in information from global quota file and acquire a reference to it.
+ * dquot_acquire() has already started the transaction and locked quota file */
+int ocfs2_global_read_dquot(struct dquot *dquot)
+{
+	int err, err2, ex = 0;
+	struct ocfs2_mem_dqinfo *info =
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
+	err = ocfs2_qinfo_lock(info, 0);
+	if (err < 0)
+		goto out;
+	err = qtree_read_dquot(&info->dqi_gi, dquot);
+	if (err < 0)
+		goto out_qlock;
+	OCFS2_DQUOT(dquot)->dq_use_count++;
+	OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
+	OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+	if (!dquot->dq_off) {	/* No real quota entry? */
+		/* Upgrade to exclusive lock for allocation */
+		err = ocfs2_qinfo_lock(info, 1);
+		if (err < 0)
+			goto out_qlock;
+		ex = 1;
+	}
+	err = qtree_write_dquot(&info->dqi_gi, dquot);
+	if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
+		err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
+		if (!err)
+			err = err2;
+	}
+out_qlock:
+	if (ex)
+		ocfs2_qinfo_unlock(info, 1);
+	ocfs2_qinfo_unlock(info, 0);
+out:
+	if (err < 0)
+		mlog_errno(err);
+	return err;
+}
+
+/* Sync local information about quota modifications with global quota file.
+ * Caller must have started the transaction and obtained exclusive lock for
+ * global quota file inode */
+int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
+{
+	int err, err2;
+	struct super_block *sb = dquot->dq_sb;
+	int type = dquot->dq_type;
+	struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+	struct ocfs2_global_disk_dqblk dqblk;
+	s64 spacechange, inodechange;
+	time_t olditime, oldbtime;
+
+	err = sb->s_op->quota_read(sb, type, (char *)&dqblk,
+				   sizeof(struct ocfs2_global_disk_dqblk),
+				   dquot->dq_off);
+	if (err != sizeof(struct ocfs2_global_disk_dqblk)) {
+		if (err >= 0) {
+			mlog(ML_ERROR, "Short read from global quota file "
+				       "(%u read)\n", err);
+			err = -EIO;
+		}
+		goto out;
+	}
+
+	/* Update space and inode usage. Get also other information from
+	 * global quota file so that we don't overwrite any changes there.
+	 * We are */
+	spin_lock(&dq_data_lock);
+	spacechange = dquot->dq_dqb.dqb_curspace -
+					OCFS2_DQUOT(dquot)->dq_origspace;
+	inodechange = dquot->dq_dqb.dqb_curinodes -
+					OCFS2_DQUOT(dquot)->dq_originodes;
+	olditime = dquot->dq_dqb.dqb_itime;
+	oldbtime = dquot->dq_dqb.dqb_btime;
+	ocfs2_global_disk2memdqb(dquot, &dqblk);
+	mlog(0, "Syncing global dquot %d space %lld+%lld, inodes %lld+%lld\n",
+	     dquot->dq_id, dquot->dq_dqb.dqb_curspace, spacechange,
+	     dquot->dq_dqb.dqb_curinodes, inodechange);
+	if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
+		dquot->dq_dqb.dqb_curspace += spacechange;
+	if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
+		dquot->dq_dqb.dqb_curinodes += inodechange;
+	/* Set properly space grace time... */
+	if (dquot->dq_dqb.dqb_bsoftlimit &&
+	    dquot->dq_dqb.dqb_curspace > dquot->dq_dqb.dqb_bsoftlimit) {
+		if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags) &&
+		    oldbtime > 0) {
+			if (dquot->dq_dqb.dqb_btime > 0)
+				dquot->dq_dqb.dqb_btime =
+					min(dquot->dq_dqb.dqb_btime, oldbtime);
+			else
+				dquot->dq_dqb.dqb_btime = oldbtime;
+		}
+	} else {
+		dquot->dq_dqb.dqb_btime = 0;
+		clear_bit(DQ_BLKS_B, &dquot->dq_flags);
+	}
+	/* Set properly inode grace time... */
+	if (dquot->dq_dqb.dqb_isoftlimit &&
+	    dquot->dq_dqb.dqb_curinodes > dquot->dq_dqb.dqb_isoftlimit) {
+		if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags) &&
+		    olditime > 0) {
+			if (dquot->dq_dqb.dqb_itime > 0)
+				dquot->dq_dqb.dqb_itime =
+					min(dquot->dq_dqb.dqb_itime, olditime);
+			else
+				dquot->dq_dqb.dqb_itime = olditime;
+		}
+	} else {
+		dquot->dq_dqb.dqb_itime = 0;
+		clear_bit(DQ_INODES_B, &dquot->dq_flags);
+	}
+	/* All information is properly updated, clear the flags */
+	__clear_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
+	__clear_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
+	__clear_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
+	__clear_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
+	__clear_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
+	__clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
+	OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
+	OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+	spin_unlock(&dq_data_lock);
+	err = ocfs2_qinfo_lock(info, freeing);
+	if (err < 0) {
+		mlog(ML_ERROR, "Failed to lock quota info, loosing quota write"
+			       " (type=%d, id=%u)\n", dquot->dq_type,
+			       (unsigned)dquot->dq_id);
+		goto out;
+	}
+	if (freeing)
+		OCFS2_DQUOT(dquot)->dq_use_count--;
+	err = qtree_write_dquot(&info->dqi_gi, dquot);
+	if (err < 0)
+		goto out_qlock;
+	if (freeing && !OCFS2_DQUOT(dquot)->dq_use_count) {
+		err = qtree_release_dquot(&info->dqi_gi, dquot);
+		if (info_dirty(sb_dqinfo(sb, type))) {
+			err2 = __ocfs2_global_write_info(sb, type);
+			if (!err)
+				err = err2;
+		}
+	}
+out_qlock:
+	ocfs2_qinfo_unlock(info, freeing);
+out:
+	if (err < 0)
+		mlog_errno(err);
+	return err;
+}
+
+/*
+ *  Wrappers for generic quota functions
+ */
+
+static int ocfs2_write_dquot(struct dquot *dquot)
+{
+	handle_t *handle;
+	struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+	int status = 0;
+
+	mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+
+	handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out;
+	}
+	status = dquot_commit(dquot);
+	ocfs2_commit_trans(osb, handle);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
+{
+	struct ocfs2_mem_dqinfo *oinfo;
+	int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+				    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
+
+	if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
+		return 0;
+
+	oinfo = sb_dqinfo(sb, type)->dqi_priv;
+	/* We modify tree, leaf block, global info, local chunk header,
+	 * global and local inode */
+	return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
+	       2 * OCFS2_INODE_UPDATE_CREDITS;
+}
+
+static int ocfs2_release_dquot(struct dquot *dquot)
+{
+	handle_t *handle;
+	struct ocfs2_mem_dqinfo *oinfo =
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+	struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+	int status = 0;
+
+	mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+
+	status = ocfs2_lock_global_qf(oinfo, 1);
+	if (status < 0)
+		goto out;
+	handle = ocfs2_start_trans(osb,
+		ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_type));
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out_ilock;
+	}
+	status = dquot_release(dquot);
+	ocfs2_commit_trans(osb, handle);
+out_ilock:
+	ocfs2_unlock_global_qf(oinfo, 1);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
+{
+	struct ocfs2_mem_dqinfo *oinfo;
+	int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+				    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
+	struct ocfs2_dinode *lfe, *gfe;
+
+	if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
+		return 0;
+
+	oinfo = sb_dqinfo(sb, type)->dqi_priv;
+	gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
+	lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
+	/* We can extend local file + global file. In local file we
+	 * can modify info, chunk header block and dquot block. In
+	 * global file we can modify info, tree and leaf block */
+	return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
+	       ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
+	       3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
+}
+
+static int ocfs2_acquire_dquot(struct dquot *dquot)
+{
+	handle_t *handle;
+	struct ocfs2_mem_dqinfo *oinfo =
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+	struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+	int status = 0;
+
+	mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+	/* We need an exclusive lock, because we're going to update use count
+	 * and instantiate possibly new dquot structure */
+	status = ocfs2_lock_global_qf(oinfo, 1);
+	if (status < 0)
+		goto out;
+	handle = ocfs2_start_trans(osb,
+		ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out_ilock;
+	}
+	status = dquot_acquire(dquot);
+	ocfs2_commit_trans(osb, handle);
+out_ilock:
+	ocfs2_unlock_global_qf(oinfo, 1);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
+{
+	unsigned long mask = (1 << (DQ_LASTSET_B + QIF_ILIMITS_B)) |
+			     (1 << (DQ_LASTSET_B + QIF_BLIMITS_B)) |
+			     (1 << (DQ_LASTSET_B + QIF_INODES_B)) |
+			     (1 << (DQ_LASTSET_B + QIF_SPACE_B)) |
+			     (1 << (DQ_LASTSET_B + QIF_BTIME_B)) |
+			     (1 << (DQ_LASTSET_B + QIF_ITIME_B));
+	int sync = 0;
+	int status;
+	struct super_block *sb = dquot->dq_sb;
+	int type = dquot->dq_type;
+	struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+	handle_t *handle;
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+
+	mlog_entry("id=%u, type=%d", dquot->dq_id, type);
+	dquot_mark_dquot_dirty(dquot);
+
+	/* In case user set some limits, sync dquot immediately to global
+	 * quota file so that information propagates quicker */
+	spin_lock(&dq_data_lock);
+	if (dquot->dq_flags & mask)
+		sync = 1;
+	spin_unlock(&dq_data_lock);
+	if (!sync) {
+		status = ocfs2_write_dquot(dquot);
+		goto out;
+	}
+	status = ocfs2_lock_global_qf(oinfo, 1);
+	if (status < 0)
+		goto out;
+	handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out_ilock;
+	}
+	status = ocfs2_sync_dquot(dquot);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_trans;
+	}
+	/* Now write updated local dquot structure */
+	status = dquot_commit(dquot);
+out_trans:
+	ocfs2_commit_trans(osb, handle);
+out_ilock:
+	ocfs2_unlock_global_qf(oinfo, 1);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+/* This should happen only after set_dqinfo(). */
+static int ocfs2_write_info(struct super_block *sb, int type)
+{
+	handle_t *handle;
+	int status = 0;
+	struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+
+	mlog_entry_void();
+
+	status = ocfs2_lock_global_qf(oinfo, 1);
+	if (status < 0)
+		goto out;
+	handle = ocfs2_start_trans(OCFS2_SB(sb), OCFS2_QINFO_WRITE_CREDITS);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out_ilock;
+	}
+	status = dquot_commit_info(sb, type);
+	ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out_ilock:
+	ocfs2_unlock_global_qf(oinfo, 1);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+/* This is difficult. We have to lock quota inode and start transaction
+ * in this function but we don't want to take the penalty of exlusive
+ * quota file lock when we are just going to use cached structures. So
+ * we just take read lock check whether we have dquot cached and if so,
+ * we don't have to take the write lock... */
+static int ocfs2_dquot_initialize(struct inode *inode, int type)
+{
+	handle_t *handle = NULL;
+	int status = 0;
+	struct super_block *sb = inode->i_sb;
+	struct ocfs2_mem_dqinfo *oinfo;
+	int exclusive = 0;
+	int cnt;
+	qid_t id;
+
+	mlog_entry_void();
+
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (type != -1 && cnt != type)
+			continue;
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
+		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+		status = ocfs2_lock_global_qf(oinfo, 0);
+		if (status < 0)
+			goto out;
+		/* This is just a performance optimization not a reliable test.
+		 * Since we hold an inode lock, noone can actually release
+		 * the structure until we are finished with initialization. */
+		if (inode->i_dquot[cnt] != NODQUOT) {
+			ocfs2_unlock_global_qf(oinfo, 0);
+			continue;
+		}
+		/* When we have inode lock, we know that no dquot_release() can
+		 * run and thus we can safely check whether we need to
+		 * read+modify global file to get quota information or whether
+		 * our node already has it. */
+		if (cnt == USRQUOTA)
+			id = inode->i_uid;
+		else if (cnt == GRPQUOTA)
+			id = inode->i_gid;
+		else
+			BUG();
+		/* Obtain exclusion from quota off... */
+		down_write(&sb_dqopt(sb)->dqptr_sem);
+		exclusive = !dquot_is_cached(sb, id, cnt);
+		up_write(&sb_dqopt(sb)->dqptr_sem);
+		if (exclusive) {
+			status = ocfs2_lock_global_qf(oinfo, 1);
+			if (status < 0) {
+				exclusive = 0;
+				mlog_errno(status);
+				goto out_ilock;
+			}
+			handle = ocfs2_start_trans(OCFS2_SB(sb),
+					ocfs2_calc_qinit_credits(sb, cnt));
+			if (IS_ERR(handle)) {
+				status = PTR_ERR(handle);
+				mlog_errno(status);
+				goto out_ilock;
+			}
+		}
+		dquot_initialize(inode, cnt);
+		if (exclusive) {
+			ocfs2_commit_trans(OCFS2_SB(sb), handle);
+			ocfs2_unlock_global_qf(oinfo, 1);
+		}
+		ocfs2_unlock_global_qf(oinfo, 0);
+	}
+	mlog_exit(0);
+	return 0;
+out_ilock:
+	if (exclusive)
+		ocfs2_unlock_global_qf(oinfo, 1);
+	ocfs2_unlock_global_qf(oinfo, 0);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+static int ocfs2_dquot_drop_slow(struct inode *inode)
+{
+	int status;
+	int cnt;
+	int got_lock[MAXQUOTAS] = {0, 0};
+	handle_t *handle;
+	struct super_block *sb = inode->i_sb;
+	struct ocfs2_mem_dqinfo *oinfo;
+
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
+		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+		status = ocfs2_lock_global_qf(oinfo, 1);
+		if (status < 0)
+			goto out;
+		got_lock[cnt] = 1;
+	}
+	handle = ocfs2_start_trans(OCFS2_SB(sb),
+			ocfs2_calc_qinit_credits(sb, USRQUOTA) +
+			ocfs2_calc_qinit_credits(sb, GRPQUOTA));
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+				goto out;
+	}
+	dquot_drop(inode);
+	ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out:
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		if (got_lock[cnt]) {
+			oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+			ocfs2_unlock_global_qf(oinfo, 1);
+		}
+	return status;
+}
+
+/* See the comment before ocfs2_dquot_initialize. */
+static int ocfs2_dquot_drop(struct inode *inode)
+{
+	int status = 0;
+	struct super_block *sb = inode->i_sb;
+	struct ocfs2_mem_dqinfo *oinfo;
+	int exclusive = 0;
+	int cnt;
+	int got_lock[MAXQUOTAS] = {0, 0};
+
+	mlog_entry_void();
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
+		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+		status = ocfs2_lock_global_qf(oinfo, 0);
+		if (status < 0)
+			goto out;
+		got_lock[cnt] = 1;
+	}
+	/* Lock against anyone releasing references so that when when we check
+	 * we know we are not going to be last ones to release dquot */
+	down_write(&sb_dqopt(sb)->dqptr_sem);
+	/* Urgh, this is a terrible hack :( */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (inode->i_dquot[cnt] != NODQUOT &&
+		    atomic_read(&inode->i_dquot[cnt]->dq_count) > 1) {
+			exclusive = 1;
+			break;
+		}
+	}
+	if (!exclusive)
+		dquot_drop_locked(inode);
+	up_write(&sb_dqopt(sb)->dqptr_sem);
+out:
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		if (got_lock[cnt]) {
+			oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+			ocfs2_unlock_global_qf(oinfo, 0);
+		}
+	/* In case we bailed out because we had to do expensive locking
+	 * do it now... */
+	if (exclusive)
+		status = ocfs2_dquot_drop_slow(inode);
+	mlog_exit(status);
+	return status;
+}
+
+static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type)
+{
+	struct ocfs2_dquot *dquot =
+				kmem_cache_zalloc(ocfs2_dquot_cachep, GFP_NOFS);
+
+	if (!dquot)
+		return NULL;
+	return &dquot->dq_dquot;
+}
+
+static void ocfs2_destroy_dquot(struct dquot *dquot)
+{
+	kmem_cache_free(ocfs2_dquot_cachep, dquot);
+}
+
+struct dquot_operations ocfs2_quota_operations = {
+	.initialize	= ocfs2_dquot_initialize,
+	.drop		= ocfs2_dquot_drop,
+	.alloc_space	= dquot_alloc_space,
+	.alloc_inode	= dquot_alloc_inode,
+	.free_space	= dquot_free_space,
+	.free_inode	= dquot_free_inode,
+	.transfer	= dquot_transfer,
+	.write_dquot	= ocfs2_write_dquot,
+	.acquire_dquot	= ocfs2_acquire_dquot,
+	.release_dquot	= ocfs2_release_dquot,
+	.mark_dirty	= ocfs2_mark_dquot_dirty,
+	.write_info	= ocfs2_write_info,
+	.alloc_dquot	= ocfs2_alloc_dquot,
+	.destroy_dquot	= ocfs2_destroy_dquot,
+};
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
new file mode 100644
index 0000000..55c3f2f
--- /dev/null
+++ b/fs/ocfs2/quota_local.c
@@ -0,0 +1,833 @@
+/*
+ *  Implementation of operations over local quota file
+ */
+
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/quotaops.h>
+#include <linux/module.h>
+
+#define MLOG_MASK_PREFIX ML_QUOTA
+#include <cluster/masklog.h>
+
+#include "ocfs2_fs.h"
+#include "ocfs2.h"
+#include "inode.h"
+#include "alloc.h"
+#include "file.h"
+#include "buffer_head_io.h"
+#include "journal.h"
+#include "sysfile.h"
+#include "dlmglue.h"
+#include "quota.h"
+
+/* Number of local quota structures per block */
+static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
+{
+	return ((sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) /
+		sizeof(struct ocfs2_local_disk_dqblk));
+}
+
+/* Number of blocks with entries in one chunk */
+static inline unsigned int ol_chunk_blocks(struct super_block *sb)
+{
+	return ((sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
+		 OCFS2_QBLK_RESERVED_SPACE) << 3) /
+	       ol_quota_entries_per_block(sb);
+}
+
+/* Number of entries in a chunk bitmap */
+static unsigned int ol_chunk_entries(struct super_block *sb)
+{
+	return ol_chunk_blocks(sb) * ol_quota_entries_per_block(sb);
+}
+
+/* Offset of the chunk in quota file */
+static unsigned int ol_quota_chunk_block(struct super_block *sb, int c)
+{
+	/* 1 block for local quota file info, 1 block per chunk for chunk info */
+	return 1 + (ol_chunk_blocks(sb) + 1) * c;
+}
+
+/* Offset of the dquot structure in the quota file */
+static loff_t ol_dqblk_off(struct super_block *sb, int c, int off)
+{
+	int epb = ol_quota_entries_per_block(sb);
+
+	return ((ol_quota_chunk_block(sb, c) + 1 + off / epb)
+		<< sb->s_blocksize_bits) +
+		(off % epb) * sizeof(struct ocfs2_local_disk_dqblk);
+}
+
+/* Compute block number from given offset */
+static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off)
+{
+	return off >> sb->s_blocksize_bits;
+}
+
+static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off)
+{
+	return off & ((1 << sb->s_blocksize_bits) - 1);
+}
+
+/* Compute offset in the chunk of a structure with the given offset */
+static int ol_dqblk_chunk_off(struct super_block *sb, int c, loff_t off)
+{
+	int epb = ol_quota_entries_per_block(sb);
+
+	return ((off >> sb->s_blocksize_bits) -
+			ol_quota_chunk_block(sb, c) - 1) * epb
+	       + ((unsigned int)(off & ((1 << sb->s_blocksize_bits) - 1))) /
+		 sizeof(struct ocfs2_local_disk_dqblk);
+}
+
+/* Write bufferhead into the fs */
+static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
+		void (*modify)(struct buffer_head *, void *), void *private)
+{
+	struct super_block *sb = inode->i_sb;
+	handle_t *handle;
+	int status;
+
+	handle = ocfs2_start_trans(OCFS2_SB(sb), 1);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		return status;
+	}
+	status = ocfs2_journal_access(handle, inode, bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		ocfs2_commit_trans(OCFS2_SB(sb), handle);
+		return status;
+	}
+	lock_buffer(bh);
+	modify(bh, private);
+	unlock_buffer(bh);
+	status = ocfs2_journal_dirty(handle, bh);
+	if (status < 0) {
+		mlog_errno(status);
+		ocfs2_commit_trans(OCFS2_SB(sb), handle);
+		return status;
+	}
+	status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
+	if (status < 0) {
+		mlog_errno(status);
+		return status;
+	}
+	return 0;
+}
+
+/* Check whether we understand format of quota files */
+static int ocfs2_local_check_quota_file(struct super_block *sb, int type)
+{
+	unsigned int lmagics[MAXQUOTAS] = OCFS2_LOCAL_QMAGICS;
+	unsigned int lversions[MAXQUOTAS] = OCFS2_LOCAL_QVERSIONS;
+	unsigned int gmagics[MAXQUOTAS] = OCFS2_GLOBAL_QMAGICS;
+	unsigned int gversions[MAXQUOTAS] = OCFS2_GLOBAL_QVERSIONS;
+	unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
+					GROUP_QUOTA_SYSTEM_INODE };
+	struct buffer_head *bh;
+	struct inode *linode = sb_dqopt(sb)->files[type];
+	struct inode *ginode = NULL;
+	struct ocfs2_disk_dqheader *dqhead;
+	int status, ret = 0;
+
+	/* First check whether we understand local quota file */
+	bh = ocfs2_read_quota_block(linode, 0, &status);
+	if (!bh) {
+		mlog_errno(status);
+		mlog(ML_ERROR, "failed to read quota file header (type=%d)\n",
+			type);
+		goto out_err;
+	}
+	dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
+	if (le32_to_cpu(dqhead->dqh_magic) != lmagics[type]) {
+		mlog(ML_ERROR, "quota file magic does not match (%u != %u),"
+			" type=%d\n", le32_to_cpu(dqhead->dqh_magic),
+			lmagics[type], type);
+		goto out_err;
+	}
+	if (le32_to_cpu(dqhead->dqh_version) != lversions[type]) {
+		mlog(ML_ERROR, "quota file version does not match (%u != %u),"
+			" type=%d\n", le32_to_cpu(dqhead->dqh_version),
+			lversions[type], type);
+		goto out_err;
+	}
+	brelse(bh);
+	bh = NULL;
+
+	/* Next check whether we understand global quota file */
+	ginode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
+						OCFS2_INVALID_SLOT);
+	if (!ginode) {
+		mlog(ML_ERROR, "cannot get global quota file inode "
+				"(type=%d)\n", type);
+		goto out_err;
+	}
+	/* Since the header is read only, we don't care about locking */
+	bh = ocfs2_read_quota_block(ginode, 0, &status);
+	if (!bh) {
+		mlog_errno(status);
+		mlog(ML_ERROR, "failed to read global quota file header "
+				"(type=%d)\n", type);
+		goto out_err;
+	}
+	dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
+	if (le32_to_cpu(dqhead->dqh_magic) != gmagics[type]) {
+		mlog(ML_ERROR, "global quota file magic does not match "
+			"(%u != %u), type=%d\n",
+			le32_to_cpu(dqhead->dqh_magic), gmagics[type], type);
+		goto out_err;
+	}
+	if (le32_to_cpu(dqhead->dqh_version) != gversions[type]) {
+		mlog(ML_ERROR, "global quota file version does not match "
+			"(%u != %u), type=%d\n",
+			le32_to_cpu(dqhead->dqh_version), gversions[type],
+			type);
+		goto out_err;
+	}
+
+	ret = 1;
+out_err:
+	brelse(bh);
+	iput(ginode);
+	return ret;
+}
+
+/* Release given list of quota file chunks */
+static void ocfs2_release_local_quota_bitmaps(struct list_head *head)
+{
+	struct ocfs2_quota_chunk *pos, *next;
+
+	list_for_each_entry_safe(pos, next, head, qc_chunk) {
+		list_del(&pos->qc_chunk);
+		brelse(pos->qc_headerbh);
+		kmem_cache_free(ocfs2_qf_chunk_cachep, pos);
+	}
+}
+
+/* Load quota bitmaps into memory */
+static int ocfs2_load_local_quota_bitmaps(struct inode *inode,
+			struct ocfs2_local_disk_dqinfo *ldinfo,
+			struct list_head *head)
+{
+	struct ocfs2_quota_chunk *newchunk;
+	int i, status;
+
+	INIT_LIST_HEAD(head);
+	for (i = 0; i < le32_to_cpu(ldinfo->dqi_chunks); i++) {
+		newchunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS);
+		if (!newchunk) {
+			ocfs2_release_local_quota_bitmaps(head);
+			return -ENOMEM;
+		}
+		newchunk->qc_num = i;
+		newchunk->qc_headerbh = ocfs2_read_quota_block(inode,
+				ol_quota_chunk_block(inode->i_sb, i),
+				&status);
+		if (!newchunk->qc_headerbh) {
+			mlog_errno(status);
+			kmem_cache_free(ocfs2_qf_chunk_cachep, newchunk);
+			ocfs2_release_local_quota_bitmaps(head);
+			return status;
+		}
+		list_add_tail(&newchunk->qc_chunk, head);
+	}
+	return 0;
+}
+
+static void olq_update_info(struct buffer_head *bh, void *private)
+{
+	struct mem_dqinfo *info = private;
+	struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+	struct ocfs2_local_disk_dqinfo *ldinfo;
+
+	ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+						OCFS2_LOCAL_INFO_OFF);
+	spin_lock(&dq_data_lock);
+	ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
+	ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks);
+	ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks);
+	spin_unlock(&dq_data_lock);
+}
+
+/* Read information header from quota file */
+static int ocfs2_local_read_info(struct super_block *sb, int type)
+{
+	struct ocfs2_local_disk_dqinfo *ldinfo;
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct ocfs2_mem_dqinfo *oinfo;
+	struct inode *lqinode = sb_dqopt(sb)->files[type];
+	int status;
+	struct buffer_head *bh = NULL;
+	int locked = 0;
+
+	info->dqi_maxblimit = 0x7fffffffffffffffLL;
+	info->dqi_maxilimit = 0x7fffffffffffffffLL;
+	oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS);
+	if (!oinfo) {
+		mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
+			       " info.");
+		goto out_err;
+	}
+	info->dqi_priv = oinfo;
+	oinfo->dqi_type = type;
+	INIT_LIST_HEAD(&oinfo->dqi_chunk);
+	oinfo->dqi_lqi_bh = NULL;
+	oinfo->dqi_ibh = NULL;
+
+	status = ocfs2_global_read_info(sb, type);
+	if (status < 0)
+		goto out_err;
+
+	status = ocfs2_inode_lock(lqinode, &oinfo->dqi_lqi_bh, 1);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_err;
+	}
+	locked = 1;
+
+	/* Now read local header */
+	bh = ocfs2_read_quota_block(lqinode, 0, &status);
+	if (!bh) {
+		mlog_errno(status);
+		mlog(ML_ERROR, "failed to read quota file info header "
+			"(type=%d)\n", type);
+		goto out_err;
+	}
+	ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+						OCFS2_LOCAL_INFO_OFF);
+	info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags);
+	oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks);
+	oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks);
+	oinfo->dqi_ibh = bh;
+
+	/* We crashed when using local quota file? */
+	if (!(info->dqi_flags & OLQF_CLEAN))
+		goto out_err;	/* So far we just bail out. Later we should resync here */
+
+	status = ocfs2_load_local_quota_bitmaps(sb_dqopt(sb)->files[type],
+						ldinfo,
+						&oinfo->dqi_chunk);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_err;
+	}
+
+	/* Now mark quota file as used */
+	info->dqi_flags &= ~OLQF_CLEAN;
+	status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_err;
+	}
+
+	return 0;
+out_err:
+	if (oinfo) {
+		iput(oinfo->dqi_gqinode);
+		ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
+		ocfs2_lock_res_free(&oinfo->dqi_gqlock);
+		brelse(oinfo->dqi_lqi_bh);
+		if (locked)
+			ocfs2_inode_unlock(lqinode, 1);
+		ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+		kfree(oinfo);
+	}
+	brelse(bh);
+	return -1;
+}
+
+/* Write local info to quota file */
+static int ocfs2_local_write_info(struct super_block *sb, int type)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv)
+						->dqi_ibh;
+	int status;
+
+	status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info,
+				 info);
+	if (status < 0) {
+		mlog_errno(status);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Release info from memory */
+static int ocfs2_local_free_info(struct super_block *sb, int type)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+	struct ocfs2_quota_chunk *chunk;
+	struct ocfs2_local_disk_chunk *dchunk;
+	int mark_clean = 1, len;
+	int status;
+
+	iput(oinfo->dqi_gqinode);
+	ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
+	ocfs2_lock_res_free(&oinfo->dqi_gqlock);
+	list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
+		dchunk = (struct ocfs2_local_disk_chunk *)
+					(chunk->qc_headerbh->b_data);
+		if (chunk->qc_num < oinfo->dqi_chunks - 1) {
+			len = ol_chunk_entries(sb);
+		} else {
+			len = (oinfo->dqi_blocks -
+			       ol_quota_chunk_block(sb, chunk->qc_num) - 1)
+			      * ol_quota_entries_per_block(sb);
+		}
+		/* Not all entries free? Bug! */
+		if (le32_to_cpu(dchunk->dqc_free) != len) {
+			mlog(ML_ERROR, "releasing quota file with used "
+					"entries (type=%d)\n", type);
+			mark_clean = 0;
+		}
+	}
+	ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+
+	if (!mark_clean)
+		goto out;
+
+	/* Mark local file as clean */
+	info->dqi_flags |= OLQF_CLEAN;
+	status = ocfs2_modify_bh(sb_dqopt(sb)->files[type],
+				 oinfo->dqi_ibh,
+				 olq_update_info,
+				 info);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+
+out:
+	ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1);
+	brelse(oinfo->dqi_ibh);
+	brelse(oinfo->dqi_lqi_bh);
+	kfree(oinfo);
+	return 0;
+}
+
+static void olq_set_dquot(struct buffer_head *bh, void *private)
+{
+	struct ocfs2_dquot *od = private;
+	struct ocfs2_local_disk_dqblk *dqblk;
+	struct super_block *sb = od->dq_dquot.dq_sb;
+
+	dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data
+		+ ol_dqblk_block_offset(sb, od->dq_local_off));
+
+	dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id);
+	spin_lock(&dq_data_lock);
+	dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
+					  od->dq_origspace);
+	dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
+					  od->dq_originodes);
+	spin_unlock(&dq_data_lock);
+	mlog(0, "Writing local dquot %u space %lld inodes %lld\n",
+	     od->dq_dquot.dq_id, dqblk->dqb_spacemod, dqblk->dqb_inodemod);
+}
+
+/* Write dquot to local quota file */
+static int ocfs2_local_write_dquot(struct dquot *dquot)
+{
+	struct super_block *sb = dquot->dq_sb;
+	struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
+	struct buffer_head *bh;
+	int status;
+
+	bh = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type],
+				    ol_dqblk_file_block(sb, od->dq_local_off),
+				    &status);
+	if (!bh) {
+		mlog_errno(status);
+		goto out;
+	}
+	status = ocfs2_modify_bh(sb_dqopt(sb)->files[dquot->dq_type], bh,
+				 olq_set_dquot, od);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+out:
+	brelse(bh);
+	return status;
+}
+
+/* Find free entry in local quota file */
+static struct ocfs2_quota_chunk *ocfs2_find_free_entry(struct super_block *sb,
+						       int type,
+						       int *offset)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+	struct ocfs2_quota_chunk *chunk;
+	struct ocfs2_local_disk_chunk *dchunk;
+	int found = 0, len;
+
+	list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
+		dchunk = (struct ocfs2_local_disk_chunk *)
+						chunk->qc_headerbh->b_data;
+		if (le32_to_cpu(dchunk->dqc_free) > 0) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found)
+		return NULL;
+
+	if (chunk->qc_num < oinfo->dqi_chunks - 1) {
+		len = ol_chunk_entries(sb);
+	} else {
+		len = (oinfo->dqi_blocks -
+		       ol_quota_chunk_block(sb, chunk->qc_num) - 1)
+		      * ol_quota_entries_per_block(sb);
+	}
+
+	found = ocfs2_find_next_zero_bit(dchunk->dqc_bitmap, len, 0);
+	/* We failed? */
+	if (found == len) {
+		mlog(ML_ERROR, "Did not find empty entry in chunk %d with %u"
+		     " entries free (type=%d)\n", chunk->qc_num,
+		     le32_to_cpu(dchunk->dqc_free), type);
+		return ERR_PTR(-EIO);
+	}
+	*offset = found;
+	return chunk;
+}
+
+/* Add new chunk to the local quota file */
+static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
+							struct super_block *sb,
+							int type,
+							int *offset)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+	struct inode *lqinode = sb_dqopt(sb)->files[type];
+	struct ocfs2_quota_chunk *chunk = NULL;
+	struct ocfs2_local_disk_chunk *dchunk;
+	int status;
+	handle_t *handle;
+	struct buffer_head *bh = NULL;
+	u64 p_blkno;
+
+	/* We are protected by dqio_sem so no locking needed */
+	status = ocfs2_extend_no_holes(lqinode,
+				       lqinode->i_size + 2 * sb->s_blocksize,
+				       lqinode->i_size);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+	status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
+					  lqinode->i_size + 2 * sb->s_blocksize);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+
+	chunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS);
+	if (!chunk) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto out;
+	}
+
+	down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+	status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
+					     &p_blkno, NULL, NULL);
+	up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+	bh = sb_getblk(sb, p_blkno);
+	if (!bh) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto out;
+	}
+	dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
+
+	handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out;
+	}
+
+	status = ocfs2_journal_access(handle, lqinode, bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_trans;
+	}
+	lock_buffer(bh);
+	dchunk->dqc_free = ol_quota_entries_per_block(sb);
+	memset(dchunk->dqc_bitmap, 0,
+	       sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
+	       OCFS2_QBLK_RESERVED_SPACE);
+	set_buffer_uptodate(bh);
+	unlock_buffer(bh);
+	status = ocfs2_journal_dirty(handle, bh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_trans;
+	}
+
+	oinfo->dqi_blocks += 2;
+	oinfo->dqi_chunks++;
+	status = ocfs2_local_write_info(sb, type);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_trans;
+	}
+	status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+
+	list_add_tail(&chunk->qc_chunk, &oinfo->dqi_chunk);
+	chunk->qc_num = list_entry(chunk->qc_chunk.prev,
+				   struct ocfs2_quota_chunk,
+				   qc_chunk)->qc_num + 1;
+	chunk->qc_headerbh = bh;
+	*offset = 0;
+	return chunk;
+out_trans:
+	ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out:
+	brelse(bh);
+	kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
+	return ERR_PTR(status);
+}
+
+/* Find free entry in local quota file */
+static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
+						       struct super_block *sb,
+						       int type,
+						       int *offset)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+	struct ocfs2_quota_chunk *chunk;
+	struct inode *lqinode = sb_dqopt(sb)->files[type];
+	struct ocfs2_local_disk_chunk *dchunk;
+	int epb = ol_quota_entries_per_block(sb);
+	unsigned int chunk_blocks;
+	int status;
+	handle_t *handle;
+
+	if (list_empty(&oinfo->dqi_chunk))
+		return ocfs2_local_quota_add_chunk(sb, type, offset);
+	/* Is the last chunk full? */
+	chunk = list_entry(oinfo->dqi_chunk.prev,
+			struct ocfs2_quota_chunk, qc_chunk);
+	chunk_blocks = oinfo->dqi_blocks -
+			ol_quota_chunk_block(sb, chunk->qc_num) - 1;
+	if (ol_chunk_blocks(sb) == chunk_blocks)
+		return ocfs2_local_quota_add_chunk(sb, type, offset);
+
+	/* We are protected by dqio_sem so no locking needed */
+	status = ocfs2_extend_no_holes(lqinode,
+				       lqinode->i_size + sb->s_blocksize,
+				       lqinode->i_size);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+	status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
+					  lqinode->i_size + sb->s_blocksize);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+	handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out;
+	}
+	status = ocfs2_journal_access(handle, lqinode, chunk->qc_headerbh,
+				 OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_trans;
+	}
+
+	dchunk = (struct ocfs2_local_disk_chunk *)chunk->qc_headerbh->b_data;
+	lock_buffer(chunk->qc_headerbh);
+	le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb));
+	unlock_buffer(chunk->qc_headerbh);
+	status = ocfs2_journal_dirty(handle, chunk->qc_headerbh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_trans;
+	}
+	oinfo->dqi_blocks++;
+	status = ocfs2_local_write_info(sb, type);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_trans;
+	}
+
+	status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+	*offset = chunk_blocks * epb;
+	return chunk;
+out_trans:
+	ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out:
+	return ERR_PTR(status);
+}
+
+void olq_alloc_dquot(struct buffer_head *bh, void *private)
+{
+	int *offset = private;
+	struct ocfs2_local_disk_chunk *dchunk;
+
+	dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
+	ocfs2_set_bit(*offset, dchunk->dqc_bitmap);
+	le32_add_cpu(&dchunk->dqc_free, -1);
+}
+
+/* Create dquot in the local file for given id */
+static int ocfs2_create_local_dquot(struct dquot *dquot)
+{
+	struct super_block *sb = dquot->dq_sb;
+	int type = dquot->dq_type;
+	struct inode *lqinode = sb_dqopt(sb)->files[type];
+	struct ocfs2_quota_chunk *chunk;
+	struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
+	int offset;
+	int status;
+
+	chunk = ocfs2_find_free_entry(sb, type, &offset);
+	if (!chunk) {
+		chunk = ocfs2_extend_local_quota_file(sb, type, &offset);
+		if (IS_ERR(chunk))
+			return PTR_ERR(chunk);
+	} else if (IS_ERR(chunk)) {
+		return PTR_ERR(chunk);
+	}
+	od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset);
+	od->dq_chunk = chunk;
+
+	/* Initialize dquot structure on disk */
+	status = ocfs2_local_write_dquot(dquot);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+
+	/* Mark structure as allocated */
+	status = ocfs2_modify_bh(lqinode, chunk->qc_headerbh, olq_alloc_dquot,
+				 &offset);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+out:
+	return status;
+}
+
+/* Create entry in local file for dquot, load data from the global file */
+static int ocfs2_local_read_dquot(struct dquot *dquot)
+{
+	int status;
+
+	mlog_entry("id=%u, type=%d\n", dquot->dq_id, dquot->dq_type);
+
+	status = ocfs2_global_read_dquot(dquot);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_err;
+	}
+
+	/* Now create entry in the local quota file */
+	status = ocfs2_create_local_dquot(dquot);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_err;
+	}
+	mlog_exit(0);
+	return 0;
+out_err:
+	mlog_exit(status);
+	return status;
+}
+
+/* Release dquot structure from local quota file. ocfs2_release_dquot() has
+ * already started a transaction and obtained exclusive lock for global
+ * quota file. */
+static int ocfs2_local_release_dquot(struct dquot *dquot)
+{
+	int status;
+	int type = dquot->dq_type;
+	struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
+	struct super_block *sb = dquot->dq_sb;
+	struct ocfs2_local_disk_chunk *dchunk;
+	int offset;
+	handle_t *handle = journal_current_handle();
+
+	BUG_ON(!handle);
+	/* First write all local changes to global file */
+	status = ocfs2_global_release_dquot(dquot);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+
+	status = ocfs2_journal_access(handle, sb_dqopt(sb)->files[type],
+			od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+	offset = ol_dqblk_chunk_off(sb, od->dq_chunk->qc_num,
+					     od->dq_local_off);
+	dchunk = (struct ocfs2_local_disk_chunk *)
+			(od->dq_chunk->qc_headerbh->b_data);
+	/* Mark structure as freed */
+	lock_buffer(od->dq_chunk->qc_headerbh);
+	ocfs2_clear_bit(offset, dchunk->dqc_bitmap);
+	le32_add_cpu(&dchunk->dqc_free, 1);
+	unlock_buffer(od->dq_chunk->qc_headerbh);
+	status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+	status = 0;
+out:
+	/* Clear the read bit so that next time someone uses this
+	 * dquot he reads fresh info from disk and allocates local
+	 * dquot structure */
+	clear_bit(DQ_READ_B, &dquot->dq_flags);
+	return status;
+}
+
+static struct quota_format_ops ocfs2_format_ops = {
+	.check_quota_file	= ocfs2_local_check_quota_file,
+	.read_file_info		= ocfs2_local_read_info,
+	.write_file_info	= ocfs2_global_write_info,
+	.free_file_info		= ocfs2_local_free_info,
+	.read_dqblk		= ocfs2_local_read_dquot,
+	.commit_dqblk		= ocfs2_local_write_dquot,
+	.release_dqblk		= ocfs2_local_release_dquot,
+};
+
+struct quota_format_type ocfs2_quota_format = {
+	.qf_fmt_id = QFMT_OCFS2,
+	.qf_ops = &ocfs2_format_ops,
+	.qf_owner = THIS_MODULE
+};
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 41bb019..7bb83e4 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -65,10 +65,13 @@
 #include "uptodate.h"
 #include "ver.h"
 #include "xattr.h"
+#include "quota.h"
 
 #include "buffer_head_io.h"
 
 static struct kmem_cache *ocfs2_inode_cachep = NULL;
+struct kmem_cache *ocfs2_dquot_cachep;
+struct kmem_cache *ocfs2_qf_chunk_cachep;
 
 /* OCFS2 needs to schedule several differnt types of work which
  * require cluster locking, disk I/O, recovery waits, etc. Since these
@@ -137,6 +140,8 @@ static const struct super_operations ocfs2_sops = {
 	.put_super	= ocfs2_put_super,
 	.remount_fs	= ocfs2_remount,
 	.show_options   = ocfs2_show_options,
+	.quota_read	= ocfs2_quota_read,
+	.quota_write	= ocfs2_quota_write,
 };
 
 enum {
@@ -1104,6 +1109,7 @@ static int __init ocfs2_init(void)
 
 	ocfs2_set_locking_protocol();
 
+	status = register_quota_format(&ocfs2_quota_format);
 leave:
 	if (status < 0) {
 		ocfs2_free_mem_caches();
@@ -1127,6 +1133,8 @@ static void __exit ocfs2_exit(void)
 		destroy_workqueue(ocfs2_wq);
 	}
 
+	unregister_quota_format(&ocfs2_quota_format);
+
 	debugfs_remove(ocfs2_debugfs_root);
 
 	ocfs2_free_mem_caches();
@@ -1242,8 +1250,27 @@ static int ocfs2_initialize_mem_caches(void)
 				       (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
 						SLAB_MEM_SPREAD),
 				       ocfs2_inode_init_once);
-	if (!ocfs2_inode_cachep)
+	ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache",
+					sizeof(struct ocfs2_dquot),
+					0,
+					(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
+					NULL);
+	ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache",
+					sizeof(struct ocfs2_quota_chunk),
+					0,
+					(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+					NULL);
+	if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
+	    !ocfs2_qf_chunk_cachep) {
+		if (ocfs2_inode_cachep)
+			kmem_cache_destroy(ocfs2_inode_cachep);
+		if (ocfs2_dquot_cachep)
+			kmem_cache_destroy(ocfs2_dquot_cachep);
+		if (ocfs2_qf_chunk_cachep)
+			kmem_cache_destroy(ocfs2_qf_chunk_cachep);
 		return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -1252,8 +1279,15 @@ static void ocfs2_free_mem_caches(void)
 {
 	if (ocfs2_inode_cachep)
 		kmem_cache_destroy(ocfs2_inode_cachep);
-
 	ocfs2_inode_cachep = NULL;
+
+	if (ocfs2_dquot_cachep)
+		kmem_cache_destroy(ocfs2_dquot_cachep);
+	ocfs2_dquot_cachep = NULL;
+
+	if (ocfs2_qf_chunk_cachep)
+		kmem_cache_destroy(ocfs2_qf_chunk_cachep);
+	ocfs2_qf_chunk_cachep = NULL;
 }
 
 static int ocfs2_get_sector(struct super_block *sb,
-- 
cgit v0.10.2


From a90714c150e3ce677c57a9dac3ab1ec342c75a95 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 9 Oct 2008 19:38:40 +0200
Subject: ocfs2: Add quota calls for allocation and freeing of inodes and space

Add quota calls for allocation and freeing of inodes and space, also update
estimates on number of needed credits for a transaction. Move out inode
allocation from ocfs2_mknod_locked() because vfs_dq_init() must be called
outside of a transaction.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 69d67ab..84a7bd4 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
+#include <linux/quotaops.h>
 
 #define MLOG_MASK_PREFIX ML_DISK_ALLOC
 #include <cluster/masklog.h>
@@ -5322,7 +5323,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
+	handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		mlog_errno(ret);
@@ -6552,6 +6553,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 		goto bail;
 	}
 
+	vfs_dq_free_space_nodirty(inode,
+			ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
 				      clusters_to_del;
@@ -6860,6 +6863,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 	struct page **pages = NULL;
 	loff_t end = osb->s_clustersize;
 	struct ocfs2_extent_tree et;
+	int did_quota = 0;
 
 	has_data = i_size_read(inode) ? 1 : 0;
 
@@ -6879,7 +6883,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS);
+	handle = ocfs2_start_trans(osb,
+				   ocfs2_inline_to_extents_credits(osb->sb));
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		mlog_errno(ret);
@@ -6898,6 +6903,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		unsigned int page_end;
 		u64 phys;
 
+		if (vfs_dq_alloc_space_nodirty(inode,
+				       ocfs2_clusters_to_bytes(osb->sb, 1))) {
+			ret = -EDQUOT;
+			goto out_commit;
+		}
+		did_quota = 1;
+
 		ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
 					   &num);
 		if (ret) {
@@ -6971,6 +6983,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 	}
 
 out_commit:
+	if (ret < 0 && did_quota)
+		vfs_dq_free_space_nodirty(inode,
+					  ocfs2_clusters_to_bytes(osb->sb, 1));
+
 	ocfs2_commit_trans(osb, handle);
 
 out_unlock:
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 6af79ad..6b647ec 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -27,6 +27,7 @@
 #include <linux/swap.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/mpage.h>
+#include <linux/quotaops.h>
 
 #define MLOG_MASK_PREFIX ML_FILE_IO
 #include <cluster/masklog.h>
@@ -1730,6 +1731,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 
 	wc->w_handle = handle;
 
+	if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode,
+			ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) {
+		ret = -EDQUOT;
+		goto out_commit;
+	}
 	/*
 	 * We don't want this to fail in ocfs2_write_end(), so do it
 	 * here.
@@ -1738,7 +1744,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out_quota;
 	}
 
 	/*
@@ -1751,14 +1757,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 					 mmap_page);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out_quota;
 	}
 
 	ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
 					  len);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_commit;
+		goto out_quota;
 	}
 
 	if (data_ac)
@@ -1770,6 +1776,10 @@ success:
 	*pagep = wc->w_target_page;
 	*fsdata = wc;
 	return 0;
+out_quota:
+	if (clusters_to_alloc)
+		vfs_dq_free_space(inode,
+			  ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
 out_commit:
 	ocfs2_commit_trans(osb, handle);
 
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index d83cff9..3708fe4 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -40,6 +40,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
+#include <linux/quotaops.h>
 
 #define MLOG_MASK_PREFIX ML_NAMEI
 #include <cluster/masklog.h>
@@ -1210,9 +1211,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 				   unsigned int blocks_wanted,
 				   struct buffer_head **first_block_bh)
 {
-	int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS;
 	u32 alloc, bit_off, len;
 	struct super_block *sb = dir->i_sb;
+	int ret, credits = ocfs2_inline_to_extents_credits(sb);
 	u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits;
 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 	struct ocfs2_inode_info *oi = OCFS2_I(dir);
@@ -1221,6 +1222,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
 	handle_t *handle;
 	struct ocfs2_extent_tree et;
+	int did_quota = 0;
 
 	ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
 
@@ -1258,6 +1260,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 		goto out_sem;
 	}
 
+	if (vfs_dq_alloc_space_nodirty(dir,
+				ocfs2_clusters_to_bytes(osb->sb, alloc))) {
+		ret = -EDQUOT;
+		goto out_commit;
+	}
+	did_quota = 1;
 	/*
 	 * Try to claim as many clusters as the bitmap can give though
 	 * if we only get one now, that's enough to continue. The rest
@@ -1380,6 +1388,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	dirdata_bh = NULL;
 
 out_commit:
+	if (ret < 0 && did_quota)
+		vfs_dq_free_space_nodirty(dir,
+			ocfs2_clusters_to_bytes(osb->sb, 2));
 	ocfs2_commit_trans(osb, handle);
 
 out_sem:
@@ -1404,7 +1415,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
 			       struct buffer_head **new_bh)
 {
 	int status;
-	int extend;
+	int extend, did_quota = 0;
 	u64 p_blkno, v_blkno;
 
 	spin_lock(&OCFS2_I(dir)->ip_lock);
@@ -1414,6 +1425,13 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
 	if (extend) {
 		u32 offset = OCFS2_I(dir)->ip_clusters;
 
+		if (vfs_dq_alloc_space_nodirty(dir,
+					ocfs2_clusters_to_bytes(sb, 1))) {
+			status = -EDQUOT;
+			goto bail;
+		}
+		did_quota = 1;
+
 		status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
 					      1, 0, parent_fe_bh, handle,
 					      data_ac, meta_ac, NULL);
@@ -1439,6 +1457,8 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
 	}
 	status = 0;
 bail:
+	if (did_quota && status < 0)
+		vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
 	mlog_exit(status);
 	return status;
 }
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 372d965..9374d37 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -35,6 +35,7 @@
 #include <linux/mount.h>
 #include <linux/writeback.h>
 #include <linux/falloc.h>
+#include <linux/quotaops.h>
 
 #define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
@@ -57,6 +58,7 @@
 #include "super.h"
 #include "xattr.h"
 #include "acl.h"
+#include "quota.h"
 
 #include "buffer_head_io.h"
 
@@ -534,6 +536,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
 	enum ocfs2_alloc_restarted why;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_extent_tree et;
+	int did_quota = 0;
 
 	mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
 
@@ -577,6 +580,13 @@ restart_all:
 	}
 
 restarted_transaction:
+	if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb,
+	    clusters_to_add))) {
+		status = -EDQUOT;
+		goto leave;
+	}
+	did_quota = 1;
+
 	/* reserve a write to the file entry early on - that we if we
 	 * run out of credits in the allocation path, we can still
 	 * update i_size. */
@@ -614,6 +624,10 @@ restarted_transaction:
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
+	/* Release unused quota reservation */
+	vfs_dq_free_space(inode,
+			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
+	did_quota = 0;
 
 	if (why != RESTART_NONE && clusters_to_add) {
 		if (why == RESTART_META) {
@@ -646,6 +660,9 @@ restarted_transaction:
 	     OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));
 
 leave:
+	if (status < 0 && did_quota)
+		vfs_dq_free_space(inode,
+			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
 	if (handle) {
 		ocfs2_commit_trans(osb, handle);
 		handle = NULL;
@@ -877,6 +894,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 	struct buffer_head *bh = NULL;
 	handle_t *handle = NULL;
+	int locked[MAXQUOTAS] = {0, 0};
+	int credits, qtype;
+	struct ocfs2_mem_dqinfo *oinfo;
 
 	mlog_entry("(0x%p, '%.*s')\n", dentry,
 	           dentry->d_name.len, dentry->d_name.name);
@@ -947,11 +967,47 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
-	if (IS_ERR(handle)) {
-		status = PTR_ERR(handle);
-		mlog_errno(status);
-		goto bail_unlock;
+	if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
+	    (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
+		credits = OCFS2_INODE_UPDATE_CREDITS;
+		if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
+		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+		    OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
+			oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv;
+			status = ocfs2_lock_global_qf(oinfo, 1);
+			if (status < 0)
+				goto bail_unlock;
+			credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) +
+				ocfs2_calc_qdel_credits(sb, USRQUOTA);
+			locked[USRQUOTA] = 1;
+		}
+		if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
+		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+		    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
+			oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv;
+			status = ocfs2_lock_global_qf(oinfo, 1);
+			if (status < 0)
+				goto bail_unlock;
+			credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) +
+				   ocfs2_calc_qdel_credits(sb, GRPQUOTA);
+			locked[GRPQUOTA] = 1;
+		}
+		handle = ocfs2_start_trans(osb, credits);
+		if (IS_ERR(handle)) {
+			status = PTR_ERR(handle);
+			mlog_errno(status);
+			goto bail_unlock;
+		}
+		status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+		if (status < 0)
+			goto bail_commit;
+	} else {
+		handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+		if (IS_ERR(handle)) {
+			status = PTR_ERR(handle);
+			mlog_errno(status);
+			goto bail_unlock;
+		}
 	}
 
 	/*
@@ -974,6 +1030,12 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 bail_commit:
 	ocfs2_commit_trans(osb, handle);
 bail_unlock:
+	for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
+		if (!locked[qtype])
+			continue;
+		oinfo = sb_dqinfo(sb, qtype)->dqi_priv;
+		ocfs2_unlock_global_qf(oinfo, 1);
+	}
 	ocfs2_inode_unlock(inode, 1);
 bail_unlock_rw:
 	if (size_change)
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 50dbc48..288512c 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
 
 #include <asm/byteorder.h>
 
@@ -603,7 +604,8 @@ static int ocfs2_remove_inode(struct inode *inode,
 		goto bail;
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
+					ocfs2_quota_trans_credits(inode->i_sb));
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		mlog_errno(status);
@@ -635,6 +637,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 	}
 
 	ocfs2_remove_from_cache(inode, di_bh);
+	vfs_dq_free_inode(inode);
 
 	status = ocfs2_free_dinode(handle, inode_alloc_inode,
 				   inode_alloc_bh, di);
@@ -917,7 +920,10 @@ void ocfs2_delete_inode(struct inode *inode)
 
 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
 
-	if (is_bad_inode(inode)) {
+	/* When we fail in read_inode() we mark inode as bad. The second test
+	 * catches the case when inode allocation fails before allocating
+	 * a block for inode. */
+	if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) {
 		mlog(0, "Skipping delete of bad inode\n");
 		goto bail;
 	}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 8203980..ee08e9c 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -284,6 +284,37 @@ int                  ocfs2_journal_dirty(handle_t *handle,
 /* extended attribute block update */
 #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
 
+/* global quotafile inode update, data block */
+#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+
+/*
+ * The two writes below can accidentally see global info dirty due
+ * to set_info() quotactl so make them prepared for the writes.
+ */
+/* quota data block, global info */
+/* Write to local quota file */
+#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1)
+
+/* global quota data block, local quota data block, global quota inode,
+ * global quota info */
+#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3)
+
+static inline int ocfs2_quota_trans_credits(struct super_block *sb)
+{
+	int credits = 0;
+
+	if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA))
+		credits += OCFS2_QWRITE_CREDITS;
+	if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA))
+		credits += OCFS2_QWRITE_CREDITS;
+	return credits;
+}
+
+/* Number of credits needed for removing quota structure from file */
+int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
+/* Number of credits needed for initialization of new quota structure */
+int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
+
 /* group extend. inode update and last group update. */
 #define OCFS2_GROUP_EXTEND_CREDITS	(OCFS2_INODE_UPDATE_CREDITS + 1)
 
@@ -294,8 +325,11 @@ int                  ocfs2_journal_dirty(handle_t *handle,
  * prev. group desc. if we relink. */
 #define OCFS2_SUBALLOC_ALLOC (3)
 
-#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC		\
-					 + OCFS2_INODE_UPDATE_CREDITS)
+static inline int ocfs2_inline_to_extents_credits(struct super_block *sb)
+{
+	return OCFS2_SUBALLOC_ALLOC + OCFS2_INODE_UPDATE_CREDITS +
+	       ocfs2_quota_trans_credits(sb);
+}
 
 /* dinode + group descriptor update. We don't relink on free yet. */
 #define OCFS2_SUBALLOC_FREE  (2)
@@ -304,16 +338,23 @@ int                  ocfs2_journal_dirty(handle_t *handle,
 #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE 		      \
 					 + OCFS2_TRUNCATE_LOG_UPDATE)
 
-#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS)
+static inline int ocfs2_remove_extent_credits(struct super_block *sb)
+{
+	return OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS +
+	       ocfs2_quota_trans_credits(sb);
+}
 
 /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
  * bitmap block for the new bit) */
 #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
 
 /* parent fe, parent block, new file entry, inode alloc fe, inode alloc
- * group descriptor + mkdir/symlink blocks */
-#define OCFS2_MKNOD_CREDITS (3 + OCFS2_SUBALLOC_ALLOC                         \
-			    + OCFS2_DIR_LINK_ADDITIONAL_CREDITS)
+ * group descriptor + mkdir/symlink blocks + quota update */
+static inline int ocfs2_mknod_credits(struct super_block *sb)
+{
+	return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS +
+	       ocfs2_quota_trans_credits(sb);
+}
 
 /* local alloc metadata change + main bitmap updates */
 #define OCFS2_WINDOW_MOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS                 \
@@ -323,13 +364,21 @@ int                  ocfs2_journal_dirty(handle_t *handle,
  * for the dinode, one for the new block. */
 #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
 
-/* file update (nlink, etc) + directory mtime/ctime + dir entry block */
-#define OCFS2_LINK_CREDITS  (2*OCFS2_INODE_UPDATE_CREDITS + 1)
+/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
+ * update on dir */
+static inline int ocfs2_link_credits(struct super_block *sb)
+{
+	return 2*OCFS2_INODE_UPDATE_CREDITS + 1 +
+	       ocfs2_quota_trans_credits(sb);
+}
 
 /* inode + dir inode (if we unlink a dir), + dir entry block + orphan
  * dir inode link */
-#define OCFS2_UNLINK_CREDITS  (2 * OCFS2_INODE_UPDATE_CREDITS + 1             \
-			      + OCFS2_LINK_CREDITS)
+static inline int ocfs2_unlink_credits(struct super_block *sb)
+{
+	/* The quota update from ocfs2_link_credits is unused here... */
+	return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb);
+}
 
 /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
  * inode alloc group descriptor */
@@ -338,8 +387,10 @@ int                  ocfs2_journal_dirty(handle_t *handle,
 /* dinode update, old dir dinode update, new dir dinode update, old
  * dir dir entry, new dir dir entry, dir entry update for renaming
  * directory + target unlink */
-#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3              \
-			     + OCFS2_UNLINK_CREDITS)
+static inline int ocfs2_rename_credits(struct super_block *sb)
+{
+	return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb);
+}
 
 /* global bitmap dinode, group desc., relinked group,
  * suballocator dinode, group desc., relinked group,
@@ -377,18 +428,19 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
 	 * credit for the dinode there. */
 	extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
 
-	return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks;
+	return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
+	       ocfs2_quota_trans_credits(sb);
 }
 
 static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
 {
-	int blocks = OCFS2_MKNOD_CREDITS;
+	int blocks = ocfs2_mknod_credits(sb);
 
 	/* links can be longer than one block so we may update many
 	 * within our single allocated extent. */
 	blocks += ocfs2_clusters_to_blocks(sb, 1);
 
-	return blocks;
+	return blocks + ocfs2_quota_trans_credits(sb);
 }
 
 static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
@@ -425,6 +477,8 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
 	/* update to the truncate log. */
 	credits += OCFS2_TRUNCATE_LOG_UPDATE;
 
+	credits += ocfs2_quota_trans_credits(sb);
+
 	return credits;
 }
 
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0134baf..6173807 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -40,6 +40,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
+#include <linux/quotaops.h>
 
 #define MLOG_MASK_PREFIX ML_NAMEI
 #include <cluster/masklog.h>
@@ -212,6 +213,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
 	} else
 		inode->i_gid = current_fsgid();
 	inode->i_mode = mode;
+	vfs_dq_init(inode);
 	return inode;
 }
 
@@ -236,6 +238,7 @@ static int ocfs2_mknod(struct inode *dir,
 	struct ocfs2_security_xattr_info si = {
 		.enable = 1,
 	};
+	int did_quota_inode = 0;
 
 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
 		   (unsigned long)dev, dentry->d_name.len,
@@ -323,7 +326,8 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS + xattr_credits);
+	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) +
+				   xattr_credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -331,6 +335,15 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
+	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
+	 * to be called. */
+	if (sb_any_quota_active(osb->sb) &&
+	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
+		status = -EDQUOT;
+		goto leave;
+	}
+	did_quota_inode = 1;
+
 	/* do the real work now. */
 	status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev,
 				    &new_fe_bh, parent_fe_bh, handle,
@@ -399,6 +412,8 @@ static int ocfs2_mknod(struct inode *dir,
 	d_instantiate(dentry, inode);
 	status = 0;
 leave:
+	if (status < 0 && did_quota_inode)
+		vfs_dq_free_inode(inode);
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
 
@@ -641,7 +656,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto out_unlock_inode;
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS);
+	handle = ocfs2_start_trans(osb, ocfs2_link_credits(osb->sb));
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
 		handle = NULL;
@@ -828,7 +843,7 @@ static int ocfs2_unlink(struct inode *dir,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS);
+	handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb));
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -1234,7 +1249,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS);
+	handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -1555,6 +1570,7 @@ static int ocfs2_symlink(struct inode *dir,
 	struct ocfs2_security_xattr_info si = {
 		.enable = 1,
 	};
+	int did_quota = 0, did_quota_inode = 0;
 
 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
@@ -1648,6 +1664,15 @@ static int ocfs2_symlink(struct inode *dir,
 		goto bail;
 	}
 
+	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
+	 * to be called. */
+	if (sb_any_quota_active(osb->sb) &&
+	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
+		status = -EDQUOT;
+		goto bail;
+	}
+	did_quota_inode = 1;
+
 	status = ocfs2_mknod_locked(osb, dir, inode, dentry,
 				    0, &new_fe_bh, parent_fe_bh, handle,
 				    inode_ac);
@@ -1663,6 +1688,12 @@ static int ocfs2_symlink(struct inode *dir,
 		u32 offset = 0;
 
 		inode->i_op = &ocfs2_symlink_inode_operations;
+		if (vfs_dq_alloc_space_nodirty(inode,
+		    ocfs2_clusters_to_bytes(osb->sb, 1))) {
+			status = -EDQUOT;
+			goto bail;
+		}
+		did_quota = 1;
 		status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
 					      new_fe_bh,
 					      handle, data_ac, NULL,
@@ -1728,6 +1759,11 @@ static int ocfs2_symlink(struct inode *dir,
 	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
 bail:
+	if (status < 0 && did_quota)
+		vfs_dq_free_space_nodirty(inode,
+					ocfs2_clusters_to_bytes(osb->sb, 1));
+	if (status < 0 && did_quota_inode)
+		vfs_dq_free_inode(inode);
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
 
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 9cb71e1..3b9634c 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1665,7 +1665,8 @@ static int ocfs2_remove_value_outside(struct inode*inode,
 
 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
-	ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
+	ctxt.handle = ocfs2_start_trans(osb,
+					ocfs2_remove_extent_credits(osb->sb));
 	if (IS_ERR(ctxt.handle)) {
 		ret = PTR_ERR(ctxt.handle);
 		mlog_errno(ret);
@@ -2233,7 +2234,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 	 */
 	if (!xi->value) {
 		if (!ocfs2_xattr_is_local(xe))
-			credits += OCFS2_REMOVE_EXTENT_CREDITS;
+			credits += ocfs2_remove_extent_credits(inode->i_sb);
 
 		goto out;
 	}
@@ -2250,7 +2251,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 		 */
 		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
 			clusters_add += new_clusters;
-			credits += OCFS2_REMOVE_EXTENT_CREDITS +
+			credits += ocfs2_remove_extent_credits(inode->i_sb) +
 				    OCFS2_INODE_UPDATE_CREDITS;
 			if (!ocfs2_xattr_is_local(xe))
 				credits += ocfs2_calc_extend_credits(
@@ -2275,7 +2276,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 			xv = &def_xv.xv;
 
 		if (old_clusters >= new_clusters) {
-			credits += OCFS2_REMOVE_EXTENT_CREDITS;
+			credits += ocfs2_remove_extent_credits(inode->i_sb);
 			goto out;
 		} else {
 			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
@@ -4750,7 +4751,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
+	handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
 	if (IS_ERR(handle)) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
@@ -5109,7 +5110,8 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 
 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
-	ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
+	ctxt.handle = ocfs2_start_trans(osb,
+					ocfs2_remove_extent_credits(osb->sb));
 	if (IS_ERR(ctxt.handle)) {
 		ret = PTR_ERR(ctxt.handle);
 		mlog_errno(ret);
-- 
cgit v0.10.2


From 171bf93ce11f4c9929fdce6ce63df8da2f3c4475 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Mon, 20 Oct 2008 15:36:47 +0200
Subject: ocfs2: Periodic quota syncing

This patch creates a work queue for periodic syncing of locally cached quota
information to the global quota files. We constantly queue a delayed work
item, to get the periodic behavior.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Acked-by: Jan Kara <jack@suse.cz>

diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 1f1c863..e2233d5 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -39,6 +39,7 @@ struct ocfs2_mem_dqinfo {
 	unsigned int dqi_chunks;	/* Number of chunks in local quota file */
 	unsigned int dqi_blocks;	/* Number of blocks allocated for local quota file */
 	unsigned int dqi_syncms;	/* How often should we sync with other nodes */
+	unsigned int dqi_syncjiff;	/* Precomputed dqi_syncms in jiffies */
 	struct list_head dqi_chunk;	/* List of chunks */
 	struct inode *dqi_gqinode;	/* Global quota file inode */
 	struct ocfs2_lock_res dqi_gqlock;	/* Lock protecting quota information structure */
@@ -47,6 +48,7 @@ struct ocfs2_mem_dqinfo {
 	struct buffer_head *dqi_lqi_bh;	/* Buffer head with local quota file inode */
 	struct buffer_head *dqi_ibh;	/* Buffer with information header */
 	struct qtree_mem_dqinfo dqi_gi;	/* Info about global file */
+	struct delayed_work dqi_sync_work;	/* Work for syncing dquots */
 };
 
 static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot)
@@ -90,4 +92,7 @@ struct buffer_head *ocfs2_read_quota_block(struct inode *inode,
 extern struct dquot_operations ocfs2_quota_operations;
 extern struct quota_format_type ocfs2_quota_format;
 
+int ocfs2_quota_setup(void);
+void ocfs2_quota_shutdown(void);
+
 #endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index af8340c..adf5350 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -1,10 +1,14 @@
 /*
  *  Implementation of operations over global quota file
  */
+#include <linux/spinlock.h>
 #include <linux/fs.h>
 #include <linux/quota.h>
 #include <linux/quotaops.h>
 #include <linux/dqblk_qtree.h>
+#include <linux/jiffies.h>
+#include <linux/writeback.h>
+#include <linux/workqueue.h>
 
 #define MLOG_MASK_PREFIX ML_QUOTA
 #include <cluster/masklog.h>
@@ -20,6 +24,10 @@
 #include "uptodate.h"
 #include "quota.h"
 
+static struct workqueue_struct *ocfs2_quota_wq = NULL;
+
+static void qsync_work_fn(struct work_struct *work);
+
 static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
 {
 	struct ocfs2_global_disk_dqblk *d = dp;
@@ -313,6 +321,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
+	oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
 	oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
 	oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
 	oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@@ -320,6 +329,10 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
 	oinfo->dqi_gi.dqi_usable_bs = sb->s_blocksize -
 						OCFS2_QBLK_RESERVED_SPACE;
 	oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
+	INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
+	queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
+			   oinfo->dqi_syncjiff);
+
 out_err:
 	mlog_exit(status);
 	return status;
@@ -520,6 +533,61 @@ out:
 }
 
 /*
+ *  Functions for periodic syncing of dquots with global file
+ */
+static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
+{
+	handle_t *handle;
+	struct super_block *sb = dquot->dq_sb;
+	struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+	int status = 0;
+
+	mlog_entry("id=%u qtype=%u type=%lu device=%s\n", dquot->dq_id,
+		   dquot->dq_type, type, sb->s_id);
+	if (type != dquot->dq_type)
+		goto out;
+	status = ocfs2_lock_global_qf(oinfo, 1);
+	if (status < 0)
+		goto out;
+
+	handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto out_ilock;
+	}
+	mutex_lock(&sb_dqopt(sb)->dqio_mutex);
+	status = ocfs2_sync_dquot(dquot);
+	mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
+	if (status < 0)
+		mlog_errno(status);
+	/* We have to write local structure as well... */
+	dquot_mark_dquot_dirty(dquot);
+	status = dquot_commit(dquot);
+	if (status < 0)
+		mlog_errno(status);
+	ocfs2_commit_trans(osb, handle);
+out_ilock:
+	ocfs2_unlock_global_qf(oinfo, 1);
+out:
+	mlog_exit(status);
+	return status;
+}
+
+static void qsync_work_fn(struct work_struct *work)
+{
+	struct ocfs2_mem_dqinfo *oinfo = container_of(work,
+						      struct ocfs2_mem_dqinfo,
+						      dqi_sync_work.work);
+	struct super_block *sb = oinfo->dqi_gqinode->i_sb;
+
+	dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
+	queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
+			   oinfo->dqi_syncjiff);
+}
+
+/*
  *  Wrappers for generic quota functions
  */
 
@@ -917,3 +985,20 @@ struct dquot_operations ocfs2_quota_operations = {
 	.alloc_dquot	= ocfs2_alloc_dquot,
 	.destroy_dquot	= ocfs2_destroy_dquot,
 };
+
+int ocfs2_quota_setup(void)
+{
+	ocfs2_quota_wq = create_workqueue("o2quot");
+	if (!ocfs2_quota_wq)
+		return -ENOMEM;
+	return 0;
+}
+
+void ocfs2_quota_shutdown(void)
+{
+	if (ocfs2_quota_wq) {
+		flush_workqueue(ocfs2_quota_wq);
+		destroy_workqueue(ocfs2_quota_wq);
+		ocfs2_quota_wq = NULL;
+	}
+}
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 55c3f2f..40e82b4 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -368,6 +368,10 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
 	int mark_clean = 1, len;
 	int status;
 
+	/* At this point we know there are no more dquots and thus
+	 * even if there's some sync in the pdflush queue, it won't
+	 * find any dquots and return without doing anything */
+	cancel_delayed_work_sync(&oinfo->dqi_sync_work);
 	iput(oinfo->dqi_gqinode);
 	ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
 	ocfs2_lock_res_free(&oinfo->dqi_gqlock);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7bb83e4..60f1d29 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1107,11 +1107,16 @@ static int __init ocfs2_init(void)
 		mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
 	}
 
+	status = ocfs2_quota_setup();
+	if (status)
+		goto leave;
+
 	ocfs2_set_locking_protocol();
 
 	status = register_quota_format(&ocfs2_quota_format);
 leave:
 	if (status < 0) {
+		ocfs2_quota_shutdown();
 		ocfs2_free_mem_caches();
 		exit_ocfs2_uptodate_cache();
 	}
@@ -1128,6 +1133,8 @@ static void __exit ocfs2_exit(void)
 {
 	mlog_entry_void();
 
+	ocfs2_quota_shutdown();
+
 	if (ocfs2_wq) {
 		flush_workqueue(ocfs2_wq);
 		destroy_workqueue(ocfs2_wq);
-- 
cgit v0.10.2


From 2205363dce7447b8e85f1ead14387664c1a98753 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 20 Oct 2008 23:50:38 +0200
Subject: ocfs2: Implement quota recovery

Implement functions for recovery after a crash. Functions just
read local quota file and sync info to global quota file.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 11a1178..c602420 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -45,6 +45,7 @@
 #include "slot_map.h"
 #include "super.h"
 #include "sysfile.h"
+#include "quota.h"
 
 #include "buffer_head_io.h"
 
@@ -52,7 +53,7 @@ DEFINE_SPINLOCK(trans_inc_lock);
 
 static int ocfs2_force_read_journal(struct inode *inode);
 static int ocfs2_recover_node(struct ocfs2_super *osb,
-			      int node_num);
+			      int node_num, int slot_num);
 static int __ocfs2_recovery_thread(void *arg);
 static int ocfs2_commit_cache(struct ocfs2_super *osb);
 static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
@@ -857,6 +858,7 @@ struct ocfs2_la_recovery_item {
 	int			lri_slot;
 	struct ocfs2_dinode	*lri_la_dinode;
 	struct ocfs2_dinode	*lri_tl_dinode;
+	struct ocfs2_quota_recovery *lri_qrec;
 };
 
 /* Does the second half of the recovery process. By this point, the
@@ -877,6 +879,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
 	struct ocfs2_super *osb = journal->j_osb;
 	struct ocfs2_dinode *la_dinode, *tl_dinode;
 	struct ocfs2_la_recovery_item *item, *n;
+	struct ocfs2_quota_recovery *qrec;
 	LIST_HEAD(tmp_la_list);
 
 	mlog_entry_void();
@@ -922,6 +925,16 @@ void ocfs2_complete_recovery(struct work_struct *work)
 		if (ret < 0)
 			mlog_errno(ret);
 
+		qrec = item->lri_qrec;
+		if (qrec) {
+			mlog(0, "Recovering quota files");
+			ret = ocfs2_finish_quota_recovery(osb, qrec,
+							  item->lri_slot);
+			if (ret < 0)
+				mlog_errno(ret);
+			/* Recovery info is already freed now */
+		}
+
 		kfree(item);
 	}
 
@@ -935,7 +948,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
 static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
 					    int slot_num,
 					    struct ocfs2_dinode *la_dinode,
-					    struct ocfs2_dinode *tl_dinode)
+					    struct ocfs2_dinode *tl_dinode,
+					    struct ocfs2_quota_recovery *qrec)
 {
 	struct ocfs2_la_recovery_item *item;
 
@@ -950,6 +964,9 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
 		if (tl_dinode)
 			kfree(tl_dinode);
 
+		if (qrec)
+			ocfs2_free_quota_recovery(qrec);
+
 		mlog_errno(-ENOMEM);
 		return;
 	}
@@ -958,6 +975,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
 	item->lri_la_dinode = la_dinode;
 	item->lri_slot = slot_num;
 	item->lri_tl_dinode = tl_dinode;
+	item->lri_qrec = qrec;
 
 	spin_lock(&journal->j_lock);
 	list_add_tail(&item->lri_list, &journal->j_la_cleanups);
@@ -977,6 +995,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
 		ocfs2_queue_recovery_completion(journal,
 						osb->slot_num,
 						osb->local_alloc_copy,
+						NULL,
 						NULL);
 		ocfs2_schedule_truncate_log_flush(osb, 0);
 
@@ -985,11 +1004,26 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
 	}
 }
 
+void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
+{
+	if (osb->quota_rec) {
+		ocfs2_queue_recovery_completion(osb->journal,
+						osb->slot_num,
+						NULL,
+						NULL,
+						osb->quota_rec);
+		osb->quota_rec = NULL;
+	}
+}
+
 static int __ocfs2_recovery_thread(void *arg)
 {
-	int status, node_num;
+	int status, node_num, slot_num;
 	struct ocfs2_super *osb = arg;
 	struct ocfs2_recovery_map *rm = osb->recovery_map;
+	int *rm_quota = NULL;
+	int rm_quota_used = 0, i;
+	struct ocfs2_quota_recovery *qrec;
 
 	mlog_entry_void();
 
@@ -998,6 +1032,11 @@ static int __ocfs2_recovery_thread(void *arg)
 		goto bail;
 	}
 
+	rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
+	if (!rm_quota) {
+		status = -ENOMEM;
+		goto bail;
+	}
 restart:
 	status = ocfs2_super_lock(osb, 1);
 	if (status < 0) {
@@ -1011,8 +1050,28 @@ restart:
 		 * clear it until ocfs2_recover_node() has succeeded. */
 		node_num = rm->rm_entries[0];
 		spin_unlock(&osb->osb_lock);
-
-		status = ocfs2_recover_node(osb, node_num);
+		mlog(0, "checking node %d\n", node_num);
+		slot_num = ocfs2_node_num_to_slot(osb, node_num);
+		if (slot_num == -ENOENT) {
+			status = 0;
+			mlog(0, "no slot for this node, so no recovery"
+			     "required.\n");
+			goto skip_recovery;
+		}
+		mlog(0, "node %d was using slot %d\n", node_num, slot_num);
+
+		/* It is a bit subtle with quota recovery. We cannot do it
+		 * immediately because we have to obtain cluster locks from
+		 * quota files and we also don't want to just skip it because
+		 * then quota usage would be out of sync until some node takes
+		 * the slot. So we remember which nodes need quota recovery
+		 * and when everything else is done, we recover quotas. */
+		for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
+		if (i == rm_quota_used)
+			rm_quota[rm_quota_used++] = slot_num;
+
+		status = ocfs2_recover_node(osb, node_num, slot_num);
+skip_recovery:
 		if (!status) {
 			ocfs2_recovery_map_clear(osb, node_num);
 		} else {
@@ -1034,13 +1093,27 @@ restart:
 	if (status < 0)
 		mlog_errno(status);
 
+	/* Now it is right time to recover quotas... We have to do this under
+	 * superblock lock so that noone can start using the slot (and crash)
+	 * before we recover it */
+	for (i = 0; i < rm_quota_used; i++) {
+		qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
+		if (IS_ERR(qrec)) {
+			status = PTR_ERR(qrec);
+			mlog_errno(status);
+			continue;
+		}
+		ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
+						NULL, NULL, qrec);
+	}
+
 	ocfs2_super_unlock(osb, 1);
 
 	/* We always run recovery on our own orphan dir - the dead
 	 * node(s) may have disallowd a previos inode delete. Re-processing
 	 * is therefore required. */
 	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
-					NULL);
+					NULL, NULL);
 
 bail:
 	mutex_lock(&osb->recovery_lock);
@@ -1055,6 +1128,9 @@ bail:
 
 	mutex_unlock(&osb->recovery_lock);
 
+	if (rm_quota)
+		kfree(rm_quota);
+
 	mlog_exit(status);
 	/* no one is callint kthread_stop() for us so the kthread() api
 	 * requires that we call do_exit().  And it isn't exported, but
@@ -1282,31 +1358,19 @@ done:
  * far less concerning.
  */
 static int ocfs2_recover_node(struct ocfs2_super *osb,
-			      int node_num)
+			      int node_num, int slot_num)
 {
 	int status = 0;
-	int slot_num;
 	struct ocfs2_dinode *la_copy = NULL;
 	struct ocfs2_dinode *tl_copy = NULL;
 
-	mlog_entry("(node_num=%d, osb->node_num = %d)\n",
-		   node_num, osb->node_num);
-
-	mlog(0, "checking node %d\n", node_num);
+	mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n",
+		   node_num, slot_num, osb->node_num);
 
 	/* Should not ever be called to recover ourselves -- in that
 	 * case we should've called ocfs2_journal_load instead. */
 	BUG_ON(osb->node_num == node_num);
 
-	slot_num = ocfs2_node_num_to_slot(osb, node_num);
-	if (slot_num == -ENOENT) {
-		status = 0;
-		mlog(0, "no slot for this node, so no recovery required.\n");
-		goto done;
-	}
-
-	mlog(0, "node %d was using slot %d\n", node_num, slot_num);
-
 	status = ocfs2_replay_journal(osb, node_num, slot_num);
 	if (status < 0) {
 		if (status == -EBUSY) {
@@ -1342,7 +1406,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
 
 	/* This will kfree the memory pointed to by la_copy and tl_copy */
 	ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
-					tl_copy);
+					tl_copy, NULL);
 
 	status = 0;
 done:
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index ee08e9c..37013bf 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -168,6 +168,7 @@ void   ocfs2_recovery_thread(struct ocfs2_super *osb,
 			     int node_num);
 int    ocfs2_mark_dead_nodes(struct ocfs2_super *osb);
 void   ocfs2_complete_mount_recovery(struct ocfs2_super *osb);
+void ocfs2_complete_quota_recovery(struct ocfs2_super *osb);
 
 static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb)
 {
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index f04b229..6b25b4a 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -206,6 +206,7 @@ enum ocfs2_mount_options
 struct ocfs2_journal;
 struct ocfs2_slot_info;
 struct ocfs2_recovery_map;
+struct ocfs2_quota_recovery;
 struct ocfs2_super
 {
 	struct task_struct *commit_task;
@@ -287,10 +288,11 @@ struct ocfs2_super
 	char *local_alloc_debug_buf;
 #endif
 
-	/* Next two fields are for local node slot recovery during
+	/* Next three fields are for local node slot recovery during
 	 * mount. */
 	int dirty;
 	struct ocfs2_dinode *local_alloc_copy;
+	struct ocfs2_quota_recovery *quota_rec;
 
 	struct ocfs2_alloc_stats alloc_stats;
 	char dev_str[20];		/* "major,minor" of the device */
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index e2233d5..04872b4 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -33,6 +33,17 @@ struct ocfs2_dquot {
 	s64 dq_originodes;	/* Last globally synced inode usage */
 };
 
+/* Description of one chunk to recover in memory */
+struct ocfs2_recovery_chunk {
+	struct list_head rc_list;	/* List of chunks */
+	int rc_chunk;			/* Chunk number */
+	unsigned long *rc_bitmap;	/* Bitmap of entries to recover */
+};
+
+struct ocfs2_quota_recovery {
+	struct list_head r_list[MAXQUOTAS];	/* List of chunks to recover */
+};
+
 /* In-memory structure with quota header information */
 struct ocfs2_mem_dqinfo {
 	unsigned int dqi_type;		/* Quota type this structure describes */
@@ -49,6 +60,10 @@ struct ocfs2_mem_dqinfo {
 	struct buffer_head *dqi_ibh;	/* Buffer with information header */
 	struct qtree_mem_dqinfo dqi_gi;	/* Info about global file */
 	struct delayed_work dqi_sync_work;	/* Work for syncing dquots */
+	struct ocfs2_quota_recovery *dqi_rec;	/* Pointer to recovery
+						 * information, in case we
+						 * enable quotas on file
+						 * needing it */
 };
 
 static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot)
@@ -67,6 +82,12 @@ extern struct kmem_cache *ocfs2_qf_chunk_cachep;
 
 extern struct qtree_fmt_operations ocfs2_global_ops;
 
+struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
+				struct ocfs2_super *osb, int slot_num);
+int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
+				struct ocfs2_quota_recovery *rec,
+				int slot_num);
+void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec);
 ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
 			 size_t len, loff_t off);
 ssize_t ocfs2_quota_write(struct super_block *sb, int type,
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index adf5350..49b536a 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -87,7 +87,6 @@ struct qtree_fmt_operations ocfs2_global_ops = {
 	.is_id = ocfs2_global_is_id,
 };
 
-
 struct buffer_head *ocfs2_read_quota_block(struct inode *inode,
 					   int block, int *err)
 {
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 40e82b4..b985621 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -49,14 +49,25 @@ static unsigned int ol_quota_chunk_block(struct super_block *sb, int c)
 	return 1 + (ol_chunk_blocks(sb) + 1) * c;
 }
 
-/* Offset of the dquot structure in the quota file */
-static loff_t ol_dqblk_off(struct super_block *sb, int c, int off)
+static unsigned int ol_dqblk_block(struct super_block *sb, int c, int off)
+{
+	int epb = ol_quota_entries_per_block(sb);
+
+	return ol_quota_chunk_block(sb, c) + 1 + off / epb;
+}
+
+static unsigned int ol_dqblk_block_off(struct super_block *sb, int c, int off)
 {
 	int epb = ol_quota_entries_per_block(sb);
 
-	return ((ol_quota_chunk_block(sb, c) + 1 + off / epb)
-		<< sb->s_blocksize_bits) +
-		(off % epb) * sizeof(struct ocfs2_local_disk_dqblk);
+	return (off % epb) * sizeof(struct ocfs2_local_disk_dqblk);
+}
+
+/* Offset of the dquot structure in the quota file */
+static loff_t ol_dqblk_off(struct super_block *sb, int c, int off)
+{
+	return (ol_dqblk_block(sb, c, off) << sb->s_blocksize_bits) +
+	       ol_dqblk_block_off(sb, c, off);
 }
 
 /* Compute block number from given offset */
@@ -253,6 +264,379 @@ static void olq_update_info(struct buffer_head *bh, void *private)
 	spin_unlock(&dq_data_lock);
 }
 
+static int ocfs2_add_recovery_chunk(struct super_block *sb,
+				    struct ocfs2_local_disk_chunk *dchunk,
+				    int chunk,
+				    struct list_head *head)
+{
+	struct ocfs2_recovery_chunk *rc;
+
+	rc = kmalloc(sizeof(struct ocfs2_recovery_chunk), GFP_NOFS);
+	if (!rc)
+		return -ENOMEM;
+	rc->rc_chunk = chunk;
+	rc->rc_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
+	if (!rc->rc_bitmap) {
+		kfree(rc);
+		return -ENOMEM;
+	}
+	memcpy(rc->rc_bitmap, dchunk->dqc_bitmap,
+	       (ol_chunk_entries(sb) + 7) >> 3);
+	list_add_tail(&rc->rc_list, head);
+	return 0;
+}
+
+static void free_recovery_list(struct list_head *head)
+{
+	struct ocfs2_recovery_chunk *next;
+	struct ocfs2_recovery_chunk *rchunk;
+
+	list_for_each_entry_safe(rchunk, next, head, rc_list) {
+		list_del(&rchunk->rc_list);
+		kfree(rchunk->rc_bitmap);
+		kfree(rchunk);
+	}
+}
+
+void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec)
+{
+	int type;
+
+	for (type = 0; type < MAXQUOTAS; type++)
+		free_recovery_list(&(rec->r_list[type]));
+	kfree(rec);
+}
+
+/* Load entries in our quota file we have to recover*/
+static int ocfs2_recovery_load_quota(struct inode *lqinode,
+				     struct ocfs2_local_disk_dqinfo *ldinfo,
+				     int type,
+				     struct list_head *head)
+{
+	struct super_block *sb = lqinode->i_sb;
+	struct buffer_head *hbh;
+	struct ocfs2_local_disk_chunk *dchunk;
+	int i, chunks = le32_to_cpu(ldinfo->dqi_chunks);
+	int status = 0;
+
+	for (i = 0; i < chunks; i++) {
+		hbh = ocfs2_read_quota_block(lqinode,
+					     ol_quota_chunk_block(sb, i),
+					     &status);
+		if (!hbh) {
+			mlog_errno(status);
+			break;
+		}
+		dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
+		if (le32_to_cpu(dchunk->dqc_free) < ol_chunk_entries(sb))
+			status = ocfs2_add_recovery_chunk(sb, dchunk, i, head);
+		brelse(hbh);
+		if (status < 0)
+			break;
+	}
+	if (status < 0)
+		free_recovery_list(head);
+	return status;
+}
+
+static struct ocfs2_quota_recovery *ocfs2_alloc_quota_recovery(void)
+{
+	int type;
+	struct ocfs2_quota_recovery *rec;
+
+	rec = kmalloc(sizeof(struct ocfs2_quota_recovery), GFP_NOFS);
+	if (!rec)
+		return NULL;
+	for (type = 0; type < MAXQUOTAS; type++)
+		INIT_LIST_HEAD(&(rec->r_list[type]));
+	return rec;
+}
+
+/* Load information we need for quota recovery into memory */
+struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
+						struct ocfs2_super *osb,
+						int slot_num)
+{
+	unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+					    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+	unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
+					LOCAL_GROUP_QUOTA_SYSTEM_INODE };
+	struct super_block *sb = osb->sb;
+	struct ocfs2_local_disk_dqinfo *ldinfo;
+	struct inode *lqinode;
+	struct buffer_head *bh;
+	int type;
+	int status = 0;
+	struct ocfs2_quota_recovery *rec;
+
+	mlog(ML_NOTICE, "Beginning quota recovery in slot %u\n", slot_num);
+	rec = ocfs2_alloc_quota_recovery();
+	if (!rec)
+		return ERR_PTR(-ENOMEM);
+	/* First init... */
+
+	for (type = 0; type < MAXQUOTAS; type++) {
+		if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+			continue;
+		/* At this point, journal of the slot is already replayed so
+		 * we can trust metadata and data of the quota file */
+		lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
+		if (!lqinode) {
+			status = -ENOENT;
+			goto out;
+		}
+		status = ocfs2_inode_lock_full(lqinode, NULL, 1,
+					       OCFS2_META_LOCK_RECOVERY);
+		if (status < 0) {
+			mlog_errno(status);
+			goto out_put;
+		}
+		/* Now read local header */
+		bh = ocfs2_read_quota_block(lqinode, 0, &status);
+		if (!bh) {
+			mlog_errno(status);
+			mlog(ML_ERROR, "failed to read quota file info header "
+				"(slot=%d type=%d)\n", slot_num, type);
+			goto out_lock;
+		}
+		ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+							OCFS2_LOCAL_INFO_OFF);
+		status = ocfs2_recovery_load_quota(lqinode, ldinfo, type,
+						   &rec->r_list[type]);
+		brelse(bh);
+out_lock:
+		ocfs2_inode_unlock(lqinode, 1);
+out_put:
+		iput(lqinode);
+		if (status < 0)
+			break;
+	}
+out:
+	if (status < 0) {
+		ocfs2_free_quota_recovery(rec);
+		rec = ERR_PTR(status);
+	}
+	return rec;
+}
+
+/* Sync changes in local quota file into global quota file and
+ * reinitialize local quota file.
+ * The function expects local quota file to be already locked and
+ * dqonoff_mutex locked. */
+static int ocfs2_recover_local_quota_file(struct inode *lqinode,
+					  int type,
+					  struct ocfs2_quota_recovery *rec)
+{
+	struct super_block *sb = lqinode->i_sb;
+	struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+	struct ocfs2_local_disk_chunk *dchunk;
+	struct ocfs2_local_disk_dqblk *dqblk;
+	struct dquot *dquot;
+	handle_t *handle;
+	struct buffer_head *hbh = NULL, *qbh = NULL;
+	int status = 0;
+	int bit, chunk;
+	struct ocfs2_recovery_chunk *rchunk, *next;
+	qsize_t spacechange, inodechange;
+
+	mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type);
+
+	status = ocfs2_lock_global_qf(oinfo, 1);
+	if (status < 0)
+		goto out;
+
+	list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {
+		chunk = rchunk->rc_chunk;
+		hbh = ocfs2_read_quota_block(lqinode,
+					     ol_quota_chunk_block(sb, chunk),
+					     &status);
+		if (!hbh) {
+			mlog_errno(status);
+			break;
+		}
+		dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
+		for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) {
+			qbh = ocfs2_read_quota_block(lqinode,
+						ol_dqblk_block(sb, chunk, bit),
+						&status);
+			if (!qbh) {
+				mlog_errno(status);
+				break;
+			}
+			dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data +
+				ol_dqblk_block_off(sb, chunk, bit));
+			dquot = dqget(sb, le64_to_cpu(dqblk->dqb_id), type);
+			if (!dquot) {
+				status = -EIO;
+				mlog(ML_ERROR, "Failed to get quota structure "
+				     "for id %u, type %d. Cannot finish quota "
+				     "file recovery.\n",
+				     (unsigned)le64_to_cpu(dqblk->dqb_id),
+				     type);
+				goto out_put_bh;
+			}
+			handle = ocfs2_start_trans(OCFS2_SB(sb),
+						   OCFS2_QSYNC_CREDITS);
+			if (IS_ERR(handle)) {
+				status = PTR_ERR(handle);
+				mlog_errno(status);
+				goto out_put_dquot;
+			}
+			mutex_lock(&sb_dqopt(sb)->dqio_mutex);
+			spin_lock(&dq_data_lock);
+			/* Add usage from quota entry into quota changes
+			 * of our node. Auxiliary variables are important
+			 * due to signedness */
+			spacechange = le64_to_cpu(dqblk->dqb_spacemod);
+			inodechange = le64_to_cpu(dqblk->dqb_inodemod);
+			dquot->dq_dqb.dqb_curspace += spacechange;
+			dquot->dq_dqb.dqb_curinodes += inodechange;
+			spin_unlock(&dq_data_lock);
+			/* We want to drop reference held by the crashed
+			 * node. Since we have our own reference we know
+			 * global structure actually won't be freed. */
+			status = ocfs2_global_release_dquot(dquot);
+			if (status < 0) {
+				mlog_errno(status);
+				goto out_commit;
+			}
+			/* Release local quota file entry */
+			status = ocfs2_journal_access(handle, lqinode,
+					qbh, OCFS2_JOURNAL_ACCESS_WRITE);
+			if (status < 0) {
+				mlog_errno(status);
+				goto out_commit;
+			}
+			lock_buffer(qbh);
+			WARN_ON(!ocfs2_test_bit(bit, dchunk->dqc_bitmap));
+			ocfs2_clear_bit(bit, dchunk->dqc_bitmap);
+			le32_add_cpu(&dchunk->dqc_free, 1);
+			unlock_buffer(qbh);
+			status = ocfs2_journal_dirty(handle, qbh);
+			if (status < 0)
+				mlog_errno(status);
+out_commit:
+			mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
+			ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out_put_dquot:
+			dqput(dquot);
+out_put_bh:
+			brelse(qbh);
+			if (status < 0)
+				break;
+		}
+		brelse(hbh);
+		list_del(&rchunk->rc_list);
+		kfree(rchunk->rc_bitmap);
+		kfree(rchunk);
+		if (status < 0)
+			break;
+	}
+	ocfs2_unlock_global_qf(oinfo, 1);
+out:
+	if (status < 0)
+		free_recovery_list(&(rec->r_list[type]));
+	mlog_exit(status);
+	return status;
+}
+
+/* Recover local quota files for given node different from us */
+int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
+				struct ocfs2_quota_recovery *rec,
+				int slot_num)
+{
+	unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
+					LOCAL_GROUP_QUOTA_SYSTEM_INODE };
+	struct super_block *sb = osb->sb;
+	struct ocfs2_local_disk_dqinfo *ldinfo;
+	struct buffer_head *bh;
+	handle_t *handle;
+	int type;
+	int status = 0;
+	struct inode *lqinode;
+	unsigned int flags;
+
+	mlog(ML_NOTICE, "Finishing quota recovery in slot %u\n", slot_num);
+	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+	for (type = 0; type < MAXQUOTAS; type++) {
+		if (list_empty(&(rec->r_list[type])))
+			continue;
+		mlog(0, "Recovering quota in slot %d\n", slot_num);
+		lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
+		if (!lqinode) {
+			status = -ENOENT;
+			goto out;
+		}
+		status = ocfs2_inode_lock_full(lqinode, NULL, 1,
+						       OCFS2_META_LOCK_NOQUEUE);
+		/* Someone else is holding the lock? Then he must be
+		 * doing the recovery. Just skip the file... */
+		if (status == -EAGAIN) {
+			mlog(ML_NOTICE, "skipping quota recovery for slot %d "
+			     "because quota file is locked.\n", slot_num);
+			status = 0;
+			goto out_put;
+		} else if (status < 0) {
+			mlog_errno(status);
+			goto out_put;
+		}
+		/* Now read local header */
+		bh = ocfs2_read_quota_block(lqinode, 0, &status);
+		if (!bh) {
+			mlog_errno(status);
+			mlog(ML_ERROR, "failed to read quota file info header "
+				"(slot=%d type=%d)\n", slot_num, type);
+			goto out_lock;
+		}
+		ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+							OCFS2_LOCAL_INFO_OFF);
+		/* Is recovery still needed? */
+		flags = le32_to_cpu(ldinfo->dqi_flags);
+		if (!(flags & OLQF_CLEAN))
+			status = ocfs2_recover_local_quota_file(lqinode,
+								type,
+								rec);
+		/* We don't want to mark file as clean when it is actually
+		 * active */
+		if (slot_num == osb->slot_num)
+			goto out_bh;
+		/* Mark quota file as clean if we are recovering quota file of
+		 * some other node. */
+		handle = ocfs2_start_trans(osb, 1);
+		if (IS_ERR(handle)) {
+			status = PTR_ERR(handle);
+			mlog_errno(status);
+			goto out_bh;
+		}
+		status = ocfs2_journal_access(handle, lqinode, bh,
+					      OCFS2_JOURNAL_ACCESS_WRITE);
+		if (status < 0) {
+			mlog_errno(status);
+			goto out_trans;
+		}
+		lock_buffer(bh);
+		ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN);
+		unlock_buffer(bh);
+		status = ocfs2_journal_dirty(handle, bh);
+		if (status < 0)
+			mlog_errno(status);
+out_trans:
+		ocfs2_commit_trans(osb, handle);
+out_bh:
+		brelse(bh);
+out_lock:
+		ocfs2_inode_unlock(lqinode, 1);
+out_put:
+		iput(lqinode);
+		if (status < 0)
+			break;
+	}
+out:
+	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+	kfree(rec);
+	return status;
+}
+
 /* Read information header from quota file */
 static int ocfs2_local_read_info(struct super_block *sb, int type)
 {
@@ -262,6 +646,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
 	struct inode *lqinode = sb_dqopt(sb)->files[type];
 	int status;
 	struct buffer_head *bh = NULL;
+	struct ocfs2_quota_recovery *rec;
 	int locked = 0;
 
 	info->dqi_maxblimit = 0x7fffffffffffffffLL;
@@ -275,6 +660,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
 	info->dqi_priv = oinfo;
 	oinfo->dqi_type = type;
 	INIT_LIST_HEAD(&oinfo->dqi_chunk);
+	oinfo->dqi_rec = NULL;
 	oinfo->dqi_lqi_bh = NULL;
 	oinfo->dqi_ibh = NULL;
 
@@ -305,10 +691,27 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
 	oinfo->dqi_ibh = bh;
 
 	/* We crashed when using local quota file? */
-	if (!(info->dqi_flags & OLQF_CLEAN))
-		goto out_err;	/* So far we just bail out. Later we should resync here */
+	if (!(info->dqi_flags & OLQF_CLEAN)) {
+		rec = OCFS2_SB(sb)->quota_rec;
+		if (!rec) {
+			rec = ocfs2_alloc_quota_recovery();
+			if (!rec) {
+				status = -ENOMEM;
+				mlog_errno(status);
+				goto out_err;
+			}
+			OCFS2_SB(sb)->quota_rec = rec;
+		}
 
-	status = ocfs2_load_local_quota_bitmaps(sb_dqopt(sb)->files[type],
+		status = ocfs2_recovery_load_quota(lqinode, ldinfo, type,
+                                                   &rec->r_list[type]);
+		if (status < 0) {
+			mlog_errno(status);
+			goto out_err;
+		}
+	}
+
+	status = ocfs2_load_local_quota_bitmaps(lqinode,
 						ldinfo,
 						&oinfo->dqi_chunk);
 	if (status < 0) {
@@ -394,6 +797,12 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
 	}
 	ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
 
+	/* dqonoff_mutex protects us against racing with recovery thread... */
+	if (oinfo->dqi_rec) {
+		ocfs2_free_quota_recovery(oinfo->dqi_rec);
+		mark_clean = 0;
+	}
+
 	if (!mark_clean)
 		goto out;
 
-- 
cgit v0.10.2


From 19ece546a418997226bd91552fbc41abcb05cea6 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 21 Aug 2008 20:13:17 +0200
Subject: ocfs2: Enable quota accounting on mount, disable on umount

Enable quota usage tracking on mount and disable it on umount. Also
add support for quota on and quota off quotactls and usrquota and
grpquota mount options. Add quota features among supported ones.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index c602420..302f114 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -56,7 +56,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
 			      int node_num, int slot_num);
 static int __ocfs2_recovery_thread(void *arg);
 static int ocfs2_commit_cache(struct ocfs2_super *osb);
-static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
+static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
 static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 				      int dirty, int replayed);
 static int ocfs2_trylock_journal(struct ocfs2_super *osb,
@@ -65,6 +65,17 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 				 int slot);
 static int ocfs2_commit_thread(void *arg);
 
+static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
+{
+	return __ocfs2_wait_on_mount(osb, 0);
+}
+
+static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
+{
+	return __ocfs2_wait_on_mount(osb, 1);
+}
+
+
 
 /*
  * The recovery_list is a simple linked list of node numbers to recover.
@@ -895,6 +906,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
 
 		mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
 
+		ocfs2_wait_on_quotas(osb);
+
 		la_dinode = item->lri_la_dinode;
 		if (la_dinode) {
 			mlog(0, "Clean up local alloc %llu\n",
@@ -1701,13 +1714,14 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 	return ret;
 }
 
-static int ocfs2_wait_on_mount(struct ocfs2_super *osb)
+static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
 {
 	/* This check is good because ocfs2 will wait on our recovery
 	 * thread before changing it to something other than MOUNTED
 	 * or DISABLED. */
 	wait_event(osb->osb_mount_event,
-		   atomic_read(&osb->vol_state) == VOLUME_MOUNTED ||
+		  (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
+		   atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
 		   atomic_read(&osb->vol_state) == VOLUME_DISABLED);
 
 	/* If there's an error on mount, then we may never get to the
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 6b25b4a..5c77798 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -161,6 +161,7 @@ enum ocfs2_vol_state
 {
 	VOLUME_INIT = 0,
 	VOLUME_MOUNTED,
+	VOLUME_MOUNTED_QUOTAS,
 	VOLUME_DISMOUNTED,
 	VOLUME_DISABLED
 };
@@ -196,6 +197,8 @@ enum ocfs2_mount_options
 	OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
 	OCFS2_MOUNT_INODE64 = 1 << 7,	/* Allow inode numbers > 2^32 */
 	OCFS2_MOUNT_POSIX_ACL = 1 << 8,	/* POSIX access control lists */
+	OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */
+	OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
 };
 
 #define OCFS2_OSB_SOFT_RO	0x0001
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 0a5ac79..359732e 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -94,7 +94,9 @@
 					 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
 					 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
 					 | OCFS2_FEATURE_INCOMPAT_XATTR)
-#define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN)
+#define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
+					 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
+					 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
 
 /*
  * Heartbeat-only devices are missing journals and other files.  The
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 60f1d29..2eb657c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -41,6 +41,7 @@
 #include <linux/debugfs.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
+#include <linux/quotaops.h>
 
 #define MLOG_MASK_PREFIX ML_SUPER
 #include <cluster/masklog.h>
@@ -127,6 +128,9 @@ static int ocfs2_get_sector(struct super_block *sb,
 static void ocfs2_write_super(struct super_block *sb);
 static struct inode *ocfs2_alloc_inode(struct super_block *sb);
 static void ocfs2_destroy_inode(struct inode *inode);
+static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
+static int ocfs2_enable_quotas(struct ocfs2_super *osb);
+static void ocfs2_disable_quotas(struct ocfs2_super *osb);
 
 static const struct super_operations ocfs2_sops = {
 	.statfs		= ocfs2_statfs,
@@ -165,6 +169,8 @@ enum {
 	Opt_inode64,
 	Opt_acl,
 	Opt_noacl,
+	Opt_usrquota,
+	Opt_grpquota,
 	Opt_err,
 };
 
@@ -189,6 +195,8 @@ static const match_table_t tokens = {
 	{Opt_inode64, "inode64"},
 	{Opt_acl, "acl"},
 	{Opt_noacl, "noacl"},
+	{Opt_usrquota, "usrquota"},
+	{Opt_grpquota, "grpquota"},
 	{Opt_err, NULL}
 };
 
@@ -452,6 +460,12 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
 
 	/* We're going to/from readonly mode. */
 	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
+		/* Disable quota accounting before remounting RO */
+		if (*flags & MS_RDONLY) {
+			ret = ocfs2_susp_quotas(osb, 0);
+			if (ret < 0)
+				goto out;
+		}
 		/* Lock here so the check of HARD_RO and the potential
 		 * setting of SOFT_RO is atomic. */
 		spin_lock(&osb->osb_lock);
@@ -487,6 +501,21 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
 		}
 unlock_osb:
 		spin_unlock(&osb->osb_lock);
+		/* Enable quota accounting after remounting RW */
+		if (!ret && !(*flags & MS_RDONLY)) {
+			if (sb_any_quota_suspended(sb))
+				ret = ocfs2_susp_quotas(osb, 1);
+			else
+				ret = ocfs2_enable_quotas(osb);
+			if (ret < 0) {
+				/* Return back changes... */
+				spin_lock(&osb->osb_lock);
+				sb->s_flags |= MS_RDONLY;
+				osb->osb_flags |= OCFS2_OSB_SOFT_RO;
+				spin_unlock(&osb->osb_lock);
+				goto out;
+			}
+		}
 	}
 
 	if (!ret) {
@@ -647,6 +676,131 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
 	return 0;
 }
 
+static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend)
+{
+	int type;
+	struct super_block *sb = osb->sb;
+	unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+					     OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+	int status = 0;
+
+	for (type = 0; type < MAXQUOTAS; type++) {
+		if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+			continue;
+		if (unsuspend)
+			status = vfs_quota_enable(
+					sb_dqopt(sb)->files[type],
+					type, QFMT_OCFS2,
+					DQUOT_SUSPENDED);
+		else
+			status = vfs_quota_disable(sb, type,
+						   DQUOT_SUSPENDED);
+		if (status < 0)
+			break;
+	}
+	if (status < 0)
+		mlog(ML_ERROR, "Failed to suspend/unsuspend quotas on "
+		     "remount (error = %d).\n", status);
+	return status;
+}
+
+static int ocfs2_enable_quotas(struct ocfs2_super *osb)
+{
+	struct inode *inode[MAXQUOTAS] = { NULL, NULL };
+	struct super_block *sb = osb->sb;
+	unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+					     OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+	unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
+					LOCAL_GROUP_QUOTA_SYSTEM_INODE };
+	int status;
+	int type;
+
+	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE;
+	for (type = 0; type < MAXQUOTAS; type++) {
+		if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+			continue;
+		inode[type] = ocfs2_get_system_file_inode(osb, ino[type],
+							osb->slot_num);
+		if (!inode[type]) {
+			status = -ENOENT;
+			goto out_quota_off;
+		}
+		status = vfs_quota_enable(inode[type], type, QFMT_OCFS2,
+						DQUOT_USAGE_ENABLED);
+		if (status < 0)
+			goto out_quota_off;
+	}
+
+	for (type = 0; type < MAXQUOTAS; type++)
+		iput(inode[type]);
+	return 0;
+out_quota_off:
+	ocfs2_disable_quotas(osb);
+	for (type = 0; type < MAXQUOTAS; type++)
+		iput(inode[type]);
+	mlog_errno(status);
+	return status;
+}
+
+static void ocfs2_disable_quotas(struct ocfs2_super *osb)
+{
+	int type;
+	struct inode *inode;
+	struct super_block *sb = osb->sb;
+
+	/* We mostly ignore errors in this function because there's not much
+	 * we can do when we see them */
+	for (type = 0; type < MAXQUOTAS; type++) {
+		if (!sb_has_quota_loaded(sb, type))
+			continue;
+		inode = igrab(sb->s_dquot.files[type]);
+		/* Turn off quotas. This will remove all dquot structures from
+		 * memory and so they will be automatically synced to global
+		 * quota files */
+		vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED |
+					    DQUOT_LIMITS_ENABLED);
+		if (!inode)
+			continue;
+		iput(inode);
+	}
+}
+
+/* Handle quota on quotactl */
+static int ocfs2_quota_on(struct super_block *sb, int type, int format_id,
+			  char *path, int remount)
+{
+	unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+					     OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+
+	if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+		return -EINVAL;
+
+	if (remount)
+		return 0;	/* Just ignore it has been handled in
+				 * ocfs2_remount() */
+	return vfs_quota_enable(sb_dqopt(sb)->files[type], type,
+				    format_id, DQUOT_LIMITS_ENABLED);
+}
+
+/* Handle quota off quotactl */
+static int ocfs2_quota_off(struct super_block *sb, int type, int remount)
+{
+	if (remount)
+		return 0;	/* Ignore now and handle later in
+				 * ocfs2_remount() */
+	return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
+}
+
+static struct quotactl_ops ocfs2_quotactl_ops = {
+	.quota_on	= ocfs2_quota_on,
+	.quota_off	= ocfs2_quota_off,
+	.quota_sync	= vfs_quota_sync,
+	.get_info	= vfs_get_dqinfo,
+	.set_info	= vfs_set_dqinfo,
+	.get_dqblk	= vfs_get_dqblk,
+	.set_dqblk	= vfs_set_dqblk,
+};
+
 static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct dentry *root;
@@ -689,6 +843,22 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	osb->osb_commit_interval = parsed_options.commit_interval;
 	osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
 	osb->local_alloc_bits = osb->local_alloc_default_bits;
+	if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA &&
+	    !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+					 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
+		status = -EINVAL;
+		mlog(ML_ERROR, "User quotas were requested, but this "
+		     "filesystem does not have the feature enabled.\n");
+		goto read_super_error;
+	}
+	if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA &&
+	    !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+					 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
+		status = -EINVAL;
+		mlog(ML_ERROR, "Group quotas were requested, but this "
+		     "filesystem does not have the feature enabled.\n");
+		goto read_super_error;
+	}
 
 	status = ocfs2_verify_userspace_stack(osb, &parsed_options);
 	if (status)
@@ -793,6 +963,28 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	atomic_set(&osb->vol_state, VOLUME_MOUNTED);
 	wake_up(&osb->osb_mount_event);
 
+	/* Now we can initialize quotas because we can afford to wait
+	 * for cluster locks recovery now. That also means that truncation
+	 * log recovery can happen but that waits for proper quota setup */
+	if (!(sb->s_flags & MS_RDONLY)) {
+		status = ocfs2_enable_quotas(osb);
+		if (status < 0) {
+			/* We have to err-out specially here because
+			 * s_root is already set */
+			mlog_errno(status);
+			atomic_set(&osb->vol_state, VOLUME_DISABLED);
+			wake_up(&osb->osb_mount_event);
+			mlog_exit(status);
+			return status;
+		}
+	}
+
+	ocfs2_complete_quota_recovery(osb);
+
+	/* Now we wake up again for processes waiting for quotas */
+	atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
+	wake_up(&osb->osb_mount_event);
+
 	mlog_exit(status);
 	return status;
 
@@ -980,6 +1172,28 @@ static int ocfs2_parse_options(struct super_block *sb,
 		case Opt_inode64:
 			mopt->mount_opt |= OCFS2_MOUNT_INODE64;
 			break;
+		case Opt_usrquota:
+			/* We check only on remount, otherwise features
+			 * aren't yet initialized. */
+			if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+			    OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
+				mlog(ML_ERROR, "User quota requested but "
+				     "filesystem feature is not set\n");
+				status = 0;
+				goto bail;
+			}
+			mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
+			break;
+		case Opt_grpquota:
+			if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+			    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
+				mlog(ML_ERROR, "Group quota requested but "
+				     "filesystem feature is not set\n");
+				status = 0;
+				goto bail;
+			}
+			mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
+			break;
 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
 		case Opt_acl:
 			mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
@@ -1056,6 +1270,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 	if (osb->osb_cluster_stack[0])
 		seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
 			   osb->osb_cluster_stack);
+	if (opts & OCFS2_MOUNT_USRQUOTA)
+		seq_printf(s, ",usrquota");
+	if (opts & OCFS2_MOUNT_GRPQUOTA)
+		seq_printf(s, ",grpquota");
 
 	if (opts & OCFS2_MOUNT_NOUSERXATTR)
 		seq_printf(s, ",nouser_xattr");
@@ -1394,6 +1612,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 	osb = OCFS2_SB(sb);
 	BUG_ON(!osb);
 
+	ocfs2_disable_quotas(osb);
+
 	ocfs2_shutdown_local_alloc(osb);
 
 	ocfs2_truncate_log_shutdown(osb);
@@ -1504,6 +1724,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	sb->s_fs_info = osb;
 	sb->s_op = &ocfs2_sops;
 	sb->s_export_op = &ocfs2_export_ops;
+	sb->s_qcop = &ocfs2_quotactl_ops;
+	sb->dq_op = &ocfs2_quota_operations;
 	sb->s_xattr = ocfs2_xattr_handlers;
 	sb->s_time_gran = 1;
 	sb->s_flags |= MS_NOATIME;
-- 
cgit v0.10.2


From e97fcd95a4778a8caf1980c6c72fdf68185a0838 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Tue, 18 Nov 2008 17:15:24 -0800
Subject: jbd2: Add BH_JBDPrivateStart

Add this so that file systems using JBD2 can safely allocate unused b_state
bits.

In this case, we add it so that Ocfs2 can define a single bit for tracking
the validation state of a buffer.

Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index c7d106e..f366457 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -329,6 +329,7 @@ enum jbd_state_bits {
 	BH_State,		/* Pins most journal_head state */
 	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
 	BH_Unshadow,		/* Dummy bit, for BJ_Shadow wakeup filtering */
+	BH_JBDPrivateStart,	/* First bit available for private use by FS */
 };
 
 BUFFER_FNS(JBD, jbd)
-- 
cgit v0.10.2


From b86c86fa1feb50221dc16071ae5b8a4acf3bd32c Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Tue, 18 Nov 2008 17:16:47 -0800
Subject: ocfs2: Use BH_JBDPrivateStart instead of BH_Unshadow

This is safer. We no longer have to worry about tracking changes to
jbd_state_bits.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 0e9eed0..15c8e6d 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -42,11 +42,10 @@
 /*
  * Bits on bh->b_state used by ocfs2.
  *
- * These MUST be after the JBD2 bits.  Currently BH_Unshadow is the last
- * JBD2 bit.
+ * These MUST be after the JBD2 bits.  Hence, we use BH_JBDPrivateStart.
  */
 enum ocfs2_state_bits {
-	BH_NeedsValidate = BH_Unshadow + 1,
+	BH_NeedsValidate = BH_JBDPrivateStart,
 };
 
 /* Expand the magic b_state functions */
-- 
cgit v0.10.2


From 57a09a7b3d9445a17c78d544f1e49d4d7d61705a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Nov 2008 15:31:26 +0100
Subject: ocfs2: Add missing initialization

Add missing variable initialization to ocfs2_dquot_drop_slow().

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 49b536a..10ecb33 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -870,7 +870,7 @@ out:
 
 static int ocfs2_dquot_drop_slow(struct inode *inode)
 {
-	int status;
+	int status = 0;
 	int cnt;
 	int got_lock[MAXQUOTAS] = {0, 0};
 	handle_t *handle;
-- 
cgit v0.10.2


From 85eb8b73d66530bb7b931789ae7a5ec9744eed34 Mon Sep 17 00:00:00 2001
From: Joel Becker <Joel.Becker@oracle.com>
Date: Tue, 25 Nov 2008 15:31:27 +0100
Subject: ocfs2: Fix ocfs2_read_quota_block() error handling.

ocfs2_bread() has become ocfs2_read_virt_blocks(), with a prototype to
match ocfs2_read_blocks().  The quota code, converting from
ocfs2_bread(), wraps the call to ocfs2_read_virt_blocks() in
ocfs2_read_quota_block().  Unfortunately, the prototype of
ocfs2_read_quota_block() matches the old prototype of ocfs2_bread().

The problem is that ocfs2_bread() returned the buffer head, and callers
assumed that a NULL pointer was indicative of error.  It wasn't.  This
is why ocfs2_bread() took an int*err argument as well.

The new prototype of ocfs2_read_virt_blocks() avoids this error handling
confusion.  Let's change ocfs2_read_quota_block() to match.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 058aa86..b1c7591 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3519,7 +3519,7 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
 					    oinfo->dqi_gi.dqi_type);
 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
 	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
-	struct buffer_head *bh;
+	struct buffer_head *bh = NULL;
 	struct ocfs2_global_disk_dqinfo *gdinfo;
 	int status = 0;
 
@@ -3532,8 +3532,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
 		oinfo->dqi_gi.dqi_free_entry =
 					be32_to_cpu(lvb->lvb_free_entry);
 	} else {
-		bh = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &status);
-		if (!bh) {
+		status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh);
+		if (status) {
 			mlog_errno(status);
 			goto bail;
 		}
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 04872b4..7365e2e 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -107,8 +107,8 @@ static inline int ocfs2_global_release_dquot(struct dquot *dquot)
 
 int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
 void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
-struct buffer_head *ocfs2_read_quota_block(struct inode *inode,
-					   int block, int *err);
+int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
+			   struct buffer_head **bh);
 
 extern struct dquot_operations ocfs2_quota_operations;
 extern struct quota_format_type ocfs2_quota_format;
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 10ecb33..2bdcddd 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -87,16 +87,21 @@ struct qtree_fmt_operations ocfs2_global_ops = {
 	.is_id = ocfs2_global_is_id,
 };
 
-struct buffer_head *ocfs2_read_quota_block(struct inode *inode,
-					   int block, int *err)
+int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
+			   struct buffer_head **bh)
 {
-	struct buffer_head *tmp = NULL;
+	int rc = 0;
+	struct buffer_head *tmp = *bh;
 
-	*err = ocfs2_read_virt_blocks(inode, block, 1, &tmp, 0, NULL);
-	if (*err)
-		mlog_errno(*err);
+	rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, NULL);
+	if (rc)
+		mlog_errno(rc);
+
+	/* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
+	if (!rc && !*bh)
+		*bh = tmp;
 
-	return tmp;
+	return rc;
 }
 
 static struct buffer_head *ocfs2_get_quota_block(struct inode *inode,
@@ -143,8 +148,9 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
 	toread = len;
 	while (toread > 0) {
 		tocopy = min((size_t)(sb->s_blocksize - offset), toread);
-		bh = ocfs2_read_quota_block(gqinode, blk, &err);
-		if (!bh) {
+		bh = NULL;
+		err = ocfs2_read_quota_block(gqinode, blk, &bh);
+		if (err) {
 			mlog_errno(err);
 			return err;
 		}
@@ -169,7 +175,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 	int offset = off & (sb->s_blocksize - 1);
 	sector_t blk = off >> sb->s_blocksize_bits;
 	int err = 0, new = 0;
-	struct buffer_head *bh;
+	struct buffer_head *bh = NULL;
 	handle_t *handle = journal_current_handle();
 
 	if (!handle) {
@@ -200,13 +206,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 	/* Not rewriting whole block? */
 	if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) &&
 	    !new) {
-		bh = ocfs2_read_quota_block(gqinode, blk, &err);
-		if (!bh) {
+		err = ocfs2_read_quota_block(gqinode, blk, &bh);
+		if (err) {
 			mlog_errno(err);
 			return err;
 		}
 		err = ocfs2_journal_access(handle, gqinode, bh,
-						OCFS2_JOURNAL_ACCESS_WRITE);
+					   OCFS2_JOURNAL_ACCESS_WRITE);
 	} else {
 		bh = ocfs2_get_quota_block(gqinode, blk, &err);
 		if (!bh) {
@@ -214,7 +220,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 			return err;
 		}
 		err = ocfs2_journal_access(handle, gqinode, bh,
-						OCFS2_JOURNAL_ACCESS_CREATE);
+					   OCFS2_JOURNAL_ACCESS_CREATE);
 	}
 	if (err < 0) {
 		brelse(bh);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index b985621..7053664 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -139,15 +139,15 @@ static int ocfs2_local_check_quota_file(struct super_block *sb, int type)
 	unsigned int gversions[MAXQUOTAS] = OCFS2_GLOBAL_QVERSIONS;
 	unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
 					GROUP_QUOTA_SYSTEM_INODE };
-	struct buffer_head *bh;
+	struct buffer_head *bh = NULL;
 	struct inode *linode = sb_dqopt(sb)->files[type];
 	struct inode *ginode = NULL;
 	struct ocfs2_disk_dqheader *dqhead;
 	int status, ret = 0;
 
 	/* First check whether we understand local quota file */
-	bh = ocfs2_read_quota_block(linode, 0, &status);
-	if (!bh) {
+	status = ocfs2_read_quota_block(linode, 0, &bh);
+	if (status) {
 		mlog_errno(status);
 		mlog(ML_ERROR, "failed to read quota file header (type=%d)\n",
 			type);
@@ -178,8 +178,8 @@ static int ocfs2_local_check_quota_file(struct super_block *sb, int type)
 		goto out_err;
 	}
 	/* Since the header is read only, we don't care about locking */
-	bh = ocfs2_read_quota_block(ginode, 0, &status);
-	if (!bh) {
+	status = ocfs2_read_quota_block(ginode, 0, &bh);
+	if (status) {
 		mlog_errno(status);
 		mlog(ML_ERROR, "failed to read global quota file header "
 				"(type=%d)\n", type);
@@ -235,10 +235,11 @@ static int ocfs2_load_local_quota_bitmaps(struct inode *inode,
 			return -ENOMEM;
 		}
 		newchunk->qc_num = i;
-		newchunk->qc_headerbh = ocfs2_read_quota_block(inode,
+		newchunk->qc_headerbh = NULL;
+		status = ocfs2_read_quota_block(inode,
 				ol_quota_chunk_block(inode->i_sb, i),
-				&status);
-		if (!newchunk->qc_headerbh) {
+				&newchunk->qc_headerbh);
+		if (status) {
 			mlog_errno(status);
 			kmem_cache_free(ocfs2_qf_chunk_cachep, newchunk);
 			ocfs2_release_local_quota_bitmaps(head);
@@ -320,10 +321,11 @@ static int ocfs2_recovery_load_quota(struct inode *lqinode,
 	int status = 0;
 
 	for (i = 0; i < chunks; i++) {
-		hbh = ocfs2_read_quota_block(lqinode,
-					     ol_quota_chunk_block(sb, i),
-					     &status);
-		if (!hbh) {
+		hbh = NULL;
+		status = ocfs2_read_quota_block(lqinode,
+						ol_quota_chunk_block(sb, i),
+						&hbh);
+		if (status) {
 			mlog_errno(status);
 			break;
 		}
@@ -392,8 +394,9 @@ struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
 			goto out_put;
 		}
 		/* Now read local header */
-		bh = ocfs2_read_quota_block(lqinode, 0, &status);
-		if (!bh) {
+		bh = NULL;
+		status = ocfs2_read_quota_block(lqinode, 0, &bh);
+		if (status) {
 			mlog_errno(status);
 			mlog(ML_ERROR, "failed to read quota file info header "
 				"(slot=%d type=%d)\n", slot_num, type);
@@ -447,19 +450,21 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
 
 	list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {
 		chunk = rchunk->rc_chunk;
-		hbh = ocfs2_read_quota_block(lqinode,
-					     ol_quota_chunk_block(sb, chunk),
-					     &status);
-		if (!hbh) {
+		hbh = NULL;
+		status = ocfs2_read_quota_block(lqinode,
+						ol_quota_chunk_block(sb, chunk),
+						&hbh);
+		if (status) {
 			mlog_errno(status);
 			break;
 		}
 		dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
 		for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) {
-			qbh = ocfs2_read_quota_block(lqinode,
+			qbh = NULL;
+			status = ocfs2_read_quota_block(lqinode,
 						ol_dqblk_block(sb, chunk, bit),
-						&status);
-			if (!qbh) {
+						&qbh);
+			if (status) {
 				mlog_errno(status);
 				break;
 			}
@@ -581,8 +586,9 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
 			goto out_put;
 		}
 		/* Now read local header */
-		bh = ocfs2_read_quota_block(lqinode, 0, &status);
-		if (!bh) {
+		bh = NULL;
+		status = ocfs2_read_quota_block(lqinode, 0, &bh);
+		if (status) {
 			mlog_errno(status);
 			mlog(ML_ERROR, "failed to read quota file info header "
 				"(slot=%d type=%d)\n", slot_num, type);
@@ -676,8 +682,8 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
 	locked = 1;
 
 	/* Now read local header */
-	bh = ocfs2_read_quota_block(lqinode, 0, &status);
-	if (!bh) {
+	status = ocfs2_read_quota_block(lqinode, 0, &bh);
+	if (status) {
 		mlog_errno(status);
 		mlog(ML_ERROR, "failed to read quota file info header "
 			"(type=%d)\n", type);
@@ -850,13 +856,13 @@ static int ocfs2_local_write_dquot(struct dquot *dquot)
 {
 	struct super_block *sb = dquot->dq_sb;
 	struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
-	struct buffer_head *bh;
+	struct buffer_head *bh = NULL;
 	int status;
 
-	bh = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type],
+	status = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type],
 				    ol_dqblk_file_block(sb, od->dq_local_off),
-				    &status);
-	if (!bh) {
+				    &bh);
+	if (status) {
 		mlog_errno(status);
 		goto out;
 	}
-- 
cgit v0.10.2


From af09e51b6810d3408db1c0e956b3b0687b0e3723 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Nov 2008 15:31:28 +0100
Subject: ocfs2: Fix oops when extending quota files

We have to mark buffer as uptodate before calling ocfs2_journal_access() and
ocfs2_set_buffer_uptodate() does not do this for us.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 2bdcddd..8fceb0c 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -174,7 +174,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 	struct inode *gqinode = oinfo->dqi_gqinode;
 	int offset = off & (sb->s_blocksize - 1);
 	sector_t blk = off >> sb->s_blocksize_bits;
-	int err = 0, new = 0;
+	int err = 0, new = 0, ja_type;
 	struct buffer_head *bh = NULL;
 	handle_t *handle = journal_current_handle();
 
@@ -207,32 +207,28 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 	if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) &&
 	    !new) {
 		err = ocfs2_read_quota_block(gqinode, blk, &bh);
-		if (err) {
-			mlog_errno(err);
-			return err;
-		}
-		err = ocfs2_journal_access(handle, gqinode, bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ja_type = OCFS2_JOURNAL_ACCESS_WRITE;
 	} else {
 		bh = ocfs2_get_quota_block(gqinode, blk, &err);
-		if (!bh) {
-			mlog_errno(err);
-			return err;
-		}
-		err = ocfs2_journal_access(handle, gqinode, bh,
-					   OCFS2_JOURNAL_ACCESS_CREATE);
+		ja_type = OCFS2_JOURNAL_ACCESS_CREATE;
 	}
-	if (err < 0) {
-		brelse(bh);
-		goto out;
+	if (err) {
+		mlog_errno(err);
+		return err;
 	}
 	lock_buffer(bh);
 	if (new)
 		memset(bh->b_data, 0, sb->s_blocksize);
 	memcpy(bh->b_data + offset, data, len);
 	flush_dcache_page(bh->b_page);
+	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
 	ocfs2_set_buffer_uptodate(gqinode, bh);
+	err = ocfs2_journal_access(handle, gqinode, bh, ja_type);
+	if (err < 0) {
+		brelse(bh);
+		goto out;
+	}
 	err = ocfs2_journal_dirty(handle, bh);
 	brelse(bh);
 	if (err < 0)
-- 
cgit v0.10.2


From 53a3604610e92a5344cf8003c19975583e71a598 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Nov 2008 15:31:29 +0100
Subject: ocfs2: Make ocfs2_get_quota_block() consistent with
 ocfs2_read_quota_block()

Make function return error status and not buffer pointer so that it's
consistent with ocfs2_read_quota_block().

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 8fceb0c..e527ec6 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -104,26 +104,25 @@ int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
 	return rc;
 }
 
-static struct buffer_head *ocfs2_get_quota_block(struct inode *inode,
-						 int block, int *err)
+static int ocfs2_get_quota_block(struct inode *inode, int block,
+				 struct buffer_head **bh)
 {
 	u64 pblock, pcount;
-	struct buffer_head *bh;
+	int err;
 
 	down_read(&OCFS2_I(inode)->ip_alloc_sem);
-	*err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount,
-					   NULL);
+	err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, NULL);
 	up_read(&OCFS2_I(inode)->ip_alloc_sem);
-	if (*err) {
-		mlog_errno(*err);
-		return NULL;
+	if (err) {
+		mlog_errno(err);
+		return err;
 	}
-	bh = sb_getblk(inode->i_sb, pblock);
-	if (!bh) {
-		*err = -EIO;
-		mlog_errno(*err);
+	*bh = sb_getblk(inode->i_sb, pblock);
+	if (!*bh) {
+		err = -EIO;
+		mlog_errno(err);
 	}
-	return bh;
+	return err;;
 }
 
 /* Read data from global quotafile - avoid pagecache and such because we cannot
@@ -209,7 +208,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 		err = ocfs2_read_quota_block(gqinode, blk, &bh);
 		ja_type = OCFS2_JOURNAL_ACCESS_WRITE;
 	} else {
-		bh = ocfs2_get_quota_block(gqinode, blk, &err);
+		err = ocfs2_get_quota_block(gqinode, blk, &bh);
 		ja_type = OCFS2_JOURNAL_ACCESS_CREATE;
 	}
 	if (err) {
-- 
cgit v0.10.2


From 9a2f3866c825c67c3a5806799cdc93fb7517f0c4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Nov 2008 15:31:30 +0100
Subject: ocfs2: Fix build warnings (64-bit types vs long long)

fs/ocfs2/quota_local.c: In function 'olq_set_dquot':
fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 7 has type '__le64'
fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 8 has type '__le64'
fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 7 has type '__le64'
fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 8 has type '__le64'
fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 7 has type '__le64'
fs/ocfs2/quota_local.c:844: warning: format '%lld' expects type 'long long int', but argument 8 has type '__le64'
fs/ocfs2/quota_global.c: In function '__ocfs2_sync_dquot':
fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 8 has type 's64'
fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 10 has type 's64'
fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 8 has type 's64'
fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 10 has type 's64'
fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 8 has type 's64'
fs/ocfs2/quota_global.c:457: warning: format '%lld' expects type 'long long int', but argument 10 has type 's64'

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index e527ec6..054d52b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -457,9 +457,9 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
 	olditime = dquot->dq_dqb.dqb_itime;
 	oldbtime = dquot->dq_dqb.dqb_btime;
 	ocfs2_global_disk2memdqb(dquot, &dqblk);
-	mlog(0, "Syncing global dquot %d space %lld+%lld, inodes %lld+%lld\n",
-	     dquot->dq_id, dquot->dq_dqb.dqb_curspace, spacechange,
-	     dquot->dq_dqb.dqb_curinodes, inodechange);
+	mlog(0, "Syncing global dquot %u space %lld+%lld, inodes %lld+%lld\n",
+	     dquot->dq_id, dquot->dq_dqb.dqb_curspace, (long long)spacechange,
+	     dquot->dq_dqb.dqb_curinodes, (long long)inodechange);
 	if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
 		dquot->dq_dqb.dqb_curspace += spacechange;
 	if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 7053664..b5ddb22 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -848,7 +848,8 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
 					  od->dq_originodes);
 	spin_unlock(&dq_data_lock);
 	mlog(0, "Writing local dquot %u space %lld inodes %lld\n",
-	     od->dq_dquot.dq_id, dqblk->dqb_spacemod, dqblk->dqb_inodemod);
+	     od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod),
+	     (long long)le64_to_cpu(dqblk->dqb_inodemod));
 }
 
 /* Write dquot to local quota file */
-- 
cgit v0.10.2


From 5cd9d5bb86daf632a40f90e2321ea9379e42f073 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Nov 2008 15:31:31 +0100
Subject: quota: Unexport dqblk_v1.h and dqblk_v2.h

Unexport header files dqblk_v[12].h since except for quota format ID they
don't contain information userspace should be interested in. Move ID
definitions to quota.h.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 900a787..39da666 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -56,8 +56,6 @@ header-y += dlm_device.h
 header-y += dlm_netlink.h
 header-y += dm-ioctl.h
 header-y += dn.h
-header-y += dqblk_v1.h
-header-y += dqblk_v2.h
 header-y += dqblk_xfs.h
 header-y += efs_fs_sb.h
 header-y += elf-fdpic.h
diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h
index 9cea901..3713a72 100644
--- a/include/linux/dqblk_v1.h
+++ b/include/linux/dqblk_v1.h
@@ -5,9 +5,6 @@
 #ifndef _LINUX_DQBLK_V1_H
 #define _LINUX_DQBLK_V1_H
 
-/* Id of quota format */
-#define QFMT_VFS_OLD 1
-
 /* Root squash turned on */
 #define V1_DQF_RSQUASH 1
 
diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h
index ff8af1b..18000a5 100644
--- a/include/linux/dqblk_v2.h
+++ b/include/linux/dqblk_v2.h
@@ -7,9 +7,6 @@
 
 #include <linux/dqblk_qtree.h>
 
-/* Id number of quota format */
-#define QFMT_VFS_V0 2
-
 /* Numbers of blocks needed for updates */
 #define V2_INIT_ALLOC QTREE_INIT_ALLOC
 #define V2_INIT_REWRITE QTREE_INIT_REWRITE
diff --git a/include/linux/quota.h b/include/linux/quota.h
index ec82beb..d72d5d8 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -70,6 +70,10 @@
 #define Q_GETQUOTA 0x800007	/* get user quota structure */
 #define Q_SETQUOTA 0x800008	/* set user quota structure */
 
+/* Quota format type IDs */
+#define	QFMT_VFS_OLD 1
+#define	QFMT_VFS_V0 2
+
 /* Size of block in which space limits are passed through the quota
  * interface */
 #define QIF_DQBLKSIZE_BITS 10
-- 
cgit v0.10.2


From 7d9056ba20ebed6e3937a2e23183f6117919cb00 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Nov 2008 15:31:32 +0100
Subject: quota: Export dquot_alloc() and dquot_destroy() functions

These are default functions for creating and destroying quota structures
and they should be used from filesystems.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/dquot.c b/fs/dquot.c
index 075dc76..61bfff6 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -413,10 +413,11 @@ out_dqlock:
 	return ret;
 }
 
-static void dquot_destroy(struct dquot *dquot)
+void dquot_destroy(struct dquot *dquot)
 {
 	kmem_cache_free(dquot_cachep, dquot);
 }
+EXPORT_SYMBOL(dquot_destroy);
 
 static inline void do_destroy_dquot(struct dquot *dquot)
 {
@@ -668,10 +669,11 @@ we_slept:
 	spin_unlock(&dq_list_lock);
 }
 
-static struct dquot *dquot_alloc(struct super_block *sb, int type)
+struct dquot *dquot_alloc(struct super_block *sb, int type)
 {
 	return kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
 }
+EXPORT_SYMBOL(dquot_alloc);
 
 static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 {
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index f491394..21b781a 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -31,6 +31,8 @@ int dquot_is_cached(struct super_block *sb, unsigned int id, int type);
 int dquot_scan_active(struct super_block *sb,
 		      int (*fn)(struct dquot *dquot, unsigned long priv),
 		      unsigned long priv);
+struct dquot *dquot_alloc(struct super_block *sb, int type);
+void dquot_destroy(struct dquot *dquot);
 
 int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
 int dquot_alloc_inode(const struct inode *inode, qsize_t number);
-- 
cgit v0.10.2


From 4103003b3abb85af9dec9e60616ae086c2bcb4c9 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Nov 2008 15:31:33 +0100
Subject: reiserfs: Add default allocation routines for quota structures

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index a9b393a..c55651f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -649,6 +649,8 @@ static struct dquot_operations reiserfs_quota_operations = {
 	.release_dquot = reiserfs_release_dquot,
 	.mark_dirty = reiserfs_mark_dquot_dirty,
 	.write_info = reiserfs_write_info,
+	.alloc_dquot	= dquot_alloc,
+	.destroy_dquot	= dquot_destroy,
 };
 
 static struct quotactl_ops reiserfs_qctl_operations = {
-- 
cgit v0.10.2


From 157091a2c3cdc71422cbc71eace205cf1b9f2200 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Nov 2008 15:31:34 +0100
Subject: ext3: Add default allocation routines for quota structures

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 250ec53..c22d014 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -713,7 +713,9 @@ static struct dquot_operations ext3_quota_operations = {
 	.acquire_dquot	= ext3_acquire_dquot,
 	.release_dquot	= ext3_release_dquot,
 	.mark_dirty	= ext3_mark_dquot_dirty,
-	.write_info	= ext3_write_info
+	.write_info	= ext3_write_info,
+	.alloc_dquot	= dquot_alloc,
+	.destroy_dquot	= dquot_destroy,
 };
 
 static struct quotactl_ops ext3_qctl_operations = {
-- 
cgit v0.10.2


From a5b5ee320185adc091a3a31630d278806b19d8f0 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Nov 2008 15:31:35 +0100
Subject: ext4: Add default allocation routines for quota structures

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 49fcf88..9494bb2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -803,7 +803,9 @@ static struct dquot_operations ext4_quota_operations = {
 	.acquire_dquot	= ext4_acquire_dquot,
 	.release_dquot	= ext4_release_dquot,
 	.mark_dirty	= ext4_mark_dquot_dirty,
-	.write_info	= ext4_write_info
+	.write_info	= ext4_write_info,
+	.alloc_dquot	= dquot_alloc,
+	.destroy_dquot	= dquot_destroy,
 };
 
 static struct quotactl_ops ext4_qctl_operations = {
-- 
cgit v0.10.2


From e35ff98f7c37b7bc901b4b90a66a0287565e456c Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 26 Nov 2008 16:20:19 -0800
Subject: ocfs2: fix indendation in ocfs2_dquot_drop_slow

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 054d52b..a10faebe 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -893,7 +893,7 @@ static int ocfs2_dquot_drop_slow(struct inode *inode)
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		mlog_errno(status);
-				goto out;
+		goto out;
 	}
 	dquot_drop(inode);
 	ocfs2_commit_trans(OCFS2_SB(sb), handle);
-- 
cgit v0.10.2


From df32b3343aa11e0c7f54783594b24321d17d376f Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 25 Nov 2008 07:21:36 +0800
Subject: ocfs2/quota: sparse fixes for quota

Fix 2 minor things in quota. They are both found by sparse check.
1. an endian bug in ocfs2_local_quota_add_chunk.
2. change olq_alloc_dquot to static.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index b5ddb22..d451b71 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -988,7 +988,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 		goto out_trans;
 	}
 	lock_buffer(bh);
-	dchunk->dqc_free = ol_quota_entries_per_block(sb);
+	dchunk->dqc_free = cpu_to_le32(ol_quota_entries_per_block(sb));
 	memset(dchunk->dqc_bitmap, 0,
 	       sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
 	       OCFS2_QBLK_RESERVED_SPACE);
@@ -1110,7 +1110,7 @@ out:
 	return ERR_PTR(status);
 }
 
-void olq_alloc_dquot(struct buffer_head *bh, void *private)
+static void olq_alloc_dquot(struct buffer_head *bh, void *private)
 {
 	int *offset = private;
 	struct ocfs2_local_disk_chunk *dchunk;
-- 
cgit v0.10.2


From 548b0f22bb7497ba76f91627b99f9fed53a91704 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 24 Nov 2008 19:32:13 -0800
Subject: ocfs2: Dirty the entire bucket in ocfs2_bucket_value_truncate()

ocfs2_bucket_value_truncate() currently takes the first bh of the
bucket, and magically plays around with the value bh - even though
the bucket structure in the calling function already has it.

In addition, future code wants to always dirty the entire bucket when it
is changed.  So let's pass the entire bucket into this function, skip
any block reads (we have them), and add the access/dirty logic.

ocfs2_xattr_update_value_size() is no longer necessary, as it only did
one thing other than journal access/dirty.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3b9634c..6db68a2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -4580,31 +4580,6 @@ out:
 	return ret;
 }
 
-static int ocfs2_xattr_value_update_size(struct inode *inode,
-					 handle_t *handle,
-					 struct buffer_head *xe_bh,
-					 struct ocfs2_xattr_entry *xe,
-					 u64 new_size)
-{
-	int ret;
-
-	ret = ocfs2_journal_access(handle, inode, xe_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
-	if (ret < 0) {
-		mlog_errno(ret);
-		goto out;
-	}
-
-	xe->xe_value_size = cpu_to_le64(new_size);
-
-	ret = ocfs2_journal_dirty(handle, xe_bh);
-	if (ret < 0)
-		mlog_errno(ret);
-
-out:
-	return ret;
-}
-
 /*
  * Truncate the specified xe_off entry in xattr bucket.
  * bucket is indicated by header_bh and len is the new length.
@@ -4613,7 +4588,7 @@ out:
  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
  */
 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
-					     struct buffer_head *header_bh,
+					     struct ocfs2_xattr_bucket *bucket,
 					     int xe_off,
 					     int len,
 					     struct ocfs2_xattr_set_ctxt *ctxt)
@@ -4623,8 +4598,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 	struct buffer_head *value_bh = NULL;
 	struct ocfs2_xattr_value_root *xv;
 	struct ocfs2_xattr_entry *xe;
-	struct ocfs2_xattr_header *xh =
-			(struct ocfs2_xattr_header *)header_bh->b_data;
+	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
 	size_t blocksize = inode->i_sb->s_blocksize;
 
 	xe = &xh->xh_entries[xe_off];
@@ -4638,34 +4612,41 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 
 	/* We don't allow ocfs2_xattr_value to be stored in different block. */
 	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
-	value_blk += header_bh->b_blocknr;
 
-	ret = ocfs2_read_block(inode, value_blk, &value_bh, NULL);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
+	value_bh = bucket->bu_bhs[value_blk];
+	BUG_ON(!value_bh);
 
 	xv = (struct ocfs2_xattr_value_root *)
 		(value_bh->b_data + offset % blocksize);
 
-	mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
-	     xe_off, (unsigned long long)header_bh->b_blocknr, len);
-	ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt);
+	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
+						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ret = ocfs2_xattr_value_update_size(inode, ctxt->handle,
-					    header_bh, xe, len);
+	/*
+	 * From here on out we have to dirty the bucket.  The generic
+	 * value calls only modify one of the bucket's bhs, but we need
+	 * to send the bucket at once.  So if they error, they *could* have
+	 * modified something.  We have to assume they did, and dirty
+	 * the whole bucket.  This leaves us in a consistent state.
+	 */
+	mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
+	     xe_off, (unsigned long long)bucket_blkno(bucket), len);
+	ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt);
 	if (ret) {
 		mlog_errno(ret);
-		goto out;
+		goto out_dirty;
 	}
 
+	xe->xe_value_size = cpu_to_le64(len);
+
+out_dirty:
+	ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
+
 out:
-	brelse(value_bh);
 	return ret;
 }
 
@@ -4681,7 +4662,7 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
 	BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
 
 	offset = xe - xh->xh_entries;
-	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0],
+	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
 						offset, len, ctxt);
 	if (ret)
 		mlog_errno(ret);
@@ -5107,11 +5088,13 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 	struct ocfs2_xattr_entry *xe;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
+	int credits = ocfs2_remove_extent_credits(osb->sb) +
+		ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
 
 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
-	ctxt.handle = ocfs2_start_trans(osb,
-					ocfs2_remove_extent_credits(osb->sb));
+	ctxt.handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(ctxt.handle)) {
 		ret = PTR_ERR(ctxt.handle);
 		mlog_errno(ret);
@@ -5123,8 +5106,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 		if (ocfs2_xattr_is_local(xe))
 			continue;
 
-		ret = ocfs2_xattr_bucket_value_truncate(inode,
-							bucket->bu_bhs[0],
+		ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
 							i, 0, &ctxt);
 		if (ret) {
 			mlog_errno(ret);
-- 
cgit v0.10.2


From 88c3b0622acf82c7c86fbc066e81e15edc7c1685 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 11 Dec 2008 08:54:11 +0800
Subject: ocfs2: Narrow the transaction for deleting xattrs from a bucket.

We move the transaction into the loop because in
ocfs2_remove_extent, we will double the credits in function
ocfs2_extend_rotate_transaction. So if we have a large loop
number, we will soon waste much the journal space.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 6db68a2..df53a2c 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -5094,30 +5094,30 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 
 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
-	ctxt.handle = ocfs2_start_trans(osb, credits);
-	if (IS_ERR(ctxt.handle)) {
-		ret = PTR_ERR(ctxt.handle);
-		mlog_errno(ret);
-		goto out;
-	}
-
 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
 		xe = &xh->xh_entries[i];
 		if (ocfs2_xattr_is_local(xe))
 			continue;
 
+		ctxt.handle = ocfs2_start_trans(osb, credits);
+		if (IS_ERR(ctxt.handle)) {
+			ret = PTR_ERR(ctxt.handle);
+			mlog_errno(ret);
+			break;
+		}
+
 		ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
 							i, 0, &ctxt);
+
+		ocfs2_commit_trans(osb, ctxt.handle);
 		if (ret) {
 			mlog_errno(ret);
 			break;
 		}
 	}
 
-	ret = ocfs2_commit_trans(osb, ctxt.handle);
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
-out:
 	return ret;
 }
 
-- 
cgit v0.10.2


From 92de109ade7999084fb0bfcc65d603252504e0d0 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 25 Nov 2008 17:06:40 -0800
Subject: ocfs2: Dirty the entire first bucket in ocfs2_extend_xattr_bucket()

ocfs2_extend_xattr_bucket() takes an extent of buckets and shifts some
of them down to make room for a new xattr.  It is passed the first bh of
the first bucket, because that is where we store the number of buckets
in the extent.

However, future code wants to always dirty the entire bucket when it
is changed.  So let's pass the entire bucket into this function, skip
any block reads (we have them), and add the access/dirty logic.  We also
can skip passing in the target bucket bh - we only need its block
number.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index df53a2c..ed1e959 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3905,7 +3905,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 		mlog_errno(ret);
 		goto out;
 	}
-  
+
 	ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
 	if (ret)
 		goto out;
@@ -4232,37 +4232,45 @@ leave:
 }
 
 /*
- * Extend a new xattr bucket and move xattrs to the end one by one until
- * We meet with start_bh. Only move half of the xattrs to the bucket after it.
+ * We are given an extent.  'first' is the bucket at the very front of
+ * the extent.  The extent has space for an additional bucket past
+ * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
+ * of the target bucket.  We wish to shift every bucket past the target
+ * down one, filling in that additional space.  When we get back to the
+ * target, we split the target between itself and the now-empty bucket
+ * at target+1 (aka, target_blkno + blks_per_bucket).
  */
 static int ocfs2_extend_xattr_bucket(struct inode *inode,
 				     handle_t *handle,
-				     struct buffer_head *first_bh,
-				     struct buffer_head *start_bh,
+				     struct ocfs2_xattr_bucket *first,
+				     u64 target_blk,
 				     u32 num_clusters)
 {
 	int ret, credits;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-	u64 start_blk = start_bh->b_blocknr, end_blk;
-	u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
-	struct ocfs2_xattr_header *first_xh =
-				(struct ocfs2_xattr_header *)first_bh->b_data;
-	u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
+	u64 end_blk;
+	u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
 
 	mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
-	     "from %llu, len = %u\n", (unsigned long long)start_blk,
-	     (unsigned long long)first_bh->b_blocknr, num_clusters);
+	     "from %llu, len = %u\n", (unsigned long long)target_blk,
+	     (unsigned long long)bucket_blkno(first), num_clusters);
 
-	BUG_ON(bucket >= num_buckets);
+	/* The extent must have room for an additional bucket */
+	BUG_ON(new_bucket >=
+	       (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
 
-	end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
+	/* end_blk points to the last existing bucket */
+	end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
 
 	/*
-	 * We will touch all the buckets after the start_bh(include it).
-	 * Then we add one more bucket.
+	 * end_blk is the start of the last existing bucket.
+	 * Thus, (end_blk - target_blk) covers the target bucket and
+	 * every bucket after it up to, but not including, the last
+	 * existing bucket.  Then we add the last existing bucket, the
+	 * new bucket, and the first bucket (3 * blk_per_bucket).
 	 */
-	credits = end_blk - start_blk + 3 * blk_per_bucket + 1 +
+	credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
 		  handle->h_buffer_credits;
 	ret = ocfs2_extend_trans(handle, credits);
 	if (ret) {
@@ -4270,14 +4278,14 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, first_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_xattr_bucket_journal_access(handle, first,
+						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	while (end_blk != start_blk) {
+	while (end_blk != target_blk) {
 		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
 					    end_blk + blk_per_bucket, 0);
 		if (ret)
@@ -4285,12 +4293,12 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
 		end_blk -= blk_per_bucket;
 	}
 
-	/* Move half of the xattr in start_blk to the next bucket. */
-	ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk,
-					start_blk + blk_per_bucket, NULL, 0);
+	/* Move half of the xattr in target_blkno to the next bucket. */
+	ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
+					target_blk + blk_per_bucket, NULL, 0);
 
-	le16_add_cpu(&first_xh->xh_num_buckets, 1);
-	ocfs2_journal_dirty(handle, first_bh);
+	le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
+	ocfs2_xattr_bucket_journal_dirty(handle, first);
 
 out:
 	return ret;
@@ -4324,10 +4332,19 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 	int ret, num_buckets, extend = 1;
 	u64 p_blkno;
 	u32 e_cpos, num_clusters;
+	/* The bucket at the front of the extent */
+	struct ocfs2_xattr_bucket *first;
 
 	mlog(0, "Add new xattr bucket starting form %llu\n",
 	     (unsigned long long)header_bh->b_blocknr);
 
+	first = ocfs2_xattr_bucket_new(inode);
+	if (!first) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
 	/*
 	 * Add refrence for header_bh here because it may be
 	 * changed in ocfs2_add_new_xattr_cluster and we need
@@ -4367,17 +4384,25 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 		}
 	}
 
-	if (extend)
+	if (extend) {
+		/* These bucket reads should be cached */
+		ret = ocfs2_read_xattr_bucket(first, first_bh->b_blocknr);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
 		ret = ocfs2_extend_xattr_bucket(inode,
 						ctxt->handle,
-						first_bh,
-						header_bh,
+						first, header_bh->b_blocknr,
 						num_clusters);
-	if (ret)
-		mlog_errno(ret);
+		if (ret)
+			mlog_errno(ret);
+	}
+
 out:
 	brelse(first_bh);
 	brelse(header_bh);
+	ocfs2_xattr_bucket_free(first);
 	return ret;
 }
 
-- 
cgit v0.10.2


From 15d609293d1954465a4788b9b182214323c6a2a1 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 25 Nov 2008 18:36:42 -0800
Subject: ocfs2: Dirty the entire first bucket in ocfs2_cp_xattr_cluster().

ocfs2_cp_xattr_cluster() takes the last bucket of a full extent and
copies it over to a new extent.  It then updates the headers of both
extents to reflect the new state.  It is passed the first bh of
the first bucket in order to update that first extent's bucket count.
It reads and dirties the first bh of the new extent for the same reason.

However, future code wants to always dirty the entire bucket when it
is changed.  So it is changed to read the entire bucket it is updating
for both extents.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index ed1e959..4dba347 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3936,9 +3936,10 @@ out:
 }
 
 /*
- * Copy one xattr cluster from src_blk to to_blk.
- * The to_blk will become the first bucket header of the cluster, so its
- * xh_num_buckets will be initialized as the bucket num in the cluster.
+ * src_blk points to the last cluster of an existing extent.  to_blk
+ * points to a newly allocated extent.  We copy the cluster over to the
+ * new extent, initializing its xh_num_buckets.  The old extent's
+ * xh_num_buckets shrinks by the same amount.
  */
 static int ocfs2_cp_xattr_cluster(struct inode *inode,
 				  handle_t *handle,
@@ -3950,27 +3951,42 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
 	int i, ret, credits;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
+	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
-	struct buffer_head *bh = NULL;
-	struct ocfs2_xattr_header *xh;
-	u64 to_blk_start = to_blk;
+	struct ocfs2_xattr_bucket *old_first, *new_first;
 
 	mlog(0, "cp xattrs from cluster %llu to %llu\n",
 	     (unsigned long long)src_blk, (unsigned long long)to_blk);
 
+	/* The first bucket of the original extent */
+	old_first = ocfs2_xattr_bucket_new(inode);
+	/* The first bucket of the new extent */
+	new_first = ocfs2_xattr_bucket_new(inode);
+	if (!old_first || !new_first) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_read_xattr_bucket(old_first, first_bh->b_blocknr);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
 	/*
-	 * We need to update the new cluster and 1 more for the update of
-	 * the 1st bucket of the previous extent rec.
+	 * We need to update the first bucket of the old extent and the
+	 * entire first cluster of the new extent.
 	 */
-	credits = bpc + 1 + handle->h_buffer_credits;
+	credits = blks_per_bucket + bpc + handle->h_buffer_credits;
 	ret = ocfs2_extend_trans(handle, credits);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, first_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
+						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3978,45 +3994,45 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
 
 	for (i = 0; i < num_buckets; i++) {
 		ret = ocfs2_cp_xattr_bucket(inode, handle,
-					    src_blk, to_blk, 1);
+					    src_blk + (i * blks_per_bucket),
+					    to_blk + (i * blks_per_bucket),
+					    1);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
-
-		src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-		to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	}
 
-	/* update the old bucket header. */
-	xh = (struct ocfs2_xattr_header *)first_bh->b_data;
-	le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
-
-	ocfs2_journal_dirty(handle, first_bh);
-
-	/* update the new bucket header. */
-	ret = ocfs2_read_block(inode, to_blk_start, &bh, NULL);
-	if (ret < 0) {
+	/*
+	 * Get the new bucket ready before we dirty anything
+	 * (This actually shouldn't fail, because we already dirtied
+	 * it once in ocfs2_cp_xattr_bucket()).
+	 */
+	ret = ocfs2_read_xattr_bucket(new_first, to_blk);
+	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
-
-	ret = ocfs2_journal_access(handle, inode, bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
+						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	xh = (struct ocfs2_xattr_header *)bh->b_data;
-	xh->xh_num_buckets = cpu_to_le16(num_buckets);
+	/* Now update the headers */
+	le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
+	ocfs2_xattr_bucket_journal_dirty(handle, old_first);
 
-	ocfs2_journal_dirty(handle, bh);
+	bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
+	ocfs2_xattr_bucket_journal_dirty(handle, new_first);
 
 	if (first_hash)
-		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
+		*first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
+
 out:
-	brelse(bh);
+	ocfs2_xattr_bucket_free(new_first);
+	ocfs2_xattr_bucket_free(old_first);
 	return ret;
 }
 
-- 
cgit v0.10.2


From 2b656c1d6fc5ba7791a360766780a212faed5705 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 25 Nov 2008 19:00:15 -0800
Subject: ocfs2: Explain t_is_new in ocfs2_cp_xattr_cluster().

I was unsure of the JOURNAL_ACCESS parameters in
ocfs2_cp_xattr_cluster().  They're based on the function argument
't_is_new', but I couldn't quite figure out how t_is_new mapped to
allocation.  ocfs2_cp_xattr_cluster() actually overwrites the target,
regardless of t_is_new.

Well, I just figured it out.  So I'm adding a big fat comment for those
who come after me.  ocfs2_divide_xattr_cluster() has the same behavior.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 4dba347..5efcf4e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3747,6 +3747,11 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
+	/*
+	 * Hey, if we're overwriting t_bucket, what difference does
+	 * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
+	 * same part of ocfs2_cp_xattr_bucket().
+	 */
 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
 						new_bucket_head ?
 						OCFS2_JOURNAL_ACCESS_CREATE :
@@ -3918,6 +3923,18 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 	if (ret)
 		goto out;
 
+	/*
+	 * Hey, if we're overwriting t_bucket, what difference does
+	 * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
+	 * cluster to fill, we came here from ocfs2_cp_xattr_cluster(), and
+	 * it is really new - ACCESS_CREATE is required.  But we also
+	 * might have moved data out of t_bucket before extending back
+	 * into it.  ocfs2_add_new_xattr_bucket() can do this - its call
+	 * to ocfs2_add_new_xattr_cluster() may have created a new extent
+	 * and copied out the end of the old extent.  Then it re-extends
+	 * the old extent back to create space for new xattrs.  That's
+	 * how we get here, and the bucket isn't really new.
+	 */
 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
 						t_is_new ?
 						OCFS2_JOURNAL_ACCESS_CREATE :
-- 
cgit v0.10.2


From b5c03e746959bb005b987e9d8511df46680c3daa Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 25 Nov 2008 19:58:16 -0800
Subject: ocfs2: Use ocfs2_cp_xattr_bucket() in
 ocfs2_mv_xattr_bucket_cross_cluster().

The buffer copy loop of ocfs2_mv_xattr_bucket_cross_cluster() actually
looks a lot like ocfs2_cp_xattr_bucket().  Let's just use that instead.
We also use bucket operations to update the buckets at the start of each
extent.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 5efcf4e..5be9966 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -170,6 +170,11 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
 
 static int ocfs2_delete_xattr_index_block(struct inode *inode,
 					  struct buffer_head *xb_bh);
+static int ocfs2_cp_xattr_bucket(struct inode *inode,
+				 handle_t *handle,
+				 u64 s_blkno,
+				 u64 t_blkno,
+				 int t_is_new);
 
 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 {
@@ -3526,13 +3531,21 @@ out:
 }
 
 /*
- * Move half nums of the xattr bucket in the previous cluster to this new
- * cluster. We only touch the last cluster of the previous extend record.
+ * prev_blkno points to the start of an existing extent.  new_blkno
+ * points to a newly allocated extent.  Because we know each of our
+ * clusters contains more than bucket, we can easily split one cluster
+ * at a bucket boundary.  So we take the last cluster of the existing
+ * extent and split it down the middle.  We move the last half of the
+ * buckets in the last cluster of the existing extent over to the new
+ * extent.
+ *
+ * first_bh is the buffer at prev_blkno so we can update the existing
+ * extent's bucket count.  header_bh is the bucket were we were hoping
+ * to insert our xattr.  If the bucket move places the target in the new
+ * extent, we'll update first_bh and header_bh after modifying the old
+ * extent.
  *
- * first_bh is the first buffer_head of a series of bucket in the same
- * extent rec and header_bh is the header of one bucket in this cluster.
- * They will be updated if we move the data header_bh contains to the new
- * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
+ * first_hash will be set as the 1st xe's name_hash in the new extent.
  */
 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
 					       handle_t *handle,
@@ -3545,105 +3558,131 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
 {
 	int i, ret, credits;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
-	int blocksize = inode->i_sb->s_blocksize;
-	struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
-	struct ocfs2_xattr_header *new_xh;
+	int to_move = num_buckets / 2;
+	u64 last_cluster_blkno, src_blkno;
 	struct ocfs2_xattr_header *xh =
 			(struct ocfs2_xattr_header *)((*first_bh)->b_data);
+	struct ocfs2_xattr_bucket *old_first, *new_first;
 
 	BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
 
-	prev_bh = *first_bh;
-	get_bh(prev_bh);
-	xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
-
-	prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
+	last_cluster_blkno = prev_blkno + ((num_clusters - 1) * bpc);
+	src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
 
 	mlog(0, "move half of xattrs in cluster %llu to %llu\n",
 	     (unsigned long long)prev_blkno, (unsigned long long)new_blkno);
 
+	/* The first bucket of the original extent */
+	old_first = ocfs2_xattr_bucket_new(inode);
+	/* The first bucket of the new extent */
+	new_first = ocfs2_xattr_bucket_new(inode);
+	if (!old_first || !new_first) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_read_xattr_bucket(old_first, prev_blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
 	/*
-	 * We need to update the 1st half of the new cluster and
-	 * 1 more for the update of the 1st bucket of the previous
-	 * extent record.
+	 * We need to update the 1st half of the new extent, and we
+	 * need to update the first bucket of the old extent.
 	 */
-	credits = bpc / 2 + 1 + handle->h_buffer_credits;
+	credits = ((to_move + 1) * blks_per_bucket) + handle->h_buffer_credits;
 	ret = ocfs2_extend_trans(handle, credits);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, prev_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
+						OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
-		old_bh = new_bh = NULL;
-		new_bh = sb_getblk(inode->i_sb, new_blkno);
-		if (!new_bh) {
-			ret = -EIO;
+	for (i = 0; i < to_move; i++) {
+		ret = ocfs2_cp_xattr_bucket(inode, handle,
+					    src_blkno + (i * blks_per_bucket),
+					    new_blkno + (i * blks_per_bucket),
+					    1);
+		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
+	}
 
-		ocfs2_set_new_buffer_uptodate(inode, new_bh);
+	/*
+	 * Get the new bucket ready before we dirty anything
+	 * (This actually shouldn't fail, because we already dirtied
+	 * it once in ocfs2_cp_xattr_bucket()).
+	 */
+	ret = ocfs2_read_xattr_bucket(new_first, new_blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+	ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
+						OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
 
-		ret = ocfs2_journal_access(handle, inode, new_bh,
-					   OCFS2_JOURNAL_ACCESS_CREATE);
-		if (ret < 0) {
-			mlog_errno(ret);
-			brelse(new_bh);
-			goto out;
-		}
+	/* Now update the headers */
+	le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -to_move);
+	ocfs2_xattr_bucket_journal_dirty(handle, old_first);
 
-		ret = ocfs2_read_block(inode, prev_blkno, &old_bh, NULL);
-		if (ret < 0) {
-			mlog_errno(ret);
-			brelse(new_bh);
-			goto out;
-		}
+	bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(to_move);
+	ocfs2_xattr_bucket_journal_dirty(handle, new_first);
 
-		memcpy(new_bh->b_data, old_bh->b_data, blocksize);
+	if (first_hash)
+		*first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
 
-		if (i == 0) {
-			new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
-			new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
+	/*
+	 * If the target bucket is anywhere past src_blkno, we moved
+	 * it to the new extent.  We need to update first_bh and header_bh.
+	 */
+	if ((*header_bh)->b_blocknr >= src_blkno) {
+		/* We're done with old_first, so we can re-use it. */
+		ocfs2_xattr_bucket_relse(old_first);
 
-			if (first_hash)
-				*first_hash = le32_to_cpu(
-					new_xh->xh_entries[0].xe_name_hash);
-			new_first_bh = new_bh;
-			get_bh(new_first_bh);
-		}
+		/* Find the block for the new target bucket */
+		src_blkno = new_blkno +
+			((*header_bh)->b_blocknr - src_blkno);
 
-		ocfs2_journal_dirty(handle, new_bh);
+		/*
+		 * This shouldn't fail - the buffers are in the
+		 * journal from ocfs2_cp_xattr_bucket().
+		 */
+		ret = ocfs2_read_xattr_bucket(old_first, src_blkno);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
 
-		if (*header_bh == old_bh) {
-			brelse(*header_bh);
-			*header_bh = new_bh;
-			get_bh(*header_bh);
+		brelse(*first_bh);
+		*first_bh = new_first->bu_bhs[0];
+		get_bh(*first_bh);
 
-			brelse(*first_bh);
-			*first_bh = new_first_bh;
-			get_bh(*first_bh);
-		}
-		brelse(new_bh);
-		brelse(old_bh);
+		brelse(*header_bh);
+		*header_bh = old_first->bu_bhs[0];
+		get_bh(*header_bh);
 	}
 
-	le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
-
-	ocfs2_journal_dirty(handle, prev_bh);
 out:
-	brelse(prev_bh);
-	brelse(new_first_bh);
+	ocfs2_xattr_bucket_free(new_first);
+	ocfs2_xattr_bucket_free(old_first);
+
 	return ret;
 }
 
-- 
cgit v0.10.2


From 874d65af1c8b8f6456a934701e6828d3017be029 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 26 Nov 2008 13:02:18 -0800
Subject: ocfs2: Rename ocfs2_cp_xattr_cluster() to ocfs2_mv_xattr_buckets().

ocfs2_cp_xattr_cluster() takes the last cluster of an xattr extent,
copies its buckets to the front of a new extent, and then shrinks the bucket
count of the original extent.  So it's really moving the data, not
copying it.

While we're here, the function doesn't need a buffer_head for the old
extent, just the block number.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 5be9966..c1f2e06 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3965,11 +3965,12 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
 	/*
 	 * Hey, if we're overwriting t_bucket, what difference does
 	 * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
-	 * cluster to fill, we came here from ocfs2_cp_xattr_cluster(), and
-	 * it is really new - ACCESS_CREATE is required.  But we also
-	 * might have moved data out of t_bucket before extending back
-	 * into it.  ocfs2_add_new_xattr_bucket() can do this - its call
-	 * to ocfs2_add_new_xattr_cluster() may have created a new extent
+	 * cluster to fill, we came here from
+	 * ocfs2_mv_xattr_buckets(), and it is really new -
+	 * ACCESS_CREATE is required.  But we also might have moved data
+	 * out of t_bucket before extending back into it.
+	 * ocfs2_add_new_xattr_bucket() can do this - its call to
+	 * ocfs2_add_new_xattr_cluster() may have created a new extent
 	 * and copied out the end of the old extent.  Then it re-extends
 	 * the old extent back to create space for new xattrs.  That's
 	 * how we get here, and the bucket isn't really new.
@@ -3992,17 +3993,16 @@ out:
 }
 
 /*
- * src_blk points to the last cluster of an existing extent.  to_blk
- * points to a newly allocated extent.  We copy the cluster over to the
- * new extent, initializing its xh_num_buckets.  The old extent's
- * xh_num_buckets shrinks by the same amount.
+ * src_blk points to the start of an existing extent.  last_blk points to
+ * last cluster in that extent.  to_blk points to a newly allocated
+ * extent.  We copy the buckets from cluster at last_blk to the new extent,
+ * initializing its xh_num_buckets.  The old extent's xh_num_buckets
+ * shrinks by the same amount.
  */
-static int ocfs2_cp_xattr_cluster(struct inode *inode,
+static int ocfs2_mv_xattr_buckets(struct inode *inode,
 				  handle_t *handle,
-				  struct buffer_head *first_bh,
-				  u64 src_blk,
-				  u64 to_blk,
-				  u32 *first_hash)
+				  u64 src_blk, u64 last_blk,
+				  u64 to_blk, u32 *first_hash)
 {
 	int i, ret, credits;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -4011,8 +4011,8 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
 	struct ocfs2_xattr_bucket *old_first, *new_first;
 
-	mlog(0, "cp xattrs from cluster %llu to %llu\n",
-	     (unsigned long long)src_blk, (unsigned long long)to_blk);
+	mlog(0, "mv xattrs from cluster %llu to %llu\n",
+	     (unsigned long long)last_blk, (unsigned long long)to_blk);
 
 	/* The first bucket of the original extent */
 	old_first = ocfs2_xattr_bucket_new(inode);
@@ -4024,7 +4024,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_read_xattr_bucket(old_first, first_bh->b_blocknr);
+	ret = ocfs2_read_xattr_bucket(old_first, src_blk);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4050,7 +4050,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
 
 	for (i = 0; i < num_buckets; i++) {
 		ret = ocfs2_cp_xattr_bucket(inode, handle,
-					    src_blk + (i * blks_per_bucket),
+					    last_blk + (i * blks_per_bucket),
 					    to_blk + (i * blks_per_bucket),
 					    1);
 		if (ret) {
@@ -4175,8 +4175,10 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
 		u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
 
 		if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
-			ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
-						     last_blk, new_blk,
+			ret = ocfs2_mv_xattr_buckets(inode, handle,
+						     (*first_bh)->b_blocknr,
+						     last_blk,
+						     new_blk,
 						     v_start);
 		else {
 			ret = ocfs2_divide_xattr_cluster(inode, handle,
-- 
cgit v0.10.2


From 54ecb6b6df54bf72befb359b21f3759b2952f9d9 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 26 Nov 2008 13:18:31 -0800
Subject: ocfs2: ocfs2_mv_xattr_buckets() can handle a partial cluster now.

If you look at ocfs2_mv_xattr_bucket_cross_cluster(), you'll notice that
two-thirds of the code is almost identical to ocfs2_mv_xattr_buckets().
The only difference is that ocfs2_mv_xattr_buckets() moves a whole
cluster's worth, while ocfs2_mv_xattr_bucket_cross_cluster() moves half
the cluster.

We change ocfs2_mv_xattr_buckets() to allow moving partial clusters.
The original caller of ocfs2_mv_xattr_buckets() still moves the whole
cluster's worth - it just passes a start_bucket of 0.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index c1f2e06..9734094 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3995,18 +3995,19 @@ out:
 /*
  * src_blk points to the start of an existing extent.  last_blk points to
  * last cluster in that extent.  to_blk points to a newly allocated
- * extent.  We copy the buckets from cluster at last_blk to the new extent,
- * initializing its xh_num_buckets.  The old extent's xh_num_buckets
- * shrinks by the same amount.
+ * extent.  We copy the buckets from the cluster at last_blk to the new
+ * extent.  If start_bucket is non-zero, we skip that many buckets before
+ * we start copying.  The new extent's xh_num_buckets gets set to the
+ * number of buckets we copied.  The old extent's xh_num_buckets shrinks
+ * by the same amount.
  */
-static int ocfs2_mv_xattr_buckets(struct inode *inode,
-				  handle_t *handle,
-				  u64 src_blk, u64 last_blk,
-				  u64 to_blk, u32 *first_hash)
+static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
+				  u64 src_blk, u64 last_blk, u64 to_blk,
+				  unsigned int start_bucket,
+				  u32 *first_hash)
 {
 	int i, ret, credits;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
 	struct ocfs2_xattr_bucket *old_first, *new_first;
@@ -4014,6 +4015,12 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode,
 	mlog(0, "mv xattrs from cluster %llu to %llu\n",
 	     (unsigned long long)last_blk, (unsigned long long)to_blk);
 
+	BUG_ON(start_bucket >= num_buckets);
+	if (start_bucket) {
+		num_buckets -= start_bucket;
+		last_blk += (start_bucket * blks_per_bucket);
+	}
+
 	/* The first bucket of the original extent */
 	old_first = ocfs2_xattr_bucket_new(inode);
 	/* The first bucket of the new extent */
@@ -4031,10 +4038,11 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode,
 	}
 
 	/*
-	 * We need to update the first bucket of the old extent and the
-	 * entire first cluster of the new extent.
+	 * We need to update the first bucket of the old extent and all
+	 * the buckets going to the new extent.
 	 */
-	credits = blks_per_bucket + bpc + handle->h_buffer_credits;
+	credits = ((num_buckets + 1) * blks_per_bucket) +
+		handle->h_buffer_credits;
 	ret = ocfs2_extend_trans(handle, credits);
 	if (ret) {
 		mlog_errno(ret);
@@ -4177,8 +4185,7 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
 		if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
 			ret = ocfs2_mv_xattr_buckets(inode, handle,
 						     (*first_bh)->b_blocknr,
-						     last_blk,
-						     new_blk,
+						     last_blk, new_blk, 0,
 						     v_start);
 		else {
 			ret = ocfs2_divide_xattr_cluster(inode, handle,
-- 
cgit v0.10.2


From c58b6032f93358871361a92d7743dbc85d27084e Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 26 Nov 2008 13:36:24 -0800
Subject: ocfs2: Use ocfs2_mv_xattr_buckets() in
 ocfs2_mv_xattr_bucket_cross_cluster().

Now that ocfs2_mv_xattr_buckets() can move a partial cluster's worth of
buckets, ocfs2_mv_xattr_bucket_cross_cluster() can use it.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 9734094..c318928 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -170,11 +170,10 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
 
 static int ocfs2_delete_xattr_index_block(struct inode *inode,
 					  struct buffer_head *xb_bh);
-static int ocfs2_cp_xattr_bucket(struct inode *inode,
-				 handle_t *handle,
-				 u64 s_blkno,
-				 u64 t_blkno,
-				 int t_is_new);
+static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
+				  u64 src_blk, u64 last_blk, u64 to_blk,
+				  unsigned int start_bucket,
+				  u32 *first_hash);
 
 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 {
@@ -3556,115 +3555,64 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
 					       u32 num_clusters,
 					       u32 *first_hash)
 {
-	int i, ret, credits;
+	int ret;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
 	int to_move = num_buckets / 2;
-	u64 last_cluster_blkno, src_blkno;
+	u64 src_blkno;
+	u64 last_cluster_blkno = prev_blkno +
+		((num_clusters - 1) * ocfs2_clusters_to_blocks(inode->i_sb, 1));
 	struct ocfs2_xattr_header *xh =
 			(struct ocfs2_xattr_header *)((*first_bh)->b_data);
-	struct ocfs2_xattr_bucket *old_first, *new_first;
+	struct ocfs2_xattr_bucket *new_target, *new_first;
 
 	BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
 
-	last_cluster_blkno = prev_blkno + ((num_clusters - 1) * bpc);
-	src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
-
 	mlog(0, "move half of xattrs in cluster %llu to %llu\n",
-	     (unsigned long long)prev_blkno, (unsigned long long)new_blkno);
+	     (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
 
-	/* The first bucket of the original extent */
-	old_first = ocfs2_xattr_bucket_new(inode);
 	/* The first bucket of the new extent */
 	new_first = ocfs2_xattr_bucket_new(inode);
-	if (!old_first || !new_first) {
+	/* The target bucket if it was moved to the new extent */
+	new_target = ocfs2_xattr_bucket_new(inode);
+	if (!new_target || !new_first) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ret = ocfs2_read_xattr_bucket(old_first, prev_blkno);
+	ret = ocfs2_mv_xattr_buckets(inode, handle, prev_blkno,
+				     last_cluster_blkno, new_blkno,
+				     to_move, first_hash);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	/*
-	 * We need to update the 1st half of the new extent, and we
-	 * need to update the first bucket of the old extent.
-	 */
-	credits = ((to_move + 1) * blks_per_bucket) + handle->h_buffer_credits;
-	ret = ocfs2_extend_trans(handle, credits);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
-	ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
-						OCFS2_JOURNAL_ACCESS_WRITE);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
-	for (i = 0; i < to_move; i++) {
-		ret = ocfs2_cp_xattr_bucket(inode, handle,
-					    src_blkno + (i * blks_per_bucket),
-					    new_blkno + (i * blks_per_bucket),
-					    1);
-		if (ret) {
-			mlog_errno(ret);
-			goto out;
-		}
-	}
-
-	/*
-	 * Get the new bucket ready before we dirty anything
-	 * (This actually shouldn't fail, because we already dirtied
-	 * it once in ocfs2_cp_xattr_bucket()).
-	 */
-	ret = ocfs2_read_xattr_bucket(new_first, new_blkno);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-	ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
-						OCFS2_JOURNAL_ACCESS_WRITE);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
-	/* Now update the headers */
-	le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -to_move);
-	ocfs2_xattr_bucket_journal_dirty(handle, old_first);
-
-	bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(to_move);
-	ocfs2_xattr_bucket_journal_dirty(handle, new_first);
-
-	if (first_hash)
-		*first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
+	/* This is the first bucket that got moved */
+	src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
 
 	/*
-	 * If the target bucket is anywhere past src_blkno, we moved
-	 * it to the new extent.  We need to update first_bh and header_bh.
+	 * If the target bucket was part of the moved buckets, we need to
+	 * update first_bh and header_bh.
 	 */
 	if ((*header_bh)->b_blocknr >= src_blkno) {
-		/* We're done with old_first, so we can re-use it. */
-		ocfs2_xattr_bucket_relse(old_first);
-
 		/* Find the block for the new target bucket */
 		src_blkno = new_blkno +
 			((*header_bh)->b_blocknr - src_blkno);
 
 		/*
-		 * This shouldn't fail - the buffers are in the
+		 * These shouldn't fail - the buffers are in the
 		 * journal from ocfs2_cp_xattr_bucket().
 		 */
-		ret = ocfs2_read_xattr_bucket(old_first, src_blkno);
+		ret = ocfs2_read_xattr_bucket(new_first, new_blkno);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+		ret = ocfs2_read_xattr_bucket(new_target, src_blkno);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3675,13 +3623,13 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
 		get_bh(*first_bh);
 
 		brelse(*header_bh);
-		*header_bh = old_first->bu_bhs[0];
+		*header_bh = new_target->bu_bhs[0];
 		get_bh(*header_bh);
 	}
 
 out:
 	ocfs2_xattr_bucket_free(new_first);
-	ocfs2_xattr_bucket_free(old_first);
+	ocfs2_xattr_bucket_free(new_target);
 
 	return ret;
 }
-- 
cgit v0.10.2


From 92cf3adf48097b7561a3c83f800ed3b2b25b18d4 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 26 Nov 2008 14:12:09 -0800
Subject: ocfs2: Start using buckets in ocfs2_adjust_xattr_cross_cluster().

We want to be passing around buckets instead of buffer_heads.  Let's get
them into ocfs2_adjust_xattr_cross_cluster.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index c318928..975ba36 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -4111,28 +4111,54 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
 					    u32 *v_start,
 					    int *extend)
 {
-	int ret = 0;
-	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
+	int ret;
+	struct ocfs2_xattr_bucket *first, *target;
 
 	mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
 	     (unsigned long long)prev_blk, prev_clusters,
 	     (unsigned long long)new_blk);
 
+	/* The first bucket of the original extent */
+	first = ocfs2_xattr_bucket_new(inode);
+	/* The target bucket for insert */
+	target = ocfs2_xattr_bucket_new(inode);
+	if (!first || !target) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	BUG_ON(prev_blk != (*first_bh)->b_blocknr);
+	ret = ocfs2_read_xattr_bucket(first, prev_blk);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_read_xattr_bucket(target, (*header_bh)->b_blocknr);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
 	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
 		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
 							  handle,
 							  first_bh,
 							  header_bh,
 							  new_blk,
-							  prev_blk,
+							  bucket_blkno(first),
 							  prev_clusters,
 							  v_start);
 	else {
-		u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
+		/* The start of the last cluster in the first extent */
+		u64 last_blk = bucket_blkno(first) +
+			((prev_clusters - 1) *
+			 ocfs2_clusters_to_blocks(inode->i_sb, 1));
 
-		if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
+		if (prev_clusters > 1 && bucket_blkno(target) != last_blk)
 			ret = ocfs2_mv_xattr_buckets(inode, handle,
-						     (*first_bh)->b_blocknr,
+						     bucket_blkno(first),
 						     last_blk, new_blk, 0,
 						     v_start);
 		else {
@@ -4140,11 +4166,15 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
 							 last_blk, new_blk,
 							 v_start);
 
-			if ((*header_bh)->b_blocknr == last_blk && extend)
+			if ((bucket_blkno(target) == last_blk) && extend)
 				*extend = 0;
 		}
 	}
 
+out:
+	ocfs2_xattr_bucket_free(first);
+	ocfs2_xattr_bucket_free(target);
+
 	return ret;
 }
 
-- 
cgit v0.10.2


From 41cb814866110b6e35dad7569ecf96163c3bb824 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 26 Nov 2008 14:25:21 -0800
Subject: ocfs2: Pass buckets into ocfs2_mv_xattr_bucket_cross_cluster().

Now that ocfs2_adjust_xattr_cross_cluster() has buckets, it can pass
them into ocfs2_mv_xattr_bucket_cross_cluster().  It no longer has to
care about buffer_heads.  The manipulation of first_bh and header_bh
moves up to ocfs2_adjust_xattr_cross_cluster().

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 975ba36..2f16f50 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3548,42 +3548,28 @@ out:
  */
 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
 					       handle_t *handle,
-					       struct buffer_head **first_bh,
-					       struct buffer_head **header_bh,
+					       struct ocfs2_xattr_bucket *first,
+					       struct ocfs2_xattr_bucket *target,
 					       u64 new_blkno,
-					       u64 prev_blkno,
 					       u32 num_clusters,
 					       u32 *first_hash)
 {
 	int ret;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
+	struct super_block *sb = inode->i_sb;
+	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
+	int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
 	int to_move = num_buckets / 2;
 	u64 src_blkno;
-	u64 last_cluster_blkno = prev_blkno +
-		((num_clusters - 1) * ocfs2_clusters_to_blocks(inode->i_sb, 1));
-	struct ocfs2_xattr_header *xh =
-			(struct ocfs2_xattr_header *)((*first_bh)->b_data);
-	struct ocfs2_xattr_bucket *new_target, *new_first;
+	u64 last_cluster_blkno = bucket_blkno(first) +
+		((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
 
-	BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
-	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
+	BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
+	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
 
 	mlog(0, "move half of xattrs in cluster %llu to %llu\n",
 	     (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
 
-	/* The first bucket of the new extent */
-	new_first = ocfs2_xattr_bucket_new(inode);
-	/* The target bucket if it was moved to the new extent */
-	new_target = ocfs2_xattr_bucket_new(inode);
-	if (!new_target || !new_first) {
-		ret = -ENOMEM;
-		mlog_errno(ret);
-		goto out;
-	}
-
-	ret = ocfs2_mv_xattr_buckets(inode, handle, prev_blkno,
+	ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
 				     last_cluster_blkno, new_blkno,
 				     to_move, first_hash);
 	if (ret) {
@@ -3596,41 +3582,32 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
 
 	/*
 	 * If the target bucket was part of the moved buckets, we need to
-	 * update first_bh and header_bh.
+	 * update first and target.
 	 */
-	if ((*header_bh)->b_blocknr >= src_blkno) {
+	if (bucket_blkno(target) >= src_blkno) {
 		/* Find the block for the new target bucket */
 		src_blkno = new_blkno +
-			((*header_bh)->b_blocknr - src_blkno);
+			(bucket_blkno(target) - src_blkno);
+
+		ocfs2_xattr_bucket_relse(first);
+		ocfs2_xattr_bucket_relse(target);
 
 		/*
 		 * These shouldn't fail - the buffers are in the
 		 * journal from ocfs2_cp_xattr_bucket().
 		 */
-		ret = ocfs2_read_xattr_bucket(new_first, new_blkno);
+		ret = ocfs2_read_xattr_bucket(first, new_blkno);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
-		ret = ocfs2_read_xattr_bucket(new_target, src_blkno);
-		if (ret) {
+		ret = ocfs2_read_xattr_bucket(target, src_blkno);
+		if (ret)
 			mlog_errno(ret);
-			goto out;
-		}
 
-		brelse(*first_bh);
-		*first_bh = new_first->bu_bhs[0];
-		get_bh(*first_bh);
-
-		brelse(*header_bh);
-		*header_bh = new_target->bu_bhs[0];
-		get_bh(*header_bh);
 	}
 
 out:
-	ocfs2_xattr_bucket_free(new_first);
-	ocfs2_xattr_bucket_free(new_target);
-
 	return ret;
 }
 
@@ -4141,16 +4118,29 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
 		goto out;
 	}
 
-	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
+	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
 		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
 							  handle,
-							  first_bh,
-							  header_bh,
+							  first, target,
 							  new_blk,
-							  bucket_blkno(first),
 							  prev_clusters,
 							  v_start);
-	else {
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		/* Did first+target get moved? */
+		if (prev_blk != bucket_blkno(first)) {
+			brelse(*first_bh);
+			*first_bh = first->bu_bhs[0];
+			get_bh(*first_bh);
+
+			brelse(*header_bh);
+			*header_bh = target->bu_bhs[0];
+			get_bh(*header_bh);
+		}
+	} else {
 		/* The start of the last cluster in the first extent */
 		u64 last_blk = bucket_blkno(first) +
 			((prev_clusters - 1) *
-- 
cgit v0.10.2


From 012ee910876e251621705e8dea7c353fd4914e19 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 26 Nov 2008 14:43:31 -0800
Subject: ocfs2: Move buckets up into ocfs2_add_new_xattr_cluster().

Lift the buckets from ocfs2_adjust_xattr_cross_cluster() up into
ocfs2_add_new_xattr_cluster().  Now ocfs2_adjust_xattr_cross_cluster()
doesn't deal with buffer_heads.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 2f16f50..4b24704 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -4080,44 +4080,19 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
  */
 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
 					    handle_t *handle,
-					    struct buffer_head **first_bh,
-					    struct buffer_head **header_bh,
+					    struct ocfs2_xattr_bucket *first,
+					    struct ocfs2_xattr_bucket *target,
 					    u64 new_blk,
-					    u64 prev_blk,
 					    u32 prev_clusters,
 					    u32 *v_start,
 					    int *extend)
 {
 	int ret;
-	struct ocfs2_xattr_bucket *first, *target;
 
 	mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
-	     (unsigned long long)prev_blk, prev_clusters,
+	     (unsigned long long)bucket_blkno(first), prev_clusters,
 	     (unsigned long long)new_blk);
 
-	/* The first bucket of the original extent */
-	first = ocfs2_xattr_bucket_new(inode);
-	/* The target bucket for insert */
-	target = ocfs2_xattr_bucket_new(inode);
-	if (!first || !target) {
-		ret = -ENOMEM;
-		mlog_errno(ret);
-		goto out;
-	}
-
-	BUG_ON(prev_blk != (*first_bh)->b_blocknr);
-	ret = ocfs2_read_xattr_bucket(first, prev_blk);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
-	ret = ocfs2_read_xattr_bucket(target, (*header_bh)->b_blocknr);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
 	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
 		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
 							  handle,
@@ -4125,46 +4100,33 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
 							  new_blk,
 							  prev_clusters,
 							  v_start);
-		if (ret) {
+		if (ret)
 			mlog_errno(ret);
-			goto out;
-		}
-
-		/* Did first+target get moved? */
-		if (prev_blk != bucket_blkno(first)) {
-			brelse(*first_bh);
-			*first_bh = first->bu_bhs[0];
-			get_bh(*first_bh);
-
-			brelse(*header_bh);
-			*header_bh = target->bu_bhs[0];
-			get_bh(*header_bh);
-		}
 	} else {
 		/* The start of the last cluster in the first extent */
 		u64 last_blk = bucket_blkno(first) +
 			((prev_clusters - 1) *
 			 ocfs2_clusters_to_blocks(inode->i_sb, 1));
 
-		if (prev_clusters > 1 && bucket_blkno(target) != last_blk)
+		if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
 			ret = ocfs2_mv_xattr_buckets(inode, handle,
 						     bucket_blkno(first),
 						     last_blk, new_blk, 0,
 						     v_start);
-		else {
+			if (ret)
+				mlog_errno(ret);
+		} else {
 			ret = ocfs2_divide_xattr_cluster(inode, handle,
 							 last_blk, new_blk,
 							 v_start);
+			if (ret)
+				mlog_errno(ret);
 
 			if ((bucket_blkno(target) == last_blk) && extend)
 				*extend = 0;
 		}
 	}
 
-out:
-	ocfs2_xattr_bucket_free(first);
-	ocfs2_xattr_bucket_free(target);
-
 	return ret;
 }
 
@@ -4202,6 +4164,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 	handle_t *handle = ctxt->handle;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_extent_tree et;
+	struct ocfs2_xattr_bucket *first, *target;
 
 	mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
 	     "previous xattr blkno = %llu\n",
@@ -4210,6 +4173,29 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 
 	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
 
+	/* The first bucket of the original extent */
+	first = ocfs2_xattr_bucket_new(inode);
+	/* The target bucket for insert */
+	target = ocfs2_xattr_bucket_new(inode);
+	if (!first || !target) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto leave;
+	}
+
+	BUG_ON(prev_blkno != (*first_bh)->b_blocknr);
+	ret = ocfs2_read_xattr_bucket(first, prev_blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto leave;
+	}
+
+	ret = ocfs2_read_xattr_bucket(target, (*header_bh)->b_blocknr);
+	if (ret) {
+		mlog_errno(ret);
+		goto leave;
+	}
+
 	ret = ocfs2_journal_access(handle, inode, root_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
@@ -4250,10 +4236,9 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 	} else {
 		ret = ocfs2_adjust_xattr_cross_cluster(inode,
 						       handle,
-						       first_bh,
-						       header_bh,
+						       first,
+						       target,
 						       block,
-						       prev_blkno,
 						       prev_clusters,
 						       &v_start,
 						       extend);
@@ -4261,6 +4246,17 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 			mlog_errno(ret);
 			goto leave;
 		}
+
+		/* Did first+target get moved? */
+		if (prev_blkno != bucket_blkno(first)) {
+			brelse(*first_bh);
+			*first_bh = first->bu_bhs[0];
+			get_bh(*first_bh);
+
+			brelse(*header_bh);
+			*header_bh = target->bu_bhs[0];
+			get_bh(*header_bh);
+		}
 	}
 
 	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
@@ -4277,6 +4273,8 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 		mlog_errno(ret);
 
 leave:
+	ocfs2_xattr_bucket_free(first);
+	ocfs2_xattr_bucket_free(target);
 	return ret;
 }
 
-- 
cgit v0.10.2


From ed29c0ca14871021fc8aced74650648dcb2c6e81 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 26 Nov 2008 15:08:44 -0800
Subject: ocfs2: Move buckets up into ocfs2_add_new_xattr_bucket().

Lift the buckets from ocfs2_add_new_xattr_cluster() up into
ocfs2_add_new_xattr_bucket().  Now ocfs2_add_new_xattr_cluster()
doesn't deal with buffer_heads.  In fact, we no longer have to play
get_bh() tricks at all.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 4b24704..5a5a1bd 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -4148,11 +4148,10 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
  */
 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 				       struct buffer_head *root_bh,
-				       struct buffer_head **first_bh,
-				       struct buffer_head **header_bh,
+				       struct ocfs2_xattr_bucket *first,
+				       struct ocfs2_xattr_bucket *target,
 				       u32 *num_clusters,
 				       u32 prev_cpos,
-				       u64 prev_blkno,
 				       int *extend,
 				       struct ocfs2_xattr_set_ctxt *ctxt)
 {
@@ -4164,38 +4163,14 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 	handle_t *handle = ctxt->handle;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_extent_tree et;
-	struct ocfs2_xattr_bucket *first, *target;
 
 	mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
 	     "previous xattr blkno = %llu\n",
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
-	     prev_cpos, (unsigned long long)prev_blkno);
+	     prev_cpos, (unsigned long long)bucket_blkno(first));
 
 	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
 
-	/* The first bucket of the original extent */
-	first = ocfs2_xattr_bucket_new(inode);
-	/* The target bucket for insert */
-	target = ocfs2_xattr_bucket_new(inode);
-	if (!first || !target) {
-		ret = -ENOMEM;
-		mlog_errno(ret);
-		goto leave;
-	}
-
-	BUG_ON(prev_blkno != (*first_bh)->b_blocknr);
-	ret = ocfs2_read_xattr_bucket(first, prev_blkno);
-	if (ret) {
-		mlog_errno(ret);
-		goto leave;
-	}
-
-	ret = ocfs2_read_xattr_bucket(target, (*header_bh)->b_blocknr);
-	if (ret) {
-		mlog_errno(ret);
-		goto leave;
-	}
-
 	ret = ocfs2_journal_access(handle, inode, root_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
@@ -4217,7 +4192,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 	mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
 	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
-	if (prev_blkno + prev_clusters * bpc == block &&
+	if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
 	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
 	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
 		/*
@@ -4246,17 +4221,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 			mlog_errno(ret);
 			goto leave;
 		}
-
-		/* Did first+target get moved? */
-		if (prev_blkno != bucket_blkno(first)) {
-			brelse(*first_bh);
-			*first_bh = first->bu_bhs[0];
-			get_bh(*first_bh);
-
-			brelse(*header_bh);
-			*header_bh = target->bu_bhs[0];
-			get_bh(*header_bh);
-		}
 	}
 
 	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
@@ -4273,8 +4237,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 		mlog_errno(ret);
 
 leave:
-	ocfs2_xattr_bucket_free(first);
-	ocfs2_xattr_bucket_free(target);
 	return ret;
 }
 
@@ -4357,16 +4319,16 @@ out:
  * We will move all the buckets starting from header_bh to the next place. As
  * for this one, half num of its xattrs will be moved to the next one.
  *
- * We will allocate a new cluster if current cluster is full and adjust
- * header_bh and first_bh if the insert place is moved to the new cluster.
+ * We will allocate a new cluster if current cluster is full.  The
+ * underlying calls will make sure that there is space at the target
+ * bucket, shifting buckets around if necessary.  'target' may be updated
+ * by those calls.
  */
 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 				      struct buffer_head *xb_bh,
 				      struct buffer_head *header_bh,
 				      struct ocfs2_xattr_set_ctxt *ctxt)
 {
-	struct ocfs2_xattr_header *first_xh = NULL;
-	struct buffer_head *first_bh = NULL;
 	struct ocfs2_xattr_block *xb =
 			(struct ocfs2_xattr_block *)xb_bh->b_data;
 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
@@ -4374,31 +4336,26 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 	struct ocfs2_xattr_header *xh =
 			(struct ocfs2_xattr_header *)header_bh->b_data;
 	u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
-	struct super_block *sb = inode->i_sb;
-	struct ocfs2_super *osb = OCFS2_SB(sb);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	int ret, num_buckets, extend = 1;
 	u64 p_blkno;
 	u32 e_cpos, num_clusters;
 	/* The bucket at the front of the extent */
-	struct ocfs2_xattr_bucket *first;
+	struct ocfs2_xattr_bucket *first, *target;
 
 	mlog(0, "Add new xattr bucket starting form %llu\n",
 	     (unsigned long long)header_bh->b_blocknr);
 
+	/* The first bucket of the original extent */
 	first = ocfs2_xattr_bucket_new(inode);
-	if (!first) {
+	/* The target bucket for insert */
+	target = ocfs2_xattr_bucket_new(inode);
+	if (!first || !target) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
 		goto out;
 	}
 
-	/*
-	 * Add refrence for header_bh here because it may be
-	 * changed in ocfs2_add_new_xattr_cluster and we need
-	 * to free it in the end.
-	 */
-	get_bh(header_bh);
-
 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
 				  &num_clusters, el);
 	if (ret) {
@@ -4406,23 +4363,30 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL);
+	ret = ocfs2_read_xattr_bucket(first, p_blkno);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
-	first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
+	ret = ocfs2_read_xattr_bucket(target, header_bh->b_blocknr);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
 
-	if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
+	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
+	if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
+		/*
+		 * This can move first+target if the target bucket moves
+		 * to the new extent.
+		 */
 		ret = ocfs2_add_new_xattr_cluster(inode,
 						  xb_bh,
-						  &first_bh,
-						  &header_bh,
+						  first,
+						  target,
 						  &num_clusters,
 						  e_cpos,
-						  p_blkno,
 						  &extend,
 						  ctxt);
 		if (ret) {
@@ -4432,24 +4396,19 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 	}
 
 	if (extend) {
-		/* These bucket reads should be cached */
-		ret = ocfs2_read_xattr_bucket(first, first_bh->b_blocknr);
-		if (ret) {
-			mlog_errno(ret);
-			goto out;
-		}
 		ret = ocfs2_extend_xattr_bucket(inode,
 						ctxt->handle,
-						first, header_bh->b_blocknr,
+						first,
+						bucket_blkno(target),
 						num_clusters);
 		if (ret)
 			mlog_errno(ret);
 	}
 
 out:
-	brelse(first_bh);
-	brelse(header_bh);
 	ocfs2_xattr_bucket_free(first);
+	ocfs2_xattr_bucket_free(target);
+
 	return ret;
 }
 
-- 
cgit v0.10.2


From 91f2033fa997aa92607470ed1ef90685b9d77a8c Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 26 Nov 2008 15:25:41 -0800
Subject: ocfs2: Pass xs->bucket into ocfs2_add_new_xattr_bucket().

Pass the actual target bucket for insert through to
ocfs2_add_new_xattr_bucket().  Now growing a bucket has no buffer_head
knowledge.

ocfs2_add_new_xattr_bucket() leavs xs->bucket in the proper state for
insert.  However, it doesn't update the rest of the search fields in xs,
so we still have to relse() and re-find.  That's OK, because everything
is cached.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 5a5a1bd..dfc51c3 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -4314,43 +4314,42 @@ out:
 }
 
 /*
- * Add new xattr bucket in an extent record and adjust the buckets accordingly.
- * xb_bh is the ocfs2_xattr_block.
- * We will move all the buckets starting from header_bh to the next place. As
- * for this one, half num of its xattrs will be moved to the next one.
+ * Add new xattr bucket in an extent record and adjust the buckets
+ * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
+ * bucket we want to insert into.
  *
- * We will allocate a new cluster if current cluster is full.  The
- * underlying calls will make sure that there is space at the target
- * bucket, shifting buckets around if necessary.  'target' may be updated
- * by those calls.
+ * In the easy case, we will move all the buckets after target down by
+ * one. Half of target's xattrs will be moved to the next bucket.
+ *
+ * If current cluster is full, we'll allocate a new one.  This may not
+ * be contiguous.  The underlying calls will make sure that there is
+ * space for the insert, shifting buckets around if necessary.
+ * 'target' may be moved by those calls.
  */
 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 				      struct buffer_head *xb_bh,
-				      struct buffer_head *header_bh,
+				      struct ocfs2_xattr_bucket *target,
 				      struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	struct ocfs2_xattr_block *xb =
 			(struct ocfs2_xattr_block *)xb_bh->b_data;
 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
 	struct ocfs2_extent_list *el = &xb_root->xt_list;
-	struct ocfs2_xattr_header *xh =
-			(struct ocfs2_xattr_header *)header_bh->b_data;
-	u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
+	u32 name_hash =
+		le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	int ret, num_buckets, extend = 1;
 	u64 p_blkno;
 	u32 e_cpos, num_clusters;
 	/* The bucket at the front of the extent */
-	struct ocfs2_xattr_bucket *first, *target;
+	struct ocfs2_xattr_bucket *first;
 
-	mlog(0, "Add new xattr bucket starting form %llu\n",
-	     (unsigned long long)header_bh->b_blocknr);
+	mlog(0, "Add new xattr bucket starting from %llu\n",
+	     (unsigned long long)bucket_blkno(target));
 
 	/* The first bucket of the original extent */
 	first = ocfs2_xattr_bucket_new(inode);
-	/* The target bucket for insert */
-	target = ocfs2_xattr_bucket_new(inode);
-	if (!first || !target) {
+	if (!first) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
 		goto out;
@@ -4369,12 +4368,6 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_read_xattr_bucket(target, header_bh->b_blocknr);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
 	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
 	if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
 		/*
@@ -4407,7 +4400,6 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
 
 out:
 	ocfs2_xattr_bucket_free(first);
-	ocfs2_xattr_bucket_free(target);
 
 	return ret;
 }
@@ -5083,15 +5075,21 @@ try_again:
 
 		ret = ocfs2_add_new_xattr_bucket(inode,
 						 xs->xattr_bh,
-						 xs->bucket->bu_bhs[0],
+						 xs->bucket,
 						 ctxt);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
+		/*
+		 * ocfs2_add_new_xattr_bucket() will have updated
+		 * xs->bucket if it moved, but it will not have updated
+		 * any of the other search fields.  Thus, we drop it and
+		 * re-search.  Everything should be cached, so it'll be
+		 * quick.
+		 */
 		ocfs2_xattr_bucket_relse(xs->bucket);
-
 		ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
 						   xi->name_index,
 						   xi->name, xs);
-- 
cgit v0.10.2


From 754938c142ae0c28360426c43f965ddc5164b21e Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 15 Dec 2008 06:03:41 +0800
Subject: ocfs2/quota: Add QUOTA in mlog_attribute.

A new mlog mask has to be added into mlog_attribute before it can
be really used in mlog. ML_QUOTA is only added in masklog.h, so
add it to the array to enable it.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index d8a0cb9..96df541 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -110,6 +110,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
 	define_mask(QUORUM),
 	define_mask(EXPORT),
 	define_mask(XATTR),
+	define_mask(QUOTA),
 	define_mask(ERROR),
 	define_mask(NOTICE),
 	define_mask(KTHREAD),
-- 
cgit v0.10.2


From e06c8227fd94ec181849ba206bf032be31c4295c Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 11 Sep 2008 15:35:47 -0700
Subject: jbd2: Add buffer triggers

Filesystems often to do compute intensive operation on some
metadata.  If this operation is repeated many times, it can be very
expensive.  It would be much nicer if the operation could be performed
once before a buffer goes to disk.

This adds triggers to jbd2 buffer heads.  Just before writing a metadata
buffer to the journal, jbd2 will optionally call a commit trigger associated
with the buffer.  If the journal is aborted, an abort trigger will be
called on any dirty buffers as they are dropped from pending
transactions.

ocfs2 will use this feature.

Initially I tried to come up with a more generic trigger that could be
used for non-buffer-related events like transaction completion.  It
doesn't tie nicely, because the information a buffer trigger needs
(specific to a journal_head) isn't the same as what a transaction
trigger needs (specific to a tranaction_t or perhaps journal_t).  So I
implemented a buffer set, with the understanding that
journal/transaction wide triggers should be implemented separately.

There is only one trigger set allowed per buffer.  I can't think of any
reason to attach more than one set.  Contrast this with a journal or
transaction in which multiple places may want to watch the entire
transaction separately.

The trigger sets are considered static allocation from the jbd2
perspective.  ocfs2 will just have one trigger set per block type,
setting the same set on every bh of the same type.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index ebc667b..c8a1bac 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -509,6 +509,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		if (is_journal_aborted(journal)) {
 			clear_buffer_jbddirty(jh2bh(jh));
 			JBUFFER_TRACE(jh, "journal is aborting: refile");
+			jbd2_buffer_abort_trigger(jh,
+						  jh->b_frozen_data ?
+						  jh->b_frozen_triggers :
+						  jh->b_triggers);
 			jbd2_journal_refile_buffer(journal, jh);
 			/* If that was the last one, we need to clean up
 			 * any descriptor buffers which may have been
@@ -844,6 +848,9 @@ restart_loop:
 		 * data.
 		 *
 		 * Otherwise, we can just throw away the frozen data now.
+		 *
+		 * We also know that the frozen data has already fired
+		 * its triggers if they exist, so we can clear that too.
 		 */
 		if (jh->b_committed_data) {
 			jbd2_free(jh->b_committed_data, bh->b_size);
@@ -851,10 +858,12 @@ restart_loop:
 			if (jh->b_frozen_data) {
 				jh->b_committed_data = jh->b_frozen_data;
 				jh->b_frozen_data = NULL;
+				jh->b_frozen_triggers = NULL;
 			}
 		} else if (jh->b_frozen_data) {
 			jbd2_free(jh->b_frozen_data, bh->b_size);
 			jh->b_frozen_data = NULL;
+			jh->b_frozen_triggers = NULL;
 		}
 
 		spin_lock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e70d657..f6bff9d 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -50,6 +50,7 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates);
 EXPORT_SYMBOL(jbd2_journal_get_write_access);
 EXPORT_SYMBOL(jbd2_journal_get_create_access);
 EXPORT_SYMBOL(jbd2_journal_get_undo_access);
+EXPORT_SYMBOL(jbd2_journal_set_triggers);
 EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
 EXPORT_SYMBOL(jbd2_journal_release_buffer);
 EXPORT_SYMBOL(jbd2_journal_forget);
@@ -290,6 +291,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	struct page *new_page;
 	unsigned int new_offset;
 	struct buffer_head *bh_in = jh2bh(jh_in);
+	struct jbd2_buffer_trigger_type *triggers;
 
 	/*
 	 * The buffer really shouldn't be locked: only the current committing
@@ -314,13 +316,23 @@ repeat:
 		done_copy_out = 1;
 		new_page = virt_to_page(jh_in->b_frozen_data);
 		new_offset = offset_in_page(jh_in->b_frozen_data);
+		triggers = jh_in->b_frozen_triggers;
 	} else {
 		new_page = jh2bh(jh_in)->b_page;
 		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
+		triggers = jh_in->b_triggers;
 	}
 
 	mapped_data = kmap_atomic(new_page, KM_USER0);
 	/*
+	 * Fire any commit trigger.  Do this before checking for escaping,
+	 * as the trigger may modify the magic offset.  If a copy-out
+	 * happens afterwards, it will have the correct data in the buffer.
+	 */
+	jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
+				   triggers);
+
+	/*
 	 * Check for escaping
 	 */
 	if (*((__be32 *)(mapped_data + new_offset)) ==
@@ -352,6 +364,13 @@ repeat:
 		new_page = virt_to_page(tmp);
 		new_offset = offset_in_page(tmp);
 		done_copy_out = 1;
+
+		/*
+		 * This isn't strictly necessary, as we're using frozen
+		 * data for the escaping, but it keeps consistency with
+		 * b_frozen_data usage.
+		 */
+		jh_in->b_frozen_triggers = jh_in->b_triggers;
 	}
 
 	/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 39b7805..4f925a4 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -741,6 +741,12 @@ done:
 		source = kmap_atomic(page, KM_USER0);
 		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
 		kunmap_atomic(source, KM_USER0);
+
+		/*
+		 * Now that the frozen data is saved off, we need to store
+		 * any matching triggers.
+		 */
+		jh->b_frozen_triggers = jh->b_triggers;
 	}
 	jbd_unlock_bh_state(bh);
 
@@ -944,6 +950,47 @@ out:
 }
 
 /**
+ * void jbd2_journal_set_triggers() - Add triggers for commit writeout
+ * @bh: buffer to trigger on
+ * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
+ *
+ * Set any triggers on this journal_head.  This is always safe, because
+ * triggers for a committing buffer will be saved off, and triggers for
+ * a running transaction will match the buffer in that transaction.
+ *
+ * Call with NULL to clear the triggers.
+ */
+void jbd2_journal_set_triggers(struct buffer_head *bh,
+			       struct jbd2_buffer_trigger_type *type)
+{
+	struct journal_head *jh = bh2jh(bh);
+
+	jh->b_triggers = type;
+}
+
+void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
+				struct jbd2_buffer_trigger_type *triggers)
+{
+	struct buffer_head *bh = jh2bh(jh);
+
+	if (!triggers || !triggers->t_commit)
+		return;
+
+	triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
+}
+
+void jbd2_buffer_abort_trigger(struct journal_head *jh,
+			       struct jbd2_buffer_trigger_type *triggers)
+{
+	if (!triggers || !triggers->t_abort)
+		return;
+
+	triggers->t_abort(triggers, jh2bh(jh));
+}
+
+
+
+/**
  * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
  * @bh: buffer to mark
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f366457..3445647 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1008,6 +1008,35 @@ int __jbd2_journal_clean_checkpoint_list(journal_t *journal);
 int __jbd2_journal_remove_checkpoint(struct journal_head *);
 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
+
+/*
+ * Triggers
+ */
+
+struct jbd2_buffer_trigger_type {
+	/*
+	 * Fired just before a buffer is written to the journal.
+	 * mapped_data is a mapped buffer that is the frozen data for
+	 * commit.
+	 */
+	void (*t_commit)(struct jbd2_buffer_trigger_type *type,
+			 struct buffer_head *bh, void *mapped_data,
+			 size_t size);
+
+	/*
+	 * Fired during journal abort for dirty buffers that will not be
+	 * committed.
+	 */
+	void (*t_abort)(struct jbd2_buffer_trigger_type *type,
+			struct buffer_head *bh);
+};
+
+extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
+				       void *mapped_data,
+				       struct jbd2_buffer_trigger_type *triggers);
+extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
+				      struct jbd2_buffer_trigger_type *triggers);
+
 /* Buffer IO */
 extern int
 jbd2_journal_write_metadata_buffer(transaction_t	  *transaction,
@@ -1046,6 +1075,8 @@ extern int	 jbd2_journal_extend (handle_t *, int nblocks);
 extern int	 jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
+void		 jbd2_journal_set_triggers(struct buffer_head *,
+					   struct jbd2_buffer_trigger_type *type);
 extern int	 jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
 extern void	 jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_forget (handle_t *, struct buffer_head *);
diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h
index bb70ebb..525aac3 100644
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h
@@ -12,6 +12,8 @@
 
 typedef unsigned int		tid_t;		/* Unique transaction ID */
 typedef struct transaction_s	transaction_t;	/* Compound transaction type */
+
+
 struct buffer_head;
 
 struct journal_head {
@@ -87,6 +89,12 @@ struct journal_head {
 	 * [j_list_lock]
 	 */
 	struct journal_head *b_cpnext, *b_cpprev;
+
+	/* Trigger type */
+	struct jbd2_buffer_trigger_type *b_triggers;
+
+	/* Trigger type for the committing transaction's frozen data */
+	struct jbd2_buffer_trigger_type *b_frozen_triggers;
 };
 
 #endif		/* JOURNAL_HEAD_H_INCLUDED */
-- 
cgit v0.10.2


From ab552d54673f262d7f70014003d3928d29270f22 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 16 Oct 2008 17:50:30 -0700
Subject: ocfs2: Add the on-disk structures for metadata checksums.

Define struct ocfs2_block_check, an 8-byte structure containing a 32bit
crc32_le and a 16bit hamming code ecc.  This will be used for metadata
checksums.  Add the structure to free spaces in the various metadata
structures.

Add the OCFS2_FEATURE_INCOMPAT_META_ECC bit.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 359732e..290fa26 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -149,6 +149,9 @@
 /* Support for extended attributes */
 #define OCFS2_FEATURE_INCOMPAT_XATTR		0x0200
 
+/* Metadata checksum and error correction */
+#define OCFS2_FEATURE_INCOMPAT_META_ECC		0x0800
+
 /*
  * backup superblock flag is used to indicate that this volume
  * has backup superblocks.
@@ -427,6 +430,22 @@ static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
 #define OCFS2_RAW_SB(dinode)		(&((dinode)->id2.i_super))
 
 /*
+ * Block checking structure.  This is used in metadata to validate the
+ * contents.  If OCFS2_FEATURE_INCOMPAT_META_ECC is not set, it is all
+ * zeros.
+ */
+struct ocfs2_block_check {
+/*00*/	__le32 bc_crc32e;	/* 802.3 Ethernet II CRC32 */
+	__le16 bc_ecc;		/* Single-error-correction parity vector.
+				   This is a simple Hamming code dependant
+				   on the blocksize.  OCFS2's maximum
+				   blocksize, 4K, requires 16 parity bits,
+				   so we fit in __le16. */
+	__le16 bc_reserved1;
+/*08*/
+};
+
+/*
  * On disk extent record for OCFS2
  * It describes a range of clusters on disk.
  *
@@ -513,7 +532,7 @@ struct ocfs2_truncate_log {
 struct ocfs2_extent_block
 {
 /*00*/	__u8 h_signature[8];		/* Signature for verification */
-	__le64 h_reserved1;
+	struct ocfs2_block_check h_check;	/* Error checking */
 /*10*/	__le16 h_suballoc_slot;		/* Slot suballocator this
 					   extent_header belongs to */
 	__le16 h_suballoc_bit;		/* Bit offset in suballocator
@@ -683,7 +702,8 @@ struct ocfs2_dinode {
 					   was set in i_flags */
 	__le16 i_dyn_features;
 	__le64 i_xattr_loc;
-/*80*/	__le64 i_reserved2[7];
+/*80*/	struct ocfs2_block_check i_check;	/* Error checking */
+/*88*/	__le64 i_reserved2[6];
 /*B8*/	union {
 		__le64 i_pad1;		/* Generic way to refer to this
 					   64bit union */
@@ -750,7 +770,8 @@ struct ocfs2_group_desc
 /*20*/	__le64   bg_parent_dinode;       /* dinode which owns me, in
 					   blocks */
 	__le64   bg_blkno;               /* Offset on disk, in blocks */
-/*30*/	__le64   bg_reserved2[2];
+/*30*/	struct ocfs2_block_check bg_check;	/* Error checking */
+	__le64   bg_reserved2;
 /*40*/	__u8    bg_bitmap[0];
 };
 
@@ -793,7 +814,12 @@ struct ocfs2_xattr_header {
 						   in this extent record,
 						   only valid in the first
 						   bucket. */
-	__le64  xh_csum;
+	struct ocfs2_block_check xh_check;	/* Error checking
+						   (Note, this is only
+						    used for xattr
+						    buckets.  A block uses
+						    xb_check and sets
+						    this field to zero.) */
 	struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
 };
 
@@ -844,7 +870,7 @@ struct ocfs2_xattr_block {
 					block group */
 	__le32	xb_fs_generation;    /* Must match super block */
 /*10*/	__le64	xb_blkno;            /* Offset on disk, in blocks */
-	__le64	xb_csum;
+	struct ocfs2_block_check xb_check;	/* Error checking */
 /*20*/	__le16	xb_flags;            /* Indicates whether this block contains
 					real xattr or a xattr tree. */
 	__le16	xb_reserved0;
@@ -988,6 +1014,25 @@ struct ocfs2_local_disk_dqblk {
 /*10*/	__le64 dqb_inodemod;	/* Change in the amount of used inodes */
 };
 
+
+/*
+ * The quota trailer lives at the end of each quota block.
+ */
+
+struct ocfs2_disk_dqtrailer {
+/*00*/	struct ocfs2_block_check dq_check;	/* Error checking */
+/*08*/	/* Cannot be larger than OCFS2_QBLK_RESERVED_SPACE */
+};
+
+static inline struct ocfs2_disk_dqtrailer *ocfs2_block_dqtrailer(int blocksize,
+								 void *buf)
+{
+	char *ptr = buf;
+	ptr += blocksize - OCFS2_QBLK_RESERVED_SPACE;
+
+	return (struct ocfs2_disk_dqtrailer *)ptr;
+}
+
 #ifdef __KERNEL__
 static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
 {
-- 
cgit v0.10.2


From 70ad1ba7b48364d758a112df0823edc5ca6632aa Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 16 Oct 2008 17:54:25 -0700
Subject: ocfs2: Add the underlying blockcheck code.

This is the code that computes crc32 and ecc for ocfs2 metadata blocks.
There are high-level functions that check whether the filesystem has the
ecc feature, mid-level functions that work on a single block or array of
buffer_heads, and the low-level ecc hamming code that can handle
multiple buffers like crc32_le().

It's not hooked up to the filesystem yet.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 7e4b361..0159607 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o
 ocfs2-objs := \
 	alloc.o 		\
 	aops.o 			\
+	blockcheck.o		\
 	buffer_head_io.o	\
 	dcache.o 		\
 	dir.o 			\
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
new file mode 100644
index 0000000..2bf3d7f
--- /dev/null
+++ b/fs/ocfs2/blockcheck.c
@@ -0,0 +1,480 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * blockcheck.c
+ *
+ * Checksum and ECC codes for the OCFS2 userspace library.
+ *
+ * Copyright (C) 2006, 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/crc32.h>
+#include <linux/buffer_head.h>
+#include <linux/bitops.h>
+#include <asm/byteorder.h>
+
+#include "ocfs2.h"
+
+#include "blockcheck.h"
+
+
+
+/*
+ * We use the following conventions:
+ *
+ * d = # data bits
+ * p = # parity bits
+ * c = # total code bits (d + p)
+ */
+static int calc_parity_bits(unsigned int d)
+{
+	unsigned int p;
+
+	/*
+	 * Bits required for Single Error Correction is as follows:
+	 *
+	 * d + p + 1 <= 2^p
+	 *
+	 * We're restricting ourselves to 31 bits of parity, that should be
+	 * sufficient.
+	 */
+	for (p = 1; p < 32; p++)
+	{
+		if ((d + p + 1) <= (1 << p))
+			return p;
+	}
+
+	return 0;
+}
+
+/*
+ * Calculate the bit offset in the hamming code buffer based on the bit's
+ * offset in the data buffer.  Since the hamming code reserves all
+ * power-of-two bits for parity, the data bit number and the code bit
+ * number are offest by all the parity bits beforehand.
+ *
+ * Recall that bit numbers in hamming code are 1-based.  This function
+ * takes the 0-based data bit from the caller.
+ *
+ * An example.  Take bit 1 of the data buffer.  1 is a power of two (2^0),
+ * so it's a parity bit.  2 is a power of two (2^1), so it's a parity bit.
+ * 3 is not a power of two.  So bit 1 of the data buffer ends up as bit 3
+ * in the code buffer.
+ */
+static unsigned int calc_code_bit(unsigned int i)
+{
+	unsigned int b, p;
+
+	/*
+	 * Data bits are 0-based, but we're talking code bits, which
+	 * are 1-based.
+	 */
+	b = i + 1;
+
+	/*
+	 * For every power of two below our bit number, bump our bit.
+	 *
+	 * We compare with (b + 1) becuase we have to compare with what b
+	 * would be _if_ it were bumped up by the parity bit.  Capice?
+	 */
+	for (p = 0; (1 << p) < (b + 1); p++)
+		b++;
+
+	return b;
+}
+
+/*
+ * This is the low level encoder function.  It can be called across
+ * multiple hunks just like the crc32 code.  'd' is the number of bits
+ * _in_this_hunk_.  nr is the bit offset of this hunk.  So, if you had
+ * two 512B buffers, you would do it like so:
+ *
+ * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
+ * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
+ *
+ * If you just have one buffer, use ocfs2_hamming_encode_block().
+ */
+u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr)
+{
+	unsigned int p = calc_parity_bits(nr + d);
+	unsigned int i, j, b;
+
+	BUG_ON(!p);
+
+	/*
+	 * b is the hamming code bit number.  Hamming code specifies a
+	 * 1-based array, but C uses 0-based.  So 'i' is for C, and 'b' is
+	 * for the algorithm.
+	 *
+	 * The i++ in the for loop is so that the start offset passed
+	 * to ocfs2_find_next_bit_set() is one greater than the previously
+	 * found bit.
+	 */
+	for (i = 0; (i = ocfs2_find_next_bit(data, d, i)) < d; i++)
+	{
+		/*
+		 * i is the offset in this hunk, nr + i is the total bit
+		 * offset.
+		 */
+		b = calc_code_bit(nr + i);
+
+		for (j = 0; j < p; j++)
+		{
+			/*
+			 * Data bits in the resultant code are checked by
+			 * parity bits that are part of the bit number
+			 * representation.  Huh?
+			 *
+			 * <wikipedia href="http://en.wikipedia.org/wiki/Hamming_code">
+			 * In other words, the parity bit at position 2^k
+			 * checks bits in positions having bit k set in
+			 * their binary representation.  Conversely, for
+			 * instance, bit 13, i.e. 1101(2), is checked by
+			 * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
+			 * </wikipedia>
+			 *
+			 * Note that 'k' is the _code_ bit number.  'b' in
+			 * our loop.
+			 */
+			if (b & (1 << j))
+				parity ^= (1 << j);
+		}
+	}
+
+	/* While the data buffer was treated as little endian, the
+	 * return value is in host endian. */
+	return parity;
+}
+
+u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize)
+{
+	return ocfs2_hamming_encode(0, data, blocksize * 8, 0);
+}
+
+/*
+ * Like ocfs2_hamming_encode(), this can handle hunks.  nr is the bit
+ * offset of the current hunk.  If bit to be fixed is not part of the
+ * current hunk, this does nothing.
+ *
+ * If you only have one hunk, use ocfs2_hamming_fix_block().
+ */
+void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
+		       unsigned int fix)
+{
+	unsigned int p = calc_parity_bits(nr + d);
+	unsigned int i, b;
+
+	BUG_ON(!p);
+
+	/*
+	 * If the bit to fix has an hweight of 1, it's a parity bit.  One
+	 * busted parity bit is its own error.  Nothing to do here.
+	 */
+	if (hweight32(fix) == 1)
+		return;
+
+	/*
+	 * nr + d is the bit right past the data hunk we're looking at.
+	 * If fix after that, nothing to do
+	 */
+	if (fix >= calc_code_bit(nr + d))
+		return;
+
+	/*
+	 * nr is the offset in the data hunk we're starting at.  Let's
+	 * start b at the offset in the code buffer.  See hamming_encode()
+	 * for a more detailed description of 'b'.
+	 */
+	b = calc_code_bit(nr);
+	/* If the fix is before this hunk, nothing to do */
+	if (fix < b)
+		return;
+
+	for (i = 0; i < d; i++, b++)
+	{
+		/* Skip past parity bits */
+		while (hweight32(b) == 1)
+			b++;
+
+		/*
+		 * i is the offset in this data hunk.
+		 * nr + i is the offset in the total data buffer.
+		 * b is the offset in the total code buffer.
+		 *
+		 * Thus, when b == fix, bit i in the current hunk needs
+		 * fixing.
+		 */
+		if (b == fix)
+		{
+			if (ocfs2_test_bit(i, data))
+				ocfs2_clear_bit(i, data);
+			else
+				ocfs2_set_bit(i, data);
+			break;
+		}
+	}
+}
+
+void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
+			     unsigned int fix)
+{
+	ocfs2_hamming_fix(data, blocksize * 8, 0, fix);
+}
+
+/*
+ * This function generates check information for a block.
+ * data is the block to be checked.  bc is a pointer to the
+ * ocfs2_block_check structure describing the crc32 and the ecc.
+ *
+ * bc should be a pointer inside data, as the function will
+ * take care of zeroing it before calculating the check information.  If
+ * bc does not point inside data, the caller must make sure any inline
+ * ocfs2_block_check structures are zeroed.
+ *
+ * The data buffer must be in on-disk endian (little endian for ocfs2).
+ * bc will be filled with little-endian values and will be ready to go to
+ * disk.
+ */
+void ocfs2_block_check_compute(void *data, size_t blocksize,
+			       struct ocfs2_block_check *bc)
+{
+	u32 crc;
+	u32 ecc;
+
+	memset(bc, 0, sizeof(struct ocfs2_block_check));
+
+	crc = crc32_le(~0, data, blocksize);
+	ecc = ocfs2_hamming_encode_block(data, blocksize);
+
+	/*
+	 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
+	 * larger than 16 bits.
+	 */
+	BUG_ON(ecc > USHORT_MAX);
+
+	bc->bc_crc32e = cpu_to_le32(crc);
+	bc->bc_ecc = cpu_to_le16((u16)ecc);
+}
+
+/*
+ * This function validates existing check information.  Like _compute,
+ * the function will take care of zeroing bc before calculating check codes.
+ * If bc is not a pointer inside data, the caller must have zeroed any
+ * inline ocfs2_block_check structures.
+ *
+ * Again, the data passed in should be the on-disk endian.
+ */
+int ocfs2_block_check_validate(void *data, size_t blocksize,
+			       struct ocfs2_block_check *bc)
+{
+	int rc = 0;
+	struct ocfs2_block_check check;
+	u32 crc, ecc;
+
+	check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
+	check.bc_ecc = le16_to_cpu(bc->bc_ecc);
+
+	memset(bc, 0, sizeof(struct ocfs2_block_check));
+
+	/* Fast path - if the crc32 validates, we're good to go */
+	crc = crc32_le(~0, data, blocksize);
+	if (crc == check.bc_crc32e)
+		goto out;
+
+	/* Ok, try ECC fixups */
+	ecc = ocfs2_hamming_encode_block(data, blocksize);
+	ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc);
+
+	/* And check the crc32 again */
+	crc = crc32_le(~0, data, blocksize);
+	if (crc == check.bc_crc32e)
+		goto out;
+
+	rc = -EIO;
+
+out:
+	bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
+	bc->bc_ecc = cpu_to_le16(check.bc_ecc);
+
+	return rc;
+}
+
+/*
+ * This function generates check information for a list of buffer_heads.
+ * bhs is the blocks to be checked.  bc is a pointer to the
+ * ocfs2_block_check structure describing the crc32 and the ecc.
+ *
+ * bc should be a pointer inside data, as the function will
+ * take care of zeroing it before calculating the check information.  If
+ * bc does not point inside data, the caller must make sure any inline
+ * ocfs2_block_check structures are zeroed.
+ *
+ * The data buffer must be in on-disk endian (little endian for ocfs2).
+ * bc will be filled with little-endian values and will be ready to go to
+ * disk.
+ */
+void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
+				   struct ocfs2_block_check *bc)
+{
+	int i;
+	u32 crc, ecc;
+
+	BUG_ON(nr < 0);
+
+	if (!nr)
+		return;
+
+	memset(bc, 0, sizeof(struct ocfs2_block_check));
+
+	for (i = 0, crc = ~0, ecc = 0; i < nr; i++) {
+		crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
+		/*
+		 * The number of bits in a buffer is obviously b_size*8.
+		 * The offset of this buffer is b_size*i, so the bit offset
+		 * of this buffer is b_size*8*i.
+		 */
+		ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
+						bhs[i]->b_size * 8,
+						bhs[i]->b_size * 8 * i);
+	}
+
+	/*
+	 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
+	 * larger than 16 bits.
+	 */
+	BUG_ON(ecc > USHORT_MAX);
+
+	bc->bc_crc32e = cpu_to_le32(crc);
+	bc->bc_ecc = cpu_to_le16((u16)ecc);
+}
+
+/*
+ * This function validates existing check information on a list of
+ * buffer_heads.  Like _compute_bhs, the function will take care of
+ * zeroing bc before calculating check codes.  If bc is not a pointer
+ * inside data, the caller must have zeroed any inline
+ * ocfs2_block_check structures.
+ *
+ * Again, the data passed in should be the on-disk endian.
+ */
+int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
+				   struct ocfs2_block_check *bc)
+{
+	int i, rc = 0;
+	struct ocfs2_block_check check;
+	u32 crc, ecc, fix;
+
+	BUG_ON(nr < 0);
+
+	if (!nr)
+		return 0;
+
+	check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
+	check.bc_ecc = le16_to_cpu(bc->bc_ecc);
+
+	memset(bc, 0, sizeof(struct ocfs2_block_check));
+
+	/* Fast path - if the crc32 validates, we're good to go */
+	for (i = 0, crc = ~0; i < nr; i++)
+		crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
+	if (crc == check.bc_crc32e)
+		goto out;
+
+	mlog(ML_ERROR,
+	     "CRC32 failed: stored: %u, computed %u.  Applying ECC.\n",
+	     (unsigned int)check.bc_crc32e, (unsigned int)crc);
+
+	/* Ok, try ECC fixups */
+	for (i = 0, ecc = 0; i < nr; i++) {
+		/*
+		 * The number of bits in a buffer is obviously b_size*8.
+		 * The offset of this buffer is b_size*i, so the bit offset
+		 * of this buffer is b_size*8*i.
+		 */
+		ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
+						bhs[i]->b_size * 8,
+						bhs[i]->b_size * 8 * i);
+	}
+	fix = ecc ^ check.bc_ecc;
+	for (i = 0; i < nr; i++) {
+		/*
+		 * Try the fix against each buffer.  It will only affect
+		 * one of them.
+		 */
+		ocfs2_hamming_fix(bhs[i]->b_data, bhs[i]->b_size * 8,
+				  bhs[i]->b_size * 8 * i, fix);
+	}
+
+	/* And check the crc32 again */
+	for (i = 0, crc = ~0; i < nr; i++)
+		crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
+	if (crc == check.bc_crc32e)
+		goto out;
+
+	mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
+	     (unsigned int)check.bc_crc32e, (unsigned int)crc);
+
+	rc = -EIO;
+
+out:
+	bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
+	bc->bc_ecc = cpu_to_le16(check.bc_ecc);
+
+	return rc;
+}
+
+/*
+ * These are the main API.  They check the superblock flag before
+ * calling the underlying operations.
+ *
+ * They expect the buffer(s) to be in disk format.
+ */
+void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
+			    struct ocfs2_block_check *bc)
+{
+	if (ocfs2_meta_ecc(OCFS2_SB(sb)))
+		ocfs2_block_check_compute(data, sb->s_blocksize, bc);
+}
+
+int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
+			    struct ocfs2_block_check *bc)
+{
+	int rc = 0;
+
+	if (ocfs2_meta_ecc(OCFS2_SB(sb)))
+		rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc);
+
+	return rc;
+}
+
+void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
+				struct buffer_head **bhs, int nr,
+				struct ocfs2_block_check *bc)
+{
+	if (ocfs2_meta_ecc(OCFS2_SB(sb)))
+		ocfs2_block_check_compute_bhs(bhs, nr, bc);
+}
+
+int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
+				struct buffer_head **bhs, int nr,
+				struct ocfs2_block_check *bc)
+{
+	int rc = 0;
+
+	if (ocfs2_meta_ecc(OCFS2_SB(sb)))
+		rc = ocfs2_block_check_validate_bhs(bhs, nr, bc);
+
+	return rc;
+}
+
diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h
new file mode 100644
index 0000000..70ec3fe
--- /dev/null
+++ b/fs/ocfs2/blockcheck.h
@@ -0,0 +1,82 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * blockcheck.h
+ *
+ * Checksum and ECC codes for the OCFS2 userspace library.
+ *
+ * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef OCFS2_BLOCKCHECK_H
+#define OCFS2_BLOCKCHECK_H
+
+
+/* High level block API */
+void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
+			    struct ocfs2_block_check *bc);
+int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
+			    struct ocfs2_block_check *bc);
+void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
+				struct buffer_head **bhs, int nr,
+				struct ocfs2_block_check *bc);
+int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
+				struct buffer_head **bhs, int nr,
+				struct ocfs2_block_check *bc);
+
+/* Lower level API */
+void ocfs2_block_check_compute(void *data, size_t blocksize,
+			       struct ocfs2_block_check *bc);
+int ocfs2_block_check_validate(void *data, size_t blocksize,
+			       struct ocfs2_block_check *bc);
+void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
+				   struct ocfs2_block_check *bc);
+int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
+				   struct ocfs2_block_check *bc);
+
+/*
+ * Hamming code functions
+ */
+
+/*
+ * Encoding hamming code parity bits for a buffer.
+ *
+ * This is the low level encoder function.  It can be called across
+ * multiple hunks just like the crc32 code.  'd' is the number of bits
+ * _in_this_hunk_.  nr is the bit offset of this hunk.  So, if you had
+ * two 512B buffers, you would do it like so:
+ *
+ * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
+ * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
+ *
+ * If you just have one buffer, use ocfs2_hamming_encode_block().
+ */
+u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d,
+			 unsigned int nr);
+/*
+ * Fix a buffer with a bit error.  The 'fix' is the original parity
+ * xor'd with the parity calculated now.
+ *
+ * Like ocfs2_hamming_encode(), this can handle hunks.  nr is the bit
+ * offset of the current hunk.  If bit to be fixed is not part of the
+ * current hunk, this does nothing.
+ *
+ * If you only have one buffer, use ocfs2_hamming_fix_block().
+ */
+void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
+		       unsigned int fix);
+
+/* Convenience wrappers for a single buffer of data */
+extern u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize);
+extern void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
+				    unsigned int fix);
+#endif
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 5c77798..2bb389f 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -382,6 +382,13 @@ static inline int ocfs2_supports_xattr(struct ocfs2_super *osb)
 	return 0;
 }
 
+static inline int ocfs2_meta_ecc(struct ocfs2_super *osb)
+{
+	if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC)
+		return 1;
+	return 0;
+}
+
 /* set / clear functions because cluster events can make these happen
  * in parallel so we want the transitions to be atomic. this also
  * means that any future flags osb_flags must be protected by spinlock
@@ -615,5 +622,6 @@ static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
 #define ocfs2_clear_bit ext2_clear_bit
 #define ocfs2_test_bit ext2_test_bit
 #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit
+#define ocfs2_find_next_bit ext2_find_next_bit
 #endif  /* OCFS2_H */
 
-- 
cgit v0.10.2


From 684ef278377725d505aa23259ee673dab9b11851 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 2 Dec 2008 17:44:05 -0800
Subject: ocfs2: Add a validation hook for quota block reads.

Add a currently-returns-success hook for quota block reads.  We'll be
adding checks to this.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index a10faebe..7dbcfd7 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -87,13 +87,25 @@ struct qtree_fmt_operations ocfs2_global_ops = {
 	.is_id = ocfs2_global_is_id,
 };
 
+static int ocfs2_validate_quota_block(struct super_block *sb,
+				      struct buffer_head *bh)
+{
+	struct ocfs2_disk_dqtrailer *dqt = ocfs2_dq_trailer(sb, bh->b_data);
+
+	mlog(0, "Validating quota block %llu\n",
+	     (unsigned long long)bh->b_blocknr);
+
+	return 0;
+}
+
 int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
 			   struct buffer_head **bh)
 {
 	int rc = 0;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, NULL);
+	rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0,
+				    ocfs2_validate_quota_block);
 	if (rc)
 		mlog_errno(rc);
 
-- 
cgit v0.10.2


From d6b32bbb3eae3fb787f1c33bf9f767ca1ddeb208 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 17 Oct 2008 14:55:01 -0700
Subject: ocfs2: block read meta ecc.

Add block check calls to the read_block validate functions.  This is the
almost all of the read-side checking of metaecc.  xattr buckets are not checked
yet.   Writes are also unchecked, and so a read-write mount will quickly fail.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 84a7bd4..6b27f74 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -37,6 +37,7 @@
 
 #include "alloc.h"
 #include "aops.h"
+#include "blockcheck.h"
 #include "dlmglue.h"
 #include "extent_map.h"
 #include "inode.h"
@@ -682,12 +683,28 @@ struct ocfs2_merge_ctxt {
 static int ocfs2_validate_extent_block(struct super_block *sb,
 				       struct buffer_head *bh)
 {
+	int rc;
 	struct ocfs2_extent_block *eb =
 		(struct ocfs2_extent_block *)bh->b_data;
 
 	mlog(0, "Validating extent block %llu\n",
 	     (unsigned long long)bh->b_blocknr);
 
+	BUG_ON(!buffer_uptodate(bh));
+
+	/*
+	 * If the ecc fails, we return the error but otherwise
+	 * leave the filesystem running.  We know any error is
+	 * local to this block.
+	 */
+	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check);
+	if (rc)
+		return rc;
+
+	/*
+	 * Errors after here are fatal.
+	 */
+
 	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
 		ocfs2_error(sb,
 			    "Extent block #%llu has bad signature %.*s",
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index 2bf3d7f..2ce6ae5 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -24,6 +24,8 @@
 #include <linux/bitops.h>
 #include <asm/byteorder.h>
 
+#include <cluster/masklog.h>
+
 #include "ocfs2.h"
 
 #include "blockcheck.h"
@@ -292,6 +294,10 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
 	if (crc == check.bc_crc32e)
 		goto out;
 
+	mlog(ML_ERROR,
+	     "CRC32 failed: stored: %u, computed %u.  Applying ECC.\n",
+	     (unsigned int)check.bc_crc32e, (unsigned int)crc);
+
 	/* Ok, try ECC fixups */
 	ecc = ocfs2_hamming_encode_block(data, blocksize);
 	ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc);
@@ -301,6 +307,9 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
 	if (crc == check.bc_crc32e)
 		goto out;
 
+	mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
+	     (unsigned int)check.bc_crc32e, (unsigned int)crc);
+
 	rc = -EIO;
 
 out:
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 288512c..9370b65 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -38,6 +38,7 @@
 #include "ocfs2.h"
 
 #include "alloc.h"
+#include "blockcheck.h"
 #include "dlmglue.h"
 #include "extent_map.h"
 #include "file.h"
@@ -1262,7 +1263,7 @@ void ocfs2_refresh_inode(struct inode *inode,
 int ocfs2_validate_inode_block(struct super_block *sb,
 			       struct buffer_head *bh)
 {
-	int rc = -EINVAL;
+	int rc;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
 
 	mlog(0, "Validating dinode %llu\n",
@@ -1270,6 +1271,21 @@ int ocfs2_validate_inode_block(struct super_block *sb,
 
 	BUG_ON(!buffer_uptodate(bh));
 
+	/*
+	 * If the ecc fails, we return the error but otherwise
+	 * leave the filesystem running.  We know any error is
+	 * local to this block.
+	 */
+	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
+	if (rc)
+		goto bail;
+
+	/*
+	 * Errors after here are fatal.
+	 */
+
+	rc = -EINVAL;
+
 	if (!OCFS2_IS_VALID_DINODE(di)) {
 		ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n",
 			    (unsigned long long)bh->b_blocknr, 7,
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 7dbcfd7..a0b8b14 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -16,6 +16,7 @@
 #include "ocfs2_fs.h"
 #include "ocfs2.h"
 #include "alloc.h"
+#include "blockcheck.h"
 #include "inode.h"
 #include "journal.h"
 #include "file.h"
@@ -90,12 +91,20 @@ struct qtree_fmt_operations ocfs2_global_ops = {
 static int ocfs2_validate_quota_block(struct super_block *sb,
 				      struct buffer_head *bh)
 {
-	struct ocfs2_disk_dqtrailer *dqt = ocfs2_dq_trailer(sb, bh->b_data);
+	struct ocfs2_disk_dqtrailer *dqt =
+		ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data);
 
 	mlog(0, "Validating quota block %llu\n",
 	     (unsigned long long)bh->b_blocknr);
 
-	return 0;
+	BUG_ON(!buffer_uptodate(bh));
+
+	/*
+	 * If the ecc fails, we return the error but otherwise
+	 * leave the filesystem running.  We know any error is
+	 * local to this block.
+	 */
+	return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check);
 }
 
 int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 226fe21..7875576 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -35,6 +35,7 @@
 #include "ocfs2.h"
 
 #include "alloc.h"
+#include "blockcheck.h"
 #include "dlmglue.h"
 #include "inode.h"
 #include "journal.h"
@@ -250,8 +251,18 @@ int ocfs2_check_group_descriptor(struct super_block *sb,
 				 struct buffer_head *bh)
 {
 	int rc;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
+
+	BUG_ON(!buffer_uptodate(bh));
 
-	rc = ocfs2_validate_gd_self(sb, bh, 1);
+	/*
+	 * If the ecc fails, we return the error but otherwise
+	 * leave the filesystem running.  We know any error is
+	 * local to this block.
+	 */
+	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
+	if (!rc)
+		rc = ocfs2_validate_gd_self(sb, bh, 1);
 	if (!rc)
 		rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
 
@@ -261,9 +272,27 @@ int ocfs2_check_group_descriptor(struct super_block *sb,
 static int ocfs2_validate_group_descriptor(struct super_block *sb,
 					   struct buffer_head *bh)
 {
+	int rc;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
+
 	mlog(0, "Validating group descriptor %llu\n",
 	     (unsigned long long)bh->b_blocknr);
 
+	BUG_ON(!buffer_uptodate(bh));
+
+	/*
+	 * If the ecc fails, we return the error but otherwise
+	 * leave the filesystem running.  We know any error is
+	 * local to this block.
+	 */
+	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
+	if (rc)
+		return rc;
+
+	/*
+	 * Errors after here are fatal.
+	 */
+
 	return ocfs2_validate_gd_self(sb, bh, 0);
 }
 
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index dfc51c3..bc822d6 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -42,6 +42,7 @@
 
 #include "ocfs2.h"
 #include "alloc.h"
+#include "blockcheck.h"
 #include "dlmglue.h"
 #include "file.h"
 #include "symlink.h"
@@ -322,12 +323,28 @@ static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
 static int ocfs2_validate_xattr_block(struct super_block *sb,
 				      struct buffer_head *bh)
 {
+	int rc;
 	struct ocfs2_xattr_block *xb =
 		(struct ocfs2_xattr_block *)bh->b_data;
 
 	mlog(0, "Validating xattr block %llu\n",
 	     (unsigned long long)bh->b_blocknr);
 
+	BUG_ON(!buffer_uptodate(bh));
+
+	/*
+	 * If the ecc fails, we return the error but otherwise
+	 * leave the filesystem running.  We know any error is
+	 * local to this block.
+	 */
+	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
+	if (rc)
+		return rc;
+
+	/*
+	 * Errors after here are fatal
+	 */
+
 	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
 		ocfs2_error(sb,
 			    "Extended attribute block #%llu has bad "
-- 
cgit v0.10.2


From 50655ae9e91d272d48997bada59efe166aa5e343 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 11 Sep 2008 15:53:07 -0700
Subject: ocfs2: Add journal_access functions with jbd2 triggers.

We create wrappers for ocfs2_journal_access() that are specific to the
type of metadata block.  This allows us to associate jbd2 commit
triggers with the block.  The triggers will compute metadata ecc in a
future commit.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 302f114..2daa584 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -35,6 +35,7 @@
 #include "ocfs2.h"
 
 #include "alloc.h"
+#include "blockcheck.h"
 #include "dir.h"
 #include "dlmglue.h"
 #include "extent_map.h"
@@ -369,10 +370,110 @@ bail:
 	return status;
 }
 
-int ocfs2_journal_access(handle_t *handle,
-			 struct inode *inode,
-			 struct buffer_head *bh,
-			 int type)
+struct ocfs2_triggers {
+	struct jbd2_buffer_trigger_type	ot_triggers;
+	int				ot_offset;
+};
+
+static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
+{
+	return container_of(triggers, struct ocfs2_triggers, ot_triggers);
+}
+
+static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+				 struct buffer_head *bh,
+				 void *data, size_t size)
+{
+	struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
+
+	/*
+	 * We aren't guaranteed to have the superblock here, so we
+	 * must unconditionally compute the ecc data.
+	 * __ocfs2_journal_access() will only set the triggers if
+	 * metaecc is enabled.
+	 */
+	ocfs2_block_check_compute(data, size, data + ot->ot_offset);
+}
+
+/*
+ * Quota blocks have their own trigger because the struct ocfs2_block_check
+ * offset depends on the blocksize.
+ */
+static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+				 struct buffer_head *bh,
+				 void *data, size_t size)
+{
+	struct ocfs2_disk_dqtrailer *dqt =
+		ocfs2_block_dqtrailer(size, data);
+
+	/*
+	 * We aren't guaranteed to have the superblock here, so we
+	 * must unconditionally compute the ecc data.
+	 * __ocfs2_journal_access() will only set the triggers if
+	 * metaecc is enabled.
+	 */
+	ocfs2_block_check_compute(data, size, &dqt->dq_check);
+}
+
+static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
+				struct buffer_head *bh)
+{
+	mlog(ML_ERROR,
+	     "ocfs2_abort_trigger called by JBD2.  bh = 0x%lx, "
+	     "bh->b_blocknr = %llu\n",
+	     (unsigned long)bh,
+	     (unsigned long long)bh->b_blocknr);
+
+	/* We aren't guaranteed to have the superblock here - but if we
+	 * don't, it'll just crash. */
+	ocfs2_error(bh->b_assoc_map->host->i_sb,
+		    "JBD2 has aborted our journal, ocfs2 cannot continue\n");
+}
+
+static struct ocfs2_triggers di_triggers = {
+	.ot_triggers = {
+		.t_commit = ocfs2_commit_trigger,
+		.t_abort = ocfs2_abort_trigger,
+	},
+	.ot_offset	= offsetof(struct ocfs2_dinode, i_check),
+};
+
+static struct ocfs2_triggers eb_triggers = {
+	.ot_triggers = {
+		.t_commit = ocfs2_commit_trigger,
+		.t_abort = ocfs2_abort_trigger,
+	},
+	.ot_offset	= offsetof(struct ocfs2_extent_block, h_check),
+};
+
+static struct ocfs2_triggers gd_triggers = {
+	.ot_triggers = {
+		.t_commit = ocfs2_commit_trigger,
+		.t_abort = ocfs2_abort_trigger,
+	},
+	.ot_offset	= offsetof(struct ocfs2_group_desc, bg_check),
+};
+
+static struct ocfs2_triggers xb_triggers = {
+	.ot_triggers = {
+		.t_commit = ocfs2_commit_trigger,
+		.t_abort = ocfs2_abort_trigger,
+	},
+	.ot_offset	= offsetof(struct ocfs2_xattr_block, xb_check),
+};
+
+static struct ocfs2_triggers dq_triggers = {
+	.ot_triggers = {
+		.t_commit = ocfs2_dq_commit_trigger,
+		.t_abort = ocfs2_abort_trigger,
+	},
+};
+
+static int __ocfs2_journal_access(handle_t *handle,
+				  struct inode *inode,
+				  struct buffer_head *bh,
+				  struct ocfs2_triggers *triggers,
+				  int type)
 {
 	int status;
 
@@ -418,6 +519,8 @@ int ocfs2_journal_access(handle_t *handle,
 		status = -EINVAL;
 		mlog(ML_ERROR, "Uknown access type!\n");
 	}
+	if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers)
+		jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
 	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
 
 	if (status < 0)
@@ -428,6 +531,54 @@ int ocfs2_journal_access(handle_t *handle,
 	return status;
 }
 
+int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
+			       struct buffer_head *bh, int type)
+{
+	return __ocfs2_journal_access(handle, inode, bh, &di_triggers,
+				      type);
+}
+
+int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type)
+{
+	return __ocfs2_journal_access(handle, inode, bh, &eb_triggers,
+				      type);
+}
+
+int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type)
+{
+	return __ocfs2_journal_access(handle, inode, bh, &gd_triggers,
+				      type);
+}
+
+int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type)
+{
+	/* Right now, nothing for dirblocks */
+	return __ocfs2_journal_access(handle, inode, bh, NULL, type);
+}
+
+int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type)
+{
+	return __ocfs2_journal_access(handle, inode, bh, &xb_triggers,
+				      type);
+}
+
+int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type)
+{
+	return __ocfs2_journal_access(handle, inode, bh, &dq_triggers,
+				      type);
+}
+
+int ocfs2_journal_access(handle_t *handle, struct inode *inode,
+			 struct buffer_head *bh, int type)
+{
+	return __ocfs2_journal_access(handle, inode, bh, NULL, type);
+}
+
 int ocfs2_journal_dirty(handle_t *handle,
 			struct buffer_head *bh)
 {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 37013bf..bca370d 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -212,9 +212,12 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
  *  ocfs2_extend_trans     - Extend a handle by nblocks credits. This may
  *                          commit the handle to disk in the process, but will
  *                          not release any locks taken during the transaction.
- *  ocfs2_journal_access   - Notify the handle that we want to journal this
+ *  ocfs2_journal_access* - Notify the handle that we want to journal this
  *                          buffer. Will have to call ocfs2_journal_dirty once
  *                          we've actually dirtied it. Type is one of . or .
+ *                          Always call the specific flavor of
+ *                          ocfs2_journal_access_*() unless you intend to
+ *                          manage the checksum by hand.
  *  ocfs2_journal_dirty    - Mark a journalled buffer as having dirty data.
  *  ocfs2_jbd2_file_inode  - Mark an inode so that its data goes out before
  *                           the current handle commits.
@@ -244,10 +247,28 @@ int			     ocfs2_extend_trans(handle_t *handle, int nblocks);
 #define OCFS2_JOURNAL_ACCESS_WRITE  1
 #define OCFS2_JOURNAL_ACCESS_UNDO   2
 
-int                  ocfs2_journal_access(handle_t *handle,
-					  struct inode *inode,
-					  struct buffer_head *bh,
-					  int type);
+/* ocfs2_inode */
+int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
+			       struct buffer_head *bh, int type);
+/* ocfs2_extent_block */
+int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* ocfs2_group_desc */
+int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* ocfs2_xattr_block */
+int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* quota blocks */
+int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* dirblock */
+int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* Anything that has no ecc */
+int ocfs2_journal_access(handle_t *handle, struct inode *inode,
+			 struct buffer_head *bh, int type);
+
 /*
  * A word about the journal_access/journal_dirty "dance". It is
  * entirely legal to journal_access a buffer more than once (as long
-- 
cgit v0.10.2


From ffdd7a54631f07918b75e324d86713a08c11ec06 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 17 Oct 2008 22:32:01 -0700
Subject: ocfs2: Wrap up the common use cases of ocfs2_new_path().

The majority of ocfs2_new_path() calls are:

	ocfs2_new_path(path_root_bh(otherpath),
		       path_root_el(otherpath));

Let's call that ocfs2_new_path_from_path().  The rest do similar things
from struct ocfs2_extent_tree.  Let's call those
ocfs2_new_path_from_et().  This will make the next change easier.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 6b27f74..c22ff49 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -532,6 +532,16 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
 	return path;
 }
 
+static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
+{
+	return ocfs2_new_path(path_root_bh(path), path_root_el(path));
+}
+
+static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
+{
+	return ocfs2_new_path(et->et_root_bh, et->et_root_el);
+}
+
 /*
  * Convenience function to journal all components in a path.
  */
@@ -2150,8 +2160,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 
 	*ret_left_path = NULL;
 
-	left_path = ocfs2_new_path(path_root_bh(right_path),
-				   path_root_el(right_path));
+	left_path = ocfs2_new_path_from_path(right_path);
 	if (!left_path) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
@@ -2692,8 +2701,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 		goto out;
 	}
 
-	left_path = ocfs2_new_path(path_root_bh(path),
-				   path_root_el(path));
+	left_path = ocfs2_new_path_from_path(path);
 	if (!left_path) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
@@ -2702,8 +2710,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 
 	ocfs2_cp_path(left_path, path);
 
-	right_path = ocfs2_new_path(path_root_bh(path),
-				    path_root_el(path));
+	right_path = ocfs2_new_path_from_path(path);
 	if (!right_path) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
@@ -2833,8 +2840,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 		 * We have a path to the left of this one - it needs
 		 * an update too.
 		 */
-		left_path = ocfs2_new_path(path_root_bh(path),
-					   path_root_el(path));
+		left_path = ocfs2_new_path_from_path(path);
 		if (!left_path) {
 			ret = -ENOMEM;
 			mlog_errno(ret);
@@ -3075,8 +3081,7 @@ static int ocfs2_get_right_path(struct inode *inode,
 	/* This function shouldn't be called for the rightmost leaf. */
 	BUG_ON(right_cpos == 0);
 
-	right_path = ocfs2_new_path(path_root_bh(left_path),
-				    path_root_el(left_path));
+	right_path = ocfs2_new_path_from_path(left_path);
 	if (!right_path) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
@@ -3247,8 +3252,7 @@ static int ocfs2_get_left_path(struct inode *inode,
 	/* This function shouldn't be called for the leftmost leaf. */
 	BUG_ON(left_cpos == 0);
 
-	left_path = ocfs2_new_path(path_root_bh(right_path),
-				   path_root_el(right_path));
+	left_path = ocfs2_new_path_from_path(right_path);
 	if (!left_path) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
@@ -3780,8 +3784,7 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
 		 * leftmost leaf.
 		 */
 		if (left_cpos) {
-			left_path = ocfs2_new_path(path_root_bh(right_path),
-						   path_root_el(right_path));
+			left_path = ocfs2_new_path_from_path(right_path);
 			if (!left_path) {
 				ret = -ENOMEM;
 				mlog_errno(ret);
@@ -4018,7 +4021,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		goto out_update_clusters;
 	}
 
-	right_path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
+	right_path = ocfs2_new_path_from_et(et);
 	if (!right_path) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
@@ -4130,8 +4133,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			goto out;
 
 		if (left_cpos != 0) {
-			left_path = ocfs2_new_path(path_root_bh(path),
-						   path_root_el(path));
+			left_path = ocfs2_new_path_from_path(path);
 			if (!left_path)
 				goto out;
 
@@ -4187,8 +4189,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 		if (right_cpos == 0)
 			goto out;
 
-		right_path = ocfs2_new_path(path_root_bh(path),
-					    path_root_el(path));
+		right_path = ocfs2_new_path_from_path(path);
 		if (!right_path)
 			goto out;
 
@@ -4381,7 +4382,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
 		return 0;
 	}
 
-	path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
+	path = ocfs2_new_path_from_et(et);
 	if (!path) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
@@ -4910,7 +4911,7 @@ int ocfs2_mark_extent_written(struct inode *inode,
 	if (et->et_ops == &ocfs2_dinode_et_ops)
 		ocfs2_extent_map_trunc(inode, 0);
 
-	left_path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
+	left_path = ocfs2_new_path_from_et(et);
 	if (!left_path) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
@@ -5082,8 +5083,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 		}
 
 		if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) {
-			left_path = ocfs2_new_path(path_root_bh(path),
-						   path_root_el(path));
+			left_path = ocfs2_new_path_from_path(path);
 			if (!left_path) {
 				ret = -ENOMEM;
 				mlog_errno(ret);
@@ -5192,7 +5192,7 @@ int ocfs2_remove_extent(struct inode *inode,
 
 	ocfs2_extent_map_trunc(inode, 0);
 
-	path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
+	path = ocfs2_new_path_from_et(et);
 	if (!path) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
-- 
cgit v0.10.2


From 13723d00e374c2a6d6ccb5af6de965e89c3e1b01 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 17 Oct 2008 19:25:01 -0700
Subject: ocfs2: Use metadata-specific ocfs2_journal_access_*() functions.

The per-metadata-type ocfs2_journal_access_*() functions hook up jbd2
commit triggers and allow us to compute metadata ecc right before the
buffers are written out.  This commit provides ecc for inodes, extent
blocks, group descriptors, and quota blocks.  It is not safe to use
extened attributes and metaecc at the same time yet.

The ocfs2_extent_tree and ocfs2_path abstractions in alloc.c both hide
the type of block at their root.  Before, it didn't matter, but now the
root block must use the appropriate ocfs2_journal_access_*() function.
To keep this abstract, the structures now have a pointer to the matching
journal_access function and a wrapper call to call it.

A few places use naked ocfs2_write_block() calls instead of adding the
blocks to the journal.  We make sure to calculate their checksum and ecc
before the write.

Since we pass around the journal_access functions.  Let's typedef them
in ocfs2.h.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index c22ff49..6e58fd5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -298,11 +298,13 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
 static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 				     struct inode *inode,
 				     struct buffer_head *bh,
+				     ocfs2_journal_access_func access,
 				     void *obj,
 				     struct ocfs2_extent_tree_operations *ops)
 {
 	et->et_ops = ops;
 	et->et_root_bh = bh;
+	et->et_root_journal_access = access;
 	if (!obj)
 		obj = (void *)bh->b_data;
 	et->et_object = obj;
@@ -318,15 +320,16 @@ void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
 				   struct inode *inode,
 				   struct buffer_head *bh)
 {
-	__ocfs2_init_extent_tree(et, inode, bh, NULL, &ocfs2_dinode_et_ops);
+	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_di,
+				 NULL, &ocfs2_dinode_et_ops);
 }
 
 void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
 				       struct inode *inode,
 				       struct buffer_head *bh)
 {
-	__ocfs2_init_extent_tree(et, inode, bh, NULL,
-				 &ocfs2_xattr_tree_et_ops);
+	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_xb,
+				 NULL, &ocfs2_xattr_tree_et_ops);
 }
 
 void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
@@ -334,7 +337,7 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
 					struct buffer_head *bh,
 					struct ocfs2_xattr_value_root *xv)
 {
-	__ocfs2_init_extent_tree(et, inode, bh, xv,
+	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access, xv,
 				 &ocfs2_xattr_value_et_ops);
 }
 
@@ -356,6 +359,15 @@ static inline void ocfs2_et_update_clusters(struct inode *inode,
 	et->et_ops->eo_update_clusters(inode, et, clusters);
 }
 
+static inline int ocfs2_et_root_journal_access(handle_t *handle,
+					       struct inode *inode,
+					       struct ocfs2_extent_tree *et,
+					       int type)
+{
+	return et->et_root_journal_access(handle, inode, et->et_root_bh,
+					  type);
+}
+
 static inline int ocfs2_et_insert_check(struct inode *inode,
 					struct ocfs2_extent_tree *et,
 					struct ocfs2_extent_rec *rec)
@@ -396,12 +408,14 @@ struct ocfs2_path_item {
 #define OCFS2_MAX_PATH_DEPTH	5
 
 struct ocfs2_path {
-	int			p_tree_depth;
-	struct ocfs2_path_item	p_node[OCFS2_MAX_PATH_DEPTH];
+	int				p_tree_depth;
+	ocfs2_journal_access_func	p_root_access;
+	struct ocfs2_path_item		p_node[OCFS2_MAX_PATH_DEPTH];
 };
 
 #define path_root_bh(_path) ((_path)->p_node[0].bh)
 #define path_root_el(_path) ((_path)->p_node[0].el)
+#define path_root_access(_path)((_path)->p_root_access)
 #define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
 #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
 #define path_num_items(_path) ((_path)->p_tree_depth + 1)
@@ -434,6 +448,8 @@ static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
 	 */
 	if (keep_root)
 		depth = le16_to_cpu(path_root_el(path)->l_tree_depth);
+	else
+		path_root_access(path) = NULL;
 
 	path->p_tree_depth = depth;
 }
@@ -459,6 +475,7 @@ static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src)
 
 	BUG_ON(path_root_bh(dest) != path_root_bh(src));
 	BUG_ON(path_root_el(dest) != path_root_el(src));
+	BUG_ON(path_root_access(dest) != path_root_access(src));
 
 	ocfs2_reinit_path(dest, 1);
 
@@ -480,6 +497,7 @@ static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src)
 	int i;
 
 	BUG_ON(path_root_bh(dest) != path_root_bh(src));
+	BUG_ON(path_root_access(dest) != path_root_access(src));
 
 	for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
 		brelse(dest->p_node[i].bh);
@@ -515,7 +533,8 @@ static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index,
 }
 
 static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
-					 struct ocfs2_extent_list *root_el)
+					 struct ocfs2_extent_list *root_el,
+					 ocfs2_journal_access_func access)
 {
 	struct ocfs2_path *path;
 
@@ -527,6 +546,7 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
 		get_bh(root_bh);
 		path_root_bh(path) = root_bh;
 		path_root_el(path) = root_el;
+		path_root_access(path) = access;
 	}
 
 	return path;
@@ -534,12 +554,38 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
 
 static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
 {
-	return ocfs2_new_path(path_root_bh(path), path_root_el(path));
+	return ocfs2_new_path(path_root_bh(path), path_root_el(path),
+			      path_root_access(path));
 }
 
 static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
 {
-	return ocfs2_new_path(et->et_root_bh, et->et_root_el);
+	return ocfs2_new_path(et->et_root_bh, et->et_root_el,
+			      et->et_root_journal_access);
+}
+
+/*
+ * Journal the buffer at depth idx.  All idx>0 are extent_blocks,
+ * otherwise it's the root_access function.
+ *
+ * I don't like the way this function's name looks next to
+ * ocfs2_journal_access_path(), but I don't have a better one.
+ */
+static int ocfs2_path_bh_journal_access(handle_t *handle,
+					struct inode *inode,
+					struct ocfs2_path *path,
+					int idx)
+{
+	ocfs2_journal_access_func access = path_root_access(path);
+
+	if (!access)
+		access = ocfs2_journal_access;
+
+	if (idx)
+		access = ocfs2_journal_access_eb;
+
+	return access(handle, inode, path->p_node[idx].bh,
+		      OCFS2_JOURNAL_ACCESS_WRITE);
 }
 
 /*
@@ -554,8 +600,7 @@ static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
 		goto out;
 
 	for(i = 0; i < path_num_items(path); i++) {
-		ret = ocfs2_journal_access(handle, inode, path->p_node[i].bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_path_bh_journal_access(handle, inode, path, i);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -708,8 +753,11 @@ static int ocfs2_validate_extent_block(struct super_block *sb,
 	 * local to this block.
 	 */
 	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check);
-	if (rc)
+	if (rc) {
+		mlog(ML_ERROR, "Checksum failed for extent block %llu\n",
+		     (unsigned long long)bh->b_blocknr);
 		return rc;
+	}
 
 	/*
 	 * Errors after here are fatal.
@@ -842,8 +890,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
 			}
 			ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
 
-			status = ocfs2_journal_access(handle, inode, bhs[i],
-						      OCFS2_JOURNAL_ACCESS_CREATE);
+			status = ocfs2_journal_access_eb(handle, inode, bhs[i],
+							 OCFS2_JOURNAL_ACCESS_CREATE);
 			if (status < 0) {
 				mlog_errno(status);
 				goto bail;
@@ -986,8 +1034,8 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 		BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
 		eb_el = &eb->h_list;
 
-		status = ocfs2_journal_access(handle, inode, bh,
-					      OCFS2_JOURNAL_ACCESS_CREATE);
+		status = ocfs2_journal_access_eb(handle, inode, bh,
+						 OCFS2_JOURNAL_ACCESS_CREATE);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -1026,21 +1074,21 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 	 * journal_dirty erroring as it won't unless we've aborted the
 	 * handle (in which case we would never be here) so reserving
 	 * the write with journal_access is all we need to do. */
-	status = ocfs2_journal_access(handle, inode, *last_eb_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_eb(handle, inode, *last_eb_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
-	status = ocfs2_journal_access(handle, inode, et->et_root_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_et_root_journal_access(handle, inode, et,
+					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 	if (eb_bh) {
-		status = ocfs2_journal_access(handle, inode, eb_bh,
-					      OCFS2_JOURNAL_ACCESS_WRITE);
+		status = ocfs2_journal_access_eb(handle, inode, eb_bh,
+						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -1129,8 +1177,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 	eb_el = &eb->h_list;
 	root_el = et->et_root_el;
 
-	status = ocfs2_journal_access(handle, inode, new_eb_bh,
-				      OCFS2_JOURNAL_ACCESS_CREATE);
+	status = ocfs2_journal_access_eb(handle, inode, new_eb_bh,
+					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1148,8 +1196,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_journal_access(handle, inode, et->et_root_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_et_root_journal_access(handle, inode, et,
+					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1918,25 +1966,23 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	root_bh = left_path->p_node[subtree_index].bh;
 	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-	ret = ocfs2_journal_access(handle, inode, root_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+					   subtree_index);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
 	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
-		ret = ocfs2_journal_access(handle, inode,
-					   right_path->p_node[i].bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_path_bh_journal_access(handle, inode,
+						   right_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ret = ocfs2_journal_access(handle, inode,
-					   left_path->p_node[i].bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_path_bh_journal_access(handle, inode,
+						   left_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -2455,9 +2501,9 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 			return -EAGAIN;
 
 		if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
-			ret = ocfs2_journal_access(handle, inode,
-						   path_leaf_bh(right_path),
-						   OCFS2_JOURNAL_ACCESS_WRITE);
+			ret = ocfs2_journal_access_eb(handle, inode,
+						      path_leaf_bh(right_path),
+						      OCFS2_JOURNAL_ACCESS_WRITE);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -2474,8 +2520,8 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 		 * We have to update i_last_eb_blk during the meta
 		 * data delete.
 		 */
-		ret = ocfs2_journal_access(handle, inode, et_root_bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_et_root_journal_access(handle, inode, et,
+						   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -2490,25 +2536,23 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	 */
 	BUG_ON(right_has_empty && !del_right_subtree);
 
-	ret = ocfs2_journal_access(handle, inode, root_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+					   subtree_index);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
 	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
-		ret = ocfs2_journal_access(handle, inode,
-					   right_path->p_node[i].bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_path_bh_journal_access(handle, inode,
+						   right_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ret = ocfs2_journal_access(handle, inode,
-					   left_path->p_node[i].bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_path_bh_journal_access(handle, inode,
+						   left_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -2653,16 +2697,17 @@ out:
 
 static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
 					    handle_t *handle,
-					    struct buffer_head *bh,
-					    struct ocfs2_extent_list *el)
+					    struct ocfs2_path *path)
 {
 	int ret;
+	struct buffer_head *bh = path_leaf_bh(path);
+	struct ocfs2_extent_list *el = path_leaf_el(path);
 
 	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
 		return 0;
 
-	ret = ocfs2_journal_access(handle, inode, bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_path_bh_journal_access(handle, inode, path,
+					   path_num_items(path) - 1);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -2744,9 +2789,8 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 		 * Caller might still want to make changes to the
 		 * tree root, so re-add it to the journal here.
 		 */
-		ret = ocfs2_journal_access(handle, inode,
-					   path_root_bh(left_path),
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_path_bh_journal_access(handle, inode,
+						   left_path, 0);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -2929,8 +2973,7 @@ rightmost_no_delete:
 		 * it up front.
 		 */
 		ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
-						       path_leaf_bh(path),
-						       path_leaf_el(path));
+						       path);
 		if (ret)
 			mlog_errno(ret);
 		goto out;
@@ -3164,8 +3207,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		root_bh = left_path->p_node[subtree_index].bh;
 		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-		ret = ocfs2_journal_access(handle, inode, root_bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+						   subtree_index);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3173,17 +3216,15 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 
 		for (i = subtree_index + 1;
 		     i < path_num_items(right_path); i++) {
-			ret = ocfs2_journal_access(handle, inode,
-						   right_path->p_node[i].bh,
-						   OCFS2_JOURNAL_ACCESS_WRITE);
+			ret = ocfs2_path_bh_journal_access(handle, inode,
+							   right_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 
-			ret = ocfs2_journal_access(handle, inode,
-						   left_path->p_node[i].bh,
-						   OCFS2_JOURNAL_ACCESS_WRITE);
+			ret = ocfs2_path_bh_journal_access(handle, inode,
+							   left_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -3195,8 +3236,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		right_rec = &el->l_recs[index + 1];
 	}
 
-	ret = ocfs2_journal_access(handle, inode, bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_path_bh_journal_access(handle, inode, left_path,
+					   path_num_items(left_path) - 1);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3335,8 +3376,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 		root_bh = left_path->p_node[subtree_index].bh;
 		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-		ret = ocfs2_journal_access(handle, inode, root_bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+						   subtree_index);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3344,17 +3385,15 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 
 		for (i = subtree_index + 1;
 		     i < path_num_items(right_path); i++) {
-			ret = ocfs2_journal_access(handle, inode,
-						   right_path->p_node[i].bh,
-						   OCFS2_JOURNAL_ACCESS_WRITE);
+			ret = ocfs2_path_bh_journal_access(handle, inode,
+							   right_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 
-			ret = ocfs2_journal_access(handle, inode,
-						   left_path->p_node[i].bh,
-						   OCFS2_JOURNAL_ACCESS_WRITE);
+			ret = ocfs2_path_bh_journal_access(handle, inode,
+							   left_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -3366,8 +3405,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 			has_empty_extent = 1;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_path_bh_journal_access(handle, inode, left_path,
+					   path_num_items(left_path) - 1);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4009,8 +4048,8 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 
 	el = et->et_root_el;
 
-	ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_et_root_journal_access(handle, inode, et,
+					   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4071,8 +4110,8 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		 * ocfs2_rotate_tree_right() might have extended the
 		 * transaction without re-journaling our tree root.
 		 */
-		ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_et_root_journal_access(handle, inode, et,
+						   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -4593,9 +4632,9 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 
 	BUG_ON(num_bits > clusters_to_add);
 
-	/* reserve our write early -- insert_extent may update the inode */
-	status = ocfs2_journal_access(handle, inode, et->et_root_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	/* reserve our write early -- insert_extent may update the tree root */
+	status = ocfs2_et_root_journal_access(handle, inode, et,
+					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -5347,8 +5386,8 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_et_root_journal_access(handle, inode, et,
+					   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -5461,8 +5500,8 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_journal_access(handle, tl_inode, tl_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, tl_inode, tl_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -5523,8 +5562,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
 	while (i >= 0) {
 		/* Caller has given us at least enough credits to
 		 * update the truncate log dinode */
-		status = ocfs2_journal_access(handle, tl_inode, tl_bh,
-					      OCFS2_JOURNAL_ACCESS_WRITE);
+		status = ocfs2_journal_access_di(handle, tl_inode, tl_bh,
+						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -5780,6 +5819,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
 		 * tl_used. */
 		tl->tl_used = 0;
 
+		ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check);
 		status = ocfs2_write_block(osb, tl_bh, tl_inode);
 		if (status < 0) {
 			mlog_errno(status);
@@ -6546,8 +6586,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 	}
 
 	if (last_eb_bh) {
-		status = ocfs2_journal_access(handle, inode, last_eb_bh,
-					      OCFS2_JOURNAL_ACCESS_WRITE);
+		status = ocfs2_journal_access_eb(handle, inode, last_eb_bh,
+						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -6908,8 +6948,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		goto out_unlock;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, di_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -7043,7 +7083,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
 	new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
 						     i_size_read(inode));
 
-	path = ocfs2_new_path(fe_bh, &di->id2.i_list);
+	path = ocfs2_new_path(fe_bh, &di->id2.i_list,
+			      ocfs2_journal_access_di);
 	if (!path) {
 		status = -ENOMEM;
 		mlog_errno(status);
@@ -7276,8 +7317,8 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, di_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 59d37d1..4b6fea2 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -45,7 +45,9 @@
  *
  * ocfs2_extent_tree contains info for the root of the b-tree, it must have a
  * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
- * functions.
+ * functions.  With metadata ecc, we now call different journal_access
+ * functions for each type of metadata, so it must have the
+ * root_journal_access function.
  * ocfs2_extent_tree_operations abstract the normal operations we do for
  * the root of extent b-tree.
  */
@@ -54,6 +56,7 @@ struct ocfs2_extent_tree {
 	struct ocfs2_extent_tree_operations	*et_ops;
 	struct buffer_head			*et_root_bh;
 	struct ocfs2_extent_list		*et_root_el;
+	ocfs2_journal_access_func		et_root_journal_access;
 	void					*et_object;
 	unsigned int				et_max_leaf_clusters;
 };
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 6b647ec..a067a6c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1512,8 +1512,8 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		ocfs2_commit_trans(osb, handle);
 
@@ -1740,8 +1740,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 	 * We don't want this to fail in ocfs2_write_end(), so do it
 	 * here.
 	 */
-	ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_quota;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 3708fe4..45e4e03 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -378,14 +378,18 @@ int ocfs2_update_entry(struct inode *dir, handle_t *handle,
 		       struct inode *new_entry_inode)
 {
 	int ret;
+	ocfs2_journal_access_func access = ocfs2_journal_access_db;
 
 	/*
 	 * The same code works fine for both inline-data and extent
-	 * based directories, so no need to split this up.
+	 * based directories, so no need to split this up.  The only
+	 * difference is the journal_access function.
 	 */
 
-	ret = ocfs2_journal_access(handle, dir, de_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
+		access = ocfs2_journal_access_di;
+
+	ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -407,9 +411,13 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
 {
 	struct ocfs2_dir_entry *de, *pde;
 	int i, status = -ENOENT;
+	ocfs2_journal_access_func access = ocfs2_journal_access_db;
 
 	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
 
+	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
+		access = ocfs2_journal_access_di;
+
 	i = 0;
 	pde = NULL;
 	de = (struct ocfs2_dir_entry *) first_de;
@@ -420,8 +428,8 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
 			goto bail;
 		}
 		if (de == de_del)  {
-			status = ocfs2_journal_access(handle, dir, bh,
-						      OCFS2_JOURNAL_ACCESS_WRITE);
+			status = access(handle, dir, bh,
+					OCFS2_JOURNAL_ACCESS_WRITE);
 			if (status < 0) {
 				status = -EIO;
 				mlog_errno(status);
@@ -581,8 +589,14 @@ int __ocfs2_add_entry(handle_t *handle,
 				goto bail;
 			}
 
-			status = ocfs2_journal_access(handle, dir, insert_bh,
-						      OCFS2_JOURNAL_ACCESS_WRITE);
+			if (insert_bh == parent_fe_bh)
+				status = ocfs2_journal_access_di(handle, dir,
+								 insert_bh,
+								 OCFS2_JOURNAL_ACCESS_WRITE);
+			else
+				status = ocfs2_journal_access_db(handle, dir,
+								 insert_bh,
+								 OCFS2_JOURNAL_ACCESS_WRITE);
 			/* By now the buffer is marked for journaling */
 			offset += le16_to_cpu(de->rec_len);
 			if (le64_to_cpu(de->inode)) {
@@ -1081,8 +1095,8 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
 	struct ocfs2_inline_data *data = &di->id2.i_data;
 	unsigned int size = le16_to_cpu(data->id_count);
 
-	ret = ocfs2_journal_access(handle, inode, di_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -1129,8 +1143,8 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
 
 	ocfs2_set_new_buffer_uptodate(inode, new_bh);
 
-	status = ocfs2_journal_access(handle, inode, new_bh,
-				      OCFS2_JOURNAL_ACCESS_CREATE);
+	status = ocfs2_journal_access_db(handle, inode, new_bh,
+					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1292,8 +1306,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 
 	ocfs2_set_new_buffer_uptodate(dir, dirdata_bh);
 
-	ret = ocfs2_journal_access(handle, dir, dirdata_bh,
-				   OCFS2_JOURNAL_ACCESS_CREATE);
+	ret = ocfs2_journal_access_db(handle, dir, dirdata_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -1319,8 +1333,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	 * We let the later dirent insert modify c/mtime - to the user
 	 * the data hasn't changed.
 	 */
-	ret = ocfs2_journal_access(handle, dir, di_bh,
-				   OCFS2_JOURNAL_ACCESS_CREATE);
+	ret = ocfs2_journal_access_di(handle, dir, di_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -1583,8 +1597,8 @@ do_extend:
 
 	ocfs2_set_new_buffer_uptodate(dir, new_bh);
 
-	status = ocfs2_journal_access(handle, dir, new_bh,
-				      OCFS2_JOURNAL_ACCESS_CREATE);
+	status = ocfs2_journal_access_db(handle, dir, new_bh,
+					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9374d37..e8f795f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -256,8 +256,8 @@ int ocfs2_update_inode_atime(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, inode, bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -353,8 +353,8 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 		goto out;
 	}
 
-	status = ocfs2_journal_access(handle, inode, fe_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, inode, fe_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_commit;
@@ -590,8 +590,8 @@ restarted_transaction:
 	/* reserve a write to the file entry early on - that we if we
 	 * run out of credits in the allocation path, we can still
 	 * update i_size. */
-	status = ocfs2_journal_access(handle, inode, bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, inode, bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -1121,8 +1121,8 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, inode, bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_trans;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 9370b65..229e707 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -537,8 +537,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 			goto out;
 		}
 
-		status = ocfs2_journal_access(handle, inode, fe_bh,
-					      OCFS2_JOURNAL_ACCESS_WRITE);
+		status = ocfs2_journal_access_di(handle, inode, fe_bh,
+						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
 			goto out;
@@ -621,8 +621,8 @@ static int ocfs2_remove_inode(struct inode *inode,
 	}
 
 	/* set the inodes dtime */
-	status = ocfs2_journal_access(handle, inode, di_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, inode, di_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail_commit;
@@ -1190,8 +1190,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
 	mlog_entry("(inode %llu)\n",
 		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
-	status = ocfs2_journal_access(handle, inode, bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, inode, bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -1277,8 +1277,11 @@ int ocfs2_validate_inode_block(struct super_block *sb,
 	 * local to this block.
 	 */
 	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
-	if (rc)
+	if (rc) {
+		mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
+		     (unsigned long long)bh->b_blocknr);
 		goto bail;
+	}
 
 	/*
 	 * Errors after here are fatal.
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 2daa584..3b54dba 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -752,6 +752,7 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 	if (replayed)
 		ocfs2_bump_recovery_generation(fe);
 
+	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
 	status = ocfs2_write_block(osb, bh, journal->j_inode);
 	if (status < 0)
 		mlog_errno(status);
@@ -1486,6 +1487,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 	osb->slot_recovery_generations[slot_num] =
 					ocfs2_get_recovery_generation(fe);
 
+	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
 	status = ocfs2_write_block(osb, bh, inode);
 	if (status < 0)
 		mlog_errno(status);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index bca370d..3c3532e 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -247,9 +247,10 @@ int			     ocfs2_extend_trans(handle_t *handle, int nblocks);
 #define OCFS2_JOURNAL_ACCESS_WRITE  1
 #define OCFS2_JOURNAL_ACCESS_UNDO   2
 
+
 /* ocfs2_inode */
 int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
-			       struct buffer_head *bh, int type);
+			    struct buffer_head *bh, int type);
 /* ocfs2_extent_block */
 int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
 			    struct buffer_head *bh, int type);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 19cfb1b..ec70cdb 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -36,6 +36,7 @@
 #include "ocfs2.h"
 
 #include "alloc.h"
+#include "blockcheck.h"
 #include "dlmglue.h"
 #include "inode.h"
 #include "journal.h"
@@ -382,8 +383,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	}
 	memcpy(alloc_copy, alloc, bh->b_size);
 
-	status = ocfs2_journal_access(handle, local_alloc_inode, bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, local_alloc_inode, bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_commit;
@@ -476,6 +477,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
 	alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
 	ocfs2_clear_local_alloc(alloc);
 
+	ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
 	status = ocfs2_write_block(osb, alloc_bh, inode);
 	if (status < 0)
 		mlog_errno(status);
@@ -762,9 +764,9 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 	 * delete bits from it! */
 	*num_bits = bits_wanted;
 
-	status = ocfs2_journal_access(handle, local_alloc_inode,
-				      osb->local_alloc_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, local_alloc_inode,
+					 osb->local_alloc_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1240,9 +1242,9 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 	}
 	memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
 
-	status = ocfs2_journal_access(handle, local_alloc_inode,
-				      osb->local_alloc_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, local_alloc_inode,
+					 osb->local_alloc_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 6173807..084aba8 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -361,8 +361,8 @@ static int ocfs2_mknod(struct inode *dir,
 			goto leave;
 		}
 
-		status = ocfs2_journal_access(handle, dir, parent_fe_bh,
-					      OCFS2_JOURNAL_ACCESS_WRITE);
+		status = ocfs2_journal_access_di(handle, dir, parent_fe_bh,
+						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
 			goto leave;
@@ -493,8 +493,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	}
 	ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
 
-	status = ocfs2_journal_access(handle, inode, *new_fe_bh,
-				      OCFS2_JOURNAL_ACCESS_CREATE);
+	status = ocfs2_journal_access_di(handle, inode, *new_fe_bh,
+					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -664,8 +664,8 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto out_unlock_inode;
 	}
 
-	err = ocfs2_journal_access(handle, inode, fe_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	err = ocfs2_journal_access_di(handle, inode, fe_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (err < 0) {
 		mlog_errno(err);
 		goto out_commit;
@@ -851,8 +851,8 @@ static int ocfs2_unlink(struct inode *dir,
 		goto leave;
 	}
 
-	status = ocfs2_journal_access(handle, inode, fe_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, inode, fe_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -1265,8 +1265,8 @@ static int ocfs2_rename(struct inode *old_dir,
 				goto bail;
 			}
 		}
-		status = ocfs2_journal_access(handle, new_inode, newfe_bh,
-					      OCFS2_JOURNAL_ACCESS_WRITE);
+		status = ocfs2_journal_access_di(handle, new_inode, newfe_bh,
+						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -1312,8 +1312,8 @@ static int ocfs2_rename(struct inode *old_dir,
 	old_inode->i_ctime = CURRENT_TIME;
 	mark_inode_dirty(old_inode);
 
-	status = ocfs2_journal_access(handle, old_inode, old_inode_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, old_inode, old_inode_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status >= 0) {
 		old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
 
@@ -1389,9 +1389,9 @@ static int ocfs2_rename(struct inode *old_dir,
 			     (int)old_dir_nlink, old_dir->i_nlink);
 		} else {
 			struct ocfs2_dinode *fe;
-			status = ocfs2_journal_access(handle, old_dir,
-						      old_dir_bh,
-						      OCFS2_JOURNAL_ACCESS_WRITE);
+			status = ocfs2_journal_access_di(handle, old_dir,
+							 old_dir_bh,
+							 OCFS2_JOURNAL_ACCESS_WRITE);
 			fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
 			fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
 			status = ocfs2_journal_dirty(handle, old_dir_bh);
@@ -1898,8 +1898,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, orphan_dir_inode, orphan_dir_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -1986,8 +1986,8 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	status = ocfs2_journal_access(handle,orphan_dir_inode,  orphan_dir_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle,orphan_dir_inode,  orphan_dir_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 2bb389f..bad87d0 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -339,6 +339,10 @@ struct ocfs2_super
 
 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)
 
+/* Useful typedef for passing around journal access functions */
+typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct inode *inode,
+					 struct buffer_head *bh, int type);
+
 static inline int ocfs2_should_order_data(struct inode *inode)
 {
 	if (!S_ISREG(inode->i_mode))
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index a0b8b14..444aa5a 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -244,7 +244,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
 	ocfs2_set_buffer_uptodate(gqinode, bh);
-	err = ocfs2_journal_access(handle, gqinode, bh, ja_type);
+	err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type);
 	if (err < 0) {
 		brelse(bh);
 		goto out;
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index d451b71..07deec5 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -106,8 +106,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
 		mlog_errno(status);
 		return status;
 	}
-	status = ocfs2_journal_access(handle, inode, bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_dq(handle, inode, bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		ocfs2_commit_trans(OCFS2_SB(sb), handle);
@@ -506,7 +506,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
 				goto out_commit;
 			}
 			/* Release local quota file entry */
-			status = ocfs2_journal_access(handle, lqinode,
+			status = ocfs2_journal_access_dq(handle, lqinode,
 					qbh, OCFS2_JOURNAL_ACCESS_WRITE);
 			if (status < 0) {
 				mlog_errno(status);
@@ -614,8 +614,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
 			mlog_errno(status);
 			goto out_bh;
 		}
-		status = ocfs2_journal_access(handle, lqinode, bh,
-					      OCFS2_JOURNAL_ACCESS_WRITE);
+		status = ocfs2_journal_access_dq(handle, lqinode, bh,
+						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
 			goto out_trans;
@@ -981,8 +981,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 		goto out;
 	}
 
-	status = ocfs2_journal_access(handle, lqinode, bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_dq(handle, lqinode, bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_trans;
@@ -1074,7 +1074,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
 		mlog_errno(status);
 		goto out;
 	}
-	status = ocfs2_journal_access(handle, lqinode, chunk->qc_headerbh,
+	status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
 				 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1207,7 +1207,7 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
 		goto out;
 	}
 
-	status = ocfs2_journal_access(handle, sb_dqopt(sb)->files[type],
+	status = ocfs2_journal_access_dq(handle, sb_dqopt(sb)->files[type],
 			od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 867de3e..424adaa 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -106,8 +106,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
 	mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n",
 		   new_clusters, first_new_cluster);
 
-	ret = ocfs2_journal_access(handle, bm_inode, group_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -141,8 +141,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
 	}
 
 	/* update the inode accordingly. */
-	ret = ocfs2_journal_access(handle, bm_inode, bm_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_rollback;
@@ -536,8 +536,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 	cl = &fe->id2.i_chain;
 	cr = &cl->cl_recs[input->chain];
 
-	ret = ocfs2_journal_access(handle, main_bm_inode, group_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -552,8 +552,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 		goto out_commit;
 	}
 
-	ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_commit;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 7875576..a696286 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -261,7 +261,11 @@ int ocfs2_check_group_descriptor(struct super_block *sb,
 	 * local to this block.
 	 */
 	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
-	if (!rc)
+	if (rc) {
+		mlog(ML_ERROR,
+		     "Checksum failed for group descriptor %llu\n",
+		     (unsigned long long)bh->b_blocknr);
+	} else
 		rc = ocfs2_validate_gd_self(sb, bh, 1);
 	if (!rc)
 		rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
@@ -343,10 +347,10 @@ static int ocfs2_block_group_fill(handle_t *handle,
 		goto bail;
 	}
 
-	status = ocfs2_journal_access(handle,
-				      alloc_inode,
-				      bg_bh,
-				      OCFS2_JOURNAL_ACCESS_CREATE);
+	status = ocfs2_journal_access_gd(handle,
+					 alloc_inode,
+					 bg_bh,
+					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -476,8 +480,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
 	bg = (struct ocfs2_group_desc *) bg_bh->b_data;
 
-	status = ocfs2_journal_access(handle, alloc_inode,
-				      bh, OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, alloc_inode,
+					 bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -986,10 +990,10 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
 	if (ocfs2_is_cluster_bitmap(alloc_inode))
 		journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
 
-	status = ocfs2_journal_access(handle,
-				      alloc_inode,
-				      group_bh,
-				      journal_type);
+	status = ocfs2_journal_access_gd(handle,
+					 alloc_inode,
+					 group_bh,
+					 journal_type);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1060,8 +1064,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 	bg_ptr = le64_to_cpu(bg->bg_next_group);
 	prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
 
-	status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_gd(handle, alloc_inode, prev_bg_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_rollback;
@@ -1075,8 +1079,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 		goto out_rollback;
 	}
 
-	status = ocfs2_journal_access(handle, alloc_inode, bg_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_gd(handle, alloc_inode, bg_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_rollback;
@@ -1090,8 +1094,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 		goto out_rollback;
 	}
 
-	status = ocfs2_journal_access(handle, alloc_inode, fe_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, alloc_inode, fe_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_rollback;
@@ -1242,8 +1246,8 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
 	struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
 
-	ret = ocfs2_journal_access(handle, inode, di_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -1414,10 +1418,10 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 
 	/* Ok, claim our bits now: set the info on dinode, chainlist
 	 * and then the group */
-	status = ocfs2_journal_access(handle,
-				      alloc_inode,
-				      ac->ac_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle,
+					 alloc_inode,
+					 ac->ac_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1824,8 +1828,8 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 	if (ocfs2_is_cluster_bitmap(alloc_inode))
 		journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
 
-	status = ocfs2_journal_access(handle, alloc_inode, group_bh,
-				      journal_type);
+	status = ocfs2_journal_access_gd(handle, alloc_inode, group_bh,
+					 journal_type);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1900,8 +1904,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
 		goto bail;
 	}
 
-	status = ocfs2_journal_access(handle, alloc_inode, alloc_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, alloc_inode, alloc_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
-- 
cgit v0.10.2


From 4d0e214ee83185fcaa2cb97cd026d32bdc5c994a Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 5 Dec 2008 11:19:37 -0800
Subject: ocfs2: Add ecc and checksums to ocfs2 xattr buckets.

The xattr bucket can span multiple blocks on disk.  We have wrappers
for this structure in the code.  We use the new multi-block ecc calls to
calculate and validate the bucket.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index bc822d6..7c2f4c9 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -273,6 +273,15 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 	rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
 			       bucket->bu_blocks, bucket->bu_bhs, 0,
 			       NULL);
+	if (!rc) {
+		rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
+						 bucket->bu_bhs,
+						 bucket->bu_blocks,
+						 &bucket_xh(bucket)->xh_check);
+		if (rc)
+			mlog_errno(rc);
+	}
+
 	if (rc)
 		ocfs2_xattr_bucket_relse(bucket);
 	return rc;
@@ -301,6 +310,10 @@ static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
 {
 	int i;
 
+	ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
+				   bucket->bu_bhs, bucket->bu_blocks,
+				   &bucket_xh(bucket)->xh_check);
+
 	for (i = 0; i < bucket->bu_blocks; i++)
 		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
 }
-- 
cgit v0.10.2


From 2a50a743bdaab104155bd9e988d2ba3bb4177263 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 9 Dec 2008 14:24:33 -0800
Subject: ocfs2: Create ocfs2_xattr_value_buf.

When an ocfs2 extended attribute is large enough to require its own
allocation tree, we root it with an ocfs2_xattr_value_root.  However,
these roots can be a part of inodes, xattr blocks, or xattr buckets.
Thus, they need a different journal access function for each container.

We wrap the bh, its journal access function, and the value root (xv) in
a structure called ocfs2_xattr_valu_buf.  This is a package that can
be passed around.  In this first pass, we simply pass it to the
extent tree code.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 6e58fd5..874c0bd 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -48,6 +48,7 @@
 #include "file.h"
 #include "super.h"
 #include "uptodate.h"
+#include "xattr.h"
 
 #include "buffer_head_io.h"
 
@@ -207,36 +208,33 @@ static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
 
 static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et)
 {
-	struct ocfs2_xattr_value_root *xv = et->et_object;
+	struct ocfs2_xattr_value_buf *vb = et->et_object;
 
-	et->et_root_el = &xv->xr_list;
+	et->et_root_el = &vb->vb_xv->xr_list;
 }
 
 static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et,
 					      u64 blkno)
 {
-	struct ocfs2_xattr_value_root *xv =
-		(struct ocfs2_xattr_value_root *)et->et_object;
+	struct ocfs2_xattr_value_buf *vb = et->et_object;
 
-	xv->xr_last_eb_blk = cpu_to_le64(blkno);
+	vb->vb_xv->xr_last_eb_blk = cpu_to_le64(blkno);
 }
 
 static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
 {
-	struct ocfs2_xattr_value_root *xv =
-		(struct ocfs2_xattr_value_root *) et->et_object;
+	struct ocfs2_xattr_value_buf *vb = et->et_object;
 
-	return le64_to_cpu(xv->xr_last_eb_blk);
+	return le64_to_cpu(vb->vb_xv->xr_last_eb_blk);
 }
 
 static void ocfs2_xattr_value_update_clusters(struct inode *inode,
 					      struct ocfs2_extent_tree *et,
 					      u32 clusters)
 {
-	struct ocfs2_xattr_value_root *xv =
-		(struct ocfs2_xattr_value_root *)et->et_object;
+	struct ocfs2_xattr_value_buf *vb = et->et_object;
 
-	le32_add_cpu(&xv->xr_clusters, clusters);
+	le32_add_cpu(&vb->vb_xv->xr_clusters, clusters);
 }
 
 static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
@@ -334,10 +332,9 @@ void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
 
 void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
 					struct inode *inode,
-					struct buffer_head *bh,
-					struct ocfs2_xattr_value_root *xv)
+					struct ocfs2_xattr_value_buf *vb)
 {
-	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access, xv,
+	__ocfs2_init_extent_tree(et, inode, vb->vb_bh, vb->vb_access, vb,
 				 &ocfs2_xattr_value_et_ops);
 }
 
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 4b6fea2..cceff5c 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -71,10 +71,10 @@ void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
 void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
 				       struct inode *inode,
 				       struct buffer_head *bh);
+struct ocfs2_xattr_value_buf;
 void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
 					struct inode *inode,
-					struct buffer_head *bh,
-					struct ocfs2_xattr_value_root *xv);
+					struct ocfs2_xattr_value_buf *vb);
 
 /*
  * Read an extent block into *bh.  If *bh is NULL, a bh will be
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 7c2f4c9..123d378 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -581,21 +581,26 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 	handle_t *handle = ctxt->handle;
 	enum ocfs2_alloc_restarted why;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh	= xattr_bh,
+		.vb_xv = xv,
+		.vb_access = ocfs2_journal_access,
+	};
+	u32 prev_clusters, logical_start = le32_to_cpu(vb.vb_xv->xr_clusters);
 	struct ocfs2_extent_tree et;
 
 	mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
 
-	ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);
+	ocfs2_init_xattr_value_extent_tree(&et, inode, &vb);
 
-	status = ocfs2_journal_access(handle, inode, xattr_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = vb.vb_access(handle, inode, vb.vb_bh,
+			      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}
 
-	prev_clusters = le32_to_cpu(xv->xr_clusters);
+	prev_clusters = le32_to_cpu(vb.vb_xv->xr_clusters);
 	status = ocfs2_add_clusters_in_btree(osb,
 					     inode,
 					     &logical_start,
@@ -611,13 +616,13 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 		goto leave;
 	}
 
-	status = ocfs2_journal_dirty(handle, xattr_bh);
+	status = ocfs2_journal_dirty(handle, vb.vb_bh);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}
 
-	clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
+	clusters_to_add -= le32_to_cpu(vb.vb_xv->xr_clusters) - prev_clusters;
 
 	/*
 	 * We should have already allocated enough space before the transaction,
@@ -640,11 +645,16 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 	handle_t *handle = ctxt->handle;
 	struct ocfs2_extent_tree et;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = root_bh,
+		.vb_xv = xv,
+		.vb_access = ocfs2_journal_access,
+	};
 
-	ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
+	ocfs2_init_xattr_value_extent_tree(&et, inode, &vb);
 
-	ret = ocfs2_journal_access(handle, inode, root_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = vb.vb_access(handle, inode, vb.vb_bh,
+			   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -657,9 +667,9 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 		goto out;
 	}
 
-	le32_add_cpu(&xv->xr_clusters, -len);
+	le32_add_cpu(&vb.vb_xv->xr_clusters, -len);
 
-	ret = ocfs2_journal_dirty(handle, root_bh);
+	ret = ocfs2_journal_dirty(handle, vb.vb_bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 9a67e7d..5a1ebc7 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -70,4 +70,18 @@ int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
 			  int, struct ocfs2_security_xattr_info *,
 			  int *, int *, struct ocfs2_alloc_context **);
 
+/*
+ * xattrs can live inside an inode, as part of an external xattr block,
+ * or inside an xattr bucket, which is the leaf of a tree rooted in an
+ * xattr block.  Some of the xattr calls, especially the value setting
+ * functions, want to treat each of these locations as equal.  Let's wrap
+ * them in a structure that we can pass around instead of raw buffer_heads.
+ */
+struct ocfs2_xattr_value_buf {
+	struct buffer_head		*vb_bh;
+	ocfs2_journal_access_func	vb_access;
+	struct ocfs2_xattr_value_root	*vb_xv;
+};
+
+
 #endif /* OCFS2_XATTR_H */
-- 
cgit v0.10.2


From d72cc72d57ecaf9047da51269dabd6880c1399ac Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 9 Dec 2008 14:30:41 -0800
Subject: ocfs2: Pull ocfs2_xattr_value_buf up from
 __ocfs2_remove_xattr_range().

Place an ocfs2_xattr_value_buf in __ocfs2_xattr_shrink_size() and pass
it down to __ocfs2_remove_xattr_range().

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 123d378..3b059cf 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -636,8 +636,7 @@ leave:
 }
 
 static int __ocfs2_remove_xattr_range(struct inode *inode,
-				      struct buffer_head *root_bh,
-				      struct ocfs2_xattr_value_root *xv,
+				      struct ocfs2_xattr_value_buf *vb,
 				      u32 cpos, u32 phys_cpos, u32 len,
 				      struct ocfs2_xattr_set_ctxt *ctxt)
 {
@@ -645,16 +644,11 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 	handle_t *handle = ctxt->handle;
 	struct ocfs2_extent_tree et;
-	struct ocfs2_xattr_value_buf vb = {
-		.vb_bh = root_bh,
-		.vb_xv = xv,
-		.vb_access = ocfs2_journal_access,
-	};
 
-	ocfs2_init_xattr_value_extent_tree(&et, inode, &vb);
+	ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
 
-	ret = vb.vb_access(handle, inode, vb.vb_bh,
-			   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = vb->vb_access(handle, inode, vb->vb_bh,
+			    OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -667,9 +661,9 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 		goto out;
 	}
 
-	le32_add_cpu(&vb.vb_xv->xr_clusters, -len);
+	le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
 
-	ret = ocfs2_journal_dirty(handle, vb.vb_bh);
+	ret = ocfs2_journal_dirty(handle, vb->vb_bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -693,6 +687,11 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 	int ret = 0;
 	u32 trunc_len, cpos, phys_cpos, alloc_size;
 	u64 block;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = root_bh,
+		.vb_xv = xv,
+		.vb_access = ocfs2_journal_access,
+	};
 
 	if (old_clusters <= new_clusters)
 		return 0;
@@ -701,7 +700,8 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 	trunc_len = old_clusters - new_clusters;
 	while (trunc_len) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
-					       &alloc_size, &xv->xr_list);
+					       &alloc_size,
+					       &vb.vb_xv->xr_list);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -710,7 +710,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 		if (alloc_size > trunc_len)
 			alloc_size = trunc_len;
 
-		ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
+		ret = __ocfs2_remove_xattr_range(inode, &vb, cpos,
 						 phys_cpos, alloc_size,
 						 ctxt);
 		if (ret) {
-- 
cgit v0.10.2


From 19b801f45fa5e4840b9be3dcf1e73b08f35b04d9 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 9 Dec 2008 14:36:50 -0800
Subject: ocfs2: Pull ocfs2_xattr_value_buf up into
 ocfs2_xattr_value_truncate().

Place an ocfs2_xattr_value_buf in ocfs2_xattr_value_truncate() and pass
it down to ocfs2_xattr_shrink_size().  We can also pass it into
ocfs2_xattr_extend_allocation(), replacing its ocfs2_xattr_value_buf.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3b059cf..4ce8019 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -573,34 +573,28 @@ int ocfs2_calc_xattr_init(struct inode *dir,
 
 static int ocfs2_xattr_extend_allocation(struct inode *inode,
 					 u32 clusters_to_add,
-					 struct buffer_head *xattr_bh,
-					 struct ocfs2_xattr_value_root *xv,
+					 struct ocfs2_xattr_value_buf *vb,
 					 struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int status = 0;
 	handle_t *handle = ctxt->handle;
 	enum ocfs2_alloc_restarted why;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_xattr_value_buf vb = {
-		.vb_bh	= xattr_bh,
-		.vb_xv = xv,
-		.vb_access = ocfs2_journal_access,
-	};
-	u32 prev_clusters, logical_start = le32_to_cpu(vb.vb_xv->xr_clusters);
+	u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
 	struct ocfs2_extent_tree et;
 
 	mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
 
-	ocfs2_init_xattr_value_extent_tree(&et, inode, &vb);
+	ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
 
-	status = vb.vb_access(handle, inode, vb.vb_bh,
+	status = vb->vb_access(handle, inode, vb->vb_bh,
 			      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}
 
-	prev_clusters = le32_to_cpu(vb.vb_xv->xr_clusters);
+	prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
 	status = ocfs2_add_clusters_in_btree(osb,
 					     inode,
 					     &logical_start,
@@ -616,13 +610,13 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 		goto leave;
 	}
 
-	status = ocfs2_journal_dirty(handle, vb.vb_bh);
+	status = ocfs2_journal_dirty(handle, vb->vb_bh);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}
 
-	clusters_to_add -= le32_to_cpu(vb.vb_xv->xr_clusters) - prev_clusters;
+	clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
 
 	/*
 	 * We should have already allocated enough space before the transaction,
@@ -680,18 +674,12 @@ out:
 static int ocfs2_xattr_shrink_size(struct inode *inode,
 				   u32 old_clusters,
 				   u32 new_clusters,
-				   struct buffer_head *root_bh,
-				   struct ocfs2_xattr_value_root *xv,
+				   struct ocfs2_xattr_value_buf *vb,
 				   struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret = 0;
 	u32 trunc_len, cpos, phys_cpos, alloc_size;
 	u64 block;
-	struct ocfs2_xattr_value_buf vb = {
-		.vb_bh = root_bh,
-		.vb_xv = xv,
-		.vb_access = ocfs2_journal_access,
-	};
 
 	if (old_clusters <= new_clusters)
 		return 0;
@@ -701,7 +689,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 	while (trunc_len) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
 					       &alloc_size,
-					       &vb.vb_xv->xr_list);
+					       &vb->vb_xv->xr_list);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -710,7 +698,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 		if (alloc_size > trunc_len)
 			alloc_size = trunc_len;
 
-		ret = __ocfs2_remove_xattr_range(inode, &vb, cpos,
+		ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
 						 phys_cpos, alloc_size,
 						 ctxt);
 		if (ret) {
@@ -738,6 +726,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode,
 	int ret;
 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
 	u32 old_clusters = le32_to_cpu(xv->xr_clusters);
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = root_bh,
+		.vb_xv = xv,
+		.vb_access = ocfs2_journal_access,
+	};
 
 	if (new_clusters == old_clusters)
 		return 0;
@@ -745,11 +738,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode,
 	if (new_clusters > old_clusters)
 		ret = ocfs2_xattr_extend_allocation(inode,
 						    new_clusters - old_clusters,
-						    root_bh, xv, ctxt);
+						    &vb, ctxt);
 	else
 		ret = ocfs2_xattr_shrink_size(inode,
 					      old_clusters, new_clusters,
-					      root_bh, xv, ctxt);
+					      &vb, ctxt);
 
 	return ret;
 }
-- 
cgit v0.10.2


From b3e5d37905730dc5ddff717f55ed830caa80ea0e Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 9 Dec 2008 15:01:04 -0800
Subject: ocfs2: Pass ocfs2_xattr_value_buf into ocfs2_xattr_value_truncate().

The callers of ocfs2_xattr_value_truncate() now pass in
ocfs2_xattr_value_bufs.  These callers are the ones that calculated the
xv location, so they are the right starting point.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 4ce8019..409f9ee 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -718,19 +718,13 @@ out:
 }
 
 static int ocfs2_xattr_value_truncate(struct inode *inode,
-				      struct buffer_head *root_bh,
-				      struct ocfs2_xattr_value_root *xv,
+				      struct ocfs2_xattr_value_buf *vb,
 				      int len,
 				      struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret;
 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
-	u32 old_clusters = le32_to_cpu(xv->xr_clusters);
-	struct ocfs2_xattr_value_buf vb = {
-		.vb_bh = root_bh,
-		.vb_xv = xv,
-		.vb_access = ocfs2_journal_access,
-	};
+	u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
 
 	if (new_clusters == old_clusters)
 		return 0;
@@ -738,11 +732,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode,
 	if (new_clusters > old_clusters)
 		ret = ocfs2_xattr_extend_allocation(inode,
 						    new_clusters - old_clusters,
-						    &vb, ctxt);
+						    vb, ctxt);
 	else
 		ret = ocfs2_xattr_shrink_size(inode,
 					      old_clusters, new_clusters,
-					      &vb, ctxt);
+					      vb, ctxt);
 
 	return ret;
 }
@@ -1330,6 +1324,10 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
 	struct ocfs2_xattr_value_root *xv = NULL;
 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
 	int ret = 0;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = xs->xattr_bh,
+		.vb_access = ocfs2_journal_access
+	};
 
 	memset(val, 0, size);
 	memcpy(val, xi->name, name_len);
@@ -1340,9 +1338,9 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
 	xv->xr_list.l_tree_depth = 0;
 	xv->xr_list.l_count = cpu_to_le16(1);
 	xv->xr_list.l_next_free_rec = 0;
+	vb.vb_xv = xv;
 
-	ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
-					 xi->value_len, ctxt);
+	ret = ocfs2_xattr_value_truncate(inode, &vb, xi->value_len, ctxt);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
@@ -1352,7 +1350,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
 		mlog_errno(ret);
 		return ret;
 	}
-	ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, xv,
+	ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb.vb_xv,
 					      xi->value, xi->value_len);
 	if (ret < 0)
 		mlog_errno(ret);
@@ -1550,9 +1548,12 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 			goto out;
 		} else if (!ocfs2_xattr_is_local(xs->here)) {
 			/* For existing xattr which has value outside */
-			struct ocfs2_xattr_value_root *xv = NULL;
-			xv = (struct ocfs2_xattr_value_root *)(val +
-				OCFS2_XATTR_SIZE(name_len));
+			struct ocfs2_xattr_value_buf vb = {
+				.vb_bh = xs->xattr_bh,
+				.vb_xv = (struct ocfs2_xattr_value_root *)
+					(val + OCFS2_XATTR_SIZE(name_len)),
+				.vb_access = ocfs2_journal_access,
+			};
 
 			if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
 				/*
@@ -1561,8 +1562,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 				 * then set new value with set_value_outside().
 				 */
 				ret = ocfs2_xattr_value_truncate(inode,
-								 xs->xattr_bh,
-								 xv,
+								 &vb,
 								 xi->value_len,
 								 ctxt);
 				if (ret < 0) {
@@ -1582,7 +1582,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 
 				ret = __ocfs2_xattr_set_value_outside(inode,
 								handle,
-								xv,
+								vb.vb_xv,
 								xi->value,
 								xi->value_len);
 				if (ret < 0)
@@ -1594,8 +1594,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 				 * just trucate old value to zero.
 				 */
 				 ret = ocfs2_xattr_value_truncate(inode,
-								  xs->xattr_bh,
-								  xv,
+								  &vb,
 								  0,
 								  ctxt);
 				if (ret < 0)
@@ -1714,15 +1713,17 @@ static int ocfs2_remove_value_outside(struct inode*inode,
 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
 
 		if (!ocfs2_xattr_is_local(entry)) {
-			struct ocfs2_xattr_value_root *xv;
+			struct ocfs2_xattr_value_buf vb = {
+				.vb_bh = bh,
+				.vb_access = ocfs2_journal_access,
+			};
 			void *val;
 
 			val = (void *)header +
 				le16_to_cpu(entry->xe_name_offset);
-			xv = (struct ocfs2_xattr_value_root *)
+			vb.vb_xv = (struct ocfs2_xattr_value_root *)
 				(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
-			ret = ocfs2_xattr_value_truncate(inode, bh, xv,
-							 0, &ctxt);
+			ret = ocfs2_xattr_value_truncate(inode, &vb, 0, &ctxt);
 			if (ret < 0) {
 				mlog_errno(ret);
 				break;
@@ -4651,11 +4652,12 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 {
 	int ret, offset;
 	u64 value_blk;
-	struct buffer_head *value_bh = NULL;
-	struct ocfs2_xattr_value_root *xv;
 	struct ocfs2_xattr_entry *xe;
 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
 	size_t blocksize = inode->i_sb->s_blocksize;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_access = ocfs2_journal_access,
+	};
 
 	xe = &xh->xh_entries[xe_off];
 
@@ -4669,11 +4671,11 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 	/* We don't allow ocfs2_xattr_value to be stored in different block. */
 	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
 
-	value_bh = bucket->bu_bhs[value_blk];
-	BUG_ON(!value_bh);
+	vb.vb_bh = bucket->bu_bhs[value_blk];
+	BUG_ON(!vb.vb_bh);
 
-	xv = (struct ocfs2_xattr_value_root *)
-		(value_bh->b_data + offset % blocksize);
+	vb.vb_xv = (struct ocfs2_xattr_value_root *)
+		(vb.vb_bh->b_data + offset % blocksize);
 
 	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
 						OCFS2_JOURNAL_ACCESS_WRITE);
@@ -4691,7 +4693,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 	 */
 	mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
 	     xe_off, (unsigned long long)bucket_blkno(bucket), len);
-	ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt);
+	ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_dirty;
-- 
cgit v0.10.2


From 0c748e95327d00e9eb19d0f34b32147ecbc02137 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 9 Dec 2008 15:46:15 -0800
Subject: ocfs2: Pass value buf to ocfs2_xattr_update_entry().

ocfs2_xattr_update_entry() updates the entry portion of an xattr buffer.
This can be part of multiple metadata block types, so pass the buffer in
via an ocfs2_xattr_value_buf.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 409f9ee..6a05612 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1282,12 +1282,13 @@ static int ocfs2_xattr_update_entry(struct inode *inode,
 				    handle_t *handle,
 				    struct ocfs2_xattr_info *xi,
 				    struct ocfs2_xattr_search *xs,
+				    struct ocfs2_xattr_value_buf *vb,
 				    size_t offs)
 {
 	int ret;
 
-	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = vb->vb_access(handle, inode, vb->vb_bh,
+			    OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -1301,7 +1302,7 @@ static int ocfs2_xattr_update_entry(struct inode *inode,
 		ocfs2_xattr_set_local(xs->here, 0);
 	ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
 
-	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
+	ret = ocfs2_journal_dirty(handle, vb->vb_bh);
 	if (ret < 0)
 		mlog_errno(ret);
 out:
@@ -1345,7 +1346,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
 		mlog_errno(ret);
 		return ret;
 	}
-	ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, offs);
+	ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, &vb, offs);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
@@ -1574,6 +1575,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 							       handle,
 							       xi,
 							       xs,
+							       &vb,
 							       offs);
 				if (ret < 0) {
 					mlog_errno(ret);
-- 
cgit v0.10.2


From 512620f44df85df87348fc9a6fc54fcaa254b8d3 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 9 Dec 2008 15:58:35 -0800
Subject: ocfs2: Use ocfs2_xattr_value_buf in ocfs2_xattr_set_entry().

ocfs2_xattr_set_entry is the function that knows what type of block it
is setting into.  This is what we wanted from ocfs2_xattr_value_buf.
Plus, moving the value buf up into ocfs2_xattr_set_entry() allows us to
pass it into ocfs2_xattr_set_value_outside() and ocfs2_xattr_cleanup().

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 6a05612..c08b5e8 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1252,6 +1252,7 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
 			       handle_t *handle,
 			       struct ocfs2_xattr_info *xi,
 			       struct ocfs2_xattr_search *xs,
+			       struct ocfs2_xattr_value_buf *vb,
 			       size_t offs)
 {
 	int ret = 0;
@@ -1259,8 +1260,8 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
 	void *val = xs->base + offs;
 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
 
-	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = vb->vb_access(handle, inode, vb->vb_bh,
+			    OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -1271,7 +1272,7 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
 	memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
 	memset(val, 0, size);
 
-	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
+	ret = ocfs2_journal_dirty(handle, vb->vb_bh);
 	if (ret < 0)
 		mlog_errno(ret);
 out:
@@ -1318,6 +1319,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
 					 struct ocfs2_xattr_info *xi,
 					 struct ocfs2_xattr_search *xs,
 					 struct ocfs2_xattr_set_ctxt *ctxt,
+					 struct ocfs2_xattr_value_buf *vb,
 					 size_t offs)
 {
 	size_t name_len = strlen(xi->name);
@@ -1325,10 +1327,6 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
 	struct ocfs2_xattr_value_root *xv = NULL;
 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
 	int ret = 0;
-	struct ocfs2_xattr_value_buf vb = {
-		.vb_bh = xs->xattr_bh,
-		.vb_access = ocfs2_journal_access
-	};
 
 	memset(val, 0, size);
 	memcpy(val, xi->name, name_len);
@@ -1339,19 +1337,19 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
 	xv->xr_list.l_tree_depth = 0;
 	xv->xr_list.l_count = cpu_to_le16(1);
 	xv->xr_list.l_next_free_rec = 0;
-	vb.vb_xv = xv;
+	vb->vb_xv = xv;
 
-	ret = ocfs2_xattr_value_truncate(inode, &vb, xi->value_len, ctxt);
+	ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
 	}
-	ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, &vb, offs);
+	ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
 	}
-	ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb.vb_xv,
+	ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb->vb_xv,
 					      xi->value, xi->value_len);
 	if (ret < 0)
 		mlog_errno(ret);
@@ -1488,6 +1486,16 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 		.value = xi->value,
 		.value_len = xi->value_len,
 	};
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = xs->xattr_bh,
+		.vb_access = ocfs2_journal_access_di,
+	};
+
+	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
+		BUG_ON(xs->xattr_bh == xs->inode_bh);
+		vb.vb_access = ocfs2_journal_access_xb;
+	} else
+		BUG_ON(xs->xattr_bh != xs->inode_bh);
 
 	/* Compute min_offs, last and free space. */
 	last = xs->header->xh_entries;
@@ -1543,18 +1551,14 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 		if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
 			/* Replace existing local xattr with tree root */
 			ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
-							    ctxt, offs);
+							    ctxt, &vb, offs);
 			if (ret < 0)
 				mlog_errno(ret);
 			goto out;
 		} else if (!ocfs2_xattr_is_local(xs->here)) {
 			/* For existing xattr which has value outside */
-			struct ocfs2_xattr_value_buf vb = {
-				.vb_bh = xs->xattr_bh,
-				.vb_xv = (struct ocfs2_xattr_value_root *)
-					(val + OCFS2_XATTR_SIZE(name_len)),
-				.vb_access = ocfs2_journal_access,
-			};
+			vb.vb_xv = (struct ocfs2_xattr_value_root *)
+				(val + OCFS2_XATTR_SIZE(name_len));
 
 			if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
 				/*
@@ -1605,16 +1609,16 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 		}
 	}
 
-	ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
-		ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = vb.vb_access(handle, inode, vb.vb_bh,
+				   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -1674,7 +1678,8 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 		 * This is the second step for value size > INLINE_SIZE.
 		 */
 		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
-		ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, offs);
+		ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
+						    &vb, offs);
 		if (ret < 0) {
 			int ret2;
 
@@ -1684,7 +1689,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 			 * the junk tree root we have already set in local.
 			 */
 			ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
-						   xi, xs, offs);
+						   xi, xs, &vb, offs);
 			if (ret2 < 0)
 				mlog_errno(ret2);
 		}
-- 
cgit v0.10.2


From 4311901daabe1d0f22cfcf86c57ad450f14b4e9f Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 9 Dec 2008 16:24:43 -0800
Subject: ocfs2: Pass value buf to ocfs2_remove_value_outside().

ocfs2_remove_value_outside() needs to know the type of buffer it is
looking at.  Pass in an ocfs2_xattr_value_buf.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index c08b5e8..d2760e6 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1699,7 +1699,7 @@ out:
 }
 
 static int ocfs2_remove_value_outside(struct inode*inode,
-				      struct buffer_head *bh,
+				      struct ocfs2_xattr_value_buf *vb,
 				      struct ocfs2_xattr_header *header)
 {
 	int ret = 0, i;
@@ -1720,17 +1720,13 @@ static int ocfs2_remove_value_outside(struct inode*inode,
 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
 
 		if (!ocfs2_xattr_is_local(entry)) {
-			struct ocfs2_xattr_value_buf vb = {
-				.vb_bh = bh,
-				.vb_access = ocfs2_journal_access,
-			};
 			void *val;
 
 			val = (void *)header +
 				le16_to_cpu(entry->xe_name_offset);
-			vb.vb_xv = (struct ocfs2_xattr_value_root *)
+			vb->vb_xv = (struct ocfs2_xattr_value_root *)
 				(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
-			ret = ocfs2_xattr_value_truncate(inode, &vb, 0, &ctxt);
+			ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
 			if (ret < 0) {
 				mlog_errno(ret);
 				break;
@@ -1752,12 +1748,16 @@ static int ocfs2_xattr_ibody_remove(struct inode *inode,
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
 	struct ocfs2_xattr_header *header;
 	int ret;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = di_bh,
+		.vb_access = ocfs2_journal_access_di,
+	};
 
 	header = (struct ocfs2_xattr_header *)
 		 ((void *)di + inode->i_sb->s_blocksize -
 		 le16_to_cpu(di->i_xattr_inline_size));
 
-	ret = ocfs2_remove_value_outside(inode, di_bh, header);
+	ret = ocfs2_remove_value_outside(inode, &vb, header);
 
 	return ret;
 }
@@ -1767,11 +1767,15 @@ static int ocfs2_xattr_block_remove(struct inode *inode,
 {
 	struct ocfs2_xattr_block *xb;
 	int ret = 0;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = blk_bh,
+		.vb_access = ocfs2_journal_access_xb,
+	};
 
 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
-		ret = ocfs2_remove_value_outside(inode, blk_bh, header);
+		ret = ocfs2_remove_value_outside(inode, &vb, header);
 	} else
 		ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
 
-- 
cgit v0.10.2


From 84008972491ca91b240f106191519781dabb8016 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 9 Dec 2008 16:11:49 -0800
Subject: ocfs2: Use proper journal_access function in xattr.c

Change the rest of the naked ocfs2_journal_access() calls in
fs/ocfs2/xattr.c to use the appropriate ocfs2_journal_access_*() call
for their metadata type.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d2760e6..17028aa 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1894,8 +1894,8 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 		mlog_errno(ret);
 		goto out;
 	}
-	ret = ocfs2_journal_access(handle, inode, di_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -2103,8 +2103,8 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 	int ret;
 
 	if (!xs->xattr_bh) {
-		ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
-					   OCFS2_JOURNAL_ACCESS_CREATE);
+		ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
+					      OCFS2_JOURNAL_ACCESS_CREATE);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto end;
@@ -2121,8 +2121,8 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 		new_bh = sb_getblk(inode->i_sb, first_blkno);
 		ocfs2_set_new_buffer_uptodate(inode, new_bh);
 
-		ret = ocfs2_journal_access(handle, inode, new_bh,
-					   OCFS2_JOURNAL_ACCESS_CREATE);
+		ret = ocfs2_journal_access_xb(handle, inode, new_bh,
+					      OCFS2_JOURNAL_ACCESS_CREATE);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto end;
@@ -3377,8 +3377,8 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 	 */
 	down_write(&oi->ip_alloc_sem);
 
-	ret = ocfs2_journal_access(handle, inode, xb_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_xb(handle, inode, xb_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4216,8 +4216,8 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 
 	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
 
-	ret = ocfs2_journal_access(handle, inode, root_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_xb(handle, inode, root_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto leave;
@@ -4808,8 +4808,8 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access(handle, inode, root_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_xb(handle, inode, root_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
-- 
cgit v0.10.2


From 87d35a74b15ec703910a63e0667692fb5e267be0 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Wed, 10 Dec 2008 17:36:25 -0800
Subject: ocfs2: Add directory block trailers.

Future ocfs2 features metaecc and indexed directories need to store a
little bit of data in each dirblock.  For compatibility, we place this
in a trailer at the end of the dirblock.  The trailer plays itself as an
empty dirent, so that if the features are turned off, it can be reused
without requiring a tunefs scan.

This code adds the trailer and validates it when the block is read in.

[ Mark is the original author, but I reinserted this code before his
  dir index work.  -- Joel ]

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 45e4e03..1efd0ab 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -84,6 +84,63 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
 			       struct buffer_head **new_bh);
 
 /*
+ * These are distinct checks because future versions of the file system will
+ * want to have a trailing dirent structure independent of indexing.
+ */
+static int ocfs2_dir_has_trailer(struct inode *dir)
+{
+	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
+		return 0;
+
+	return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb));
+}
+
+static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb)
+{
+	return ocfs2_meta_ecc(osb);
+}
+
+static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
+{
+	return sb->s_blocksize - sizeof(struct ocfs2_dir_block_trailer);
+}
+
+#define ocfs2_trailer_from_bh(_bh, _sb) ((struct ocfs2_dir_block_trailer *) ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb))))
+
+/*
+ * XXX: This is executed once on every dirent. We should consider optimizing
+ * it.
+ */
+static int ocfs2_skip_dir_trailer(struct inode *dir,
+				  struct ocfs2_dir_entry *de,
+				  unsigned long offset,
+				  unsigned long blklen)
+{
+	unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
+
+	if (!ocfs2_dir_has_trailer(dir))
+		return 0;
+
+	if (offset != toff)
+		return 0;
+
+	return 1;
+}
+
+static void ocfs2_init_dir_trailer(struct inode *inode,
+				   struct buffer_head *bh)
+{
+	struct ocfs2_dir_block_trailer *trailer;
+
+	trailer = ocfs2_trailer_from_bh(bh, inode->i_sb);
+	strcpy(trailer->db_signature, OCFS2_DIR_TRAILER_SIGNATURE);
+	trailer->db_compat_rec_len =
+			cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer));
+	trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
+	trailer->db_blkno = cpu_to_le64(bh->b_blocknr);
+}
+
+/*
  * bh passed here can be an inode block or a dir data block, depending
  * on the inode inline data flag.
  */
@@ -232,16 +289,60 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
 {
 	int rc = 0;
 	struct buffer_head *tmp = *bh;
+	struct ocfs2_dir_block_trailer *trailer;
 
 	rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
 				    ocfs2_validate_dir_block);
-	if (rc)
+	if (rc) {
 		mlog_errno(rc);
+		goto out;
+	}
+
+	/*
+	 * We check the trailer here rather than in
+	 * ocfs2_validate_dir_block() because that function doesn't have
+	 * the inode to test.
+	 */
+	if (!(flags & OCFS2_BH_READAHEAD) &&
+	    ocfs2_dir_has_trailer(inode)) {
+		trailer = ocfs2_trailer_from_bh(tmp, inode->i_sb);
+		if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
+			rc = -EINVAL;
+			ocfs2_error(inode->i_sb,
+				    "Invalid dirblock #%llu: "
+				    "signature = %.*s\n",
+				    (unsigned long long)tmp->b_blocknr, 7,
+				    trailer->db_signature);
+			goto out;
+		}
+		if (le64_to_cpu(trailer->db_blkno) != tmp->b_blocknr) {
+			rc = -EINVAL;
+			ocfs2_error(inode->i_sb,
+				    "Directory block #%llu has an invalid "
+				    "db_blkno of %llu",
+				    (unsigned long long)tmp->b_blocknr,
+				    (unsigned long long)le64_to_cpu(trailer->db_blkno));
+			goto out;
+		}
+		if (le64_to_cpu(trailer->db_parent_dinode) !=
+		    OCFS2_I(inode)->ip_blkno) {
+			rc = -EINVAL;
+			ocfs2_error(inode->i_sb,
+				    "Directory block #%llu on dinode "
+				    "#%llu has an invalid parent_dinode "
+				    "of %llu",
+				    (unsigned long long)tmp->b_blocknr,
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+				    (unsigned long long)le64_to_cpu(trailer->db_blkno));
+			goto out;
+		}
+	}
 
 	/* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
-	if (!rc && !*bh)
+	if (!*bh)
 		*bh = tmp;
 
+out:
 	return rc ? -EIO : 0;
 }
 
@@ -581,6 +682,16 @@ int __ocfs2_add_entry(handle_t *handle,
 			goto bail;
 		}
 
+		/* We're guaranteed that we should have space, so we
+		 * can't possibly have hit the trailer...right? */
+		mlog_bug_on_msg(ocfs2_skip_dir_trailer(dir, de, offset, size),
+				"Hit dir trailer trying to insert %.*s "
+			        "(namelen %d) into directory %llu.  "
+				"offset is %lu, trailer offset is %d\n",
+				namelen, name, namelen,
+				(unsigned long long)parent_fe_bh->b_blocknr,
+				offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
+
 		if (ocfs2_dirent_would_fit(de, rec_len)) {
 			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
 			retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
@@ -622,6 +733,7 @@ int __ocfs2_add_entry(handle_t *handle,
 			retval = 0;
 			goto bail;
 		}
+
 		offset += le16_to_cpu(de->rec_len);
 		de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
 	}
@@ -1059,9 +1171,15 @@ int ocfs2_empty_dir(struct inode *inode)
 	return !priv.seen_other;
 }
 
-static void ocfs2_fill_initial_dirents(struct inode *inode,
-				       struct inode *parent,
-				       char *start, unsigned int size)
+/*
+ * Fills "." and ".." dirents in a new directory block. Returns dirent for
+ * "..", which might be used during creation of a directory with a trailing
+ * header. It is otherwise safe to ignore the return code.
+ */
+static struct ocfs2_dir_entry *ocfs2_fill_initial_dirents(struct inode *inode,
+							  struct inode *parent,
+							  char *start,
+							  unsigned int size)
 {
 	struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start;
 
@@ -1078,6 +1196,8 @@ static void ocfs2_fill_initial_dirents(struct inode *inode,
 	de->name_len = 2;
 	strcpy(de->name, "..");
 	ocfs2_set_de_type(de, S_IFDIR);
+
+	return de;
 }
 
 /*
@@ -1130,10 +1250,15 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
 				 struct ocfs2_alloc_context *data_ac)
 {
 	int status;
+	unsigned int size = osb->sb->s_blocksize;
 	struct buffer_head *new_bh = NULL;
+	struct ocfs2_dir_entry *de;
 
 	mlog_entry_void();
 
+	if (ocfs2_supports_dir_trailer(osb))
+		size = ocfs2_dir_trailer_blk_off(parent->i_sb);
+
 	status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
 				     data_ac, NULL, &new_bh);
 	if (status < 0) {
@@ -1151,8 +1276,9 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
 	}
 	memset(new_bh->b_data, 0, osb->sb->s_blocksize);
 
-	ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data,
-				   osb->sb->s_blocksize);
+	de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size);
+	if (ocfs2_supports_dir_trailer(osb))
+		ocfs2_init_dir_trailer(inode, new_bh);
 
 	status = ocfs2_journal_dirty(handle, new_bh);
 	if (status < 0) {
@@ -1193,13 +1319,27 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb,
 				     data_ac);
 }
 
+/*
+ * Expand rec_len of the rightmost dirent in a directory block so that it
+ * contains the end of our valid space for dirents. We do this during
+ * expansion from an inline directory to one with extents. The first dir block
+ * in that case is taken from the inline data portion of the inode block.
+ *
+ * We add the dir trailer if this filesystem wants it.
+ */
 static void ocfs2_expand_last_dirent(char *start, unsigned int old_size,
-				     unsigned int new_size)
+				     struct super_block *sb)
 {
 	struct ocfs2_dir_entry *de;
 	struct ocfs2_dir_entry *prev_de;
 	char *de_buf, *limit;
-	unsigned int bytes = new_size - old_size;
+	unsigned int new_size = sb->s_blocksize;
+	unsigned int bytes;
+
+	if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
+		new_size = ocfs2_dir_trailer_blk_off(sb);
+
+	bytes = new_size - old_size;
 
 	limit = start + old_size;
 	de_buf = start;
@@ -1316,8 +1456,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
 	memset(dirdata_bh->b_data + i_size_read(dir), 0,
 	       sb->s_blocksize - i_size_read(dir));
-	ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir),
-				 sb->s_blocksize);
+	ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), sb);
+	if (ocfs2_supports_dir_trailer(osb))
+		ocfs2_init_dir_trailer(dir, dirdata_bh);
 
 	ret = ocfs2_journal_dirty(handle, dirdata_bh);
 	if (ret) {
@@ -1604,9 +1745,15 @@ do_extend:
 		goto bail;
 	}
 	memset(new_bh->b_data, 0, sb->s_blocksize);
+
 	de = (struct ocfs2_dir_entry *) new_bh->b_data;
 	de->inode = 0;
-	de->rec_len = cpu_to_le16(sb->s_blocksize);
+	if (ocfs2_dir_has_trailer(dir)) {
+		de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb));
+		ocfs2_init_dir_trailer(dir, new_bh);
+	} else {
+		de->rec_len = cpu_to_le16(sb->s_blocksize);
+	}
 	status = ocfs2_journal_dirty(handle, new_bh);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1648,11 +1795,21 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
 				   unsigned int *blocks_wanted)
 {
 	int ret;
+	struct super_block *sb = dir->i_sb;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
 	struct ocfs2_dir_entry *de, *last_de = NULL;
 	char *de_buf, *limit;
 	unsigned long offset = 0;
-	unsigned int rec_len, new_rec_len;
+	unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize;
+
+	/*
+	 * This calculates how many free bytes we'd have in block zero, should
+	 * this function force expansion to an extent tree.
+	 */
+	if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
+		free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir);
+	else
+		free_space = dir->i_sb->s_blocksize - i_size_read(dir);
 
 	de_buf = di->id2.i_data.id_data;
 	limit = de_buf + i_size_read(dir);
@@ -1669,6 +1826,11 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
 			ret = -EEXIST;
 			goto out;
 		}
+		/*
+		 * No need to check for a trailing dirent record here as
+		 * they're not used for inline dirs.
+		 */
+
 		if (ocfs2_dirent_would_fit(de, rec_len)) {
 			/* Ok, we found a spot. Return this bh and let
 			 * the caller actually fill it in. */
@@ -1689,7 +1851,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
 	 * dirent can be found.
 	 */
 	*blocks_wanted = 1;
-	new_rec_len = le16_to_cpu(last_de->rec_len) + (dir->i_sb->s_blocksize - i_size_read(dir));
+	new_rec_len = le16_to_cpu(last_de->rec_len) + free_space;
 	if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
 		*blocks_wanted = 2;
 
@@ -1707,6 +1869,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
 	struct ocfs2_dir_entry *de;
 	struct super_block *sb = dir->i_sb;
 	int status;
+	int blocksize = dir->i_sb->s_blocksize;
 
 	status = ocfs2_read_dir_block(dir, 0, &bh, 0);
 	if (status) {
@@ -1748,6 +1911,11 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
 			status = -EEXIST;
 			goto bail;
 		}
+
+		if (ocfs2_skip_dir_trailer(dir, de, offset % blocksize,
+					   blocksize))
+			goto next;
+
 		if (ocfs2_dirent_would_fit(de, rec_len)) {
 			/* Ok, we found a spot. Return this bh and let
 			 * the caller actually fill it in. */
@@ -1756,6 +1924,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
 			status = 0;
 			goto bail;
 		}
+next:
 		offset += le16_to_cpu(de->rec_len);
 		de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
 	}
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index bad87d0..ad5c24a 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -470,6 +470,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
 #define OCFS2_IS_VALID_XATTR_BLOCK(ptr)					\
 	(!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))
 
+#define OCFS2_IS_VALID_DIR_TRAILER(ptr)					\
+	(!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE))
+
 static inline unsigned long ino_from_blkno(struct super_block *sb,
 					   u64 blkno)
 {
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 290fa26..af0013b 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -65,6 +65,7 @@
 #define OCFS2_EXTENT_BLOCK_SIGNATURE	"EXBLK01"
 #define OCFS2_GROUP_DESC_SIGNATURE      "GROUP01"
 #define OCFS2_XATTR_BLOCK_SIGNATURE	"XATTR01"
+#define OCFS2_DIR_TRAILER_SIGNATURE	"DIRTRL1"
 
 /* Compatibility flags */
 #define OCFS2_HAS_COMPAT_FEATURE(sb,mask)			\
@@ -752,6 +753,34 @@ struct ocfs2_dir_entry {
 } __attribute__ ((packed));
 
 /*
+ * Per-block record for the unindexed directory btree. This is carefully
+ * crafted so that the rec_len and name_len records of an ocfs2_dir_entry are
+ * mirrored. That way, the directory manipulation code needs a minimal amount
+ * of update.
+ *
+ * NOTE: Keep this structure aligned to a multiple of 4 bytes.
+ */
+struct ocfs2_dir_block_trailer {
+/*00*/	__le64		db_compat_inode;	/* Always zero. Was inode */
+
+	__le16		db_compat_rec_len;	/* Backwards compatible with
+						 * ocfs2_dir_entry. */
+	__u8		db_compat_name_len;	/* Always zero. Was name_len */
+	__u8		db_reserved0;
+	__le16		db_reserved1;
+	__le16		db_free_rec_len;	/* Size of largest empty hole
+						 * in this block. (unused) */
+/*10*/	__u8		db_signature[8];	/* Signature for verification */
+	__le64		db_reserved2;
+	__le64		db_free_next;		/* Next block in list (unused) */
+/*20*/	__le64		db_blkno;		/* Offset on disk, in blocks */
+	__le64		db_parent_dinode;	/* dinode which owns me, in
+						   blocks */
+/*30*/	__le64		db_check;		/* Error checking */
+/*40*/
+};
+
+/*
  * On disk allocator group structure for OCFS2
  */
 struct ocfs2_group_desc
-- 
cgit v0.10.2


From c175a518b4a1d514483abf61813ce5d855917164 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 10 Dec 2008 17:58:22 -0800
Subject: ocfs2: Checksum and ECC for directory blocks.

Use the db_check field of ocfs2_dir_block_trailer to crc/ecc the
dirblocks.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 1efd0ab..f2c4098 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -48,6 +48,7 @@
 #include "ocfs2.h"
 
 #include "alloc.h"
+#include "blockcheck.h"
 #include "dir.h"
 #include "dlmglue.h"
 #include "extent_map.h"
@@ -107,6 +108,17 @@ static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
 
 #define ocfs2_trailer_from_bh(_bh, _sb) ((struct ocfs2_dir_block_trailer *) ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb))))
 
+/* XXX ocfs2_block_dqtrailer() is similar but not quite - can we make
+ * them more consistent? */
+struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
+							    void *data)
+{
+	char *p = data;
+
+	p += blocksize - sizeof(struct ocfs2_dir_block_trailer);
+	return (struct ocfs2_dir_block_trailer *)p;
+}
+
 /*
  * XXX: This is executed once on every dirent. We should consider optimizing
  * it.
@@ -268,14 +280,35 @@ out:
 static int ocfs2_validate_dir_block(struct super_block *sb,
 				    struct buffer_head *bh)
 {
+	int rc;
+	struct ocfs2_dir_block_trailer *trailer =
+		ocfs2_trailer_from_bh(bh, sb);
+
+
 	/*
-	 * Nothing yet.  We don't validate dirents here, that's handled
+	 * We don't validate dirents here, that's handled
 	 * in-place when the code walks them.
 	 */
 	mlog(0, "Validating dirblock %llu\n",
 	     (unsigned long long)bh->b_blocknr);
 
-	return 0;
+	BUG_ON(!buffer_uptodate(bh));
+
+	/*
+	 * If the ecc fails, we return the error but otherwise
+	 * leave the filesystem running.  We know any error is
+	 * local to this block.
+	 *
+	 * Note that we are safe to call this even if the directory
+	 * doesn't have a trailer.  Filesystems without metaecc will do
+	 * nothing, and filesystems with it will have one.
+	 */
+	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &trailer->db_check);
+	if (rc)
+		mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
+		     (unsigned long long)bh->b_blocknr);
+
+	return rc;
 }
 
 /*
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h
index ce48b90..c511e2e 100644
--- a/fs/ocfs2/dir.h
+++ b/fs/ocfs2/dir.h
@@ -83,4 +83,6 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb,
 		       struct buffer_head *fe_bh,
 		       struct ocfs2_alloc_context *data_ac);
 
+struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
+							    void *data);
 #endif /* OCFS2_DIR_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 3b54dba..57d7d25 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -415,6 +415,26 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
 	ocfs2_block_check_compute(data, size, &dqt->dq_check);
 }
 
+/*
+ * Directory blocks also have their own trigger because the
+ * struct ocfs2_block_check offset depends on the blocksize.
+ */
+static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+				 struct buffer_head *bh,
+				 void *data, size_t size)
+{
+	struct ocfs2_dir_block_trailer *trailer =
+		ocfs2_dir_trailer_from_size(size, data);
+
+	/*
+	 * We aren't guaranteed to have the superblock here, so we
+	 * must unconditionally compute the ecc data.
+	 * __ocfs2_journal_access() will only set the triggers if
+	 * metaecc is enabled.
+	 */
+	ocfs2_block_check_compute(data, size, &trailer->db_check);
+}
+
 static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
 				struct buffer_head *bh)
 {
@@ -454,6 +474,13 @@ static struct ocfs2_triggers gd_triggers = {
 	.ot_offset	= offsetof(struct ocfs2_group_desc, bg_check),
 };
 
+static struct ocfs2_triggers db_triggers = {
+	.ot_triggers = {
+		.t_commit = ocfs2_db_commit_trigger,
+		.t_abort = ocfs2_abort_trigger,
+	},
+};
+
 static struct ocfs2_triggers xb_triggers = {
 	.ot_triggers = {
 		.t_commit = ocfs2_commit_trigger,
@@ -555,8 +582,8 @@ int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
 int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
 			    struct buffer_head *bh, int type)
 {
-	/* Right now, nothing for dirblocks */
-	return __ocfs2_journal_access(handle, inode, bh, NULL, type);
+	return __ocfs2_journal_access(handle, inode, bh, &db_triggers,
+				      type);
 }
 
 int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index af0013b..698ef3d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -776,7 +776,7 @@ struct ocfs2_dir_block_trailer {
 /*20*/	__le64		db_blkno;		/* Offset on disk, in blocks */
 	__le64		db_parent_dinode;	/* dinode which owns me, in
 						   blocks */
-/*30*/	__le64		db_check;		/* Error checking */
+/*30*/	struct ocfs2_block_check db_check;	/* Error checking */
 /*40*/
 };
 
-- 
cgit v0.10.2


From d030cc978e9e636dc39ce9a9e8282d48698a3b30 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 11 Dec 2008 15:04:14 -0800
Subject: ocfs2: Validate superblock with checksum and ecc.

The superblock is read via a raw call.  Validate it after we find it
from its signature.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2eb657c..43ed113 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -52,6 +52,7 @@
 #include "ocfs1_fs_compat.h"
 
 #include "alloc.h"
+#include "blockcheck.h"
 #include "dlmglue.h"
 #include "export.h"
 #include "extent_map.h"
@@ -1989,6 +1990,15 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
 
 	if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
 		   strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
+		/* We have to do a raw check of the feature here */
+		if (le32_to_cpu(di->id2.i_super.s_feature_incompat) &
+		    OCFS2_FEATURE_INCOMPAT_META_ECC) {
+			status = ocfs2_block_check_validate(bh->b_data,
+							    bh->b_size,
+							    &di->i_check);
+			if (status)
+				goto out;
+		}
 		status = -EINVAL;
 		if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
 			mlog(ML_ERROR, "found superblock with incorrect block "
@@ -2030,6 +2040,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
 		}
 	}
 
+out:
 	mlog_exit(status);
 	return status;
 }
-- 
cgit v0.10.2


From 9d28cfb73f3abccce001daf2d247b16bf20e2248 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 16 Oct 2008 17:53:29 -0700
Subject: ocfs2: Enable metadata checksums.

Add OCFS2_FEATURE_INCOMPAT_META_ECC to the list of supported features.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 698ef3d..c7ae45a 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -94,7 +94,8 @@
 					 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
 					 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
 					 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
-					 | OCFS2_FEATURE_INCOMPAT_XATTR)
+					 | OCFS2_FEATURE_INCOMPAT_XATTR \
+					 | OCFS2_FEATURE_INCOMPAT_META_ECC)
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
 					 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
 					 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
-- 
cgit v0.10.2


From e798b3f8a920c82a8e556dd54df97f0d3d0f9144 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 15 Dec 2008 17:13:48 -0800
Subject: ocfs2: Don't hand-code xor in ocfs2_hamming_encode().

When I wrote ocfs2_hamming_encode(), I was following documentation of
the algorithm and didn't have quite the (possibly still imperfect) grasp
of it I do now.  As part of this, I literally hand-coded xor.  I would
test a bit, and then add that bit via xor to the parity word.

I can, of course, just do a single xor of the parity word and the source
word (the code buffer bit offset).  This cuts CPU usage by 53% on a
mostly populated buffer (an inode containing utmp.h inline).

Joel

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index 2ce6ae5..1d5083c 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -31,7 +31,6 @@
 #include "blockcheck.h"
 
 
-
 /*
  * We use the following conventions:
  *
@@ -39,26 +38,6 @@
  * p = # parity bits
  * c = # total code bits (d + p)
  */
-static int calc_parity_bits(unsigned int d)
-{
-	unsigned int p;
-
-	/*
-	 * Bits required for Single Error Correction is as follows:
-	 *
-	 * d + p + 1 <= 2^p
-	 *
-	 * We're restricting ourselves to 31 bits of parity, that should be
-	 * sufficient.
-	 */
-	for (p = 1; p < 32; p++)
-	{
-		if ((d + p + 1) <= (1 << p))
-			return p;
-	}
-
-	return 0;
-}
 
 /*
  * Calculate the bit offset in the hamming code buffer based on the bit's
@@ -109,10 +88,9 @@ static unsigned int calc_code_bit(unsigned int i)
  */
 u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr)
 {
-	unsigned int p = calc_parity_bits(nr + d);
-	unsigned int i, j, b;
+	unsigned int i, b;
 
-	BUG_ON(!p);
+	BUG_ON(!d);
 
 	/*
 	 * b is the hamming code bit number.  Hamming code specifies a
@@ -131,27 +109,23 @@ u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr
 		 */
 		b = calc_code_bit(nr + i);
 
-		for (j = 0; j < p; j++)
-		{
-			/*
-			 * Data bits in the resultant code are checked by
-			 * parity bits that are part of the bit number
-			 * representation.  Huh?
-			 *
-			 * <wikipedia href="http://en.wikipedia.org/wiki/Hamming_code">
-			 * In other words, the parity bit at position 2^k
-			 * checks bits in positions having bit k set in
-			 * their binary representation.  Conversely, for
-			 * instance, bit 13, i.e. 1101(2), is checked by
-			 * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
-			 * </wikipedia>
-			 *
-			 * Note that 'k' is the _code_ bit number.  'b' in
-			 * our loop.
-			 */
-			if (b & (1 << j))
-				parity ^= (1 << j);
-		}
+		/*
+		 * Data bits in the resultant code are checked by
+		 * parity bits that are part of the bit number
+		 * representation.  Huh?
+		 *
+		 * <wikipedia href="http://en.wikipedia.org/wiki/Hamming_code">
+		 * In other words, the parity bit at position 2^k
+		 * checks bits in positions having bit k set in
+		 * their binary representation.  Conversely, for
+		 * instance, bit 13, i.e. 1101(2), is checked by
+		 * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
+		 * </wikipedia>
+		 *
+		 * Note that 'k' is the _code_ bit number.  'b' in
+		 * our loop.
+		 */
+		parity ^= b;
 	}
 
 	/* While the data buffer was treated as little endian, the
@@ -174,10 +148,9 @@ u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize)
 void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
 		       unsigned int fix)
 {
-	unsigned int p = calc_parity_bits(nr + d);
 	unsigned int i, b;
 
-	BUG_ON(!p);
+	BUG_ON(!d);
 
 	/*
 	 * If the bit to fix has an hweight of 1, it's a parity bit.  One
-- 
cgit v0.10.2


From 7bb458a58588f397068e4166c615e9fcc7480c16 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 15 Dec 2008 18:24:33 -0800
Subject: ocfs2: Another hamming code optimization.

In the calc_code_bit() function, we must find all powers of two beneath
the code bit number, *after* it's shifted by those powers of two.  This
requires a loop to see where it ends up.

We can optimize it by starting at its most significant bit.  This shaves
32% off the time, for a total of 67.6% shaved off of the original, naive
implementation.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index 1d5083c..f102ec9 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -39,6 +39,35 @@
  * c = # total code bits (d + p)
  */
 
+
+/*
+ * Find the log base 2 of 32-bit v.
+ *
+ * Algorithm found on http://graphics.stanford.edu/~seander/bithacks.html,
+ * by Sean Eron Anderson.  Code on the page is in the public domain unless
+ * otherwise noted.
+ *
+ * This particular algorithm is credited to Eric Cole.
+ */
+static int find_highest_bit_set(unsigned int v)
+{
+
+	static const int MultiplyDeBruijnBitPosition[32] =
+	{
+		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+	};
+
+	v |= v >> 1; /* first round down to power of 2 */
+	v |= v >> 2;
+	v |= v >> 4;
+	v |= v >> 8;
+	v |= v >> 16;
+	v = (v >> 1) + 1;
+
+	return MultiplyDeBruijnBitPosition[(u32)(v * 0x077CB531UL) >> 27];
+}
+
 /*
  * Calculate the bit offset in the hamming code buffer based on the bit's
  * offset in the data buffer.  Since the hamming code reserves all
@@ -64,12 +93,21 @@ static unsigned int calc_code_bit(unsigned int i)
 	b = i + 1;
 
 	/*
+	 * As a cheat, we know that all bits below b's highest bit must be
+	 * parity bits, so we can start there.
+	 */
+        p = find_highest_bit_set(b);
+        b += p;
+
+	/*
 	 * For every power of two below our bit number, bump our bit.
 	 *
 	 * We compare with (b + 1) becuase we have to compare with what b
 	 * would be _if_ it were bumped up by the parity bit.  Capice?
+	 *
+	 * We start p at 2^p because of the cheat above.
 	 */
-	for (p = 0; (1 << p) < (b + 1); p++)
+	for (p = (1 << p); p < (b + 1); p <<= 1)
 		b++;
 
 	return b;
-- 
cgit v0.10.2


From 58896c4d0e5868360ea0693c607d5bf74f79da6b Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 16 Dec 2008 13:54:40 -0800
Subject: ocfs2: One more hamming code optimization.

The previous optimization used a fast find-highest-bit-set operation to
give us a good starting point in calc_code_bit().  This version lets the
caller cache the previous code buffer bit offset.  Thus, the next call
always starts where the last one left off.

This reduces the calculation another 39%, for a total 80% reduction from
the original, naive implementation.  At least, on my machine.  This also
brings the parity calculation to within an order of magnitude of the
crc32 calculation.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index f102ec9..2a947c4 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -41,34 +41,6 @@
 
 
 /*
- * Find the log base 2 of 32-bit v.
- *
- * Algorithm found on http://graphics.stanford.edu/~seander/bithacks.html,
- * by Sean Eron Anderson.  Code on the page is in the public domain unless
- * otherwise noted.
- *
- * This particular algorithm is credited to Eric Cole.
- */
-static int find_highest_bit_set(unsigned int v)
-{
-
-	static const int MultiplyDeBruijnBitPosition[32] =
-	{
-		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
-		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
-	};
-
-	v |= v >> 1; /* first round down to power of 2 */
-	v |= v >> 2;
-	v |= v >> 4;
-	v |= v >> 8;
-	v |= v >> 16;
-	v = (v >> 1) + 1;
-
-	return MultiplyDeBruijnBitPosition[(u32)(v * 0x077CB531UL) >> 27];
-}
-
-/*
  * Calculate the bit offset in the hamming code buffer based on the bit's
  * offset in the data buffer.  Since the hamming code reserves all
  * power-of-two bits for parity, the data bit number and the code bit
@@ -81,10 +53,14 @@ static int find_highest_bit_set(unsigned int v)
  * so it's a parity bit.  2 is a power of two (2^1), so it's a parity bit.
  * 3 is not a power of two.  So bit 1 of the data buffer ends up as bit 3
  * in the code buffer.
+ *
+ * The caller can pass in *p if it wants to keep track of the most recent
+ * number of parity bits added.  This allows the function to start the
+ * calculation at the last place.
  */
-static unsigned int calc_code_bit(unsigned int i)
+static unsigned int calc_code_bit(unsigned int i, unsigned int *p_cache)
 {
-	unsigned int b, p;
+	unsigned int b, p = 0;
 
 	/*
 	 * Data bits are 0-based, but we're talking code bits, which
@@ -92,24 +68,25 @@ static unsigned int calc_code_bit(unsigned int i)
 	 */
 	b = i + 1;
 
-	/*
-	 * As a cheat, we know that all bits below b's highest bit must be
-	 * parity bits, so we can start there.
-	 */
-        p = find_highest_bit_set(b);
+	/* Use the cache if it is there */
+	if (p_cache)
+		p = *p_cache;
         b += p;
 
 	/*
 	 * For every power of two below our bit number, bump our bit.
 	 *
-	 * We compare with (b + 1) becuase we have to compare with what b
+	 * We compare with (b + 1) because we have to compare with what b
 	 * would be _if_ it were bumped up by the parity bit.  Capice?
 	 *
-	 * We start p at 2^p because of the cheat above.
+	 * p is set above.
 	 */
-	for (p = (1 << p); p < (b + 1); p <<= 1)
+	for (; (1 << p) < (b + 1); p++)
 		b++;
 
+	if (p_cache)
+		*p_cache = p;
+
 	return b;
 }
 
@@ -126,7 +103,7 @@ static unsigned int calc_code_bit(unsigned int i)
  */
 u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr)
 {
-	unsigned int i, b;
+	unsigned int i, b, p = 0;
 
 	BUG_ON(!d);
 
@@ -145,7 +122,7 @@ u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr
 		 * i is the offset in this hunk, nr + i is the total bit
 		 * offset.
 		 */
-		b = calc_code_bit(nr + i);
+		b = calc_code_bit(nr + i, &p);
 
 		/*
 		 * Data bits in the resultant code are checked by
@@ -201,7 +178,7 @@ void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
 	 * nr + d is the bit right past the data hunk we're looking at.
 	 * If fix after that, nothing to do
 	 */
-	if (fix >= calc_code_bit(nr + d))
+	if (fix >= calc_code_bit(nr + d, NULL))
 		return;
 
 	/*
@@ -209,7 +186,7 @@ void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
 	 * start b at the offset in the code buffer.  See hamming_encode()
 	 * for a more detailed description of 'b'.
 	 */
-	b = calc_code_bit(nr);
+	b = calc_code_bit(nr, NULL);
 	/* If the fix is before this hunk, nothing to do */
 	if (fix < b)
 		return;
-- 
cgit v0.10.2


From 2b83256407687613e906bee93d98a25339128a4d Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Tue, 16 Dec 2008 15:49:19 -0800
Subject: ocfs2/dlm: Fix a race between migrate request and exit domain

Patch address a racing migrate request message and an exit domain message.
Instead of blocking exit domains for the duration of the migrate, we ignore
failure to deliver that message. This is because an exiting domain should
not have any active locks and thus has no role to play in the migration.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 44f87ca..92fd1d7 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2949,7 +2949,7 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
 				  struct dlm_node_iter *iter)
 {
 	struct dlm_migrate_request migrate;
-	int ret, status = 0;
+	int ret, skip, status = 0;
 	int nodenum;
 
 	memset(&migrate, 0, sizeof(migrate));
@@ -2966,12 +2966,27 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
 		    nodenum == new_master)
 			continue;
 
+		/* We could race exit domain. If exited, skip. */
+		spin_lock(&dlm->spinlock);
+		skip = (!test_bit(nodenum, dlm->domain_map));
+		spin_unlock(&dlm->spinlock);
+		if (skip) {
+			clear_bit(nodenum, iter->node_map);
+			continue;
+		}
+
 		ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key,
 					 &migrate, sizeof(migrate), nodenum,
 					 &status);
-		if (ret < 0)
-			mlog_errno(ret);
-		else if (status < 0) {
+		if (ret < 0) {
+			mlog(0, "migrate_request returned %d!\n", ret);
+			if (!dlm_is_host_down(ret)) {
+				mlog(ML_ERROR, "unhandled error=%d!\n", ret);
+				BUG();
+			}
+			clear_bit(nodenum, iter->node_map);
+			ret = 0;
+		} else if (status < 0) {
 			mlog(0, "migrate request (node %u) returned %d!\n",
 			     nodenum, status);
 			ret = status;
-- 
cgit v0.10.2


From 57dff2676eb68d805883a2204faaa5339ac44e03 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Tue, 16 Dec 2008 15:49:20 -0800
Subject: ocfs2/dlm: Clean up errors in dlm_proxy_ast_handler()

Patch cleans printed errors in dlm_proxy_ast_handler(). The errors now includes
the node number that sent the (b)ast. Also it reduces the number of endian swaps
of the cookie.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 644bee5..d07ddbe 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -275,6 +275,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
 	struct list_head *iter, *head=NULL;
 	u64 cookie;
 	u32 flags;
+	u8 node;
 
 	if (!dlm_grab(dlm)) {
 		dlm_error(DLM_REJECTED);
@@ -286,18 +287,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
 
 	name = past->name;
 	locklen = past->namelen;
-	cookie = be64_to_cpu(past->cookie);
+	cookie = past->cookie;
 	flags = be32_to_cpu(past->flags);
+	node = past->node_idx;
 
 	if (locklen > DLM_LOCKID_NAME_MAX) {
 		ret = DLM_IVBUFLEN;
-		mlog(ML_ERROR, "Invalid name length in proxy ast handler!\n");
+		mlog(ML_ERROR, "Invalid name length (%d) in proxy ast "
+		     "handler!\n", locklen);
 		goto leave;
 	}
 
 	if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
 	     (LKM_PUT_LVB|LKM_GET_LVB)) {
-		mlog(ML_ERROR, "both PUT and GET lvb specified\n");
+		mlog(ML_ERROR, "Both PUT and GET lvb specified, (0x%x)\n",
+		     flags);
 		ret = DLM_BADARGS;
 		goto leave;
 	}
@@ -310,22 +314,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
 	if (past->type != DLM_AST &&
 	    past->type != DLM_BAST) {
 		mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu"
-		     "name=%.*s\n", past->type, 
-		     dlm_get_lock_cookie_node(cookie),
-		     dlm_get_lock_cookie_seq(cookie),
-		     locklen, name);
+		     "name=%.*s, node=%u\n", past->type,
+		     dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
+		     locklen, name, node);
 		ret = DLM_IVLOCKID;
 		goto leave;
 	}
 
 	res = dlm_lookup_lockres(dlm, name, locklen);
 	if (!res) {
-		mlog(0, "got %sast for unknown lockres! "
-		     "cookie=%u:%llu, name=%.*s, namelen=%u\n",
-		     past->type == DLM_AST ? "" : "b",
-		     dlm_get_lock_cookie_node(cookie),
-		     dlm_get_lock_cookie_seq(cookie),
-		     locklen, name, locklen);
+		mlog(0, "Got %sast for unknown lockres! cookie=%u:%llu, "
+		     "name=%.*s, node=%u\n", (past->type == DLM_AST ? "" : "b"),
+		     dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
+		     locklen, name, node);
 		ret = DLM_IVLOCKID;
 		goto leave;
 	}
@@ -337,12 +340,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
 
 	spin_lock(&res->spinlock);
 	if (res->state & DLM_LOCK_RES_RECOVERING) {
-		mlog(0, "responding with DLM_RECOVERING!\n");
+		mlog(0, "Responding with DLM_RECOVERING!\n");
 		ret = DLM_RECOVERING;
 		goto unlock_out;
 	}
 	if (res->state & DLM_LOCK_RES_MIGRATING) {
-		mlog(0, "responding with DLM_MIGRATING!\n");
+		mlog(0, "Responding with DLM_MIGRATING!\n");
 		ret = DLM_MIGRATING;
 		goto unlock_out;
 	}
@@ -351,7 +354,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
 	lock = NULL;
 	list_for_each(iter, head) {
 		lock = list_entry (iter, struct dlm_lock, list);
-		if (be64_to_cpu(lock->ml.cookie) == cookie)
+		if (lock->ml.cookie == cookie)
 			goto do_ast;
 	}
 
@@ -363,15 +366,15 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
 
 	list_for_each(iter, head) {
 		lock = list_entry (iter, struct dlm_lock, list);
-		if (be64_to_cpu(lock->ml.cookie) == cookie)
+		if (lock->ml.cookie == cookie)
 			goto do_ast;
 	}
 
-	mlog(0, "got %sast for unknown lock!  cookie=%u:%llu, "
-	     "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", 
-	     dlm_get_lock_cookie_node(cookie),
-	     dlm_get_lock_cookie_seq(cookie),
-	     locklen, name, locklen);
+	mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, "
+	     "node=%u\n", past->type == DLM_AST ? "" : "b",
+	     dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
+	     dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
+	     locklen, name, node);
 
 	ret = DLM_NORMAL;
 unlock_out:
@@ -383,8 +386,8 @@ do_ast:
 	if (past->type == DLM_AST) {
 		/* do not alter lock refcount.  switching lists. */
 		list_move_tail(&lock->list, &res->granted);
-		mlog(0, "ast: adding to granted list... type=%d, "
-			  "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
+		mlog(0, "ast: Adding to granted list... type=%d, "
+		     "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
 		if (lock->ml.convert_type != LKM_IVMODE) {
 			lock->ml.type = lock->ml.convert_type;
 			lock->ml.convert_type = LKM_IVMODE;
@@ -408,7 +411,6 @@ do_ast:
 		dlm_do_local_bast(dlm, res, lock, past->blocked_type);
 
 leave:
-
 	if (res)
 		dlm_lockres_put(res);
 
-- 
cgit v0.10.2


From d4f7e650e55af6b235871126f747da88600e8040 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Tue, 16 Dec 2008 15:49:21 -0800
Subject: ocfs2/dlm: Hold off sending lockres drop ref message while lockres is
 migrating

During lockres purge, o2dlm sends a drop reference message to the lockres
master. This patch delays the message if the lockres is being migrated.

Fixes oss bugzilla#1012
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1012

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 4060bb3..d129520 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -181,7 +181,8 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 
 		spin_lock(&res->spinlock);
 		/* This ensures that clear refmap is sent after the set */
-		__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
+		__dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG |
+						  DLM_LOCK_RES_MIGRATING));
 		spin_unlock(&res->spinlock);
 
 		/* clear our bit from the master's refmap, ignore errors */
-- 
cgit v0.10.2


From b0d4f817ba5de8adb875ace594554a96d7737710 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Tue, 16 Dec 2008 15:49:22 -0800
Subject: ocfs2/dlm: Fix race in adding/removing lockres' to/from the tracking
 list

This patch adds a new lock, dlm->tracking_lock, to protect adding/removing
lockres' to/from the dlm->tracking_list. We were previously using dlm->spinlock
for the same, but that proved inadequate as we could be freeing a lockres from
a context that did not hold that lock. As the new lock only protects this list,
we can explicitly take it when removing the lockres from the tracking list.

This bug was exposed when testing multiple processes concurrently flock() the
same file.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index d5a86fb..bb53714 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -140,6 +140,7 @@ struct dlm_ctxt
 	unsigned int purge_count;
 	spinlock_t spinlock;
 	spinlock_t ast_lock;
+	spinlock_t track_lock;
 	char *name;
 	u8 node_num;
 	u32 key;
@@ -316,6 +317,8 @@ struct dlm_lock_resource
 	 * put on a list for the dlm thread to run. */
 	unsigned long    last_used;
 
+	struct dlm_ctxt *dlm;
+
 	unsigned migration_pending:1;
 	atomic_t asts_reserved;
 	spinlock_t spinlock;
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 1b81dcb..b32f60a 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -630,43 +630,38 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
 {
 	struct debug_lockres *dl = m->private;
 	struct dlm_ctxt *dlm = dl->dl_ctxt;
+	struct dlm_lock_resource *oldres = dl->dl_res;
 	struct dlm_lock_resource *res = NULL;
+	struct list_head *track_list;
 
-	spin_lock(&dlm->spinlock);
+	spin_lock(&dlm->track_lock);
+	if (oldres)
+		track_list = &oldres->tracking;
+	else
+		track_list = &dlm->tracking_list;
 
-	if (dl->dl_res) {
-		list_for_each_entry(res, &dl->dl_res->tracking, tracking) {
-			if (dl->dl_res) {
-				dlm_lockres_put(dl->dl_res);
-				dl->dl_res = NULL;
-			}
-			if (&res->tracking == &dlm->tracking_list) {
-				mlog(0, "End of list found, %p\n", res);
-				dl = NULL;
-				break;
-			}
+	list_for_each_entry(res, track_list, tracking) {
+		if (&res->tracking == &dlm->tracking_list)
+			res = NULL;
+		else
 			dlm_lockres_get(res);
-			dl->dl_res = res;
-			break;
-		}
-	} else {
-		if (!list_empty(&dlm->tracking_list)) {
-			list_for_each_entry(res, &dlm->tracking_list, tracking)
-				break;
-			dlm_lockres_get(res);
-			dl->dl_res = res;
-		} else
-			dl = NULL;
+		break;
 	}
+	spin_unlock(&dlm->track_lock);
 
-	if (dl) {
-		spin_lock(&dl->dl_res->spinlock);
-		dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1);
-		spin_unlock(&dl->dl_res->spinlock);
-	}
+	if (oldres)
+		dlm_lockres_put(oldres);
 
-	spin_unlock(&dlm->spinlock);
+	dl->dl_res = res;
+
+	if (res) {
+		spin_lock(&res->spinlock);
+		dump_lockres(res, dl->dl_buf, dl->dl_len - 1);
+		spin_unlock(&res->spinlock);
+	} else
+		dl = NULL;
 
+	/* passed to seq_show */
 	return dl;
 }
 
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 63f8125..d8d578f 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1550,6 +1550,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 	spin_lock_init(&dlm->spinlock);
 	spin_lock_init(&dlm->master_lock);
 	spin_lock_init(&dlm->ast_lock);
+	spin_lock_init(&dlm->track_lock);
 	INIT_LIST_HEAD(&dlm->list);
 	INIT_LIST_HEAD(&dlm->dirty_list);
 	INIT_LIST_HEAD(&dlm->reco.resources);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 92fd1d7..cbf3abe 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -505,8 +505,10 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
 static void dlm_lockres_release(struct kref *kref)
 {
 	struct dlm_lock_resource *res;
+	struct dlm_ctxt *dlm;
 
 	res = container_of(kref, struct dlm_lock_resource, refs);
+	dlm = res->dlm;
 
 	/* This should not happen -- all lockres' have a name
 	 * associated with them at init time. */
@@ -515,6 +517,7 @@ static void dlm_lockres_release(struct kref *kref)
 	mlog(0, "destroying lockres %.*s\n", res->lockname.len,
 	     res->lockname.name);
 
+	spin_lock(&dlm->track_lock);
 	if (!list_empty(&res->tracking))
 		list_del_init(&res->tracking);
 	else {
@@ -522,6 +525,9 @@ static void dlm_lockres_release(struct kref *kref)
 		     res->lockname.len, res->lockname.name);
 		dlm_print_one_lock_resource(res);
 	}
+	spin_unlock(&dlm->track_lock);
+
+	dlm_put(dlm);
 
 	if (!hlist_unhashed(&res->hash_node) ||
 	    !list_empty(&res->granted) ||
@@ -595,6 +601,10 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
 	res->migration_pending = 0;
 	res->inflight_locks = 0;
 
+	/* put in dlm_lockres_release */
+	dlm_grab(dlm);
+	res->dlm = dlm;
+
 	kref_init(&res->refs);
 
 	/* just for consistency */
-- 
cgit v0.10.2


From 7b791d68562e4ce5ab57cbacb10a1ad4ee33956e Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Tue, 16 Dec 2008 15:49:23 -0800
Subject: ocfs2/dlm: Fix race during lockres mastery

dlm_get_lock_resource() is supposed to return a lock resource with a proper
master. If multiple concurrent threads attempt to lookup the lockres for the
same lockid while the lock mastery in underway, one or more threads are likely
to return a lockres without a proper master.

This patch makes the threads wait in dlm_get_lock_resource() while the mastery
is underway, ensuring all threads return the lockres with a proper master.

This issue is known to be limited to users using the flock() syscall. For all
other fs operations, the ocfs2 dlmglue layer serializes the dlm op for each
lockid.

Users encountering this bug will see flock() return EINVAL and dmesg have the
following error:
ERROR: Dlm error "DLM_BADARGS" while calling dlmlock on resource <LOCKID>: bad api args

Reported-by: Coly Li <coyli@suse.de>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index cbf3abe..54e182a 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -732,14 +732,21 @@ lookup:
 	if (tmpres) {
 		int dropping_ref = 0;
 
+		spin_unlock(&dlm->spinlock);
+
 		spin_lock(&tmpres->spinlock);
+		/* We wait for the other thread that is mastering the resource */
+		if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
+			__dlm_wait_on_lockres(tmpres);
+			BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN);
+		}
+
 		if (tmpres->owner == dlm->node_num) {
 			BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF);
 			dlm_lockres_grab_inflight_ref(dlm, tmpres);
 		} else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF)
 			dropping_ref = 1;
 		spin_unlock(&tmpres->spinlock);
-		spin_unlock(&dlm->spinlock);
 
 		/* wait until done messaging the master, drop our ref to allow
 		 * the lockres to be purged, start over. */
-- 
cgit v0.10.2


From 71d548a6af36fe98c95fbd0522147f842bd5f054 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Fri, 5 Dec 2008 06:20:54 +0800
Subject: ocfs2/xattr: Remove extend_trans call and add its credits from the
 beginning

Actually, when setting a new xattr value, we know it from the very
beginning, and it isn't like the extension of bucket in which case
we can't figure it out. So remove ocfs2_extend_trans in that function
and calculate it before the transaction. It also relieve acl operation
from the worry about the side effect of ocfs2_extend_trans.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 17028aa..93a1ab4 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1169,7 +1169,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 					   const void *value,
 					   int value_len)
 {
-	int ret = 0, i, cp_len, credits;
+	int ret = 0, i, cp_len;
 	u16 blocksize = inode->i_sb->s_blocksize;
 	u32 p_cluster, num_clusters;
 	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
@@ -1179,18 +1179,6 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 
 	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
 
-	/*
-	 * In __ocfs2_xattr_set_value_outside has already been dirtied,
-	 * so we don't need to worry about whether ocfs2_extend_trans
-	 * will create a new transactio for us or not.
-	 */
-	credits = clusters * bpc;
-	ret = ocfs2_extend_trans(handle, credits);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
 	while (cpos < clusters) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
 					       &num_clusters, &xv->xr_list);
@@ -2233,6 +2221,15 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 						    xi->value_len);
 	u64 value_size;
 
+	/*
+	 * Calculate the clusters we need to write.
+	 * No matter whether we replace an old one or add a new one,
+	 * we need this for writing.
+	 */
+	if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
+		credits += new_clusters *
+			   ocfs2_clusters_to_blocks(inode->i_sb, 1);
+
 	if (xis->not_found && xbs->not_found) {
 		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
-- 
cgit v0.10.2


From 4b3f6209bf9eec46fe5ebb168718fef5c443c157 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Fri, 5 Dec 2008 06:20:55 +0800
Subject: ocfs2/xattr: Always updating ctime during xattr set.

In xattr set, we should always update ctime if the operation goes
sucessfully. The old one mistakenly put it in ocfs2_xattr_set_entry
which is only called when we set xattr in inode or xattr block. The
side benefit is that it resolve the bug 1052 since in that scenario,
ocfs2_calc_xattr_set_need only calc out the xattr set credits while
ocfs2_xattr_set_entry update the inode also which isn't concerned with
the process of xattr set.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 93a1ab4..3e2e92d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1651,10 +1651,6 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 	oi->ip_dyn_features |= flag;
 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
 	spin_unlock(&oi->ip_lock);
-	/* Update inode ctime */
-	inode->i_ctime = CURRENT_TIME;
-	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
 
 	ret = ocfs2_journal_dirty(handle, xs->inode_bh);
 	if (ret < 0)
@@ -2574,6 +2570,20 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 		}
 	}
 
+	if (!ret) {
+		/* Update inode ctime. */
+		ret = ocfs2_journal_access(ctxt->handle, inode, xis->inode_bh,
+					   OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		inode->i_ctime = CURRENT_TIME;
+		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
+	}
 out:
 	return ret;
 }
@@ -2750,6 +2760,8 @@ int ocfs2_xattr_set(struct inode *inode,
 		goto cleanup;
 	}
 
+	/* we need to update inode's ctime field, so add credit for it. */
+	credits += OCFS2_INODE_UPDATE_CREDITS;
 	ctxt.handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(ctxt.handle)) {
 		ret = PTR_ERR(ctxt.handle);
-- 
cgit v0.10.2


From 90cb546cada68bb8c2278afdb4b65c2ac11f2877 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Fri, 5 Dec 2008 06:20:56 +0800
Subject: ocfs2/xattr: fix credits calculation during index create

When creating a xattr index block, the old calculation forget
to add credits for the meta change of the alloc file. So add
more credits and more comments to explain it.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3e2e92d..73fb9f7 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2359,13 +2359,21 @@ meta_guess:
 		} else
 			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
 
+		/*
+		 * If there is already an xattr tree, good, we can calculate
+		 * like other b-trees. Otherwise we may have the chance of
+		 * create a tree, the credit calculation is borrowed from
+		 * ocfs2_calc_extend_credits with root_el = NULL. And the
+		 * new tree will be cluster based, so no meta is needed.
+		 */
 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
 			struct ocfs2_extent_list *el =
 				 &xb->xb_attrs.xb_root.xt_list;
 			meta_add += ocfs2_extend_meta_needed(el);
 			credits += ocfs2_calc_extend_credits(inode->i_sb,
 							     el, 1);
-		}
+		} else
+			credits += OCFS2_SUBALLOC_ALLOC + 1;
 
 		/*
 		 * This cluster will be used either for new bucket or for
-- 
cgit v0.10.2


From 0e445b6fe93c723fe8093fd04ddfeb11ae2de082 Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Tue, 9 Dec 2008 16:42:51 +0800
Subject: ocfs2: calculate and reserve credits for xattr value in mknod

We extend the credits for xattr's large value in set_value_outside
before, this can give rise to a credits issue when we set one security
entry and two acl entries duing mknod. As we remove extend_trans form
set_value_outside, we must calculate and reserve the credits for
xattr's large value in mknod.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 73fb9f7..e5be470 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -490,9 +490,14 @@ int ocfs2_calc_security_init(struct inode *dir,
 	}
 
 	/* reserve clusters for xattr value which will be set in B tree*/
-	if (si->value_len > OCFS2_XATTR_INLINE_SIZE)
-		*want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
-							   si->value_len);
+	if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
+		int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
+							    si->value_len);
+
+		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
+							   new_clusters);
+		*want_clusters += new_clusters;
+	}
 	return ret;
 }
 
@@ -506,9 +511,7 @@ int ocfs2_calc_xattr_init(struct inode *dir,
 {
 	int ret = 0;
 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
-	int s_size = 0;
-	int a_size = 0;
-	int acl_len = 0;
+	int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
 
 	if (si->enable)
 		s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
@@ -556,16 +559,25 @@ int ocfs2_calc_xattr_init(struct inode *dir,
 		*xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
 	}
 
-	/* reserve clusters for xattr value which will be set in B tree*/
-	if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE)
-		*want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
-							   si->value_len);
+	/*
+	 * reserve credits and clusters for xattrs which has large value
+	 * and have to be set outside
+	 */
+	if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
+		new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
+							si->value_len);
+		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
+							   new_clusters);
+		*want_clusters += new_clusters;
+	}
 	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
 	    acl_len > OCFS2_XATTR_INLINE_SIZE) {
-		*want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
-		if (S_ISDIR(mode))
-			*want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
-								   acl_len);
+		/* for directory, it has DEFAULT and ACCESS two types of acls */
+		new_clusters = (S_ISDIR(mode) ? 2 : 1) *
+				ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
+		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
+							   new_clusters);
+		*want_clusters += new_clusters;
 	}
 
 	return ret;
-- 
cgit v0.10.2


From 008aafaf0b4aa0476da483e3c6e3edbe951811ff Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Tue, 9 Dec 2008 16:43:08 +0800
Subject: ocfs2: alloc xattr bucket in ocfs2_xattr_set_handle

In extreme situation, may need xattr bucket for setting
security entry and acl entries during mknod. This only
happens when block size is too small.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e5be470..095b0bb 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2611,9 +2611,7 @@ out:
 /*
  * This function only called duing creating inode
  * for init security/acl xattrs of the new inode.
- * The xattrs could be put into ibody or extent block,
- * xattr bucket would not be use in this case.
- * transanction credits also be reserved in here.
+ * All transanction credits have been reserved in mknod.
  */
 int ocfs2_xattr_set_handle(handle_t *handle,
 			   struct inode *inode,
@@ -2653,6 +2651,19 @@ int ocfs2_xattr_set_handle(handle_t *handle,
 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
 		return -EOPNOTSUPP;
 
+	/*
+	 * In extreme situation, may need xattr bucket when
+	 * block size is too small. And we have already reserved
+	 * the credits for bucket in mknod.
+	 */
+	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
+		xbs.bucket = ocfs2_xattr_bucket_new(inode);
+		if (!xbs.bucket) {
+			mlog_errno(-ENOMEM);
+			return -ENOMEM;
+		}
+	}
+
 	xis.inode_bh = xbs.inode_bh = di_bh;
 	di = (struct ocfs2_dinode *)di_bh->b_data;
 
@@ -2672,6 +2683,7 @@ int ocfs2_xattr_set_handle(handle_t *handle,
 cleanup:
 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
 	brelse(xbs.xattr_bh);
+	ocfs2_xattr_bucket_free(xbs.bucket);
 
 	return ret;
 }
-- 
cgit v0.10.2


From 38d59ef61c11cafc50a66787bdbbe80d58bbd9c0 Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Wed, 17 Dec 2008 10:22:56 +0800
Subject: ocfs2: Add xattr support checking in init_security

We must check whether ocfs2 volume support xattr in init_security,
if not support xattr and security is enable, would cause failure of mknod.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 095b0bb..e1d638a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -5324,6 +5324,9 @@ int ocfs2_init_security_get(struct inode *inode,
 			    struct inode *dir,
 			    struct ocfs2_security_xattr_info *si)
 {
+	/* check whether ocfs2 support feature xattr */
+	if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
+		return -EOPNOTSUPP;
 	return security_inode_init_security(inode, dir, &si->name, &si->value,
 					    &si->value_len);
 }
-- 
cgit v0.10.2


From a641dc2a5a1445eb4cb491080dfc41c42a9eb37d Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Wed, 24 Dec 2008 16:03:48 -0800
Subject: ocfs2: remove unneeded lvb casts

dlmglue.c has lots of code which casts the return value of ocfs2_dlm_lvb().
This is pointless however, as ocfs2_dlm_lvb() returns void *.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index b1c7591..f731ab4 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -115,8 +115,7 @@ static void ocfs2_dump_meta_lvb_info(u64 level,
 				     unsigned int line,
 				     struct ocfs2_lock_res *lockres)
 {
-	struct ocfs2_meta_lvb *lvb =
-		(struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
+	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
 	mlog(level, "LVB information for %s (called from %s:%u):\n",
 	     lockres->l_name, function, line);
@@ -1864,7 +1863,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 
 	mlog_entry_void();
 
-	lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
+	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
 	/*
 	 * Invalidate the LVB of a deleted inode - this way other
@@ -1916,7 +1915,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 
 	mlog_meta_lvb(0, lockres);
 
-	lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
+	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
 	/* We're safe here without the lockres lock... */
 	spin_lock(&oi->ip_lock);
@@ -1951,8 +1950,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
 					      struct ocfs2_lock_res *lockres)
 {
-	struct ocfs2_meta_lvb *lvb =
-		(struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
+	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
 	if (lvb->lvb_version == OCFS2_LVB_VERSION
 	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
@@ -3489,7 +3487,7 @@ static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
 
 	mlog_entry_void();
 
-	lvb = (struct ocfs2_qinfo_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
+	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
 	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
 	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
-- 
cgit v0.10.2


From dad7d975e4bd893c79fd122105b37b9a1776816a Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Wed, 24 Dec 2008 16:33:08 -0800
Subject: ocfs2: use min_t in ocfs2_quota_read()

This is preferred to min().

Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 444aa5a..6aff8f2 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -167,7 +167,7 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
 		len = i_size - off;
 	toread = len;
 	while (toread > 0) {
-		tocopy = min((size_t)(sb->s_blocksize - offset), toread);
+		tocopy = min_t(size_t, (sb->s_blocksize - offset), toread);
 		bh = NULL;
 		err = ocfs2_read_quota_block(gqinode, blk, &bh);
 		if (err) {
-- 
cgit v0.10.2


From 9047beabb8a396f0b18de1e4a9ab920cf92054af Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 5 Jan 2009 14:45:24 +0800
Subject: ocfs2: Access the right buffer_head in ocfs2_merge_rec_left.

In commit "ocfs2: Use metadata-specific ocfs2_journal_access_*()
functions", the wrong buffer_head is accessed. So change it
to the right buffer_head.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Acked-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 874c0bd..54ff4c77 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3402,8 +3402,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 			has_empty_extent = 1;
 	}
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, left_path,
-					   path_num_items(left_path) - 1);
+	ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+					   path_num_items(right_path) - 1);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
-- 
cgit v0.10.2


From 5b6f1eb97d462a45be3b30759758b5fdbb562c8c Mon Sep 17 00:00:00 2001
From: Alain Knaff <alain@knaff.lu>
Date: Mon, 10 Nov 2008 17:08:08 -0800
Subject: vfs: lseek(fd, 0, SEEK_CUR) race condition

This patch fixes a race condition in lseek. While it is expected that
unpredictable behaviour may result while repositioning the offset of a
file descriptor concurrently with reading/writing to the same file
descriptor, this should not happen when merely *reading* the file
descriptor's offset.

Unfortunately, the only portable way in Unix to read a file
descriptor's offset is lseek(fd, 0, SEEK_CUR); however executing this
concurrently with read/write may mess up the position.

[with fixes from akpm]

Signed-off-by: Alain Knaff <alain@knaff.lu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/read_write.c b/fs/read_write.c
index 969a6d9..5cc6924 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -50,6 +50,14 @@ generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
 		offset += inode->i_size;
 		break;
 	case SEEK_CUR:
+		/*
+		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
+		 * position-querying operation.  Avoid rewriting the "same"
+		 * f_pos value back to the file because a concurrent read(),
+		 * write() or lseek() might have altered it
+		 */
+		if (offset == 0)
+			return file->f_pos;
 		offset += file->f_pos;
 		break;
 	}
@@ -105,6 +113,10 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
 			offset += i_size_read(file->f_path.dentry->d_inode);
 			break;
 		case SEEK_CUR:
+			if (offset == 0) {
+				retval = file->f_pos;
+				goto out;
+			}
 			offset += file->f_pos;
 	}
 	retval = -EINVAL;
@@ -115,6 +127,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
 		}
 		retval = offset;
 	}
+out:
 	unlock_kernel();
 	return retval;
 }
-- 
cgit v0.10.2


From 7f5ff766a7babd72fc192125e12ef5570effff4c Mon Sep 17 00:00:00 2001
From: Dmitri Monakhov <dmonakhov@openvz.org>
Date: Mon, 1 Dec 2008 14:34:56 -0800
Subject: kill suid bit only for regular files

We don't have to do it because it is useless for non regular files.
In fact block device may trigger this path without dentry->d_inode->i_mutex.

(akpm: concerns were expressed (by me) about S_ISDIR inodes)

Signed-off-by: Dmitri Monakhov <dmonakhov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/mm/filemap.c b/mm/filemap.c
index f3e5f89..ed53ce8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1766,7 +1766,7 @@ int should_remove_suid(struct dentry *dentry)
 	if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
 		kill |= ATTR_KILL_SGID;
 
-	if (unlikely(kill && !capable(CAP_FSETID)))
+	if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
 		return kill;
 
 	return 0;
-- 
cgit v0.10.2


From c765d479037808532310212e9b3fa95760e975f2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 4 Dec 2008 09:50:55 -0500
Subject: affs: do not zero ->i_op

it is already set to empty table and should never be NULL

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 415d9c6..3c4ec7d 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -119,8 +119,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
 		goto bad_inode;
 #else
 		inode->i_mode |= S_IFDIR;
-		inode->i_op = NULL;
-		inode->i_fop = NULL;
+		/* ... and leave ->i_op and ->i_fop pointing to empty */
 		break;
 #endif
 	case ST_LINKFILE:
-- 
cgit v0.10.2


From 261964c60ff6524076d439da9386d4782729c4d9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 4 Dec 2008 09:57:25 -0500
Subject: isofs check for NULL ->i_op in root directory is dead code

for one thing it never happens, for another we check that inode
is a directory right after that place anyway (and we'd already
checked that reading it from disk has not failed).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 3f8af0f..6147ec3 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -855,10 +855,6 @@ root_found:
 	}
 	sbi->s_joliet_level = joliet_level;
 
-	/* check the root inode */
-	if (!inode->i_op)
-		goto out_bad_root;
-
 	/* Make sure the root inode is a directory */
 	if (!S_ISDIR(inode->i_mode)) {
 		printk(KERN_WARNING
@@ -886,8 +882,6 @@ root_found:
 	/*
 	 * Display error messages and free resources.
 	 */
-out_bad_root:
-	printk(KERN_WARNING "%s: root inode not initialized\n", __func__);
 out_iput:
 	iput(inode);
 	goto out_no_inode;
-- 
cgit v0.10.2


From 9742df331deb3fce95b321f38d4ea0c4e75edb63 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 4 Dec 2008 09:59:23 -0500
Subject: ntfs: don't NULL i_op

it's already set to empty table (and no, ntfs doesn't have any explicit
checks for NULL ->i_op or NULL ->i_fop)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index e9da092..86bef15 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1406,9 +1406,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 		ni->allocated_size = sle64_to_cpu(
 				a->data.non_resident.allocated_size);
 	}
-	/* Setup the operations for this attribute inode. */
-	vi->i_op = NULL;
-	vi->i_fop = NULL;
 	if (NInoMstProtected(ni))
 		vi->i_mapping->a_ops = &ntfs_mst_aops;
 	else
-- 
cgit v0.10.2


From acfa4380efe77e290d3a96b11cd4c9f24f4fbb18 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 4 Dec 2008 10:06:33 -0500
Subject: inode->i_op is never NULL

We used to have rather schizophrenic set of checks for NULL ->i_op even
though it had been eliminated years ago.  You'd need to go out of your
way to set it to NULL explicitly _and_ a bunch of code would die on
such inodes anyway.  After killing two remaining places that still
did that bogosity, all that crap can go away.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f247da9..5ab9896 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1641,7 +1641,7 @@ do_expand:
 	i_size_write(inode, offset);
 	spin_unlock(&inode->i_lock);
 out_truncate:
-	if (inode->i_op && inode->i_op->truncate)
+	if (inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 	return 0;
 out_sig:
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5e78fc1..0111906 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -612,8 +612,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz)
 	struct ecryptfs_crypt_stat *crypt_stat;
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	if (!lower_dentry->d_inode->i_op ||
-	    !lower_dentry->d_inode->i_op->readlink) {
+	if (!lower_dentry->d_inode->i_op->readlink) {
 		rc = -EINVAL;
 		goto out;
 	}
diff --git a/fs/namei.c b/fs/namei.c
index dd5c9f0..1f6656c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -257,7 +257,7 @@ int inode_permission(struct inode *inode, int mask)
 			return -EACCES;
 	}
 
-	if (inode->i_op && inode->i_op->permission)
+	if (inode->i_op->permission)
 		retval = inode->i_op->permission(inode, mask);
 	else
 		retval = generic_permission(inode, mask, NULL);
@@ -432,7 +432,7 @@ static int exec_permission_lite(struct inode *inode)
 {
 	umode_t	mode = inode->i_mode;
 
-	if (inode->i_op && inode->i_op->permission)
+	if (inode->i_op->permission)
 		return -EAGAIN;
 
 	if (current_fsuid() == inode->i_uid)
@@ -908,9 +908,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 		inode = next.dentry->d_inode;
 		if (!inode)
 			goto out_dput;
-		err = -ENOTDIR; 
-		if (!inode->i_op)
-			goto out_dput;
 
 		if (inode->i_op->follow_link) {
 			err = do_follow_link(&next, nd);
@@ -920,9 +917,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 			inode = nd->path.dentry->d_inode;
 			if (!inode)
 				break;
-			err = -ENOTDIR; 
-			if (!inode->i_op)
-				break;
 		} else
 			path_to_nameidata(&next, nd);
 		err = -ENOTDIR; 
@@ -961,7 +955,7 @@ last_component:
 			break;
 		inode = next.dentry->d_inode;
 		if ((lookup_flags & LOOKUP_FOLLOW)
-		    && inode && inode->i_op && inode->i_op->follow_link) {
+		    && inode && inode->i_op->follow_link) {
 			err = do_follow_link(&next, nd);
 			if (err)
 				goto return_err;
@@ -973,7 +967,7 @@ last_component:
 			break;
 		if (lookup_flags & LOOKUP_DIRECTORY) {
 			err = -ENOTDIR; 
-			if (!inode->i_op || !inode->i_op->lookup)
+			if (!inode->i_op->lookup)
 				break;
 		}
 		goto return_base;
@@ -1469,7 +1463,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	if (error)
 		return error;
 
-	if (!dir->i_op || !dir->i_op->create)
+	if (!dir->i_op->create)
 		return -EACCES;	/* shouldn't it be ENOSYS? */
 	mode &= S_IALLUGO;
 	mode |= S_IFREG;
@@ -1752,7 +1746,7 @@ do_last:
 	error = -ENOENT;
 	if (!path.dentry->d_inode)
 		goto exit_dput;
-	if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
+	if (path.dentry->d_inode->i_op->follow_link)
 		goto do_link;
 
 	path_to_nameidata(&path, &nd);
@@ -1933,7 +1927,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
 		return -EPERM;
 
-	if (!dir->i_op || !dir->i_op->mknod)
+	if (!dir->i_op->mknod)
 		return -EPERM;
 
 	error = devcgroup_inode_mknod(mode, dev);
@@ -2035,7 +2029,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	if (error)
 		return error;
 
-	if (!dir->i_op || !dir->i_op->mkdir)
+	if (!dir->i_op->mkdir)
 		return -EPERM;
 
 	mode &= (S_IRWXUGO|S_ISVTX);
@@ -2126,7 +2120,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 	if (error)
 		return error;
 
-	if (!dir->i_op || !dir->i_op->rmdir)
+	if (!dir->i_op->rmdir)
 		return -EPERM;
 
 	DQUOT_INIT(dir);
@@ -2213,7 +2207,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 	if (error)
 		return error;
 
-	if (!dir->i_op || !dir->i_op->unlink)
+	if (!dir->i_op->unlink)
 		return -EPERM;
 
 	DQUOT_INIT(dir);
@@ -2320,7 +2314,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
 	if (error)
 		return error;
 
-	if (!dir->i_op || !dir->i_op->symlink)
+	if (!dir->i_op->symlink)
 		return -EPERM;
 
 	error = security_inode_symlink(dir, dentry, oldname);
@@ -2401,7 +2395,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 	 */
 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 		return -EPERM;
-	if (!dir->i_op || !dir->i_op->link)
+	if (!dir->i_op->link)
 		return -EPERM;
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
@@ -2608,7 +2602,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (error)
 		return error;
 
-	if (!old_dir->i_op || !old_dir->i_op->rename)
+	if (!old_dir->i_op->rename)
 		return -EPERM;
 
 	DQUOT_INIT(old_dir);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d1c5f78..5245a39 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1211,7 +1211,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	dirp = dentry->d_inode;
 
 	err = nfserr_notdir;
-	if(!dirp->i_op || !dirp->i_op->lookup)
+	if (!dirp->i_op->lookup)
 		goto out;
 	/*
 	 * Check whether the response file handle has been verified yet.
@@ -1347,7 +1347,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	/* Get all the sanity checks out of the way before
 	 * we lock the parent. */
 	err = nfserr_notdir;
-	if(!dirp->i_op || !dirp->i_op->lookup)
+	if (!dirp->i_op->lookup)
 		goto out;
 	fh_lock_nested(fhp, I_MUTEX_PARENT);
 
@@ -1482,7 +1482,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
 	inode = dentry->d_inode;
 
 	err = nfserr_inval;
-	if (!inode->i_op || !inode->i_op->readlink)
+	if (!inode->i_op->readlink)
 		goto out;
 
 	touch_atime(fhp->fh_export->ex_path.mnt, dentry);
@@ -2162,7 +2162,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
 	size_t size;
 	int error;
 
-	if (!IS_POSIXACL(inode) || !inode->i_op ||
+	if (!IS_POSIXACL(inode) ||
 	    !inode->i_op->setxattr || !inode->i_op->removexattr)
 		return -EOPNOTSUPP;
 	switch(type) {
diff --git a/fs/open.c b/fs/open.c
index 1cd7d40..d882fd2 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -412,7 +412,7 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len)
 	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
 		goto out_fput;
 
-	if (inode->i_op && inode->i_op->fallocate)
+	if (inode->i_op->fallocate)
 		ret = inode->i_op->fallocate(inode, mode, offset, len);
 	else
 		ret = -EOPNOTSUPP;
diff --git a/fs/stat.c b/fs/stat.c
index 7c46fbe..7e12a6f8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -305,7 +305,7 @@ asmlinkage long sys_readlinkat(int dfd, const char __user *pathname,
 		struct inode *inode = path.dentry->d_inode;
 
 		error = -EINVAL;
-		if (inode->i_op && inode->i_op->readlink) {
+		if (inode->i_op->readlink) {
 			error = security_inode_readlink(path.dentry);
 			if (!error) {
 				touch_atime(path.mnt, path.dentry);
diff --git a/fs/xattr.c b/fs/xattr.c
index 468377e..237804c 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -175,7 +175,7 @@ vfs_listxattr(struct dentry *d, char *list, size_t size)
 	if (error)
 		return error;
 	error = -EOPNOTSUPP;
-	if (d->d_inode->i_op && d->d_inode->i_op->listxattr) {
+	if (d->d_inode->i_op->listxattr) {
 		error = d->d_inode->i_op->listxattr(d, list, size);
 	} else {
 		error = security_inode_listsecurity(d->d_inode, list, size);
diff --git a/mm/memory.c b/mm/memory.c
index 0a2010a..7b9db65 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2266,7 +2266,7 @@ int vmtruncate(struct inode * inode, loff_t offset)
 		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 	}
 
-	if (inode->i_op && inode->i_op->truncate)
+	if (inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 	return 0;
 
@@ -2286,7 +2286,7 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 	 * a way to truncate a range of blocks (punch a hole) -
 	 * we should return failure right now.
 	 */
-	if (!inode->i_op || !inode->i_op->truncate_range)
+	if (!inode->i_op->truncate_range)
 		return -ENOSYS;
 
 	mutex_lock(&inode->i_mutex);
diff --git a/mm/nommu.c b/mm/nommu.c
index 7695dc8..1c28ea3 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -86,7 +86,7 @@ do_expand:
 	i_size_write(inode, offset);
 
 out_truncate:
-	if (inode->i_op && inode->i_op->truncate)
+	if (inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 	return 0;
 out_sig:
diff --git a/security/commoncap.c b/security/commoncap.c
index 7971354..69fc995 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -238,7 +238,7 @@ int cap_inode_need_killpriv(struct dentry *dentry)
 	struct inode *inode = dentry->d_inode;
 	int error;
 
-	if (!inode->i_op || !inode->i_op->getxattr)
+	if (!inode->i_op->getxattr)
 	       return 0;
 
 	error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0);
@@ -259,7 +259,7 @@ int cap_inode_killpriv(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
 
-	if (!inode->i_op || !inode->i_op->removexattr)
+	if (!inode->i_op->removexattr)
 	       return 0;
 
 	return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS);
@@ -317,7 +317,7 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
 
 	memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
 
-	if (!inode || !inode->i_op || !inode->i_op->getxattr)
+	if (!inode || !inode->i_op->getxattr)
 		return -ENODATA;
 
 	size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_CAPS, &caps,
-- 
cgit v0.10.2


From 56ff5efad96182f4d3cb3dc6b07396762c658f16 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 9 Dec 2008 09:34:39 -0500
Subject: zero i_uid/i_gid on inode allocation

... and don't bother in callers.  Don't bother with zeroing i_blocks,
while we are at it - it's already been zeroed.

i_mode is not worth the effort; it has no common default value.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 6296bfd..e309ef7 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -97,7 +97,6 @@ spufs_new_inode(struct super_block *sb, int mode)
 	inode->i_mode = mode;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
-	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 out:
 	return inode;
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 9d4f8e6..5a805df 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -106,7 +106,6 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode)
 		ret->i_mode = mode;
 		ret->i_uid = hypfs_info->uid;
 		ret->i_gid = hypfs_info->gid;
-		ret->i_blocks = 0;
 		ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
 		if (mode & S_IFDIR)
 			ret->i_nlink = 2;
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 53912c3..8dc2bb7 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -57,9 +57,6 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
 	}
 
 	inode->i_mode = mode;
-	inode->i_uid = 0;
-	inode->i_gid = 0;
-	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	inode->i_private = data;
 	if ((mode & S_IFMT) == S_IFDIR) {
diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c
index 0aa66ec..b129409 100644
--- a/drivers/isdn/capi/capifs.c
+++ b/drivers/isdn/capi/capifs.c
@@ -111,8 +111,6 @@ capifs_fill_super(struct super_block *s, void *data, int silent)
 		goto fail;
 	inode->i_ino = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-	inode->i_blocks = 0;
-	inode->i_uid = inode->i_gid = 0;
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index 22a7e8b..de966a6 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -146,8 +146,6 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
 
 	if (ret) {
 		ret->i_mode = mode;
-		ret->i_uid = ret->i_gid = 0;
-		ret->i_blocks = 0;
 		ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
 	}
 	return ret;
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index ddc4c59..b7e4cee 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -29,9 +29,6 @@ static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode)
 
 	if (inode) {
 		inode->i_mode = mode;
-		inode->i_uid = 0;
-		inode->i_gid = 0;
-		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	}
 	return inode;
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 185be76..2a129cb 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -279,7 +279,6 @@ static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t de
 		inode->i_mode = mode;
 		inode->i_uid = current_fsuid();
 		inode->i_gid = current_fsgid();
-		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
 		default:
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index eeb26c0..317b48f 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -2001,7 +2001,6 @@ gadgetfs_make_inode (struct super_block *sb,
 		inode->i_mode = mode;
 		inode->i_uid = default_uid;
 		inode->i_gid = default_gid;
-		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime
 				= CURRENT_TIME;
 		inode->i_private = data;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index c773680..e1734f2 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -251,13 +251,11 @@ struct inode *autofs_iget(struct super_block *sb, unsigned long ino)
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
 	inode->i_nlink = 2;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-	inode->i_blocks = 0;
 
 	if (ino == AUTOFS_ROOT_INO) {
 		inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
 		inode->i_op = &autofs_root_inode_operations;
 		inode->i_fop = &autofs_root_operations;
-		inode->i_uid = inode->i_gid = 0; /* Changed in read_super */
 		goto done;
 	} 
 	
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 7b19802..cfc23e5 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -455,11 +455,7 @@ struct inode *autofs4_get_inode(struct super_block *sb,
 	if (sb->s_root) {
 		inode->i_uid = sb->s_root->d_inode->i_uid;
 		inode->i_gid = sb->s_root->d_inode->i_gid;
-	} else {
-		inode->i_uid = 0;
-		inode->i_gid = 0;
 	}
-	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 
 	if (S_ISDIR(inf->mode)) {
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index f2744ab..e1158cb 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -496,9 +496,6 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
 
 	if (inode) {
 		inode->i_mode = mode;
-		inode->i_uid = 0;
-		inode->i_gid = 0;
-		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime =
 			current_fs_time(inode->i_sb);
 	}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4803ccc..5d349d3 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -117,8 +117,6 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
 static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
 {
 	inode->i_mode = mode;
-	inode->i_uid = 0;
-	inode->i_gid = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 }
 
@@ -136,7 +134,6 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
 {
 	struct inode * inode = new_inode(configfs_sb);
 	if (inode) {
-		inode->i_blocks = 0;
 		inode->i_mapping->a_ops = &configfs_aops;
 		inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
 		inode->i_op = &configfs_inode_operations;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index f40423e..a07338d 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -83,8 +83,6 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
 			inode->i_op = &page_symlink_inode_operations;
 			inode->i_data.a_ops = &cramfs_aops;
 		} else {
-			inode->i_size = 0;
-			inode->i_blocks = 0;
 			init_special_inode(inode, inode->i_mode,
 				old_decode_dev(cramfs_inode->size));
 		}
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 3dbe216..81ae9ea 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -37,9 +37,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
 
 	if (inode) {
 		inode->i_mode = mode;
-		inode->i_uid = 0;
-		inode->i_gid = 0;
-		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
 		default:
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index fff96e1..5f3231b 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -189,8 +189,6 @@ static int mknod_ptmx(struct super_block *sb)
 	}
 
 	inode->i_ino = 2;
-	inode->i_uid = inode->i_gid = 0;
-	inode->i_blocks = 0;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 
 	mode = S_IFCHR|opts->ptmxmode;
@@ -300,8 +298,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
 		goto free_fsi;
 	inode->i_ino = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-	inode->i_blocks = 0;
-	inode->i_uid = inode->i_gid = 0;
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7d479ce..0ab0c6f5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -506,7 +506,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
 		inode->i_mode = mode;
 		inode->i_uid = uid;
 		inode->i_gid = gid;
-		inode->i_blocks = 0;
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
 		inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/inode.c b/fs/inode.c
index 7de1cda..bd48e5e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -131,6 +131,8 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_op = &empty_iops;
 	inode->i_fop = &empty_fops;
 	inode->i_nlink = 1;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
 	atomic_set(&inode->i_writecount, 0);
 	inode->i_size = 0;
 	inode->i_blocks = 0;
diff --git a/fs/libfs.c b/fs/libfs.c
index e960a83..7de05f7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -231,7 +231,6 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
 	 */
 	root->i_ino = 1;
 	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
-	root->i_uid = root->i_gid = 0;
 	root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
 	dentry = d_alloc(NULL, &d_name);
 	if (!dentry) {
@@ -436,8 +435,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files
 	 */
 	inode->i_ino = 1;
 	inode->i_mode = S_IFDIR | 0755;
-	inode->i_uid = inode->i_gid = 0;
-	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
@@ -464,8 +461,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files
 		if (!inode)
 			goto out;
 		inode->i_mode = S_IFREG | files->mode;
-		inode->i_uid = inode->i_gid = 0;
-		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		inode->i_fop = files->ops;
 		inode->i_ino = i;
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 6f7a77d..1c9efb4 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -341,7 +341,6 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
 		inode->i_mode = mode;
 		inode->i_uid = current_fsuid();
 		inode->i_gid = current_fsgid();
-		inode->i_blocks = 0;
 		inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		inc_nlink(inode);
@@ -367,7 +366,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
 	inode->i_mode = mode;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
-	inode->i_blocks = 0;
 	inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 6afe57c..633e9dc 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -39,7 +39,6 @@ struct inode *omfs_new_inode(struct inode *dir, int mode)
 	inode->i_mode = mode;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
-	inode->i_blocks = 0;
 	inode->i_mapping->a_ops = &omfs_aops;
 
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index d41bdc7..ffcd04f 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -256,9 +256,6 @@ found:
 		break;
 	}
 
-	inode->i_gid = 0;
-	inode->i_uid = 0;
-
 	d_add(dentry, inode);
 	return NULL;
 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index cad92c1..10fd522 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1426,8 +1426,6 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
 	if (!ei->pid)
 		goto out_unlock;
 
-	inode->i_uid = 0;
-	inode->i_gid = 0;
 	if (task_dumpable(task)) {
 		rcu_read_lock();
 		cred = __task_cred(task);
@@ -2349,8 +2347,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
 	if (!ei->pid)
 		goto out_iput;
 
-	inode->i_uid = 0;
-	inode->i_gid = 0;
 	inode->i_mode = p->mode;
 	if (S_ISDIR(inode->i_mode))
 		inode->i_nlink = 2;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 06ed10b..94fcfff 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -31,7 +31,6 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
 	inode->i_mode = table->mode;
-	inode->i_uid = inode->i_gid = 0;
 	if (!table->child) {
 		inode->i_mode |= S_IFREG;
 		inode->i_op = &proc_sys_inode_operations;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a83a351..b7e6ac7 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -57,7 +57,6 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
 		inode->i_mode = mode;
 		inode->i_uid = current_fsuid();
 		inode->i_gid = current_fsgid();
-		inode->i_blocks = 0;
 		inode->i_mapping->a_ops = &ramfs_aops;
 		inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
 		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 60d2f82..c97d4c9 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -524,7 +524,6 @@ romfs_iget(struct super_block *sb, unsigned long ino)
 	i->i_size = be32_to_cpu(ri.size);
 	i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0;
 	i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
-	i->i_uid = i->i_gid = 0;
 
         /* Precalculate the data offset */
         ino = romfs_strnlen(i, ino+ROMFH_SIZE, ROMFS_MAXFN);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index eb53c63..dfa3d94 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -107,8 +107,6 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
 static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
 {
 	inode->i_mode = mode;
-	inode->i_uid = 0;
-	inode->i_gid = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 }
 
@@ -149,7 +147,6 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
 {
 	struct bin_attribute *bin_attr;
 
-	inode->i_blocks = 0;
 	inode->i_mapping->a_ops = &sysfs_aops;
 	inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
 	inode->i_op = &sysfs_inode_operations;
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index d9393f8..41b72f0 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -120,7 +120,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 		inode->i_mode = mode;
 		inode->i_uid = current_fsuid();
 		inode->i_gid = current_fsgid();
-		inode->i_blocks = 0;
 		inode->i_mtime = inode->i_ctime = inode->i_atime =
 				CURRENT_TIME;
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 48348dd..f7c5099 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -573,7 +573,6 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
 		inode->i_mode = mode;
 		inode->i_uid = current_fsuid();
 		inode->i_gid = current_fsgid();
-		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
 	}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 1924532..577385a 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -522,8 +522,6 @@ rpc_get_inode(struct super_block *sb, int mode)
 	if (!inode)
 		return NULL;
 	inode->i_mode = mode;
-	inode->i_uid = inode->i_gid = 0;
-	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	switch(mode & S_IFMT) {
 		case S_IFDIR:
diff --git a/security/inode.c b/security/inode.c
index efea5a6..007ef25 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -61,9 +61,6 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev)
 
 	if (inode) {
 		inode->i_mode = mode;
-		inode->i_uid = 0;
-		inode->i_gid = 0;
-		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
 		default:
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index e552099..8f612c8 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -847,8 +847,6 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode)
 
 	if (ret) {
 		ret->i_mode = mode;
-		ret->i_uid = ret->i_gid = 0;
-		ret->i_blocks = 0;
 		ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
 	}
 	return ret;
-- 
cgit v0.10.2


From 6110e3abbff8b785907d4db50240e63c1be726e3 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 17 Dec 2008 13:53:20 -0500
Subject: sys_execve and sys_uselib do not call into fsnotify

sys_execve and sys_uselib do not call into fsnotify so inotify does not get
open events for these types of syscalls.  This patch simply makes the
requisite fsnotify calls.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/exec.c b/fs/exec.c
index 3ef9cf9..9c33f54 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -51,6 +51,7 @@
 #include <linux/audit.h>
 #include <linux/tracehook.h>
 #include <linux/kmod.h>
+#include <linux/fsnotify.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -132,6 +133,8 @@ asmlinkage long sys_uselib(const char __user * library)
 	if (IS_ERR(file))
 		goto out;
 
+	fsnotify_open(file->f_path.dentry);
+
 	error = -ENOEXEC;
 	if(file->f_op) {
 		struct linux_binfmt * fmt;
@@ -684,6 +687,8 @@ struct file *open_exec(const char *name)
 	if (IS_ERR(file))
 		return file;
 
+	fsnotify_open(file->f_path.dentry);
+
 	err = deny_write_access(file);
 	if (err) {
 		fput(file);
-- 
cgit v0.10.2


From 4c728ef583b3d82266584da5cb068294c09df31e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Dec 2008 21:11:15 +0100
Subject: add a vfs_fsync helper

Fsync currently has a fdatawrite/fdatawait pair around the method call,
and a mutex_lock/unlock of the inode mutex.  All callers of fsync have
to duplicate this, but we have a few and most of them don't quite get
it right.  This patch adds a new vfs_fsync that takes care of this.
It's a little more complicated as usual as ->fsync might get a NULL file
pointer and just a dentry from nfsd, but otherwise gets afile and we
want to take the mapping and file operations from it when it is there.

Notes on the fsync callers:

 - ecryptfs wasn't calling filemap_fdatawrite / filemap_fdatawait on the
   	lower file
 - coda wasn't calling filemap_fdatawrite / filemap_fdatawait on the host
	file, and returning 0 when ->fsync was missing
 - shm wasn't calling either filemap_fdatawrite / filemap_fdatawait nor
   taking i_mutex.  Now given that shared memory doesn't have disk
   backing not doing anything in fsync seems fine and I left it out of
   the vfs_fsync conversion for now, but in that case we might just
   not pass it through to the lower file at all but just call the no-op
   simple_sync_file directly.

[and now actually export vfs_fsync]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index c4e62a6..2e71368 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -1863,26 +1863,10 @@ static int do_write(struct fsg_dev *fsg)
 static int fsync_sub(struct lun *curlun)
 {
 	struct file	*filp = curlun->filp;
-	struct inode	*inode;
-	int		rc, err;
 
 	if (curlun->ro || !filp)
 		return 0;
-	if (!filp->f_op->fsync)
-		return -EINVAL;
-
-	inode = filp->f_path.dentry->d_inode;
-	mutex_lock(&inode->i_mutex);
-	rc = filemap_fdatawrite(inode->i_mapping);
-	err = filp->f_op->fsync(filp, filp->f_path.dentry, 1);
-	if (!rc)
-		rc = err;
-	err = filemap_fdatawait(inode->i_mapping);
-	if (!rc)
-		rc = err;
-	mutex_unlock(&inode->i_mutex);
-	VLDBG(curlun, "fdatasync -> %d\n", rc);
-	return rc;
+	return vfs_fsync(filp, filp->f_path.dentry, 1);
 }
 
 static void fsync_all(struct fsg_dev *fsg)
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 466303d..6a347fb 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -201,8 +201,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
 int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
 {
 	struct file *host_file;
-	struct dentry *host_dentry;
-	struct inode *host_inode, *coda_inode = coda_dentry->d_inode;
+	struct inode *coda_inode = coda_dentry->d_inode;
 	struct coda_file_info *cfi;
 	int err = 0;
 
@@ -214,14 +213,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	if (host_file->f_op && host_file->f_op->fsync) {
-		host_dentry = host_file->f_path.dentry;
-		host_inode = host_dentry->d_inode;
-		mutex_lock(&host_inode->i_mutex);
-		err = host_file->f_op->fsync(host_file, host_dentry, datasync);
-		mutex_unlock(&host_inode->i_mutex);
-	}
-
+	err = vfs_fsync(host_file, host_file->f_path.dentry, datasync);
 	if ( !err && !datasync ) {
 		lock_kernel();
 		err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index eb3dc4c..7138343 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -275,18 +275,9 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
 static int
 ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
-	struct file *lower_file = ecryptfs_file_to_lower(file);
-	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	struct inode *lower_inode = lower_dentry->d_inode;
-	int rc = -EINVAL;
-
-	if (lower_inode->i_fop->fsync) {
-		mutex_lock(&lower_inode->i_mutex);
-		rc = lower_inode->i_fop->fsync(lower_file, lower_dentry,
-					       datasync);
-		mutex_unlock(&lower_inode->i_mutex);
-	}
-	return rc;
+	return vfs_fsync(ecryptfs_file_to_lower(file),
+			 ecryptfs_dentry_to_lower(dentry),
+			 datasync);
 }
 
 static int ecryptfs_fasync(int fd, struct file *file, int flag)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 5245a39..44aa92a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -744,45 +744,16 @@ nfsd_close(struct file *filp)
 	fput(filp);
 }
 
-/*
- * Sync a file
- * As this calls fsync (not fdatasync) there is no need for a write_inode
- * after it.
- */
-static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
-			      const struct file_operations *fop)
-{
-	struct inode *inode = dp->d_inode;
-	int (*fsync) (struct file *, struct dentry *, int);
-	int err;
-
-	err = filemap_fdatawrite(inode->i_mapping);
-	if (err == 0 && fop && (fsync = fop->fsync))
-		err = fsync(filp, dp, 0);
-	if (err == 0)
-		err = filemap_fdatawait(inode->i_mapping);
-
-	return err;
-}
-	
-
 static int
 nfsd_sync(struct file *filp)
 {
-        int err;
-	struct inode *inode = filp->f_path.dentry->d_inode;
-	dprintk("nfsd: sync file %s\n", filp->f_path.dentry->d_name.name);
-	mutex_lock(&inode->i_mutex);
-	err=nfsd_dosync(filp, filp->f_path.dentry, filp->f_op);
-	mutex_unlock(&inode->i_mutex);
-
-	return err;
+	return vfs_fsync(filp, filp->f_path.dentry, 0);
 }
 
 int
-nfsd_sync_dir(struct dentry *dp)
+nfsd_sync_dir(struct dentry *dentry)
 {
-	return nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
+	return vfs_fsync(NULL, dentry, 0);
 }
 
 /*
diff --git a/fs/sync.c b/fs/sync.c
index 2967562..0921d6d 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -75,14 +75,39 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	return ret;
 }
 
-long do_fsync(struct file *file, int datasync)
+/**
+ * vfs_fsync - perform a fsync or fdatasync on a file
+ * @file:		file to sync
+ * @dentry:		dentry of @file
+ * @data:		only perform a fdatasync operation
+ *
+ * Write back data and metadata for @file to disk.  If @datasync is
+ * set only metadata needed to access modified file data is written.
+ *
+ * In case this function is called from nfsd @file may be %NULL and
+ * only @dentry is set.  This can only happen when the filesystem
+ * implements the export_operations API.
+ */
+int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
-	int ret;
-	int err;
-	struct address_space *mapping = file->f_mapping;
+	const struct file_operations *fop;
+	struct address_space *mapping;
+	int err, ret;
+
+	/*
+	 * Get mapping and operations from the file in case we have
+	 * as file, or get the default values for them in case we
+	 * don't have a struct file available.  Damn nfsd..
+	 */
+	if (file) {
+		mapping = file->f_mapping;
+		fop = file->f_op;
+	} else {
+		mapping = dentry->d_inode->i_mapping;
+		fop = dentry->d_inode->i_fop;
+	}
 
-	if (!file->f_op || !file->f_op->fsync) {
-		/* Why?  We can still call filemap_fdatawrite */
+	if (!fop || !fop->fsync) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -94,7 +119,7 @@ long do_fsync(struct file *file, int datasync)
 	 * livelocks in fsync_buffers_list().
 	 */
 	mutex_lock(&mapping->host->i_mutex);
-	err = file->f_op->fsync(file, file->f_path.dentry, datasync);
+	err = fop->fsync(file, dentry, datasync);
 	if (!ret)
 		ret = err;
 	mutex_unlock(&mapping->host->i_mutex);
@@ -104,15 +129,16 @@ long do_fsync(struct file *file, int datasync)
 out:
 	return ret;
 }
+EXPORT_SYMBOL(vfs_fsync);
 
-static long __do_fsync(unsigned int fd, int datasync)
+static int do_fsync(unsigned int fd, int datasync)
 {
 	struct file *file;
 	int ret = -EBADF;
 
 	file = fget(fd);
 	if (file) {
-		ret = do_fsync(file, datasync);
+		ret = vfs_fsync(file, file->f_path.dentry, datasync);
 		fput(file);
 	}
 	return ret;
@@ -120,12 +146,12 @@ static long __do_fsync(unsigned int fd, int datasync)
 
 asmlinkage long sys_fsync(unsigned int fd)
 {
-	return __do_fsync(fd, 0);
+	return do_fsync(fd, 0);
 }
 
 asmlinkage long sys_fdatasync(unsigned int fd)
 {
-	return __do_fsync(fd, 1);
+	return do_fsync(fd, 1);
 }
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2170ee..9ad9eac 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1827,7 +1827,7 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping,
 extern int filemap_fdatawrite_range(struct address_space *mapping,
 				loff_t start, loff_t end);
 
-extern long do_fsync(struct file *file, int datasync);
+extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
 extern void sync_supers(void);
 extern void sync_filesystems(int wait);
 extern void __fsync_super(struct super_block *sb);
diff --git a/mm/msync.c b/mm/msync.c
index 144a757..07dae08 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -82,7 +82,7 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 				(vma->vm_flags & VM_SHARED)) {
 			get_file(file);
 			up_read(&mm->mmap_sem);
-			error = do_fsync(file, 0);
+			error = vfs_fsync(file, file->f_path.dentry, 0);
 			fput(file);
 			if (error || start >= end)
 				goto out;
-- 
cgit v0.10.2


From d8e9650dff48055057253ca30933605bd7d0733b Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 25 Dec 2008 13:32:15 +0800
Subject: vfs: remove duplicate code in get_fs_type()

save 14 bytes:

   text    data     bss     dec     hex filename
   1354      32       4    1390     56e fs/filesystems.o.before
   text    data     bss     dec     hex filename
   1340      32       4    1376     560 fs/filesystems.o

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/filesystems.c b/fs/filesystems.c
index d0e20ce..d488dcd 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -253,24 +253,27 @@ static int __init proc_filesystems_init(void)
 module_init(proc_filesystems_init);
 #endif
 
-struct file_system_type *get_fs_type(const char *name)
+static struct file_system_type *__get_fs_type(const char *name, int len)
 {
 	struct file_system_type *fs;
-	const char *dot = strchr(name, '.');
-	unsigned len = dot ? dot - name : strlen(name);
 
 	read_lock(&file_systems_lock);
 	fs = *(find_filesystem(name, len));
 	if (fs && !try_module_get(fs->owner))
 		fs = NULL;
 	read_unlock(&file_systems_lock);
-	if (!fs && (request_module("%.*s", len, name) == 0)) {
-		read_lock(&file_systems_lock);
-		fs = *(find_filesystem(name, len));
-		if (fs && !try_module_get(fs->owner))
-			fs = NULL;
-		read_unlock(&file_systems_lock);
-	}
+	return fs;
+}
+
+struct file_system_type *get_fs_type(const char *name)
+{
+	struct file_system_type *fs;
+	const char *dot = strchr(name, '.');
+	int len = dot ? dot - name : strlen(name);
+
+	fs = __get_fs_type(name, len);
+	if (!fs && (request_module("%.*s", len, name) == 0))
+		fs = __get_fs_type(name, len);
 
 	if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) {
 		put_filesystem(fs);
-- 
cgit v0.10.2


From 5b45d96bf963afeb931a75faf02fb424e446e5a9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 29 Dec 2008 07:40:31 -0500
Subject: fix the treatment of jfs special inodes

We used to put them on a single list, without any locking.  Racy.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index d6363d8..0f94381 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -58,9 +58,9 @@
 
 /*
  * __mark_inode_dirty expects inodes to be hashed.  Since we don't want
- * special inodes in the fileset inode space, we hash them to a dummy head
+ * special inodes in the fileset inode space, we make them appear hashed,
+ * but do not put on any lists.
  */
-static HLIST_HEAD(aggregate_hash);
 
 /*
  * imap locks
@@ -496,7 +496,11 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 	/* release the page */
 	release_metapage(mp);
 
-	hlist_add_head(&ip->i_hash, &aggregate_hash);
+	/*
+	 * that will look hashed, but won't be on any list; hlist_del()
+	 * will work fine and require no locking.
+	 */
+	ip->i_hash.pprev = &ip->i_hash.next;
 
 	return (ip);
 }
-- 
cgit v0.10.2


From 2f1169e2dc0c70e213f79ada88a10912cc2fbe94 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 2 Jan 2009 08:16:51 -0500
Subject: fix breakage in reiserfs_new_inode()

now that we use ih.key earlier, we need to do all its setup early enough

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 145c2d3..1306d4f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1782,6 +1782,12 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		goto out_bad_inode;
 	}
 	args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
+	if (old_format_only(sb))
+		make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
+				  TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
+	else
+		make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
+				  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
 	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
 	args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
 	if (insert_inode_locked4(inode, args.objectid,
@@ -1834,13 +1840,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	reiserfs_init_acl_default(inode);
 	reiserfs_init_xattr_rwsem(inode);
 
-	if (old_format_only(sb))
-		make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
-				  TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
-	else
-		make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
-				  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
-
 	/* key to search for correct place for new stat data */
 	_make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
 		      le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
-- 
cgit v0.10.2


From 4ae8978cf92a96257cd8998a49e781be83571d64 Mon Sep 17 00:00:00 2001
From: Michael Kerrisk <mtk.manpages@googlemail.com>
Date: Mon, 5 Jan 2009 07:19:16 -0500
Subject: inotify: fix type errors in interfaces

The problems lie in the types used for some inotify interfaces, both at the kernel level and at the glibc level. This mail addresses the kernel problem. I will follow up with some suggestions for glibc changes.

For the sys_inotify_rm_watch() interface, the type of the 'wd' argument is
currently 'u32', it should be '__s32' .  That is Robert's suggestion, and
is consistent with the other declarations of watch descriptors in the
kernel source, in particular, the inotify_event structure in
include/linux/inotify.h:

struct inotify_event {
        __s32           wd;             /* watch descriptor */
        __u32           mask;           /* watch mask */
        __u32           cookie;         /* cookie to synchronize two events */
        __u32           len;            /* length (including nulls) of name */
        char            name[0];        /* stub for possible name */
};

The patch makes the changes needed for inotify_rm_watch().

Signed-off-by: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Robert Love <rlove@google.com>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 400f806..81b8644 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -704,7 +704,7 @@ fput_and_out:
 	return ret;
 }
 
-asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
+asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd)
 {
 	struct file *filp;
 	struct inotify_device *dev;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 04fb47b..18d0a24 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -549,7 +549,7 @@ asmlinkage long sys_inotify_init(void);
 asmlinkage long sys_inotify_init1(int flags);
 asmlinkage long sys_inotify_add_watch(int fd, const char __user *path,
 					u32 mask);
-asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
+asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd);
 
 asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
 				 __u32 __user *ustatus);
-- 
cgit v0.10.2


From 8eca75382e012b74b98526a1679ada2a1849024b Mon Sep 17 00:00:00 2001
From: Alan Horstmann <gineera@aspect135.co.uk>
Date: Mon, 5 Jan 2009 18:30:04 +0100
Subject: ALSA: ice1724 - Fix a typo in IEC958 PCM name

Fix trivial name string typo as reported in bug 2552.

Signed-off-by: Alan Horstmann <gineera@aspect135.co.uk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
index 0dfa054..bb8d8c7 100644
--- a/sound/pci/ice1712/ice1724.c
+++ b/sound/pci/ice1712/ice1724.c
@@ -1239,7 +1239,7 @@ static int __devinit snd_vt1724_pcm_spdif(struct snd_ice1712 *ice, int device)
 	if (ice->force_pdma4 || ice->force_rdma1)
 		name = "ICE1724 Secondary";
 	else
-		name = "IEC1724 IEC958";
+		name = "ICE1724 IEC958";
 	err = snd_pcm_new(ice->card, name, device, play, capt, &pcm);
 	if (err < 0)
 		return err;
-- 
cgit v0.10.2


From c276e098d3ee33059b4a1c747354226cec58487c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 5 Jan 2009 16:01:51 -0800
Subject: Revert "net: Fix for initial link state in 2.6.28"

This reverts commit 22604c866889c4b2e12b73cbf1683bda1b72a313.

We can't fix this issue in this way, because we now can try
to take the dev_base_lock rwlock as a writer in software interrupt
context and that is not allowed without major surgery elsewhere.

This initial link state problem needs to be solved in some other
way.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 1e401e1..bf8f7af 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -178,6 +178,7 @@ static void __linkwatch_run_queue(int urgent_only)
 		 */
 		clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
 
+		rfc2863_policy(dev);
 		if (dev->flags & IFF_UP) {
 			if (netif_carrier_ok(dev))
 				dev_activate(dev);
@@ -214,12 +215,6 @@ void linkwatch_fire_event(struct net_device *dev)
 {
 	bool urgent = linkwatch_urgent_event(dev);
 
-	rfc2863_policy(dev);
-
-	/* Some drivers call netif_carrier_off early */
-	if (dev->reg_state == NETREG_UNINITIALIZED)
-		return;
-
 	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
 		dev_hold(dev);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 23a8e61..5f5efe4 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -270,6 +270,8 @@ static void dev_watchdog_down(struct net_device *dev)
 void netif_carrier_on(struct net_device *dev)
 {
 	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
+		if (dev->reg_state == NETREG_UNINITIALIZED)
+			return;
 		linkwatch_fire_event(dev);
 		if (netif_running(dev))
 			__netdev_watchdog_up(dev);
@@ -286,6 +288,8 @@ EXPORT_SYMBOL(netif_carrier_on);
 void netif_carrier_off(struct net_device *dev)
 {
 	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
+		if (dev->reg_state == NETREG_UNINITIALIZED)
+			return;
 		linkwatch_fire_event(dev);
 	}
 }
-- 
cgit v0.10.2


From 48e4cc777c091b037acaf39036a77ece43fe1ab9 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 5 Jan 2009 16:06:02 -0800
Subject: net/ehea: bitops work on unsigned longs

The flags field of struct ehea_port is only used with test_bit(),
clear_bit() and set_bit() and these interfaces only work on
"unsigned long"s, so change the field to be an "unsigned long".  Also,
this field only has two bits defined for it (0 and 1) so will still be
fine if someone builds this driver for a 32 bit arch (at least as far as
this flags field is concerned).

Also note that ehea_driver_flags is only used in ehca_main.c, so make it
static in there.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index 9930d5f..6271b94 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -478,7 +478,7 @@ struct ehea_port {
 	int num_add_tx_qps;
 	int num_mcs;
 	int resets;
-	u64 flags;
+	unsigned long flags;
 	u64 mac_addr;
 	u32 logical_port_id;
 	u32 port_speed;
@@ -510,7 +510,6 @@ void ehea_set_ethtool_ops(struct net_device *netdev);
 int ehea_sense_port_attr(struct ehea_port *port);
 int ehea_set_portspeed(struct ehea_port *port, u32 port_speed);
 
-extern u64 ehea_driver_flags;
 extern struct work_struct ehea_rereg_mr_task;
 
 #endif	/* __EHEA_H__ */
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index a2f1905..e3131ea 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -99,7 +99,7 @@ MODULE_PARM_DESC(use_lro, " Large Receive Offload, 1: enable, 0: disable, "
 
 static int port_name_cnt;
 static LIST_HEAD(adapter_list);
-u64 ehea_driver_flags;
+static unsigned long ehea_driver_flags;
 struct work_struct ehea_rereg_mr_task;
 static DEFINE_MUTEX(dlpar_mem_lock);
 struct ehea_fw_handle_array ehea_fw_handles;
-- 
cgit v0.10.2


From 22409f9c80d0a742ff98d1cfe1bf60fce1927be1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Mon, 5 Jan 2009 17:18:42 +0000
Subject: get rid of the last symlink in uml build

We need to make asm-offsets.h contents visible for objects built
with userland headers.  Instead of creating a symlink, just have the
file with equivalent include (relative to location of header) created
once.  That kills the last symlink used in arch/um builds.

Additionally, both generated headers can become dependencies of
archprepare now, killing the misuse of prepare.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/um/Makefile b/arch/um/Makefile
index d944c34..0728def 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -22,10 +22,11 @@ MODE_INCLUDE	+= -I$(srctree)/$(ARCH_DIR)/include/shared/skas
 
 include $(srctree)/$(ARCH_DIR)/Makefile-skas
 
-ARCH_INCLUDE	:= -I$(srctree)/$(ARCH_DIR)/include/shared
+SHARED_HEADERS	:= $(ARCH_DIR)/include/shared
+ARCH_INCLUDE	:= -I$(srctree)/$(SHARED_HEADERS)
 ARCH_INCLUDE	+= -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)/shared
 ifneq ($(KBUILD_SRC),)
-ARCH_INCLUDE	+= -I$(ARCH_DIR)/include/shared # for two generated files
+ARCH_INCLUDE	+= -I$(SHARED_HEADERS)
 endif
 KBUILD_CPPFLAGS += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)
 
@@ -85,8 +86,8 @@ endef
 
 KBUILD_KCONFIG := arch/um/Kconfig.$(HEADER_ARCH)
 
-archprepare: $(ARCH_DIR)/include/shared/user_constants.h
-prepare: $(ARCH_DIR)/include/shared/kern_constants.h
+archprepare: $(SHARED_HEADERS)/user_constants.h
+archprepare: $(SHARED_HEADERS)/kern_constants.h
 
 LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
 LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
@@ -119,17 +120,13 @@ endef
 # When cleaning we don't include .config, so we don't include
 # TT or skas makefiles and don't clean skas_ptregs.h.
 CLEAN_FILES += linux x.i gmon.out \
-	$(ARCH_DIR)/include/shared/user_constants.h \
-	$(ARCH_DIR)/include/shared/kern_constants.h
+	$(SHARED_HEADERS)/user_constants.h \
+	$(SHARED_HEADERS)/kern_constants.h
 
 archclean:
 	@find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
 		-o -name '*.gcov' \) -type f -print | xargs rm -f
 
-$(objtree)/$(ARCH_DIR)/include/shared:
-	@echo '  MKDIR $@'
-	$(Q)mkdir -p $@
-
 # Generated files
 
 $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s: FORCE
@@ -148,11 +145,11 @@ define filechk_gen-asm-offsets
          echo ""; )
 endef
 
-$(ARCH_DIR)/include/shared/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s
+$(SHARED_HEADERS)/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s
 	$(call filechk,gen-asm-offsets)
 
-$(ARCH_DIR)/include/shared/kern_constants.h: $(objtree)/$(ARCH_DIR)/include/shared
-	@echo '  SYMLINK $@'
-	$(Q)ln -sf ../../../../include/asm/asm-offsets.h $@
+$(SHARED_HEADERS)/kern_constants.h:
+	$(Q)mkdir -p $(dir $@)
+	$(Q)echo '#include "../../../../include/asm/asm-offsets.h"' >$@
 
 export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS HEADER_ARCH DEV_NULL_PATH
-- 
cgit v0.10.2


From 7483cb7bbc02b9471dda28e54f41287d5374e3ac Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Mon, 5 Jan 2009 17:18:52 +0000
Subject: uml got broken by commit 30742d5c2277c325fb0e9d2d817d55a19995fe8f

... if you revert a commit, revert the fixups elsewhere that had been
triggered by it.  Such as 8c56250f48347750c82ab18d98d647dcf99ca674
(lockdep, UML: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is not set).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/um/include/asm/system.h b/arch/um/include/asm/system.h
index ae5f94d..753346e 100644
--- a/arch/um/include/asm/system.h
+++ b/arch/um/include/asm/system.h
@@ -11,21 +11,21 @@ extern int get_signals(void);
 extern void block_signals(void);
 extern void unblock_signals(void);
 
-#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \
+#define local_save_flags(flags) do { typecheck(unsigned long, flags); \
 				     (flags) = get_signals(); } while(0)
-#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \
+#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \
 				      set_signals(flags); } while(0)
 
-#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \
-                                   raw_local_irq_disable(); } while(0)
+#define local_irq_save(flags) do { local_save_flags(flags); \
+                                   local_irq_disable(); } while(0)
 
-#define raw_local_irq_enable() unblock_signals()
-#define raw_local_irq_disable() block_signals()
+#define local_irq_enable() unblock_signals()
+#define local_irq_disable() block_signals()
 
 #define irqs_disabled()                 \
 ({                                      \
         unsigned long flags;            \
-        raw_local_save_flags(flags);        \
+        local_save_flags(flags);        \
         (flags == 0);                   \
 })
 
-- 
cgit v0.10.2


From 5641f1fde074651ce2488e93944cf05dedd9bf74 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Mon, 5 Jan 2009 17:19:02 +0000
Subject: X86_DEBUGCTLMSR won't work on uml

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 85a7857..8078955 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -408,7 +408,7 @@ config X86_MINIMUM_CPU_FAMILY
 
 config X86_DEBUGCTLMSR
 	def_bool y
-	depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
+	depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML
 
 menuconfig PROCESSOR_SELECT
 	bool "Supported processor vendors" if EMBEDDED
-- 
cgit v0.10.2


From 046c68842bce6b77509cf56e94a561029124b0ce Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Mon, 5 Jan 2009 14:06:29 +0000
Subject: mm: update my address

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/ipc/sem.c b/ipc/sem.c
index 0821224..fea0ad3 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -58,7 +58,7 @@
  * SMP-threaded, sysctl's added
  * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
  * Enforced range limit on SEM_UNDO
- * (c) 2001 Red Hat Inc <alan@redhat.com>
+ * (c) 2001 Red Hat Inc
  * Lockless wakeup
  * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
  *
diff --git a/mm/mmap.c b/mm/mmap.c
index d4855a6..2c778fc 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3,7 +3,7 @@
  *
  * Written by obz.
  *
- * Address space accounting code	<alan@redhat.com>
+ * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
  */
 
 #include <linux/slab.h>
diff --git a/mm/mprotect.c b/mm/mprotect.c
index fded06f..cfb4c48 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -4,7 +4,7 @@
  *  (C) Copyright 1994 Linus Torvalds
  *  (C) Copyright 2002 Christoph Hellwig
  *
- *  Address space accounting code	<alan@redhat.com>
+ *  Address space accounting code	<alan@lxorguk.ukuu.org.uk>
  *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
  */
 
diff --git a/mm/mremap.c b/mm/mremap.c
index 58a2908..646de95 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -3,7 +3,7 @@
  *
  *	(C) Copyright 1996 Linus Torvalds
  *
- *	Address space accounting code	<alan@redhat.com>
+ *	Address space accounting code	<alan@lxorguk.ukuu.org.uk>
  *	(C) Copyright 2002 Red Hat Inc, All Rights Reserved
  */
 
-- 
cgit v0.10.2


From 55cdea9ed9cf2d76993e40ed7a1fc649a14db07c Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Mon, 5 Jan 2009 18:07:07 -0800
Subject: af_iucv: New error return codes for connect()

If the iucv_path_connect() call fails then return an error code that
corresponds to the iucv_path_connect() failure condition; instead of
returning -ECONNREFUSED for any failure.

This helps to improve error handling for user space applications
(e.g.  inform the user that the z/VM guest is not authorized to
connect to other guest virtual machines).

The error return codes are based on those described in connect(2).

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index af3192d..1077bc4 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -494,7 +494,21 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
 	if (err) {
 		iucv_path_free(iucv->path);
 		iucv->path = NULL;
-		err = -ECONNREFUSED;
+		switch (err) {
+		case 0x0b:	/* Target communicator is not logged on */
+			err = -ENETUNREACH;
+			break;
+		case 0x0d:	/* Max connections for this guest exceeded */
+		case 0x0e:	/* Max connections for target guest exceeded */
+			err = -EAGAIN;
+			break;
+		case 0x0f:	/* Missing IUCV authorization */
+			err = -EACCES;
+			break;
+		default:
+			err = -ECONNREFUSED;
+			break;
+		}
 		goto done;
 	}
 
-- 
cgit v0.10.2


From 18becbc5479f88d5adc218374ca62b8b93ec2545 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ursula.braun@de.ibm.com>
Date: Mon, 5 Jan 2009 18:07:46 -0800
Subject: af_iucv: avoid left over IUCV connections from failing connects

For certain types of AFIUCV socket connect failures IUCV connections
are left over. Add some cleanup-statements to avoid cluttered IUCV
connections.

Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 1077bc4..6b5f193 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -521,6 +521,13 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
 		release_sock(sk);
 		return -ECONNREFUSED;
 	}
+
+	if (err) {
+		iucv_path_sever(iucv->path, NULL);
+		iucv_path_free(iucv->path);
+		iucv->path = NULL;
+	}
+
 done:
 	release_sock(sk);
 	return err;
-- 
cgit v0.10.2


From 65dbd7c2778f1921ef1ee2a73e47a2a126fed30f Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Mon, 5 Jan 2009 18:08:23 -0800
Subject: af_iucv: Free iucv path/socket in path_pending callback

Free iucv path after iucv_path_sever() calls in iucv_callback_connreq()
(path_pending() iucv callback).
If iucv_path_accept() fails, free path and free/kill newly created socket.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 6b5f193..eb8a2a0 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1042,12 +1042,14 @@ static int iucv_callback_connreq(struct iucv_path *path,
 	ASCEBC(user_data, sizeof(user_data));
 	if (sk->sk_state != IUCV_LISTEN) {
 		err = iucv_path_sever(path, user_data);
+		iucv_path_free(path);
 		goto fail;
 	}
 
 	/* Check for backlog size */
 	if (sk_acceptq_is_full(sk)) {
 		err = iucv_path_sever(path, user_data);
+		iucv_path_free(path);
 		goto fail;
 	}
 
@@ -1055,6 +1057,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
 	nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC);
 	if (!nsk) {
 		err = iucv_path_sever(path, user_data);
+		iucv_path_free(path);
 		goto fail;
 	}
 
@@ -1078,6 +1081,8 @@ static int iucv_callback_connreq(struct iucv_path *path,
 	err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk);
 	if (err) {
 		err = iucv_path_sever(path, user_data);
+		iucv_path_free(path);
+		iucv_sock_kill(nsk);
 		goto fail;
 	}
 
-- 
cgit v0.10.2


From f1d3e4dca3f8d4f55656477e83d0afe0ea7cbaed Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 5 Jan 2009 18:09:02 -0800
Subject: iucv: fix cpu hotplug

If the iucv module is compiled in/loaded but no user is registered cpu
hot remove doesn't work. Reason for that is that the iucv cpu hotplug
notifier on CPU_DOWN_PREPARE checks if the iucv_buffer_cpumask would
be empty after the corresponding bit would be cleared. However the bit
was never set since iucv wasn't enable. That causes all cpu hot unplug
operations to fail in this scenario.
To fix this use iucv_path_table as an indicator wether iucv is enabled
or not.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 8f57d4f..032f61e 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -517,6 +517,7 @@ static int iucv_enable(void)
 	size_t alloc_size;
 	int cpu, rc;
 
+	get_online_cpus();
 	rc = -ENOMEM;
 	alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
 	iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
@@ -524,19 +525,17 @@ static int iucv_enable(void)
 		goto out;
 	/* Declare per cpu buffers. */
 	rc = -EIO;
-	get_online_cpus();
 	for_each_online_cpu(cpu)
 		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
 	if (cpus_empty(iucv_buffer_cpumask))
 		/* No cpu could declare an iucv buffer. */
-		goto out_path;
+		goto out;
 	put_online_cpus();
 	return 0;
-
-out_path:
-	put_online_cpus();
-	kfree(iucv_path_table);
 out:
+	kfree(iucv_path_table);
+	iucv_path_table = NULL;
+	put_online_cpus();
 	return rc;
 }
 
@@ -551,8 +550,9 @@ static void iucv_disable(void)
 {
 	get_online_cpus();
 	on_each_cpu(iucv_retrieve_cpu, NULL, 1);
-	put_online_cpus();
 	kfree(iucv_path_table);
+	iucv_path_table = NULL;
+	put_online_cpus();
 }
 
 static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
@@ -589,10 +589,14 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 	case CPU_ONLINE_FROZEN:
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
+		if (!iucv_path_table)
+			break;
 		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
+		if (!iucv_path_table)
+			break;
 		cpumask = iucv_buffer_cpumask;
 		cpu_clear(cpu, cpumask);
 		if (cpus_empty(cpumask))
-- 
cgit v0.10.2


From 4696b64d234b84b5b70ffd49a76833aa5c49cb61 Mon Sep 17 00:00:00 2001
From: Julian Calaby <julian.calaby@gmail.com>
Date: Mon, 5 Jan 2009 18:13:49 -0800
Subject: sparc: Fix minor SPARC32 compile error

When CONFIG_PROC_FS is unset, include/linux/interrupt.h defines
init_irq_proc() as an empty function.

arch/sparc/kernel/irq_32.c defines this function unconditionally.

Fix the latter so that it only defines this function when CONFIG_PROC_FS
is set.

This fixes the following error:
arch/sparc/kernel/irq_32.c:672: error: redefinition of 'init_irq_proc'
include/linux/interrupt.h:461: error: previous definition of
'init_irq_proc' was here

This was found using randconfig builds.

Signed-off-by: Julian Calaby <julian.calaby@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c
index f3488c4..1eff942 100644
--- a/arch/sparc/kernel/irq_32.c
+++ b/arch/sparc/kernel/irq_32.c
@@ -669,7 +669,9 @@ void __init init_IRQ(void)
 	btfixup();
 }
 
+#ifdef CONFIG_PROC_FS
 void init_irq_proc(void)
 {
 	/* For now, nothing... */
 }
+#endif /* CONFIG_PROC_FS */
-- 
cgit v0.10.2


From 6f57321422e0d359e83c978c2b03db77b967b7d5 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 5 Jan 2009 18:14:19 -0800
Subject: pkt_sched: cls_u32: Fix locking in u32_change()

New nodes are inserted in u32_change() under rtnl_lock() with wmb(),
so without tcf_tree_lock() like in other classifiers (e.g. cls_fw).
This isn't enough without rmb() on the read side, but on the other
hand adding such barriers doesn't give any savings, so the lock is
added instead.

Reported-by: m0sia <m0sia@plotinka.ru>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 05d1780..07372f6 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -638,8 +638,9 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 				break;
 
 		n->next = *ins;
-		wmb();
+		tcf_tree_lock(tp);
 		*ins = n;
+		tcf_tree_unlock(tp);
 
 		*arg = (unsigned long)n;
 		return 0;
-- 
cgit v0.10.2


From 0f840011f0396dcb97ca82c64fd43f6990a574dd Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Mon, 5 Jan 2009 18:16:14 -0800
Subject: myri10ge: print MAC and serial number on probe failure

To help board identification and diagnosis, print the MAC
and serial number on probe failure if they are available.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 5e70180..6bb71b6 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -75,7 +75,7 @@
 #include "myri10ge_mcp.h"
 #include "myri10ge_mcp_gen_header.h"
 
-#define MYRI10GE_VERSION_STR "1.4.4-1.395"
+#define MYRI10GE_VERSION_STR "1.4.4-1.398"
 
 MODULE_DESCRIPTION("Myricom 10G driver (10GbE)");
 MODULE_AUTHOR("Maintainer: help@myri.com");
@@ -3929,6 +3929,10 @@ abort_with_firmware:
 	myri10ge_dummy_rdma(mgp, 0);
 
 abort_with_ioremap:
+	if (mgp->mac_addr_string != NULL)
+		dev_err(&pdev->dev,
+			"myri10ge_probe() failed: MAC=%s, SN=%ld\n",
+			mgp->mac_addr_string, mgp->serial_number);
 	iounmap(mgp->sram);
 
 abort_with_mtrr:
-- 
cgit v0.10.2


From 8306c952a523ad2f87c101427c3ece91176b822c Mon Sep 17 00:00:00 2001
From: Ron Mercer <ron.mercer@qlogic.com>
Date: Mon, 5 Jan 2009 18:17:33 -0800
Subject: qlge: Fix sparse warnings for byte swapping in qlge_ethool.c

drivers/net/qlge/qlge_ethtool.c:59:23: warning: cast to restricted type
drivers/net/qlge/qlge_ethtool.c:59:21: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_ethtool.c:59:21:    expected restricted unsigned short [usertype] irq_delay
drivers/net/qlge/qlge_ethtool.c:59:21:    got unsigned short [unsigned] [usertype] <noident>
drivers/net/qlge/qlge_ethtool.c:61:8: warning: cast to restricted type
drivers/net/qlge/qlge_ethtool.c:60:21: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_ethtool.c:60:21:    expected restricted unsigned short [usertype] pkt_delay
drivers/net/qlge/qlge_ethtool.c:60:21:    got unsigned short [unsigned] [usertype] <noident>
drivers/net/qlge/qlge_ethtool.c:82:23: warning: cast to restricted type
drivers/net/qlge/qlge_ethtool.c:82:21: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_ethtool.c:82:21:    expected restricted unsigned short [usertype] irq_delay
drivers/net/qlge/qlge_ethtool.c:82:21:    got unsigned short [unsigned] [usertype] <noident>
drivers/net/qlge/qlge_ethtool.c:84:8: warning: cast to restricted type
drivers/net/qlge/qlge_ethtool.c:83:21: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_ethtool.c:83:21:    expected restricted unsigned short [usertype] pkt_delay
drivers/net/qlge/qlge_ethtool.c:83:21:    got unsigned short [unsigned] [usertype] <noident>

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlge/qlge_ethtool.c b/drivers/net/qlge/qlge_ethtool.c
index eefb81b..9d922e2 100644
--- a/drivers/net/qlge/qlge_ethtool.c
+++ b/drivers/net/qlge/qlge_ethtool.c
@@ -56,9 +56,9 @@ static int ql_update_ring_coalescing(struct ql_adapter *qdev)
 		for (i = 1; i < qdev->rss_ring_first_cq_id; i++, rx_ring++) {
 			rx_ring = &qdev->rx_ring[i];
 			cqicb = (struct cqicb *)rx_ring;
-			cqicb->irq_delay = le16_to_cpu(qdev->tx_coalesce_usecs);
+			cqicb->irq_delay = cpu_to_le16(qdev->tx_coalesce_usecs);
 			cqicb->pkt_delay =
-			    le16_to_cpu(qdev->tx_max_coalesced_frames);
+			    cpu_to_le16(qdev->tx_max_coalesced_frames);
 			cqicb->flags = FLAGS_LI;
 			status = ql_write_cfg(qdev, cqicb, sizeof(cqicb),
 						CFG_LCQ, rx_ring->cq_id);
@@ -79,9 +79,9 @@ static int ql_update_ring_coalescing(struct ql_adapter *qdev)
 		     i++) {
 			rx_ring = &qdev->rx_ring[i];
 			cqicb = (struct cqicb *)rx_ring;
-			cqicb->irq_delay = le16_to_cpu(qdev->rx_coalesce_usecs);
+			cqicb->irq_delay = cpu_to_le16(qdev->rx_coalesce_usecs);
 			cqicb->pkt_delay =
-			    le16_to_cpu(qdev->rx_max_coalesced_frames);
+			    cpu_to_le16(qdev->rx_max_coalesced_frames);
 			cqicb->flags = FLAGS_LI;
 			status = ql_write_cfg(qdev, cqicb, sizeof(cqicb),
 						CFG_LCQ, rx_ring->cq_id);
-- 
cgit v0.10.2


From a303ce0972d04036316e85568682a2b89fe123d9 Mon Sep 17 00:00:00 2001
From: Ron Mercer <ron.mercer@qlogic.com>
Date: Mon, 5 Jan 2009 18:18:22 -0800
Subject: qlge: Fix sparse endian warning for inbound packet control block
 flags.

Changed flags element from __le32 to 3 reserved bytes and one byte of
flags.  Changed flags bit definitions to reflect byte width instead of
__le32 width.

Warnings:
drivers/net/qlge/qlge_main.c:1206:16: warning: restricted degrades to integer
drivers/net/qlge/qlge_main.c:1207:16: warning: restricted degrades to integer
drivers/net/qlge/qlge_main.c:1233:17: warning: restricted degrades to integer
drivers/net/qlge/qlge_main.c:1276:17: warning: restricted degrades to integer
drivers/net/qlge/qlge_main.c:1349:19: warning: restricted degrades to integer

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlge/qlge.h b/drivers/net/qlge/qlge.h
index 97321bb..71cc487 100644
--- a/drivers/net/qlge/qlge.h
+++ b/drivers/net/qlge/qlge.h
@@ -979,10 +979,11 @@ struct ib_mac_iocb_rsp {
 
 	__le16 reserved1;
 	__le32 reserved2[6];
-	__le32 flags4;
-#define IB_MAC_IOCB_RSP_HV	0x20000000	/* */
-#define IB_MAC_IOCB_RSP_HS	0x40000000	/* */
-#define IB_MAC_IOCB_RSP_HL	0x80000000	/* */
+	u8 reserved3[3];
+	u8 flags4;
+#define IB_MAC_IOCB_RSP_HV	0x20
+#define IB_MAC_IOCB_RSP_HS	0x40
+#define IB_MAC_IOCB_RSP_HL	0x80
 	__le32 hdr_len;		/* */
 	__le32 hdr_addr_lo;	/* */
 	__le32 hdr_addr_hi;	/* */
diff --git a/drivers/net/qlge/qlge_dbg.c b/drivers/net/qlge/qlge_dbg.c
index 47df304..3f5e02d 100644
--- a/drivers/net/qlge/qlge_dbg.c
+++ b/drivers/net/qlge/qlge_dbg.c
@@ -821,14 +821,11 @@ void ql_dump_ib_mac_rsp(struct ib_mac_iocb_rsp *ib_mac_rsp)
 		       le16_to_cpu(ib_mac_rsp->vlan_id));
 
 	printk(KERN_ERR PFX "flags4 = %s%s%s.\n",
-	       le32_to_cpu(ib_mac_rsp->
-			   flags4) & IB_MAC_IOCB_RSP_HV ? "HV " : "",
-	       le32_to_cpu(ib_mac_rsp->
-			   flags4) & IB_MAC_IOCB_RSP_HS ? "HS " : "",
-	       le32_to_cpu(ib_mac_rsp->
-			   flags4) & IB_MAC_IOCB_RSP_HL ? "HL " : "");
-
-	if (le32_to_cpu(ib_mac_rsp->flags4) & IB_MAC_IOCB_RSP_HV) {
+		ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HV ? "HV " : "",
+		ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HS ? "HS " : "",
+		ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HL ? "HL " : "");
+
+	if (ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HV) {
 		printk(KERN_ERR PFX "hdr length	= %d.\n",
 		       le32_to_cpu(ib_mac_rsp->hdr_len));
 		printk(KERN_ERR PFX "hdr addr_hi    = 0x%x.\n",
-- 
cgit v0.10.2


From fd2df4f7439cd3e87090e067d5aec8f1336f4f0e Mon Sep 17 00:00:00 2001
From: Ron Mercer <ron.mercer@qlogic.com>
Date: Mon, 5 Jan 2009 18:18:45 -0800
Subject: qlge: Fix sparse endian warning in ql_hw_csum_setup().

Changed u16 to __sum16 usage.

Warnings:
drivers/net/qlge/qlge_main.c:1897:9: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:1897:9:    expected unsigned short [usertype] *check
drivers/net/qlge/qlge_main.c:1897:9:    got restricted unsigned short *<noident>
drivers/net/qlge/qlge_main.c:1903:9: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:1903:9:    expected unsigned short [usertype] *check
drivers/net/qlge/qlge_main.c:1903:9:    got restricted unsigned short *<noident>
drivers/net/qlge/qlge_main.c:1909:9: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:1909:9:    expected unsigned short [unsigned] [short] [usertype] <noident>

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 837be72..d7894aa 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -1884,7 +1884,7 @@ static void ql_hw_csum_setup(struct sk_buff *skb,
 {
 	int len;
 	struct iphdr *iph = ip_hdr(skb);
-	u16 *check;
+	__sum16 *check;
 	mac_iocb_ptr->opcode = OPCODE_OB_MAC_TSO_IOCB;
 	mac_iocb_ptr->frame_len = cpu_to_le32((u32) skb->len);
 	mac_iocb_ptr->net_trans_offset =
-- 
cgit v0.10.2


From 2c9a0d41e944807bf763f42e4a3526210e98c741 Mon Sep 17 00:00:00 2001
From: Ron Mercer <ron.mercer@qlogic.com>
Date: Mon, 5 Jan 2009 18:19:20 -0800
Subject: qlge: Fix sparse warning regarding rx buffer queues.

Warnings:
drivers/net/qlge/qlge_main.c:909:17: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:909:17:    expected unsigned int [unsigned] [usertype] addr_lo
drivers/net/qlge/qlge_main.c:909:17:    got restricted unsigned int [usertype] <noident>
drivers/net/qlge/qlge_main.c:911:17: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:911:17:    expected unsigned int [unsigned] [usertype] addr_hi
drivers/net/qlge/qlge_main.c:911:17:    got restricted unsigned int [usertype] <noident>
drivers/net/qlge/qlge_main.c:974:17: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:974:17:    expected unsigned int [unsigned] [usertype] addr_lo
drivers/net/qlge/qlge_main.c:974:17:    got restricted unsigned int [usertype] <noident>
drivers/net/qlge/qlge_main.c:975:17: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:975:17:    expected unsigned int [unsigned] [usertype] addr_hi
drivers/net/qlge/qlge_main.c:975:17:    got restricted unsigned int [usertype] <noident>
drivers/net/qlge/qlge_main.c:2132:16: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:2132:16:    expected unsigned int [unsigned] [usertype] addr_lo
drivers/net/qlge/qlge_main.c:2132:16:    got restricted unsigned int [usertype] <noident>
drivers/net/qlge/qlge_main.c:2133:16: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:2133:16:    expected unsigned int [unsigned] [usertype] addr_hi
drivers/net/qlge/qlge_main.c:2133:16:    got restricted unsigned int [usertype] <noident>
drivers/net/qlge/qlge_main.c:2212:15: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:2212:15:    expected unsigned int [unsigned] [usertype] addr_lo
drivers/net/qlge/qlge_main.c:2212:15:    got restricted unsigned int [usertype] <noident>
drivers/net/qlge/qlge_main.c:2214:15: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:2214:15:    expected unsigned int [unsigned] [usertype] addr_hi
drivers/net/qlge/qlge_main.c:2214:15:    got restricted unsigned int [usertype] <noident>

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlge/qlge.h b/drivers/net/qlge/qlge.h
index 71cc487..76ef2bc 100644
--- a/drivers/net/qlge/qlge.h
+++ b/drivers/net/qlge/qlge.h
@@ -818,15 +818,6 @@ struct tx_doorbell_context {
 };
 
 /* DATA STRUCTURES SHARED WITH HARDWARE. */
-
-struct bq_element {
-	u32 addr_lo;
-#define BQ_END	0x00000001
-#define BQ_CONT	0x00000002
-#define BQ_MASK	0x00000003
-	u32 addr_hi;
-} __attribute((packed));
-
 struct tx_buf_desc {
 	__le64 addr;
 	__le32 len;
@@ -1139,7 +1130,7 @@ struct bq_desc {
 		struct page *lbq_page;
 		struct sk_buff *skb;
 	} p;
-	struct bq_element *bq;
+	__le64 *addr;
 	int index;
 	 DECLARE_PCI_UNMAP_ADDR(mapaddr);
 	 DECLARE_PCI_UNMAP_LEN(maplen);
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index d7894aa..f4c0160 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -874,7 +874,6 @@ static void ql_update_lbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 {
 	int clean_idx = rx_ring->lbq_clean_idx;
 	struct bq_desc *lbq_desc;
-	struct bq_element *bq;
 	u64 map;
 	int i;
 
@@ -884,7 +883,6 @@ static void ql_update_lbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 				"lbq: try cleaning clean_idx = %d.\n",
 				clean_idx);
 			lbq_desc = &rx_ring->lbq[clean_idx];
-			bq = lbq_desc->bq;
 			if (lbq_desc->p.lbq_page == NULL) {
 				QPRINTK(qdev, RX_STATUS, DEBUG,
 					"lbq: getting new page for index %d.\n",
@@ -906,10 +904,7 @@ static void ql_update_lbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 				}
 				pci_unmap_addr_set(lbq_desc, mapaddr, map);
 				pci_unmap_len_set(lbq_desc, maplen, PAGE_SIZE);
-				bq->addr_lo =	/*lbq_desc->addr_lo = */
-				    cpu_to_le32(map);
-				bq->addr_hi =	/*lbq_desc->addr_hi = */
-				    cpu_to_le32(map >> 32);
+				*lbq_desc->addr = cpu_to_le64(map);
 			}
 			clean_idx++;
 			if (clean_idx == rx_ring->lbq_len)
@@ -934,7 +929,6 @@ static void ql_update_sbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 {
 	int clean_idx = rx_ring->sbq_clean_idx;
 	struct bq_desc *sbq_desc;
-	struct bq_element *bq;
 	u64 map;
 	int i;
 
@@ -944,7 +938,6 @@ static void ql_update_sbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 			QPRINTK(qdev, RX_STATUS, DEBUG,
 				"sbq: try cleaning clean_idx = %d.\n",
 				clean_idx);
-			bq = sbq_desc->bq;
 			if (sbq_desc->p.skb == NULL) {
 				QPRINTK(qdev, RX_STATUS, DEBUG,
 					"sbq: getting new skb for index %d.\n",
@@ -971,8 +964,7 @@ static void ql_update_sbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 				pci_unmap_addr_set(sbq_desc, mapaddr, map);
 				pci_unmap_len_set(sbq_desc, maplen,
 						  rx_ring->sbq_buf_size / 2);
-				bq->addr_lo = cpu_to_le32(map);
-				bq->addr_hi = cpu_to_le32(map >> 32);
+				*sbq_desc->addr = cpu_to_le64(map);
 			}
 
 			clean_idx++;
@@ -1340,7 +1332,7 @@ static struct sk_buff *ql_build_rx_skb(struct ql_adapter *qdev,
 		 *          eventually be in trouble.
 		 */
 		int size, offset, i = 0;
-		struct bq_element *bq, bq_array[8];
+		__le64 *bq, bq_array[8];
 		sbq_desc = ql_get_curr_sbuf(rx_ring);
 		pci_unmap_single(qdev->pdev,
 				 pci_unmap_addr(sbq_desc, mapaddr),
@@ -1366,16 +1358,10 @@ static struct sk_buff *ql_build_rx_skb(struct ql_adapter *qdev,
 		} else {
 			QPRINTK(qdev, RX_STATUS, DEBUG,
 				"Headers in small, %d bytes of data in chain of large.\n", length);
-			bq = (struct bq_element *)sbq_desc->p.skb->data;
+			bq = (__le64 *)sbq_desc->p.skb->data;
 		}
 		while (length > 0) {
 			lbq_desc = ql_get_curr_lbuf(rx_ring);
-			if ((bq->addr_lo & ~BQ_MASK) != lbq_desc->bq->addr_lo) {
-				QPRINTK(qdev, RX_STATUS, ERR,
-					"Panic!!! bad large buffer address, expected 0x%.08x, got 0x%.08x.\n",
-					lbq_desc->bq->addr_lo, bq->addr_lo);
-				return NULL;
-			}
 			pci_unmap_page(qdev->pdev,
 				       pci_unmap_addr(lbq_desc,
 						      mapaddr),
@@ -2093,8 +2079,6 @@ static void ql_free_lbq_buffers(struct ql_adapter *qdev, struct rx_ring *rx_ring
 			put_page(lbq_desc->p.lbq_page);
 			lbq_desc->p.lbq_page = NULL;
 		}
-		lbq_desc->bq->addr_lo = 0;
-		lbq_desc->bq->addr_hi = 0;
 	}
 }
 
@@ -2107,12 +2091,12 @@ static int ql_alloc_lbq_buffers(struct ql_adapter *qdev,
 	int i;
 	struct bq_desc *lbq_desc;
 	u64 map;
-	struct bq_element *bq = rx_ring->lbq_base;
+	__le64 *bq = rx_ring->lbq_base;
 
 	for (i = 0; i < rx_ring->lbq_len; i++) {
 		lbq_desc = &rx_ring->lbq[i];
 		memset(lbq_desc, 0, sizeof(lbq_desc));
-		lbq_desc->bq = bq;
+		lbq_desc->addr = bq;
 		lbq_desc->index = i;
 		lbq_desc->p.lbq_page = alloc_page(GFP_ATOMIC);
 		if (unlikely(!lbq_desc->p.lbq_page)) {
@@ -2129,8 +2113,7 @@ static int ql_alloc_lbq_buffers(struct ql_adapter *qdev,
 			}
 			pci_unmap_addr_set(lbq_desc, mapaddr, map);
 			pci_unmap_len_set(lbq_desc, maplen, PAGE_SIZE);
-			bq->addr_lo = cpu_to_le32(map);
-			bq->addr_hi = cpu_to_le32(map >> 32);
+			*lbq_desc->addr = cpu_to_le64(map);
 		}
 		bq++;
 	}
@@ -2159,13 +2142,6 @@ static void ql_free_sbq_buffers(struct ql_adapter *qdev, struct rx_ring *rx_ring
 			dev_kfree_skb(sbq_desc->p.skb);
 			sbq_desc->p.skb = NULL;
 		}
-		if (sbq_desc->bq == NULL) {
-			QPRINTK(qdev, IFUP, ERR, "sbq_desc->bq %d is NULL.\n",
-				i);
-			return;
-		}
-		sbq_desc->bq->addr_lo = 0;
-		sbq_desc->bq->addr_hi = 0;
 	}
 }
 
@@ -2177,13 +2153,13 @@ static int ql_alloc_sbq_buffers(struct ql_adapter *qdev,
 	struct bq_desc *sbq_desc;
 	struct sk_buff *skb;
 	u64 map;
-	struct bq_element *bq = rx_ring->sbq_base;
+	__le64 *bq = rx_ring->sbq_base;
 
 	for (i = 0; i < rx_ring->sbq_len; i++) {
 		sbq_desc = &rx_ring->sbq[i];
 		memset(sbq_desc, 0, sizeof(sbq_desc));
 		sbq_desc->index = i;
-		sbq_desc->bq = bq;
+		sbq_desc->addr = bq;
 		skb = netdev_alloc_skb(qdev->ndev, rx_ring->sbq_buf_size);
 		if (unlikely(!skb)) {
 			/* Better luck next round */
@@ -2209,10 +2185,7 @@ static int ql_alloc_sbq_buffers(struct ql_adapter *qdev,
 		}
 		pci_unmap_addr_set(sbq_desc, mapaddr, map);
 		pci_unmap_len_set(sbq_desc, maplen, rx_ring->sbq_buf_size / 2);
-		bq->addr_lo =	/*sbq_desc->addr_lo = */
-		    cpu_to_le32(map);
-		bq->addr_hi =	/*sbq_desc->addr_hi = */
-		    cpu_to_le32(map >> 32);
+		*sbq_desc->addr = cpu_to_le64(map);
 		bq++;
 	}
 	return 0;
@@ -3356,11 +3329,11 @@ static int ql_configure_rings(struct ql_adapter *qdev)
 			    rx_ring->cq_len * sizeof(struct ql_net_rsp_iocb);
 			rx_ring->lbq_len = NUM_LARGE_BUFFERS;
 			rx_ring->lbq_size =
-			    rx_ring->lbq_len * sizeof(struct bq_element);
+			    rx_ring->lbq_len * sizeof(__le64);
 			rx_ring->lbq_buf_size = LARGE_BUFFER_SIZE;
 			rx_ring->sbq_len = NUM_SMALL_BUFFERS;
 			rx_ring->sbq_size =
-			    rx_ring->sbq_len * sizeof(struct bq_element);
+			    rx_ring->sbq_len * sizeof(__le64);
 			rx_ring->sbq_buf_size = SMALL_BUFFER_SIZE * 2;
 			rx_ring->type = DEFAULT_Q;
 		} else if (i < qdev->rss_ring_first_cq_id) {
@@ -3387,11 +3360,11 @@ static int ql_configure_rings(struct ql_adapter *qdev)
 			    rx_ring->cq_len * sizeof(struct ql_net_rsp_iocb);
 			rx_ring->lbq_len = NUM_LARGE_BUFFERS;
 			rx_ring->lbq_size =
-			    rx_ring->lbq_len * sizeof(struct bq_element);
+			    rx_ring->lbq_len * sizeof(__le64);
 			rx_ring->lbq_buf_size = LARGE_BUFFER_SIZE;
 			rx_ring->sbq_len = NUM_SMALL_BUFFERS;
 			rx_ring->sbq_size =
-			    rx_ring->sbq_len * sizeof(struct bq_element);
+			    rx_ring->sbq_len * sizeof(__le64);
 			rx_ring->sbq_buf_size = SMALL_BUFFER_SIZE * 2;
 			rx_ring->type = RX_Q;
 		}
-- 
cgit v0.10.2


From 3537d54c0c39de5738bba8d19f128478b0b96a71 Mon Sep 17 00:00:00 2001
From: Ron Mercer <ron.mercer@qlogic.com>
Date: Mon, 5 Jan 2009 18:19:59 -0800
Subject: qlge: Fix sparse warnings for tx ring indexes.

Warnings:
drivers/net/qlge/qlge_main.c:1474:34: warning: restricted degrades to integer
drivers/net/qlge/qlge_main.c:1475:36: warning: restricted degrades to integer
drivers/net/qlge/qlge_main.c:1592:51: warning: restricted degrades to integer
drivers/net/qlge/qlge_main.c:1941:20: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:1941:20:    expected restricted unsigned int [usertype] tid
drivers/net/qlge/qlge_main.c:1941:20:    got int [signed] index
drivers/net/qlge/qlge_main.c:1945:24: warning: incorrect type in assignment (different base types)
drivers/net/qlge/qlge_main.c:1945:24:    expected restricted unsigned int [usertype] txq_idx
drivers/net/qlge/qlge_main.c:1945:24:    got unsigned int [unsigned] [usertype] tx_ring_idx

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlge/qlge.h b/drivers/net/qlge/qlge.h
index 76ef2bc..459663a 100644
--- a/drivers/net/qlge/qlge.h
+++ b/drivers/net/qlge/qlge.h
@@ -851,8 +851,8 @@ struct ob_mac_iocb_req {
 	__le16 frame_len;
 #define OB_MAC_IOCB_LEN_MASK 0x3ffff
 	__le16 reserved2;
-	__le32 tid;
-	__le32 txq_idx;
+	u32 tid;
+	u32 txq_idx;
 	__le32 reserved3;
 	__le16 vlan_tci;
 	__le16 reserved4;
@@ -871,8 +871,8 @@ struct ob_mac_iocb_rsp {
 	u8 flags2;		/* */
 	u8 flags3;		/* */
 #define OB_MAC_IOCB_RSP_B	0x80	/* */
-	__le32 tid;
-	__le32 txq_idx;
+	u32 tid;
+	u32 txq_idx;
 	__le32 reserved[13];
 } __attribute((packed));
 
@@ -894,8 +894,8 @@ struct ob_mac_tso_iocb_req {
 #define OB_MAC_TSO_IOCB_V	0x04
 	__le32 reserved1[2];
 	__le32 frame_len;
-	__le32 tid;
-	__le32 txq_idx;
+	u32 tid;
+	u32 txq_idx;
 	__le16 total_hdrs_len;
 	__le16 net_trans_offset;
 #define OB_MAC_TRANSPORT_HDR_SHIFT 6
@@ -916,8 +916,8 @@ struct ob_mac_tso_iocb_rsp {
 	u8 flags2;		/* */
 	u8 flags3;		/* */
 #define OB_MAC_TSO_IOCB_RSP_B	0x8000
-	__le32 tid;
-	__le32 txq_idx;
+	u32 tid;
+	u32 txq_idx;
 	__le32 reserved2[13];
 } __attribute((packed));
 
@@ -1118,7 +1118,7 @@ struct map_list {
 struct tx_ring_desc {
 	struct sk_buff *skb;
 	struct ob_mac_iocb_req *queue_entry;
-	int index;
+	u32 index;
 	struct oal oal;
 	struct map_list map[MAX_SKB_FRAGS + 1];
 	int map_cnt;
@@ -1131,7 +1131,7 @@ struct bq_desc {
 		struct sk_buff *skb;
 	} p;
 	__le64 *addr;
-	int index;
+	u32 index;
 	 DECLARE_PCI_UNMAP_ADDR(mapaddr);
 	 DECLARE_PCI_UNMAP_LEN(maplen);
 };
-- 
cgit v0.10.2


From f1b11e505463fd597ab7963df26dd1f446dcceae Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Mon, 5 Jan 2009 14:04:40 +0000
Subject: i2o: Update my address

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c
index 56faef1..06c655c 100644
--- a/drivers/message/i2o/exec-osm.c
+++ b/drivers/message/i2o/exec-osm.c
@@ -19,7 +19,7 @@
  *		Auvo Häkkinen <Auvo.Hakkinen@cs.Helsinki.FI>
  *		Deepak Saxena <deepak@plexity.net>
  *		Boji T Kannanthanam <boji.t.kannanthanam@intel.com>
- *		Alan Cox <alan@redhat.com>:
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>:
  *			Ported to Linux 2.5.
  *		Markus Lidel <Markus.Lidel@shadowconnect.com>:
  *			Minor fixes for 2.6.
diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c
index f3384c3..efba702 100644
--- a/drivers/message/i2o/i2o_config.c
+++ b/drivers/message/i2o/i2o_config.c
@@ -19,7 +19,7 @@
  *		Changed ioctl_swdl(), implemented ioctl_swul() and ioctl_swdel()
  *	Deepak Saxena (11/18/1999):
  *		Added event managmenet support
- *	Alan Cox <alan@redhat.com>:
+ *	Alan Cox <alan@lxorguk.ukuu.org.uk>:
  *		2.4 rewrite ported to 2.5
  *	Markus Lidel <Markus.Lidel@shadowconnect.com>:
  *		Added pass-thru support for Adaptec's raidutils
diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c
index 6e53a30..35c67d1 100644
--- a/drivers/message/i2o/iop.c
+++ b/drivers/message/i2o/iop.c
@@ -19,7 +19,7 @@
  *		Auvo Häkkinen <Auvo.Hakkinen@cs.Helsinki.FI>
  *		Deepak Saxena <deepak@plexity.net>
  *		Boji T Kannanthanam <boji.t.kannanthanam@intel.com>
- *		Alan Cox <alan@redhat.com>:
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>:
  *			Ported to Linux 2.5.
  *		Markus Lidel <Markus.Lidel@shadowconnect.com>:
  *			Minor fixes for 2.6.
diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c
index 610ef12..25d6f23 100644
--- a/drivers/message/i2o/pci.c
+++ b/drivers/message/i2o/pci.c
@@ -19,7 +19,7 @@
  *		Auvo Häkkinen <Auvo.Hakkinen@cs.Helsinki.FI>
  *		Deepak Saxena <deepak@plexity.net>
  *		Boji T Kannanthanam <boji.t.kannanthanam@intel.com>
- *		Alan Cox <alan@redhat.com>:
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>:
  *			Ported to Linux 2.5.
  *		Markus Lidel <Markus.Lidel@shadowconnect.com>:
  *			Minor fixes for 2.6.
-- 
cgit v0.10.2


From e8c82c2e23e3527e0c9dc195e432c16784d270fa Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 6 Jan 2009 03:05:50 +0100
Subject: mm lockless pagecache barrier fix

An XFS workload showed up a bug in the lockless pagecache patch. Basically it
would go into an "infinite" loop, although it would sometimes be able to break
out of the loop! The reason is a missing compiler barrier in the "increment
reference count unless it was zero" case of the lockless pagecache protocol in
the gang lookup functions.

This would cause the compiler to use a cached value of struct page pointer to
retry the operation with, rather than reload it. So the page might have been
removed from pagecache and freed (refcount==0) but the lookup would not correctly
notice the page is no longer in pagecache, and keep attempting to increment the
refcount and failing, until the page gets reallocated for something else. This
isn't a data corruption because the condition will be detected if the page has
been reallocated. However it can result in a lockup.

Linus points out that ACCESS_ONCE is also required in that pointer load, even
if it's absence is not causing a bug on our particular build. The most general
way to solve this is just to put an rcu_dereference in radix_tree_deref_slot.

Assembly of find_get_pages,
before:
.L220:
        movq    (%rbx), %rax    #* ivtmp.1162, tmp82
        movq    (%rax), %rdi    #, prephitmp.1149
.L218:
        testb   $1, %dil        #, prephitmp.1149
        jne     .L217   #,
        testq   %rdi, %rdi      # prephitmp.1149
        je      .L203   #,
        cmpq    $-1, %rdi       #, prephitmp.1149
        je      .L217   #,
        movl    8(%rdi), %esi   # <variable>._count.counter, c
        testl   %esi, %esi      # c
        je      .L218   #,

after:
.L212:
        movq    (%rbx), %rax    #* ivtmp.1109, tmp81
        movq    (%rax), %rdi    #, ret
        testb   $1, %dil        #, ret
        jne     .L211   #,
        testq   %rdi, %rdi      # ret
        je      .L197   #,
        cmpq    $-1, %rdi       #, ret
        je      .L211   #,
        movl    8(%rdi), %esi   # <variable>._count.counter, c
        testl   %esi, %esi      # c
        je      .L212   #,

(notice the obvious infinite loop in the first example, if page->count remains 0)

Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index a916c66..355f6e8 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -136,7 +136,7 @@ do {									\
  */
 static inline void *radix_tree_deref_slot(void **pslot)
 {
-	void *ret = *pslot;
+	void *ret = rcu_dereference(*pslot);
 	if (unlikely(radix_tree_is_indirect_ptr(ret)))
 		ret = RADIX_TREE_RETRY;
 	return ret;
-- 
cgit v0.10.2


From e42e4ba07bc72c0eb7c7ab3bf9e5076db90d0f37 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 5 Jan 2009 18:47:12 -0800
Subject: igb: fix anoying type mismatch warning on rx/tx queue sizing

When using "min()", the types of both sides should match.  With the cpu
mask changes, the type of num_online_cpus() will now depend on config
options. Use "min_t()" with an explicit type instead.

And make the rx/tx case look the same too, just for sanity.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 022794e..b82b0fb 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -1457,8 +1457,8 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter)
 
 	/* Number of supported queues. */
 	/* Having more queues than CPUs doesn't make sense. */
-	adapter->num_rx_queues = min((u32)IGB_MAX_RX_QUEUES, (u32)num_online_cpus());
-	adapter->num_tx_queues = min(IGB_MAX_TX_QUEUES, num_online_cpus());
+	adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
+	adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
 
 	/* This call may decrease the number of queues depending on
 	 * interrupt mode. */
-- 
cgit v0.10.2


From a1b51e98676932d031f5eec1325b2df4bbdc8f26 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 6 Jan 2009 03:04:53 +0000
Subject: dm table: drop reference at unbind

Move one dm_table_put() so that the last reference in the thread
gets dropped in __unbind().

This is required for a following patch,
dm-table-rework-reference-counting.patch, which will change the logic in
such a way that table destructor is called only at specific points in
the code.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 421c9f0..8237141 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1330,8 +1330,8 @@ void dm_put(struct mapped_device *md)
 			dm_table_presuspend_targets(map);
 			dm_table_postsuspend_targets(map);
 		}
-		__unbind(md);
 		dm_table_put(map);
+		__unbind(md);
 		free_dev(md);
 	}
 }
-- 
cgit v0.10.2


From 90fa1527bddc7147dc0d590ee6184ca88bc50ecf Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 6 Jan 2009 03:04:54 +0000
Subject: dm snapshot: change yield to msleep

Change yield() to msleep(1). If the thread had realtime priority,
yield() doesn't really yield, so the yielding process would loop
indefinitely and cause machine lockup.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 6c96db2..4ceedd4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -9,6 +9,7 @@
 #include <linux/blkdev.h>
 #include <linux/ctype.h>
 #include <linux/device-mapper.h>
+#include <linux/delay.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kdev_t.h>
@@ -735,7 +736,7 @@ static void snapshot_dtr(struct dm_target *ti)
 	unregister_snapshot(s);
 
 	while (atomic_read(&s->pending_exceptions_count))
-		yield();
+		msleep(1);
 	/*
 	 * Ensure instructions in mempool_destroy aren't reordered
 	 * before atomic_read.
@@ -888,10 +889,10 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
 
 	/*
 	 * Check for conflicting reads. This is extremely improbable,
-	 * so yield() is sufficient and there is no need for a wait queue.
+	 * so msleep(1) is sufficient and there is no need for a wait queue.
 	 */
 	while (__chunk_is_tracked(s, pe->e.old_chunk))
-		yield();
+		msleep(1);
 
 	/*
 	 * Add a proper exception, and remove the
-- 
cgit v0.10.2


From c7a2bd19b7c1e0bd2c7604c53d2583e91e536948 Mon Sep 17 00:00:00 2001
From: Takahiro Yasui <tyasui@redhat.com>
Date: Tue, 6 Jan 2009 03:04:56 +0000
Subject: dm log: fix dm_io_client leak on error paths

In create_log_context function, dm_io_client_destroy function needs
to be called, when memory allocation of disk_header, sync_bits and
recovering_bits failed, but dm_io_client_destroy is not called.

Cc: stable@kernel.org
Signed-off-by: Takahiro Yasui <tyasui@redhat.com>
Acked-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index a8c0fc7..13e2a1a 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -467,6 +467,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		lc->disk_header = vmalloc(buf_size);
 		if (!lc->disk_header) {
 			DMWARN("couldn't allocate disk log buffer");
+			dm_io_client_destroy(lc->io_req.client);
 			kfree(lc);
 			return -ENOMEM;
 		}
@@ -482,6 +483,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		DMWARN("couldn't allocate sync bitset");
 		if (!dev)
 			vfree(lc->clean_bits);
+		else
+			dm_io_client_destroy(lc->io_req.client);
 		vfree(lc->disk_header);
 		kfree(lc);
 		return -ENOMEM;
@@ -495,6 +498,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		vfree(lc->sync_bits);
 		if (!dev)
 			vfree(lc->clean_bits);
+		else
+			dm_io_client_destroy(lc->io_req.client);
 		vfree(lc->disk_header);
 		kfree(lc);
 		return -ENOMEM;
-- 
cgit v0.10.2


From d460c65a6a9ec9e0d284864ec3a9a2d1b73f0e43 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Tue, 6 Jan 2009 03:04:57 +0000
Subject: dm raid1: fix error count

Always increase the error count when I/O on a leg of a mirror fails.

The error count is used to decide whether to select an alternative
mirror leg.  If the target doesn't use the "handle_errors" feature, the
error count is not updated and the bio can get requeued forever by the
read callback.

Fix it by increasing error_count before the handle_errors feature
checking.

Cc: stable@kernel.org
Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ec43f9f..d0fed2b 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -197,9 +197,6 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
 	struct mirror_set *ms = m->ms;
 	struct mirror *new;
 
-	if (!errors_handled(ms))
-		return;
-
 	/*
 	 * error_count is used for nothing more than a
 	 * simple way to tell if a device has encountered
@@ -210,6 +207,9 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
 	if (test_and_set_bit(error_type, &m->error_type))
 		return;
 
+	if (!errors_handled(ms))
+		return;
+
 	if (m != get_default_mirror(ms))
 		goto out;
 
-- 
cgit v0.10.2


From 10d3bd09a3c25df114f74f7f86e1b58d070bef32 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 6 Jan 2009 03:04:58 +0000
Subject: dm: consolidate target deregistration error handling

Change dm_unregister_target to return void and use BUG() for error
reporting.

dm_unregister_target can only fail because of programming bug in the
target driver. It can't fail because of user's behavior or disk errors.

This patch changes unregister_target to return void and use BUG if
someone tries to unregister non-registered target or unregister target
that is in use.

This patch removes code duplication (testing of error codes in all dm
targets) and reports bugs in just one place, in dm_unregister_target. In
some target drivers, these return codes were ignored, which could lead
to a situation where bugs could be missed.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3326750..35bda49 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1322,11 +1322,7 @@ static int __init dm_crypt_init(void)
 
 static void __exit dm_crypt_exit(void)
 {
-	int r = dm_unregister_target(&crypt_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
-
+	dm_unregister_target(&crypt_target);
 	kmem_cache_destroy(_crypt_io_pool);
 }
 
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 848b381..59ee1b0 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -364,11 +364,7 @@ bad_queue:
 
 static void __exit dm_delay_exit(void)
 {
-	int r = dm_unregister_target(&delay_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
-
+	dm_unregister_target(&delay_target);
 	kmem_cache_destroy(delayed_cache);
 	destroy_workqueue(kdelayd_wq);
 }
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 44042be..79fb53e 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -156,8 +156,5 @@ int __init dm_linear_init(void)
 
 void dm_linear_exit(void)
 {
-	int r = dm_unregister_target(&linear_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&linear_target);
 }
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3d7f492..345a260 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1495,14 +1495,10 @@ static int __init dm_multipath_init(void)
 
 static void __exit dm_multipath_exit(void)
 {
-	int r;
-
 	destroy_workqueue(kmpath_handlerd);
 	destroy_workqueue(kmultipathd);
 
-	r = dm_unregister_target(&multipath_target);
-	if (r < 0)
-		DMERR("target unregister failed %d", r);
+	dm_unregister_target(&multipath_target);
 	kmem_cache_destroy(_mpio_cache);
 }
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d0fed2b..250f401 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1300,11 +1300,7 @@ static int __init dm_mirror_init(void)
 
 static void __exit dm_mirror_exit(void)
 {
-	int r;
-
-	r = dm_unregister_target(&mirror_target);
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&mirror_target);
 }
 
 /* Module hooks */
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 4ceedd4..a8005b4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1470,17 +1470,10 @@ static int __init dm_snapshot_init(void)
 
 static void __exit dm_snapshot_exit(void)
 {
-	int r;
-
 	destroy_workqueue(ksnapd);
 
-	r = dm_unregister_target(&snapshot_target);
-	if (r)
-		DMERR("snapshot unregister failed %d", r);
-
-	r = dm_unregister_target(&origin_target);
-	if (r)
-		DMERR("origin unregister failed %d", r);
+	dm_unregister_target(&snapshot_target);
+	dm_unregister_target(&origin_target);
 
 	exit_origin_hash();
 	kmem_cache_destroy(pending_cache);
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 9e4ef88..41569bc 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -337,9 +337,7 @@ int __init dm_stripe_init(void)
 
 void dm_stripe_exit(void)
 {
-	if (dm_unregister_target(&stripe_target))
-		DMWARN("target unregistration failed");
-
+	dm_unregister_target(&stripe_target);
 	destroy_workqueue(kstriped);
 
 	return;
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 835cf95..7decf10 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -130,26 +130,26 @@ int dm_register_target(struct target_type *t)
 	return rv;
 }
 
-int dm_unregister_target(struct target_type *t)
+void dm_unregister_target(struct target_type *t)
 {
 	struct tt_internal *ti;
 
 	down_write(&_lock);
 	if (!(ti = __find_target_type(t->name))) {
-		up_write(&_lock);
-		return -EINVAL;
+		DMCRIT("Unregistering unrecognised target: %s", t->name);
+		BUG();
 	}
 
 	if (ti->use) {
-		up_write(&_lock);
-		return -ETXTBSY;
+		DMCRIT("Attempt to unregister target still in use: %s",
+		       t->name);
+		BUG();
 	}
 
 	list_del(&ti->list);
 	kfree(ti);
 
 	up_write(&_lock);
-	return 0;
 }
 
 /*
@@ -187,8 +187,7 @@ int __init dm_target_init(void)
 
 void dm_target_exit(void)
 {
-	if (dm_unregister_target(&error_target))
-		DMWARN("error target unregistration failed");
+	dm_unregister_target(&error_target);
 }
 
 EXPORT_SYMBOL(dm_register_target);
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index cdbf126..bbc9703 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -69,10 +69,7 @@ static int __init dm_zero_init(void)
 
 static void __exit dm_zero_exit(void)
 {
-	int r = dm_unregister_target(&zero_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&zero_target);
 }
 
 module_init(dm_zero_init)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index c17fd33..89ff2df 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -157,8 +157,7 @@ struct dm_target {
 };
 
 int dm_register_target(struct target_type *t);
-int dm_unregister_target(struct target_type *t);
-
+void dm_unregister_target(struct target_type *t);
 
 /*-----------------------------------------------------------------
  * Functions for creating and manipulating mapped devices.
@@ -276,6 +275,9 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
  *---------------------------------------------------------------*/
 #define DM_NAME "device-mapper"
 
+#define DMCRIT(f, arg...) \
+	printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
+
 #define DMERR(f, arg...) \
 	printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
 #define DMERR_LIMIT(f, arg...) \
-- 
cgit v0.10.2


From 6f3af01cb0eda0ec50fe1e4cbdf028269dc396fe Mon Sep 17 00:00:00 2001
From: Takahiro Yasui <tyasui@redhat.com>
Date: Tue, 6 Jan 2009 03:04:59 +0000
Subject: dm log: avoid reinitialising io_req on every operation

rw_header function updates three members of io_req data every time
when I/O is processed. bi_rw and notify.fn are never modified once
they get initialized, and so they can be set in advance.

header_to_disk() can also be pulled out of write_header() since only one
caller needs it and write_header() can be replaced by rw_header()
directly.

Signed-off-by: Takahiro Yasui <tyasui@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 13e2a1a..691cb9c 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -326,8 +326,6 @@ static void header_from_disk(struct log_header *core, struct log_header *disk)
 static int rw_header(struct log_c *lc, int rw)
 {
 	lc->io_req.bi_rw = rw;
-	lc->io_req.mem.ptr.vma = lc->disk_header;
-	lc->io_req.notify.fn = NULL;
 
 	return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
 }
@@ -362,12 +360,6 @@ static int read_header(struct log_c *log)
 	return 0;
 }
 
-static inline int write_header(struct log_c *log)
-{
-	header_to_disk(&log->header, log->disk_header);
-	return rw_header(log, WRITE);
-}
-
 /*----------------------------------------------------------------
  * core log constructor/destructor
  *
@@ -454,7 +446,9 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
 				       bitset_size, ti->limits.hardsect_size);
 		lc->header_location.count = buf_size >> SECTOR_SHIFT;
+
 		lc->io_req.mem.type = DM_IO_VMA;
+		lc->io_req.notify.fn = NULL;
 		lc->io_req.client = dm_io_client_create(dm_div_up(buf_size,
 								   PAGE_SIZE));
 		if (IS_ERR(lc->io_req.client)) {
@@ -472,6 +466,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 			return -ENOMEM;
 		}
 
+		lc->io_req.mem.ptr.vma = lc->disk_header;
 		lc->clean_bits = (void *)lc->disk_header +
 				 (LOG_OFFSET << SECTOR_SHIFT);
 	}
@@ -636,8 +631,10 @@ static int disk_resume(struct dm_dirty_log *log)
 	/* set the correct number of regions in the header */
 	lc->header.nr_regions = lc->region_count;
 
+	header_to_disk(&lc->header, lc->disk_header);
+
 	/* write the new header */
-	r = write_header(lc);
+	r = rw_header(lc, WRITE);
 	if (r) {
 		DMWARN("%s: Failed to write header on dirty region log device",
 		       lc->log_dev->name);
@@ -687,7 +684,7 @@ static int disk_flush(struct dm_dirty_log *log)
 	if (!lc->touched)
 		return 0;
 
-	r = write_header(lc);
+	r = rw_header(lc, WRITE);
 	if (r)
 		fail_log_device(lc);
 	else
-- 
cgit v0.10.2


From 2045e88edb4e0c9ce62d317f77dc59d27d9c530e Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Tue, 6 Jan 2009 03:05:01 +0000
Subject: dm log: move region_size validation

Move log size validation from mirror target to log constructor.

Removed PAGE_SIZE restriction we no longer think necessary.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 691cb9c..40ed70d 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -360,6 +360,17 @@ static int read_header(struct log_c *log)
 	return 0;
 }
 
+static int _check_region_size(struct dm_target *ti, uint32_t region_size)
+{
+	if (region_size < 2 || region_size > ti->len)
+		return 0;
+
+	if (!is_power_of_2(region_size))
+		return 0;
+
+	return 1;
+}
+
 /*----------------------------------------------------------------
  * core log constructor/destructor
  *
@@ -395,8 +406,9 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		}
 	}
 
-	if (sscanf(argv[0], "%u", &region_size) != 1) {
-		DMWARN("invalid region size string");
+	if (sscanf(argv[0], "%u", &region_size) != 1 ||
+	    !_check_region_size(ti, region_size)) {
+		DMWARN("invalid region size %s", argv[0]);
 		return -EINVAL;
 	}
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 250f401..4d6bc10 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -808,12 +808,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
 	kfree(ms);
 }
 
-static inline int _check_region_size(struct dm_target *ti, uint32_t size)
-{
-	return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) ||
-		 size > ti->len);
-}
-
 static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
 		      unsigned int mirror, char **argv)
 {
@@ -872,12 +866,6 @@ static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
 		return NULL;
 	}
 
-	if (!_check_region_size(ti, dl->type->get_region_size(dl))) {
-		ti->error = "Invalid region size";
-		dm_dirty_log_destroy(dl);
-		return NULL;
-	}
-
 	return dl;
 }
 
-- 
cgit v0.10.2


From ac1f0ac22c7be908fd33407273b9808bfaedada4 Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Tue, 6 Jan 2009 03:05:02 +0000
Subject: dm log: ensure log bitmap fits on log device

Check that the log bitmap will fit within the log device.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 40ed70d..737961f 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -457,6 +457,14 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		 */
 		buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
 				       bitset_size, ti->limits.hardsect_size);
+
+		if (buf_size > dev->bdev->bd_inode->i_size) {
+			DMWARN("log device %s too small: need %llu bytes",
+				dev->name, (unsigned long long)buf_size);
+			kfree(lc);
+			return -EINVAL;
+		}
+
 		lc->header_location.count = buf_size >> SECTOR_SHIFT;
 
 		lc->io_req.mem.type = DM_IO_VMA;
-- 
cgit v0.10.2


From 23d39f63aa87e812fd879b8bc32ee6ccfe733de3 Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Tue, 6 Jan 2009 03:05:04 +0000
Subject: dm ioctl: allow dm_copy_name_and_uuid to return only one field

Allow NULL buffer in dm_copy_name_and_uuid if you only want to return one of
the fields.

(Required by a following patch that adds these fields to sysfs.)

Signed-off-by: Milan Broz <mbroz@redhat.com>
Reviewed-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 777c948..8da7a01 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1550,8 +1550,10 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
 		goto out;
 	}
 
-	strcpy(name, hc->name);
-	strcpy(uuid, hc->uuid ? : "");
+	if (name)
+		strcpy(name, hc->name);
+	if (uuid)
+		strcpy(uuid, hc->uuid ? : "");
 
 out:
 	up_read(&_hash_lock);
-- 
cgit v0.10.2


From 8fbf26ad5b16ad3a826ca7fe3e86700420abed1f Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Tue, 6 Jan 2009 03:05:06 +0000
Subject: dm request: add caches

This patch prepares some kmem_caches for request-based dm.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8237141..4882ce7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -32,6 +32,7 @@ static unsigned int _major = 0;
 
 static DEFINE_SPINLOCK(_minor_lock);
 /*
+ * For bio-based dm.
  * One of these is allocated per bio.
  */
 struct dm_io {
@@ -43,6 +44,7 @@ struct dm_io {
 };
 
 /*
+ * For bio-based dm.
  * One of these is allocated per target within a bio.  Hopefully
  * this will be simplified out one day.
  */
@@ -54,6 +56,27 @@ struct dm_target_io {
 
 DEFINE_TRACE(block_bio_complete);
 
+/*
+ * For request-based dm.
+ * One of these is allocated per request.
+ */
+struct dm_rq_target_io {
+	struct mapped_device *md;
+	struct dm_target *ti;
+	struct request *orig, clone;
+	int error;
+	union map_info info;
+};
+
+/*
+ * For request-based dm.
+ * One of these is allocated per bio.
+ */
+struct dm_rq_clone_bio_info {
+	struct bio *orig;
+	struct request *rq;
+};
+
 union map_info *dm_get_mapinfo(struct bio *bio)
 {
 	if (bio && bio->bi_private)
@@ -149,6 +172,8 @@ struct mapped_device {
 #define MIN_IOS 256
 static struct kmem_cache *_io_cache;
 static struct kmem_cache *_tio_cache;
+static struct kmem_cache *_rq_tio_cache;
+static struct kmem_cache *_rq_bio_info_cache;
 
 static int __init local_init(void)
 {
@@ -164,9 +189,17 @@ static int __init local_init(void)
 	if (!_tio_cache)
 		goto out_free_io_cache;
 
+	_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
+	if (!_rq_tio_cache)
+		goto out_free_tio_cache;
+
+	_rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0);
+	if (!_rq_bio_info_cache)
+		goto out_free_rq_tio_cache;
+
 	r = dm_uevent_init();
 	if (r)
-		goto out_free_tio_cache;
+		goto out_free_rq_bio_info_cache;
 
 	_major = major;
 	r = register_blkdev(_major, _name);
@@ -180,6 +213,10 @@ static int __init local_init(void)
 
 out_uevent_exit:
 	dm_uevent_exit();
+out_free_rq_bio_info_cache:
+	kmem_cache_destroy(_rq_bio_info_cache);
+out_free_rq_tio_cache:
+	kmem_cache_destroy(_rq_tio_cache);
 out_free_tio_cache:
 	kmem_cache_destroy(_tio_cache);
 out_free_io_cache:
@@ -190,6 +227,8 @@ out_free_io_cache:
 
 static void local_exit(void)
 {
+	kmem_cache_destroy(_rq_bio_info_cache);
+	kmem_cache_destroy(_rq_tio_cache);
 	kmem_cache_destroy(_tio_cache);
 	kmem_cache_destroy(_io_cache);
 	unregister_blkdev(_major, _name);
-- 
cgit v0.10.2


From 7d76345da6ed3927c9cbf5d3f7a7021e8bba7374 Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Tue, 6 Jan 2009 03:05:07 +0000
Subject: dm request: extend target interface

This patch adds the following target interfaces for request-based dm.

  map_rq    : for mapping a request

  rq_end_io : for finishing a request

  busy      : for avoiding performance regression from bio-based dm.
              Target can tell dm core not to map requests now, and
              that may help requests in the block layer queue to be
              bigger by I/O merging.
              In bio-based dm, this behavior is done by device
              drivers managing the block layer queue.
              But in request-based dm, dm core has to do that
              since dm core manages the block layer queue.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 89ff2df..c1ba76c 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -45,6 +45,8 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
  */
 typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
 			  union map_info *map_context);
+typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
+				  union map_info *map_context);
 
 /*
  * Returns:
@@ -57,6 +59,9 @@ typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
 typedef int (*dm_endio_fn) (struct dm_target *ti,
 			    struct bio *bio, int error,
 			    union map_info *map_context);
+typedef int (*dm_request_endio_fn) (struct dm_target *ti,
+				    struct request *clone, int error,
+				    union map_info *map_context);
 
 typedef void (*dm_flush_fn) (struct dm_target *ti);
 typedef void (*dm_presuspend_fn) (struct dm_target *ti);
@@ -75,6 +80,13 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
 typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
 			    struct bio_vec *biovec, int max_size);
 
+/*
+ * Returns:
+ *    0: The target can handle the next I/O immediately.
+ *    1: The target can't handle the next I/O immediately.
+ */
+typedef int (*dm_busy_fn) (struct dm_target *ti);
+
 void dm_error(const char *message);
 
 /*
@@ -107,7 +119,9 @@ struct target_type {
 	dm_ctr_fn ctr;
 	dm_dtr_fn dtr;
 	dm_map_fn map;
+	dm_map_request_fn map_rq;
 	dm_endio_fn end_io;
+	dm_request_endio_fn rq_end_io;
 	dm_flush_fn flush;
 	dm_presuspend_fn presuspend;
 	dm_postsuspend_fn postsuspend;
@@ -117,6 +131,7 @@ struct target_type {
 	dm_message_fn message;
 	dm_ioctl_fn ioctl;
 	dm_merge_fn merge;
+	dm_busy_fn busy;
 };
 
 struct io_restrictions {
-- 
cgit v0.10.2


From ab4c1424882be9cd70b89abf2b484add355712fa Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 6 Jan 2009 03:05:09 +0000
Subject: dm: support barriers on simple devices

Implement barrier support for single device DM devices

This patch implements barrier support in DM for the common case of dm linear
just remapping a single underlying device. In this case we can safely
pass the barrier through because there can be no reordering between
devices.

 NB. Any DM device might cease to support barriers if it gets
     reconfigured so code must continue to allow for a possible
     -EOPNOTSUPP on every barrier bio submitted.  - agk

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 79fb53e..bfa107f 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -142,6 +142,7 @@ static struct target_type linear_target = {
 	.status = linear_status,
 	.ioctl  = linear_ioctl,
 	.merge  = linear_merge,
+	.features = DM_TARGET_SUPPORTS_BARRIERS,
 };
 
 int __init dm_linear_init(void)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 04e5fd7..ebaaf72 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -38,6 +38,8 @@ struct dm_table {
 	sector_t *highs;
 	struct dm_target *targets;
 
+	unsigned barriers_supported:1;
+
 	/*
 	 * Indicates the rw permissions for the new logical
 	 * device.  This should be a combination of FMODE_READ
@@ -227,6 +229,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 
 	INIT_LIST_HEAD(&t->devices);
 	atomic_set(&t->holders, 1);
+	t->barriers_supported = 1;
 
 	if (!num_targets)
 		num_targets = KEYS_PER_NODE;
@@ -728,6 +731,10 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 	/* FIXME: the plan is to combine high here and then have
 	 * the merge fn apply the target level restrictions. */
 	combine_restrictions_low(&t->limits, &tgt->limits);
+
+	if (!(tgt->type->features & DM_TARGET_SUPPORTS_BARRIERS))
+		t->barriers_supported = 0;
+
 	return 0;
 
  bad:
@@ -772,6 +779,12 @@ int dm_table_complete(struct dm_table *t)
 
 	check_for_valid_limits(&t->limits);
 
+	/*
+	 * We only support barriers if there is exactly one underlying device.
+	 */
+	if (!list_is_singular(&t->devices))
+		t->barriers_supported = 0;
+
 	/* how many indexes will the btree have ? */
 	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
 	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
@@ -986,6 +999,12 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
 	return t->md;
 }
 
+int dm_table_barrier_ok(struct dm_table *t)
+{
+	return t->barriers_supported;
+}
+EXPORT_SYMBOL(dm_table_barrier_ok);
+
 EXPORT_SYMBOL(dm_vcalloc);
 EXPORT_SYMBOL(dm_get_device);
 EXPORT_SYMBOL(dm_put_device);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4882ce7..dd953b1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -835,7 +835,11 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
 	ci.map = dm_get_table(md);
 	if (unlikely(!ci.map))
 		return -EIO;
-
+	if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
+		dm_table_put(ci.map);
+		bio_endio(bio, -EOPNOTSUPP);
+		return 0;
+	}
 	ci.md = md;
 	ci.bio = bio;
 	ci.io = alloc_io(md);
@@ -919,15 +923,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	struct mapped_device *md = q->queuedata;
 	int cpu;
 
-	/*
-	 * There is no use in forwarding any barrier request since we can't
-	 * guarantee it is (or can be) handled by the targets correctly.
-	 */
-	if (unlikely(bio_barrier(bio))) {
-		bio_endio(bio, -EOPNOTSUPP);
-		return 0;
-	}
-
 	down_read(&md->io_lock);
 
 	cpu = part_stat_lock();
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 0ade60c..5b5d08b 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -51,6 +51,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits);
  * To check the return value from dm_table_find_target().
  */
 #define dm_target_is_valid(t) ((t)->table)
+int dm_table_barrier_ok(struct dm_table *t);
 
 /*-----------------------------------------------------------------
  * A registry of target types.
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index c1ba76c..8209e08 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -112,7 +112,14 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d);
 /*
  * Information about a target type
  */
+
+/*
+ * Target features
+ */
+#define DM_TARGET_SUPPORTS_BARRIERS 0x00000001
+
 struct target_type {
+	uint64_t features;
 	const char *name;
 	struct module *module;
 	unsigned version[3];
-- 
cgit v0.10.2


From d58168763f74d1edbc296d7038c60efe6493fdd4 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 6 Jan 2009 03:05:10 +0000
Subject: dm table: rework reference counting

Rework table reference counting.

The existing code uses a reference counter. When the last reference is
dropped and the counter reaches zero, the table destructor is called.
Table reference counters are acquired/released from upcalls from other
kernel code (dm_any_congested, dm_merge_bvec, dm_unplug_all).
If the reference counter reaches zero in one of the upcalls, the table
destructor is called from almost random kernel code.

This leads to various problems:
* dm_any_congested being called under a spinlock, which calls the
  destructor, which calls some sleeping function.
* the destructor attempting to take a lock that is already taken by the
  same process.
* stale reference from some other kernel code keeps the table
  constructed, which keeps some devices open, even after successful
  return from "dmsetup remove". This can confuse lvm and prevent closing
  of underlying devices or reusing device minor numbers.

The patch changes reference counting so that the table destructor can be
called only at predetermined places.

The table has always exactly one reference from either mapped_device->map
or hash_cell->new_map. After this patch, this reference is not counted
in table->holders.  A pair of dm_create_table/dm_destroy_table functions
is used for table creation/destruction.

Temporary references from the other code increase table->holders. A pair
of dm_table_get/dm_table_put functions is used to manipulate it.

When the table is about to be destroyed, we wait for table->holders to
reach 0. Then, we call the table destructor.  We use active waiting with
msleep(1), because the situation happens rarely (to one user in 5 years)
and removing the device isn't performance-critical task: the user doesn't
care if it takes one tick more or not.

This way, the destructor is called only at specific points
(dm_table_destroy function) and the above problems associated with lazy
destruction can't happen.

Finally remove the temporary protection added to dm_any_congested().

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 8da7a01..54d0588 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -233,7 +233,7 @@ static void __hash_remove(struct hash_cell *hc)
 	}
 
 	if (hc->new_map)
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 	dm_put(hc->md);
 	free_cell(hc);
 }
@@ -827,8 +827,8 @@ static int do_resume(struct dm_ioctl *param)
 
 		r = dm_swap_table(md, new_map);
 		if (r) {
+			dm_table_destroy(new_map);
 			dm_put(md);
-			dm_table_put(new_map);
 			return r;
 		}
 
@@ -836,8 +836,6 @@ static int do_resume(struct dm_ioctl *param)
 			set_disk_ro(dm_disk(md), 0);
 		else
 			set_disk_ro(dm_disk(md), 1);
-
-		dm_table_put(new_map);
 	}
 
 	if (dm_suspended(md))
@@ -1080,7 +1078,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 	}
 
 	if (hc->new_map)
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 	hc->new_map = t;
 	up_write(&_hash_lock);
 
@@ -1109,7 +1107,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
 	}
 
 	if (hc->new_map) {
-		dm_table_put(hc->new_map);
+		dm_table_destroy(hc->new_map);
 		hc->new_map = NULL;
 	}
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index ebaaf72..2fd66c3 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2001 Sistina Software (UK) Limited.
- * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
  *
  * This file is released under the GPL.
  */
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/mutex.h>
+#include <linux/delay.h>
 #include <asm/atomic.h>
 
 #define DM_MSG_PREFIX "table"
@@ -24,6 +25,19 @@
 #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
 
+/*
+ * The table has always exactly one reference from either mapped_device->map
+ * or hash_cell->new_map. This reference is not counted in table->holders.
+ * A pair of dm_create_table/dm_destroy_table functions is used for table
+ * creation/destruction.
+ *
+ * Temporary references from the other code increase table->holders. A pair
+ * of dm_table_get/dm_table_put functions is used to manipulate it.
+ *
+ * When the table is about to be destroyed, we wait for table->holders to
+ * drop to zero.
+ */
+
 struct dm_table {
 	struct mapped_device *md;
 	atomic_t holders;
@@ -228,7 +242,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&t->devices);
-	atomic_set(&t->holders, 1);
+	atomic_set(&t->holders, 0);
 	t->barriers_supported = 1;
 
 	if (!num_targets)
@@ -259,10 +273,14 @@ static void free_devices(struct list_head *devices)
 	}
 }
 
-static void table_destroy(struct dm_table *t)
+void dm_table_destroy(struct dm_table *t)
 {
 	unsigned int i;
 
+	while (atomic_read(&t->holders))
+		msleep(1);
+	smp_mb();
+
 	/* free the indexes (see dm_table_complete) */
 	if (t->depth >= 2)
 		vfree(t->index[t->depth - 2]);
@@ -300,8 +318,8 @@ void dm_table_put(struct dm_table *t)
 	if (!t)
 		return;
 
-	if (atomic_dec_and_test(&t->holders))
-		table_destroy(t);
+	smp_mb__before_atomic_dec();
+	atomic_dec(&t->holders);
 }
 
 /*
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dd953b1..9f9aa64 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -977,8 +977,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 	struct mapped_device *md = congested_data;
 	struct dm_table *map;
 
-	atomic_inc(&md->pending);
-
 	if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
 		map = dm_get_table(md);
 		if (map) {
@@ -987,10 +985,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 		}
 	}
 
-	if (!atomic_dec_return(&md->pending))
-		/* nudge anyone waiting on suspend queue */
-		wake_up(&md->wait);
-
 	return r;
 }
 
@@ -1250,10 +1244,12 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
 
 	if (md->suspended_bdev)
 		__set_size(md, size);
-	if (size == 0)
+
+	if (!size) {
+		dm_table_destroy(t);
 		return 0;
+	}
 
-	dm_table_get(t);
 	dm_table_event_callback(t, event_callback, md);
 
 	write_lock(&md->map_lock);
@@ -1275,7 +1271,7 @@ static void __unbind(struct mapped_device *md)
 	write_lock(&md->map_lock);
 	md->map = NULL;
 	write_unlock(&md->map_lock);
-	dm_table_put(map);
+	dm_table_destroy(map);
 }
 
 /*
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 5b5d08b..bbbe911 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -36,6 +36,7 @@ struct dm_table;
 /*-----------------------------------------------------------------
  * Internal table functions.
  *---------------------------------------------------------------*/
+void dm_table_destroy(struct dm_table *t);
 void dm_table_event_callback(struct dm_table *t,
 			     void (*fn)(void *), void *context);
 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
-- 
cgit v0.10.2


From 784aae735d9b0bba3f8b9faef4c8b30df3bf0128 Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Tue, 6 Jan 2009 03:05:12 +0000
Subject: dm: add name and uuid to sysfs

Implement simple read-only sysfs entry for device-mapper block device.

This patch adds a simple sysfs directory named "dm" under block device
properties and implements
	- name attribute (string containing mapped device name)
	- uuid attribute (string containing UUID, or empty string if not set)

The kobject is embedded in mapped_device struct, so no additional
memory allocation is needed for initializing sysfs entry.

During the processing of sysfs attribute we need to lock mapped device
which is done by a new function dm_get_from_kobj, which returns the md
associated with kobject and increases the usage count.

Each 'show attribute' function is responsible for its own locking.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 1c61580..63f0ae9 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -3,7 +3,7 @@
 #
 
 dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
-		   dm-ioctl.o dm-io.o dm-kcopyd.o
+		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
 dm-multipath-objs := dm-path-selector.o dm-mpath.o
 dm-snapshot-objs := dm-snap.o dm-exception-store.o
 dm-mirror-objs	:= dm-raid1.o
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c
new file mode 100644
index 0000000..a2a45e6
--- /dev/null
+++ b/drivers/md/dm-sysfs.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2008 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/sysfs.h>
+#include <linux/dm-ioctl.h>
+#include "dm.h"
+
+struct dm_sysfs_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct mapped_device *, char *);
+	ssize_t (*store)(struct mapped_device *, char *);
+};
+
+#define DM_ATTR_RO(_name) \
+struct dm_sysfs_attr dm_attr_##_name = \
+	__ATTR(_name, S_IRUGO, dm_attr_##_name##_show, NULL)
+
+static ssize_t dm_attr_show(struct kobject *kobj, struct attribute *attr,
+			    char *page)
+{
+	struct dm_sysfs_attr *dm_attr;
+	struct mapped_device *md;
+	ssize_t ret;
+
+	dm_attr = container_of(attr, struct dm_sysfs_attr, attr);
+	if (!dm_attr->show)
+		return -EIO;
+
+	md = dm_get_from_kobject(kobj);
+	if (!md)
+		return -EINVAL;
+
+	ret = dm_attr->show(md, page);
+	dm_put(md);
+
+	return ret;
+}
+
+static ssize_t dm_attr_name_show(struct mapped_device *md, char *buf)
+{
+	if (dm_copy_name_and_uuid(md, buf, NULL))
+		return -EIO;
+
+	strcat(buf, "\n");
+	return strlen(buf);
+}
+
+static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf)
+{
+	if (dm_copy_name_and_uuid(md, NULL, buf))
+		return -EIO;
+
+	strcat(buf, "\n");
+	return strlen(buf);
+}
+
+static DM_ATTR_RO(name);
+static DM_ATTR_RO(uuid);
+
+static struct attribute *dm_attrs[] = {
+	&dm_attr_name.attr,
+	&dm_attr_uuid.attr,
+	NULL,
+};
+
+static struct sysfs_ops dm_sysfs_ops = {
+	.show	= dm_attr_show,
+};
+
+/*
+ * dm kobject is embedded in mapped_device structure
+ * no need to define release function here
+ */
+static struct kobj_type dm_ktype = {
+	.sysfs_ops	= &dm_sysfs_ops,
+	.default_attrs	= dm_attrs,
+};
+
+/*
+ * Initialize kobj
+ * because nobody using md yet, no need to call explicit dm_get/put
+ */
+int dm_sysfs_init(struct mapped_device *md)
+{
+	return kobject_init_and_add(dm_kobject(md), &dm_ktype,
+				    &disk_to_dev(dm_disk(md))->kobj,
+				    "%s", "dm");
+}
+
+/*
+ * Remove kobj, called after all references removed
+ */
+void dm_sysfs_exit(struct mapped_device *md)
+{
+	kobject_put(dm_kobject(md));
+}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 9f9aa64..51ba1db 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
  *
  * This file is released under the GPL.
  */
@@ -167,6 +167,9 @@ struct mapped_device {
 
 	/* forced geometry settings */
 	struct hd_geometry geometry;
+
+	/* sysfs handle */
+	struct kobject kobj;
 };
 
 #define MIN_IOS 256
@@ -1285,6 +1288,8 @@ int dm_create(int minor, struct mapped_device **result)
 	if (!md)
 		return -ENXIO;
 
+	dm_sysfs_init(md);
+
 	*result = md;
 	return 0;
 }
@@ -1360,6 +1365,7 @@ void dm_put(struct mapped_device *md)
 			dm_table_presuspend_targets(map);
 			dm_table_postsuspend_targets(map);
 		}
+		dm_sysfs_exit(md);
 		dm_table_put(map);
 		__unbind(md);
 		free_dev(md);
@@ -1699,6 +1705,27 @@ struct gendisk *dm_disk(struct mapped_device *md)
 	return md->disk;
 }
 
+struct kobject *dm_kobject(struct mapped_device *md)
+{
+	return &md->kobj;
+}
+
+/*
+ * struct mapped_device should not be exported outside of dm.c
+ * so use this check to verify that kobj is part of md structure
+ */
+struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
+{
+	struct mapped_device *md;
+
+	md = container_of(kobj, struct mapped_device, kobj);
+	if (&md->kobj != kobj)
+		return NULL;
+
+	dm_get(md);
+	return md;
+}
+
 int dm_suspended(struct mapped_device *md)
 {
 	return test_bit(DMF_SUSPENDED, &md->flags);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index bbbe911..20194e0 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -74,6 +74,14 @@ int dm_interface_init(void);
 void dm_interface_exit(void);
 
 /*
+ * sysfs interface
+ */
+int dm_sysfs_init(struct mapped_device *md);
+void dm_sysfs_exit(struct mapped_device *md);
+struct kobject *dm_kobject(struct mapped_device *md);
+struct mapped_device *dm_get_from_kobject(struct kobject *kobj);
+
+/*
  * Targets for linear and striped mappings
  */
 int dm_linear_init(void);
-- 
cgit v0.10.2


From fe9cf30eb8186ef267d1868dc9f12f2d0f40835a Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Tue, 6 Jan 2009 03:05:13 +0000
Subject: dm mpath: move trigger_event to system workqueue

The same workqueue is used both for sending uevents and processing queued I/O.
Deadlock has been reported in RHEL5 when sending a uevent was blocked waiting
for the queued I/O to be processed.  Use scheduled_work() for the asynchronous
uevents instead.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 345a260..095f77b 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -889,7 +889,7 @@ static int fail_path(struct pgpath *pgpath)
 	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
 		      pgpath->path.dev->name, m->nr_valid_paths);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 	queue_work(kmultipathd, &pgpath->deactivate_path);
 
 out:
@@ -932,7 +932,7 @@ static int reinstate_path(struct pgpath *pgpath)
 	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
 		      pgpath->path.dev->name, m->nr_valid_paths);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
@@ -976,7 +976,7 @@ static void bypass_pg(struct multipath *m, struct priority_group *pg,
 
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 }
 
 /*
@@ -1006,7 +1006,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
 	}
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	queue_work(kmultipathd, &m->trigger_event);
+	schedule_work(&m->trigger_event);
 	return 0;
 }
 
-- 
cgit v0.10.2


From aea53d92f70eeb00ae480e399a997dd55fd5055d Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Tue, 6 Jan 2009 03:05:15 +0000
Subject: dm snapshot: separate out exception store interface

Pull structures that bridge the gap between snapshot and
exception store out of dm-snap.h and put them in a new
.h file - dm-exception-store.h.  This file will define the
API for new exception stores.

Ultimately, dm-snap.h is unnecessary, since only dm-snap.c
should be using it.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 01590f3e..ef152e6 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -7,6 +7,7 @@
  * This file is released under the GPL.
  */
 
+#include "dm-exception-store.h"
 #include "dm-snap.h"
 
 #include <linux/mm.h>
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
new file mode 100644
index 0000000..d75f775
--- /dev/null
+++ b/drivers/md/dm-exception-store.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ * Copyright (C) 2008 Red Hat, Inc. All rights reserved.
+ *
+ * Device-mapper snapshot exception store.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef _LINUX_DM_EXCEPTION_STORE
+#define _LINUX_DM_EXCEPTION_STORE
+
+#include <linux/blkdev.h>
+
+/*
+ * The snapshot code deals with largish chunks of the disk at a
+ * time. Typically 32k - 512k.
+ */
+typedef sector_t chunk_t;
+
+/*
+ * An exception is used where an old chunk of data has been
+ * replaced by a new one.
+ * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number
+ * of chunks that follow contiguously.  Remaining bits hold the number of the
+ * chunk within the device.
+ */
+struct dm_snap_exception {
+	struct list_head hash_list;
+
+	chunk_t old_chunk;
+	chunk_t new_chunk;
+};
+
+/*
+ * Abstraction to handle the meta/layout of exception stores (the
+ * COW device).
+ */
+struct exception_store {
+	/*
+	 * Destroys this object when you've finished with it.
+	 */
+	void (*destroy) (struct exception_store *store);
+
+	/*
+	 * The target shouldn't read the COW device until this is
+	 * called.
+	 */
+	int (*read_metadata) (struct exception_store *store);
+
+	/*
+	 * Find somewhere to store the next exception.
+	 */
+	int (*prepare_exception) (struct exception_store *store,
+				  struct dm_snap_exception *e);
+
+	/*
+	 * Update the metadata with this exception.
+	 */
+	void (*commit_exception) (struct exception_store *store,
+				  struct dm_snap_exception *e,
+				  void (*callback) (void *, int success),
+				  void *callback_context);
+
+	/*
+	 * The snapshot is invalid, note this in the metadata.
+	 */
+	void (*drop_snapshot) (struct exception_store *store);
+
+	/*
+	 * Return how full the snapshot is.
+	 */
+	void (*fraction_full) (struct exception_store *store,
+			       sector_t *numerator,
+			       sector_t *denominator);
+
+	struct dm_snapshot *snap;
+	void *context;
+};
+
+/*
+ * Funtions to manipulate consecutive chunks
+ */
+#  if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
+#    define DM_CHUNK_CONSECUTIVE_BITS 8
+#    define DM_CHUNK_NUMBER_BITS 56
+
+static inline chunk_t dm_chunk_number(chunk_t chunk)
+{
+	return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL);
+}
+
+static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
+{
+	return e->new_chunk >> DM_CHUNK_NUMBER_BITS;
+}
+
+static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
+{
+	e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS);
+
+	BUG_ON(!dm_consecutive_chunk_count(e));
+}
+
+#  else
+#    define DM_CHUNK_CONSECUTIVE_BITS 0
+
+static inline chunk_t dm_chunk_number(chunk_t chunk)
+{
+	return chunk;
+}
+
+static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
+{
+	return 0;
+}
+
+static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
+{
+}
+
+#  endif
+
+/*
+ * Two exception store implementations.
+ */
+int dm_create_persistent(struct exception_store *store);
+
+int dm_create_transient(struct exception_store *store);
+
+#endif /* _LINUX_DM_EXCEPTION_STORE */
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index a8005b4..81f03a0 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -21,6 +21,7 @@
 #include <linux/log2.h>
 #include <linux/dm-kcopyd.h>
 
+#include "dm-exception-store.h"
 #include "dm-snap.h"
 #include "dm-bio-list.h"
 
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h
index 99c0106..6e4beaf 100644
--- a/drivers/md/dm-snap.h
+++ b/drivers/md/dm-snap.h
@@ -1,6 +1,4 @@
 /*
- * dm-snapshot.c
- *
  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
  *
  * This file is released under the GPL.
@@ -10,6 +8,7 @@
 #define DM_SNAPSHOT_H
 
 #include <linux/device-mapper.h>
+#include "dm-exception-store.h"
 #include "dm-bio-list.h"
 #include <linux/blkdev.h>
 #include <linux/workqueue.h>
@@ -20,116 +19,6 @@ struct exception_table {
 	struct list_head *table;
 };
 
-/*
- * The snapshot code deals with largish chunks of the disk at a
- * time. Typically 32k - 512k.
- */
-typedef sector_t chunk_t;
-
-/*
- * An exception is used where an old chunk of data has been
- * replaced by a new one.
- * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number
- * of chunks that follow contiguously.  Remaining bits hold the number of the
- * chunk within the device.
- */
-struct dm_snap_exception {
-	struct list_head hash_list;
-
-	chunk_t old_chunk;
-	chunk_t new_chunk;
-};
-
-/*
- * Funtions to manipulate consecutive chunks
- */
-#  if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
-#    define DM_CHUNK_CONSECUTIVE_BITS 8
-#    define DM_CHUNK_NUMBER_BITS 56
-
-static inline chunk_t dm_chunk_number(chunk_t chunk)
-{
-	return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL);
-}
-
-static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
-{
-	return e->new_chunk >> DM_CHUNK_NUMBER_BITS;
-}
-
-static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
-{
-	e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS);
-
-	BUG_ON(!dm_consecutive_chunk_count(e));
-}
-
-#  else
-#    define DM_CHUNK_CONSECUTIVE_BITS 0
-
-static inline chunk_t dm_chunk_number(chunk_t chunk)
-{
-	return chunk;
-}
-
-static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
-{
-	return 0;
-}
-
-static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
-{
-}
-
-#  endif
-
-/*
- * Abstraction to handle the meta/layout of exception stores (the
- * COW device).
- */
-struct exception_store {
-
-	/*
-	 * Destroys this object when you've finished with it.
-	 */
-	void (*destroy) (struct exception_store *store);
-
-	/*
-	 * The target shouldn't read the COW device until this is
-	 * called.
-	 */
-	int (*read_metadata) (struct exception_store *store);
-
-	/*
-	 * Find somewhere to store the next exception.
-	 */
-	int (*prepare_exception) (struct exception_store *store,
-				  struct dm_snap_exception *e);
-
-	/*
-	 * Update the metadata with this exception.
-	 */
-	void (*commit_exception) (struct exception_store *store,
-				  struct dm_snap_exception *e,
-				  void (*callback) (void *, int success),
-				  void *callback_context);
-
-	/*
-	 * The snapshot is invalid, note this in the metadata.
-	 */
-	void (*drop_snapshot) (struct exception_store *store);
-
-	/*
-	 * Return how full the snapshot is.
-	 */
-	void (*fraction_full) (struct exception_store *store,
-			       sector_t *numerator,
-			       sector_t *denominator);
-
-	struct dm_snapshot *snap;
-	void *context;
-};
-
 #define DM_TRACKED_CHUNK_HASH_SIZE	16
 #define DM_TRACKED_CHUNK_HASH(x)	((unsigned long)(x) & \
 					 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
@@ -193,14 +82,6 @@ struct dm_snapshot {
 int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new);
 
 /*
- * Constructor and destructor for the default persistent
- * store.
- */
-int dm_create_persistent(struct exception_store *store);
-
-int dm_create_transient(struct exception_store *store);
-
-/*
  * Return the number of sectors in the device.
  */
 static inline sector_t get_dev_size(struct block_device *bdev)
-- 
cgit v0.10.2


From 1ae25f9c933d1432fbffdf3e126051a974608abf Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Tue, 6 Jan 2009 03:05:16 +0000
Subject: dm snapshot: rename struct exception_store

Rename struct exception_store to dm_exception_store.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index ef152e6..c5c9a26 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -462,19 +462,19 @@ static int read_exceptions(struct pstore *ps)
 	return 0;
 }
 
-static struct pstore *get_info(struct exception_store *store)
+static struct pstore *get_info(struct dm_exception_store *store)
 {
 	return (struct pstore *) store->context;
 }
 
-static void persistent_fraction_full(struct exception_store *store,
+static void persistent_fraction_full(struct dm_exception_store *store,
 				     sector_t *numerator, sector_t *denominator)
 {
 	*numerator = get_info(store)->next_free * store->snap->chunk_size;
 	*denominator = get_dev_size(store->snap->cow->bdev);
 }
 
-static void persistent_destroy(struct exception_store *store)
+static void persistent_destroy(struct dm_exception_store *store)
 {
 	struct pstore *ps = get_info(store);
 
@@ -485,7 +485,7 @@ static void persistent_destroy(struct exception_store *store)
 	kfree(ps);
 }
 
-static int persistent_read_metadata(struct exception_store *store)
+static int persistent_read_metadata(struct dm_exception_store *store)
 {
 	int r, uninitialized_var(new_snapshot);
 	struct pstore *ps = get_info(store);
@@ -551,7 +551,7 @@ static int persistent_read_metadata(struct exception_store *store)
 	return 0;
 }
 
-static int persistent_prepare(struct exception_store *store,
+static int persistent_prepare(struct dm_exception_store *store,
 			      struct dm_snap_exception *e)
 {
 	struct pstore *ps = get_info(store);
@@ -578,7 +578,7 @@ static int persistent_prepare(struct exception_store *store,
 	return 0;
 }
 
-static void persistent_commit(struct exception_store *store,
+static void persistent_commit(struct dm_exception_store *store,
 			      struct dm_snap_exception *e,
 			      void (*callback) (void *, int success),
 			      void *callback_context)
@@ -640,7 +640,7 @@ static void persistent_commit(struct exception_store *store,
 	ps->callback_count = 0;
 }
 
-static void persistent_drop(struct exception_store *store)
+static void persistent_drop(struct dm_exception_store *store)
 {
 	struct pstore *ps = get_info(store);
 
@@ -649,7 +649,7 @@ static void persistent_drop(struct exception_store *store)
 		DMWARN("write header failed");
 }
 
-int dm_create_persistent(struct exception_store *store)
+int dm_create_persistent(struct dm_exception_store *store)
 {
 	struct pstore *ps;
 
@@ -694,17 +694,17 @@ struct transient_c {
 	sector_t next_free;
 };
 
-static void transient_destroy(struct exception_store *store)
+static void transient_destroy(struct dm_exception_store *store)
 {
 	kfree(store->context);
 }
 
-static int transient_read_metadata(struct exception_store *store)
+static int transient_read_metadata(struct dm_exception_store *store)
 {
 	return 0;
 }
 
-static int transient_prepare(struct exception_store *store,
+static int transient_prepare(struct dm_exception_store *store,
 			     struct dm_snap_exception *e)
 {
 	struct transient_c *tc = (struct transient_c *) store->context;
@@ -719,7 +719,7 @@ static int transient_prepare(struct exception_store *store,
 	return 0;
 }
 
-static void transient_commit(struct exception_store *store,
+static void transient_commit(struct dm_exception_store *store,
 			     struct dm_snap_exception *e,
 			     void (*callback) (void *, int success),
 			     void *callback_context)
@@ -728,14 +728,14 @@ static void transient_commit(struct exception_store *store,
 	callback(callback_context, 1);
 }
 
-static void transient_fraction_full(struct exception_store *store,
+static void transient_fraction_full(struct dm_exception_store *store,
 				    sector_t *numerator, sector_t *denominator)
 {
 	*numerator = ((struct transient_c *) store->context)->next_free;
 	*denominator = get_dev_size(store->snap->cow->bdev);
 }
 
-int dm_create_transient(struct exception_store *store)
+int dm_create_transient(struct dm_exception_store *store)
 {
 	struct transient_c *tc;
 
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index d75f775..25677df 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -36,28 +36,29 @@ struct dm_snap_exception {
  * Abstraction to handle the meta/layout of exception stores (the
  * COW device).
  */
-struct exception_store {
+struct dm_exception_store {
+
 	/*
 	 * Destroys this object when you've finished with it.
 	 */
-	void (*destroy) (struct exception_store *store);
+	void (*destroy) (struct dm_exception_store *store);
 
 	/*
 	 * The target shouldn't read the COW device until this is
 	 * called.
 	 */
-	int (*read_metadata) (struct exception_store *store);
+	int (*read_metadata) (struct dm_exception_store *store);
 
 	/*
 	 * Find somewhere to store the next exception.
 	 */
-	int (*prepare_exception) (struct exception_store *store,
+	int (*prepare_exception) (struct dm_exception_store *store,
 				  struct dm_snap_exception *e);
 
 	/*
 	 * Update the metadata with this exception.
 	 */
-	void (*commit_exception) (struct exception_store *store,
+	void (*commit_exception) (struct dm_exception_store *store,
 				  struct dm_snap_exception *e,
 				  void (*callback) (void *, int success),
 				  void *callback_context);
@@ -65,12 +66,12 @@ struct exception_store {
 	/*
 	 * The snapshot is invalid, note this in the metadata.
 	 */
-	void (*drop_snapshot) (struct exception_store *store);
+	void (*drop_snapshot) (struct dm_exception_store *store);
 
 	/*
 	 * Return how full the snapshot is.
 	 */
-	void (*fraction_full) (struct exception_store *store,
+	void (*fraction_full) (struct dm_exception_store *store,
 			       sector_t *numerator,
 			       sector_t *denominator);
 
@@ -124,8 +125,8 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
 /*
  * Two exception store implementations.
  */
-int dm_create_persistent(struct exception_store *store);
+int dm_create_persistent(struct dm_exception_store *store);
 
-int dm_create_transient(struct exception_store *store);
+int dm_create_transient(struct dm_exception_store *store);
 
 #endif /* _LINUX_DM_EXCEPTION_STORE */
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h
index 6e4beaf..9281236 100644
--- a/drivers/md/dm-snap.h
+++ b/drivers/md/dm-snap.h
@@ -61,7 +61,7 @@ struct dm_snapshot {
 	spinlock_t pe_lock;
 
 	/* The on disk metadata handler */
-	struct exception_store store;
+	struct dm_exception_store store;
 
 	struct dm_kcopyd_client *kcopyd_client;
 
-- 
cgit v0.10.2


From 4db6bfe02bdc7dc5048f46dd682a94801d029adc Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Tue, 6 Jan 2009 03:05:17 +0000
Subject: dm snapshot: split out exception store implementations

Move the existing snapshot exception store implementations out into
separate files.  Later patches will place these behind a new
interface in preparation for alternative implementations.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 63f0ae9..72880b7 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -5,7 +5,8 @@
 dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
 		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
 dm-multipath-objs := dm-path-selector.o dm-mpath.o
-dm-snapshot-objs := dm-snap.o dm-exception-store.o
+dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
+		    dm-snap-persistent.o
 dm-mirror-objs	:= dm-raid1.o
 md-mod-objs     := md.o bitmap.o
 raid456-objs	:= raid5.o raid6algos.o raid6recov.o raid6tables.o \
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index c5c9a26..74777e0 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -1,757 +1,46 @@
 /*
- * dm-exception-store.c
- *
  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- * Copyright (C) 2006 Red Hat GmbH
+ * Copyright (C) 2006-2008 Red Hat GmbH
  *
  * This file is released under the GPL.
  */
 
 #include "dm-exception-store.h"
-#include "dm-snap.h"
 
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
-#include <linux/dm-io.h>
-#include <linux/dm-kcopyd.h>
-
-#define DM_MSG_PREFIX "snapshots"
-#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32	/* 16KB */
-
-/*-----------------------------------------------------------------
- * Persistent snapshots, by persistent we mean that the snapshot
- * will survive a reboot.
- *---------------------------------------------------------------*/
-
-/*
- * We need to store a record of which parts of the origin have
- * been copied to the snapshot device.  The snapshot code
- * requires that we copy exception chunks to chunk aligned areas
- * of the COW store.  It makes sense therefore, to store the
- * metadata in chunk size blocks.
- *
- * There is no backward or forward compatibility implemented,
- * snapshots with different disk versions than the kernel will
- * not be usable.  It is expected that "lvcreate" will blank out
- * the start of a fresh COW device before calling the snapshot
- * constructor.
- *
- * The first chunk of the COW device just contains the header.
- * After this there is a chunk filled with exception metadata,
- * followed by as many exception chunks as can fit in the
- * metadata areas.
- *
- * All on disk structures are in little-endian format.  The end
- * of the exceptions info is indicated by an exception with a
- * new_chunk of 0, which is invalid since it would point to the
- * header chunk.
- */
-
-/*
- * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
- */
-#define SNAP_MAGIC 0x70416e53
-
-/*
- * The on-disk version of the metadata.
- */
-#define SNAPSHOT_DISK_VERSION 1
-
-struct disk_header {
-	uint32_t magic;
-
-	/*
-	 * Is this snapshot valid.  There is no way of recovering
-	 * an invalid snapshot.
-	 */
-	uint32_t valid;
-
-	/*
-	 * Simple, incrementing version. no backward
-	 * compatibility.
-	 */
-	uint32_t version;
-
-	/* In sectors */
-	uint32_t chunk_size;
-};
-
-struct disk_exception {
-	uint64_t old_chunk;
-	uint64_t new_chunk;
-};
-
-struct commit_callback {
-	void (*callback)(void *, int success);
-	void *context;
-};
-
-/*
- * The top level structure for a persistent exception store.
- */
-struct pstore {
-	struct dm_snapshot *snap;	/* up pointer to my snapshot */
-	int version;
-	int valid;
-	uint32_t exceptions_per_area;
-
-	/*
-	 * Now that we have an asynchronous kcopyd there is no
-	 * need for large chunk sizes, so it wont hurt to have a
-	 * whole chunks worth of metadata in memory at once.
-	 */
-	void *area;
-
-	/*
-	 * An area of zeros used to clear the next area.
-	 */
-	void *zero_area;
-
-	/*
-	 * Used to keep track of which metadata area the data in
-	 * 'chunk' refers to.
-	 */
-	chunk_t current_area;
-
-	/*
-	 * The next free chunk for an exception.
-	 */
-	chunk_t next_free;
-
-	/*
-	 * The index of next free exception in the current
-	 * metadata area.
-	 */
-	uint32_t current_committed;
-
-	atomic_t pending_count;
-	uint32_t callback_count;
-	struct commit_callback *callbacks;
-	struct dm_io_client *io_client;
-
-	struct workqueue_struct *metadata_wq;
-};
-
-static unsigned sectors_to_pages(unsigned sectors)
-{
-	return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
-}
-
-static int alloc_area(struct pstore *ps)
-{
-	int r = -ENOMEM;
-	size_t len;
-
-	len = ps->snap->chunk_size << SECTOR_SHIFT;
-
-	/*
-	 * Allocate the chunk_size block of memory that will hold
-	 * a single metadata area.
-	 */
-	ps->area = vmalloc(len);
-	if (!ps->area)
-		return r;
-
-	ps->zero_area = vmalloc(len);
-	if (!ps->zero_area) {
-		vfree(ps->area);
-		return r;
-	}
-	memset(ps->zero_area, 0, len);
-
-	return 0;
-}
-
-static void free_area(struct pstore *ps)
-{
-	vfree(ps->area);
-	ps->area = NULL;
-	vfree(ps->zero_area);
-	ps->zero_area = NULL;
-}
-
-struct mdata_req {
-	struct dm_io_region *where;
-	struct dm_io_request *io_req;
-	struct work_struct work;
-	int result;
-};
-
-static void do_metadata(struct work_struct *work)
-{
-	struct mdata_req *req = container_of(work, struct mdata_req, work);
-
-	req->result = dm_io(req->io_req, 1, req->where, NULL);
-}
-
-/*
- * Read or write a chunk aligned and sized block of data from a device.
- */
-static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
-{
-	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * chunk,
-		.count = ps->snap->chunk_size,
-	};
-	struct dm_io_request io_req = {
-		.bi_rw = rw,
-		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->area,
-		.client = ps->io_client,
-		.notify.fn = NULL,
-	};
-	struct mdata_req req;
-
-	if (!metadata)
-		return dm_io(&io_req, 1, &where, NULL);
-
-	req.where = &where;
-	req.io_req = &io_req;
-
-	/*
-	 * Issue the synchronous I/O from a different thread
-	 * to avoid generic_make_request recursion.
-	 */
-	INIT_WORK(&req.work, do_metadata);
-	queue_work(ps->metadata_wq, &req.work);
-	flush_workqueue(ps->metadata_wq);
-
-	return req.result;
-}
-
-/*
- * Convert a metadata area index to a chunk index.
- */
-static chunk_t area_location(struct pstore *ps, chunk_t area)
-{
-	return 1 + ((ps->exceptions_per_area + 1) * area);
-}
-
-/*
- * Read or write a metadata area.  Remembering to skip the first
- * chunk which holds the header.
- */
-static int area_io(struct pstore *ps, int rw)
-{
-	int r;
-	chunk_t chunk;
-
-	chunk = area_location(ps, ps->current_area);
-
-	r = chunk_io(ps, chunk, rw, 0);
-	if (r)
-		return r;
-
-	return 0;
-}
-
-static void zero_memory_area(struct pstore *ps)
-{
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-}
-
-static int zero_disk_area(struct pstore *ps, chunk_t area)
-{
-	struct dm_io_region where = {
-		.bdev = ps->snap->cow->bdev,
-		.sector = ps->snap->chunk_size * area_location(ps, area),
-		.count = ps->snap->chunk_size,
-	};
-	struct dm_io_request io_req = {
-		.bi_rw = WRITE,
-		.mem.type = DM_IO_VMA,
-		.mem.ptr.vma = ps->zero_area,
-		.client = ps->io_client,
-		.notify.fn = NULL,
-	};
-
-	return dm_io(&io_req, 1, &where, NULL);
-}
-
-static int read_header(struct pstore *ps, int *new_snapshot)
-{
-	int r;
-	struct disk_header *dh;
-	chunk_t chunk_size;
-	int chunk_size_supplied = 1;
-
-	/*
-	 * Use default chunk size (or hardsect_size, if larger) if none supplied
-	 */
-	if (!ps->snap->chunk_size) {
-        	ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
-		    bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
-		ps->snap->chunk_mask = ps->snap->chunk_size - 1;
-		ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
-		chunk_size_supplied = 0;
-	}
-
-	ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
-							     chunk_size));
-	if (IS_ERR(ps->io_client))
-		return PTR_ERR(ps->io_client);
-
-	r = alloc_area(ps);
-	if (r)
-		return r;
-
-	r = chunk_io(ps, 0, READ, 1);
-	if (r)
-		goto bad;
-
-	dh = (struct disk_header *) ps->area;
-
-	if (le32_to_cpu(dh->magic) == 0) {
-		*new_snapshot = 1;
-		return 0;
-	}
-
-	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
-		DMWARN("Invalid or corrupt snapshot");
-		r = -ENXIO;
-		goto bad;
-	}
-
-	*new_snapshot = 0;
-	ps->valid = le32_to_cpu(dh->valid);
-	ps->version = le32_to_cpu(dh->version);
-	chunk_size = le32_to_cpu(dh->chunk_size);
-
-	if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
-		return 0;
-
-	DMWARN("chunk size %llu in device metadata overrides "
-	       "table chunk size of %llu.",
-	       (unsigned long long)chunk_size,
-	       (unsigned long long)ps->snap->chunk_size);
-
-	/* We had a bogus chunk_size. Fix stuff up. */
-	free_area(ps);
-
-	ps->snap->chunk_size = chunk_size;
-	ps->snap->chunk_mask = chunk_size - 1;
-	ps->snap->chunk_shift = ffs(chunk_size) - 1;
-
-	r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
-				ps->io_client);
-	if (r)
-		return r;
-
-	r = alloc_area(ps);
-	return r;
-
-bad:
-	free_area(ps);
-	return r;
-}
-
-static int write_header(struct pstore *ps)
-{
-	struct disk_header *dh;
-
-	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-
-	dh = (struct disk_header *) ps->area;
-	dh->magic = cpu_to_le32(SNAP_MAGIC);
-	dh->valid = cpu_to_le32(ps->valid);
-	dh->version = cpu_to_le32(ps->version);
-	dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
-
-	return chunk_io(ps, 0, WRITE, 1);
-}
-
-/*
- * Access functions for the disk exceptions, these do the endian conversions.
- */
-static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
-{
-	BUG_ON(index >= ps->exceptions_per_area);
-
-	return ((struct disk_exception *) ps->area) + index;
-}
+#include <linux/device-mapper.h>
 
-static void read_exception(struct pstore *ps,
-			   uint32_t index, struct disk_exception *result)
-{
-	struct disk_exception *e = get_exception(ps, index);
-
-	/* copy it */
-	result->old_chunk = le64_to_cpu(e->old_chunk);
-	result->new_chunk = le64_to_cpu(e->new_chunk);
-}
-
-static void write_exception(struct pstore *ps,
-			    uint32_t index, struct disk_exception *de)
-{
-	struct disk_exception *e = get_exception(ps, index);
-
-	/* copy it */
-	e->old_chunk = cpu_to_le64(de->old_chunk);
-	e->new_chunk = cpu_to_le64(de->new_chunk);
-}
+#define DM_MSG_PREFIX "snapshot exception stores"
 
-/*
- * Registers the exceptions that are present in the current area.
- * 'full' is filled in to indicate if the area has been
- * filled.
- */
-static int insert_exceptions(struct pstore *ps, int *full)
+int dm_exception_store_init(void)
 {
 	int r;
-	unsigned int i;
-	struct disk_exception de;
-
-	/* presume the area is full */
-	*full = 1;
-
-	for (i = 0; i < ps->exceptions_per_area; i++) {
-		read_exception(ps, i, &de);
-
-		/*
-		 * If the new_chunk is pointing at the start of
-		 * the COW device, where the first metadata area
-		 * is we know that we've hit the end of the
-		 * exceptions.  Therefore the area is not full.
-		 */
-		if (de.new_chunk == 0LL) {
-			ps->current_committed = i;
-			*full = 0;
-			break;
-		}
-
-		/*
-		 * Keep track of the start of the free chunks.
-		 */
-		if (ps->next_free <= de.new_chunk)
-			ps->next_free = de.new_chunk + 1;
-
-		/*
-		 * Otherwise we add the exception to the snapshot.
-		 */
-		r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
-static int read_exceptions(struct pstore *ps)
-{
-	int r, full = 1;
-
-	/*
-	 * Keeping reading chunks and inserting exceptions until
-	 * we find a partially full area.
-	 */
-	for (ps->current_area = 0; full; ps->current_area++) {
-		r = area_io(ps, READ);
-		if (r)
-			return r;
 
-		r = insert_exceptions(ps, &full);
-		if (r)
-			return r;
+	r = dm_transient_snapshot_init();
+	if (r) {
+		DMERR("Unable to register transient exception store type.");
+		goto transient_fail;
 	}
 
-	ps->current_area--;
-
-	return 0;
-}
-
-static struct pstore *get_info(struct dm_exception_store *store)
-{
-	return (struct pstore *) store->context;
-}
-
-static void persistent_fraction_full(struct dm_exception_store *store,
-				     sector_t *numerator, sector_t *denominator)
-{
-	*numerator = get_info(store)->next_free * store->snap->chunk_size;
-	*denominator = get_dev_size(store->snap->cow->bdev);
-}
-
-static void persistent_destroy(struct dm_exception_store *store)
-{
-	struct pstore *ps = get_info(store);
-
-	destroy_workqueue(ps->metadata_wq);
-	dm_io_client_destroy(ps->io_client);
-	vfree(ps->callbacks);
-	free_area(ps);
-	kfree(ps);
-}
-
-static int persistent_read_metadata(struct dm_exception_store *store)
-{
-	int r, uninitialized_var(new_snapshot);
-	struct pstore *ps = get_info(store);
-
-	/*
-	 * Read the snapshot header.
-	 */
-	r = read_header(ps, &new_snapshot);
-	if (r)
-		return r;
-
-	/*
-	 * Now we know correct chunk_size, complete the initialisation.
-	 */
-	ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
-				  sizeof(struct disk_exception);
-	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
-			sizeof(*ps->callbacks));
-	if (!ps->callbacks)
-		return -ENOMEM;
-
-	/*
-	 * Do we need to setup a new snapshot ?
-	 */
-	if (new_snapshot) {
-		r = write_header(ps);
-		if (r) {
-			DMWARN("write_header failed");
-			return r;
-		}
-
-		ps->current_area = 0;
-		zero_memory_area(ps);
-		r = zero_disk_area(ps, 0);
-		if (r) {
-			DMWARN("zero_disk_area(0) failed");
-			return r;
-		}
-	} else {
-		/*
-		 * Sanity checks.
-		 */
-		if (ps->version != SNAPSHOT_DISK_VERSION) {
-			DMWARN("unable to handle snapshot disk version %d",
-			       ps->version);
-			return -EINVAL;
-		}
-
-		/*
-		 * Metadata are valid, but snapshot is invalidated
-		 */
-		if (!ps->valid)
-			return 1;
-
-		/*
-		 * Read the metadata.
-		 */
-		r = read_exceptions(ps);
-		if (r)
-			return r;
+	r = dm_persistent_snapshot_init();
+	if (r) {
+		DMERR("Unable to register persistent exception store type");
+		goto persistent_fail;
 	}
 
 	return 0;
-}
-
-static int persistent_prepare(struct dm_exception_store *store,
-			      struct dm_snap_exception *e)
-{
-	struct pstore *ps = get_info(store);
-	uint32_t stride;
-	chunk_t next_free;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
-
-	/* Is there enough room ? */
-	if (size < ((ps->next_free + 1) * store->snap->chunk_size))
-		return -ENOSPC;
 
-	e->new_chunk = ps->next_free;
-
-	/*
-	 * Move onto the next free pending, making sure to take
-	 * into account the location of the metadata chunks.
-	 */
-	stride = (ps->exceptions_per_area + 1);
-	next_free = ++ps->next_free;
-	if (sector_div(next_free, stride) == 1)
-		ps->next_free++;
-
-	atomic_inc(&ps->pending_count);
-	return 0;
-}
-
-static void persistent_commit(struct dm_exception_store *store,
-			      struct dm_snap_exception *e,
-			      void (*callback) (void *, int success),
-			      void *callback_context)
-{
-	unsigned int i;
-	struct pstore *ps = get_info(store);
-	struct disk_exception de;
-	struct commit_callback *cb;
-
-	de.old_chunk = e->old_chunk;
-	de.new_chunk = e->new_chunk;
-	write_exception(ps, ps->current_committed++, &de);
-
-	/*
-	 * Add the callback to the back of the array.  This code
-	 * is the only place where the callback array is
-	 * manipulated, and we know that it will never be called
-	 * multiple times concurrently.
-	 */
-	cb = ps->callbacks + ps->callback_count++;
-	cb->callback = callback;
-	cb->context = callback_context;
-
-	/*
-	 * If there are exceptions in flight and we have not yet
-	 * filled this metadata area there's nothing more to do.
-	 */
-	if (!atomic_dec_and_test(&ps->pending_count) &&
-	    (ps->current_committed != ps->exceptions_per_area))
-		return;
-
-	/*
-	 * If we completely filled the current area, then wipe the next one.
-	 */
-	if ((ps->current_committed == ps->exceptions_per_area) &&
-	     zero_disk_area(ps, ps->current_area + 1))
-		ps->valid = 0;
-
-	/*
-	 * Commit exceptions to disk.
-	 */
-	if (ps->valid && area_io(ps, WRITE))
-		ps->valid = 0;
-
-	/*
-	 * Advance to the next area if this one is full.
-	 */
-	if (ps->current_committed == ps->exceptions_per_area) {
-		ps->current_committed = 0;
-		ps->current_area++;
-		zero_memory_area(ps);
-	}
-
-	for (i = 0; i < ps->callback_count; i++) {
-		cb = ps->callbacks + i;
-		cb->callback(cb->context, ps->valid);
-	}
-
-	ps->callback_count = 0;
-}
-
-static void persistent_drop(struct dm_exception_store *store)
-{
-	struct pstore *ps = get_info(store);
-
-	ps->valid = 0;
-	if (write_header(ps))
-		DMWARN("write header failed");
-}
-
-int dm_create_persistent(struct dm_exception_store *store)
-{
-	struct pstore *ps;
-
-	/* allocate the pstore */
-	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		return -ENOMEM;
-
-	ps->snap = store->snap;
-	ps->valid = 1;
-	ps->version = SNAPSHOT_DISK_VERSION;
-	ps->area = NULL;
-	ps->next_free = 2;	/* skipping the header and first area */
-	ps->current_committed = 0;
-
-	ps->callback_count = 0;
-	atomic_set(&ps->pending_count, 0);
-	ps->callbacks = NULL;
-
-	ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
-	if (!ps->metadata_wq) {
-		kfree(ps);
-		DMERR("couldn't start header metadata update thread");
-		return -ENOMEM;
-	}
-
-	store->destroy = persistent_destroy;
-	store->read_metadata = persistent_read_metadata;
-	store->prepare_exception = persistent_prepare;
-	store->commit_exception = persistent_commit;
-	store->drop_snapshot = persistent_drop;
-	store->fraction_full = persistent_fraction_full;
-	store->context = ps;
-
-	return 0;
-}
-
-/*-----------------------------------------------------------------
- * Implementation of the store for non-persistent snapshots.
- *---------------------------------------------------------------*/
-struct transient_c {
-	sector_t next_free;
-};
-
-static void transient_destroy(struct dm_exception_store *store)
-{
-	kfree(store->context);
-}
-
-static int transient_read_metadata(struct dm_exception_store *store)
-{
-	return 0;
-}
-
-static int transient_prepare(struct dm_exception_store *store,
-			     struct dm_snap_exception *e)
-{
-	struct transient_c *tc = (struct transient_c *) store->context;
-	sector_t size = get_dev_size(store->snap->cow->bdev);
-
-	if (size < (tc->next_free + store->snap->chunk_size))
-		return -1;
-
-	e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
-	tc->next_free += store->snap->chunk_size;
-
-	return 0;
-}
-
-static void transient_commit(struct dm_exception_store *store,
-			     struct dm_snap_exception *e,
-			     void (*callback) (void *, int success),
-			     void *callback_context)
-{
-	/* Just succeed */
-	callback(callback_context, 1);
-}
-
-static void transient_fraction_full(struct dm_exception_store *store,
-				    sector_t *numerator, sector_t *denominator)
-{
-	*numerator = ((struct transient_c *) store->context)->next_free;
-	*denominator = get_dev_size(store->snap->cow->bdev);
+persistent_fail:
+	dm_persistent_snapshot_exit();
+transient_fail:
+	return r;
 }
 
-int dm_create_transient(struct dm_exception_store *store)
+void dm_exception_store_exit(void)
 {
-	struct transient_c *tc;
-
-	store->destroy = transient_destroy;
-	store->read_metadata = transient_read_metadata;
-	store->prepare_exception = transient_prepare;
-	store->commit_exception = transient_commit;
-	store->drop_snapshot = NULL;
-	store->fraction_full = transient_fraction_full;
-
-	tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
-	if (!tc)
-		return -ENOMEM;
-
-	tc->next_free = 0;
-	store->context = tc;
-
-	return 0;
+	dm_persistent_snapshot_exit();
+	dm_transient_snapshot_exit();
 }
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 25677df..78d1ace 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -122,9 +122,18 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
 
 #  endif
 
+int dm_exception_store_init(void);
+void dm_exception_store_exit(void);
+
 /*
  * Two exception store implementations.
  */
+int dm_persistent_snapshot_init(void);
+void dm_persistent_snapshot_exit(void);
+
+int dm_transient_snapshot_init(void);
+void dm_transient_snapshot_exit(void);
+
 int dm_create_persistent(struct dm_exception_store *store);
 
 int dm_create_transient(struct dm_exception_store *store);
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
new file mode 100644
index 0000000..57c946c
--- /dev/null
+++ b/drivers/md/dm-snap-persistent.c
@@ -0,0 +1,694 @@
+/*
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ * Copyright (C) 2006-2008 Red Hat GmbH
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-exception-store.h"
+#include "dm-snap.h"
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/dm-io.h>
+
+#define DM_MSG_PREFIX "persistent snapshot"
+#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32	/* 16KB */
+
+/*-----------------------------------------------------------------
+ * Persistent snapshots, by persistent we mean that the snapshot
+ * will survive a reboot.
+ *---------------------------------------------------------------*/
+
+/*
+ * We need to store a record of which parts of the origin have
+ * been copied to the snapshot device.  The snapshot code
+ * requires that we copy exception chunks to chunk aligned areas
+ * of the COW store.  It makes sense therefore, to store the
+ * metadata in chunk size blocks.
+ *
+ * There is no backward or forward compatibility implemented,
+ * snapshots with different disk versions than the kernel will
+ * not be usable.  It is expected that "lvcreate" will blank out
+ * the start of a fresh COW device before calling the snapshot
+ * constructor.
+ *
+ * The first chunk of the COW device just contains the header.
+ * After this there is a chunk filled with exception metadata,
+ * followed by as many exception chunks as can fit in the
+ * metadata areas.
+ *
+ * All on disk structures are in little-endian format.  The end
+ * of the exceptions info is indicated by an exception with a
+ * new_chunk of 0, which is invalid since it would point to the
+ * header chunk.
+ */
+
+/*
+ * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
+ */
+#define SNAP_MAGIC 0x70416e53
+
+/*
+ * The on-disk version of the metadata.
+ */
+#define SNAPSHOT_DISK_VERSION 1
+
+struct disk_header {
+	uint32_t magic;
+
+	/*
+	 * Is this snapshot valid.  There is no way of recovering
+	 * an invalid snapshot.
+	 */
+	uint32_t valid;
+
+	/*
+	 * Simple, incrementing version. no backward
+	 * compatibility.
+	 */
+	uint32_t version;
+
+	/* In sectors */
+	uint32_t chunk_size;
+};
+
+struct disk_exception {
+	uint64_t old_chunk;
+	uint64_t new_chunk;
+};
+
+struct commit_callback {
+	void (*callback)(void *, int success);
+	void *context;
+};
+
+/*
+ * The top level structure for a persistent exception store.
+ */
+struct pstore {
+	struct dm_snapshot *snap;	/* up pointer to my snapshot */
+	int version;
+	int valid;
+	uint32_t exceptions_per_area;
+
+	/*
+	 * Now that we have an asynchronous kcopyd there is no
+	 * need for large chunk sizes, so it wont hurt to have a
+	 * whole chunks worth of metadata in memory at once.
+	 */
+	void *area;
+
+	/*
+	 * An area of zeros used to clear the next area.
+	 */
+	void *zero_area;
+
+	/*
+	 * Used to keep track of which metadata area the data in
+	 * 'chunk' refers to.
+	 */
+	chunk_t current_area;
+
+	/*
+	 * The next free chunk for an exception.
+	 */
+	chunk_t next_free;
+
+	/*
+	 * The index of next free exception in the current
+	 * metadata area.
+	 */
+	uint32_t current_committed;
+
+	atomic_t pending_count;
+	uint32_t callback_count;
+	struct commit_callback *callbacks;
+	struct dm_io_client *io_client;
+
+	struct workqueue_struct *metadata_wq;
+};
+
+static unsigned sectors_to_pages(unsigned sectors)
+{
+	return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
+}
+
+static int alloc_area(struct pstore *ps)
+{
+	int r = -ENOMEM;
+	size_t len;
+
+	len = ps->snap->chunk_size << SECTOR_SHIFT;
+
+	/*
+	 * Allocate the chunk_size block of memory that will hold
+	 * a single metadata area.
+	 */
+	ps->area = vmalloc(len);
+	if (!ps->area)
+		return r;
+
+	ps->zero_area = vmalloc(len);
+	if (!ps->zero_area) {
+		vfree(ps->area);
+		return r;
+	}
+	memset(ps->zero_area, 0, len);
+
+	return 0;
+}
+
+static void free_area(struct pstore *ps)
+{
+	vfree(ps->area);
+	ps->area = NULL;
+	vfree(ps->zero_area);
+	ps->zero_area = NULL;
+}
+
+struct mdata_req {
+	struct dm_io_region *where;
+	struct dm_io_request *io_req;
+	struct work_struct work;
+	int result;
+};
+
+static void do_metadata(struct work_struct *work)
+{
+	struct mdata_req *req = container_of(work, struct mdata_req, work);
+
+	req->result = dm_io(req->io_req, 1, req->where, NULL);
+}
+
+/*
+ * Read or write a chunk aligned and sized block of data from a device.
+ */
+static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
+{
+	struct dm_io_region where = {
+		.bdev = ps->snap->cow->bdev,
+		.sector = ps->snap->chunk_size * chunk,
+		.count = ps->snap->chunk_size,
+	};
+	struct dm_io_request io_req = {
+		.bi_rw = rw,
+		.mem.type = DM_IO_VMA,
+		.mem.ptr.vma = ps->area,
+		.client = ps->io_client,
+		.notify.fn = NULL,
+	};
+	struct mdata_req req;
+
+	if (!metadata)
+		return dm_io(&io_req, 1, &where, NULL);
+
+	req.where = &where;
+	req.io_req = &io_req;
+
+	/*
+	 * Issue the synchronous I/O from a different thread
+	 * to avoid generic_make_request recursion.
+	 */
+	INIT_WORK(&req.work, do_metadata);
+	queue_work(ps->metadata_wq, &req.work);
+	flush_workqueue(ps->metadata_wq);
+
+	return req.result;
+}
+
+/*
+ * Convert a metadata area index to a chunk index.
+ */
+static chunk_t area_location(struct pstore *ps, chunk_t area)
+{
+	return 1 + ((ps->exceptions_per_area + 1) * area);
+}
+
+/*
+ * Read or write a metadata area.  Remembering to skip the first
+ * chunk which holds the header.
+ */
+static int area_io(struct pstore *ps, int rw)
+{
+	int r;
+	chunk_t chunk;
+
+	chunk = area_location(ps, ps->current_area);
+
+	r = chunk_io(ps, chunk, rw, 0);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+static void zero_memory_area(struct pstore *ps)
+{
+	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
+}
+
+static int zero_disk_area(struct pstore *ps, chunk_t area)
+{
+	struct dm_io_region where = {
+		.bdev = ps->snap->cow->bdev,
+		.sector = ps->snap->chunk_size * area_location(ps, area),
+		.count = ps->snap->chunk_size,
+	};
+	struct dm_io_request io_req = {
+		.bi_rw = WRITE,
+		.mem.type = DM_IO_VMA,
+		.mem.ptr.vma = ps->zero_area,
+		.client = ps->io_client,
+		.notify.fn = NULL,
+	};
+
+	return dm_io(&io_req, 1, &where, NULL);
+}
+
+static int read_header(struct pstore *ps, int *new_snapshot)
+{
+	int r;
+	struct disk_header *dh;
+	chunk_t chunk_size;
+	int chunk_size_supplied = 1;
+
+	/*
+	 * Use default chunk size (or hardsect_size, if larger) if none supplied
+	 */
+	if (!ps->snap->chunk_size) {
+		ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
+		    bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
+		ps->snap->chunk_mask = ps->snap->chunk_size - 1;
+		ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
+		chunk_size_supplied = 0;
+	}
+
+	ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
+							     chunk_size));
+	if (IS_ERR(ps->io_client))
+		return PTR_ERR(ps->io_client);
+
+	r = alloc_area(ps);
+	if (r)
+		return r;
+
+	r = chunk_io(ps, 0, READ, 1);
+	if (r)
+		goto bad;
+
+	dh = (struct disk_header *) ps->area;
+
+	if (le32_to_cpu(dh->magic) == 0) {
+		*new_snapshot = 1;
+		return 0;
+	}
+
+	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
+		DMWARN("Invalid or corrupt snapshot");
+		r = -ENXIO;
+		goto bad;
+	}
+
+	*new_snapshot = 0;
+	ps->valid = le32_to_cpu(dh->valid);
+	ps->version = le32_to_cpu(dh->version);
+	chunk_size = le32_to_cpu(dh->chunk_size);
+
+	if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
+		return 0;
+
+	DMWARN("chunk size %llu in device metadata overrides "
+	       "table chunk size of %llu.",
+	       (unsigned long long)chunk_size,
+	       (unsigned long long)ps->snap->chunk_size);
+
+	/* We had a bogus chunk_size. Fix stuff up. */
+	free_area(ps);
+
+	ps->snap->chunk_size = chunk_size;
+	ps->snap->chunk_mask = chunk_size - 1;
+	ps->snap->chunk_shift = ffs(chunk_size) - 1;
+
+	r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
+				ps->io_client);
+	if (r)
+		return r;
+
+	r = alloc_area(ps);
+	return r;
+
+bad:
+	free_area(ps);
+	return r;
+}
+
+static int write_header(struct pstore *ps)
+{
+	struct disk_header *dh;
+
+	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
+
+	dh = (struct disk_header *) ps->area;
+	dh->magic = cpu_to_le32(SNAP_MAGIC);
+	dh->valid = cpu_to_le32(ps->valid);
+	dh->version = cpu_to_le32(ps->version);
+	dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
+
+	return chunk_io(ps, 0, WRITE, 1);
+}
+
+/*
+ * Access functions for the disk exceptions, these do the endian conversions.
+ */
+static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
+{
+	BUG_ON(index >= ps->exceptions_per_area);
+
+	return ((struct disk_exception *) ps->area) + index;
+}
+
+static void read_exception(struct pstore *ps,
+			   uint32_t index, struct disk_exception *result)
+{
+	struct disk_exception *e = get_exception(ps, index);
+
+	/* copy it */
+	result->old_chunk = le64_to_cpu(e->old_chunk);
+	result->new_chunk = le64_to_cpu(e->new_chunk);
+}
+
+static void write_exception(struct pstore *ps,
+			    uint32_t index, struct disk_exception *de)
+{
+	struct disk_exception *e = get_exception(ps, index);
+
+	/* copy it */
+	e->old_chunk = cpu_to_le64(de->old_chunk);
+	e->new_chunk = cpu_to_le64(de->new_chunk);
+}
+
+/*
+ * Registers the exceptions that are present in the current area.
+ * 'full' is filled in to indicate if the area has been
+ * filled.
+ */
+static int insert_exceptions(struct pstore *ps, int *full)
+{
+	int r;
+	unsigned int i;
+	struct disk_exception de;
+
+	/* presume the area is full */
+	*full = 1;
+
+	for (i = 0; i < ps->exceptions_per_area; i++) {
+		read_exception(ps, i, &de);
+
+		/*
+		 * If the new_chunk is pointing at the start of
+		 * the COW device, where the first metadata area
+		 * is we know that we've hit the end of the
+		 * exceptions.  Therefore the area is not full.
+		 */
+		if (de.new_chunk == 0LL) {
+			ps->current_committed = i;
+			*full = 0;
+			break;
+		}
+
+		/*
+		 * Keep track of the start of the free chunks.
+		 */
+		if (ps->next_free <= de.new_chunk)
+			ps->next_free = de.new_chunk + 1;
+
+		/*
+		 * Otherwise we add the exception to the snapshot.
+		 */
+		r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static int read_exceptions(struct pstore *ps)
+{
+	int r, full = 1;
+
+	/*
+	 * Keeping reading chunks and inserting exceptions until
+	 * we find a partially full area.
+	 */
+	for (ps->current_area = 0; full; ps->current_area++) {
+		r = area_io(ps, READ);
+		if (r)
+			return r;
+
+		r = insert_exceptions(ps, &full);
+		if (r)
+			return r;
+	}
+
+	ps->current_area--;
+
+	return 0;
+}
+
+static struct pstore *get_info(struct dm_exception_store *store)
+{
+	return (struct pstore *) store->context;
+}
+
+static void persistent_fraction_full(struct dm_exception_store *store,
+				     sector_t *numerator, sector_t *denominator)
+{
+	*numerator = get_info(store)->next_free * store->snap->chunk_size;
+	*denominator = get_dev_size(store->snap->cow->bdev);
+}
+
+static void persistent_destroy(struct dm_exception_store *store)
+{
+	struct pstore *ps = get_info(store);
+
+	destroy_workqueue(ps->metadata_wq);
+	dm_io_client_destroy(ps->io_client);
+	vfree(ps->callbacks);
+	free_area(ps);
+	kfree(ps);
+}
+
+static int persistent_read_metadata(struct dm_exception_store *store)
+{
+	int r, uninitialized_var(new_snapshot);
+	struct pstore *ps = get_info(store);
+
+	/*
+	 * Read the snapshot header.
+	 */
+	r = read_header(ps, &new_snapshot);
+	if (r)
+		return r;
+
+	/*
+	 * Now we know correct chunk_size, complete the initialisation.
+	 */
+	ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
+				  sizeof(struct disk_exception);
+	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
+			sizeof(*ps->callbacks));
+	if (!ps->callbacks)
+		return -ENOMEM;
+
+	/*
+	 * Do we need to setup a new snapshot ?
+	 */
+	if (new_snapshot) {
+		r = write_header(ps);
+		if (r) {
+			DMWARN("write_header failed");
+			return r;
+		}
+
+		ps->current_area = 0;
+		zero_memory_area(ps);
+		r = zero_disk_area(ps, 0);
+		if (r) {
+			DMWARN("zero_disk_area(0) failed");
+			return r;
+		}
+	} else {
+		/*
+		 * Sanity checks.
+		 */
+		if (ps->version != SNAPSHOT_DISK_VERSION) {
+			DMWARN("unable to handle snapshot disk version %d",
+			       ps->version);
+			return -EINVAL;
+		}
+
+		/*
+		 * Metadata are valid, but snapshot is invalidated
+		 */
+		if (!ps->valid)
+			return 1;
+
+		/*
+		 * Read the metadata.
+		 */
+		r = read_exceptions(ps);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static int persistent_prepare(struct dm_exception_store *store,
+			      struct dm_snap_exception *e)
+{
+	struct pstore *ps = get_info(store);
+	uint32_t stride;
+	chunk_t next_free;
+	sector_t size = get_dev_size(store->snap->cow->bdev);
+
+	/* Is there enough room ? */
+	if (size < ((ps->next_free + 1) * store->snap->chunk_size))
+		return -ENOSPC;
+
+	e->new_chunk = ps->next_free;
+
+	/*
+	 * Move onto the next free pending, making sure to take
+	 * into account the location of the metadata chunks.
+	 */
+	stride = (ps->exceptions_per_area + 1);
+	next_free = ++ps->next_free;
+	if (sector_div(next_free, stride) == 1)
+		ps->next_free++;
+
+	atomic_inc(&ps->pending_count);
+	return 0;
+}
+
+static void persistent_commit(struct dm_exception_store *store,
+			      struct dm_snap_exception *e,
+			      void (*callback) (void *, int success),
+			      void *callback_context)
+{
+	unsigned int i;
+	struct pstore *ps = get_info(store);
+	struct disk_exception de;
+	struct commit_callback *cb;
+
+	de.old_chunk = e->old_chunk;
+	de.new_chunk = e->new_chunk;
+	write_exception(ps, ps->current_committed++, &de);
+
+	/*
+	 * Add the callback to the back of the array.  This code
+	 * is the only place where the callback array is
+	 * manipulated, and we know that it will never be called
+	 * multiple times concurrently.
+	 */
+	cb = ps->callbacks + ps->callback_count++;
+	cb->callback = callback;
+	cb->context = callback_context;
+
+	/*
+	 * If there are exceptions in flight and we have not yet
+	 * filled this metadata area there's nothing more to do.
+	 */
+	if (!atomic_dec_and_test(&ps->pending_count) &&
+	    (ps->current_committed != ps->exceptions_per_area))
+		return;
+
+	/*
+	 * If we completely filled the current area, then wipe the next one.
+	 */
+	if ((ps->current_committed == ps->exceptions_per_area) &&
+	     zero_disk_area(ps, ps->current_area + 1))
+		ps->valid = 0;
+
+	/*
+	 * Commit exceptions to disk.
+	 */
+	if (ps->valid && area_io(ps, WRITE))
+		ps->valid = 0;
+
+	/*
+	 * Advance to the next area if this one is full.
+	 */
+	if (ps->current_committed == ps->exceptions_per_area) {
+		ps->current_committed = 0;
+		ps->current_area++;
+		zero_memory_area(ps);
+	}
+
+	for (i = 0; i < ps->callback_count; i++) {
+		cb = ps->callbacks + i;
+		cb->callback(cb->context, ps->valid);
+	}
+
+	ps->callback_count = 0;
+}
+
+static void persistent_drop(struct dm_exception_store *store)
+{
+	struct pstore *ps = get_info(store);
+
+	ps->valid = 0;
+	if (write_header(ps))
+		DMWARN("write header failed");
+}
+
+int dm_create_persistent(struct dm_exception_store *store)
+{
+	struct pstore *ps;
+
+	/* allocate the pstore */
+	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		return -ENOMEM;
+
+	ps->snap = store->snap;
+	ps->valid = 1;
+	ps->version = SNAPSHOT_DISK_VERSION;
+	ps->area = NULL;
+	ps->next_free = 2;	/* skipping the header and first area */
+	ps->current_committed = 0;
+
+	ps->callback_count = 0;
+	atomic_set(&ps->pending_count, 0);
+	ps->callbacks = NULL;
+
+	ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
+	if (!ps->metadata_wq) {
+		kfree(ps);
+		DMERR("couldn't start header metadata update thread");
+		return -ENOMEM;
+	}
+
+	store->destroy = persistent_destroy;
+	store->read_metadata = persistent_read_metadata;
+	store->prepare_exception = persistent_prepare;
+	store->commit_exception = persistent_commit;
+	store->drop_snapshot = persistent_drop;
+	store->fraction_full = persistent_fraction_full;
+	store->context = ps;
+
+	return 0;
+}
+
+int dm_persistent_snapshot_init(void)
+{
+	return 0;
+}
+
+void dm_persistent_snapshot_exit(void)
+{
+}
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c
new file mode 100644
index 0000000..2a781df
--- /dev/null
+++ b/drivers/md/dm-snap-transient.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ * Copyright (C) 2006-2008 Red Hat GmbH
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-exception-store.h"
+#include "dm-snap.h"
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/dm-io.h>
+
+#define DM_MSG_PREFIX "transient snapshot"
+
+/*-----------------------------------------------------------------
+ * Implementation of the store for non-persistent snapshots.
+ *---------------------------------------------------------------*/
+struct transient_c {
+	sector_t next_free;
+};
+
+static void transient_destroy(struct dm_exception_store *store)
+{
+	kfree(store->context);
+}
+
+static int transient_read_metadata(struct dm_exception_store *store)
+{
+	return 0;
+}
+
+static int transient_prepare(struct dm_exception_store *store,
+			     struct dm_snap_exception *e)
+{
+	struct transient_c *tc = (struct transient_c *) store->context;
+	sector_t size = get_dev_size(store->snap->cow->bdev);
+
+	if (size < (tc->next_free + store->snap->chunk_size))
+		return -1;
+
+	e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
+	tc->next_free += store->snap->chunk_size;
+
+	return 0;
+}
+
+static void transient_commit(struct dm_exception_store *store,
+			     struct dm_snap_exception *e,
+			     void (*callback) (void *, int success),
+			     void *callback_context)
+{
+	/* Just succeed */
+	callback(callback_context, 1);
+}
+
+static void transient_fraction_full(struct dm_exception_store *store,
+				    sector_t *numerator, sector_t *denominator)
+{
+	*numerator = ((struct transient_c *) store->context)->next_free;
+	*denominator = get_dev_size(store->snap->cow->bdev);
+}
+
+int dm_create_transient(struct dm_exception_store *store)
+{
+	struct transient_c *tc;
+
+	store->destroy = transient_destroy;
+	store->read_metadata = transient_read_metadata;
+	store->prepare_exception = transient_prepare;
+	store->commit_exception = transient_commit;
+	store->drop_snapshot = NULL;
+	store->fraction_full = transient_fraction_full;
+
+	tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
+	if (!tc)
+		return -ENOMEM;
+
+	tc->next_free = 0;
+	store->context = tc;
+
+	return 0;
+}
+
+int dm_transient_snapshot_init(void)
+{
+	return 0;
+}
+
+void dm_transient_snapshot_exit(void)
+{
+}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 81f03a0..018b567 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1406,6 +1406,12 @@ static int __init dm_snapshot_init(void)
 {
 	int r;
 
+	r = dm_exception_store_init();
+	if (r) {
+		DMERR("Failed to initialize exception stores");
+		return r;
+	}
+
 	r = dm_register_target(&snapshot_target);
 	if (r) {
 		DMERR("snapshot target register failed %d", r);
@@ -1454,17 +1460,17 @@ static int __init dm_snapshot_init(void)
 
 	return 0;
 
-      bad_pending_pool:
+bad_pending_pool:
 	kmem_cache_destroy(tracked_chunk_cache);
-      bad5:
+bad5:
 	kmem_cache_destroy(pending_cache);
-      bad4:
+bad4:
 	kmem_cache_destroy(exception_cache);
-      bad3:
+bad3:
 	exit_origin_hash();
-      bad2:
+bad2:
 	dm_unregister_target(&origin_target);
-      bad1:
+bad1:
 	dm_unregister_target(&snapshot_target);
 	return r;
 }
@@ -1480,6 +1486,8 @@ static void __exit dm_snapshot_exit(void)
 	kmem_cache_destroy(pending_cache);
 	kmem_cache_destroy(exception_cache);
 	kmem_cache_destroy(tracked_chunk_cache);
+
+	dm_exception_store_exit();
 }
 
 /* Module hooks */
-- 
cgit v0.10.2


From a159c1ac5f33c6cf0f5aa3c9d1ccdc82c907ee46 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Tue, 6 Jan 2009 03:05:19 +0000
Subject: dm snapshot: extend exception store functions

Supply dm_add_exception as a callback to the read_metadata function.
Add a status function ready for a later patch and name the functions
consistently.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 74777e0..dccbfb0 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -11,7 +11,6 @@
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
-#include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "snapshot exception stores"
 
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 78d1ace..bb9f33d 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -11,6 +11,7 @@
 #define _LINUX_DM_EXCEPTION_STORE
 
 #include <linux/blkdev.h>
+#include <linux/device-mapper.h>
 
 /*
  * The snapshot code deals with largish chunks of the disk at a
@@ -37,7 +38,6 @@ struct dm_snap_exception {
  * COW device).
  */
 struct dm_exception_store {
-
 	/*
 	 * Destroys this object when you've finished with it.
 	 */
@@ -45,9 +45,13 @@ struct dm_exception_store {
 
 	/*
 	 * The target shouldn't read the COW device until this is
-	 * called.
+	 * called.  As exceptions are read from the COW, they are
+	 * reported back via the callback.
 	 */
-	int (*read_metadata) (struct dm_exception_store *store);
+	int (*read_metadata) (struct dm_exception_store *store,
+			      int (*callback)(void *callback_context,
+					      chunk_t old, chunk_t new),
+			      void *callback_context);
 
 	/*
 	 * Find somewhere to store the next exception.
@@ -68,6 +72,9 @@ struct dm_exception_store {
 	 */
 	void (*drop_snapshot) (struct dm_exception_store *store);
 
+	int (*status) (struct dm_exception_store *store, status_type_t status,
+		       char *result, unsigned int maxlen);
+
 	/*
 	 * Return how full the snapshot is.
 	 */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 57c946c..936b34e 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -395,7 +395,11 @@ static void write_exception(struct pstore *ps,
  * 'full' is filled in to indicate if the area has been
  * filled.
  */
-static int insert_exceptions(struct pstore *ps, int *full)
+static int insert_exceptions(struct pstore *ps,
+			     int (*callback)(void *callback_context,
+					     chunk_t old, chunk_t new),
+			     void *callback_context,
+			     int *full)
 {
 	int r;
 	unsigned int i;
@@ -428,7 +432,7 @@ static int insert_exceptions(struct pstore *ps, int *full)
 		/*
 		 * Otherwise we add the exception to the snapshot.
 		 */
-		r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
+		r = callback(callback_context, de.old_chunk, de.new_chunk);
 		if (r)
 			return r;
 	}
@@ -436,7 +440,10 @@ static int insert_exceptions(struct pstore *ps, int *full)
 	return 0;
 }
 
-static int read_exceptions(struct pstore *ps)
+static int read_exceptions(struct pstore *ps,
+			   int (*callback)(void *callback_context, chunk_t old,
+					   chunk_t new),
+			   void *callback_context)
 {
 	int r, full = 1;
 
@@ -449,7 +456,7 @@ static int read_exceptions(struct pstore *ps)
 		if (r)
 			return r;
 
-		r = insert_exceptions(ps, &full);
+		r = insert_exceptions(ps, callback, callback_context, &full);
 		if (r)
 			return r;
 	}
@@ -482,7 +489,10 @@ static void persistent_destroy(struct dm_exception_store *store)
 	kfree(ps);
 }
 
-static int persistent_read_metadata(struct dm_exception_store *store)
+static int persistent_read_metadata(struct dm_exception_store *store,
+				    int (*callback)(void *callback_context,
+						    chunk_t old, chunk_t new),
+				    void *callback_context)
 {
 	int r, uninitialized_var(new_snapshot);
 	struct pstore *ps = get_info(store);
@@ -540,7 +550,7 @@ static int persistent_read_metadata(struct dm_exception_store *store)
 		/*
 		 * Read the metadata.
 		 */
-		r = read_exceptions(ps);
+		r = read_exceptions(ps, callback, callback_context);
 		if (r)
 			return r;
 	}
@@ -548,8 +558,8 @@ static int persistent_read_metadata(struct dm_exception_store *store)
 	return 0;
 }
 
-static int persistent_prepare(struct dm_exception_store *store,
-			      struct dm_snap_exception *e)
+static int persistent_prepare_exception(struct dm_exception_store *store,
+					struct dm_snap_exception *e)
 {
 	struct pstore *ps = get_info(store);
 	uint32_t stride;
@@ -575,10 +585,10 @@ static int persistent_prepare(struct dm_exception_store *store,
 	return 0;
 }
 
-static void persistent_commit(struct dm_exception_store *store,
-			      struct dm_snap_exception *e,
-			      void (*callback) (void *, int success),
-			      void *callback_context)
+static void persistent_commit_exception(struct dm_exception_store *store,
+					struct dm_snap_exception *e,
+					void (*callback) (void *, int success),
+					void *callback_context)
 {
 	unsigned int i;
 	struct pstore *ps = get_info(store);
@@ -637,7 +647,7 @@ static void persistent_commit(struct dm_exception_store *store,
 	ps->callback_count = 0;
 }
 
-static void persistent_drop(struct dm_exception_store *store)
+static void persistent_drop_snapshot(struct dm_exception_store *store)
 {
 	struct pstore *ps = get_info(store);
 
@@ -675,9 +685,9 @@ int dm_create_persistent(struct dm_exception_store *store)
 
 	store->destroy = persistent_destroy;
 	store->read_metadata = persistent_read_metadata;
-	store->prepare_exception = persistent_prepare;
-	store->commit_exception = persistent_commit;
-	store->drop_snapshot = persistent_drop;
+	store->prepare_exception = persistent_prepare_exception;
+	store->commit_exception = persistent_commit_exception;
+	store->drop_snapshot = persistent_drop_snapshot;
 	store->fraction_full = persistent_fraction_full;
 	store->context = ps;
 
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c
index 2a781df..7f6e2e6 100644
--- a/drivers/md/dm-snap-transient.c
+++ b/drivers/md/dm-snap-transient.c
@@ -28,13 +28,16 @@ static void transient_destroy(struct dm_exception_store *store)
 	kfree(store->context);
 }
 
-static int transient_read_metadata(struct dm_exception_store *store)
+static int transient_read_metadata(struct dm_exception_store *store,
+				   int (*callback)(void *callback_context,
+						   chunk_t old, chunk_t new),
+				   void *callback_context)
 {
 	return 0;
 }
 
-static int transient_prepare(struct dm_exception_store *store,
-			     struct dm_snap_exception *e)
+static int transient_prepare_exception(struct dm_exception_store *store,
+				       struct dm_snap_exception *e)
 {
 	struct transient_c *tc = (struct transient_c *) store->context;
 	sector_t size = get_dev_size(store->snap->cow->bdev);
@@ -48,10 +51,10 @@ static int transient_prepare(struct dm_exception_store *store,
 	return 0;
 }
 
-static void transient_commit(struct dm_exception_store *store,
-			     struct dm_snap_exception *e,
-			     void (*callback) (void *, int success),
-			     void *callback_context)
+static void transient_commit_exception(struct dm_exception_store *store,
+				       struct dm_snap_exception *e,
+				       void (*callback) (void *, int success),
+				       void *callback_context)
 {
 	/* Just succeed */
 	callback(callback_context, 1);
@@ -70,8 +73,8 @@ int dm_create_transient(struct dm_exception_store *store)
 
 	store->destroy = transient_destroy;
 	store->read_metadata = transient_read_metadata;
-	store->prepare_exception = transient_prepare;
-	store->commit_exception = transient_commit;
+	store->prepare_exception = transient_prepare_exception;
+	store->commit_exception = transient_commit_exception;
 	store->drop_snapshot = NULL;
 	store->fraction_full = transient_fraction_full;
 
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 018b567..65ff82f 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -430,8 +430,13 @@ out:
 	list_add(&new_e->hash_list, e ? &e->hash_list : l);
 }
 
-int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
+/*
+ * Callback used by the exception stores to load exceptions when
+ * initialising.
+ */
+static int dm_add_exception(void *context, chunk_t old, chunk_t new)
 {
+	struct dm_snapshot *s = context;
 	struct dm_snap_exception *e;
 
 	e = alloc_exception();
@@ -660,7 +665,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	spin_lock_init(&s->tracked_chunk_lock);
 
 	/* Metadata must only be loaded into one table at once */
-	r = s->store.read_metadata(&s->store);
+	r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s);
 	if (r < 0) {
 		ti->error = "Failed to read snapshot metadata";
 		goto bad_load_and_register;
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h
index 9281236..d9e62b4 100644
--- a/drivers/md/dm-snap.h
+++ b/drivers/md/dm-snap.h
@@ -76,12 +76,6 @@ struct dm_snapshot {
 };
 
 /*
- * Used by the exception stores to load exceptions hen
- * initialising.
- */
-int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new);
-
-/*
  * Return the number of sectors in the device.
  */
 static inline sector_t get_dev_size(struct block_device *bdev)
-- 
cgit v0.10.2