From 81bb5d31fbf3893a8e041c649dea704dd11d5272 Mon Sep 17 00:00:00 2001
From: Kamal Mostafa <kamal@canonical.com>
Date: Thu, 21 Feb 2013 11:55:05 -0800
Subject: Input: cypress_ps2 - fix trackpadi found in Dell XPS12

Avoid firmware glitch in Cypress PS/2 Trackpad firmware version 11
(as observed in Dell XPS12) which prevents driver from recognizing
the trackpad.

BugLink: http://launchpad.net/bugs/1103594

Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Cc: Dudley Du <dudl@cypress.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/mouse/cypress_ps2.c b/drivers/input/mouse/cypress_ps2.c
index 1673dc6..f51765f 100644
--- a/drivers/input/mouse/cypress_ps2.c
+++ b/drivers/input/mouse/cypress_ps2.c
@@ -236,6 +236,13 @@ static int cypress_read_fw_version(struct psmouse *psmouse)
 	cytp->fw_version = param[2] & FW_VERSION_MASX;
 	cytp->tp_metrics_supported = (param[2] & TP_METRICS_MASK) ? 1 : 0;
 
+	/*
+	 * Trackpad fw_version 11 (in Dell XPS12) yields a bogus response to
+	 * CYTP_CMD_READ_TP_METRICS so do not try to use it. LP: #1103594.
+	 */
+	if (cytp->fw_version >= 11)
+		cytp->tp_metrics_supported = 0;
+
 	psmouse_dbg(psmouse, "cytp->fw_version = %d\n", cytp->fw_version);
 	psmouse_dbg(psmouse, "cytp->tp_metrics_supported = %d\n",
 		 cytp->tp_metrics_supported);
@@ -258,6 +265,9 @@ static int cypress_read_tp_metrics(struct psmouse *psmouse)
 	cytp->tp_res_x = cytp->tp_max_abs_x / cytp->tp_width;
 	cytp->tp_res_y = cytp->tp_max_abs_y / cytp->tp_high;
 
+	if (!cytp->tp_metrics_supported)
+		return 0;
+
 	memset(param, 0, sizeof(param));
 	if (cypress_send_ext_cmd(psmouse, CYTP_CMD_READ_TP_METRICS, param) == 0) {
 		/* Update trackpad parameters. */
@@ -315,18 +325,15 @@ static int cypress_read_tp_metrics(struct psmouse *psmouse)
 
 static int cypress_query_hardware(struct psmouse *psmouse)
 {
-	struct cytp_data *cytp = psmouse->private;
 	int ret;
 
 	ret = cypress_read_fw_version(psmouse);
 	if (ret)
 		return ret;
 
-	if (cytp->tp_metrics_supported) {
-		ret = cypress_read_tp_metrics(psmouse);
-		if (ret)
-			return ret;
-	}
+	ret = cypress_read_tp_metrics(psmouse);
+	if (ret)
+		return ret;
 
 	return 0;
 }
-- 
cgit v0.10.2


From d18e53fce2f6e42bfb8ac157547dd3f804385749 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Thu, 21 Feb 2013 22:58:20 -0800
Subject: Input: ALPS - remove unused argument to alps_enter_command_mode()

Now that alps_identify() explicitly issues an EC report using
alps_rpt_cmd(), we no longer need to look at the magic numbers returned
by alps_enter_command_mode().

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 7b99fc7..9c97531 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -994,8 +994,7 @@ static int alps_rpt_cmd(struct psmouse *psmouse, int init_command,
 	return 0;
 }
 
-static int alps_enter_command_mode(struct psmouse *psmouse,
-				   unsigned char *resp)
+static int alps_enter_command_mode(struct psmouse *psmouse)
 {
 	unsigned char param[4];
 
@@ -1009,9 +1008,6 @@ static int alps_enter_command_mode(struct psmouse *psmouse,
 			    "unknown response while entering command mode\n");
 		return -1;
 	}
-
-	if (resp)
-		*resp = param[2];
 	return 0;
 }
 
@@ -1176,7 +1172,7 @@ static int alps_passthrough_mode_v3(struct psmouse *psmouse,
 {
 	int reg_val, ret = -1;
 
-	if (alps_enter_command_mode(psmouse, NULL))
+	if (alps_enter_command_mode(psmouse))
 		return -1;
 
 	reg_val = alps_command_mode_read_reg(psmouse, reg_base + 0x0008);
@@ -1216,7 +1212,7 @@ static int alps_probe_trackstick_v3(struct psmouse *psmouse, int reg_base)
 {
 	int ret = -EIO, reg_val;
 
-	if (alps_enter_command_mode(psmouse, NULL))
+	if (alps_enter_command_mode(psmouse))
 		goto error;
 
 	reg_val = alps_command_mode_read_reg(psmouse, reg_base + 0x08);
@@ -1279,7 +1275,7 @@ static int alps_setup_trackstick_v3(struct psmouse *psmouse, int reg_base)
 		 * supported by this driver. If bit 1 isn't set the packet
 		 * format is different.
 		 */
-		if (alps_enter_command_mode(psmouse, NULL) ||
+		if (alps_enter_command_mode(psmouse) ||
 		    alps_command_mode_write_reg(psmouse,
 						reg_base + 0x08, 0x82) ||
 		    alps_exit_command_mode(psmouse))
@@ -1306,7 +1302,7 @@ static int alps_hw_init_v3(struct psmouse *psmouse)
 	    alps_setup_trackstick_v3(psmouse, ALPS_REG_BASE_PINNACLE) == -EIO)
 		goto error;
 
-	if (alps_enter_command_mode(psmouse, NULL) ||
+	if (alps_enter_command_mode(psmouse) ||
 	    alps_absolute_mode_v3(psmouse)) {
 		psmouse_err(psmouse, "Failed to enter absolute mode\n");
 		goto error;
@@ -1381,7 +1377,7 @@ static int alps_hw_init_rushmore_v3(struct psmouse *psmouse)
 			priv->flags &= ~ALPS_DUALPOINT;
 	}
 
-	if (alps_enter_command_mode(psmouse, NULL) ||
+	if (alps_enter_command_mode(psmouse) ||
 	    alps_command_mode_read_reg(psmouse, 0xc2d9) == -1 ||
 	    alps_command_mode_write_reg(psmouse, 0xc2cb, 0x00))
 		goto error;
@@ -1431,7 +1427,7 @@ static int alps_hw_init_v4(struct psmouse *psmouse)
 	struct ps2dev *ps2dev = &psmouse->ps2dev;
 	unsigned char param[4];
 
-	if (alps_enter_command_mode(psmouse, NULL))
+	if (alps_enter_command_mode(psmouse))
 		goto error;
 
 	if (alps_absolute_mode_v4(psmouse)) {
-- 
cgit v0.10.2


From 75af9e56c1e309a4132d15120d7061656609b84e Mon Sep 17 00:00:00 2001
From: Dave Turvene <dturvene@dahetral.com>
Date: Thu, 21 Feb 2013 22:58:28 -0800
Subject: Input: ALPS - add "Dolphin V1" touchpad support

These touchpads use a different protocol; they have been seen on Dell
N5110, Dell 17R SE, and others.

The official ALPS driver identifies them by looking for an exact match
on the E7 report: 73 03 50.  Dolphin V1 returns an EC report of
73 01 xx (02 and 0d have been seen); Dolphin V2 returns an EC report of
73 02 xx (02 has been seen).

Dolphin V2 probably needs a different initialization sequence and/or
report parser, so it is left for a future commit.

Signed-off-by: Dave Turvene <dturvene@dahetral.com>
Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 9c97531..0238e0e 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -490,6 +490,29 @@ static void alps_decode_rushmore(struct alps_fields *f, unsigned char *p)
 	f->y_map |= (p[5] & 0x20) << 6;
 }
 
+static void alps_decode_dolphin(struct alps_fields *f, unsigned char *p)
+{
+	f->first_mp = !!(p[0] & 0x02);
+	f->is_mp = !!(p[0] & 0x20);
+
+	f->fingers = ((p[0] & 0x6) >> 1 |
+		     (p[0] & 0x10) >> 2);
+	f->x_map = ((p[2] & 0x60) >> 5) |
+		   ((p[4] & 0x7f) << 2) |
+		   ((p[5] & 0x7f) << 9) |
+		   ((p[3] & 0x07) << 16) |
+		   ((p[3] & 0x70) << 15) |
+		   ((p[0] & 0x01) << 22);
+	f->y_map = (p[1] & 0x7f) |
+		   ((p[2] & 0x1f) << 7);
+
+	f->x = ((p[1] & 0x7f) | ((p[4] & 0x0f) << 7));
+	f->y = ((p[2] & 0x7f) | ((p[4] & 0xf0) << 3));
+	f->z = (p[0] & 4) ? 0 : p[5] & 0x7f;
+
+	alps_decode_buttons_v3(f, p);
+}
+
 static void alps_process_touchpad_packet_v3(struct psmouse *psmouse)
 {
 	struct alps_data *priv = psmouse->private;
@@ -874,7 +897,8 @@ static psmouse_ret_t alps_process_byte(struct psmouse *psmouse)
 	}
 
 	/* Bytes 2 - pktsize should have 0 in the highest bit */
-	if (psmouse->pktcnt >= 2 && psmouse->pktcnt <= psmouse->pktsize &&
+	if (priv->proto_version != ALPS_PROTO_V5 &&
+	    psmouse->pktcnt >= 2 && psmouse->pktcnt <= psmouse->pktsize &&
 	    (psmouse->packet[psmouse->pktcnt - 1] & 0x80)) {
 		psmouse_dbg(psmouse, "refusing packet[%i] = %x\n",
 			    psmouse->pktcnt - 1,
@@ -1003,7 +1027,8 @@ static int alps_enter_command_mode(struct psmouse *psmouse)
 		return -1;
 	}
 
-	if (param[0] != 0x88 || (param[1] != 0x07 && param[1] != 0x08)) {
+	if ((param[0] != 0x88 || (param[1] != 0x07 && param[1] != 0x08)) &&
+	    param[0] != 0x73) {
 		psmouse_dbg(psmouse,
 			    "unknown response while entering command mode\n");
 		return -1;
@@ -1495,6 +1520,23 @@ error:
 	return -1;
 }
 
+static int alps_hw_init_dolphin_v1(struct psmouse *psmouse)
+{
+	struct ps2dev *ps2dev = &psmouse->ps2dev;
+	unsigned char param[2];
+
+	/* This is dolphin "v1" as empirically defined by florin9doi */
+	param[0] = 0x64;
+	param[1] = 0x28;
+
+	if (ps2_command(ps2dev, NULL, PSMOUSE_CMD_SETSTREAM) ||
+	    ps2_command(ps2dev, &param[0], PSMOUSE_CMD_SETRATE) ||
+	    ps2_command(ps2dev, &param[1], PSMOUSE_CMD_SETRATE))
+		return -1;
+
+	return 0;
+}
+
 static void alps_set_defaults(struct alps_data *priv)
 {
 	priv->byte0 = 0x8f;
@@ -1528,6 +1570,21 @@ static void alps_set_defaults(struct alps_data *priv)
 		priv->nibble_commands = alps_v4_nibble_commands;
 		priv->addr_command = PSMOUSE_CMD_DISABLE;
 		break;
+	case ALPS_PROTO_V5:
+		priv->hw_init = alps_hw_init_dolphin_v1;
+		priv->process_packet = alps_process_packet_v3;
+		priv->decode_fields = alps_decode_dolphin;
+		priv->set_abs_params = alps_set_abs_params_mt;
+		priv->nibble_commands = alps_v3_nibble_commands;
+		priv->addr_command = PSMOUSE_CMD_RESET_WRAP;
+		priv->byte0 = 0xc8;
+		priv->mask0 = 0xc8;
+		priv->flags = 0;
+		priv->x_max = 1360;
+		priv->y_max = 660;
+		priv->x_bits = 23;
+		priv->y_bits = 12;
+		break;
 	}
 }
 
@@ -1588,6 +1645,12 @@ static int alps_identify(struct psmouse *psmouse, struct alps_data *priv)
 
 	if (alps_match_table(psmouse, priv, e7, ec) == 0) {
 		return 0;
+	} else if (e7[0] == 0x73 && e7[1] == 0x03 && e7[2] == 0x50 &&
+		   ec[0] == 0x73 && ec[1] == 0x01) {
+		priv->proto_version = ALPS_PROTO_V5;
+		alps_set_defaults(priv);
+
+		return 0;
 	} else if (ec[0] == 0x88 && ec[1] == 0x08) {
 		priv->proto_version = ALPS_PROTO_V3;
 		alps_set_defaults(priv);
diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h
index 9704805..eee5985 100644
--- a/drivers/input/mouse/alps.h
+++ b/drivers/input/mouse/alps.h
@@ -16,6 +16,7 @@
 #define ALPS_PROTO_V2	2
 #define ALPS_PROTO_V3	3
 #define ALPS_PROTO_V4	4
+#define ALPS_PROTO_V5	5
 
 /**
  * struct alps_model_info - touchpad ID table
-- 
cgit v0.10.2


From 171fb58da00277b099d5a1c5e114fa57b77c4f90 Mon Sep 17 00:00:00 2001
From: dave turvene <dturvene@dahetral.com>
Date: Sat, 23 Feb 2013 20:06:34 -0800
Subject: Input: ALPS - update documentation for recent touchpad driver mods

Updated documentation for the new ALPS touchpad support submitted in two
patchsets by Kevin Cernekee.  My understanding is the most recent
patchset '"Dolphin V2" touchpad support' may still need some work but
Future work on the ALPS driver should not impact these documentation changes.

See https://bugs.launchpad.net/ubuntu/+source/linux/+bug/606238

Signed-off-by: David Turvene <dturvene@dahetral.com>
Acked-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt
index ae8ba9a..9b1084c 100644
--- a/Documentation/input/alps.txt
+++ b/Documentation/input/alps.txt
@@ -3,10 +3,26 @@ ALPS Touchpad Protocol
 
 Introduction
 ------------
-
-Currently the ALPS touchpad driver supports four protocol versions in use by
-ALPS touchpads, called versions 1, 2, 3, and 4. Information about the various
-protocol versions is contained in the following sections.
+Currently the ALPS touchpad driver supports five protocol versions in use by
+ALPS touchpads, called versions 1, 2, 3, 4 and 5.
+
+Since roughly mid-2010 several new ALPS touchpads have been released and
+integrated into a variety of laptops and netbooks.  These new touchpads
+have enough behavior differences that the alps_model_data definition
+table, describing the properties of the different versions, is no longer
+adequate.  The design choices were to re-define the alps_model_data
+table, with the risk of regression testing existing devices, or isolate
+the new devices outside of the alps_model_data table.  The latter design
+choice was made.  The new touchpad signatures are named: "Rushmore",
+"Pinnacle", and "Dolphin", which you will see in the alps.c code.
+For the purposes of this document, this group of ALPS touchpads will
+generically be called "new ALPS touchpads".
+
+We experimented with probing the ACPI interface _HID (Hardware ID)/_CID
+(Compatibility ID) definition as a way to uniquely identify the
+different ALPS variants but there did not appear to be a 1:1 mapping.
+In fact, it appeared to be an m:n mapping between the _HID and actual
+hardware type.
 
 Detection
 ---------
@@ -20,9 +36,13 @@ If the E6 report is successful, the touchpad model is identified using the "E7
 report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is
 matched against known models in the alps_model_data_array.
 
-With protocol versions 3 and 4, the E7 report model signature is always
-73-02-64. To differentiate between these versions, the response from the
-"Enter Command Mode" sequence must be inspected as described below.
+For older touchpads supporting protocol versions 3 and 4, the E7 report
+model signature is always 73-02-64. To differentiate between these
+versions, the response from the "Enter Command Mode" sequence must be
+inspected as described below.
+
+The new ALPS touchpads have an E7 signature of 73-03-50 or 73-03-0A but
+seem to be better differentiated by the EC Command Mode response.
 
 Command Mode
 ------------
@@ -47,6 +67,14 @@ address of the register being read, and the third contains the value of the
 register. Registers are written by writing the value one nibble at a time
 using the same encoding used for addresses.
 
+For the new ALPS touchpads, the EC command is used to enter command
+mode. The response in the new ALPS touchpads is significantly different,
+and more important in determining the behavior.  This code has been
+separated from the original alps_model_data table and put in the
+alps_identify function.  For example, there seem to be two hardware init
+sequences for the "Dolphin" touchpads as determined by the second byte
+of the EC response.
+
 Packet Format
 -------------
 
@@ -187,3 +215,28 @@ There are several things worth noting here.
     well.
 
 So far no v4 devices with tracksticks have been encountered.
+
+ALPS Absolute Mode - Protocol Version 5
+---------------------------------------
+This is basically Protocol Version 3 but with different logic for packet
+decode.  It uses the same alps_process_touchpad_packet_v3 call with a
+specialized decode_fields function pointer to correctly interpret the
+packets.  This appears to only be used by the Dolphin devices.
+
+For single-touch, the 6-byte packet format is:
+
+ byte 0:    1    1    0    0    1    0    0    0
+ byte 1:    0   x6   x5   x4   x3   x2   x1   x0
+ byte 2:    0   y6   y5   y4   y3   y2   y1   y0
+ byte 3:    0    M    R    L    1    m    r    l
+ byte 4:   y10  y9   y8   y7  x10   x9   x8   x7
+ byte 5:    0   z6   z5   z4   z3   z2   z1   z0
+
+For mt, the format is:
+
+ byte 0:    1    1    1    n3   1   n2   n1   x24
+ byte 1:    1   y7   y6    y5  y4   y3   y2    y1
+ byte 2:    ?   x2   x1   y12 y11  y10   y9    y8
+ byte 3:    0  x23  x22   x21 x20  x19  x18   x17
+ byte 4:    0   x9   x8    x7  x6   x5   x4    x3
+ byte 5:    0  x16  x15   x14 x13  x12  x11   x10
-- 
cgit v0.10.2


From 9b5bd5a4917eeb5eca00d1842a74186cfc8dd1c6 Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Wed, 20 Feb 2013 09:41:08 -0800
Subject: mac80211: stop timers before canceling work items

Re-order the quiesce code so that timers are always
stopped before work-items are flushed. This was not
the problem I saw, but I think it may still be more
correct.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 9f6464f..6044c6d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3503,6 +3503,14 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
 	/*
+	 * Stop timers before deleting work items, as timers
+	 * could race and re-add the work-items. They will be
+	 * re-established on connection.
+	 */
+	del_timer_sync(&ifmgd->conn_mon_timer);
+	del_timer_sync(&ifmgd->bcn_mon_timer);
+
+	/*
 	 * we need to use atomic bitops for the running bits
 	 * only because both timers might fire at the same
 	 * time -- the code here is properly synchronised.
@@ -3516,13 +3524,9 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
 	if (del_timer_sync(&ifmgd->timer))
 		set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
 
-	cancel_work_sync(&ifmgd->chswitch_work);
 	if (del_timer_sync(&ifmgd->chswitch_timer))
 		set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running);
-
-	/* these will just be re-established on connection */
-	del_timer_sync(&ifmgd->conn_mon_timer);
-	del_timer_sync(&ifmgd->bcn_mon_timer);
+	cancel_work_sync(&ifmgd->chswitch_work);
 }
 
 void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
-- 
cgit v0.10.2


From 499218595a2e8296b7492af32fcca141b7b8184a Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Wed, 20 Feb 2013 09:41:09 -0800
Subject: mac80211: Fix crash due to un-canceled work-items

Some mlme work structs are not cancelled on disassociation
nor interface deletion, which leads to them running after
the memory has been freed

There is not a clean way to cancel these in the disassociation
logic because they must be canceled outside of the ifmgd->mtx
lock, so just cancel them in mgd_stop logic that tears down
the station.

This fixes the crashes we see in 3.7.9+.  The crash stack
trace itself isn't so helpful, but this warning gives
more useful info:

WARNING: at /home/greearb/git/linux-3.7.dev.y/lib/debugobjects.c:261 debug_print_object+0x7c/0x8d()
ODEBUG: free active (active state 0) object type: work_struct hint: ieee80211_sta_monitor_work+0x0/0x14 [mac80211]
Modules linked in: [...]
Pid: 14743, comm: iw Tainted: G         C O 3.7.9+ #11
Call Trace:
 [<ffffffff81087ef8>] warn_slowpath_common+0x80/0x98
 [<ffffffff81087fa4>] warn_slowpath_fmt+0x41/0x43
 [<ffffffff812a2608>] debug_print_object+0x7c/0x8d
 [<ffffffff812a2bca>] debug_check_no_obj_freed+0x95/0x1c3
 [<ffffffff8114cc69>] slab_free_hook+0x70/0x79
 [<ffffffff8114ea3e>] kfree+0x62/0xb7
 [<ffffffff8149f465>] netdev_release+0x39/0x3e
 [<ffffffff8136ad67>] device_release+0x52/0x8a
 [<ffffffff812937db>] kobject_release+0x121/0x158
 [<ffffffff81293612>] kobject_put+0x4c/0x50
 [<ffffffff8148f0d7>] netdev_run_todo+0x25c/0x27e

Cc: stable@vger.kernel.org
Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6044c6d..b756d29 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4319,6 +4319,17 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
+	/*
+	 * Make sure some work items will not run after this,
+	 * they will not do anything but might not have been
+	 * cancelled when disconnecting.
+	 */
+	cancel_work_sync(&ifmgd->monitor_work);
+	cancel_work_sync(&ifmgd->beacon_connection_loss_work);
+	cancel_work_sync(&ifmgd->request_smps_work);
+	cancel_work_sync(&ifmgd->csa_connection_drop_work);
+	cancel_work_sync(&ifmgd->chswitch_work);
+
 	mutex_lock(&ifmgd->mtx);
 	if (ifmgd->assoc_data)
 		ieee80211_destroy_assoc_data(sdata, false);
-- 
cgit v0.10.2


From d0ae708d1acd4bf6ad5b9937d9da44d16ca18f13 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 27 Feb 2013 15:08:28 +0100
Subject: nl80211: remove channel width and extended capa advertising

This is another case of data increasing the size of the
wiphy information significantly with a new feature, for
now remove this as well.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 33b75b9..9220021 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -367,8 +367,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
 	rdev->wiphy.rts_threshold = (u32) -1;
 	rdev->wiphy.coverage_class = 0;
 
-	rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH |
-			       NL80211_FEATURE_ADVERTISE_CHAN_LIMITS;
+	rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH;
 
 	return &rdev->wiphy;
 }
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e652d05..7a7b621 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -557,18 +557,6 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
 	if ((chan->flags & IEEE80211_CHAN_RADAR) &&
 	    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR))
 		goto nla_put_failure;
-	if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) &&
-	    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS))
-		goto nla_put_failure;
-	if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) &&
-	    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS))
-		goto nla_put_failure;
-	if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) &&
-	    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ))
-		goto nla_put_failure;
-	if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) &&
-	    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ))
-		goto nla_put_failure;
 
 	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
 			DBM_TO_MBM(chan->max_power)))
@@ -1310,15 +1298,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag
 			dev->wiphy.max_acl_mac_addrs))
 		goto nla_put_failure;
 
-	if (dev->wiphy.extended_capabilities &&
-	    (nla_put(msg, NL80211_ATTR_EXT_CAPA,
-		     dev->wiphy.extended_capabilities_len,
-		     dev->wiphy.extended_capabilities) ||
-	     nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK,
-		     dev->wiphy.extended_capabilities_len,
-		     dev->wiphy.extended_capabilities_mask)))
-		goto nla_put_failure;
-
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-- 
cgit v0.10.2


From db05021d49a994ee40a9735d9c3cb0060c9babb8 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 27 Feb 2013 21:48:09 -0500
Subject: ftrace: Update the kconfig for DYNAMIC_FTRACE

The prompt to enable DYNAMIC_FTRACE (the ability to nop and
enable function tracing at run time) had a confusing statement:

 "enable/disable ftrace tracepoints dynamically"

This was written before tracepoints were added to the kernel,
but now that tracepoints have been added, this is very confusing
and has confused people enough to give wrong information during
presentations.

Not only that, I looked at the help text, and it still references
that dreaded daemon that use to wake up once a second to update
the nop locations and brick NICs, that hasn't been around for over
five years.

Time to bring the text up to the current decade.

Cc: stable@vger.kernel.org
Reported-by: Ezequiel Garcia <elezegarcia@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 3656756..b516a8e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -429,24 +429,28 @@ config PROBE_EVENTS
 	def_bool n
 
 config DYNAMIC_FTRACE
-	bool "enable/disable ftrace tracepoints dynamically"
+	bool "enable/disable function tracing dynamically"
 	depends on FUNCTION_TRACER
 	depends on HAVE_DYNAMIC_FTRACE
 	default y
 	help
-          This option will modify all the calls to ftrace dynamically
-	  (will patch them out of the binary image and replace them
-	  with a No-Op instruction) as they are called. A table is
-	  created to dynamically enable them again.
+	  This option will modify all the calls to function tracing
+	  dynamically (will patch them out of the binary image and
+	  replace them with a No-Op instruction) on boot up. During
+	  compile time, a table is made of all the locations that ftrace
+	  can function trace, and this table is linked into the kernel
+	  image. When this is enabled, functions can be individually
+	  enabled, and the functions not enabled will not affect
+	  performance of the system.
+
+	  See the files in /sys/kernel/debug/tracing:
+	    available_filter_functions
+	    set_ftrace_filter
+	    set_ftrace_notrace
 
 	  This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
 	  otherwise has native performance as long as no tracing is active.
 
-	  The changes to the code are done by a kernel thread that
-	  wakes up once a second and checks to see if any ftrace calls
-	  were made. If so, it runs stop_machine (stops all CPUS)
-	  and modifies the code to jump over the call to ftrace.
-
 config DYNAMIC_FTRACE_WITH_REGS
 	def_bool y
 	depends on DYNAMIC_FTRACE
-- 
cgit v0.10.2


From feda30271e5455394c57e35eba66db88d1b15077 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 Feb 2013 09:59:22 +0100
Subject: mac80211: really fix monitor mode channel reporting

After Felix's patch it was still broken in case you
used more than just a single monitor interface. Fix
it better now.

Reported-by: Sujith Manoharan <sujith@msujith.org>
Tested-by: Sujith Manoharan <sujith@msujith.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 808f5fc..fb30681 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3290,14 +3290,19 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy,
 	int ret = -ENODATA;
 
 	rcu_read_lock();
-	if (local->use_chanctx) {
-		chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
-		if (chanctx_conf) {
-			*chandef = chanctx_conf->def;
-			ret = 0;
-		}
-	} else if (local->open_count == local->monitors) {
-		*chandef = local->monitor_chandef;
+	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+	if (chanctx_conf) {
+		*chandef = chanctx_conf->def;
+		ret = 0;
+	} else if (local->open_count > 0 &&
+		   local->open_count == local->monitors &&
+		   sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+		if (local->use_chanctx)
+			*chandef = local->monitor_chandef;
+		else
+			cfg80211_chandef_create(chandef,
+						local->_oper_channel,
+						local->_oper_channel_type);
 		ret = 0;
 	}
 	rcu_read_unlock();
-- 
cgit v0.10.2


From 98891754ea9453de4db9111c91b20122ca330101 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 26 Feb 2013 11:28:19 +0100
Subject: iwlwifi: don't map complete commands bidirectionally

The reason we mapped them bidirectionally was that not doing
so had caused IOMMU exceptions, due to the fact that the HW
writes back into the command. Now that the first part of the
command including the write-back part is always in the first
buffer, we don't need to map the remaining buffer(s) bidi
and can get rid of the special-casing for commands.

This is a requisite patch for another one to fix DMA mapping.

Reviewed-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c
index 8b625a7..975492f 100644
--- a/drivers/net/wireless/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/tx.c
@@ -367,8 +367,8 @@ static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_tfd *tfd)
 }
 
 static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
-			       struct iwl_cmd_meta *meta, struct iwl_tfd *tfd,
-			       enum dma_data_direction dma_dir)
+			       struct iwl_cmd_meta *meta,
+			       struct iwl_tfd *tfd)
 {
 	int i;
 	int num_tbs;
@@ -392,7 +392,8 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
 	/* Unmap chunks, if any. */
 	for (i = 1; i < num_tbs; i++)
 		dma_unmap_single(trans->dev, iwl_pcie_tfd_tb_get_addr(tfd, i),
-				 iwl_pcie_tfd_tb_get_len(tfd, i), dma_dir);
+				 iwl_pcie_tfd_tb_get_len(tfd, i),
+				 DMA_TO_DEVICE);
 
 	tfd->num_tbs = 0;
 }
@@ -406,8 +407,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
  * Does NOT advance any TFD circular buffer read/write indexes
  * Does NOT free the TFD itself (which is within circular buffer)
  */
-static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
-				  enum dma_data_direction dma_dir)
+static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
 {
 	struct iwl_tfd *tfd_tmp = txq->tfds;
 
@@ -418,8 +418,7 @@ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
 	lockdep_assert_held(&txq->lock);
 
 	/* We have only q->n_window txq->entries, but we use q->n_bd tfds */
-	iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, &tfd_tmp[rd_ptr],
-			   dma_dir);
+	iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, &tfd_tmp[rd_ptr]);
 
 	/* free SKB */
 	if (txq->entries) {
@@ -565,22 +564,13 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = &trans_pcie->txq[txq_id];
 	struct iwl_queue *q = &txq->q;
-	enum dma_data_direction dma_dir;
 
 	if (!q->n_bd)
 		return;
 
-	/* In the command queue, all the TBs are mapped as BIDI
-	 * so unmap them as such.
-	 */
-	if (txq_id == trans_pcie->cmd_queue)
-		dma_dir = DMA_BIDIRECTIONAL;
-	else
-		dma_dir = DMA_TO_DEVICE;
-
 	spin_lock_bh(&txq->lock);
 	while (q->write_ptr != q->read_ptr) {
-		iwl_pcie_txq_free_tfd(trans, txq, dma_dir);
+		iwl_pcie_txq_free_tfd(trans, txq);
 		q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd);
 	}
 	spin_unlock_bh(&txq->lock);
@@ -962,7 +952,7 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
 
 		iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq);
 
-		iwl_pcie_txq_free_tfd(trans, txq, DMA_TO_DEVICE);
+		iwl_pcie_txq_free_tfd(trans, txq);
 	}
 
 	iwl_pcie_txq_progress(trans_pcie, txq);
@@ -1340,11 +1330,10 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP)
 			data = dup_buf;
 		phys_addr = dma_map_single(trans->dev, (void *)data,
-					   cmdlen[i], DMA_BIDIRECTIONAL);
+					   cmdlen[i], DMA_TO_DEVICE);
 		if (dma_mapping_error(trans->dev, phys_addr)) {
 			iwl_pcie_tfd_unmap(trans, out_meta,
-					   &txq->tfds[q->write_ptr],
-					   DMA_BIDIRECTIONAL);
+					   &txq->tfds[q->write_ptr]);
 			idx = -ENOMEM;
 			goto out;
 		}
@@ -1418,7 +1407,7 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
 	cmd = txq->entries[cmd_index].cmd;
 	meta = &txq->entries[cmd_index].meta;
 
-	iwl_pcie_tfd_unmap(trans, meta, &txq->tfds[index], DMA_BIDIRECTIONAL);
+	iwl_pcie_tfd_unmap(trans, meta, &txq->tfds[index]);
 
 	/* Input error checking is done when commands are added to queue. */
 	if (meta->flags & CMD_WANT_SKB) {
-- 
cgit v0.10.2


From 1afbfb6041fb8f639e742620ad933c347e14ba2c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 26 Feb 2013 11:32:26 +0100
Subject: iwlwifi: rename IWL_MAX_CMD_TFDS to IWL_MAX_CMD_TBS_PER_TFD

The IWL_MAX_CMD_TFDS name for this constant is wrong, the
constant really indicates how many TBs we can use in the
driver for a single command TFD, rename the constant and
also add a comment explaining it.

Reviewed-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace.h b/drivers/net/wireless/iwlwifi/iwl-devtrace.h
index 10f0179..81aa91f 100644
--- a/drivers/net/wireless/iwlwifi/iwl-devtrace.h
+++ b/drivers/net/wireless/iwlwifi/iwl-devtrace.h
@@ -363,7 +363,7 @@ TRACE_EVENT(iwlwifi_dev_hcmd,
 		__entry->flags = cmd->flags;
 		memcpy(__get_dynamic_array(hcmd), hdr, sizeof(*hdr));
 
-		for (i = 0; i < IWL_MAX_CMD_TFDS; i++) {
+		for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
 			if (!cmd->len[i])
 				continue;
 			memcpy((u8 *)__get_dynamic_array(hcmd) + offset,
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans.h b/drivers/net/wireless/iwlwifi/iwl-trans.h
index 8c7bec6..058947f 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/iwlwifi/iwl-trans.h
@@ -217,7 +217,11 @@ struct iwl_device_cmd {
 
 #define TFD_MAX_PAYLOAD_SIZE (sizeof(struct iwl_device_cmd))
 
-#define IWL_MAX_CMD_TFDS	2
+/*
+ * number of transfer buffers (fragments) per transmit frame descriptor;
+ * this is just the driver's idea, the hardware supports 20
+ */
+#define IWL_MAX_CMD_TBS_PER_TFD	2
 
 /**
  * struct iwl_hcmd_dataflag - flag for each one of the chunks of the command
@@ -254,15 +258,15 @@ enum iwl_hcmd_dataflag {
  * @id: id of the host command
  */
 struct iwl_host_cmd {
-	const void *data[IWL_MAX_CMD_TFDS];
+	const void *data[IWL_MAX_CMD_TBS_PER_TFD];
 	struct iwl_rx_packet *resp_pkt;
 	unsigned long _rx_page_addr;
 	u32 _rx_page_order;
 	int handler_status;
 
 	u32 flags;
-	u16 len[IWL_MAX_CMD_TFDS];
-	u8 dataflags[IWL_MAX_CMD_TFDS];
+	u16 len[IWL_MAX_CMD_TBS_PER_TFD];
+	u8 dataflags[IWL_MAX_CMD_TBS_PER_TFD];
 	u8 id;
 };
 
diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c
index 975492f..ff80a7e 100644
--- a/drivers/net/wireless/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/tx.c
@@ -1146,16 +1146,16 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 	bool had_nocopy = false;
 	int i;
 	u32 cmd_pos;
-	const u8 *cmddata[IWL_MAX_CMD_TFDS];
-	u16 cmdlen[IWL_MAX_CMD_TFDS];
+	const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
+	u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
 
 	copy_size = sizeof(out_cmd->hdr);
 	cmd_size = sizeof(out_cmd->hdr);
 
 	/* need one for the header if the first is NOCOPY */
-	BUILD_BUG_ON(IWL_MAX_CMD_TFDS > IWL_NUM_OF_TBS - 1);
+	BUILD_BUG_ON(IWL_MAX_CMD_TBS_PER_TFD > IWL_NUM_OF_TBS - 1);
 
-	for (i = 0; i < IWL_MAX_CMD_TFDS; i++) {
+	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
 		cmddata[i] = cmd->data[i];
 		cmdlen[i] = cmd->len[i];
 
@@ -1250,7 +1250,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 	/* and copy the data that needs to be copied */
 	cmd_pos = offsetof(struct iwl_device_cmd, payload);
 	copy_size = sizeof(out_cmd->hdr);
-	for (i = 0; i < IWL_MAX_CMD_TFDS; i++) {
+	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
 		int copy = 0;
 
 		if (!cmd->len)
@@ -1319,7 +1319,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 	iwl_pcie_txq_build_tfd(trans, txq, phys_addr, copy_size, 1);
 
 	/* map the remaining (adjusted) nocopy/dup fragments */
-	for (i = 0; i < IWL_MAX_CMD_TFDS; i++) {
+	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
 		const void *data = cmddata[i];
 
 		if (!cmdlen[i])
-- 
cgit v0.10.2


From aed7d9ac1836defe033b561f4306e39014ac56fd Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 20 Feb 2013 11:33:00 +0200
Subject: iwlwifi: disable 8K A-MSDU by default

Supporting 8K A-MSDU means that we need to allocate order 1
pages for every Rx packet. Even when there is no traffic.
This adds stress on the memory manager. The handling of
compound pages is also less trivial for the memory manager
and not using them will make the allocation code run faster
although I didn't really measure.
Eric also pointed out that having huge buffers with little
data in them is not very nice towards the TCP stack since
the truesize of the skb is huge. This doesn't allow TCP
to have a big Rx window.
See https://patchwork.kernel.org/patch/2167711/ for details.

Note that very few vendors will actually send A-MSDU.
Disable this feature by default.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/iwl-drv.c b/drivers/net/wireless/iwlwifi/iwl-drv.c
index 6f228bb..fbfd2d1 100644
--- a/drivers/net/wireless/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/iwlwifi/iwl-drv.c
@@ -1102,7 +1102,6 @@ void iwl_drv_stop(struct iwl_drv *drv)
 
 /* shared module parameters */
 struct iwl_mod_params iwlwifi_mod_params = {
-	.amsdu_size_8K = 1,
 	.restart_fw = 1,
 	.plcp_check = true,
 	.bt_coex_active = true,
@@ -1207,7 +1206,7 @@ MODULE_PARM_DESC(11n_disable,
 	"disable 11n functionality, bitmap: 1: full, 2: agg TX, 4: agg RX");
 module_param_named(amsdu_size_8K, iwlwifi_mod_params.amsdu_size_8K,
 		   int, S_IRUGO);
-MODULE_PARM_DESC(amsdu_size_8K, "enable 8K amsdu size");
+MODULE_PARM_DESC(amsdu_size_8K, "enable 8K amsdu size (default 0)");
 module_param_named(fw_restart, iwlwifi_mod_params.restart_fw, int, S_IRUGO);
 MODULE_PARM_DESC(fw_restart, "restart firmware in case of error");
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-modparams.h b/drivers/net/wireless/iwlwifi/iwl-modparams.h
index e5e3a79..2c2a729 100644
--- a/drivers/net/wireless/iwlwifi/iwl-modparams.h
+++ b/drivers/net/wireless/iwlwifi/iwl-modparams.h
@@ -91,7 +91,7 @@ enum iwl_power_level {
  * @sw_crypto: using hardware encryption, default = 0
  * @disable_11n: disable 11n capabilities, default = 0,
  *	use IWL_DISABLE_HT_* constants
- * @amsdu_size_8K: enable 8K amsdu size, default = 1
+ * @amsdu_size_8K: enable 8K amsdu size, default = 0
  * @restart_fw: restart firmware, default = 1
  * @plcp_check: enable plcp health check, default = true
  * @wd_disable: enable stuck queue check, default = 0
-- 
cgit v0.10.2


From 38c0f334b359953f010e9b921e0b55278d3918f7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 27 Feb 2013 13:18:50 +0100
Subject: iwlwifi: use coherent DMA memory for command header

Recently in commit 8a964f44e01ad3bbc208c3e80d931ba91b9ea786
("iwlwifi: always copy first 16 bytes of commands") we fixed
the problem that the hardware writes back to the command and
that could overwrite parts of the data that was still needed
and would thus be corrupted.

Investigating this problem more closely we found that this
write-back isn't really ordered very well with respect to
other DMA traffic. Therefore, it sometimes happened that the
write-back occurred after unmapping the command again which
is clearly an issue and could corrupt the next allocation
that goes to that spot, or (better) cause IOMMU faults.

To fix this, allocate coherent memory for the first 16 bytes
of each command, containing the write-back part, and use it
for all queues. All the dynamic DMA mappings only need to be
TO_DEVICE then. This ensures that even when the write-back
happens "too late" it can't hit memory that has been freed
or a mapping that doesn't exist any more.

Since now the actual command is no longer modified, we can
also remove CMD_WANT_HCMD and get rid of the DMA sync that
was necessary to update the scratch pointer.

Reviewed-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/dvm/sta.c b/drivers/net/wireless/iwlwifi/dvm/sta.c
index 94ef338..b775769 100644
--- a/drivers/net/wireless/iwlwifi/dvm/sta.c
+++ b/drivers/net/wireless/iwlwifi/dvm/sta.c
@@ -151,7 +151,7 @@ int iwl_send_add_sta(struct iwl_priv *priv,
 		       sta_id, sta->sta.addr, flags & CMD_ASYNC ?  "a" : "");
 
 	if (!(flags & CMD_ASYNC)) {
-		cmd.flags |= CMD_WANT_SKB | CMD_WANT_HCMD;
+		cmd.flags |= CMD_WANT_SKB;
 		might_sleep();
 	}
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans.h b/drivers/net/wireless/iwlwifi/iwl-trans.h
index 058947f..0cac2b7 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/iwlwifi/iwl-trans.h
@@ -186,19 +186,13 @@ struct iwl_rx_packet {
  * @CMD_ASYNC: Return right away and don't want for the response
  * @CMD_WANT_SKB: valid only with CMD_SYNC. The caller needs the buffer of the
  *	response. The caller needs to call iwl_free_resp when done.
- * @CMD_WANT_HCMD: The caller needs to get the HCMD that was sent in the
- *	response handler. Chunks flagged by %IWL_HCMD_DFL_NOCOPY won't be
- *	copied. The pointer passed to the response handler is in the transport
- *	ownership and don't need to be freed by the op_mode. This also means
- *	that the pointer is invalidated after the op_mode's handler returns.
  * @CMD_ON_DEMAND: This command is sent by the test mode pipe.
  */
 enum CMD_MODE {
 	CMD_SYNC		= 0,
 	CMD_ASYNC		= BIT(0),
 	CMD_WANT_SKB		= BIT(1),
-	CMD_WANT_HCMD		= BIT(2),
-	CMD_ON_DEMAND		= BIT(3),
+	CMD_ON_DEMAND		= BIT(2),
 };
 
 #define DEF_CMD_PAYLOAD_SIZE 320
diff --git a/drivers/net/wireless/iwlwifi/pcie/internal.h b/drivers/net/wireless/iwlwifi/pcie/internal.h
index 3d62e80..148843e 100644
--- a/drivers/net/wireless/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/iwlwifi/pcie/internal.h
@@ -137,10 +137,6 @@ static inline int iwl_queue_dec_wrap(int index, int n_bd)
 struct iwl_cmd_meta {
 	/* only for SYNC commands, iff the reply skb is wanted */
 	struct iwl_host_cmd *source;
-
-	DEFINE_DMA_UNMAP_ADDR(mapping);
-	DEFINE_DMA_UNMAP_LEN(len);
-
 	u32 flags;
 };
 
@@ -185,25 +181,36 @@ struct iwl_queue {
 /*
  * The FH will write back to the first TB only, so we need
  * to copy some data into the buffer regardless of whether
- * it should be mapped or not. This indicates how much to
- * copy, even for HCMDs it must be big enough to fit the
- * DRAM scratch from the TX cmd, at least 16 bytes.
+ * it should be mapped or not. This indicates how big the
+ * first TB must be to include the scratch buffer. Since
+ * the scratch is 4 bytes at offset 12, it's 16 now. If we
+ * make it bigger then allocations will be bigger and copy
+ * slower, so that's probably not useful.
  */
-#define IWL_HCMD_MIN_COPY_SIZE	16
+#define IWL_HCMD_SCRATCHBUF_SIZE	16
 
 struct iwl_pcie_txq_entry {
 	struct iwl_device_cmd *cmd;
-	struct iwl_device_cmd *copy_cmd;
 	struct sk_buff *skb;
 	/* buffer to free after command completes */
 	const void *free_buf;
 	struct iwl_cmd_meta meta;
 };
 
+struct iwl_pcie_txq_scratch_buf {
+	struct iwl_cmd_header hdr;
+	u8 buf[8];
+	__le32 scratch;
+};
+
 /**
  * struct iwl_txq - Tx Queue for DMA
  * @q: generic Rx/Tx queue descriptor
  * @tfds: transmit frame descriptors (DMA memory)
+ * @scratchbufs: start of command headers, including scratch buffers, for
+ *	the writeback -- this is DMA memory and an array holding one buffer
+ *	for each command on the queue
+ * @scratchbufs_dma: DMA address for the scratchbufs start
  * @entries: transmit entries (driver state)
  * @lock: queue lock
  * @stuck_timer: timer that fires if queue gets stuck
@@ -217,6 +224,8 @@ struct iwl_pcie_txq_entry {
 struct iwl_txq {
 	struct iwl_queue q;
 	struct iwl_tfd *tfds;
+	struct iwl_pcie_txq_scratch_buf *scratchbufs;
+	dma_addr_t scratchbufs_dma;
 	struct iwl_pcie_txq_entry *entries;
 	spinlock_t lock;
 	struct timer_list stuck_timer;
@@ -225,6 +234,13 @@ struct iwl_txq {
 	u8 active;
 };
 
+static inline dma_addr_t
+iwl_pcie_get_scratchbuf_dma(struct iwl_txq *txq, int idx)
+{
+	return txq->scratchbufs_dma +
+	       sizeof(struct iwl_pcie_txq_scratch_buf) * idx;
+}
+
 /**
  * struct iwl_trans_pcie - PCIe transport specific data
  * @rxq: all the RX queue data
diff --git a/drivers/net/wireless/iwlwifi/pcie/rx.c b/drivers/net/wireless/iwlwifi/pcie/rx.c
index b0ae06d..567e67a 100644
--- a/drivers/net/wireless/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/rx.c
@@ -637,22 +637,14 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
 		index = SEQ_TO_INDEX(sequence);
 		cmd_index = get_cmd_index(&txq->q, index);
 
-		if (reclaim) {
-			struct iwl_pcie_txq_entry *ent;
-			ent = &txq->entries[cmd_index];
-			cmd = ent->copy_cmd;
-			WARN_ON_ONCE(!cmd && ent->meta.flags & CMD_WANT_HCMD);
-		} else {
+		if (reclaim)
+			cmd = txq->entries[cmd_index].cmd;
+		else
 			cmd = NULL;
-		}
 
 		err = iwl_op_mode_rx(trans->op_mode, &rxcb, cmd);
 
 		if (reclaim) {
-			/* The original command isn't needed any more */
-			kfree(txq->entries[cmd_index].copy_cmd);
-			txq->entries[cmd_index].copy_cmd = NULL;
-			/* nor is the duplicated part of the command */
 			kfree(txq->entries[cmd_index].free_buf);
 			txq->entries[cmd_index].free_buf = NULL;
 		}
diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c
index ff80a7e..8595c16 100644
--- a/drivers/net/wireless/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/tx.c
@@ -191,12 +191,9 @@ static void iwl_pcie_txq_stuck_timer(unsigned long data)
 	}
 
 	for (i = q->read_ptr; i != q->write_ptr;
-	     i = iwl_queue_inc_wrap(i, q->n_bd)) {
-		struct iwl_tx_cmd *tx_cmd =
-			(struct iwl_tx_cmd *)txq->entries[i].cmd->payload;
+	     i = iwl_queue_inc_wrap(i, q->n_bd))
 		IWL_ERR(trans, "scratch %d = 0x%08x\n", i,
-			get_unaligned_le32(&tx_cmd->scratch));
-	}
+			le32_to_cpu(txq->scratchbufs[i].scratch));
 
 	iwl_op_mode_nic_error(trans->op_mode);
 }
@@ -382,14 +379,8 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
 		return;
 	}
 
-	/* Unmap tx_cmd */
-	if (num_tbs)
-		dma_unmap_single(trans->dev,
-				dma_unmap_addr(meta, mapping),
-				dma_unmap_len(meta, len),
-				DMA_BIDIRECTIONAL);
+	/* first TB is never freed - it's the scratchbuf data */
 
-	/* Unmap chunks, if any. */
 	for (i = 1; i < num_tbs; i++)
 		dma_unmap_single(trans->dev, iwl_pcie_tfd_tb_get_addr(tfd, i),
 				 iwl_pcie_tfd_tb_get_len(tfd, i),
@@ -478,6 +469,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	size_t tfd_sz = sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX;
+	size_t scratchbuf_sz;
 	int i;
 
 	if (WARN_ON(txq->entries || txq->tfds))
@@ -513,9 +505,25 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
 		IWL_ERR(trans, "dma_alloc_coherent(%zd) failed\n", tfd_sz);
 		goto error;
 	}
+
+	BUILD_BUG_ON(IWL_HCMD_SCRATCHBUF_SIZE != sizeof(*txq->scratchbufs));
+	BUILD_BUG_ON(offsetof(struct iwl_pcie_txq_scratch_buf, scratch) !=
+			sizeof(struct iwl_cmd_header) +
+			offsetof(struct iwl_tx_cmd, scratch));
+
+	scratchbuf_sz = sizeof(*txq->scratchbufs) * slots_num;
+
+	txq->scratchbufs = dma_alloc_coherent(trans->dev, scratchbuf_sz,
+					      &txq->scratchbufs_dma,
+					      GFP_KERNEL);
+	if (!txq->scratchbufs)
+		goto err_free_tfds;
+
 	txq->q.id = txq_id;
 
 	return 0;
+err_free_tfds:
+	dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->q.dma_addr);
 error:
 	if (txq->entries && txq_id == trans_pcie->cmd_queue)
 		for (i = 0; i < slots_num; i++)
@@ -600,7 +608,6 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
 	if (txq_id == trans_pcie->cmd_queue)
 		for (i = 0; i < txq->q.n_window; i++) {
 			kfree(txq->entries[i].cmd);
-			kfree(txq->entries[i].copy_cmd);
 			kfree(txq->entries[i].free_buf);
 		}
 
@@ -609,6 +616,10 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
 		dma_free_coherent(dev, sizeof(struct iwl_tfd) *
 				  txq->q.n_bd, txq->tfds, txq->q.dma_addr);
 		txq->q.dma_addr = 0;
+
+		dma_free_coherent(dev,
+				  sizeof(*txq->scratchbufs) * txq->q.n_window,
+				  txq->scratchbufs, txq->scratchbufs_dma);
 	}
 
 	kfree(txq->entries);
@@ -1142,7 +1153,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 	void *dup_buf = NULL;
 	dma_addr_t phys_addr;
 	int idx;
-	u16 copy_size, cmd_size, dma_size;
+	u16 copy_size, cmd_size, scratch_size;
 	bool had_nocopy = false;
 	int i;
 	u32 cmd_pos;
@@ -1162,9 +1173,9 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		if (!cmd->len[i])
 			continue;
 
-		/* need at least IWL_HCMD_MIN_COPY_SIZE copied */
-		if (copy_size < IWL_HCMD_MIN_COPY_SIZE) {
-			int copy = IWL_HCMD_MIN_COPY_SIZE - copy_size;
+		/* need at least IWL_HCMD_SCRATCHBUF_SIZE copied */
+		if (copy_size < IWL_HCMD_SCRATCHBUF_SIZE) {
+			int copy = IWL_HCMD_SCRATCHBUF_SIZE - copy_size;
 
 			if (copy > cmdlen[i])
 				copy = cmdlen[i];
@@ -1256,9 +1267,9 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		if (!cmd->len)
 			continue;
 
-		/* need at least IWL_HCMD_MIN_COPY_SIZE copied */
-		if (copy_size < IWL_HCMD_MIN_COPY_SIZE) {
-			copy = IWL_HCMD_MIN_COPY_SIZE - copy_size;
+		/* need at least IWL_HCMD_SCRATCHBUF_SIZE copied */
+		if (copy_size < IWL_HCMD_SCRATCHBUF_SIZE) {
+			copy = IWL_HCMD_SCRATCHBUF_SIZE - copy_size;
 
 			if (copy > cmd->len[i])
 				copy = cmd->len[i];
@@ -1276,48 +1287,36 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		}
 	}
 
-	WARN_ON_ONCE(txq->entries[idx].copy_cmd);
-
-	/*
-	 * since out_cmd will be the source address of the FH, it will write
-	 * the retry count there. So when the user needs to receivce the HCMD
-	 * that corresponds to the response in the response handler, it needs
-	 * to set CMD_WANT_HCMD.
-	 */
-	if (cmd->flags & CMD_WANT_HCMD) {
-		txq->entries[idx].copy_cmd =
-			kmemdup(out_cmd, cmd_pos, GFP_ATOMIC);
-		if (unlikely(!txq->entries[idx].copy_cmd)) {
-			idx = -ENOMEM;
-			goto out;
-		}
-	}
-
 	IWL_DEBUG_HC(trans,
 		     "Sending command %s (#%x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
 		     get_cmd_string(trans_pcie, out_cmd->hdr.cmd),
 		     out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence),
 		     cmd_size, q->write_ptr, idx, trans_pcie->cmd_queue);
 
-	/*
-	 * If the entire command is smaller than IWL_HCMD_MIN_COPY_SIZE, we must
-	 * still map at least that many bytes for the hardware to write back to.
-	 * We have enough space, so that's not a problem.
-	 */
-	dma_size = max_t(u16, copy_size, IWL_HCMD_MIN_COPY_SIZE);
+	/* start the TFD with the scratchbuf */
+	scratch_size = min_t(int, copy_size, IWL_HCMD_SCRATCHBUF_SIZE);
+	memcpy(&txq->scratchbufs[q->write_ptr], &out_cmd->hdr, scratch_size);
+	iwl_pcie_txq_build_tfd(trans, txq,
+			       iwl_pcie_get_scratchbuf_dma(txq, q->write_ptr),
+			       scratch_size, 1);
+
+	/* map first command fragment, if any remains */
+	if (copy_size > scratch_size) {
+		phys_addr = dma_map_single(trans->dev,
+					   ((u8 *)&out_cmd->hdr) + scratch_size,
+					   copy_size - scratch_size,
+					   DMA_TO_DEVICE);
+		if (dma_mapping_error(trans->dev, phys_addr)) {
+			iwl_pcie_tfd_unmap(trans, out_meta,
+					   &txq->tfds[q->write_ptr]);
+			idx = -ENOMEM;
+			goto out;
+		}
 
-	phys_addr = dma_map_single(trans->dev, &out_cmd->hdr, dma_size,
-				   DMA_BIDIRECTIONAL);
-	if (unlikely(dma_mapping_error(trans->dev, phys_addr))) {
-		idx = -ENOMEM;
-		goto out;
+		iwl_pcie_txq_build_tfd(trans, txq, phys_addr,
+				       copy_size - scratch_size, 0);
 	}
 
-	dma_unmap_addr_set(out_meta, mapping, phys_addr);
-	dma_unmap_len_set(out_meta, len, dma_size);
-
-	iwl_pcie_txq_build_tfd(trans, txq, phys_addr, copy_size, 1);
-
 	/* map the remaining (adjusted) nocopy/dup fragments */
 	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
 		const void *data = cmddata[i];
@@ -1586,10 +1585,9 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 	struct iwl_cmd_meta *out_meta;
 	struct iwl_txq *txq;
 	struct iwl_queue *q;
-	dma_addr_t phys_addr = 0;
-	dma_addr_t txcmd_phys;
-	dma_addr_t scratch_phys;
-	u16 len, firstlen, secondlen;
+	dma_addr_t tb0_phys, tb1_phys, scratch_phys;
+	void *tb1_addr;
+	u16 len, tb1_len, tb2_len;
 	u8 wait_write_ptr = 0;
 	__le16 fc = hdr->frame_control;
 	u8 hdr_len = ieee80211_hdrlen(fc);
@@ -1627,85 +1625,80 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 		cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
 			    INDEX_TO_SEQ(q->write_ptr)));
 
+	tb0_phys = iwl_pcie_get_scratchbuf_dma(txq, q->write_ptr);
+	scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) +
+		       offsetof(struct iwl_tx_cmd, scratch);
+
+	tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
+	tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
+
 	/* Set up first empty entry in queue's array of Tx/cmd buffers */
 	out_meta = &txq->entries[q->write_ptr].meta;
 
 	/*
-	 * Use the first empty entry in this queue's command buffer array
-	 * to contain the Tx command and MAC header concatenated together
-	 * (payload data will be in another buffer).
-	 * Size of this varies, due to varying MAC header length.
-	 * If end is not dword aligned, we'll have 2 extra bytes at the end
-	 * of the MAC header (device reads on dword boundaries).
-	 * We'll tell device about this padding later.
+	 * The second TB (tb1) points to the remainder of the TX command
+	 * and the 802.11 header - dword aligned size
+	 * (This calculation modifies the TX command, so do it before the
+	 * setup of the first TB)
 	 */
-	len = sizeof(struct iwl_tx_cmd) +
-		sizeof(struct iwl_cmd_header) + hdr_len;
-	firstlen = (len + 3) & ~3;
+	len = sizeof(struct iwl_tx_cmd) + sizeof(struct iwl_cmd_header) +
+	      hdr_len - IWL_HCMD_SCRATCHBUF_SIZE;
+	tb1_len = (len + 3) & ~3;
 
 	/* Tell NIC about any 2-byte padding after MAC header */
-	if (firstlen != len)
+	if (tb1_len != len)
 		tx_cmd->tx_flags |= TX_CMD_FLG_MH_PAD_MSK;
 
-	/* Physical address of this Tx command's header (not MAC header!),
-	 * within command buffer array. */
-	txcmd_phys = dma_map_single(trans->dev,
-				    &dev_cmd->hdr, firstlen,
-				    DMA_BIDIRECTIONAL);
-	if (unlikely(dma_mapping_error(trans->dev, txcmd_phys)))
-		goto out_err;
-	dma_unmap_addr_set(out_meta, mapping, txcmd_phys);
-	dma_unmap_len_set(out_meta, len, firstlen);
+	/* The first TB points to the scratchbuf data - min_copy bytes */
+	memcpy(&txq->scratchbufs[q->write_ptr], &dev_cmd->hdr,
+	       IWL_HCMD_SCRATCHBUF_SIZE);
+	iwl_pcie_txq_build_tfd(trans, txq, tb0_phys,
+			       IWL_HCMD_SCRATCHBUF_SIZE, 1);
 
-	if (!ieee80211_has_morefrags(fc)) {
-		txq->need_update = 1;
-	} else {
-		wait_write_ptr = 1;
-		txq->need_update = 0;
-	}
+	/* there must be data left over for TB1 or this code must be changed */
+	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd) < IWL_HCMD_SCRATCHBUF_SIZE);
+
+	/* map the data for TB1 */
+	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_HCMD_SCRATCHBUF_SIZE;
+	tb1_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(trans->dev, tb1_phys)))
+		goto out_err;
+	iwl_pcie_txq_build_tfd(trans, txq, tb1_phys, tb1_len, 0);
 
-	/* Set up TFD's 2nd entry to point directly to remainder of skb,
-	 * if any (802.11 null frames have no payload). */
-	secondlen = skb->len - hdr_len;
-	if (secondlen > 0) {
-		phys_addr = dma_map_single(trans->dev, skb->data + hdr_len,
-					   secondlen, DMA_TO_DEVICE);
-		if (unlikely(dma_mapping_error(trans->dev, phys_addr))) {
-			dma_unmap_single(trans->dev,
-					 dma_unmap_addr(out_meta, mapping),
-					 dma_unmap_len(out_meta, len),
-					 DMA_BIDIRECTIONAL);
+	/*
+	 * Set up TFD's third entry to point directly to remainder
+	 * of skb, if any (802.11 null frames have no payload).
+	 */
+	tb2_len = skb->len - hdr_len;
+	if (tb2_len > 0) {
+		dma_addr_t tb2_phys = dma_map_single(trans->dev,
+						     skb->data + hdr_len,
+						     tb2_len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) {
+			iwl_pcie_tfd_unmap(trans, out_meta,
+					   &txq->tfds[q->write_ptr]);
 			goto out_err;
 		}
+		iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, 0);
 	}
 
-	/* Attach buffers to TFD */
-	iwl_pcie_txq_build_tfd(trans, txq, txcmd_phys, firstlen, 1);
-	if (secondlen > 0)
-		iwl_pcie_txq_build_tfd(trans, txq, phys_addr, secondlen, 0);
-
-	scratch_phys = txcmd_phys + sizeof(struct iwl_cmd_header) +
-				offsetof(struct iwl_tx_cmd, scratch);
-
-	/* take back ownership of DMA buffer to enable update */
-	dma_sync_single_for_cpu(trans->dev, txcmd_phys, firstlen,
-				DMA_BIDIRECTIONAL);
-	tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
-	tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
-
 	/* Set up entry for this TFD in Tx byte-count array */
 	iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len));
 
-	dma_sync_single_for_device(trans->dev, txcmd_phys, firstlen,
-				   DMA_BIDIRECTIONAL);
-
 	trace_iwlwifi_dev_tx(trans->dev, skb,
 			     &txq->tfds[txq->q.write_ptr],
 			     sizeof(struct iwl_tfd),
-			     &dev_cmd->hdr, firstlen,
-			     skb->data + hdr_len, secondlen);
+			     &dev_cmd->hdr, IWL_HCMD_SCRATCHBUF_SIZE + tb1_len,
+			     skb->data + hdr_len, tb2_len);
 	trace_iwlwifi_dev_tx_data(trans->dev, skb,
-				  skb->data + hdr_len, secondlen);
+				  skb->data + hdr_len, tb2_len);
+
+	if (!ieee80211_has_morefrags(fc)) {
+		txq->need_update = 1;
+	} else {
+		wait_write_ptr = 1;
+		txq->need_update = 0;
+	}
 
 	/* start timer if queue currently empty */
 	if (txq->need_update && q->read_ptr == q->write_ptr &&
-- 
cgit v0.10.2


From 645e77def93f1dd0e211c7244fbe152dac8a7100 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 1 Mar 2013 14:03:49 +0100
Subject: nl80211: increase wiphy dump size dynamically

Given a device with many channels capabilities the wiphy
information can still overflow even though its size in
3.9 was reduced to 3.8 levels. For new userspace and
kernel 3.10 we're going to implement a new "split dump"
protocol that can use multiple messages per wiphy.

For now though, add a workaround to be able to send more
information to userspace. Since generic netlink doesn't
have a way to set the minimum dump size globally, and we
wouldn't really want to set it globally anyway, increase
the size only when needed, as described in the comments.
As userspace might not be prepared for large buffers, we
can only use 4k.

Also increase the size for the get_wiphy command.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7a7b621..d44ab21 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1307,7 +1307,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag
 
 static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int idx = 0;
+	int idx = 0, ret;
 	int start = cb->args[0];
 	struct cfg80211_registered_device *dev;
 
@@ -1317,9 +1317,29 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 		if (++idx <= start)
 			continue;
-		if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid,
-				       cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				       dev) < 0) {
+		ret = nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid,
+					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					 dev);
+		if (ret < 0) {
+			/*
+			 * If sending the wiphy data didn't fit (ENOBUFS or
+			 * EMSGSIZE returned), this SKB is still empty (so
+			 * it's not too big because another wiphy dataset is
+			 * already in the skb) and we've not tried to adjust
+			 * the dump allocation yet ... then adjust the alloc
+			 * size to be bigger, and return 1 but with the empty
+			 * skb. This results in an empty message being RX'ed
+			 * in userspace, but that is ignored.
+			 *
+			 * We can then retry with the larger buffer.
+			 */
+			if ((ret == -ENOBUFS || ret == -EMSGSIZE) &&
+			    !skb->len &&
+			    cb->min_dump_alloc < 4096) {
+				cb->min_dump_alloc = 4096;
+				mutex_unlock(&cfg80211_mutex);
+				return 1;
+			}
 			idx--;
 			break;
 		}
@@ -1336,7 +1356,7 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *msg;
 	struct cfg80211_registered_device *dev = info->user_ptr[0];
 
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	msg = nlmsg_new(4096, GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
 
-- 
cgit v0.10.2


From 7cbf9d017dbb5e3276de7d527925d42d4c11e732 Mon Sep 17 00:00:00 2001
From: Marco Porsch <marco@cozybit.com>
Date: Fri, 1 Mar 2013 16:01:18 +0100
Subject: mac80211: fix oops on mesh PS broadcast forwarding

Introduced with de74a1d9032f4d37ea453ad2a647e1aff4cd2591
"mac80211: fix WPA with VLAN on AP side with ps-sta".
Apparently overwrites the sdata pointer with non-valid data in
the case of mesh.
Fix this by checking for IFTYPE_AP_VLAN.

Signed-off-by: Marco Porsch <marco@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c592a41..0d74f24 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2755,7 +2755,8 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 				cpu_to_le16(IEEE80211_FCTL_MOREDATA);
 		}
 
-		sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev);
+		if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+			sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev);
 		if (!ieee80211_tx_prepare(sdata, &tx, skb))
 			break;
 		dev_kfree_skb_any(skb);
-- 
cgit v0.10.2


From 24af717c35189f7a83c34e637256ccb7295a617b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 1 Mar 2013 17:33:18 +0100
Subject: mac80211: fix VHT MCS calculation

The VHT MCSes we advertise to the AP were supposed to
be restricted to the AP, but due to a bug in the logic
mac80211 will advertise rates to the AP that aren't
even supported by the local device. To fix this skip
any adjustment if the NSS isn't supported at all.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b756d29..1415774 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -647,6 +647,9 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
 		our_mcs = (le16_to_cpu(vht_cap.vht_mcs.rx_mcs_map) &
 								mask) >> shift;
 
+		if (our_mcs == IEEE80211_VHT_MCS_NOT_SUPPORTED)
+			continue;
+
 		switch (ap_mcs) {
 		default:
 			if (our_mcs <= ap_mcs)
-- 
cgit v0.10.2


From 801d929ca7d935ee199fd61d8ef914f51e892270 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sat, 2 Mar 2013 19:05:47 +0100
Subject: mac80211: another fix for idle handling in monitor mode

When setting a monitor interface up or down, the idle state needs to be
recalculated, otherwise the hardware will just stay in its previous idle
state.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 640afab..baaa860 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -541,6 +541,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 
 		ieee80211_adjust_monitor_flags(sdata, 1);
 		ieee80211_configure_filter(local);
+		mutex_lock(&local->mtx);
+		ieee80211_recalc_idle(local);
+		mutex_unlock(&local->mtx);
 
 		netif_carrier_on(dev);
 		break;
@@ -812,6 +815,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 
 		ieee80211_adjust_monitor_flags(sdata, -1);
 		ieee80211_configure_filter(local);
+		mutex_lock(&local->mtx);
+		ieee80211_recalc_idle(local);
+		mutex_unlock(&local->mtx);
 		break;
 	case NL80211_IFTYPE_P2P_DEVICE:
 		/* relies on synchronize_rcu() below */
-- 
cgit v0.10.2


From f9caed59f801f77a2844ab04d2dea8df33ac862b Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 18 Feb 2013 21:51:07 +0000
Subject: netfilter: nf_ct_helper: Fix logging for dropped packets

Update nf_ct_helper_log to emit args along with the format.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 013cdf6..bb4188f 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -341,6 +341,13 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
 {
 	const struct nf_conn_help *help;
 	const struct nf_conntrack_helper *helper;
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
 
 	/* Called from the helper function, this call never fails */
 	help = nfct_help(ct);
@@ -349,7 +356,9 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
 	helper = rcu_dereference(help->helper);
 
 	nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
-		      "nf_ct_%s: dropping packet: %s ", helper->name, fmt);
+		      "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf);
+
+	va_end(args);
 }
 EXPORT_SYMBOL_GPL(nf_ct_helper_log);
 
-- 
cgit v0.10.2


From 715c998ff4d1106c3096bc5a48e4196663e6701a Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 28 Feb 2013 08:57:31 +0200
Subject: iwlwifi: mvm: restart the NIC of the cmd queue gets full

This situation is clearly an error situation and the only
way to recover is to restart the driver / fw.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Reviewed-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c
index aa59adf..d0f9c1e 100644
--- a/drivers/net/wireless/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/iwlwifi/mvm/ops.c
@@ -624,12 +624,8 @@ static void iwl_mvm_free_skb(struct iwl_op_mode *op_mode, struct sk_buff *skb)
 	ieee80211_free_txskb(mvm->hw, skb);
 }
 
-static void iwl_mvm_nic_error(struct iwl_op_mode *op_mode)
+static void iwl_mvm_nic_restart(struct iwl_mvm *mvm)
 {
-	struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
-
-	iwl_mvm_dump_nic_error_log(mvm);
-
 	iwl_abort_notification_waits(&mvm->notif_wait);
 
 	/*
@@ -663,9 +659,21 @@ static void iwl_mvm_nic_error(struct iwl_op_mode *op_mode)
 	}
 }
 
+static void iwl_mvm_nic_error(struct iwl_op_mode *op_mode)
+{
+	struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
+
+	iwl_mvm_dump_nic_error_log(mvm);
+
+	iwl_mvm_nic_restart(mvm);
+}
+
 static void iwl_mvm_cmd_queue_full(struct iwl_op_mode *op_mode)
 {
+	struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
+
 	WARN_ON(1);
+	iwl_mvm_nic_restart(mvm);
 }
 
 static const struct iwl_op_mode_ops iwl_mvm_ops = {
-- 
cgit v0.10.2


From e07cbb536acb249db5fd63f6884354630ae875ad Mon Sep 17 00:00:00 2001
From: Dor Shaish <dor.shaish@intel.com>
Date: Wed, 27 Feb 2013 23:00:27 +0200
Subject: iwlwifi: mvm: Set valid TX antennas value before calib request

We must set the valid TX antennas number in the ucode before
sending the phy_cfg_cmd and request for calibrations.

Signed-off-by: Dor Shaish <dor.shaish@intel.com>
Reviewed-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c
index d3d959d..e6d51a9 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/iwlwifi/mvm/fw.c
@@ -446,6 +446,11 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
 	ret = iwl_nvm_check_version(mvm->nvm_data, mvm->trans);
 	WARN_ON(ret);
 
+	/* Send TX valid antennas before triggering calibrations */
+	ret = iwl_send_tx_ant_cfg(mvm, mvm->nvm_data->valid_tx_ant);
+	if (ret)
+		goto error;
+
 	/* Override the calibrations from TLV and the const of fw */
 	iwl_set_default_calib_trigger(mvm);
 
-- 
cgit v0.10.2


From 6221d47cf7a57eb1e2b5b51c65e2edcde369a0d4 Mon Sep 17 00:00:00 2001
From: Dor Shaish <dor.shaish@intel.com>
Date: Wed, 27 Feb 2013 15:55:48 +0200
Subject: iwlwifi: mvm: Take the phy_cfg from the TLV value

The phy_cfg is given from the TLV value and does not have to be
built by us.

Signed-off-by: Dor Shaish <dor.shaish@intel.com>
Reviewed-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c
index e6d51a9..d3c067e 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/iwlwifi/mvm/fw.c
@@ -241,20 +241,6 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
 
 	return 0;
 }
-#define IWL_HW_REV_ID_RAINBOW	0x2
-#define IWL_PROJ_TYPE_LHP	0x5
-
-static u32 iwl_mvm_build_phy_cfg(struct iwl_mvm *mvm)
-{
-	struct iwl_nvm_data *data = mvm->nvm_data;
-	/* Temp calls to static definitions, will be changed to CSR calls */
-	u8 hw_rev_id = IWL_HW_REV_ID_RAINBOW;
-	u8 project_type = IWL_PROJ_TYPE_LHP;
-
-	return data->radio_cfg_dash | (data->radio_cfg_step << 2) |
-		(hw_rev_id << 4) | ((project_type & 0x7f) << 6) |
-		(data->valid_tx_ant << 16) | (data->valid_rx_ant << 20);
-}
 
 static int iwl_send_phy_cfg_cmd(struct iwl_mvm *mvm)
 {
@@ -262,7 +248,7 @@ static int iwl_send_phy_cfg_cmd(struct iwl_mvm *mvm)
 	enum iwl_ucode_type ucode_type = mvm->cur_ucode;
 
 	/* Set parameters */
-	phy_cfg_cmd.phy_cfg = cpu_to_le32(iwl_mvm_build_phy_cfg(mvm));
+	phy_cfg_cmd.phy_cfg = cpu_to_le32(mvm->fw->phy_config);
 	phy_cfg_cmd.calib_control.event_trigger =
 		mvm->fw->default_calib[ucode_type].event_trigger;
 	phy_cfg_cmd.calib_control.flow_trigger =
-- 
cgit v0.10.2


From de8bc6dd2d52cacaa76ea381ffdc00919b100a2c Mon Sep 17 00:00:00 2001
From: Dor Shaish <dor.shaish@intel.com>
Date: Wed, 27 Feb 2013 13:01:09 +0200
Subject: iwlwifi: mvm: Remove overriding calibrations for the 7000 family

This fix removes the override of calibration request values sent
to the FW.
Due to that, the sending of default values to now implemented
calibrations is removed.

Signed-off-by: Dor Shaish <dor.shaish@intel.com>
Reviewed-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c
index d3c067e..500f818 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/iwlwifi/mvm/fw.c
@@ -79,17 +79,8 @@
 #define UCODE_VALID_OK	cpu_to_le32(0x1)
 
 /* Default calibration values for WkP - set to INIT image w/o running */
-static const u8 wkp_calib_values_bb_filter[] = { 0xbf, 0x00, 0x5f, 0x00, 0x2f,
-						 0x00, 0x18, 0x00 };
-static const u8 wkp_calib_values_rx_dc[] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-					     0x7f, 0x7f, 0x7f };
-static const u8 wkp_calib_values_tx_lo[] = { 0x00, 0x00, 0x00, 0x00 };
-static const u8 wkp_calib_values_tx_iq[] = { 0xff, 0x00, 0xff, 0x00, 0x00,
-					     0x00 };
-static const u8 wkp_calib_values_rx_iq[] = { 0xff, 0x00, 0x00, 0x00 };
 static const u8 wkp_calib_values_rx_iq_skew[] = { 0x00, 0x00, 0x01, 0x00 };
 static const u8 wkp_calib_values_tx_iq_skew[] = { 0x01, 0x00, 0x00, 0x00 };
-static const u8 wkp_calib_values_xtal[] = { 0xd2, 0xd2 };
 
 struct iwl_calib_default_data {
 	u16 size;
@@ -99,12 +90,7 @@ struct iwl_calib_default_data {
 #define CALIB_SIZE_N_DATA(_buf) {.size = sizeof(_buf), .data = &_buf}
 
 static const struct iwl_calib_default_data wkp_calib_default_data[12] = {
-	[5] = CALIB_SIZE_N_DATA(wkp_calib_values_rx_dc),
-	[6] = CALIB_SIZE_N_DATA(wkp_calib_values_bb_filter),
-	[7] = CALIB_SIZE_N_DATA(wkp_calib_values_tx_lo),
-	[8] = CALIB_SIZE_N_DATA(wkp_calib_values_tx_iq),
 	[9] = CALIB_SIZE_N_DATA(wkp_calib_values_tx_iq_skew),
-	[10] = CALIB_SIZE_N_DATA(wkp_calib_values_rx_iq),
 	[11] = CALIB_SIZE_N_DATA(wkp_calib_values_rx_iq_skew),
 };
 
@@ -261,103 +247,6 @@ static int iwl_send_phy_cfg_cmd(struct iwl_mvm *mvm)
 				    sizeof(phy_cfg_cmd), &phy_cfg_cmd);
 }
 
-/* Starting with the new PHY DB implementation - New calibs are enabled */
-/* Value - 0x405e7 */
-#define IWL_CALIB_DEFAULT_FLOW_INIT	(IWL_CALIB_CFG_XTAL_IDX		|\
-					 IWL_CALIB_CFG_TEMPERATURE_IDX	|\
-					 IWL_CALIB_CFG_VOLTAGE_READ_IDX	|\
-					 IWL_CALIB_CFG_DC_IDX		|\
-					 IWL_CALIB_CFG_BB_FILTER_IDX	|\
-					 IWL_CALIB_CFG_LO_LEAKAGE_IDX	|\
-					 IWL_CALIB_CFG_TX_IQ_IDX	|\
-					 IWL_CALIB_CFG_RX_IQ_IDX	|\
-					 IWL_CALIB_CFG_AGC_IDX)
-
-#define IWL_CALIB_DEFAULT_EVENT_INIT	0x0
-
-/* Value 0x41567 */
-#define IWL_CALIB_DEFAULT_FLOW_RUN	(IWL_CALIB_CFG_XTAL_IDX		|\
-					 IWL_CALIB_CFG_TEMPERATURE_IDX	|\
-					 IWL_CALIB_CFG_VOLTAGE_READ_IDX	|\
-					 IWL_CALIB_CFG_BB_FILTER_IDX	|\
-					 IWL_CALIB_CFG_DC_IDX		|\
-					 IWL_CALIB_CFG_TX_IQ_IDX	|\
-					 IWL_CALIB_CFG_RX_IQ_IDX	|\
-					 IWL_CALIB_CFG_SENSITIVITY_IDX	|\
-					 IWL_CALIB_CFG_AGC_IDX)
-
-#define IWL_CALIB_DEFAULT_EVENT_RUN	(IWL_CALIB_CFG_XTAL_IDX		|\
-					 IWL_CALIB_CFG_TEMPERATURE_IDX	|\
-					 IWL_CALIB_CFG_VOLTAGE_READ_IDX	|\
-					 IWL_CALIB_CFG_TX_PWR_IDX	|\
-					 IWL_CALIB_CFG_DC_IDX		|\
-					 IWL_CALIB_CFG_TX_IQ_IDX	|\
-					 IWL_CALIB_CFG_SENSITIVITY_IDX)
-
-/*
- * Sets the calibrations trigger values that will be sent to the FW for runtime
- * and init calibrations.
- * The ones given in the FW TLV are not correct.
- */
-static void iwl_set_default_calib_trigger(struct iwl_mvm *mvm)
-{
-	struct iwl_tlv_calib_ctrl default_calib;
-
-	/*
-	 * WkP FW TLV calib bits are wrong, overwrite them.
-	 * This defines the dynamic calibrations which are implemented in the
-	 * uCode both for init(flow) calculation and event driven calibs.
-	 */
-
-	/* Init Image */
-	default_calib.event_trigger = cpu_to_le32(IWL_CALIB_DEFAULT_EVENT_INIT);
-	default_calib.flow_trigger = cpu_to_le32(IWL_CALIB_DEFAULT_FLOW_INIT);
-
-	if (default_calib.event_trigger !=
-	    mvm->fw->default_calib[IWL_UCODE_INIT].event_trigger)
-		IWL_ERR(mvm,
-			"Updating the event calib for INIT image: 0x%x -> 0x%x\n",
-			mvm->fw->default_calib[IWL_UCODE_INIT].event_trigger,
-			default_calib.event_trigger);
-	if (default_calib.flow_trigger !=
-	    mvm->fw->default_calib[IWL_UCODE_INIT].flow_trigger)
-		IWL_ERR(mvm,
-			"Updating the flow calib for INIT image: 0x%x -> 0x%x\n",
-			mvm->fw->default_calib[IWL_UCODE_INIT].flow_trigger,
-			default_calib.flow_trigger);
-
-	memcpy((void *)&mvm->fw->default_calib[IWL_UCODE_INIT],
-	       &default_calib, sizeof(struct iwl_tlv_calib_ctrl));
-	IWL_ERR(mvm,
-		"Setting uCode init calibrations event 0x%x, trigger 0x%x\n",
-		default_calib.event_trigger,
-		default_calib.flow_trigger);
-
-	/* Run time image */
-	default_calib.event_trigger = cpu_to_le32(IWL_CALIB_DEFAULT_EVENT_RUN);
-	default_calib.flow_trigger = cpu_to_le32(IWL_CALIB_DEFAULT_FLOW_RUN);
-
-	if (default_calib.event_trigger !=
-	    mvm->fw->default_calib[IWL_UCODE_REGULAR].event_trigger)
-		IWL_ERR(mvm,
-			"Updating the event calib for RT image: 0x%x -> 0x%x\n",
-			mvm->fw->default_calib[IWL_UCODE_REGULAR].event_trigger,
-			default_calib.event_trigger);
-	if (default_calib.flow_trigger !=
-	    mvm->fw->default_calib[IWL_UCODE_REGULAR].flow_trigger)
-		IWL_ERR(mvm,
-			"Updating the flow calib for RT image: 0x%x -> 0x%x\n",
-			mvm->fw->default_calib[IWL_UCODE_REGULAR].flow_trigger,
-			default_calib.flow_trigger);
-
-	memcpy((void *)&mvm->fw->default_calib[IWL_UCODE_REGULAR],
-	       &default_calib, sizeof(struct iwl_tlv_calib_ctrl));
-	IWL_ERR(mvm,
-		"Setting uCode runtime calibs event 0x%x, trigger 0x%x\n",
-		default_calib.event_trigger,
-		default_calib.flow_trigger);
-}
-
 static int iwl_set_default_calibrations(struct iwl_mvm *mvm)
 {
 	u8 cmd_raw[16]; /* holds the variable size commands */
@@ -437,9 +326,6 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
 	if (ret)
 		goto error;
 
-	/* Override the calibrations from TLV and the const of fw */
-	iwl_set_default_calib_trigger(mvm);
-
 	/* WkP doesn't have all calibrations, need to set default values */
 	if (mvm->cfg->device_family == IWL_DEVICE_FAMILY_7000) {
 		ret = iwl_set_default_calibrations(mvm);
-- 
cgit v0.10.2


From f9aa8dd33714f17c7229ad89309406a1ccb3cd3f Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 4 Mar 2013 09:11:08 +0200
Subject: iwlwifi: mvm: ignore STOP_AGG when restarting

Since the device is being restarted, all the Rx / Tx Block
Ack sessions are been wiped out by the driver. So ignore
the requests from mac80211 that stops Tx agg while
reconfiguring the device.

Note that stopping a non-existing Rx BA session is harmless,
so just honor mac80211's request.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c
index 861a7f9..274f44e 100644
--- a/drivers/net/wireless/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/iwlwifi/mvm/sta.c
@@ -770,6 +770,16 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	u16 txq_id;
 	int err;
 
+
+	/*
+	 * If mac80211 is cleaning its state, then say that we finished since
+	 * our state has been cleared anyway.
+	 */
+	if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
+		ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+		return 0;
+	}
+
 	spin_lock_bh(&mvmsta->lock);
 
 	txq_id = tid_data->txq_id;
-- 
cgit v0.10.2


From 8101a7f0656bf11c385d6e14f52313b19f017e70 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 28 Feb 2013 11:54:28 +0200
Subject: iwlwifi: mvm: update the rssi calculation

Make the rssi more accurate by taking in count per-chain AGC
values. Without this, the RSSI reports inaccurate values.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/iwlwifi/mvm/fw-api.h
index 23eebda..2adb61f 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api.h
@@ -762,18 +762,20 @@ struct iwl_phy_context_cmd {
 #define IWL_RX_INFO_PHY_CNT 8
 #define IWL_RX_INFO_AGC_IDX 1
 #define IWL_RX_INFO_RSSI_AB_IDX 2
-#define IWL_RX_INFO_RSSI_C_IDX 3
-#define IWL_OFDM_AGC_DB_MSK 0xfe00
-#define IWL_OFDM_AGC_DB_POS 9
+#define IWL_OFDM_AGC_A_MSK 0x0000007f
+#define IWL_OFDM_AGC_A_POS 0
+#define IWL_OFDM_AGC_B_MSK 0x00003f80
+#define IWL_OFDM_AGC_B_POS 7
+#define IWL_OFDM_AGC_CODE_MSK 0x3fe00000
+#define IWL_OFDM_AGC_CODE_POS 20
 #define IWL_OFDM_RSSI_INBAND_A_MSK 0x00ff
-#define IWL_OFDM_RSSI_ALLBAND_A_MSK 0xff00
 #define IWL_OFDM_RSSI_A_POS 0
+#define IWL_OFDM_RSSI_ALLBAND_A_MSK 0xff00
+#define IWL_OFDM_RSSI_ALLBAND_A_POS 8
 #define IWL_OFDM_RSSI_INBAND_B_MSK 0xff0000
-#define IWL_OFDM_RSSI_ALLBAND_B_MSK 0xff000000
 #define IWL_OFDM_RSSI_B_POS 16
-#define IWL_OFDM_RSSI_INBAND_C_MSK 0x00ff
-#define IWL_OFDM_RSSI_ALLBAND_C_MSK 0xff00
-#define IWL_OFDM_RSSI_C_POS 0
+#define IWL_OFDM_RSSI_ALLBAND_B_MSK 0xff000000
+#define IWL_OFDM_RSSI_ALLBAND_B_POS 24
 
 /**
  * struct iwl_rx_phy_info - phy info
diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h
index 537711b..bdae700 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h
@@ -80,7 +80,8 @@
 
 #define IWL_INVALID_MAC80211_QUEUE	0xff
 #define IWL_MVM_MAX_ADDRESSES		2
-#define IWL_RSSI_OFFSET 44
+/* RSSI offset for WkP */
+#define IWL_RSSI_OFFSET 50
 
 enum iwl_mvm_tx_fifo {
 	IWL_MVM_TX_FIFO_BK = 0,
diff --git a/drivers/net/wireless/iwlwifi/mvm/rx.c b/drivers/net/wireless/iwlwifi/mvm/rx.c
index 3f40ab0..b0b190d 100644
--- a/drivers/net/wireless/iwlwifi/mvm/rx.c
+++ b/drivers/net/wireless/iwlwifi/mvm/rx.c
@@ -131,33 +131,42 @@ static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm,
 static int iwl_mvm_calc_rssi(struct iwl_mvm *mvm,
 			     struct iwl_rx_phy_info *phy_info)
 {
-	u32 rssi_a, rssi_b, rssi_c, max_rssi, agc_db;
+	int rssi_a, rssi_b, rssi_a_dbm, rssi_b_dbm, max_rssi_dbm;
+	int rssi_all_band_a, rssi_all_band_b;
+	u32 agc_a, agc_b, max_agc;
 	u32 val;
 
-	/* Find max rssi among 3 possible receivers.
+	/* Find max rssi among 2 possible receivers.
 	 * These values are measured by the Digital Signal Processor (DSP).
 	 * They should stay fairly constant even as the signal strength varies,
 	 * if the radio's Automatic Gain Control (AGC) is working right.
 	 * AGC value (see below) will provide the "interesting" info.
 	 */
+	val = le32_to_cpu(phy_info->non_cfg_phy[IWL_RX_INFO_AGC_IDX]);
+	agc_a = (val & IWL_OFDM_AGC_A_MSK) >> IWL_OFDM_AGC_A_POS;
+	agc_b = (val & IWL_OFDM_AGC_B_MSK) >> IWL_OFDM_AGC_B_POS;
+	max_agc = max_t(u32, agc_a, agc_b);
+
 	val = le32_to_cpu(phy_info->non_cfg_phy[IWL_RX_INFO_RSSI_AB_IDX]);
 	rssi_a = (val & IWL_OFDM_RSSI_INBAND_A_MSK) >> IWL_OFDM_RSSI_A_POS;
 	rssi_b = (val & IWL_OFDM_RSSI_INBAND_B_MSK) >> IWL_OFDM_RSSI_B_POS;
-	val = le32_to_cpu(phy_info->non_cfg_phy[IWL_RX_INFO_RSSI_C_IDX]);
-	rssi_c = (val & IWL_OFDM_RSSI_INBAND_C_MSK) >> IWL_OFDM_RSSI_C_POS;
-
-	val = le32_to_cpu(phy_info->non_cfg_phy[IWL_RX_INFO_AGC_IDX]);
-	agc_db = (val & IWL_OFDM_AGC_DB_MSK) >> IWL_OFDM_AGC_DB_POS;
+	rssi_all_band_a = (val & IWL_OFDM_RSSI_ALLBAND_A_MSK) >>
+				IWL_OFDM_RSSI_ALLBAND_A_POS;
+	rssi_all_band_b = (val & IWL_OFDM_RSSI_ALLBAND_B_MSK) >>
+				IWL_OFDM_RSSI_ALLBAND_B_POS;
 
-	max_rssi = max_t(u32, rssi_a, rssi_b);
-	max_rssi = max_t(u32, max_rssi, rssi_c);
+	/*
+	 * dBm = rssi dB - agc dB - constant.
+	 * Higher AGC (higher radio gain) means lower signal.
+	 */
+	rssi_a_dbm = rssi_a - IWL_RSSI_OFFSET - agc_a;
+	rssi_b_dbm = rssi_b - IWL_RSSI_OFFSET - agc_b;
+	max_rssi_dbm = max_t(int, rssi_a_dbm, rssi_b_dbm);
 
-	IWL_DEBUG_STATS(mvm, "Rssi In A %d B %d C %d Max %d AGC dB %d\n",
-			rssi_a, rssi_b, rssi_c, max_rssi, agc_db);
+	IWL_DEBUG_STATS(mvm, "Rssi In A %d B %d Max %d AGCA %d AGCB %d\n",
+			rssi_a_dbm, rssi_b_dbm, max_rssi_dbm, agc_a, agc_b);
 
-	/* dBm = max_rssi dB - agc dB - constant.
-	 * Higher AGC (higher radio gain) means lower signal. */
-	return max_rssi - agc_db - IWL_RSSI_OFFSET;
+	return max_rssi_dbm;
 }
 
 /*
-- 
cgit v0.10.2


From 2470b36e84a2e680d7a7e3809cbceae5bfae3606 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sun, 3 Mar 2013 14:35:03 +0200
Subject: iwlwifi: mvm: don't warn on normal BAR sending

This flow happens when we get a failed single Tx response
on an AMPDU queue. In this case, the frame won't be sent
any more. So we need to move the window on the recipient
side. This is done by a BAR.

Now if we are in the following case: 10, 12 and 13 are ACKed
and 11 isn't.

 10  11  12  13.
 V   X   V   V

Then, 11 will be sent 16 times as an MPDU (as oppsed to
A-MPDU). If this failed, we are entering the flow described
above. So we need to send a BAR with ssn = 12.
But in this case, the scheduler will tell us to free frames
up to 13 (included).

So, it is perfectly possible to get a failed single Tx
response on an AMPDU queue that makes the scheduler's ssn
jump by more than 1 single packet.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c
index 6b67ce3..6645efe 100644
--- a/drivers/net/wireless/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/iwlwifi/mvm/tx.c
@@ -607,12 +607,8 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
 
 		/* Single frame failure in an AMPDU queue => send BAR */
 		if (txq_id >= IWL_FIRST_AMPDU_QUEUE &&
-		    !(info->flags & IEEE80211_TX_STAT_ACK)) {
-			/* there must be only one skb in the skb_list */
-			WARN_ON_ONCE(skb_freed > 1 ||
-				     !skb_queue_empty(&skbs));
+		    !(info->flags & IEEE80211_TX_STAT_ACK))
 			info->flags |= IEEE80211_TX_STAT_AMPDU_NO_BACK;
-		}
 
 		/* W/A FW bug: seq_ctl is wrong when the queue is flushed */
 		if (status == TX_STATUS_FAIL_FIFO_FLUSHED) {
-- 
cgit v0.10.2


From ed018fa4dfc3d26da56b9ee7dc75e9d39a39a02b Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Mon, 4 Mar 2013 00:29:12 +0000
Subject: netfilter: xt_AUDIT: only generate audit log when audit enabled

We should stop generting audit log if audit is disabled.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index ba92824..3228d7f 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -124,6 +124,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	const struct xt_audit_info *info = par->targinfo;
 	struct audit_buffer *ab;
 
+	if (audit_enabled == 0)
+		goto errout;
+
 	ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
 	if (ab == NULL)
 		goto errout;
-- 
cgit v0.10.2


From 9df9e7832391cf699abbf39fc8d95d7e78297462 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Mon, 4 Mar 2013 02:45:41 +0000
Subject: netfilter: nfnetlink: silence warning if CONFIG_PROVE_RCU isn't set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit c14b78e7decd0d1d5add6a4604feb8609fe920a9 ("netfilter:
nfnetlink: add mutex per subsystem") building nefnetlink.o without
CONFIG_PROVE_RCU set, triggers this GCC warning:
    net/netfilter/nfnetlink.c:65:22: warning: ‘nfnl_get_lock’ defined but not used [-Wunused-function]

The cause of that warning is, in short, that rcu_lockdep_assert()
compiles away if CONFIG_PROVE_RCU is not set. Silence this warning by
open coding nfnl_get_lock() in the sole place it was called, which
allows to remove that function.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index d578ec2..0b1b32c 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -62,11 +62,6 @@ void nfnl_unlock(__u8 subsys_id)
 }
 EXPORT_SYMBOL_GPL(nfnl_unlock);
 
-static struct mutex *nfnl_get_lock(__u8 subsys_id)
-{
-	return &table[subsys_id].mutex;
-}
-
 int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
 {
 	nfnl_lock(n->subsys_id);
@@ -199,7 +194,7 @@ replay:
 			rcu_read_unlock();
 			nfnl_lock(subsys_id);
 			if (rcu_dereference_protected(table[subsys_id].subsys,
-				lockdep_is_held(nfnl_get_lock(subsys_id))) != ss ||
+				lockdep_is_held(&table[subsys_id].mutex)) != ss ||
 			    nfnetlink_find_client(type, ss) != nc)
 				err = -EAGAIN;
 			else if (nc->call)
-- 
cgit v0.10.2


From e08f626b33eb636dbf38b21618ab32b7fd8e1ec4 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Wed, 20 Feb 2013 03:06:34 +0000
Subject: e1000e: workaround DMA unit hang on I218

At 1000Mbps link speed, one of the MAC's internal clocks can be stopped for
up to 4us when entering K1 (a power mode of the MAC-PHY interconnect).  If
the MAC is waiting for completion indications for 2 DMA write requests into
Host memory (e.g. descriptor writeback or Rx packet writing) and the
indications occur while the clock is stopped, both indications will be
missed by the MAC causing the MAC to wait for the completion indications
and be unable to generate further DMA write requests.  This results in an
apparent hardware hang.

Work-around the issue by disabling the de-assertion of the clock request
when 1000Mbps link is acquired (K1 must be disabled while doing this).

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index dff7bff..121a865 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -782,6 +782,59 @@ release:
 }
 
 /**
+ *  e1000_k1_workaround_lpt_lp - K1 workaround on Lynxpoint-LP
+ *  @hw:   pointer to the HW structure
+ *  @link: link up bool flag
+ *
+ *  When K1 is enabled for 1Gbps, the MAC can miss 2 DMA completion indications
+ *  preventing further DMA write requests.  Workaround the issue by disabling
+ *  the de-assertion of the clock request when in 1Gpbs mode.
+ **/
+static s32 e1000_k1_workaround_lpt_lp(struct e1000_hw *hw, bool link)
+{
+	u32 fextnvm6 = er32(FEXTNVM6);
+	s32 ret_val = 0;
+
+	if (link && (er32(STATUS) & E1000_STATUS_SPEED_1000)) {
+		u16 kmrn_reg;
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val =
+		    e1000e_read_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG,
+						&kmrn_reg);
+		if (ret_val)
+			goto release;
+
+		ret_val =
+		    e1000e_write_kmrn_reg_locked(hw,
+						 E1000_KMRNCTRLSTA_K1_CONFIG,
+						 kmrn_reg &
+						 ~E1000_KMRNCTRLSTA_K1_ENABLE);
+		if (ret_val)
+			goto release;
+
+		usleep_range(10, 20);
+
+		ew32(FEXTNVM6, fextnvm6 | E1000_FEXTNVM6_REQ_PLL_CLK);
+
+		ret_val =
+		    e1000e_write_kmrn_reg_locked(hw,
+						 E1000_KMRNCTRLSTA_K1_CONFIG,
+						 kmrn_reg);
+release:
+		hw->phy.ops.release(hw);
+	} else {
+		/* clear FEXTNVM6 bit 8 on link down or 10/100 */
+		ew32(FEXTNVM6, fextnvm6 & ~E1000_FEXTNVM6_REQ_PLL_CLK);
+	}
+
+	return ret_val;
+}
+
+/**
  *  e1000_check_for_copper_link_ich8lan - Check for link (Copper)
  *  @hw: pointer to the HW structure
  *
@@ -818,6 +871,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 			return ret_val;
 	}
 
+	/* Work-around I218 hang issue */
+	if ((hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_LM) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_V)) {
+		ret_val = e1000_k1_workaround_lpt_lp(hw, link);
+		if (ret_val)
+			return ret_val;
+	}
+
 	/* Clear link partner's EEE ability */
 	hw->dev_spec.ich8lan.eee_lp_ability = 0;
 
@@ -3954,8 +4015,16 @@ void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw)
 
 	phy_ctrl = er32(PHY_CTRL);
 	phy_ctrl |= E1000_PHY_CTRL_GBE_DISABLE;
+
 	if (hw->phy.type == e1000_phy_i217) {
-		u16 phy_reg;
+		u16 phy_reg, device_id = hw->adapter->pdev->device;
+
+		if ((device_id == E1000_DEV_ID_PCH_LPTLP_I218_LM) ||
+		    (device_id == E1000_DEV_ID_PCH_LPTLP_I218_V)) {
+			u32 fextnvm6 = er32(FEXTNVM6);
+
+			ew32(FEXTNVM6, fextnvm6 & ~E1000_FEXTNVM6_REQ_PLL_CLK);
+		}
 
 		ret_val = hw->phy.ops.acquire(hw);
 		if (ret_val)
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index b6d3174..8bf4655 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -92,6 +92,8 @@
 #define E1000_FEXTNVM4_BEACON_DURATION_8USEC	0x7
 #define E1000_FEXTNVM4_BEACON_DURATION_16USEC	0x3
 
+#define E1000_FEXTNVM6_REQ_PLL_CLK	0x00000100
+
 #define PCIE_ICH8_SNOOP_ALL	PCIE_NO_SNOOP_ALL
 
 #define E1000_ICH_RAR_ENTRIES	7
diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h
index 794fe14..a7e6a3e 100644
--- a/drivers/net/ethernet/intel/e1000e/regs.h
+++ b/drivers/net/ethernet/intel/e1000e/regs.h
@@ -42,6 +42,7 @@
 #define E1000_FEXTNVM	0x00028	/* Future Extended NVM - RW */
 #define E1000_FEXTNVM3	0x0003C	/* Future Extended NVM 3 - RW */
 #define E1000_FEXTNVM4	0x00024	/* Future Extended NVM 4 - RW */
+#define E1000_FEXTNVM6	0x00010	/* Future Extended NVM 6 - RW */
 #define E1000_FEXTNVM7	0x000E4	/* Future Extended NVM 7 - RW */
 #define E1000_FCT	0x00030	/* Flow Control Type - RW */
 #define E1000_VET	0x00038	/* VLAN Ether Type - RW */
-- 
cgit v0.10.2


From e4f7dbb17e797d922d72567f37de3735722034ba Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 21 Feb 2013 03:08:50 +0000
Subject: igb: Drop BUILD_BUG_ON check from igb_build_rx_buffer

On s390 the igb driver was throwing a build error due to the fact that a frame
built using build_skb would be larger than 2K.  Since this is not likely to
change at any point in the future we are better off just dropping the check
since we already had a check in igb_set_rx_buffer_len that will just disable
the usage of build_skb anyway.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index ed79a1c..4d53a17 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6227,13 +6227,6 @@ static struct sk_buff *igb_build_rx_buffer(struct igb_ring *rx_ring,
 	/* If we spanned a buffer we have a huge mess so test for it */
 	BUG_ON(unlikely(!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)));
 
-	/* Guarantee this function can be used by verifying buffer sizes */
-	BUILD_BUG_ON(SKB_WITH_OVERHEAD(IGB_RX_BUFSZ) < (NET_SKB_PAD +
-							NET_IP_ALIGN +
-							IGB_TS_HDR_LEN +
-							ETH_FRAME_LEN +
-							ETH_FCS_LEN));
-
 	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
 	page = rx_buffer->page;
 	prefetchw(page);
-- 
cgit v0.10.2


From ed65bdd8c0086d69948e6380dba0cc279a6906de Mon Sep 17 00:00:00 2001
From: Carolyn Wyborny <carolyn.wyborny@intel.com>
Date: Wed, 6 Feb 2013 03:35:27 +0000
Subject: igb: Fix link setup for I210 devices

This patch changes the setup copper link function to use a switch
statement for the PHY id's available for the given PHY types.  It
also adds a case for the I210 PHY id, so the appropriate setup link
function is called for it.

Signed-off-by: Carolyn Wyborny <carolyn.wyborny@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index 84e7e09..b64542a 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -1361,11 +1361,16 @@ static s32 igb_setup_copper_link_82575(struct e1000_hw *hw)
 	switch (hw->phy.type) {
 	case e1000_phy_i210:
 	case e1000_phy_m88:
-		if (hw->phy.id == I347AT4_E_PHY_ID ||
-		    hw->phy.id == M88E1112_E_PHY_ID)
+		switch (hw->phy.id) {
+		case I347AT4_E_PHY_ID:
+		case M88E1112_E_PHY_ID:
+		case I210_I_PHY_ID:
 			ret_val = igb_copper_link_setup_m88_gen2(hw);
-		else
+			break;
+		default:
 			ret_val = igb_copper_link_setup_m88(hw);
+			break;
+		}
 		break;
 	case e1000_phy_igp_3:
 		ret_val = igb_copper_link_setup_igp(hw);
-- 
cgit v0.10.2


From 603e86fa39cd48edba5ee8a4d19637bd41e2a8bf Mon Sep 17 00:00:00 2001
From: Carolyn Wyborny <carolyn.wyborny@intel.com>
Date: Wed, 20 Feb 2013 07:40:55 +0000
Subject: igb: Fix for lockdep issue in igb_get_i2c_client

This patch fixes a lockdep warning in igb_get_i2c_client by
refactoring the initialization and usage of the i2c_client
completely.  There is no on the fly allocation of the single
client needed today.

Signed-off-by: Carolyn Wyborny <carolyn.wyborny@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index d27edbc..2515140 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -447,7 +447,7 @@ struct igb_adapter {
 #endif
 	struct i2c_algo_bit_data i2c_algo;
 	struct i2c_adapter i2c_adap;
-	struct igb_i2c_client_list *i2c_clients;
+	struct i2c_client *i2c_client;
 };
 
 #define IGB_FLAG_HAS_MSI		(1 << 0)
diff --git a/drivers/net/ethernet/intel/igb/igb_hwmon.c b/drivers/net/ethernet/intel/igb/igb_hwmon.c
index 0a9b073..4623502 100644
--- a/drivers/net/ethernet/intel/igb/igb_hwmon.c
+++ b/drivers/net/ethernet/intel/igb/igb_hwmon.c
@@ -39,6 +39,10 @@
 #include <linux/pci.h>
 
 #ifdef CONFIG_IGB_HWMON
+struct i2c_board_info i350_sensor_info = {
+	I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)),
+};
+
 /* hwmon callback functions */
 static ssize_t igb_hwmon_show_location(struct device *dev,
 					 struct device_attribute *attr,
@@ -188,6 +192,7 @@ int igb_sysfs_init(struct igb_adapter *adapter)
 	unsigned int i;
 	int n_attrs;
 	int rc = 0;
+	struct i2c_client *client = NULL;
 
 	/* If this method isn't defined we don't support thermals */
 	if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL)
@@ -198,6 +203,15 @@ int igb_sysfs_init(struct igb_adapter *adapter)
 		if (rc)
 			goto exit;
 
+	/* init i2c_client */
+	client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info);
+	if (client == NULL) {
+		dev_info(&adapter->pdev->dev,
+			"Failed to create new i2c device..\n");
+		goto exit;
+	}
+	adapter->i2c_client = client;
+
 	/* Allocation space for max attributes
 	 * max num sensors * values (loc, temp, max, caution)
 	 */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 4d53a17..4dbd629 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1923,10 +1923,6 @@ void igb_set_fw_version(struct igb_adapter *adapter)
 	return;
 }
 
-static const struct i2c_board_info i350_sensor_info = {
-	I2C_BOARD_INFO("i350bb", 0Xf8),
-};
-
 /*  igb_init_i2c - Init I2C interface
  *  @adapter: pointer to adapter structure
  *
@@ -7717,67 +7713,6 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
 	}
 }
 
-static DEFINE_SPINLOCK(i2c_clients_lock);
-
-/*  igb_get_i2c_client - returns matching client
- *  in adapters's client list.
- *  @adapter: adapter struct
- *  @dev_addr: device address of i2c needed.
- */
-static struct i2c_client *
-igb_get_i2c_client(struct igb_adapter *adapter, u8 dev_addr)
-{
-	ulong flags;
-	struct igb_i2c_client_list *client_list;
-	struct i2c_client *client = NULL;
-	struct i2c_board_info client_info = {
-		I2C_BOARD_INFO("igb", 0x00),
-	};
-
-	spin_lock_irqsave(&i2c_clients_lock, flags);
-	client_list = adapter->i2c_clients;
-
-	/* See if we already have an i2c_client */
-	while (client_list) {
-		if (client_list->client->addr == (dev_addr >> 1)) {
-			client = client_list->client;
-			goto exit;
-		} else {
-			client_list = client_list->next;
-		}
-	}
-
-	/* no client_list found, create a new one */
-	client_list = kzalloc(sizeof(*client_list), GFP_ATOMIC);
-	if (client_list == NULL)
-		goto exit;
-
-	/* dev_addr passed to us is left-shifted by 1 bit
-	 * i2c_new_device call expects it to be flush to the right.
-	 */
-	client_info.addr = dev_addr >> 1;
-	client_info.platform_data = adapter;
-	client_list->client = i2c_new_device(&adapter->i2c_adap, &client_info);
-	if (client_list->client == NULL) {
-		dev_info(&adapter->pdev->dev,
-			"Failed to create new i2c device..\n");
-		goto err_no_client;
-	}
-
-	/* insert new client at head of list */
-	client_list->next = adapter->i2c_clients;
-	adapter->i2c_clients = client_list;
-
-	client = client_list->client;
-	goto exit;
-
-err_no_client:
-	kfree(client_list);
-exit:
-	spin_unlock_irqrestore(&i2c_clients_lock, flags);
-	return client;
-}
-
 /*  igb_read_i2c_byte - Reads 8 bit word over I2C
  *  @hw: pointer to hardware structure
  *  @byte_offset: byte offset to read
@@ -7791,7 +7726,7 @@ s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
 				u8 dev_addr, u8 *data)
 {
 	struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
-	struct i2c_client *this_client = igb_get_i2c_client(adapter, dev_addr);
+	struct i2c_client *this_client = adapter->i2c_client;
 	s32 status;
 	u16 swfw_mask = 0;
 
@@ -7828,7 +7763,7 @@ s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
 				 u8 dev_addr, u8 data)
 {
 	struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
-	struct i2c_client *this_client = igb_get_i2c_client(adapter, dev_addr);
+	struct i2c_client *this_client = adapter->i2c_client;
 	s32 status;
 	u16 swfw_mask = E1000_SWFW_PHY0_SM;
 
-- 
cgit v0.10.2


From a4ed2e737cb73e4405a3649f8aef7619b99fecae Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Tue, 5 Mar 2013 06:09:04 +0000
Subject: net/irda: Fix port open counts

Saving the port count bump is unsafe. If the tty is hung up while
this open was blocking, the port count is zeroed.

Explicitly check if the tty was hung up while blocking, and correct
the port count if not.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 9a5fd3c..1721dc7 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -280,7 +280,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
 	struct tty_port *port = &self->port;
 	DECLARE_WAITQUEUE(wait, current);
 	int		retval;
-	int		do_clocal = 0, extra_count = 0;
+	int		do_clocal = 0;
 	unsigned long	flags;
 
 	IRDA_DEBUG(2, "%s()\n", __func__ );
@@ -315,10 +315,8 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
 	      __FILE__, __LINE__, tty->driver->name, port->count);
 
 	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp)) {
-		extra_count = 1;
+	if (!tty_hung_up_p(filp))
 		port->count--;
-	}
 	spin_unlock_irqrestore(&port->lock, flags);
 	port->blocked_open++;
 
@@ -361,12 +359,10 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&port->open_wait, &wait);
 
-	if (extra_count) {
-		/* ++ is not atomic, so this should be protected - Jean II */
-		spin_lock_irqsave(&port->lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
+	if (!tty_hung_up_p(filp))
 		port->count++;
-		spin_unlock_irqrestore(&port->lock, flags);
-	}
+	spin_unlock_irqrestore(&port->lock, flags);
 	port->blocked_open--;
 
 	IRDA_DEBUG(1, "%s(%d):block_til_ready after blocking on %s open_count=%d\n",
-- 
cgit v0.10.2


From 2f7c069b96ed7b1f6236f2fa7b0bc06f4f54f2d9 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Tue, 5 Mar 2013 06:09:05 +0000
Subject: net/irda: Hold port lock while bumping blocked_open

Although tty_lock() already protects concurrent update to
blocked_open, that fails to meet the separation-of-concerns between
tty_port and tty.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 1721dc7..d282bbe 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -317,8 +317,8 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
 	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp))
 		port->count--;
-	spin_unlock_irqrestore(&port->lock, flags);
 	port->blocked_open++;
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	while (1) {
 		if (tty->termios.c_cflag & CBAUD)
@@ -362,8 +362,8 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
 	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp))
 		port->count++;
-	spin_unlock_irqrestore(&port->lock, flags);
 	port->blocked_open--;
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	IRDA_DEBUG(1, "%s(%d):block_til_ready after blocking on %s open_count=%d\n",
 	      __FILE__, __LINE__, tty->driver->name, port->count);
-- 
cgit v0.10.2


From 0b176ce3a7cbfa92eceddf3896f1a504877d974a Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Tue, 5 Mar 2013 06:09:06 +0000
Subject: net/irda: Use barrier to set task state

Without a memory and compiler barrier, the task state change
can migrate relative to the condition testing in a blocking loop.
However, the task state change must be visible across all cpus
prior to testing those conditions. Failing to do this can result
in the familiar 'lost wakeup' and this task will hang until killed.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index d282bbe..522543d 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -324,7 +324,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
 		if (tty->termios.c_cflag & CBAUD)
 			tty_port_raise_dtr_rts(port);
 
-		current->state = TASK_INTERRUPTIBLE;
+		set_current_state(TASK_INTERRUPTIBLE);
 
 		if (tty_hung_up_p(filp) ||
 		    !test_bit(ASYNCB_INITIALIZED, &port->flags)) {
-- 
cgit v0.10.2


From f74861ca87264e594e0134e8ac14db06be77fe6f Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Tue, 5 Mar 2013 06:09:07 +0000
Subject: net/irda: Raise dtr in non-blocking open

DTR/RTS need to be raised, regardless of the open() mode, but not
if the port has already shutdown.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 522543d..362ba47 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -289,8 +289,15 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
 	 * If non-blocking mode is set, or the port is not enabled,
 	 * then make the check up front and then exit.
 	 */
-	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
-		/* nonblock mode is set or port is not enabled */
+	if (test_bit(TTY_IO_ERROR, &tty->flags)) {
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+		return 0;
+	}
+
+	if (filp->f_flags & O_NONBLOCK) {
+		/* nonblock mode is set */
+		if (tty->termios.c_cflag & CBAUD)
+			tty_port_raise_dtr_rts(port);
 		port->flags |= ASYNC_NORMAL_ACTIVE;
 		IRDA_DEBUG(1, "%s(), O_NONBLOCK requested!\n", __func__ );
 		return 0;
-- 
cgit v0.10.2


From 9b99b7e90bf5cd9a88823c49574ba80d92a98262 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@unimore.it>
Date: Tue, 5 Mar 2013 08:04:57 +0000
Subject: pkt_sched: sch_qfq: properly cap timestamps in charge_actual_service

QFQ+ schedules the active aggregates in a group using a bucket list
(one list per group). The bucket in which each aggregate is inserted
depends on the aggregate's timestamps, and the number
of buckets in a group is enough to accomodate the possible (range of)
values of the timestamps of all the aggregates in the group. For this
property to hold, timestamps must however be computed correctly.  One
necessary condition for computing timestamps correctly is that the
number of bits dequeued for each aggregate, while the aggregate is in
service, does not exceed the maximum budget budgetmax assigned to the
aggregate.

For each aggregate, budgetmax is proportional to the number of classes
in the aggregate. If the number of classes of the aggregate is
decreased through qfq_change_class(), then budgetmax is decreased
automatically as well.  Problems may occur if the aggregate is in
service when budgetmax is decreased, because the current remaining
budget of the aggregate and/or the service already received by the
aggregate may happen to be larger than the new value of budgetmax.  In
this case, when the aggregate is eventually deselected and its
timestamps are updated, the aggregate may happen to have received an
amount of service larger than budgetmax.  This may cause the aggregate
to be assigned a higher virtual finish time than the maximum
acceptable value for the last bucket in the bucket list of the group.

This fix introduces a cap that addresses this issue.

Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
Reviewed-by: Fabio Checconi <fchecconi@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index e9a77f6..489edd9 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -298,6 +298,10 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
 	    new_num_classes == q->max_agg_classes - 1) /* agg no more full */
 		hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
 
+	/* The next assignment may let
+	 * agg->initial_budget > agg->budgetmax
+	 * hold, we will take it into account in charge_actual_service().
+	 */
 	agg->budgetmax = new_num_classes * agg->lmax;
 	new_agg_weight = agg->class_weight * new_num_classes;
 	agg->inv_w = ONE_FP/new_agg_weight;
@@ -988,8 +992,13 @@ static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
 /* Update F according to the actual service received by the aggregate. */
 static inline void charge_actual_service(struct qfq_aggregate *agg)
 {
-	/* compute the service received by the aggregate */
-	u32 service_received = agg->initial_budget - agg->budget;
+	/* Compute the service received by the aggregate, taking into
+	 * account that, after decreasing the number of classes in
+	 * agg, it may happen that
+	 * agg->initial_budget - agg->budget > agg->bugdetmax
+	 */
+	u32 service_received = min(agg->budgetmax,
+				   agg->initial_budget - agg->budget);
 
 	agg->F = agg->S + (u64)service_received * agg->inv_w;
 }
-- 
cgit v0.10.2


From 624b85fb96206879c8146abdba071222bbd25259 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@unimore.it>
Date: Tue, 5 Mar 2013 08:04:58 +0000
Subject: pkt_sched: sch_qfq: fix the update of eligible-group sets

Between two invocations of make_eligible, the system virtual time may
happen to grow enough that, in its binary representation, a bit with
higher order than 31 flips. This happens especially with
TSO/GSO. Before this fix, the mask used in make_eligible was computed
as (1UL<<index_of_last_flipped_bit)-1, whose value is well defined on
a 64-bit architecture, because index_of_flipped_bit <= 63, but is in
general undefined on a 32-bit architecture if index_of_flipped_bit > 31.
The fix just replaces 1UL with 1ULL.

Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
Reviewed-by: Fabio Checconi <fchecconi@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 489edd9..39d85cc 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -821,7 +821,7 @@ static void qfq_make_eligible(struct qfq_sched *q)
 	unsigned long old_vslot = q->oldV >> q->min_slot_shift;
 
 	if (vslot != old_vslot) {
-		unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1;
+		unsigned long mask = (1ULL << fls(vslot ^ old_vslot)) - 1;
 		qfq_move_groups(q, mask, IR, ER);
 		qfq_move_groups(q, mask, IB, EB);
 	}
-- 
cgit v0.10.2


From 2f3b89a1fe0823fceb544856c9eeb036a75ff091 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@unimore.it>
Date: Tue, 5 Mar 2013 08:04:59 +0000
Subject: pkt_sched: sch_qfq: serve activated aggregates immediately if the
 scheduler is empty

If no aggregate is in service, then the function qfq_dequeue() does
not dequeue any packet. For this reason, to guarantee QFQ+ to be work
conserving, a just-activated aggregate must be set as in service
immediately if it happens to be the only active aggregate.
This is done by the function qfq_enqueue().

Unfortunately, the function qfq_add_to_agg(), used to add a class to
an aggregate, does not perform this important additional operation.
In particular, if: 1) qfq_add_to_agg() is invoked to complete the move
of a class from a source aggregate, becoming, for this move, inactive,
to a destination aggregate, becoming instead active, and 2) the
destination aggregate becomes the only active aggregate, then this
aggregate is not however set as in service. QFQ+ remains then in a
non-work-conserving state until a new invocation of qfq_enqueue()
recovers the situation.

This fix solves the problem by moving the logic for setting an
aggregate as in service directly into the function qfq_activate_agg().
Hence, from whatever point qfq_activate_aggregate() is invoked, QFQ+
remains work conserving.  Since the more-complex logic of this new
version of activate_aggregate() is not necessary, in qfq_dequeue(), to
reschedule an aggregate that finishes its budget, then the aggregate
is now rescheduled by invoking directly the functions needed.

Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
Reviewed-by: Fabio Checconi <fchecconi@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 39d85cc..8c5172c 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1003,6 +1003,12 @@ static inline void charge_actual_service(struct qfq_aggregate *agg)
 	agg->F = agg->S + (u64)service_received * agg->inv_w;
 }
 
+static inline void qfq_update_agg_ts(struct qfq_sched *q,
+				     struct qfq_aggregate *agg,
+				     enum update_reason reason);
+
+static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
+
 static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
 {
 	struct qfq_sched *q = qdisc_priv(sch);
@@ -1030,7 +1036,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
 		in_serv_agg->initial_budget = in_serv_agg->budget =
 			in_serv_agg->budgetmax;
 
-		if (!list_empty(&in_serv_agg->active))
+		if (!list_empty(&in_serv_agg->active)) {
 			/*
 			 * Still active: reschedule for
 			 * service. Possible optimization: if no other
@@ -1041,8 +1047,9 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
 			 * handle it, we would need to maintain an
 			 * extra num_active_aggs field.
 			*/
-			qfq_activate_agg(q, in_serv_agg, requeue);
-		else if (sch->q.qlen == 0) { /* no aggregate to serve */
+			qfq_update_agg_ts(q, in_serv_agg, requeue);
+			qfq_schedule_agg(q, in_serv_agg);
+		} else if (sch->q.qlen == 0) { /* no aggregate to serve */
 			q->in_serv_agg = NULL;
 			return NULL;
 		}
@@ -1226,17 +1233,11 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	cl->deficit = agg->lmax;
 	list_add_tail(&cl->alist, &agg->active);
 
-	if (list_first_entry(&agg->active, struct qfq_class, alist) != cl)
-		return err; /* aggregate was not empty, nothing else to do */
-
-	/* recharge budget */
-	agg->initial_budget = agg->budget = agg->budgetmax;
+	if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
+	    q->in_serv_agg == agg)
+		return err; /* non-empty or in service, nothing else to do */
 
-	qfq_update_agg_ts(q, agg, enqueue);
-	if (q->in_serv_agg == NULL)
-		q->in_serv_agg = agg;
-	else if (agg != q->in_serv_agg)
-		qfq_schedule_agg(q, agg);
+	qfq_activate_agg(q, agg, enqueue);
 
 	return err;
 }
@@ -1293,8 +1294,15 @@ skip_update:
 static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
 			     enum update_reason reason)
 {
+	agg->initial_budget = agg->budget = agg->budgetmax; /* recharge budg. */
+
 	qfq_update_agg_ts(q, agg, reason);
-	qfq_schedule_agg(q, agg);
+	if (q->in_serv_agg == NULL) { /* no aggr. in service or scheduled */
+		q->in_serv_agg = agg; /* start serving this aggregate */
+		 /* update V: to be in service, agg must be eligible */
+		q->oldV = q->V = agg->S;
+	} else if (agg != q->in_serv_agg)
+		qfq_schedule_agg(q, agg);
 }
 
 static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
-- 
cgit v0.10.2


From a0143efa96671dc51dab9bba776a66f9bfa1757f Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@unimore.it>
Date: Tue, 5 Mar 2013 08:05:00 +0000
Subject: pkt_sched: sch_qfq: prevent budget from wrapping around after a
 dequeue

Aggregate budgets are computed so as to guarantee that, after an
aggregate has been selected for service, that aggregate has enough
budget to serve at least one maximum-size packet for the classes it
contains. For this reason, after a new aggregate has been selected
for service, its next packet is immediately dequeued, without any
further control.

The maximum packet size for a class, lmax, can be changed through
qfq_change_class(). In case the user sets lmax to a lower value than
the the size of some of the still-to-arrive packets, QFQ+ will
automatically push up lmax as it enqueues these packets.  This
automatic push up is likely to happen with TSO/GSO.

In any case, if lmax is assigned a lower value than the size of some
of the packets already enqueued for the class, then the following
problem may occur: the size of the next packet to dequeue for the
class may happen to be larger than lmax, after the aggregate to which
the class belongs has been just selected for service. In this case,
even the budget of the aggregate, which is an unsigned value, may be
lower than the size of the next packet to dequeue. After dequeueing
this packet and subtracting its size from the budget, the latter would
wrap around.

This fix prevents the budget from wrapping around after any packet
dequeue.

Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
Reviewed-by: Fabio Checconi <fchecconi@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 8c5172c..c34af93 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1068,7 +1068,15 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
 	qdisc_bstats_update(sch, skb);
 
 	agg_dequeue(in_serv_agg, cl, len);
-	in_serv_agg->budget -= len;
+	/* If lmax is lowered, through qfq_change_class, for a class
+	 * owning pending packets with larger size than the new value
+	 * of lmax, then the following condition may hold.
+	 */
+	if (unlikely(in_serv_agg->budget < len))
+		in_serv_agg->budget = 0;
+	else
+		in_serv_agg->budget -= len;
+
 	q->V += (u64)len * IWSUM;
 	pr_debug("qfq dequeue: len %u F %lld now %lld\n",
 		 len, (unsigned long long) in_serv_agg->F,
-- 
cgit v0.10.2


From 40dd2d546198e7bbb8d3fe718957b158caa3fe52 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@unimore.it>
Date: Tue, 5 Mar 2013 08:05:01 +0000
Subject: pkt_sched: sch_qfq: do not allow virtual time to jump if an aggregate
 is in service

By definition of (the algorithm of) QFQ+, the system virtual time must
be pushed up only if there is no 'eligible' aggregate, i.e. no
aggregate that would have started to be served also in the ideal
system emulated by QFQ+.  QFQ+ serves only eligible aggregates, hence
the aggregate currently in service is eligible.  As a consequence, to
decide whether there is no eligible aggregate, QFQ+ must also check
whether there is no aggregate in service.

Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
Reviewed-by: Fabio Checconi <fchecconi@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index c34af93..6b7ce80 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1279,7 +1279,8 @@ static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
 		/* group was surely ineligible, remove */
 		__clear_bit(grp->index, &q->bitmaps[IR]);
 		__clear_bit(grp->index, &q->bitmaps[IB]);
-	} else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
+	} else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
+		   q->in_serv_agg == NULL)
 		q->V = roundedS;
 
 	grp->S = roundedS;
-- 
cgit v0.10.2


From 76e4cb0d3a583900f844f1704b19b7e8c5df8837 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@unimore.it>
Date: Tue, 5 Mar 2013 08:05:02 +0000
Subject: pkt_sched: sch_qfq: remove a useless invocation of
 qfq_update_eligible

QFQ+ can select for service only 'eligible' aggregates, i.e.,
aggregates that would have started to be served also in the emulated
ideal system.  As a consequence, for QFQ+ to be work conserving, at
least one of the active aggregates must be eligible when it is time to
choose the next aggregate to serve.

The set of eligible aggregates is updated through the function
qfq_update_eligible(), which does guarantee that, after its
invocation, at least one of the active aggregates is eligible.
Because of this property, this function is invoked in
qfq_deactivate_agg() to guarantee that at least one of the active
aggregates is still eligible after an aggregate has been deactivated.
In particular, the critical case is when there are other active
aggregates, but the aggregate being deactivated happens to be the only
one eligible.

However, this precaution is not needed for QFQ+ to be work conserving,
because update_eligible() is always invoked also at the beginning of
qfq_choose_next_agg(). This patch removes the additional invocation of
update_eligible() in qfq_deactivate_agg().

Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
Reviewed-by: Fabio Checconi <fchecconi@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 6b7ce80..d51852b 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1383,8 +1383,6 @@ static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
 			__set_bit(grp->index, &q->bitmaps[s]);
 		}
 	}
-
-	qfq_update_eligible(q);
 }
 
 static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
-- 
cgit v0.10.2


From 88c4c066c6b4db26dc4909ee94e6bf377e8e8e81 Mon Sep 17 00:00:00 2001
From: Zang MingJie <zealot0630@gmail.com>
Date: Mon, 4 Mar 2013 06:07:34 +0000
Subject: reset nf before xmit vxlan encapsulated packet

We should reset nf settings bond to the skb as ipip/ipgre do.

If not, the conntrack/nat info bond to the origin packet may continually
redirect the packet to vxlan interface causing a routing loop.

this is the scenario:

     VETP     VXLAN Gateway
    /----\  /---------------\
    |    |  |               |
    |  vx+--+vx --NAT-> eth0+--> Internet
    |    |  |               |
    \----/  \---------------/

when there are any packet coming from internet to the vetp, there will be lots
of garbage packets coming out the gateway's vxlan interface, but none actually
sent to the physical interface, because they are redirected back to the vxlan
interface in the postrouting chain of NAT rule, and dmesg complains:

    Mar  1 21:52:53 debian kernel: [ 8802.997699] Dead loop on virtual device vxlan0, fix it urgently!
    Mar  1 21:52:54 debian kernel: [ 8804.004907] Dead loop on virtual device vxlan0, fix it urgently!
    Mar  1 21:52:55 debian kernel: [ 8805.012189] Dead loop on virtual device vxlan0, fix it urgently!
    Mar  1 21:52:56 debian kernel: [ 8806.020593] Dead loop on virtual device vxlan0, fix it urgently!

the patch should fix the problem

Signed-off-by: Zang MingJie <zealot0630@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index f10e58a..c3e3d29 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -961,6 +961,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 	iph->ttl	= ttl ? : ip4_dst_hoplimit(&rt->dst);
 	tunnel_ip_select_ident(skb, old_iph, &rt->dst);
 
+	nf_reset(skb);
+
 	vxlan_set_owner(dev, skb);
 
 	/* See iptunnel_xmit() */
-- 
cgit v0.10.2


From 691b3b7e1329141acf1e5ed44d8b468cea065fe3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 4 Mar 2013 12:32:43 +0000
Subject: net: fix new kernel-doc warnings in net core

Fix new kernel-doc warnings in net/core/dev.c:

Warning(net/core/dev.c:4788): No description found for parameter 'new_carrier'
Warning(net/core/dev.c:4788): Excess function parameter 'new_carries' description in 'dev_change_carrier'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/dev.c b/net/core/dev.c
index a06a7a5..5ca8734 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4780,7 +4780,7 @@ EXPORT_SYMBOL(dev_set_mac_address);
 /**
  *	dev_change_carrier - Change device carrier
  *	@dev: device
- *	@new_carries: new value
+ *	@new_carrier: new value
  *
  *	Change device carrier
  */
-- 
cgit v0.10.2


From d1f41b67ff7735193bc8b418b98ac99a448833e2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazt@google.com>
Date: Tue, 5 Mar 2013 07:15:13 +0000
Subject: net: reduce net_rx_action() latency to 2 HZ

We should use time_after_eq() to get maximum latency of two ticks,
instead of three.

Bug added in commit 24f8b2385 (net: increase receive packet quantum)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/dev.c b/net/core/dev.c
index 5ca8734..8f152f9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4103,7 +4103,7 @@ static void net_rx_action(struct softirq_action *h)
 		 * Allow this to run for 2 jiffies since which will allow
 		 * an average latency of 1.5/HZ.
 		 */
-		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
+		if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
 			goto softnet_break;
 
 		local_irq_enable();
-- 
cgit v0.10.2


From fa2b04f4502d74659e4e4b1294c6d88e08ece032 Mon Sep 17 00:00:00 2001
From: David Ward <david.ward@ll.mit.edu>
Date: Tue, 5 Mar 2013 17:06:32 +0000
Subject: net/ipv4: Timestamp option cannot overflow with prespecified
 addresses

When a router forwards a packet that contains the IPv4 timestamp option,
if there is no space left in the option for the router to add its own
timestamp, then the router increments the Overflow value in the option.

However, if the addresses of the routers are prespecified in the option,
then the overflow condition cannot happen: the option is structured so
that each prespecified router has a place to write its timestamp. Other
routers do not add a timestamp, so there will never be a lack of space.

This fix ensures that the Overflow value in the IPv4 timestamp option is
not incremented when the addresses of the routers are prespecified, even
if the Pointer value is greater than the Length value.

Signed-off-by: David Ward <david.ward@ll.mit.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f6289bf..310a364 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -423,7 +423,7 @@ int ip_options_compile(struct net *net,
 					put_unaligned_be32(midtime, timeptr);
 					opt->is_changed = 1;
 				}
-			} else {
+			} else if ((optptr[3]&0xF) != IPOPT_TS_PRESPEC) {
 				unsigned int overflow = optptr[3]>>4;
 				if (overflow == 15) {
 					pp_ptr = optptr + 3;
-- 
cgit v0.10.2


From 66d29cbc59433ba538922a9e958495156b31b83b Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Sun, 3 Mar 2013 21:48:46 +0000
Subject: benet: Wait f/w POST until timeout

While PCI card faces EEH errors, reset (usually hot reset) is
expected to recover from the EEH errors. After EEH core finishes
the reset, the driver callback (be_eeh_reset) is called and wait
the firmware to complete POST successfully. The original code would
return with error once detecting failure during POST stage. That
seems not enough.

The patch forces the driver (be_eeh_reset) to wait the firmware
completes POST until timeout, instead of returning error upon
detection POST failure immediately. Also, it would improve the
reliability of the EEH funtionality of the driver.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Acked-by: Sathya Perla <sathya.perla@emulex.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 071aea7..813407f 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -473,7 +473,7 @@ static int be_mbox_notify_wait(struct be_adapter *adapter)
 	return 0;
 }
 
-static int be_POST_stage_get(struct be_adapter *adapter, u16 *stage)
+static void be_POST_stage_get(struct be_adapter *adapter, u16 *stage)
 {
 	u32 sem;
 	u32 reg = skyhawk_chip(adapter) ? SLIPORT_SEMAPHORE_OFFSET_SH :
@@ -481,11 +481,6 @@ static int be_POST_stage_get(struct be_adapter *adapter, u16 *stage)
 
 	pci_read_config_dword(adapter->pdev, reg, &sem);
 	*stage = sem & POST_STAGE_MASK;
-
-	if ((sem >> POST_ERR_SHIFT) & POST_ERR_MASK)
-		return -1;
-	else
-		return 0;
 }
 
 int lancer_wait_ready(struct be_adapter *adapter)
@@ -579,19 +574,17 @@ int be_fw_wait_ready(struct be_adapter *adapter)
 	}
 
 	do {
-		status = be_POST_stage_get(adapter, &stage);
-		if (status) {
-			dev_err(dev, "POST error; stage=0x%x\n", stage);
-			return -1;
-		} else if (stage != POST_STAGE_ARMFW_RDY) {
-			if (msleep_interruptible(2000)) {
-				dev_err(dev, "Waiting for POST aborted\n");
-				return -EINTR;
-			}
-			timeout += 2;
-		} else {
+		be_POST_stage_get(adapter, &stage);
+		if (stage == POST_STAGE_ARMFW_RDY)
 			return 0;
+
+		dev_info(dev, "Waiting for POST, %ds elapsed\n",
+			 timeout);
+		if (msleep_interruptible(2000)) {
+			dev_err(dev, "Waiting for POST aborted\n");
+			return -EINTR;
 		}
+		timeout += 2;
 	} while (timeout < 60);
 
 	dev_err(dev, "POST timeout; stage=0x%x\n", stage);
-- 
cgit v0.10.2


From 35205b211c8d17a8a0b5e8926cb7c73e9a7ef1ad Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 5 Mar 2013 01:03:47 +0000
Subject: sfc: Disable soft interrupt handling during efx_device_detach_sync()

efx_device_detach_sync() locks all TX queues before marking the device
detached and thus disabling further TX scheduling.  But it can still
be interrupted by TX completions which then result in TX scheduling in
soft interrupt context.  This will deadlock when it tries to acquire
a TX queue lock that efx_device_detach_sync() already acquired.

To avoid deadlock, we must use netif_tx_{,un}lock_bh().

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>

diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 50247df..d2f790d 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -171,9 +171,9 @@ static inline void efx_device_detach_sync(struct efx_nic *efx)
 	 * TX scheduler is stopped when we're done and before
 	 * netif_device_present() becomes false.
 	 */
-	netif_tx_lock(dev);
+	netif_tx_lock_bh(dev);
 	netif_device_detach(dev);
-	netif_tx_unlock(dev);
+	netif_tx_unlock_bh(dev);
 }
 
 #endif /* EFX_EFX_H */
-- 
cgit v0.10.2


From c73e787a8db9117d59b5180baf83203a42ecadca Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 5 Mar 2013 17:49:39 +0000
Subject: sfc: Correct efx_rx_buffer::page_offset when EFX_PAGE_IP_ALIGN != 0

RX DMA buffers start at an offset of EFX_PAGE_IP_ALIGN bytes from the
start of a cache line.  This offset obviously needs to be included in
the virtual address, but this was missed in commit b590ace09d51
('sfc: Fix efx_rx_buf_offset() in the presence of swiotlb') since
EFX_PAGE_IP_ALIGN is equal to 0 on both x86 and powerpc.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>

diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 879ff58..bb579a6 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -215,7 +215,7 @@ static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue)
 		rx_buf = efx_rx_buffer(rx_queue, index);
 		rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN;
 		rx_buf->u.page = page;
-		rx_buf->page_offset = page_offset;
+		rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN;
 		rx_buf->len = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN;
 		rx_buf->flags = EFX_RX_BUF_PAGE;
 		++rx_queue->added_count;
-- 
cgit v0.10.2


From f422d2a04fe2e661fd439c19197a162cc9a36416 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 5 Mar 2013 19:10:26 +0000
Subject: net: docs: document multiqueue tuntap API

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt
index c0aab98..949d5dc 100644
--- a/Documentation/networking/tuntap.txt
+++ b/Documentation/networking/tuntap.txt
@@ -105,6 +105,83 @@ Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
      Proto [2 bytes]
      Raw protocol(IP, IPv6, etc) frame.
 
+  3.3 Multiqueue tuntap interface:
+
+  From version 3.8, Linux supports multiqueue tuntap which can uses multiple
+  file descriptors (queues) to parallelize packets sending or receiving. The
+  device allocation is the same as before, and if user wants to create multiple
+  queues, TUNSETIFF with the same device name must be called many times with
+  IFF_MULTI_QUEUE flag.
+
+  char *dev should be the name of the device, queues is the number of queues to
+  be created, fds is used to store and return the file descriptors (queues)
+  created to the caller. Each file descriptor were served as the interface of a
+  queue which could be accessed by userspace.
+
+  #include <linux/if.h>
+  #include <linux/if_tun.h>
+
+  int tun_alloc_mq(char *dev, int queues, int *fds)
+  {
+      struct ifreq ifr;
+      int fd, err, i;
+
+      if (!dev)
+          return -1;
+
+      memset(&ifr, 0, sizeof(ifr));
+      /* Flags: IFF_TUN   - TUN device (no Ethernet headers)
+       *        IFF_TAP   - TAP device
+       *
+       *        IFF_NO_PI - Do not provide packet information
+       *        IFF_MULTI_QUEUE - Create a queue of multiqueue device
+       */
+      ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
+      strcpy(ifr.ifr_name, dev);
+
+      for (i = 0; i < queues; i++) {
+          if ((fd = open("/dev/net/tun", O_RDWR)) < 0)
+             goto err;
+          err = ioctl(fd, TUNSETIFF, (void *)&ifr);
+          if (err) {
+             close(fd);
+             goto err;
+          }
+          fds[i] = fd;
+      }
+
+      return 0;
+  err:
+      for (--i; i >= 0; i--)
+          close(fds[i]);
+      return err;
+  }
+
+  A new ioctl(TUNSETQUEUE) were introduced to enable or disable a queue. When
+  calling it with IFF_DETACH_QUEUE flag, the queue were disabled. And when
+  calling it with IFF_ATTACH_QUEUE flag, the queue were enabled. The queue were
+  enabled by default after it was created through TUNSETIFF.
+
+  fd is the file descriptor (queue) that we want to enable or disable, when
+  enable is true we enable it, otherwise we disable it
+
+  #include <linux/if.h>
+  #include <linux/if_tun.h>
+
+  int tun_set_queue(int fd, int enable)
+  {
+      struct ifreq ifr;
+
+      memset(&ifr, 0, sizeof(ifr));
+
+      if (enable)
+         ifr.ifr_flags = IFF_ATTACH_QUEUE;
+      else
+         ifr.ifr_flags = IFF_DETACH_QUEUE;
+
+      return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
+  }
+
 Universal TUN/TAP device driver Frequently Asked Question.
    
 1. What platforms are supported by TUN/TAP driver ?
-- 
cgit v0.10.2


From c5b3ad4c67989c778e4753be4f91dc7193a04d21 Mon Sep 17 00:00:00 2001
From: Sathya Perla <sathya.perla@emulex.com>
Date: Tue, 5 Mar 2013 22:23:20 +0000
Subject: be2net: use CSR-BAR SEMAPHORE reg for BE2/BE3

The SLIPORT_SEMAPHORE register shadowed in the
config-space may not reflect the correct POST stage after
an EEH reset in BE2/3; it may return FW_READY state even though
FW is not ready. This causes the driver to prematurely
poll the FW mailbox and fail.

For BE2/3 use the CSR-BAR/0xac instead.

Reported-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 28ceb84..29aff55 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -349,6 +349,7 @@ struct be_adapter {
 	struct pci_dev *pdev;
 	struct net_device *netdev;
 
+	u8 __iomem *csr;	/* CSR BAR used only for BE2/3 */
 	u8 __iomem *db;		/* Door Bell */
 
 	struct mutex mbox_lock; /* For serializing mbox cmds to BE card */
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 813407f..3c9b4f1 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -473,14 +473,17 @@ static int be_mbox_notify_wait(struct be_adapter *adapter)
 	return 0;
 }
 
-static void be_POST_stage_get(struct be_adapter *adapter, u16 *stage)
+static u16 be_POST_stage_get(struct be_adapter *adapter)
 {
 	u32 sem;
-	u32 reg = skyhawk_chip(adapter) ? SLIPORT_SEMAPHORE_OFFSET_SH :
-					  SLIPORT_SEMAPHORE_OFFSET_BE;
 
-	pci_read_config_dword(adapter->pdev, reg, &sem);
-	*stage = sem & POST_STAGE_MASK;
+	if (BEx_chip(adapter))
+		sem  = ioread32(adapter->csr + SLIPORT_SEMAPHORE_OFFSET_BEx);
+	else
+		pci_read_config_dword(adapter->pdev,
+				      SLIPORT_SEMAPHORE_OFFSET_SH, &sem);
+
+	return sem & POST_STAGE_MASK;
 }
 
 int lancer_wait_ready(struct be_adapter *adapter)
@@ -574,7 +577,7 @@ int be_fw_wait_ready(struct be_adapter *adapter)
 	}
 
 	do {
-		be_POST_stage_get(adapter, &stage);
+		stage = be_POST_stage_get(adapter);
 		if (stage == POST_STAGE_ARMFW_RDY)
 			return 0;
 
diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h
index 541d453..62dc220 100644
--- a/drivers/net/ethernet/emulex/benet/be_hw.h
+++ b/drivers/net/ethernet/emulex/benet/be_hw.h
@@ -32,8 +32,8 @@
 #define MPU_EP_CONTROL 		0
 
 /********** MPU semphore: used for SH & BE  *************/
-#define SLIPORT_SEMAPHORE_OFFSET_BE		0x7c
-#define SLIPORT_SEMAPHORE_OFFSET_SH		0x94
+#define SLIPORT_SEMAPHORE_OFFSET_BEx		0xac  /* CSR BAR offset */
+#define SLIPORT_SEMAPHORE_OFFSET_SH		0x94  /* PCI-CFG offset */
 #define POST_STAGE_MASK				0x0000FFFF
 #define POST_ERR_MASK				0x1
 #define POST_ERR_SHIFT				31
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 3860888..08e54f3 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3688,6 +3688,8 @@ static void be_netdev_init(struct net_device *netdev)
 
 static void be_unmap_pci_bars(struct be_adapter *adapter)
 {
+	if (adapter->csr)
+		pci_iounmap(adapter->pdev, adapter->csr);
 	if (adapter->db)
 		pci_iounmap(adapter->pdev, adapter->db);
 }
@@ -3721,6 +3723,12 @@ static int be_map_pci_bars(struct be_adapter *adapter)
 	adapter->if_type = (sli_intf & SLI_INTF_IF_TYPE_MASK) >>
 				SLI_INTF_IF_TYPE_SHIFT;
 
+	if (BEx_chip(adapter) && be_physfn(adapter)) {
+		adapter->csr = pci_iomap(adapter->pdev, 2, 0);
+		if (adapter->csr == NULL)
+			return -ENOMEM;
+	}
+
 	addr = pci_iomap(adapter->pdev, db_bar(adapter), 0);
 	if (addr == NULL)
 		goto pci_map_err;
@@ -4329,6 +4337,8 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
 	pci_restore_state(pdev);
 
 	/* Check if card is ok and fw is ready */
+	dev_info(&adapter->pdev->dev,
+		 "Waiting for FW to be ready after EEH reset\n");
 	status = be_fw_wait_ready(adapter);
 	if (status)
 		return PCI_ERS_RESULT_DISCONNECT;
-- 
cgit v0.10.2


From f8af75f3517a24838a36eb5797a1a3e60bf9e276 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 6 Mar 2013 11:02:37 +0000
Subject: tun: add a missing nf_reset() in tun_net_xmit()

Dave reported following crash :

general protection fault: 0000 [#1] SMP
CPU 2
Pid: 25407, comm: qemu-kvm Not tainted 3.7.9-205.fc18.x86_64 #1 Hewlett-Packard HP Z400 Workstation/0B4Ch
RIP: 0010:[<ffffffffa0399bd5>]  [<ffffffffa0399bd5>] destroy_conntrack+0x35/0x120 [nf_conntrack]
RSP: 0018:ffff880276913d78  EFLAGS: 00010206
RAX: 50626b6b7876376c RBX: ffff88026e530d68 RCX: ffff88028d158e00
RDX: ffff88026d0d5470 RSI: 0000000000000011 RDI: 0000000000000002
RBP: ffff880276913d88 R08: 0000000000000000 R09: ffff880295002900
R10: 0000000000000000 R11: 0000000000000003 R12: ffffffff81ca3b40
R13: ffffffff8151a8e0 R14: ffff880270875000 R15: 0000000000000002
FS:  00007ff3bce38a00(0000) GS:ffff88029fc40000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 00007fd1430bd000 CR3: 000000027042b000 CR4: 00000000000027e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process qemu-kvm (pid: 25407, threadinfo ffff880276912000, task ffff88028c369720)
Stack:
 ffff880156f59100 ffff880156f59100 ffff880276913d98 ffffffff815534f7
 ffff880276913db8 ffffffff8151a74b ffff880270875000 ffff880156f59100
 ffff880276913dd8 ffffffff8151a5a6 ffff880276913dd8 ffff88026d0d5470
Call Trace:
 [<ffffffff815534f7>] nf_conntrack_destroy+0x17/0x20
 [<ffffffff8151a74b>] skb_release_head_state+0x7b/0x100
 [<ffffffff8151a5a6>] __kfree_skb+0x16/0xa0
 [<ffffffff8151a666>] kfree_skb+0x36/0xa0
 [<ffffffff8151a8e0>] skb_queue_purge+0x20/0x40
 [<ffffffffa02205f7>] __tun_detach+0x117/0x140 [tun]
 [<ffffffffa022184c>] tun_chr_close+0x3c/0xd0 [tun]
 [<ffffffff8119669c>] __fput+0xec/0x240
 [<ffffffff811967fe>] ____fput+0xe/0x10
 [<ffffffff8107eb27>] task_work_run+0xa7/0xe0
 [<ffffffff810149e1>] do_notify_resume+0x71/0xb0
 [<ffffffff81640152>] int_signal+0x12/0x17
Code: 00 00 04 48 89 e5 41 54 53 48 89 fb 4c 8b a7 e8 00 00 00 0f 85 de 00 00 00 0f b6 73 3e 0f b7 7b 2a e8 10 40 00 00 48 85 c0 74 0e <48> 8b 40 28 48 85 c0 74 05 48 89 df ff d0 48 c7 c7 08 6a 3a a0
RIP  [<ffffffffa0399bd5>] destroy_conntrack+0x35/0x120 [nf_conntrack]
 RSP <ffff880276913d78>

This is because tun_net_xmit() needs to call nf_reset()
before queuing skb into receive_queue

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2c6a22e..b7c457a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -747,6 +747,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto drop;
 	skb_orphan(skb);
 
+	nf_reset(skb);
+
 	/* Enqueue packet */
 	skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb);
 
-- 
cgit v0.10.2


From 4e0855dff094b0d56d6b5b271e0ce7851cc1e063 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 5 Mar 2013 09:42:59 +0000
Subject: e1000e: fix pci-device enable-counter balance

This patch removes redundant and unbalanced pci_disable_device() from
__e1000_shutdown(). pci_clear_master() is enough, device can go into
suspended state with elevated enable_cnt.

Bug was introduced in commit 23606cf5d1192c2b17912cb2ef6e62f9b11de133
("e1000e / PCI / PM: Add basic runtime PM support (rev. 4)") in v2.6.35

Cc: Bruce Allan <bruce.w.allan@intel.com>
CC: Stable <stable@kernel.org>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Borislav Petkov <bp@suse.de>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index a177b8b..1799021 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5986,7 +5986,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake,
 	 */
 	e1000e_release_hw_control(adapter);
 
-	pci_disable_device(pdev);
+	pci_clear_master(pdev);
 
 	return 0;
 }
-- 
cgit v0.10.2


From 66148babe728f3e00e13c56f6b0ecf325abd80da Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 5 Mar 2013 09:43:04 +0000
Subject: e1000e: fix runtime power management transitions

This patch removes redundant actions from driver and fixes its interaction
with actions in pci-bus runtime power management code.

It removes pci_save_state() from __e1000_shutdown() for normal adapters,
PCI bus callbacks pci_pm_*() will do all this for us. Now __e1000_shutdown()
switches to D3-state only quad-port adapters, because they needs quirk for
clearing false-positive error from downsteam pci-e port.

pci_save_state() now called after clearing bus-master bit, thus __e1000_resume()
and e1000_io_slot_reset() must set it back after restoring configuration space.

This patch set get_link_status before calling pm_runtime_put() in e1000_open()
to allow e1000_idle() get real link status and schedule first runtime suspend.

This patch also enables wakeup for device if management mode is enabled
(like for WoL) as result pci_prepare_to_sleep() would setup wakeup without
special actions like custom 'enable_wakeup' sign.

Cc: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Borislav Petkov <bp@suse.de>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 1799021..2954cc7 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4303,6 +4303,7 @@ static int e1000_open(struct net_device *netdev)
 	netif_start_queue(netdev);
 
 	adapter->idle_check = true;
+	hw->mac.get_link_status = true;
 	pm_runtime_put(&pdev->dev);
 
 	/* fire a link status change interrupt to start the watchdog */
@@ -5887,8 +5888,7 @@ release:
 	return retval;
 }
 
-static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake,
-			    bool runtime)
+static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
@@ -5912,10 +5912,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake,
 	}
 	e1000e_reset_interrupt_capability(adapter);
 
-	retval = pci_save_state(pdev);
-	if (retval)
-		return retval;
-
 	status = er32(STATUS);
 	if (status & E1000_STATUS_LU)
 		wufc &= ~E1000_WUFC_LNKC;
@@ -5971,13 +5967,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake,
 		ew32(WUFC, 0);
 	}
 
-	*enable_wake = !!wufc;
-
-	/* make sure adapter isn't asleep if manageability is enabled */
-	if ((adapter->flags & FLAG_MNG_PT_ENABLED) ||
-	    (hw->mac.ops.check_mng_mode(hw)))
-		*enable_wake = true;
-
 	if (adapter->hw.phy.type == e1000_phy_igp_3)
 		e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
 
@@ -5988,26 +5977,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake,
 
 	pci_clear_master(pdev);
 
-	return 0;
-}
-
-static void e1000_power_off(struct pci_dev *pdev, bool sleep, bool wake)
-{
-	if (sleep && wake) {
-		pci_prepare_to_sleep(pdev);
-		return;
-	}
-
-	pci_wake_from_d3(pdev, wake);
-	pci_set_power_state(pdev, PCI_D3hot);
-}
-
-static void e1000_complete_shutdown(struct pci_dev *pdev, bool sleep,
-                                    bool wake)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct e1000_adapter *adapter = netdev_priv(netdev);
-
 	/* The pci-e switch on some quad port adapters will report a
 	 * correctable error when the MAC transitions from D0 to D3.  To
 	 * prevent this we need to mask off the correctable errors on the
@@ -6021,12 +5990,13 @@ static void e1000_complete_shutdown(struct pci_dev *pdev, bool sleep,
 		pcie_capability_write_word(us_dev, PCI_EXP_DEVCTL,
 					   (devctl & ~PCI_EXP_DEVCTL_CERE));
 
-		e1000_power_off(pdev, sleep, wake);
+		pci_save_state(pdev);
+		pci_prepare_to_sleep(pdev);
 
 		pcie_capability_write_word(us_dev, PCI_EXP_DEVCTL, devctl);
-	} else {
-		e1000_power_off(pdev, sleep, wake);
 	}
+
+	return 0;
 }
 
 #ifdef CONFIG_PCIEASPM
@@ -6084,9 +6054,7 @@ static int __e1000_resume(struct pci_dev *pdev)
 	if (aspm_disable_flag)
 		e1000e_disable_aspm(pdev, aspm_disable_flag);
 
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-	pci_save_state(pdev);
+	pci_set_master(pdev);
 
 	e1000e_set_interrupt_capability(adapter);
 	if (netif_running(netdev)) {
@@ -6152,14 +6120,8 @@ static int __e1000_resume(struct pci_dev *pdev)
 static int e1000_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	int retval;
-	bool wake;
-
-	retval = __e1000_shutdown(pdev, &wake, false);
-	if (!retval)
-		e1000_complete_shutdown(pdev, true, wake);
 
-	return retval;
+	return __e1000_shutdown(pdev, false);
 }
 
 static int e1000_resume(struct device *dev)
@@ -6182,13 +6144,10 @@ static int e1000_runtime_suspend(struct device *dev)
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 
-	if (e1000e_pm_ready(adapter)) {
-		bool wake;
-
-		__e1000_shutdown(pdev, &wake, true);
-	}
+	if (!e1000e_pm_ready(adapter))
+		return 0;
 
-	return 0;
+	return __e1000_shutdown(pdev, true);
 }
 
 static int e1000_idle(struct device *dev)
@@ -6226,12 +6185,7 @@ static int e1000_runtime_resume(struct device *dev)
 
 static void e1000_shutdown(struct pci_dev *pdev)
 {
-	bool wake = false;
-
-	__e1000_shutdown(pdev, &wake, false);
-
-	if (system_state == SYSTEM_POWER_OFF)
-		e1000_complete_shutdown(pdev, false, wake);
+	__e1000_shutdown(pdev, false);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -6352,9 +6306,9 @@ static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev)
 			"Cannot re-enable PCI device after reset.\n");
 		result = PCI_ERS_RESULT_DISCONNECT;
 	} else {
-		pci_set_master(pdev);
 		pdev->state_saved = true;
 		pci_restore_state(pdev);
+		pci_set_master(pdev);
 
 		pci_enable_wake(pdev, PCI_D3hot, 0);
 		pci_enable_wake(pdev, PCI_D3cold, 0);
@@ -6783,7 +6737,11 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* initialize the wol settings based on the eeprom settings */
 	adapter->wol = adapter->eeprom_wol;
-	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+	/* make sure adapter isn't asleep if manageability is enabled */
+	if (adapter->wol || (adapter->flags & FLAG_MNG_PT_ENABLED) ||
+	    (hw->mac.ops.check_mng_mode(hw)))
+		device_wakeup_enable(&pdev->dev);
 
 	/* save off EEPROM version number */
 	e1000_read_nvm(&adapter->hw, 5, 1, &adapter->eeprom_vers);
-- 
cgit v0.10.2


From e60b22c5b7e59db09a7c9490b1e132c7e49ae904 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 5 Mar 2013 09:43:09 +0000
Subject: e1000e: fix accessing to suspended device

This patch fixes some annoying messages like 'Error reading PHY register' and
'Hardware Erorr' and saves several seconds on reboot.

Cc: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Borislav Petkov <bp@suse.de>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 2c18137..f91a8f3 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -36,6 +36,7 @@
 #include <linux/delay.h>
 #include <linux/vmalloc.h>
 #include <linux/mdio.h>
+#include <linux/pm_runtime.h>
 
 #include "e1000.h"
 
@@ -2229,7 +2230,19 @@ static int e1000e_get_ts_info(struct net_device *netdev,
 	return 0;
 }
 
+static int e1000e_ethtool_begin(struct net_device *netdev)
+{
+	return pm_runtime_get_sync(netdev->dev.parent);
+}
+
+static void e1000e_ethtool_complete(struct net_device *netdev)
+{
+	pm_runtime_put_sync(netdev->dev.parent);
+}
+
 static const struct ethtool_ops e1000_ethtool_ops = {
+	.begin			= e1000e_ethtool_begin,
+	.complete		= e1000e_ethtool_complete,
 	.get_settings		= e1000_get_settings,
 	.set_settings		= e1000_set_settings,
 	.get_drvinfo		= e1000_get_drvinfo,
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 2954cc7..948b86ff 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4663,6 +4663,7 @@ static void e1000_phy_read_status(struct e1000_adapter *adapter)
 	    (adapter->hw.phy.media_type == e1000_media_type_copper)) {
 		int ret_val;
 
+		pm_runtime_get_sync(&adapter->pdev->dev);
 		ret_val = e1e_rphy(hw, MII_BMCR, &phy->bmcr);
 		ret_val |= e1e_rphy(hw, MII_BMSR, &phy->bmsr);
 		ret_val |= e1e_rphy(hw, MII_ADVERTISE, &phy->advertise);
@@ -4673,6 +4674,7 @@ static void e1000_phy_read_status(struct e1000_adapter *adapter)
 		ret_val |= e1e_rphy(hw, MII_ESTATUS, &phy->estatus);
 		if (ret_val)
 			e_warn("Error reading PHY register\n");
+		pm_runtime_put_sync(&adapter->pdev->dev);
 	} else {
 		/* Do not read PHY registers if link is not up
 		 * Set values to typical power-on defaults
-- 
cgit v0.10.2


From d8741e2e88ac9a458765a0c7b4e6542d7c038334 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <srostedt@redhat.com>
Date: Tue, 5 Mar 2013 10:25:16 -0500
Subject: tracing: Add help of snapshot feature when snapshot is empty

When cat'ing the snapshot file, instead of showing an empty trace
header like the trace file does, show how to use the snapshot
feature.

Also, this is a good place to show if the snapshot has been allocated
or not. Users may want to "pre allocate" the snapshot to have a fast
"swap" of the current buffer. Otherwise, a swap would be slow and might
fail as it would need to allocate the snapshot buffer, and that might
fail under tight memory constraints.

Here's what it looked like before:

 # tracer: nop
 #
 # entries-in-buffer/entries-written: 0/0   #P:4
 #
 #                              _-----=> irqs-off
 #                             / _----=> need-resched
 #                            | / _---=> hardirq/softirq
 #                            || / _--=> preempt-depth
 #                            ||| /     delay
 #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
 #              | |       |   ||||       |         |

Here's what it looks like now:

 # tracer: nop
 #
 #
 # * Snapshot is freed *
 #
 # Snapshot commands:
 # echo 0 > snapshot : Clears and frees snapshot buffer
 # echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.
 #                      Takes a snapshot of the main buffer.
 # echo 2 > snapshot : Clears snapshot buffer (but does not allocate)
 #                      (Doesn't have to be '2' works with any number that
 #                       is not a '0' or '1')

Acked-by: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c2e2c23..9e3120b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2400,6 +2400,27 @@ static void test_ftrace_alive(struct seq_file *m)
 	seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
 }
 
+#ifdef CONFIG_TRACER_MAX_TRACE
+static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
+{
+	if (iter->trace->allocated_snapshot)
+		seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
+	else
+		seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
+
+	seq_printf(m, "# Snapshot commands:\n");
+	seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
+	seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
+	seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
+	seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n");
+	seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
+	seq_printf(m, "#                       is not a '0' or '1')\n");
+}
+#else
+/* Should never be called */
+static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
+#endif
+
 static int s_show(struct seq_file *m, void *v)
 {
 	struct trace_iterator *iter = v;
@@ -2411,7 +2432,9 @@ static int s_show(struct seq_file *m, void *v)
 			seq_puts(m, "#\n");
 			test_ftrace_alive(m);
 		}
-		if (iter->trace && iter->trace->print_header)
+		if (iter->snapshot && trace_empty(iter))
+			print_snapshot_help(m, iter);
+		else if (iter->trace && iter->trace->print_header)
 			iter->trace->print_header(m);
 		else
 			trace_default_header(m);
-- 
cgit v0.10.2


From c9960e48543799f168c4c9486f9790fb686ce5a8 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <srostedt@redhat.com>
Date: Tue, 5 Mar 2013 10:53:02 -0500
Subject: tracing: Do not return EINVAL in snapshot when not allocated

To use the tracing snapshot feature, writing a '1' into the snapshot
file causes the snapshot buffer to be allocated if it has not already
been allocated and dose a 'swap' with the main buffer, so that the
snapshot now contains what was in the main buffer, and the main buffer
now writes to what was the snapshot buffer.

To free the snapshot buffer, a '0' is written into the snapshot file.

To clear the snapshot buffer, any number but a '0' or '1' is written
into the snapshot file. But if the file is not allocated it returns
-EINVAL error code. This is rather pointless. It is better just to
do nothing and return success.

Acked-by: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9e3120b..1f835a8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4167,8 +4167,6 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	default:
 		if (current_trace->allocated_snapshot)
 			tracing_reset_online_cpus(&max_tr);
-		else
-			ret = -EINVAL;
 		break;
 	}
 
-- 
cgit v0.10.2


From 3fb817f1cd54bedd23e8913051473d574a0f1717 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 7 Mar 2013 03:46:52 +0000
Subject: net/mlx4_core: Disable mlx4_QP_ATTACH calls from guests if the host
 uses flow steering

Guests kernels may not correctly detect if DMFS (device-enabled flow steering) is
activated by the host. If DMFS is activated, the master should return error to guests
which try to use the B0-steering flow calls (mlx4_QP_ATTACH).

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 083fb48..2995687 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2990,6 +2990,9 @@ int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
 	u8 steer_type_mask = 2;
 	enum mlx4_steer_type type = (gid[7] & steer_type_mask) >> 1;
 
+	if (dev->caps.steering_mode != MLX4_STEERING_MODE_B0)
+		return -EINVAL;
+
 	qpn = vhcr->in_modifier & 0xffffff;
 	err = get_res(dev, slave, qpn, RES_QP, &rqp);
 	if (err)
-- 
cgit v0.10.2


From 0081c8f3814a8344ca975c085d987ec6c90499ae Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 7 Mar 2013 03:46:53 +0000
Subject: net/mlx4_core: Turn off device-managed FS bit in dev-cap wrapper if
 DMFS is not enabled

Older kernels detect DMFS (device-managed flow steering) from the HCA
device capability directly, regardless of whether the capability was
enabled in INIT_HCA, this is fixed by commit 7b8157bed "mlx4_core: Adjustments
to Flow Steering activation logic for SR-IOV"

To protect against guests running kernels without this fix, the host driver
should turn off the DMFS capability bit in mlx4_QUERY_DEV_CAP_wrapper.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 50917eb..f624557 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -787,6 +787,14 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN;
 	MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
 
+	/* turn off device-managed steering capability if not enabled */
+	if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
+		MLX4_GET(field, outbox->buf,
+			 QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
+		field &= 0x7f;
+		MLX4_PUT(outbox->buf, field,
+			 QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
+	}
 	return 0;
 }
 
-- 
cgit v0.10.2


From e7dbeba85600aa2c8daf99f8f53d9ad27e88b810 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 7 Mar 2013 03:46:54 +0000
Subject: net/mlx4_core: Fix endianness bug in set_param_l

The set_param_l function assumes casting a u64 pointer to a u32 pointer
allows to access the lower 32bits, but it results in writing the upper
32 bits on big endian systems.

The fixed function reads the upper 32 bits of the 64 argument, and or's
them with the 32 bits of the 32-bit value passed to the function.

Since this is now a "read-modify-write" operation, we got many
"unintialized variable" warnings which needed to be fixed as well.

Reported-by: Alexander Schmidt <alexschm@de.ibm.com>.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 7e64033..0706623 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -226,7 +226,7 @@ void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
 
 static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
 {
-	u64 in_param;
+	u64 in_param = 0;
 	int err;
 
 	if (mlx4_is_mfunc(dev)) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index d180bc4..16abde2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1555,7 +1555,7 @@ void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
 
 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
 {
-	u64 in_param;
+	u64 in_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
 		set_param_l(&in_param, idx);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index cf88334..d738454 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1235,7 +1235,7 @@ int mlx4_get_qp_per_mgm(struct mlx4_dev *dev);
 
 static inline void set_param_l(u64 *arg, u32 val)
 {
-	*((u32 *)arg) = val;
+	*arg = (*arg & 0xffffffff00000000ULL) | (u64) val;
 }
 
 static inline void set_param_h(u64 *arg, u32 val)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 602ca9b..f91719a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -183,7 +183,7 @@ u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
 
 static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
 {
-	u64 in_param;
+	u64 in_param = 0;
 	u64 out_param;
 	int err;
 
@@ -240,7 +240,7 @@ void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
 
 static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
 {
-	u64 in_param;
+	u64 in_param = 0;
 	int err;
 
 	if (mlx4_is_mfunc(dev)) {
@@ -351,7 +351,7 @@ void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
 
 static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
 {
-	u64 in_param;
+	u64 in_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
 		set_param_l(&in_param, index);
@@ -374,7 +374,7 @@ int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
 
 static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
 {
-	u64 param;
+	u64 param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
 		set_param_l(&param, index);
@@ -395,7 +395,7 @@ void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
 
 static void mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
 {
-	u64 in_param;
+	u64 in_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
 		set_param_l(&in_param, index);
diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
index 1ac8863..00f223a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c
@@ -101,7 +101,7 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
 
 void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
 {
-	u64 in_param;
+	u64 in_param = 0;
 	int err;
 
 	if (mlx4_is_mfunc(dev)) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 719ead1..10c57c8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -175,7 +175,7 @@ EXPORT_SYMBOL_GPL(__mlx4_register_mac);
 
 int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
-	u64 out_param;
+	u64 out_param = 0;
 	int err;
 
 	if (mlx4_is_mfunc(dev)) {
@@ -222,7 +222,7 @@ EXPORT_SYMBOL_GPL(__mlx4_unregister_mac);
 
 void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
-	u64 out_param;
+	u64 out_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
 		set_param_l(&out_param, port);
@@ -361,7 +361,7 @@ out:
 
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
 {
-	u64 out_param;
+	u64 out_param = 0;
 	int err;
 
 	if (mlx4_is_mfunc(dev)) {
@@ -406,7 +406,7 @@ out:
 
 void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
 {
-	u64 in_param;
+	u64 in_param = 0;
 	int err;
 
 	if (mlx4_is_mfunc(dev)) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 81e2abe..e891b05 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -222,7 +222,7 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
 
 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base)
 {
-	u64 in_param;
+	u64 in_param = 0;
 	u64 out_param;
 	int err;
 
@@ -255,7 +255,7 @@ void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
 
 void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
 {
-	u64 in_param;
+	u64 in_param = 0;
 	int err;
 
 	if (mlx4_is_mfunc(dev)) {
@@ -319,7 +319,7 @@ err_out:
 
 static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
 {
-	u64 param;
+	u64 param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
 		set_param_l(&param, qpn);
@@ -344,7 +344,7 @@ void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
 
 static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
 {
-	u64 in_param;
+	u64 in_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
 		set_param_l(&in_param, qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index feda6c0..e329fe1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -149,7 +149,7 @@ void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
 
 static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
 {
-	u64 in_param;
+	u64 in_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
 		set_param_l(&in_param, srqn);
-- 
cgit v0.10.2


From bfa8ab47415a87c6c93a9e54e16f2f8cc6de79af Mon Sep 17 00:00:00 2001
From: Yan Burman <yanb@mellanox.com>
Date: Thu, 7 Mar 2013 03:46:55 +0000
Subject: net/mlx4_en: Fix race when setting the device MAC address

Remove unnecessary use of workqueue for the device MAC address setting
flow, and fix a race when setting MAC address which was introduced by
commit c07cb4b0a "net/mlx4_en: Manage hash of MAC addresses per port"

The race happened when mlx4_en_replace_mac was being executed in parallel
with a successive call to ndo_set_mac_address, e.g witn an A/B/A MAC
setting configuration test, the third set fails.

With this change we also properly report an error if set MAC fails.

Signed-off-by: Yan Burman <yanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index bb4d8d9..217e618 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -650,28 +650,10 @@ u64 mlx4_en_mac_to_u64(u8 *addr)
 	return mac;
 }
 
-static int mlx4_en_set_mac(struct net_device *dev, void *addr)
+static int mlx4_en_do_set_mac(struct mlx4_en_priv *priv)
 {
-	struct mlx4_en_priv *priv = netdev_priv(dev);
-	struct mlx4_en_dev *mdev = priv->mdev;
-	struct sockaddr *saddr = addr;
-
-	if (!is_valid_ether_addr(saddr->sa_data))
-		return -EADDRNOTAVAIL;
-
-	memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
-	queue_work(mdev->workqueue, &priv->mac_task);
-	return 0;
-}
-
-static void mlx4_en_do_set_mac(struct work_struct *work)
-{
-	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
-						 mac_task);
-	struct mlx4_en_dev *mdev = priv->mdev;
 	int err = 0;
 
-	mutex_lock(&mdev->state_lock);
 	if (priv->port_up) {
 		/* Remove old MAC and insert the new one */
 		err = mlx4_en_replace_mac(priv, priv->base_qpn,
@@ -683,7 +665,26 @@ static void mlx4_en_do_set_mac(struct work_struct *work)
 	} else
 		en_dbg(HW, priv, "Port is down while registering mac, exiting...\n");
 
+	return err;
+}
+
+static int mlx4_en_set_mac(struct net_device *dev, void *addr)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct sockaddr *saddr = addr;
+	int err;
+
+	if (!is_valid_ether_addr(saddr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
+
+	mutex_lock(&mdev->state_lock);
+	err = mlx4_en_do_set_mac(priv);
 	mutex_unlock(&mdev->state_lock);
+
+	return err;
 }
 
 static void mlx4_en_clear_list(struct net_device *dev)
@@ -1348,7 +1349,7 @@ static void mlx4_en_do_get_stats(struct work_struct *work)
 		queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
 	}
 	if (mdev->mac_removed[MLX4_MAX_PORTS + 1 - priv->port]) {
-		queue_work(mdev->workqueue, &priv->mac_task);
+		mlx4_en_do_set_mac(priv);
 		mdev->mac_removed[MLX4_MAX_PORTS + 1 - priv->port] = 0;
 	}
 	mutex_unlock(&mdev->state_lock);
@@ -2078,7 +2079,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	priv->msg_enable = MLX4_EN_MSG_LEVEL;
 	spin_lock_init(&priv->stats_lock);
 	INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode);
-	INIT_WORK(&priv->mac_task, mlx4_en_do_set_mac);
 	INIT_WORK(&priv->watchdog_task, mlx4_en_restart);
 	INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
 	INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index c313d7e..f710b7c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -509,7 +509,6 @@ struct mlx4_en_priv {
 	struct mlx4_en_cq rx_cq[MAX_RX_RINGS];
 	struct mlx4_qp drop_qp;
 	struct work_struct rx_mode_task;
-	struct work_struct mac_task;
 	struct work_struct watchdog_task;
 	struct work_struct linkstate_task;
 	struct delayed_work stats_task;
-- 
cgit v0.10.2


From 83a5a6cef40616d19a388f560447e99c2ca04d1e Mon Sep 17 00:00:00 2001
From: Yan Burman <yanb@mellanox.com>
Date: Thu, 7 Mar 2013 03:46:56 +0000
Subject: net/mlx4_en: Cleanup MAC resources on module unload or port stop

Make sure we cleanup all MAC related resources (entries in the port MAC
table and steering rules) when stopping a port or when the driver is unloaded.

The leak was introduced by commit 07cb4b0a "net/mlx4_en: Manage hash of MAC
addresses per port".

Signed-off-by: Yan Burman <yanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 217e618..7fd0936 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -565,34 +565,38 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv)
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_dev *dev = mdev->dev;
 	int qpn = priv->base_qpn;
-	u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr);
-
-	en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n",
-	       priv->dev->dev_addr);
-	mlx4_unregister_mac(dev, priv->port, mac);
+	u64 mac;
 
-	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
+	if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
+		mac = mlx4_en_mac_to_u64(priv->dev->dev_addr);
+		en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n",
+		       priv->dev->dev_addr);
+		mlx4_unregister_mac(dev, priv->port, mac);
+	} else {
 		struct mlx4_mac_entry *entry;
 		struct hlist_node *tmp;
 		struct hlist_head *bucket;
-		unsigned int mac_hash;
+		unsigned int i;
 
-		mac_hash = priv->dev->dev_addr[MLX4_EN_MAC_HASH_IDX];
-		bucket = &priv->mac_hash[mac_hash];
-		hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
-			if (ether_addr_equal_64bits(entry->mac,
-						    priv->dev->dev_addr)) {
-				en_dbg(DRV, priv, "Releasing qp: port %d, MAC %pM, qpn %d\n",
-				       priv->port, priv->dev->dev_addr, qpn);
+		for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) {
+			bucket = &priv->mac_hash[i];
+			hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
+				mac = mlx4_en_mac_to_u64(entry->mac);
+				en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n",
+				       entry->mac);
 				mlx4_en_uc_steer_release(priv, entry->mac,
 							 qpn, entry->reg_id);
-				mlx4_qp_release_range(dev, qpn, 1);
 
+				mlx4_unregister_mac(dev, priv->port, mac);
 				hlist_del_rcu(&entry->hlist);
 				kfree_rcu(entry, rcu);
-				break;
 			}
 		}
+
+		en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n",
+		       priv->port, qpn);
+		mlx4_qp_release_range(dev, qpn, 1);
+		priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC;
 	}
 }
 
-- 
cgit v0.10.2


From a229e488ac3f904d06c20d8d3f47831db3c7a15a Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Thu, 7 Mar 2013 03:46:57 +0000
Subject: net/mlx4_en: Disable RFS when running in SRIOV mode

Commit 37706996 "mlx4_en: fix allocation of CPU affinity reverse-map" fixed
a bug when mlx4_dev->caps.comp_pool is larger from the device rx rings, but
introduced a regression.

When the mlx4_core is activating its "legacy mode" (e.g when running in SRIOV
mode) w.r.t to EQs/IRQs usage, comp_pool becomes zero and we're crashing on
divide by zero alloc_cpu_rmap.

Fix that by enabling RFS only when running in non-legacy mode.

Reported-by: Yan Burman <yanb@mellanox.com>
Cc: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 7fd0936..995d4b6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1833,9 +1833,11 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 	}
 
 #ifdef CONFIG_RFS_ACCEL
-	priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->mdev->dev->caps.comp_pool);
-	if (!priv->dev->rx_cpu_rmap)
-		goto err;
+	if (priv->mdev->dev->caps.comp_pool) {
+		priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->mdev->dev->caps.comp_pool);
+		if (!priv->dev->rx_cpu_rmap)
+			goto err;
+	}
 #endif
 
 	return 0;
-- 
cgit v0.10.2


From e4fabf2b6e6d75752d5eede57f23ff8e9c6aa09b Mon Sep 17 00:00:00 2001
From: Bhavesh Davda <bhavesh@vmware.com>
Date: Wed, 6 Mar 2013 12:04:53 +0000
Subject: vmxnet3: prevent div-by-zero panic when ring resizing uninitialized
 dev

Linux is free to call ethtool ops as soon as a netdev exists when probe
finishes. However, we only allocate vmxnet3 tx/rx queues and initialize the
rx_buf_per_pkt field in struct vmxnet3_adapter when the interface is
opened (UP).

Signed-off-by: Bhavesh Davda <bhavesh@vmware.com>
Signed-off-by: Shreyas N Bhatewara <sbhatewara@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 4aad350..eae7a03 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2958,6 +2958,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 
 	adapter->num_rx_queues = num_rx_queues;
 	adapter->num_tx_queues = num_tx_queues;
+	adapter->rx_buf_per_pkt = 1;
 
 	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
 	size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index a0feb17..63a1243 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -472,6 +472,12 @@ vmxnet3_set_ringparam(struct net_device *netdev,
 						VMXNET3_RX_RING_MAX_SIZE)
 		return -EINVAL;
 
+	/* if adapter not yet initialized, do nothing */
+	if (adapter->rx_buf_per_pkt == 0) {
+		netdev_err(netdev, "adapter not completely initialized, "
+			   "ring size cannot be changed yet\n");
+		return -EOPNOTSUPP;
+	}
 
 	/* round it up to a multiple of VMXNET3_RING_SIZE_ALIGN */
 	new_tx_ring_size = (param->tx_pending + VMXNET3_RING_SIZE_MASK) &
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 3198384..3541814 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -70,10 +70,10 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.1.29.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.1.30.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01011D00
+#define VMXNET3_DRIVER_VERSION_NUM      0x01011E00
 
 #if defined(CONFIG_PCI_MSI)
 	/* RSS only makes sense if MSI-X is supported. */
-- 
cgit v0.10.2


From 9cb6cb7ed11cd3b69c47bb414983603a6ff20b1d Mon Sep 17 00:00:00 2001
From: Zang MingJie <zealot0630@gmail.com>
Date: Wed, 6 Mar 2013 04:37:37 +0000
Subject: vxlan: fix oops when delete netns containing vxlan

The following script will produce a kernel oops:

    sudo ip netns add v
    sudo ip netns exec v ip ad add 127.0.0.1/8 dev lo
    sudo ip netns exec v ip link set lo up
    sudo ip netns exec v ip ro add 224.0.0.0/4 dev lo
    sudo ip netns exec v ip li add vxlan0 type vxlan id 42 group 239.1.1.1 dev lo
    sudo ip netns exec v ip link set vxlan0 up
    sudo ip netns del v

where inspect by gdb:

    Program received signal SIGSEGV, Segmentation fault.
    [Switching to Thread 107]
    0xffffffffa0289e33 in ?? ()
    (gdb) bt
    #0  vxlan_leave_group (dev=0xffff88001bafa000) at drivers/net/vxlan.c:533
    #1  vxlan_stop (dev=0xffff88001bafa000) at drivers/net/vxlan.c:1087
    #2  0xffffffff812cc498 in __dev_close_many (head=head@entry=0xffff88001f2e7dc8) at net/core/dev.c:1299
    #3  0xffffffff812cd920 in dev_close_many (head=head@entry=0xffff88001f2e7dc8) at net/core/dev.c:1335
    #4  0xffffffff812cef31 in rollback_registered_many (head=head@entry=0xffff88001f2e7dc8) at net/core/dev.c:4851
    #5  0xffffffff812cf040 in unregister_netdevice_many (head=head@entry=0xffff88001f2e7dc8) at net/core/dev.c:5752
    #6  0xffffffff812cf1ba in default_device_exit_batch (net_list=0xffff88001f2e7e18) at net/core/dev.c:6170
    #7  0xffffffff812cab27 in cleanup_net (work=<optimized out>) at net/core/net_namespace.c:302
    #8  0xffffffff810540ef in process_one_work (worker=0xffff88001ba9ed40, work=0xffffffff8167d020) at kernel/workqueue.c:2157
    #9  0xffffffff810549d0 in worker_thread (__worker=__worker@entry=0xffff88001ba9ed40) at kernel/workqueue.c:2276
    #10 0xffffffff8105870c in kthread (_create=0xffff88001f2e5d68) at kernel/kthread.c:168
    #11 <signal handler called>
    #12 0x0000000000000000 in ?? ()
    #13 0x0000000000000000 in ?? ()
    (gdb) fr 0
    #0  vxlan_leave_group (dev=0xffff88001bafa000) at drivers/net/vxlan.c:533
    533		struct sock *sk = vn->sock->sk;
    (gdb) l
    528	static int vxlan_leave_group(struct net_device *dev)
    529	{
    530		struct vxlan_dev *vxlan = netdev_priv(dev);
    531		struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
    532		int err = 0;
    533		struct sock *sk = vn->sock->sk;
    534		struct ip_mreqn mreq = {
    535			.imr_multiaddr.s_addr	= vxlan->gaddr,
    536			.imr_ifindex		= vxlan->link,
    537		};
    (gdb) p vn->sock
    $4 = (struct socket *) 0x0

The kernel calls `vxlan_exit_net` when deleting the netns before shutting down
vxlan interfaces. Later the removal of all vxlan interfaces, where `vn->sock`
is already gone causes the oops. so we should manually shutdown all interfaces
before deleting `vn->sock` as the patch does.

Signed-off-by: Zang MingJie <zealot0630@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index c3e3d29..7cee7a3 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1506,6 +1506,14 @@ static __net_init int vxlan_init_net(struct net *net)
 static __net_exit void vxlan_exit_net(struct net *net)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct vxlan_dev *vxlan;
+	unsigned h;
+
+	rtnl_lock();
+	for (h = 0; h < VNI_HASH_SIZE; ++h)
+		hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist)
+			dev_close(vxlan->dev);
+	rtnl_unlock();
 
 	if (vn->sock) {
 		sk_release_kernel(vn->sock->sk);
-- 
cgit v0.10.2


From 80028ea1c0afc24d4ddeb8dd2a9992fff03616ca Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 6 Mar 2013 07:10:32 +0000
Subject: bonding: fire NETDEV_RELEASE event only on 0 slaves

Currently, if we set up netconsole over bonding and release a slave,
netconsole will stop logging on the whole bonding device. Change the
behavior to stop the netconsole only when the last slave is released.

Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 7bd068a..8b4e96e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1964,7 +1964,6 @@ static int __bond_release_one(struct net_device *bond_dev,
 	}
 
 	block_netpoll_tx();
-	call_netdevice_notifiers(NETDEV_RELEASE, bond_dev);
 	write_lock_bh(&bond->lock);
 
 	slave = bond_get_slave_by_dev(bond, slave_dev);
@@ -2066,8 +2065,10 @@ static int __bond_release_one(struct net_device *bond_dev,
 	write_unlock_bh(&bond->lock);
 	unblock_netpoll_tx();
 
-	if (bond->slave_cnt == 0)
+	if (bond->slave_cnt == 0) {
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev);
+		call_netdevice_notifiers(NETDEV_RELEASE, bond->dev);
+	}
 
 	bond_compute_features(bond);
 	if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) &&
-- 
cgit v0.10.2


From 260055bb1f1f8b5328601816c50fd7e0dfda7dff Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil.sutter@viprinet.com>
Date: Wed, 6 Mar 2013 07:49:02 +0000
Subject: mv643xx_eth: fix for disabled autoneg

When autoneg has been disabled in the PHY (Marvell 88E1118 here), auto
negotiation between MAC and PHY seem non-functional anymore. The only
way I found to workaround this is to manually configure the MAC with the
settings sent to the PHY earlier.

Signed-off-by: Phil Sutter <phil.sutter@viprinet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 2914050..6562c73 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1081,6 +1081,45 @@ static void txq_set_fixed_prio_mode(struct tx_queue *txq)
 
 
 /* mii management interface *************************************************/
+static void mv643xx_adjust_pscr(struct mv643xx_eth_private *mp)
+{
+	u32 pscr = rdlp(mp, PORT_SERIAL_CONTROL);
+	u32 autoneg_disable = FORCE_LINK_PASS |
+	             DISABLE_AUTO_NEG_SPEED_GMII |
+		     DISABLE_AUTO_NEG_FOR_FLOW_CTRL |
+		     DISABLE_AUTO_NEG_FOR_DUPLEX;
+
+	if (mp->phy->autoneg == AUTONEG_ENABLE) {
+		/* enable auto negotiation */
+		pscr &= ~autoneg_disable;
+		goto out_write;
+	}
+
+	pscr |= autoneg_disable;
+
+	if (mp->phy->speed == SPEED_1000) {
+		/* force gigabit, half duplex not supported */
+		pscr |= SET_GMII_SPEED_TO_1000;
+		pscr |= SET_FULL_DUPLEX_MODE;
+		goto out_write;
+	}
+
+	pscr &= ~SET_GMII_SPEED_TO_1000;
+
+	if (mp->phy->speed == SPEED_100)
+		pscr |= SET_MII_SPEED_TO_100;
+	else
+		pscr &= ~SET_MII_SPEED_TO_100;
+
+	if (mp->phy->duplex == DUPLEX_FULL)
+		pscr |= SET_FULL_DUPLEX_MODE;
+	else
+		pscr &= ~SET_FULL_DUPLEX_MODE;
+
+out_write:
+	wrlp(mp, PORT_SERIAL_CONTROL, pscr);
+}
+
 static irqreturn_t mv643xx_eth_err_irq(int irq, void *dev_id)
 {
 	struct mv643xx_eth_shared_private *msp = dev_id;
@@ -1499,6 +1538,7 @@ static int
 mv643xx_eth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
+	int ret;
 
 	if (mp->phy == NULL)
 		return -EINVAL;
@@ -1508,7 +1548,10 @@ mv643xx_eth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	 */
 	cmd->advertising &= ~ADVERTISED_1000baseT_Half;
 
-	return phy_ethtool_sset(mp->phy, cmd);
+	ret = phy_ethtool_sset(mp->phy, cmd);
+	if (!ret)
+		mv643xx_adjust_pscr(mp);
+	return ret;
 }
 
 static void mv643xx_eth_get_drvinfo(struct net_device *dev,
@@ -2442,11 +2485,15 @@ static int mv643xx_eth_stop(struct net_device *dev)
 static int mv643xx_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
+	int ret;
 
-	if (mp->phy != NULL)
-		return phy_mii_ioctl(mp->phy, ifr, cmd);
+	if (mp->phy == NULL)
+		return -ENOTSUPP;
 
-	return -EOPNOTSUPP;
+	ret = phy_mii_ioctl(mp->phy, ifr, cmd);
+	if (!ret)
+		mv643xx_adjust_pscr(mp);
+	return ret;
 }
 
 static int mv643xx_eth_change_mtu(struct net_device *dev, int new_mtu)
-- 
cgit v0.10.2


From 0c1233aba1e948c37f6dc7620cb7c253fcd71ce9 Mon Sep 17 00:00:00 2001
From: Paul Moore <pmoore@redhat.com>
Date: Wed, 6 Mar 2013 11:45:24 +0000
Subject: netlabel: correctly list all the static label mappings

When we have a large number of static label mappings that spill across
the netlink message boundary we fail to properly save our state in the
netlink_callback struct which causes us to repeat the same listings.
This patch fixes this problem by saving the state correctly between
calls to the NetLabel static label netlink "dumpit" routines.

Signed-off-by: Paul Moore <pmoore@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 847d495..85742b7 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1250,10 +1250,10 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 
 unlabel_staticlist_return:
 	rcu_read_unlock();
-	cb->args[0] = skip_bkt;
-	cb->args[1] = skip_chain;
-	cb->args[2] = skip_addr4;
-	cb->args[3] = skip_addr6;
+	cb->args[0] = iter_bkt;
+	cb->args[1] = iter_chain;
+	cb->args[2] = iter_addr4;
+	cb->args[3] = iter_addr6;
 	return skb->len;
 }
 
@@ -1320,8 +1320,8 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 
 unlabel_staticlistdef_return:
 	rcu_read_unlock();
-	cb->args[0] = skip_addr4;
-	cb->args[1] = skip_addr6;
+	cb->args[0] = iter_addr4;
+	cb->args[1] = iter_addr6;
 	return skb->len;
 }
 
-- 
cgit v0.10.2


From 195ca382ca253431cc02c82bca61126c8a7ae155 Mon Sep 17 00:00:00 2001
From: Sony Chacko <sony.chacko@qlogic.com>
Date: Wed, 6 Mar 2013 13:03:25 +0000
Subject: MAINTAINERS: Update qlcnic maintainers list

Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/MAINTAINERS b/MAINTAINERS
index 9561658..c08411b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6412,6 +6412,8 @@ F:	Documentation/networking/LICENSE.qla3xxx
 F:	drivers/net/ethernet/qlogic/qla3xxx.*
 
 QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
+M:	Rajesh Borundia <rajesh.borundia@qlogic.com>
+M:	Shahed Shaikh <shahed.shaikh@qlogic.com>
 M:	Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
 M:	Sony Chacko <sony.chacko@qlogic.com>
 M:	linux-driver@qlogic.com
-- 
cgit v0.10.2


From d0d79c3fd77abe39654c2e594149f1f9ef1eeb05 Mon Sep 17 00:00:00 2001
From: Junwei Zhang <junwei.zhang@6wind.com>
Date: Wed, 6 Mar 2013 20:48:47 +0000
Subject: afkey: fix a typo

Signed-off-by: Martin Zhang <martinbj2008@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 556fdaf..8555f33 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2201,7 +2201,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
 		      XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW);
 	xp->priority = pol->sadb_x_policy_priority;
 
-	sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+	sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
 	xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr);
 	if (!xp->family) {
 		err = -EINVAL;
@@ -2214,7 +2214,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
 	if (xp->selector.sport)
 		xp->selector.sport_mask = htons(0xffff);
 
-	sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1],
+	sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1];
 	pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr);
 	xp->selector.prefixlen_d = sa->sadb_address_prefixlen;
 
@@ -2315,7 +2315,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 
 	memset(&sel, 0, sizeof(sel));
 
-	sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+	sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
 	sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr);
 	sel.prefixlen_s = sa->sadb_address_prefixlen;
 	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
@@ -2323,7 +2323,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 	if (sel.sport)
 		sel.sport_mask = htons(0xffff);
 
-	sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1],
+	sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1];
 	pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr);
 	sel.prefixlen_d = sa->sadb_address_prefixlen;
 	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
-- 
cgit v0.10.2


From c10cb5fc0fc9fa605e01f715118bde5ba5a98616 Mon Sep 17 00:00:00 2001
From: Christoph Paasch <christoph.paasch@uclouvain.be>
Date: Thu, 7 Mar 2013 02:34:33 +0000
Subject: Fix: sparse warning in inet_csk_prepare_forced_close

In e337e24d66 (inet: Fix kmemleak in tcp_v4/6_syn_recv_sock and
dccp_v4/6_request_recv_sock) I introduced the function
inet_csk_prepare_forced_close, which does a call to bh_unlock_sock().
This produces a sparse-warning.

This patch adds the missing __releases.

Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7d1874b..786d97a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -735,6 +735,7 @@ EXPORT_SYMBOL(inet_csk_destroy_sock);
  * tcp/dccp_create_openreq_child().
  */
 void inet_csk_prepare_forced_close(struct sock *sk)
+	__releases(&sk->sk_lock.slock)
 {
 	/* sk_clone_lock locked the socket and set refcnt to 2 */
 	bh_unlock_sock(sk);
-- 
cgit v0.10.2


From fbca58a2242ef2b84049365786d501ee512aefcf Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Thu, 7 Mar 2013 03:05:33 +0000
Subject: bridge: add missing vid to br_mdb_get()

Obviously, vid should be considered when searching for multicast
group.

Cc: Vlad Yasevich <vyasevic@redhat.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Acked-by: Vlad Yasevich <vyasevich@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index d5f1d3f..314c73e 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -66,7 +66,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto out;
 		}
 
-		mdst = br_mdb_get(br, skb);
+		mdst = br_mdb_get(br, skb, vid);
 		if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb))
 			br_multicast_deliver(mdst, skb);
 		else
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 4803301..828e2bc 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -97,7 +97,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
 	if (is_broadcast_ether_addr(dest))
 		skb2 = skb;
 	else if (is_multicast_ether_addr(dest)) {
-		mdst = br_mdb_get(br, skb);
+		mdst = br_mdb_get(br, skb, vid);
 		if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) {
 			if ((mdst && mdst->mglist) ||
 			    br_multicast_is_router(br))
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 10e6fce..923fbea 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -132,7 +132,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(
 #endif
 
 struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
-					struct sk_buff *skb)
+					struct sk_buff *skb, u16 vid)
 {
 	struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb);
 	struct br_ip ip;
@@ -144,6 +144,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 		return NULL;
 
 	ip.proto = skb->protocol;
+	ip.vid = vid;
 
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 6d314c4..3cbf5be 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -442,7 +442,7 @@ extern int br_multicast_rcv(struct net_bridge *br,
 			    struct net_bridge_port *port,
 			    struct sk_buff *skb);
 extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
-					       struct sk_buff *skb);
+					       struct sk_buff *skb, u16 vid);
 extern void br_multicast_add_port(struct net_bridge_port *port);
 extern void br_multicast_del_port(struct net_bridge_port *port);
 extern void br_multicast_enable_port(struct net_bridge_port *port);
@@ -504,7 +504,7 @@ static inline int br_multicast_rcv(struct net_bridge *br,
 }
 
 static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
-						      struct sk_buff *skb)
+						      struct sk_buff *skb, u16 vid)
 {
 	return NULL;
 }
-- 
cgit v0.10.2


From ba81276b1a5e3cf0674cb0e6d9525e5ae0c98695 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Thu, 7 Mar 2013 07:59:25 +0000
Subject: team: unsyc the devices addresses when port is removed

When a team port is removed, unsync all devices addresses that may have
been synched to the port devices.

CC: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 05c5efe..bf34192 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1138,6 +1138,8 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
 	netdev_upper_dev_unlink(port_dev, dev);
 	team_port_disable_netpoll(port);
 	vlan_vids_del_by_dev(port_dev, dev);
+	dev_uc_unsync(port_dev, dev);
+	dev_mc_unsync(port_dev, dev);
 	dev_close(port_dev);
 	team_port_leave(team, port);
 
-- 
cgit v0.10.2


From 87ab7f6f2874f1115817e394a7ed2dea1c72549e Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Thu, 7 Mar 2013 10:21:48 +0000
Subject: macvlan: Set IFF_UNICAST_FLT flag to prevent unnecessary promisc
 mode.

Macvlan already supports hw address filters.  Set the IFF_UNICAST_FLT
so that it doesn't needlesly enter PROMISC mode when macvlans are
stacked.

Signed-of-by: Vlad Yasevich <vyasevic@redhat.com>

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 417b2af..73abbc1 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -660,6 +660,7 @@ void macvlan_common_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->priv_flags	       &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+	dev->priv_flags	       |= IFF_UNICAST_FLT;
 	dev->netdev_ops		= &macvlan_netdev_ops;
 	dev->destructor		= free_netdev;
 	dev->header_ops		= &macvlan_hard_header_ops,
-- 
cgit v0.10.2


From 5f3347e6e75768985a088d959c49fb66263087b6 Mon Sep 17 00:00:00 2001
From: Yaniv Rosner <yanivr@broadcom.com>
Date: Thu, 7 Mar 2013 13:27:33 +0000
Subject: bnx2x: Fix intermittent long KR2 link up time

When a KR2 device is connected to a KR link-partner, sometimes it requires
disabling KR2 for the link to come up. To get a KR2 link up later, in case no
base pages are seen, the KR2 is restored. The problem was that some link
partners cleared their advertised BP/NP after around two seconds, causing the
driver to disable/enable KR2 link all the time.
The fix was to wait at least 5 seconds before checking KR2 recovery.

Signed-off-by: Yaniv Rosner <yanivr@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 31c5787..00ac493 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -12527,6 +12527,7 @@ int bnx2x_phy_init(struct link_params *params, struct link_vars *vars)
 	vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
 	vars->mac_type = MAC_TYPE_NONE;
 	vars->phy_flags = 0;
+	vars->check_kr2_recovery_cnt = 0;
 	/* Driver opens NIG-BRB filters */
 	bnx2x_set_rx_filter(params, 1);
 	/* Check if link flap can be avoided */
@@ -13411,6 +13412,7 @@ static void bnx2x_disable_kr2(struct link_params *params,
 	vars->link_attr_sync &= ~LINK_ATTR_SYNC_KR2_ENABLE;
 	bnx2x_update_link_attr(params, vars->link_attr_sync);
 
+	vars->check_kr2_recovery_cnt = CHECK_KR2_RECOVERY_CNT;
 	/* Restart AN on leading lane */
 	bnx2x_warpcore_restart_AN_KR(phy, params);
 }
@@ -13439,6 +13441,15 @@ static void bnx2x_check_kr2_wa(struct link_params *params,
 		return;
 	}
 
+	/* Once KR2 was disabled, wait 5 seconds before checking KR2 recovery
+	 * since some switches tend to reinit the AN process and clear the
+	 * advertised BP/NP after ~2 seconds causing the KR2 to be disabled
+	 * and recovered many times
+	 */
+	if (vars->check_kr2_recovery_cnt > 0) {
+		vars->check_kr2_recovery_cnt--;
+		return;
+	}
 	lane = bnx2x_get_warpcore_lane(phy, params);
 	CL22_WR_OVER_CL45(bp, phy, MDIO_REG_BANK_AER_BLOCK,
 			  MDIO_AER_BLOCK_AER_REG, lane);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
index be5c195..f92fcf7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
@@ -342,7 +342,8 @@ struct link_vars {
 	u32 link_status;
 	u32 eee_status;
 	u8 fault_detected;
-	u8 rsrv1;
+	u8 check_kr2_recovery_cnt;
+#define CHECK_KR2_RECOVERY_CNT	5
 	u16 periodic_flags;
 #define PERIODIC_FLAGS_LINK_EVENT	0x0001
 
-- 
cgit v0.10.2


From d9169323308a63fdd967920b9c63a00394ae7c85 Mon Sep 17 00:00:00 2001
From: Yaniv Rosner <yanivr@broadcom.com>
Date: Thu, 7 Mar 2013 13:27:34 +0000
Subject: bnx2x: Fix SFP+ misconfiguration in iSCSI boot scenario

Fix a problem in which iSCSI-boot installation fails when switching SFP+ boot
port and moving the SFP+ module prior to boot. The SFP+ insertion triggers an
interrupt which configures the SFP+ module wrongly before interface is loaded.

Signed-off-by: Yaniv Rosner <yanivr@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 00ac493..77ebae0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -8647,7 +8647,9 @@ void bnx2x_handle_module_detect_int(struct link_params *params)
 						MDIO_WC_DEVAD,
 						MDIO_WC_REG_DIGITAL5_MISC6,
 						&rx_tx_in_reset);
-				if (!rx_tx_in_reset) {
+				if ((!rx_tx_in_reset) &&
+				    (params->link_flags &
+				     PHY_INITIALIZED)) {
 					bnx2x_warpcore_reset_lane(bp, phy, 1);
 					bnx2x_warpcore_config_sfi(phy, params);
 					bnx2x_warpcore_reset_lane(bp, phy, 0);
@@ -12528,6 +12530,7 @@ int bnx2x_phy_init(struct link_params *params, struct link_vars *vars)
 	vars->mac_type = MAC_TYPE_NONE;
 	vars->phy_flags = 0;
 	vars->check_kr2_recovery_cnt = 0;
+	params->link_flags = PHY_INITIALIZED;
 	/* Driver opens NIG-BRB filters */
 	bnx2x_set_rx_filter(params, 1);
 	/* Check if link flap can be avoided */
@@ -12692,6 +12695,7 @@ int bnx2x_lfa_reset(struct link_params *params,
 	struct bnx2x *bp = params->bp;
 	vars->link_up = 0;
 	vars->phy_flags = 0;
+	params->link_flags &= ~PHY_INITIALIZED;
 	if (!params->lfa_base)
 		return bnx2x_link_reset(params, vars, 1);
 	/*
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
index f92fcf7..56c2aae 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
@@ -309,6 +309,7 @@ struct link_params {
 				req_flow_ctrl is set to AUTO */
 	u16 link_flags;
 #define LINK_FLAGS_INT_DISABLED		(1<<0)
+#define PHY_INITIALIZED		(1<<1)
 	u32 lfa_base;
 };
 
-- 
cgit v0.10.2


From 2e85d67690cf3ea3f074a6e872f675226883fe7f Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Thu, 7 Mar 2013 17:19:32 +0000
Subject: net/rds: zero last byte for strncpy

for NUL terminated string, need be always sure '\0' in the end.

additional info:
  strncpy will pads with zeroes to the end of the given buffer.
  should initialise every bit of memory that is going to be copied to userland

Signed-off-by: Chen Gang <gang.chen@asianux.com>
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/rds/stats.c b/net/rds/stats.c
index 7be790d..73be187 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -87,6 +87,7 @@ void rds_stats_info_copy(struct rds_info_iterator *iter,
 	for (i = 0; i < nr; i++) {
 		BUG_ON(strlen(names[i]) >= sizeof(ctr.name));
 		strncpy(ctr.name, names[i], sizeof(ctr.name) - 1);
+		ctr.name[sizeof(ctr.name) - 1] = '\0';
 		ctr.value = values[i];
 
 		rds_info_copy(iter, &ctr, sizeof(ctr));
-- 
cgit v0.10.2


From f39479363e0361c8bb4397481c01a7c3a1a3c8ac Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Thu, 7 Mar 2013 18:25:41 +0000
Subject: drivers/isdn: checkng length to be sure not memory overflow

sizeof (cmd.parm.cmsg.para) is 50 (MAX_CAPI_PARA_LEN).
  sizeof (cmd.parm) is 80+, but less than 100.
  strlen(msg) may be more than 80+ (Modem-Commandbuffer, less than 255).
    isdn_tty_send_msg is called by isdn_tty_parse_at
    the relative parameter is m->mdmcmd (atemu *m)
    the relative command may be "+M..."

  so need check the length to be sure not memory overflow.
    cmd.parm is a union, and need keep original valid buffer length no touch

Signed-off-by: Chen Gang <gang.chen@asianux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c
index d8a7d83..ebaebdf 100644
--- a/drivers/isdn/i4l/isdn_tty.c
+++ b/drivers/isdn/i4l/isdn_tty.c
@@ -902,7 +902,9 @@ isdn_tty_send_msg(modem_info *info, atemu *m, char *msg)
 	int j;
 	int l;
 
-	l = strlen(msg);
+	l = min(strlen(msg), sizeof(cmd.parm) - sizeof(cmd.parm.cmsg)
+		+ sizeof(cmd.parm.cmsg.para) - 2);
+
 	if (!l) {
 		isdn_tty_modem_result(RESULT_ERROR, info);
 		return;
-- 
cgit v0.10.2


From 1abccd7419de9829bcdf9ab1f81d5f6cf74d55d3 Mon Sep 17 00:00:00 2001
From: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Date: Fri, 8 Mar 2013 16:32:25 +0900
Subject: tracing: update documentation of snapshot utility

Now, "snapshot" file returns success on a reset of snapshot buffer
even if the buffer wasn't allocated, instead of returning EINVAL.
This patch updates snapshot desctiption according to the change.

Link: http://lkml.kernel.org/r/51399409.4090207@hitachi.com

Signed-off-by: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 53d6a3c..a372304 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1873,7 +1873,7 @@ feature:
 
 	status\input  |     0      |     1      |    else    |
 	--------------+------------+------------+------------+
-	not allocated |(do nothing)| alloc+swap |   EINVAL   |
+	not allocated |(do nothing)| alloc+swap |(do nothing)|
 	--------------+------------+------------+------------+
 	allocated     |    free    |    swap    |   clear    |
 	--------------+------------+------------+------------+
-- 
cgit v0.10.2


From a6a8fe950e1b8596bb06f2c89c3a1a4bf2011ba9 Mon Sep 17 00:00:00 2001
From: Paul Moore <pmoore@redhat.com>
Date: Fri, 8 Mar 2013 09:45:39 -0500
Subject: netlabel: fix build problems when CONFIG_IPV6=n

My last patch to solve a problem where the static/fallback labels were
not fully displayed resulted in build problems when IPv6 was disabled.
This patch resolves the IPv6 build problems; sorry for the screw-up.

Please queue for -stable or simply merge with the previous patch.

Reported-by: Kbuild Test Robot <fengguang.wu@intel.com>
Signed-off-by: Paul Moore <pmoore@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 85742b7..8a6c6ea 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1189,8 +1189,6 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 	struct netlbl_unlhsh_walk_arg cb_arg;
 	u32 skip_bkt = cb->args[0];
 	u32 skip_chain = cb->args[1];
-	u32 skip_addr4 = cb->args[2];
-	u32 skip_addr6 = cb->args[3];
 	u32 iter_bkt;
 	u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
 	struct netlbl_unlhsh_iface *iface;
@@ -1215,7 +1213,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 				continue;
 			netlbl_af4list_foreach_rcu(addr4,
 						   &iface->addr4_list) {
-				if (iter_addr4++ < skip_addr4)
+				if (iter_addr4++ < cb->args[2])
 					continue;
 				if (netlbl_unlabel_staticlist_gen(
 					      NLBL_UNLABEL_C_STATICLIST,
@@ -1231,7 +1229,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 #if IS_ENABLED(CONFIG_IPV6)
 			netlbl_af6list_foreach_rcu(addr6,
 						   &iface->addr6_list) {
-				if (iter_addr6++ < skip_addr6)
+				if (iter_addr6++ < cb->args[3])
 					continue;
 				if (netlbl_unlabel_staticlist_gen(
 					      NLBL_UNLABEL_C_STATICLIST,
@@ -1273,12 +1271,9 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 {
 	struct netlbl_unlhsh_walk_arg cb_arg;
 	struct netlbl_unlhsh_iface *iface;
-	u32 skip_addr4 = cb->args[0];
-	u32 skip_addr6 = cb->args[1];
-	u32 iter_addr4 = 0;
+	u32 iter_addr4 = 0, iter_addr6 = 0;
 	struct netlbl_af4list *addr4;
 #if IS_ENABLED(CONFIG_IPV6)
-	u32 iter_addr6 = 0;
 	struct netlbl_af6list *addr6;
 #endif
 
@@ -1292,7 +1287,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 		goto unlabel_staticlistdef_return;
 
 	netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) {
-		if (iter_addr4++ < skip_addr4)
+		if (iter_addr4++ < cb->args[0])
 			continue;
 		if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
 					      iface,
@@ -1305,7 +1300,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 	}
 #if IS_ENABLED(CONFIG_IPV6)
 	netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) {
-		if (iter_addr6++ < skip_addr6)
+		if (iter_addr6++ < cb->args[1])
 			continue;
 		if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
 					      iface,
-- 
cgit v0.10.2


From 3bc1b1add7a8484cc4a261c3e128dbe1528ce01f Mon Sep 17 00:00:00 2001
From: Cristian Bercaru <B43982@freescale.com>
Date: Fri, 8 Mar 2013 07:03:38 +0000
Subject: bridging: fix rx_handlers return code

The frames for which rx_handlers return RX_HANDLER_CONSUMED are no longer
counted as dropped. They are counted as successfully received by
'netif_receive_skb'.

This allows network interface drivers to correctly update their RX-OK and
RX-DRP counters based on the result of 'netif_receive_skb'.

Signed-off-by: Cristian Bercaru <B43982@freescale.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/dev.c b/net/core/dev.c
index 8f152f9..dffbef7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3444,6 +3444,7 @@ ncls:
 		}
 		switch (rx_handler(&skb)) {
 		case RX_HANDLER_CONSUMED:
+			ret = NET_RX_SUCCESS;
 			goto unlock;
 		case RX_HANDLER_ANOTHER:
 			goto another_round;
-- 
cgit v0.10.2


From ddf64354af4a702ee0b85d0a285ba74c7278a460 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Fri, 8 Mar 2013 02:07:23 +0000
Subject: ipv6: stop multicast forwarding to process interface scoped addresses

v2:
a) used struct ipv6_addr_props

v3:
a) reverted changes for ipv6_addr_props

v4:
a) do not use __ipv6_addr_needs_scope_id

Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index b1876e5..e33fe0a 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -281,7 +281,8 @@ int ip6_mc_input(struct sk_buff *skb)
 	 *      IPv6 multicast router mode is now supported ;)
 	 */
 	if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
-	    !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) &&
+	    !(ipv6_addr_type(&hdr->daddr) &
+	      (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
 	    likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
 		/*
 		 * Okay, we try to forward - split and duplicate
-- 
cgit v0.10.2


From 84421b99cedc3443e76d2a594f3c815d5cb9a8e1 Mon Sep 17 00:00:00 2001
From: Nithin Sujir <nsujir@broadcom.com>
Date: Fri, 8 Mar 2013 08:01:24 +0000
Subject: tg3: Update link_up flag for phylib devices

Commit f4a46d1f46a8fece34edd2023e054072b02e110d introduced a bug where
the ifconfig stats would remain 0 for phylib devices. This is due to
tp->link_up flag never becoming true causing tg3_periodic_fetch_stats()
to return.

The link_up flag was being updated in tg3_test_and_report_link_chg()
after setting up the phy. This function however, is not called for
phylib devices since the driver does not do the phy setup.

This patch moves the link_up flag update into the common
tg3_link_report() function that gets called for phylib devices as well
for non phylib devices when the link state changes.

To avoid updating link_up twice, we replace tg3_carrier_...() calls that
are followed by tg3_link_report(), with netif_carrier_...(). We can then
remove the unused tg3_carrier_on() function.

CC: <stable@vger.kernel.org>
Reported-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index fdb9b56..93729f9 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -1869,6 +1869,8 @@ static void tg3_link_report(struct tg3 *tp)
 
 		tg3_ump_link_report(tp);
 	}
+
+	tp->link_up = netif_carrier_ok(tp->dev);
 }
 
 static u16 tg3_advert_flowctrl_1000X(u8 flow_ctrl)
@@ -2522,12 +2524,6 @@ static int tg3_phy_reset_5703_4_5(struct tg3 *tp)
 	return err;
 }
 
-static void tg3_carrier_on(struct tg3 *tp)
-{
-	netif_carrier_on(tp->dev);
-	tp->link_up = true;
-}
-
 static void tg3_carrier_off(struct tg3 *tp)
 {
 	netif_carrier_off(tp->dev);
@@ -2553,7 +2549,7 @@ static int tg3_phy_reset(struct tg3 *tp)
 		return -EBUSY;
 
 	if (netif_running(tp->dev) && tp->link_up) {
-		tg3_carrier_off(tp);
+		netif_carrier_off(tp->dev);
 		tg3_link_report(tp);
 	}
 
@@ -4262,9 +4258,9 @@ static bool tg3_test_and_report_link_chg(struct tg3 *tp, int curr_link_up)
 {
 	if (curr_link_up != tp->link_up) {
 		if (curr_link_up) {
-			tg3_carrier_on(tp);
+			netif_carrier_on(tp->dev);
 		} else {
-			tg3_carrier_off(tp);
+			netif_carrier_off(tp->dev);
 			if (tp->phy_flags & TG3_PHYFLG_MII_SERDES)
 				tp->phy_flags &= ~TG3_PHYFLG_PARALLEL_DETECT;
 		}
-- 
cgit v0.10.2


From 586948372189d33ceca9d89fb0c791ef4d53d8ad Mon Sep 17 00:00:00 2001
From: Stephan Frank <sfrank@cs.tu-berlin.de>
Date: Thu, 7 Mar 2013 14:08:50 -0800
Subject: Input: wacom - add support for 0x10d

It is a Wacom device found in Fujitsu Lifebook T902.

Signed-off-by: Stephan Frank <sfrank@cs.tu-berlin.de>
Acked-by: Ping Cheng <pingc@wacom.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index 41b6fbf..1daa979 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -2017,6 +2017,9 @@ static const struct wacom_features wacom_features_0x100 =
 static const struct wacom_features wacom_features_0x101 =
 	{ "Wacom ISDv4 101",      WACOM_PKGLEN_MTTPC,     26202, 16325,  255,
 	  0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x10D =
+	{ "Wacom ISDv4 10D",      WACOM_PKGLEN_MTTPC,     26202, 16325,  255,
+	  0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0x4001 =
 	{ "Wacom ISDv4 4001",      WACOM_PKGLEN_MTTPC,     26202, 16325,  255,
 	  0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -2201,6 +2204,7 @@ const struct usb_device_id wacom_ids[] = {
 	{ USB_DEVICE_WACOM(0xEF) },
 	{ USB_DEVICE_WACOM(0x100) },
 	{ USB_DEVICE_WACOM(0x101) },
+	{ USB_DEVICE_WACOM(0x10D) },
 	{ USB_DEVICE_WACOM(0x4001) },
 	{ USB_DEVICE_WACOM(0x47) },
 	{ USB_DEVICE_WACOM(0xF4) },
-- 
cgit v0.10.2


From c085c49920b2f900ba716b4ca1c1a55ece9872cc Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sat, 9 Mar 2013 05:52:19 +0000
Subject: bridge: fix mdb info leaks

The bridging code discloses heap and stack bytes via the RTM_GETMDB
netlink interface and via the notify messages send to group RTNLGRP_MDB
afer a successful add/del.

Fix both cases by initializing all unset members/padding bytes with
memset(0).

Cc: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 9f97b85..ee79f3f 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -80,6 +80,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 				port = p->port;
 				if (port) {
 					struct br_mdb_entry e;
+					memset(&e, 0, sizeof(e));
 					e.ifindex = port->dev->ifindex;
 					e.state = p->state;
 					if (p->addr.proto == htons(ETH_P_IP))
@@ -136,6 +137,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				break;
 
 			bpm = nlmsg_data(nlh);
+			memset(bpm, 0, sizeof(*bpm));
 			bpm->ifindex = dev->ifindex;
 			if (br_mdb_fill_info(skb, cb, dev) < 0)
 				goto out;
@@ -171,6 +173,7 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
 		return -EMSGSIZE;
 
 	bpm = nlmsg_data(nlh);
+	memset(bpm, 0, sizeof(*bpm));
 	bpm->family  = AF_BRIDGE;
 	bpm->ifindex = dev->ifindex;
 	nest = nla_nest_start(skb, MDBA_MDB);
@@ -228,6 +231,7 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
 {
 	struct br_mdb_entry entry;
 
+	memset(&entry, 0, sizeof(entry));
 	entry.ifindex = port->dev->ifindex;
 	entry.addr.proto = group->proto;
 	entry.addr.u.ip4 = group->u.ip4;
-- 
cgit v0.10.2


From 84d73cd3fb142bf1298a8c13fd4ca50fd2432372 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sat, 9 Mar 2013 05:52:20 +0000
Subject: rtnl: fix info leak on RTM_GETLINK request for VF devices

Initialize the mac address buffer with 0 as the driver specific function
will probably not fill the whole buffer. In fact, all in-kernel drivers
fill only ETH_ALEN of the MAX_ADDR_LEN bytes, i.e. 6 of the 32 possible
bytes. Therefore we currently leak 26 bytes of stack memory to userland
via the netlink interface.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b376410..a585d45 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -979,6 +979,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			 * report anything.
 			 */
 			ivi.spoofchk = -1;
+			memset(ivi.mac, 0, sizeof(ivi.mac));
 			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
 				break;
 			vf_mac.vf =
-- 
cgit v0.10.2


From 29cd8ae0e1a39e239a3a7b67da1986add1199fc0 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sat, 9 Mar 2013 05:52:21 +0000
Subject: dcbnl: fix various netlink info leaks

The dcb netlink interface leaks stack memory in various places:
* perm_addr[] buffer is only filled at max with 12 of the 32 bytes but
  copied completely,
* no in-kernel driver fills all fields of an IEEE 802.1Qaz subcommand,
  so we're leaking up to 58 bytes for ieee_ets structs, up to 136 bytes
  for ieee_pfc structs, etc.,
* the same is true for CEE -- no in-kernel driver fills the whole
  struct,

Prevent all of the above stack info leaks by properly initializing the
buffers/structures involved.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 1b588e2..21291f1 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -284,6 +284,7 @@ static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlmsghdr *nlh,
 	if (!netdev->dcbnl_ops->getpermhwaddr)
 		return -EOPNOTSUPP;
 
+	memset(perm_addr, 0, sizeof(perm_addr));
 	netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr);
 
 	return nla_put(skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr), perm_addr);
@@ -1042,6 +1043,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 
 	if (ops->ieee_getets) {
 		struct ieee_ets ets;
+		memset(&ets, 0, sizeof(ets));
 		err = ops->ieee_getets(netdev, &ets);
 		if (!err &&
 		    nla_put(skb, DCB_ATTR_IEEE_ETS, sizeof(ets), &ets))
@@ -1050,6 +1052,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 
 	if (ops->ieee_getmaxrate) {
 		struct ieee_maxrate maxrate;
+		memset(&maxrate, 0, sizeof(maxrate));
 		err = ops->ieee_getmaxrate(netdev, &maxrate);
 		if (!err) {
 			err = nla_put(skb, DCB_ATTR_IEEE_MAXRATE,
@@ -1061,6 +1064,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 
 	if (ops->ieee_getpfc) {
 		struct ieee_pfc pfc;
+		memset(&pfc, 0, sizeof(pfc));
 		err = ops->ieee_getpfc(netdev, &pfc);
 		if (!err &&
 		    nla_put(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc))
@@ -1094,6 +1098,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 	/* get peer info if available */
 	if (ops->ieee_peer_getets) {
 		struct ieee_ets ets;
+		memset(&ets, 0, sizeof(ets));
 		err = ops->ieee_peer_getets(netdev, &ets);
 		if (!err &&
 		    nla_put(skb, DCB_ATTR_IEEE_PEER_ETS, sizeof(ets), &ets))
@@ -1102,6 +1107,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 
 	if (ops->ieee_peer_getpfc) {
 		struct ieee_pfc pfc;
+		memset(&pfc, 0, sizeof(pfc));
 		err = ops->ieee_peer_getpfc(netdev, &pfc);
 		if (!err &&
 		    nla_put(skb, DCB_ATTR_IEEE_PEER_PFC, sizeof(pfc), &pfc))
@@ -1280,6 +1286,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev)
 	/* peer info if available */
 	if (ops->cee_peer_getpg) {
 		struct cee_pg pg;
+		memset(&pg, 0, sizeof(pg));
 		err = ops->cee_peer_getpg(netdev, &pg);
 		if (!err &&
 		    nla_put(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg))
@@ -1288,6 +1295,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev)
 
 	if (ops->cee_peer_getpfc) {
 		struct cee_pfc pfc;
+		memset(&pfc, 0, sizeof(pfc));
 		err = ops->cee_peer_getpfc(netdev, &pfc);
 		if (!err &&
 		    nla_put(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc))
-- 
cgit v0.10.2


From fef4c86e59a76f2ec1a77d5732f40752700bd5dd Mon Sep 17 00:00:00 2001
From: David Oostdyk <daveo@ll.mit.edu>
Date: Fri, 8 Mar 2013 08:28:15 +0000
Subject: rrunner.c: fix possible memory leak in rr_init_one()

In the event that register_netdev() failed, the rrpriv->evt_ring
allocation would have not been freed.

Signed-off-by: David Oostdyk <daveo@ll.mit.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index e5b19b0..3c4d627 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -202,6 +202,9 @@ static int rr_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	return 0;
 
  out:
+	if (rrpriv->evt_ring)
+		pci_free_consistent(pdev, EVT_RING_SIZE, rrpriv->evt_ring,
+				    rrpriv->evt_ring_dma);
 	if (rrpriv->rx_ring)
 		pci_free_consistent(pdev, RX_TOTAL_SIZE, rrpriv->rx_ring,
 				    rrpriv->rx_ring_dma);
-- 
cgit v0.10.2


From 9026c4927254f5bea695cc3ef2e255280e6a3011 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?=
 =?UTF-8?q?=E6=98=8E?= <yoshfuji@linux-ipv6.org>
Date: Sat, 9 Mar 2013 09:11:57 +0000
Subject: 6lowpan: Fix endianness issue in is_addr_link_local().

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
index 8c2251f..bba5f83 100644
--- a/net/ieee802154/6lowpan.h
+++ b/net/ieee802154/6lowpan.h
@@ -84,7 +84,7 @@
 	(memcmp(addr1, addr2, length >> 3) == 0)
 
 /* local link, i.e. FE80::/10 */
-#define is_addr_link_local(a) (((a)->s6_addr16[0]) == 0x80FE)
+#define is_addr_link_local(a) (((a)->s6_addr16[0]) == htons(0xFE80))
 
 /*
  * check whether we can compress the IID to 16 bits,
-- 
cgit v0.10.2


From 39735019716e93914a366ac1fb2e78f91b170545 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin.vincent@stericsson.com>
Date: Sat, 9 Mar 2013 16:17:20 -0800
Subject: Input: tc3589x-keypad - fix keymap size

The keymap size used by tc3589x is too low, leading to the driver
overwriting other people's memory.  Fix this by making the driver
use the automatically allocated keymap provided by
matrix_keypad_build_keymap() instead of allocating one on its own.

Signed-off-by: Rabin Vincent <rabin.vincent@stericsson.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/keyboard/tc3589x-keypad.c b/drivers/input/keyboard/tc3589x-keypad.c
index 2fb0d76..208de7cb 100644
--- a/drivers/input/keyboard/tc3589x-keypad.c
+++ b/drivers/input/keyboard/tc3589x-keypad.c
@@ -70,8 +70,6 @@
 #define TC3589x_EVT_INT_CLR	0x2
 #define TC3589x_KBD_INT_CLR	0x1
 
-#define TC3589x_KBD_KEYMAP_SIZE     64
-
 /**
  * struct tc_keypad - data structure used by keypad driver
  * @tc3589x:    pointer to tc35893
@@ -88,7 +86,7 @@ struct tc_keypad {
 	const struct tc3589x_keypad_platform_data *board;
 	unsigned int krow;
 	unsigned int kcol;
-	unsigned short keymap[TC3589x_KBD_KEYMAP_SIZE];
+	unsigned short *keymap;
 	bool keypad_stopped;
 };
 
@@ -338,12 +336,14 @@ static int tc3589x_keypad_probe(struct platform_device *pdev)
 
 	error = matrix_keypad_build_keymap(plat->keymap_data, NULL,
 					   TC3589x_MAX_KPROW, TC3589x_MAX_KPCOL,
-					   keypad->keymap, input);
+					   NULL, input);
 	if (error) {
 		dev_err(&pdev->dev, "Failed to build keymap\n");
 		goto err_free_mem;
 	}
 
+	keypad->keymap = input->keycode;
+
 	input_set_capability(input, EV_MSC, MSC_SCAN);
 	if (!plat->no_autorepeat)
 		__set_bit(EV_REP, input->evbit);
-- 
cgit v0.10.2


From f94352f8db97b9a3b3c1ec45f6fef1400880168a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 3 Mar 2013 20:19:07 -0800
Subject: Input: ads7864 - check return value of regulator enable

At least print a warning if we can't power the device up.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 4f702b3..434c3df 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -236,7 +236,12 @@ static void __ads7846_disable(struct ads7846 *ts)
 /* Must be called with ts->lock held */
 static void __ads7846_enable(struct ads7846 *ts)
 {
-	regulator_enable(ts->reg);
+	int error;
+
+	error = regulator_enable(ts->reg);
+	if (error != 0)
+		dev_err(&ts->spi->dev, "Failed to enable supply: %d\n", error);
+
 	ads7846_restart(ts);
 }
 
-- 
cgit v0.10.2


From 4b7d293c64fde133cc2b669d0d7637b8a4c6d62f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 3 Mar 2013 20:21:30 -0800
Subject: Input: mms114 - Fix regulator enable and disable paths

When it uses regulators the mms114 driver checks to see if it managed to
acquire regulators and ignores errors. This is not the intended usage and
not great style in general.

Since the driver already refuses to probe if it fails to allocate the
regulators simply make the enable and disable calls unconditional and
add appropriate error handling, including adding cleanup of the
regulators if setup_reg() fails.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Joonyoung Shim <jy0922.shim@samsung.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c
index 4a29ddf..1443532 100644
--- a/drivers/input/touchscreen/mms114.c
+++ b/drivers/input/touchscreen/mms114.c
@@ -314,15 +314,27 @@ static int mms114_start(struct mms114_data *data)
 	struct i2c_client *client = data->client;
 	int error;
 
-	if (data->core_reg)
-		regulator_enable(data->core_reg);
-	if (data->io_reg)
-		regulator_enable(data->io_reg);
+	error = regulator_enable(data->core_reg);
+	if (error) {
+		dev_err(&client->dev, "Failed to enable avdd: %d\n", error);
+		return error;
+	}
+
+	error = regulator_enable(data->io_reg);
+	if (error) {
+		dev_err(&client->dev, "Failed to enable vdd: %d\n", error);
+		regulator_disable(data->core_reg);
+		return error;
+	}
+
 	mdelay(MMS114_POWERON_DELAY);
 
 	error = mms114_setup_regs(data);
-	if (error < 0)
+	if (error < 0) {
+		regulator_disable(data->io_reg);
+		regulator_disable(data->core_reg);
 		return error;
+	}
 
 	if (data->pdata->cfg_pin)
 		data->pdata->cfg_pin(true);
@@ -335,16 +347,20 @@ static int mms114_start(struct mms114_data *data)
 static void mms114_stop(struct mms114_data *data)
 {
 	struct i2c_client *client = data->client;
+	int error;
 
 	disable_irq(client->irq);
 
 	if (data->pdata->cfg_pin)
 		data->pdata->cfg_pin(false);
 
-	if (data->io_reg)
-		regulator_disable(data->io_reg);
-	if (data->core_reg)
-		regulator_disable(data->core_reg);
+	error = regulator_disable(data->io_reg);
+	if (error)
+		dev_warn(&client->dev, "Failed to disable vdd: %d\n", error);
+
+	error = regulator_disable(data->core_reg);
+	if (error)
+		dev_warn(&client->dev, "Failed to disable avdd: %d\n", error);
 }
 
 static int mms114_input_open(struct input_dev *dev)
-- 
cgit v0.10.2


From bc077320f8f6449197f4010d6774afab7bb998b2 Mon Sep 17 00:00:00 2001
From: Marco Stornelli <marco.stornelli@gmail.com>
Date: Sat, 20 Oct 2012 12:02:59 +0200
Subject: hostfs: fix a not needed double check

With the commit 3be2be0a32c18b0fd6d623cda63174a332ca0de1 we removed vmtruncate,
but actaully there is no need to call inode_newsize_ok() because the checks are
already done in inode_change_ok() at the begin of the function.

Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fbabb90..178b90c 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -845,15 +845,8 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 		return err;
 
 	if ((attr->ia_valid & ATTR_SIZE) &&
-	    attr->ia_size != i_size_read(inode)) {
-		int error;
-
-		error = inode_newsize_ok(inode, attr->ia_size);
-		if (error)
-			return error;
-
+	    attr->ia_size != i_size_read(inode))
 		truncate_setsize(inode, attr->ia_size);
-	}
 
 	setattr_copy(inode, attr);
 	mark_inode_dirty(inode);
-- 
cgit v0.10.2


From 55ea1cfab24e37a347f1beae9f0b6724cad18f6b Mon Sep 17 00:00:00 2001
From: Paul Chavent <Paul.Chavent@onera.fr>
Date: Thu, 6 Dec 2012 16:25:05 +0100
Subject: net : enable tx time stamping in the vde driver.

This new version moves the skb_tx_timestamp in the main uml
driver. This should avoid the need to call this function in each
transport (vde, slirp, tuntap, ...). It also add support for ethtool
get_ts_info.

Signed-off-by: Paul Chavent <paul.chavent@onera.fr>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index d8926c3..39f1862 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -218,6 +218,7 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	spin_lock_irqsave(&lp->lock, flags);
 
 	len = (*lp->write)(lp->fd, skb, lp);
+	skb_tx_timestamp(skb);
 
 	if (len == skb->len) {
 		dev->stats.tx_packets++;
@@ -281,6 +282,7 @@ static void uml_net_get_drvinfo(struct net_device *dev,
 static const struct ethtool_ops uml_net_ethtool_ops = {
 	.get_drvinfo	= uml_net_get_drvinfo,
 	.get_link	= ethtool_op_get_link,
+	.get_ts_info	= ethtool_op_get_ts_info,
 };
 
 static void uml_net_user_timer_expire(unsigned long _conn)
-- 
cgit v0.10.2


From fdfa4c952844fce881df8c76de9c7180cbe913ab Mon Sep 17 00:00:00 2001
From: Sergei Trofimovich <slyfox@gentoo.org>
Date: Sun, 30 Dec 2012 01:37:30 +0300
Subject: um: add missing declaration of 'getrlimit()' and friends

arch/um/os-Linux/start_up.c: In function 'check_coredump_limit':
arch/um/os-Linux/start_up.c:338:16: error: storage size of 'lim' isn't known
arch/um/os-Linux/start_up.c:339:2: error: implicit declaration of function 'getrlimit' [-Werror=implicit-function-declaration]

Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
CC: Jeff Dike <jdike@addtoit.com>
CC: Richard Weinberger <richard@nod.at>
CC: Al Viro <viro@zeniv.linux.org.uk>
CC: user-mode-linux-devel@lists.sourceforge.net
CC: user-mode-linux-user@lists.sourceforge.net
CC: linux-kernel@vger.kernel.org
Signed-off-by: Richard Weinberger <richard@nod.at>

diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index da4b9e9..337518c 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -15,6 +15,8 @@
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/resource.h>
 #include <asm/unistd.h>
 #include <init.h>
 #include <os.h>
-- 
cgit v0.10.2


From 72383d43b223c410fc61d9e905690b9b9ba9d418 Mon Sep 17 00:00:00 2001
From: Sergei Trofimovich <slyfox@gentoo.org>
Date: Sun, 30 Dec 2012 01:37:31 +0300
Subject: um: fix build failure due to mess-up of sig_info protorype
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

arch/um/os-Linux/signal.c:18:8: error: conflicting types for 'sig_info'
In file included from /home/slyfox/linux-2.6/arch/um/os-Linux/signal.c:12:0:
arch/um/include/shared/as-layout.h:64:15: note: previous declaration of 'sig_info' was here

Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
CC: Jeff Dike <jdike@addtoit.com>
CC: Richard Weinberger <richard@nod.at>
CC: "Martin Pärtel" <martin.partel@gmail.com>
CC: Al Viro <viro@zeniv.linux.org.uk>
CC: user-mode-linux-devel@lists.sourceforge.net
CC: user-mode-linux-user@lists.sourceforge.net
CC: linux-kernel@vger.kernel.org
Signed-off-by: Richard Weinberger <richard@nod.at>

diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index b1469fe..9d9f1b4 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -15,7 +15,7 @@
 #include <sysdep/mcontext.h>
 #include "internal.h"
 
-void (*sig_info[NSIG])(int, siginfo_t *, struct uml_pt_regs *) = {
+void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
 	[SIGTRAP]	= relay_signal,
 	[SIGFPE]	= relay_signal,
 	[SIGILL]	= relay_signal,
-- 
cgit v0.10.2


From cc4f02486c09977ccbe3ce2276aca5608a44ca00 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Mon, 11 Mar 2013 10:03:42 +0100
Subject: um: Use tty_port_operations->destruct

As we setup the SIGWINCH handler in tty_port_operations->activate
it makes sense to tear down it in ->destruct.

Signed-off-by: Richard Weinberger <richard@nod.at>

diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index f1b3857..232243a 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -315,8 +315,22 @@ static int line_activate(struct tty_port *port, struct tty_struct *tty)
 	return 0;
 }
 
+static void unregister_winch(struct tty_struct *tty);
+
+static void line_destruct(struct tty_port *port)
+{
+	struct tty_struct *tty = tty_port_tty_get(port);
+	struct line *line = tty->driver_data;
+
+	if (line->sigio) {
+		unregister_winch(tty);
+		line->sigio = 0;
+	}
+}
+
 static const struct tty_port_operations line_port_ops = {
 	.activate = line_activate,
+	.destruct = line_destruct,
 };
 
 int line_open(struct tty_struct *tty, struct file *filp)
@@ -340,18 +354,6 @@ int line_install(struct tty_driver *driver, struct tty_struct *tty,
 	return 0;
 }
 
-static void unregister_winch(struct tty_struct *tty);
-
-void line_cleanup(struct tty_struct *tty)
-{
-	struct line *line = tty->driver_data;
-
-	if (line->sigio) {
-		unregister_winch(tty);
-		line->sigio = 0;
-	}
-}
-
 void line_close(struct tty_struct *tty, struct file * filp)
 {
 	struct line *line = tty->driver_data;
diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
index 16fdd0a..b8d14fa 100644
--- a/arch/um/drivers/ssl.c
+++ b/arch/um/drivers/ssl.c
@@ -105,7 +105,6 @@ static const struct tty_operations ssl_ops = {
 	.throttle 		= line_throttle,
 	.unthrottle 		= line_unthrottle,
 	.install		= ssl_install,
-	.cleanup		= line_cleanup,
 	.hangup			= line_hangup,
 };
 
diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
index 827777a..7b361f3 100644
--- a/arch/um/drivers/stdio_console.c
+++ b/arch/um/drivers/stdio_console.c
@@ -110,7 +110,6 @@ static const struct tty_operations console_ops = {
 	.set_termios 		= line_set_termios,
 	.throttle 		= line_throttle,
 	.unthrottle 		= line_unthrottle,
-	.cleanup		= line_cleanup,
 	.hangup			= line_hangup,
 };
 
-- 
cgit v0.10.2


From 2116bda6ad937d7acb6e2316fd9e65ad6ca01d42 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Mon, 11 Mar 2013 10:05:45 +0100
Subject: um: Use tty_port in SIGWINCH handler

The tty below tty_port might get destroyed by the tty layer
while we hold a reference to it.
So we have to carry tty_port around...

Signed-off-by: Richard Weinberger <richard@nod.at>

diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h
index 78f1b89..c512b03 100644
--- a/arch/um/drivers/chan.h
+++ b/arch/um/drivers/chan.h
@@ -37,7 +37,7 @@ extern int console_write_chan(struct chan *chan, const char *buf,
 extern int console_open_chan(struct line *line, struct console *co);
 extern void deactivate_chan(struct chan *chan, int irq);
 extern void reactivate_chan(struct chan *chan, int irq);
-extern void chan_enable_winch(struct chan *chan, struct tty_struct *tty);
+extern void chan_enable_winch(struct chan *chan, struct tty_port *port);
 extern int enable_chan(struct line *line);
 extern void close_chan(struct line *line);
 extern int chan_window_size(struct line *line, 
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 15c553c..80b47cb 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -122,10 +122,10 @@ static int open_chan(struct list_head *chans)
 	return err;
 }
 
-void chan_enable_winch(struct chan *chan, struct tty_struct *tty)
+void chan_enable_winch(struct chan *chan, struct tty_port *port)
 {
 	if (chan && chan->primary && chan->ops->winch)
-		register_winch(chan->fd, tty);
+		register_winch(chan->fd, port);
 }
 
 static void line_timer_cb(struct work_struct *work)
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index 9be670a..3fd7c3e 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -216,7 +216,7 @@ static int winch_thread(void *arg)
 	}
 }
 
-static int winch_tramp(int fd, struct tty_struct *tty, int *fd_out,
+static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
 		       unsigned long *stack_out)
 {
 	struct winch_data data;
@@ -271,7 +271,7 @@ static int winch_tramp(int fd, struct tty_struct *tty, int *fd_out,
 	return err;
 }
 
-void register_winch(int fd, struct tty_struct *tty)
+void register_winch(int fd, struct tty_port *port)
 {
 	unsigned long stack;
 	int pid, thread, count, thread_fd = -1;
@@ -281,17 +281,17 @@ void register_winch(int fd, struct tty_struct *tty)
 		return;
 
 	pid = tcgetpgrp(fd);
-	if (is_skas_winch(pid, fd, tty)) {
-		register_winch_irq(-1, fd, -1, tty, 0);
+	if (is_skas_winch(pid, fd, port)) {
+		register_winch_irq(-1, fd, -1, port, 0);
 		return;
 	}
 
 	if (pid == -1) {
-		thread = winch_tramp(fd, tty, &thread_fd, &stack);
+		thread = winch_tramp(fd, port, &thread_fd, &stack);
 		if (thread < 0)
 			return;
 
-		register_winch_irq(thread_fd, fd, thread, tty, stack);
+		register_winch_irq(thread_fd, fd, thread, port, stack);
 
 		count = write(thread_fd, &c, sizeof(c));
 		if (count != sizeof(c))
diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h
index dc693298..03f1b56 100644
--- a/arch/um/drivers/chan_user.h
+++ b/arch/um/drivers/chan_user.h
@@ -38,10 +38,10 @@ extern int generic_window_size(int fd, void *unused, unsigned short *rows_out,
 			       unsigned short *cols_out);
 extern void generic_free(void *data);
 
-struct tty_struct;
-extern void register_winch(int fd,  struct tty_struct *tty);
+struct tty_port;
+extern void register_winch(int fd,  struct tty_port *port);
 extern void register_winch_irq(int fd, int tty_fd, int pid,
-			       struct tty_struct *tty, unsigned long stack);
+			       struct tty_port *port, unsigned long stack);
 
 #define __channel_help(fn, prefix) \
 __uml_help(fn, prefix "[0-9]*=<channel description>\n" \
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 232243a..be541cf 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -305,7 +305,7 @@ static int line_activate(struct tty_port *port, struct tty_struct *tty)
 		return ret;
 
 	if (!line->sigio) {
-		chan_enable_winch(line->chan_out, tty);
+		chan_enable_winch(line->chan_out, port);
 		line->sigio = 1;
 	}
 
@@ -603,7 +603,7 @@ struct winch {
 	int fd;
 	int tty_fd;
 	int pid;
-	struct tty_struct *tty;
+	struct tty_port *port;
 	unsigned long stack;
 	struct work_struct work;
 };
@@ -657,7 +657,7 @@ static irqreturn_t winch_interrupt(int irq, void *data)
 			goto out;
 		}
 	}
-	tty = winch->tty;
+	tty = tty_port_tty_get(winch->port);
 	if (tty != NULL) {
 		line = tty->driver_data;
 		if (line != NULL) {
@@ -665,6 +665,7 @@ static irqreturn_t winch_interrupt(int irq, void *data)
 					 &tty->winsize.ws_col);
 			kill_pgrp(tty->pgrp, SIGWINCH, 1);
 		}
+		tty_kref_put(tty);
 	}
  out:
 	if (winch->fd != -1)
@@ -672,7 +673,7 @@ static irqreturn_t winch_interrupt(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty,
+void register_winch_irq(int fd, int tty_fd, int pid, struct tty_port *port,
 			unsigned long stack)
 {
 	struct winch *winch;
@@ -687,7 +688,7 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty,
 				   .fd  	= fd,
 				   .tty_fd 	= tty_fd,
 				   .pid  	= pid,
-				   .tty 	= tty,
+				   .port 	= port,
 				   .stack	= stack });
 
 	if (um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt,
@@ -716,15 +717,18 @@ static void unregister_winch(struct tty_struct *tty)
 {
 	struct list_head *ele, *next;
 	struct winch *winch;
+	struct tty_struct *wtty;
 
 	spin_lock(&winch_handler_lock);
 
 	list_for_each_safe(ele, next, &winch_handlers) {
 		winch = list_entry(ele, struct winch, list);
-		if (winch->tty == tty) {
+		wtty = tty_port_tty_get(winch->port);
+		if (wtty == tty) {
 			free_winch(winch);
 			break;
 		}
+		tty_kref_put(wtty);
 	}
 	spin_unlock(&winch_handler_lock);
 }
-- 
cgit v0.10.2


From cb16b91a449afd01b85ec4e59f30449d11c4acd7 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizefan@huawei.com>
Date: Thu, 7 Mar 2013 17:35:53 +0800
Subject: s390: Fix a header dependencies related build error

Commit 877c685607925238e302cd3aa38788dca6c1b226
("perf: Remove include of cgroup.h from perf_event.h") caused
this build failure if PERF_EVENTS is enabled:

   In file included from arch/s390/include/asm/perf_event.h:9:0,
                    from include/linux/perf_event.h:24,
                    from kernel/events/ring_buffer.c:12:
   arch/s390/include/asm/cpu_mf.h: In function 'qctri':
   arch/s390/include/asm/cpu_mf.h:61:12: error: 'EINVAL' undeclared (first use in this function)

cpu_mf.h had an implicit errno.h dependency, which was added
indirectly via cgroups.h but not anymore. Add it explicitly.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Tested-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Li Zefan <lizefan@huawei.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Link: http://lkml.kernel.org/r/51385F79.7000106@huawei.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 35f0020..c7c9bf6 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -12,6 +12,7 @@
 #ifndef _ASM_S390_CPU_MF_H
 #define _ASM_S390_CPU_MF_H
 
+#include <linux/errno.h>
 #include <asm/facility.h>
 
 #define CPU_MF_INT_SF_IAE	(1 << 31)	/* invalid entry address */
-- 
cgit v0.10.2


From c30bd09915ea243603d7803d53de890c4a6f1474 Mon Sep 17 00:00:00 2001
From: Dong Zhu <bluezhudong@gmail.com>
Date: Thu, 6 Dec 2012 22:03:34 +0800
Subject: timekeeping: Avoid adjust kernel time once hwclock kept in UTC time

If the Hardware Clock kept in local time,kernel will adjust the time
to be UTC time.But if Hardware Clock kept in UTC time,system will make
a dummy settimeofday call first (sys_tz.tz_minuteswest = 0) to make sure
the time is not shifted,so at this point I think maybe it is not necessary
to set the kernel time once the sys_tz.tz_minuteswest is zero.

Signed-off-by: Dong Zhu <bluezhudong@gmail.com>
[jstultz: Updated to merge with conflicting changes ]
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/kernel/time.c b/kernel/time.c
index f8342a4..effac57 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -138,13 +138,14 @@ int persistent_clock_is_local;
  */
 static inline void warp_clock(void)
 {
-	struct timespec adjust;
+	if (sys_tz.tz_minuteswest != 0) {
+		struct timespec adjust;
 
-	adjust = current_kernel_time();
-	if (sys_tz.tz_minuteswest != 0)
 		persistent_clock_is_local = 1;
-	adjust.tv_sec += sys_tz.tz_minuteswest * 60;
-	do_settimeofday(&adjust);
+		adjust = current_kernel_time();
+		adjust.tv_sec += sys_tz.tz_minuteswest * 60;
+		do_settimeofday(&adjust);
+	}
 }
 
 /*
-- 
cgit v0.10.2


From 7859e404ae73fe4f38b8cfc1af19ea82f153084e Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Fri, 22 Feb 2013 12:33:29 -0800
Subject: timekeeping: Use inject_offset in warp_clock

When warping the clock (from a local time RTC), use
timekeeping_inject_offset() to atomically add the offset.

This avoids any minor time error caused by the delay between
reading the time, and then setting the adjusted time.

Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/kernel/time.c b/kernel/time.c
index effac57..d3617db 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -142,9 +142,9 @@ static inline void warp_clock(void)
 		struct timespec adjust;
 
 		persistent_clock_is_local = 1;
-		adjust = current_kernel_time();
-		adjust.tv_sec += sys_tz.tz_minuteswest * 60;
-		do_settimeofday(&adjust);
+		adjust.tv_sec = sys_tz.tz_minuteswest * 60;
+		adjust.tv_nsec = 0;
+		timekeeping_inject_offset(&adjust);
 	}
 }
 
-- 
cgit v0.10.2


From 3195ef59cb42cda3aeeb24a7fd2ba1b900c4a3cc Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Thu, 14 Feb 2013 12:02:54 -0500
Subject: x86: Do full rtc synchronization with ntp

Every 11 minutes ntp attempts to update the x86 rtc with the current
system time.  Currently, the x86 code only updates the rtc if the system
time is within +/-15 minutes of the current value of the rtc. This
was done originally to avoid setting the RTC if the RTC was in localtime
mode (common with Windows dualbooting).  Other architectures do a full
synchronization and now that we have better infrastructure to detect
when the RTC is in localtime, there is no reason that x86 should be
software limited to a 30 minute window.

This patch changes the behavior of the kernel to do a full synchronization
(year, month, day, hour, minute, and second) of the rtc when ntp requests
a synchronization between the system time and the rtc.

I've used the RTC library functions in this patchset as they do all the
required bounds checking.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: x86@kernel.org
Cc: Matt Fleming <matt.fleming@intel.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: linux-efi@vger.kernel.org
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
[jstultz: Tweak commit message, fold in build fix found by fengguang
Also add select RTC_LIB to X86, per new dependency, as found by prarit]
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a4f24f5..26bd792 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -120,6 +120,7 @@ config X86
 	select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
 	select OLD_SIGACTION if X86_32
 	select COMPAT_OLD_SIGACTION if IA32_EMULATION
+	select RTC_LIB
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 2e8f3d3..198eb20 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -13,6 +13,7 @@
 #include <asm/x86_init.h>
 #include <asm/time.h>
 #include <asm/mrst.h>
+#include <asm/rtc.h>
 
 #ifdef CONFIG_X86_32
 /*
@@ -36,70 +37,24 @@ EXPORT_SYMBOL(rtc_lock);
  * nowtime is written into the registers of the CMOS clock, it will
  * jump to the next second precisely 500 ms later. Check the Motorola
  * MC146818A or Dallas DS12887 data sheet for details.
- *
- * BUG: This routine does not handle hour overflow properly; it just
- *      sets the minutes. Usually you'll only notice that after reboot!
  */
 int mach_set_rtc_mmss(unsigned long nowtime)
 {
-	int real_seconds, real_minutes, cmos_minutes;
-	unsigned char save_control, save_freq_select;
-	unsigned long flags;
+	struct rtc_time tm;
 	int retval = 0;
 
-	spin_lock_irqsave(&rtc_lock, flags);
-
-	 /* tell the clock it's being set */
-	save_control = CMOS_READ(RTC_CONTROL);
-	CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
-
-	/* stop and reset prescaler */
-	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-	CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-
-	cmos_minutes = CMOS_READ(RTC_MINUTES);
-	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		cmos_minutes = bcd2bin(cmos_minutes);
-
-	/*
-	 * since we're only adjusting minutes and seconds,
-	 * don't interfere with hour overflow. This avoids
-	 * messing with unknown time zones but requires your
-	 * RTC not to be off by more than 15 minutes
-	 */
-	real_seconds = nowtime % 60;
-	real_minutes = nowtime / 60;
-	/* correct for half hour time zone */
-	if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
-		real_minutes += 30;
-	real_minutes %= 60;
-
-	if (abs(real_minutes - cmos_minutes) < 30) {
-		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			real_seconds = bin2bcd(real_seconds);
-			real_minutes = bin2bcd(real_minutes);
-		}
-		CMOS_WRITE(real_seconds, RTC_SECONDS);
-		CMOS_WRITE(real_minutes, RTC_MINUTES);
+	rtc_time_to_tm(nowtime, &tm);
+	if (!rtc_valid_tm(&tm)) {
+		retval = set_rtc_time(&tm);
+		if (retval)
+			printk(KERN_ERR "%s: RTC write failed with error %d\n",
+			       __FUNCTION__, retval);
 	} else {
-		printk_once(KERN_NOTICE
-		       "set_rtc_mmss: can't update from %d to %d\n",
-		       cmos_minutes, real_minutes);
-		retval = -1;
+		printk(KERN_ERR
+		       "%s: Invalid RTC value: write of %lx to RTC failed\n",
+			__FUNCTION__, nowtime);
+		retval = -EINVAL;
 	}
-
-	/* The following flags have to be released exactly in this order,
-	 * otherwise the DS12887 (popular MC146818A clone with integrated
-	 * battery and quartz) will not reset the oscillator and will not
-	 * update precisely 500 ms later. You won't find this mentioned in
-	 * the Dallas Semiconductor data sheets, but who believes data
-	 * sheets anyway ...                           -- Markus Kuhn
-	 */
-	CMOS_WRITE(save_control, RTC_CONTROL);
-	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
 	return retval;
 }
 
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 5f2ecaf..28d9efa 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -48,6 +48,7 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/x86_init.h>
+#include <asm/rtc.h>
 
 #define EFI_DEBUG	1
 
@@ -258,10 +259,10 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
 
 int efi_set_rtc_mmss(unsigned long nowtime)
 {
-	int real_seconds, real_minutes;
 	efi_status_t 	status;
 	efi_time_t 	eft;
 	efi_time_cap_t 	cap;
+	struct rtc_time	tm;
 
 	status = efi.get_time(&eft, &cap);
 	if (status != EFI_SUCCESS) {
@@ -269,13 +270,20 @@ int efi_set_rtc_mmss(unsigned long nowtime)
 		return -1;
 	}
 
-	real_seconds = nowtime % 60;
-	real_minutes = nowtime / 60;
-	if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
-		real_minutes += 30;
-	real_minutes %= 60;
-	eft.minute = real_minutes;
-	eft.second = real_seconds;
+	rtc_time_to_tm(nowtime, &tm);
+	if (!rtc_valid_tm(&tm)) {
+		eft.year = tm.tm_year + 1900;
+		eft.month = tm.tm_mon + 1;
+		eft.day = tm.tm_mday;
+		eft.minute = tm.tm_min;
+		eft.second = tm.tm_sec;
+		eft.nanosecond = 0;
+	} else {
+		printk(KERN_ERR
+		       "%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n",
+		       __FUNCTION__, nowtime);
+		return -1;
+	}
 
 	status = efi.set_time(&eft);
 	if (status != EFI_SUCCESS) {
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index 225bd0f..d62b0a3 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -85,27 +85,35 @@ unsigned long vrtc_get_time(void)
 	return mktime(year, mon, mday, hour, min, sec);
 }
 
-/* Only care about the minutes and seconds */
 int vrtc_set_mmss(unsigned long nowtime)
 {
-	int real_sec, real_min;
 	unsigned long flags;
-	int vrtc_min;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	vrtc_min = vrtc_cmos_read(RTC_MINUTES);
-
-	real_sec = nowtime % 60;
-	real_min = nowtime / 60;
-	if (((abs(real_min - vrtc_min) + 15)/30) & 1)
-		real_min += 30;
-	real_min %= 60;
-
-	vrtc_cmos_write(real_sec, RTC_SECONDS);
-	vrtc_cmos_write(real_min, RTC_MINUTES);
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
-	return 0;
+	struct rtc_time tm;
+	int year;
+	int retval = 0;
+
+	rtc_time_to_tm(nowtime, &tm);
+	if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) {
+		/*
+		 * tm.year is the number of years since 1900, and the
+		 * vrtc need the years since 1972.
+		 */
+		year = tm.tm_year - 72;
+		spin_lock_irqsave(&rtc_lock, flags);
+		vrtc_cmos_write(year, RTC_YEAR);
+		vrtc_cmos_write(tm.tm_mon, RTC_MONTH);
+		vrtc_cmos_write(tm.tm_mday, RTC_DAY_OF_MONTH);
+		vrtc_cmos_write(tm.tm_hour, RTC_HOURS);
+		vrtc_cmos_write(tm.tm_min, RTC_MINUTES);
+		vrtc_cmos_write(tm.tm_sec, RTC_SECONDS);
+		spin_unlock_irqrestore(&rtc_lock, flags);
+	} else {
+		printk(KERN_ERR
+		       "%s: Invalid vRTC value: write of %lx to vRTC failed\n",
+			__FUNCTION__, nowtime);
+		retval = -EINVAL;
+	}
+	return retval;
 }
 
 void __init mrst_rtc_init(void)
-- 
cgit v0.10.2


From c54fdbb2823d96b842d00c548e14dbc0dd37831d Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Tue, 12 Mar 2013 11:56:45 +0800
Subject: x86: Add cpu capability flag X86_FEATURE_NONSTOP_TSC_S3

On some new Intel Atom processors (Penwell and Cloverview), there is
a feature that the TSC won't stop in S3 state, say the TSC value
won't be reset to 0 after resume. This feature makes TSC a more reliable
clocksource and could benefit the timekeeping code during system
suspend/resume cycle, so add a flag for it.

Signed-off-by: Feng Tang <feng.tang@intel.com>
[jstultz: Fix checkpatch warning]
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 93fe929..a8466f2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -100,6 +100,7 @@
 #define X86_FEATURE_AMD_DCM     (3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
 #define X86_FEATURE_EAGER_FPU	(3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 (3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1905ce9..e7ae0d8 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -96,6 +96,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 			sched_clock_stable = 1;
 	}
 
+	/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
+	if (c->x86 == 6) {
+		switch (c->x86_model) {
+		case 0x27:	/* Penwell */
+		case 0x35:	/* Cloverview */
+			set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
+			break;
+		default:
+			break;
+		}
+	}
+
 	/*
 	 * There is a known erratum on Pentium III and Core Solo
 	 * and Core Duo CPUs.
-- 
cgit v0.10.2


From 5caf4636259ae3af0efbb9bfc4cd97874b547c7d Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Tue, 12 Mar 2013 11:56:46 +0800
Subject: clocksource: Add new feature flag CLOCK_SOURCE_SUSPEND_NONSTOP

Some x86 processors have a TSC clocksource, which continues to run
even when system is suspended. Also most OMAP platforms have a
32 KHz timer which has similar capability. Add a feature flag so that
it could be utilized.

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 27cfda4..aa7032c 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -206,6 +206,7 @@ struct clocksource {
 #define CLOCK_SOURCE_WATCHDOG			0x10
 #define CLOCK_SOURCE_VALID_FOR_HRES		0x20
 #define CLOCK_SOURCE_UNSTABLE			0x40
+#define CLOCK_SOURCE_SUSPEND_NONSTOP		0x80
 
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
-- 
cgit v0.10.2


From 82f9c080b22a5b859ae2b50822dfb6b812898fdb Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Tue, 12 Mar 2013 11:56:47 +0800
Subject: x86: tsc: Add support for new S3_NONSTOP feature

Add support for new S3_NONSTOP feature

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 4b9ea10..098b3cf 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -768,7 +768,8 @@ static cycle_t read_tsc(struct clocksource *cs)
 
 static void resume_tsc(struct clocksource *cs)
 {
-	clocksource_tsc.cycle_last = 0;
+	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
+		clocksource_tsc.cycle_last = 0;
 }
 
 static struct clocksource clocksource_tsc = {
@@ -939,6 +940,9 @@ static int __init init_tsc_clocksource(void)
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
 	}
 
+	if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
+		clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
+
 	/*
 	 * Trust the results of the earlier calibration on systems
 	 * exporting a reliable TSC.
-- 
cgit v0.10.2


From e445cf1c4257cc0238d72e4129eb4739f46fd3de Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Tue, 12 Mar 2013 11:56:48 +0800
Subject: timekeeping: utilize the suspend-nonstop clocksource to count
 suspended time

There are some new processors whose TSC clocksource won't stop during
suspend. Currently, after system resumes, kernel will use persistent
clock or RTC to compensate the sleep time, but with these nonstop
clocksources, we could skip the special compensation from external
sources, and just use current clocksource for time recounting.

This can solve some time drift bugs caused by some not-so-accurate or
error-prone RTC devices.

The current way to count suspended time is first try to use the persistent
clock, and then try the RTC if persistent clock can't be used. This
patch will change the trying order to:
	suspend-nonstop clocksource -> persistent clock -> RTC

When counting the sleep time with nonstop clocksource, use an accurate way
suggested by Jason Gunthorpe to cover very large delta cycles.

Signed-off-by: Feng Tang <feng.tang@intel.com>
[jstultz: Small optimization, avoiding re-reading the clocksource]
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9a0bc98..0355f12 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -788,22 +788,66 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 static void timekeeping_resume(void)
 {
 	struct timekeeper *tk = &timekeeper;
+	struct clocksource *clock = tk->clock;
 	unsigned long flags;
-	struct timespec ts;
+	struct timespec ts_new, ts_delta;
+	cycle_t cycle_now, cycle_delta;
+	bool suspendtime_found = false;
 
-	read_persistent_clock(&ts);
+	read_persistent_clock(&ts_new);
 
 	clockevents_resume();
 	clocksource_resume();
 
 	write_seqlock_irqsave(&tk->lock, flags);
 
-	if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
-		ts = timespec_sub(ts, timekeeping_suspend_time);
-		__timekeeping_inject_sleeptime(tk, &ts);
+	/*
+	 * After system resumes, we need to calculate the suspended time and
+	 * compensate it for the OS time. There are 3 sources that could be
+	 * used: Nonstop clocksource during suspend, persistent clock and rtc
+	 * device.
+	 *
+	 * One specific platform may have 1 or 2 or all of them, and the
+	 * preference will be:
+	 *	suspend-nonstop clocksource -> persistent clock -> rtc
+	 * The less preferred source will only be tried if there is no better
+	 * usable source. The rtc part is handled separately in rtc core code.
+	 */
+	cycle_now = clock->read(clock);
+	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
+		cycle_now > clock->cycle_last) {
+		u64 num, max = ULLONG_MAX;
+		u32 mult = clock->mult;
+		u32 shift = clock->shift;
+		s64 nsec = 0;
+
+		cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+
+		/*
+		 * "cycle_delta * mutl" may cause 64 bits overflow, if the
+		 * suspended time is too long. In that case we need do the
+		 * 64 bits math carefully
+		 */
+		do_div(max, mult);
+		if (cycle_delta > max) {
+			num = div64_u64(cycle_delta, max);
+			nsec = (((u64) max * mult) >> shift) * num;
+			cycle_delta -= num * max;
+		}
+		nsec += ((u64) cycle_delta * mult) >> shift;
+
+		ts_delta = ns_to_timespec(nsec);
+		suspendtime_found = true;
+	} else if (timespec_compare(&ts_new, &timekeeping_suspend_time) > 0) {
+		ts_delta = timespec_sub(ts_new, timekeeping_suspend_time);
+		suspendtime_found = true;
 	}
-	/* re-base the last cycle value */
-	tk->clock->cycle_last = tk->clock->read(tk->clock);
+
+	if (suspendtime_found)
+		__timekeeping_inject_sleeptime(tk, &ts_delta);
+
+	/* Re-base the last cycle value */
+	clock->cycle_last = cycle_now;
 	tk->ntp_error = 0;
 	timekeeping_suspended = 0;
 	timekeeping_update(tk, false);
-- 
cgit v0.10.2


From cc244ddae6d4c6902ac9d7d64023534f8c44a7eb Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 3 May 2012 12:30:07 -0700
Subject: timekeeping: Move TAI managment into timekeeping core from ntp

Currently NTP manages the TAI offset. Since there's plans for a
CLOCK_TAI clockid, push the TAI management into the timekeeping
core.

CC: Thomas Gleixner <tglx@linutronix.de>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/include/linux/time.h b/include/linux/time.h
index d4835df..47210a1 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -181,6 +181,8 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 extern int timekeeping_valid_for_hres(void);
 extern u64 timekeeping_max_deferment(void);
 extern int timekeeping_inject_offset(struct timespec *ts);
+extern s32 timekeeping_get_tai_offset(void);
+extern void timekeeping_set_tai_offset(s32 tai_offset);
 
 struct tms;
 extern void do_sys_times(struct tms *);
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index e1d558e..ff94f43 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -62,6 +62,9 @@ struct timekeeper {
 	ktime_t			offs_boot;
 	/* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
 	struct timespec		raw_time;
+	/* The current UTC to TAI offset in seconds */
+	s32			tai_offset;
+
 	/* Seqlock for all timekeeper values */
 	seqlock_t		lock;
 };
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 072bb06..59e2749 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -53,9 +53,6 @@ static int			time_state = TIME_OK;
 /* clock status bits:							*/
 static int			time_status = STA_UNSYNC;
 
-/* TAI offset (secs):							*/
-static long			time_tai;
-
 /* time adjustment (nsecs):						*/
 static s64			time_offset;
 
@@ -415,7 +412,6 @@ int second_overflow(unsigned long secs)
 		else if (secs % 86400 == 0) {
 			leap = -1;
 			time_state = TIME_OOP;
-			time_tai++;
 			printk(KERN_NOTICE
 				"Clock: inserting leap second 23:59:60 UTC\n");
 		}
@@ -425,7 +421,6 @@ int second_overflow(unsigned long secs)
 			time_state = TIME_OK;
 		else if ((secs + 1) % 86400 == 0) {
 			leap = 1;
-			time_tai--;
 			time_state = TIME_WAIT;
 			printk(KERN_NOTICE
 				"Clock: deleting leap second 23:59:59 UTC\n");
@@ -579,7 +574,9 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
  * Called with ntp_lock held, so we can access and modify
  * all the global NTP state:
  */
-static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts)
+static inline void process_adjtimex_modes(struct timex *txc,
+						struct timespec *ts,
+						s32 *time_tai)
 {
 	if (txc->modes & ADJ_STATUS)
 		process_adj_status(txc, ts);
@@ -613,7 +610,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
 	}
 
 	if (txc->modes & ADJ_TAI && txc->constant > 0)
-		time_tai = txc->constant;
+		*time_tai = txc->constant;
 
 	if (txc->modes & ADJ_OFFSET)
 		ntp_update_offset(txc->offset);
@@ -632,6 +629,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
 int do_adjtimex(struct timex *txc)
 {
 	struct timespec ts;
+	u32 time_tai, orig_tai;
 	int result;
 
 	/* Validate the data before disabling interrupts */
@@ -671,6 +669,7 @@ int do_adjtimex(struct timex *txc)
 	}
 
 	getnstimeofday(&ts);
+	orig_tai = time_tai = timekeeping_get_tai_offset();
 
 	raw_spin_lock_irq(&ntp_lock);
 
@@ -687,7 +686,7 @@ int do_adjtimex(struct timex *txc)
 
 		/* If there are input parameters, then process them: */
 		if (txc->modes)
-			process_adjtimex_modes(txc, &ts);
+			process_adjtimex_modes(txc, &ts, &time_tai);
 
 		txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
 				  NTP_SCALE_SHIFT);
@@ -716,6 +715,9 @@ int do_adjtimex(struct timex *txc)
 
 	raw_spin_unlock_irq(&ntp_lock);
 
+	if (time_tai != orig_tai)
+		timekeeping_set_tai_offset(time_tai);
+
 	txc->time.tv_sec = ts.tv_sec;
 	txc->time.tv_usec = ts.tv_nsec;
 	if (!(time_status & STA_NANO))
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 0355f12..937098a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -513,6 +513,48 @@ error: /* even if we error out, we forwarded the time, so call update */
 }
 EXPORT_SYMBOL(timekeeping_inject_offset);
 
+
+/**
+ * timekeeping_get_tai_offset - Returns current TAI offset from UTC
+ *
+ */
+s32 timekeeping_get_tai_offset(void)
+{
+	struct timekeeper *tk = &timekeeper;
+	unsigned int seq;
+	s32 ret;
+
+	do {
+		seq = read_seqbegin(&tk->lock);
+		ret = tk->tai_offset;
+	} while (read_seqretry(&tk->lock, seq));
+
+	return ret;
+}
+
+/**
+ * __timekeeping_set_tai_offset - Lock free worker function
+ *
+ */
+void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
+{
+	tk->tai_offset = tai_offset;
+}
+
+/**
+ * timekeeping_set_tai_offset - Sets the current TAI offset from UTC
+ *
+ */
+void timekeeping_set_tai_offset(s32 tai_offset)
+{
+	struct timekeeper *tk = &timekeeper;
+	unsigned long flags;
+
+	write_seqlock_irqsave(&tk->lock, flags);
+	__timekeeping_set_tai_offset(tk, tai_offset);
+	write_sequnlock_irqrestore(&tk->lock, flags);
+}
+
 /**
  * change_clocksource - Swaps clocksources if a new one is available
  *
@@ -1143,6 +1185,8 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
 			tk_set_wall_to_mono(tk,
 				timespec_sub(tk->wall_to_monotonic, ts));
 
+			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
+
 			clock_was_set_delayed();
 		}
 	}
-- 
cgit v0.10.2


From 1ff3c9677bff7e468e0c487d0ffefe4e901d33f4 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 3 May 2012 12:43:40 -0700
Subject: timekeeping: Add CLOCK_TAI clockid

This add a CLOCK_TAI clockid and the needed accessors.

CC: Thomas Gleixner <tglx@linutronix.de>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/include/linux/time.h b/include/linux/time.h
index 47210a1..22d81b3 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -183,6 +183,7 @@ extern u64 timekeeping_max_deferment(void);
 extern int timekeeping_inject_offset(struct timespec *ts);
 extern s32 timekeeping_get_tai_offset(void);
 extern void timekeeping_set_tai_offset(s32 tai_offset);
+extern void timekeeping_clocktai(struct timespec *ts);
 
 struct tms;
 extern void do_sys_times(struct tms *);
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index 0d3c0ed..e75e1b6 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -54,11 +54,9 @@ struct itimerval {
 #define CLOCK_BOOTTIME			7
 #define CLOCK_REALTIME_ALARM		8
 #define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_SGI_CYCLE			10	/* Hardware specific */
+#define CLOCK_TAI			11
 
-/*
- * The IDs of various hardware clocks:
- */
-#define CLOCK_SGI_CYCLE			10
 #define MAX_CLOCKS			16
 #define CLOCKS_MASK			(CLOCK_REALTIME | CLOCK_MONOTONIC)
 #define CLOCKS_MONO			CLOCK_MONOTONIC
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 6edbb2c..fbfc5f1 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -221,6 +221,11 @@ static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
 	return 0;
 }
 
+static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
+{
+	timekeeping_clocktai(tp);
+	return 0;
+}
 
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
@@ -261,6 +266,10 @@ static __init int init_posix_timers(void)
 		.clock_getres	= posix_get_coarse_res,
 		.clock_get	= posix_get_monotonic_coarse,
 	};
+	struct k_clock clock_tai = {
+		.clock_getres	= hrtimer_get_res,
+		.clock_get	= posix_get_tai,
+	};
 	struct k_clock clock_boottime = {
 		.clock_getres	= hrtimer_get_res,
 		.clock_get	= posix_get_boottime,
@@ -278,6 +287,7 @@ static __init int init_posix_timers(void)
 	posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
 	posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
 	posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
+	posix_timers_register_clock(CLOCK_TAI, &clock_tai);
 
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
 					sizeof (struct k_itimer), 0, SLAB_PANIC,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 937098a..8a84275 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -379,6 +379,36 @@ void ktime_get_ts(struct timespec *ts)
 }
 EXPORT_SYMBOL_GPL(ktime_get_ts);
 
+
+/**
+ * timekeeping_clocktai - Returns the TAI time of day in a timespec
+ * @ts:		pointer to the timespec to be set
+ *
+ * Returns the time of day in a timespec.
+ */
+void timekeeping_clocktai(struct timespec *ts)
+{
+	struct timekeeper *tk = &timekeeper;
+	unsigned long seq;
+	u64 nsecs;
+
+	WARN_ON(timekeeping_suspended);
+
+	do {
+		seq = read_seqbegin(&tk->lock);
+
+		ts->tv_sec = tk->xtime_sec + tk->tai_offset;
+		nsecs = timekeeping_get_ns(tk);
+
+	} while (read_seqretry(&tk->lock, seq));
+
+	ts->tv_nsec = 0;
+	timespec_add_ns(ts, nsecs);
+
+}
+EXPORT_SYMBOL(timekeeping_clocktai);
+
+
 #ifdef CONFIG_NTP_PPS
 
 /**
-- 
cgit v0.10.2


From 90adda98b89aaf68b06014ecf805b6c477daa19b Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 21 Jan 2013 17:00:11 -0800
Subject: hrtimer: Add hrtimer support for CLOCK_TAI

Add hrtimer support for CLOCK_TAI, as well as posix timer interfaces.

Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index cc07d27..d19a5c2 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -157,6 +157,7 @@ enum  hrtimer_base_type {
 	HRTIMER_BASE_MONOTONIC,
 	HRTIMER_BASE_REALTIME,
 	HRTIMER_BASE_BOOTTIME,
+	HRTIMER_BASE_TAI,
 	HRTIMER_MAX_CLOCK_BASES,
 };
 
@@ -327,7 +328,9 @@ extern ktime_t ktime_get(void);
 extern ktime_t ktime_get_real(void);
 extern ktime_t ktime_get_boottime(void);
 extern ktime_t ktime_get_monotonic_offset(void);
-extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot);
+extern ktime_t ktime_get_clocktai(void);
+extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,
+					 ktime_t *offs_tai);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index ff94f43..26700d8 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -64,6 +64,8 @@ struct timekeeper {
 	struct timespec		raw_time;
 	/* The current UTC to TAI offset in seconds */
 	s32			tai_offset;
+	/* Offset clock monotonic -> clock tai */
+	ktime_t			offs_tai;
 
 	/* Seqlock for all timekeeper values */
 	seqlock_t		lock;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cc47812..2587207 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -83,6 +83,12 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 			.get_time = &ktime_get_boottime,
 			.resolution = KTIME_LOW_RES,
 		},
+		{
+			.index = HRTIMER_BASE_TAI,
+			.clockid = CLOCK_TAI,
+			.get_time = &ktime_get_clocktai,
+			.resolution = KTIME_LOW_RES,
+		},
 	}
 };
 
@@ -90,6 +96,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
 	[CLOCK_REALTIME]	= HRTIMER_BASE_REALTIME,
 	[CLOCK_MONOTONIC]	= HRTIMER_BASE_MONOTONIC,
 	[CLOCK_BOOTTIME]	= HRTIMER_BASE_BOOTTIME,
+	[CLOCK_TAI]		= HRTIMER_BASE_TAI,
 };
 
 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
@@ -106,8 +113,10 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 {
 	ktime_t xtim, mono, boot;
 	struct timespec xts, tom, slp;
+	s32 tai_offset;
 
 	get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
+	tai_offset = timekeeping_get_tai_offset();
 
 	xtim = timespec_to_ktime(xts);
 	mono = ktime_add(xtim, timespec_to_ktime(tom));
@@ -115,6 +124,8 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 	base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
 	base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
 	base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
+	base->clock_base[HRTIMER_BASE_TAI].softirq_time =
+				ktime_add(xtim,	ktime_set(tai_offset, 0));
 }
 
 /*
@@ -651,8 +662,9 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
 	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
 	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
+	ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
 
-	return ktime_get_update_offsets(offs_real, offs_boot);
+	return ktime_get_update_offsets(offs_real, offs_boot, offs_tai);
 }
 
 /*
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index fbfc5f1..2a2e173 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -269,6 +269,12 @@ static __init int init_posix_timers(void)
 	struct k_clock clock_tai = {
 		.clock_getres	= hrtimer_get_res,
 		.clock_get	= posix_get_tai,
+		.nsleep		= common_nsleep,
+		.nsleep_restart	= hrtimer_nanosleep_restart,
+		.timer_create	= common_timer_create,
+		.timer_set	= common_timer_set,
+		.timer_get	= common_timer_get,
+		.timer_del	= common_timer_del,
 	};
 	struct k_clock clock_boottime = {
 		.clock_getres	= hrtimer_get_res,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 8a84275..8061ae0 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -67,6 +67,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
 	tk->wall_to_monotonic = wtm;
 	set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
 	tk->offs_real = timespec_to_ktime(tmp);
+	tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tk->tai_offset, 0));
 }
 
 static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
@@ -409,6 +410,20 @@ void timekeeping_clocktai(struct timespec *ts)
 EXPORT_SYMBOL(timekeeping_clocktai);
 
 
+/**
+ * ktime_get_clocktai - Returns the TAI time of day in a ktime
+ *
+ * Returns the time of day in a ktime.
+ */
+ktime_t ktime_get_clocktai(void)
+{
+	struct timespec ts;
+
+	timekeeping_clocktai(&ts);
+	return timespec_to_ktime(ts);
+}
+EXPORT_SYMBOL(ktime_get_clocktai);
+
 #ifdef CONFIG_NTP_PPS
 
 /**
@@ -569,6 +584,7 @@ s32 timekeeping_get_tai_offset(void)
 void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
 {
 	tk->tai_offset = tai_offset;
+	tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tai_offset, 0));
 }
 
 /**
@@ -1539,7 +1555,8 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
  * Returns current monotonic time and updates the offsets
  * Called from hrtimer_interupt() or retrigger_next_event()
  */
-ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
+ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,
+							ktime_t *offs_tai)
 {
 	struct timekeeper *tk = &timekeeper;
 	ktime_t now;
@@ -1554,6 +1571,7 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
 
 		*offs_real = tk->offs_real;
 		*offs_boot = tk->offs_boot;
+		*offs_tai = tk->offs_tai;
 	} while (read_seqretry(&tk->lock, seq));
 
 	now = ktime_add_ns(ktime_set(secs, 0), nsecs);
-- 
cgit v0.10.2


From 23a9537a6999fce16f06ca61fc6cac52c8fbdc86 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Feb 2013 22:51:36 +0000
Subject: timekeeping: Calc stuff once

Calculate the cycle interval shifted value once. No functional change,
just makes the code more readable.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 8061ae0..c442a4c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1250,15 +1250,16 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
 static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
 						u32 shift)
 {
+	cycle_t interval = tk->cycle_interval << shift;
 	u64 raw_nsecs;
 
 	/* If the offset is smaller then a shifted interval, do nothing */
-	if (offset < tk->cycle_interval<<shift)
+	if (offset < interval)
 		return offset;
 
 	/* Accumulate one shifted interval */
-	offset -= tk->cycle_interval << shift;
-	tk->clock->cycle_last += tk->cycle_interval << shift;
+	offset -= interval;
+	tk->clock->cycle_last += interval;
 
 	tk->xtime_nsec += tk->xtime_interval << shift;
 	accumulate_nsecs_to_secs(tk);
-- 
cgit v0.10.2


From eb93e4d93093615c60cb7dd3dcb24e46bd7d62d4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Feb 2013 22:51:36 +0000
Subject: timekeeping: Make jiffies_lock internal

Nothing outside of the timekeeping core needs that lock.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 82ed068..8fb8edf 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -75,7 +75,6 @@ extern int register_refined_jiffies(long clock_tick_rate);
  */
 extern u64 __jiffy_data jiffies_64;
 extern unsigned long volatile __jiffy_data jiffies;
-extern seqlock_t jiffies_lock;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index cf3e59e..f5c9207 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -4,6 +4,8 @@
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
 
+extern seqlock_t jiffies_lock;
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
 
 #define TICK_DO_TIMER_NONE	-1
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index c442a4c..b0c648f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -23,6 +23,7 @@
 #include <linux/stop_machine.h>
 #include <linux/pvclock_gtod.h>
 
+#include "tick-internal.h"
 
 static struct timekeeper timekeeper;
 
-- 
cgit v0.10.2


From 7e40672d930b369c1984457233ec5557aa53bfb8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Feb 2013 22:51:37 +0000
Subject: timekeeping: Move lock out of timekeeper struct

Make the lock a separate entity. Preparatory patch for shadow
timekeeper structure.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[Merged with CLOCK_TAI changes]
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 26700d8..a151bd7 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -67,8 +67,6 @@ struct timekeeper {
 	/* Offset clock monotonic -> clock tai */
 	ktime_t			offs_tai;
 
-	/* Seqlock for all timekeeper values */
-	seqlock_t		lock;
 };
 
 static inline struct timespec tk_xtime(struct timekeeper *tk)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b0c648f..caede71 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -26,6 +26,7 @@
 #include "tick-internal.h"
 
 static struct timekeeper timekeeper;
+static DEFINE_SEQLOCK(timekeeper_lock);
 
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
@@ -212,11 +213,11 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb)
 	unsigned long flags;
 	int ret;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	write_seqlock_irqsave(&timekeeper_lock, flags);
 	ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
 	/* update timekeeping data */
 	update_pvclock_gtod(tk);
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_sequnlock_irqrestore(&timekeeper_lock, flags);
 
 	return ret;
 }
@@ -230,13 +231,12 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
  */
 int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
 {
-	struct timekeeper *tk = &timekeeper;
 	unsigned long flags;
 	int ret;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	write_seqlock_irqsave(&timekeeper_lock, flags);
 	ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_sequnlock_irqrestore(&timekeeper_lock, flags);
 
 	return ret;
 }
@@ -296,12 +296,12 @@ int __getnstimeofday(struct timespec *ts)
 	s64 nsecs = 0;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 
 		ts->tv_sec = tk->xtime_sec;
 		nsecs = timekeeping_get_ns(tk);
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	ts->tv_nsec = 0;
 	timespec_add_ns(ts, nsecs);
@@ -337,11 +337,11 @@ ktime_t ktime_get(void)
 	WARN_ON(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 		secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
 		nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 	/*
 	 * Use ktime_set/ktime_add_ns to create a proper ktime on
 	 * 32-bit architectures without CONFIG_KTIME_SCALAR.
@@ -368,12 +368,12 @@ void ktime_get_ts(struct timespec *ts)
 	WARN_ON(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 		ts->tv_sec = tk->xtime_sec;
 		nsec = timekeeping_get_ns(tk);
 		tomono = tk->wall_to_monotonic;
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	ts->tv_sec += tomono.tv_sec;
 	ts->tv_nsec = 0;
@@ -397,12 +397,12 @@ void timekeeping_clocktai(struct timespec *ts)
 	WARN_ON(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 
 		ts->tv_sec = tk->xtime_sec + tk->tai_offset;
 		nsecs = timekeeping_get_ns(tk);
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	ts->tv_nsec = 0;
 	timespec_add_ns(ts, nsecs);
@@ -445,7 +445,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
 	WARN_ON_ONCE(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 
 		*ts_raw = tk->raw_time;
 		ts_real->tv_sec = tk->xtime_sec;
@@ -454,7 +454,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
 		nsecs_raw = timekeeping_get_ns_raw(tk);
 		nsecs_real = timekeeping_get_ns(tk);
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	timespec_add_ns(ts_raw, nsecs_raw);
 	timespec_add_ns(ts_real, nsecs_real);
@@ -494,7 +494,7 @@ int do_settimeofday(const struct timespec *tv)
 	if (!timespec_valid_strict(tv))
 		return -EINVAL;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	write_seqlock_irqsave(&timekeeper_lock, flags);
 
 	timekeeping_forward_now(tk);
 
@@ -508,7 +508,7 @@ int do_settimeofday(const struct timespec *tv)
 
 	timekeeping_update(tk, true);
 
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_sequnlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
 	clock_was_set();
@@ -533,7 +533,7 @@ int timekeeping_inject_offset(struct timespec *ts)
 	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	write_seqlock_irqsave(&timekeeper_lock, flags);
 
 	timekeeping_forward_now(tk);
 
@@ -550,7 +550,7 @@ int timekeeping_inject_offset(struct timespec *ts)
 error: /* even if we error out, we forwarded the time, so call update */
 	timekeeping_update(tk, true);
 
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_sequnlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
 	clock_was_set();
@@ -571,9 +571,9 @@ s32 timekeeping_get_tai_offset(void)
 	s32 ret;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 		ret = tk->tai_offset;
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	return ret;
 }
@@ -597,9 +597,9 @@ void timekeeping_set_tai_offset(s32 tai_offset)
 	struct timekeeper *tk = &timekeeper;
 	unsigned long flags;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	write_seqlock_irqsave(&timekeeper_lock, flags);
 	__timekeeping_set_tai_offset(tk, tai_offset);
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_sequnlock_irqrestore(&timekeeper_lock, flags);
 }
 
 /**
@@ -615,7 +615,7 @@ static int change_clocksource(void *data)
 
 	new = (struct clocksource *) data;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	write_seqlock_irqsave(&timekeeper_lock, flags);
 
 	timekeeping_forward_now(tk);
 	if (!new->enable || new->enable(new) == 0) {
@@ -626,7 +626,7 @@ static int change_clocksource(void *data)
 	}
 	timekeeping_update(tk, true);
 
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_sequnlock_irqrestore(&timekeeper_lock, flags);
 
 	return 0;
 }
@@ -676,11 +676,11 @@ void getrawmonotonic(struct timespec *ts)
 	s64 nsecs;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 		nsecs = timekeeping_get_ns_raw(tk);
 		*ts = tk->raw_time;
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	timespec_add_ns(ts, nsecs);
 }
@@ -696,11 +696,11 @@ int timekeeping_valid_for_hres(void)
 	int ret;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 
 		ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	return ret;
 }
@@ -715,11 +715,11 @@ u64 timekeeping_max_deferment(void)
 	u64 ret;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 
 		ret = tk->clock->max_idle_ns;
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	return ret;
 }
@@ -782,11 +782,9 @@ void __init timekeeping_init(void)
 		boot.tv_nsec = 0;
 	}
 
-	seqlock_init(&tk->lock);
-
 	ntp_init();
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	write_seqlock_irqsave(&timekeeper_lock, flags);
 	clock = clocksource_default_clock();
 	if (clock->enable)
 		clock->enable(clock);
@@ -805,7 +803,7 @@ void __init timekeeping_init(void)
 	tmp.tv_nsec = 0;
 	tk_set_sleep_time(tk, tmp);
 
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_sequnlock_irqrestore(&timekeeper_lock, flags);
 }
 
 /* time in seconds when suspend began */
@@ -853,7 +851,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 	if (has_persistent_clock())
 		return;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	write_seqlock_irqsave(&timekeeper_lock, flags);
 
 	timekeeping_forward_now(tk);
 
@@ -861,7 +859,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 
 	timekeeping_update(tk, true);
 
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_sequnlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
 	clock_was_set();
@@ -888,7 +886,7 @@ static void timekeeping_resume(void)
 	clockevents_resume();
 	clocksource_resume();
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	write_seqlock_irqsave(&timekeeper_lock, flags);
 
 	/*
 	 * After system resumes, we need to calculate the suspended time and
@@ -940,7 +938,7 @@ static void timekeeping_resume(void)
 	tk->ntp_error = 0;
 	timekeeping_suspended = 0;
 	timekeeping_update(tk, false);
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_sequnlock_irqrestore(&timekeeper_lock, flags);
 
 	touch_softlockup_watchdog();
 
@@ -959,7 +957,7 @@ static int timekeeping_suspend(void)
 
 	read_persistent_clock(&timekeeping_suspend_time);
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	write_seqlock_irqsave(&timekeeper_lock, flags);
 	timekeeping_forward_now(tk);
 	timekeeping_suspended = 1;
 
@@ -982,7 +980,7 @@ static int timekeeping_suspend(void)
 		timekeeping_suspend_time =
 			timespec_add(timekeeping_suspend_time, delta_delta);
 	}
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_sequnlock_irqrestore(&timekeeper_lock, flags);
 
 	clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
 	clocksource_suspend();
@@ -1322,7 +1320,7 @@ static void update_wall_time(void)
 	int shift = 0, maxshift;
 	unsigned long flags;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	write_seqlock_irqsave(&timekeeper_lock, flags);
 
 	/* Make sure we're fully resumed: */
 	if (unlikely(timekeeping_suspended))
@@ -1377,7 +1375,7 @@ static void update_wall_time(void)
 	timekeeping_update(tk, false);
 
 out:
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_sequnlock_irqrestore(&timekeeper_lock, flags);
 
 }
 
@@ -1425,13 +1423,13 @@ void get_monotonic_boottime(struct timespec *ts)
 	WARN_ON(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 		ts->tv_sec = tk->xtime_sec;
 		nsec = timekeeping_get_ns(tk);
 		tomono = tk->wall_to_monotonic;
 		sleep = tk->total_sleep_time;
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	ts->tv_sec += tomono.tv_sec + sleep.tv_sec;
 	ts->tv_nsec = 0;
@@ -1490,10 +1488,10 @@ struct timespec current_kernel_time(void)
 	unsigned long seq;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 
 		now = tk_xtime(tk);
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	return now;
 }
@@ -1506,11 +1504,11 @@ struct timespec get_monotonic_coarse(void)
 	unsigned long seq;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 
 		now = tk_xtime(tk);
 		mono = tk->wall_to_monotonic;
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
 				now.tv_nsec + mono.tv_nsec);
@@ -1541,11 +1539,11 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
 	unsigned long seq;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 		*xtim = tk_xtime(tk);
 		*wtom = tk->wall_to_monotonic;
 		*sleep = tk->total_sleep_time;
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1566,7 +1564,7 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,
 	u64 secs, nsecs;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 
 		secs = tk->xtime_sec;
 		nsecs = timekeeping_get_ns(tk);
@@ -1574,7 +1572,7 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,
 		*offs_real = tk->offs_real;
 		*offs_boot = tk->offs_boot;
 		*offs_tai = tk->offs_tai;
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	now = ktime_add_ns(ktime_set(secs, 0), nsecs);
 	now = ktime_sub(now, *offs_real);
@@ -1592,9 +1590,9 @@ ktime_t ktime_get_monotonic_offset(void)
 	struct timespec wtom;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqbegin(&timekeeper_lock);
 		wtom = tk->wall_to_monotonic;
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqretry(&timekeeper_lock, seq));
 
 	return timespec_to_ktime(wtom);
 }
-- 
cgit v0.10.2


From 9a7a71b1d0968fc2bd602b7481cde1d4872e01ff Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Feb 2013 22:51:38 +0000
Subject: timekeeping: Split timekeeper_lock into lock and seqcount

We want to shorten the seqcount write hold time. So split the seqlock
into a lock and a seqcount.

Open code the seqwrite_lock in the places which matter and drop the
sequence counter update where it's pointless.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[jstultz: Merge fixups from CLOCK_TAI collisions]
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index caede71..5e048e0 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -26,7 +26,8 @@
 #include "tick-internal.h"
 
 static struct timekeeper timekeeper;
-static DEFINE_SEQLOCK(timekeeper_lock);
+static DEFINE_RAW_SPINLOCK(timekeeper_lock);
+static seqcount_t timekeeper_seq;
 
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
@@ -204,8 +205,6 @@ static void update_pvclock_gtod(struct timekeeper *tk)
 
 /**
  * pvclock_gtod_register_notifier - register a pvclock timedata update listener
- *
- * Must hold write on timekeeper.lock
  */
 int pvclock_gtod_register_notifier(struct notifier_block *nb)
 {
@@ -213,11 +212,10 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb)
 	unsigned long flags;
 	int ret;
 
-	write_seqlock_irqsave(&timekeeper_lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
-	/* update timekeeping data */
 	update_pvclock_gtod(tk);
-	write_sequnlock_irqrestore(&timekeeper_lock, flags);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	return ret;
 }
@@ -226,23 +224,21 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
 /**
  * pvclock_gtod_unregister_notifier - unregister a pvclock
  * timedata update listener
- *
- * Must hold write on timekeeper.lock
  */
 int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
 {
 	unsigned long flags;
 	int ret;
 
-	write_seqlock_irqsave(&timekeeper_lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
-	write_sequnlock_irqrestore(&timekeeper_lock, flags);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
 
-/* must hold write on timekeeper.lock */
+/* must hold timekeeper_lock */
 static void timekeeping_update(struct timekeeper *tk, bool clearntp)
 {
 	if (clearntp) {
@@ -296,12 +292,12 @@ int __getnstimeofday(struct timespec *ts)
 	s64 nsecs = 0;
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		ts->tv_sec = tk->xtime_sec;
 		nsecs = timekeeping_get_ns(tk);
 
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	ts->tv_nsec = 0;
 	timespec_add_ns(ts, nsecs);
@@ -337,11 +333,11 @@ ktime_t ktime_get(void)
 	WARN_ON(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 		secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
 		nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
 
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 	/*
 	 * Use ktime_set/ktime_add_ns to create a proper ktime on
 	 * 32-bit architectures without CONFIG_KTIME_SCALAR.
@@ -368,12 +364,12 @@ void ktime_get_ts(struct timespec *ts)
 	WARN_ON(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 		ts->tv_sec = tk->xtime_sec;
 		nsec = timekeeping_get_ns(tk);
 		tomono = tk->wall_to_monotonic;
 
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	ts->tv_sec += tomono.tv_sec;
 	ts->tv_nsec = 0;
@@ -397,12 +393,12 @@ void timekeeping_clocktai(struct timespec *ts)
 	WARN_ON(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		ts->tv_sec = tk->xtime_sec + tk->tai_offset;
 		nsecs = timekeeping_get_ns(tk);
 
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	ts->tv_nsec = 0;
 	timespec_add_ns(ts, nsecs);
@@ -445,7 +441,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
 	WARN_ON_ONCE(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		*ts_raw = tk->raw_time;
 		ts_real->tv_sec = tk->xtime_sec;
@@ -454,7 +450,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
 		nsecs_raw = timekeeping_get_ns_raw(tk);
 		nsecs_real = timekeeping_get_ns(tk);
 
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	timespec_add_ns(ts_raw, nsecs_raw);
 	timespec_add_ns(ts_real, nsecs_real);
@@ -494,7 +490,8 @@ int do_settimeofday(const struct timespec *tv)
 	if (!timespec_valid_strict(tv))
 		return -EINVAL;
 
-	write_seqlock_irqsave(&timekeeper_lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 
 	timekeeping_forward_now(tk);
 
@@ -508,7 +505,8 @@ int do_settimeofday(const struct timespec *tv)
 
 	timekeeping_update(tk, true);
 
-	write_sequnlock_irqrestore(&timekeeper_lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
 	clock_was_set();
@@ -533,7 +531,8 @@ int timekeeping_inject_offset(struct timespec *ts)
 	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
 
-	write_seqlock_irqsave(&timekeeper_lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 
 	timekeeping_forward_now(tk);
 
@@ -550,7 +549,8 @@ int timekeeping_inject_offset(struct timespec *ts)
 error: /* even if we error out, we forwarded the time, so call update */
 	timekeeping_update(tk, true);
 
-	write_sequnlock_irqrestore(&timekeeper_lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
 	clock_was_set();
@@ -571,9 +571,9 @@ s32 timekeeping_get_tai_offset(void)
 	s32 ret;
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 		ret = tk->tai_offset;
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	return ret;
 }
@@ -597,9 +597,11 @@ void timekeeping_set_tai_offset(s32 tai_offset)
 	struct timekeeper *tk = &timekeeper;
 	unsigned long flags;
 
-	write_seqlock_irqsave(&timekeeper_lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 	__timekeeping_set_tai_offset(tk, tai_offset);
-	write_sequnlock_irqrestore(&timekeeper_lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 }
 
 /**
@@ -615,7 +617,8 @@ static int change_clocksource(void *data)
 
 	new = (struct clocksource *) data;
 
-	write_seqlock_irqsave(&timekeeper_lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 
 	timekeeping_forward_now(tk);
 	if (!new->enable || new->enable(new) == 0) {
@@ -626,7 +629,8 @@ static int change_clocksource(void *data)
 	}
 	timekeeping_update(tk, true);
 
-	write_sequnlock_irqrestore(&timekeeper_lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	return 0;
 }
@@ -676,11 +680,11 @@ void getrawmonotonic(struct timespec *ts)
 	s64 nsecs;
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 		nsecs = timekeeping_get_ns_raw(tk);
 		*ts = tk->raw_time;
 
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	timespec_add_ns(ts, nsecs);
 }
@@ -696,11 +700,11 @@ int timekeeping_valid_for_hres(void)
 	int ret;
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	return ret;
 }
@@ -715,11 +719,11 @@ u64 timekeeping_max_deferment(void)
 	u64 ret;
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		ret = tk->clock->max_idle_ns;
 
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	return ret;
 }
@@ -784,7 +788,8 @@ void __init timekeeping_init(void)
 
 	ntp_init();
 
-	write_seqlock_irqsave(&timekeeper_lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 	clock = clocksource_default_clock();
 	if (clock->enable)
 		clock->enable(clock);
@@ -803,7 +808,8 @@ void __init timekeeping_init(void)
 	tmp.tv_nsec = 0;
 	tk_set_sleep_time(tk, tmp);
 
-	write_sequnlock_irqrestore(&timekeeper_lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 }
 
 /* time in seconds when suspend began */
@@ -851,7 +857,8 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 	if (has_persistent_clock())
 		return;
 
-	write_seqlock_irqsave(&timekeeper_lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 
 	timekeeping_forward_now(tk);
 
@@ -859,7 +866,8 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 
 	timekeeping_update(tk, true);
 
-	write_sequnlock_irqrestore(&timekeeper_lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
 	clock_was_set();
@@ -886,7 +894,8 @@ static void timekeeping_resume(void)
 	clockevents_resume();
 	clocksource_resume();
 
-	write_seqlock_irqsave(&timekeeper_lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 
 	/*
 	 * After system resumes, we need to calculate the suspended time and
@@ -938,7 +947,8 @@ static void timekeeping_resume(void)
 	tk->ntp_error = 0;
 	timekeeping_suspended = 0;
 	timekeeping_update(tk, false);
-	write_sequnlock_irqrestore(&timekeeper_lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	touch_softlockup_watchdog();
 
@@ -957,7 +967,8 @@ static int timekeeping_suspend(void)
 
 	read_persistent_clock(&timekeeping_suspend_time);
 
-	write_seqlock_irqsave(&timekeeper_lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 	timekeeping_forward_now(tk);
 	timekeeping_suspended = 1;
 
@@ -980,7 +991,8 @@ static int timekeeping_suspend(void)
 		timekeeping_suspend_time =
 			timespec_add(timekeeping_suspend_time, delta_delta);
 	}
-	write_sequnlock_irqrestore(&timekeeper_lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
 	clocksource_suspend();
@@ -1320,7 +1332,8 @@ static void update_wall_time(void)
 	int shift = 0, maxshift;
 	unsigned long flags;
 
-	write_seqlock_irqsave(&timekeeper_lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 
 	/* Make sure we're fully resumed: */
 	if (unlikely(timekeeping_suspended))
@@ -1375,7 +1388,8 @@ static void update_wall_time(void)
 	timekeeping_update(tk, false);
 
 out:
-	write_sequnlock_irqrestore(&timekeeper_lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 }
 
@@ -1423,13 +1437,13 @@ void get_monotonic_boottime(struct timespec *ts)
 	WARN_ON(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 		ts->tv_sec = tk->xtime_sec;
 		nsec = timekeeping_get_ns(tk);
 		tomono = tk->wall_to_monotonic;
 		sleep = tk->total_sleep_time;
 
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	ts->tv_sec += tomono.tv_sec + sleep.tv_sec;
 	ts->tv_nsec = 0;
@@ -1488,10 +1502,10 @@ struct timespec current_kernel_time(void)
 	unsigned long seq;
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		now = tk_xtime(tk);
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	return now;
 }
@@ -1504,11 +1518,11 @@ struct timespec get_monotonic_coarse(void)
 	unsigned long seq;
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		now = tk_xtime(tk);
 		mono = tk->wall_to_monotonic;
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
 				now.tv_nsec + mono.tv_nsec);
@@ -1539,11 +1553,11 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
 	unsigned long seq;
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 		*xtim = tk_xtime(tk);
 		*wtom = tk->wall_to_monotonic;
 		*sleep = tk->total_sleep_time;
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1564,7 +1578,7 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,
 	u64 secs, nsecs;
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		secs = tk->xtime_sec;
 		nsecs = timekeeping_get_ns(tk);
@@ -1572,7 +1586,7 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,
 		*offs_real = tk->offs_real;
 		*offs_boot = tk->offs_boot;
 		*offs_tai = tk->offs_tai;
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	now = ktime_add_ns(ktime_set(secs, 0), nsecs);
 	now = ktime_sub(now, *offs_real);
@@ -1590,9 +1604,9 @@ ktime_t ktime_get_monotonic_offset(void)
 	struct timespec wtom;
 
 	do {
-		seq = read_seqbegin(&timekeeper_lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 		wtom = tk->wall_to_monotonic;
-	} while (read_seqretry(&timekeeper_lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	return timespec_to_ktime(wtom);
 }
-- 
cgit v0.10.2


From dd5d70e869f960bde6376f4447fff59f16186cf5 Mon Sep 17 00:00:00 2001
From: Fengguang Wu <fengguang.wu@intel.com>
Date: Mon, 25 Mar 2013 12:24:24 -0700
Subject: timekeeping: __timekeeping_set_tai_offset can be static

Yet again, the kbuild test robot saves the day, noting
I left out defining __timekeeping_set_tai_offset as
static. It even sent me this patch.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5e048e0..c5feb7a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -582,7 +582,7 @@ s32 timekeeping_get_tai_offset(void)
  * __timekeeping_set_tai_offset - Lock free worker function
  *
  */
-void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
+static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
 {
 	tk->tai_offset = tai_offset;
 	tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tai_offset, 0));
-- 
cgit v0.10.2


From 8011657b9e63cb2e914b9a0f75233b910c1854cb Mon Sep 17 00:00:00 2001
From: Christian Daudt <csd@broadcom.com>
Date: Wed, 13 Mar 2013 14:27:27 -0700
Subject: ARM: bcm281xx: Add timer driver (driver portion)

This adds support for the Broadcom timer, used in the following SoCs:
BCM11130, BCM11140, BCM11351, BCM28145, BCM28155

Updates from V6:
- Split DT portion into a separate patch

Updates from V5:
- Rebase to latest arm-soc/for-next

Updates from V4:
- Switch code to use CLOCKSOURCE_OF_DECLARE

Updates from V3:
- Migrate to 3.9 timer framework updates

Updates from V2:
- prepend static fns + fields with kona_

Updates from V1:
- Rename bcm_timer.c to bcm_kona_timer.c
- Pull .h into bcm_kona_timer.c
- Make timers static
- Clean up comment block
- Switched to using clockevents_config_and_register
- Added an error to the get_timer loop if it repeats too much
- Added to Documentation/devicetree/bindings/arm/bcm/bcm,kona-timer.txt
- Added missing readl to timer_disable_and_clear

Note: bcm,kona-timer was kept as the 'compatible' field to make it
specific enough for when there are multiple bcm timers (bcm,timer is
too generic).

Signed-off-by: Christian Daudt <csd@broadcom.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>

diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index bf02471..f112895 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -6,6 +6,7 @@ config ARCH_BCM
 	select ARM_ERRATA_764369 if SMP
 	select ARM_GIC
 	select CPU_V7
+	select CLKSRC_OF
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_TIME
 	select GPIO_BCM
diff --git a/arch/arm/mach-bcm/board_bcm.c b/arch/arm/mach-bcm/board_bcm.c
index f0f9aba..2595935 100644
--- a/arch/arm/mach-bcm/board_bcm.c
+++ b/arch/arm/mach-bcm/board_bcm.c
@@ -16,14 +16,11 @@
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/irqchip.h>
+#include <linux/clocksource.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
 
-static void timer_init(void)
-{
-}
-
 
 static void __init board_init(void)
 {
@@ -35,7 +32,7 @@ static const char * const bcm11351_dt_compat[] = { "bcm,bcm11351", NULL, };
 
 DT_MACHINE_START(BCM11351_DT, "Broadcom Application Processor")
 	.init_irq = irqchip_init,
-	.init_time = timer_init,
+	.init_time = clocksource_of_init,
 	.init_machine = board_init,
 	.dt_compat = bcm11351_dt_compat,
 MACHINE_END
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 4d8283a..96e2531 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_ARCH_BCM2835)	+= bcm2835_timer.o
 obj-$(CONFIG_SUNXI_TIMER)	+= sunxi_timer.o
 obj-$(CONFIG_ARCH_TEGRA)	+= tegra20_timer.o
 obj-$(CONFIG_VT8500_TIMER)	+= vt8500_timer.o
+obj-$(CONFIG_ARCH_BCM)		+= bcm_kona_timer.o
 
 obj-$(CONFIG_ARM_ARCH_TIMER)		+= arm_arch_timer.o
 obj-$(CONFIG_CLKSRC_METAG_GENERIC)	+= metag_generic.o
diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c
new file mode 100644
index 0000000..350f493
--- /dev/null
+++ b/drivers/clocksource/bcm_kona_timer.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2012 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/clockchips.h>
+#include <linux/types.h>
+
+#include <linux/io.h>
+#include <asm/mach/time.h>
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+
+#define KONA_GPTIMER_STCS_OFFSET			0x00000000
+#define KONA_GPTIMER_STCLO_OFFSET			0x00000004
+#define KONA_GPTIMER_STCHI_OFFSET			0x00000008
+#define KONA_GPTIMER_STCM0_OFFSET			0x0000000C
+
+#define KONA_GPTIMER_STCS_TIMER_MATCH_SHIFT		0
+#define KONA_GPTIMER_STCS_COMPARE_ENABLE_SHIFT		4
+
+struct kona_bcm_timers {
+	int tmr_irq;
+	void __iomem *tmr_regs;
+};
+
+static struct kona_bcm_timers timers;
+
+static u32 arch_timer_rate;
+
+/*
+ * We use the peripheral timers for system tick, the cpu global timer for
+ * profile tick
+ */
+static void kona_timer_disable_and_clear(void __iomem *base)
+{
+	uint32_t reg;
+
+	/*
+	 * clear and disable interrupts
+	 * We are using compare/match register 0 for our system interrupts
+	 */
+	reg = readl(base + KONA_GPTIMER_STCS_OFFSET);
+
+	/* Clear compare (0) interrupt */
+	reg |= 1 << KONA_GPTIMER_STCS_TIMER_MATCH_SHIFT;
+	/* disable compare */
+	reg &= ~(1 << KONA_GPTIMER_STCS_COMPARE_ENABLE_SHIFT);
+
+	writel(reg, base + KONA_GPTIMER_STCS_OFFSET);
+
+}
+
+static void
+kona_timer_get_counter(void *timer_base, uint32_t *msw, uint32_t *lsw)
+{
+	void __iomem *base = IOMEM(timer_base);
+	int loop_limit = 4;
+
+	/*
+	 * Read 64-bit free running counter
+	 * 1. Read hi-word
+	 * 2. Read low-word
+	 * 3. Read hi-word again
+	 * 4.1
+	 *      if new hi-word is not equal to previously read hi-word, then
+	 *      start from #1
+	 * 4.2
+	 *      if new hi-word is equal to previously read hi-word then stop.
+	 */
+
+	while (--loop_limit) {
+		*msw = readl(base + KONA_GPTIMER_STCHI_OFFSET);
+		*lsw = readl(base + KONA_GPTIMER_STCLO_OFFSET);
+		if (*msw == readl(base + KONA_GPTIMER_STCHI_OFFSET))
+			break;
+	}
+	if (!loop_limit) {
+		pr_err("bcm_kona_timer: getting counter failed.\n");
+		pr_err(" Timer will be impacted\n");
+	}
+
+	return;
+}
+
+static const struct of_device_id bcm_timer_ids[] __initconst = {
+	{.compatible = "bcm,kona-timer"},
+	{},
+};
+
+static void __init kona_timers_init(void)
+{
+	struct device_node *node;
+	u32 freq;
+
+	node = of_find_matching_node(NULL, bcm_timer_ids);
+
+	if (!node)
+		panic("No timer");
+
+	if (!of_property_read_u32(node, "clock-frequency", &freq))
+		arch_timer_rate = freq;
+	else
+		panic("clock-frequency not set in the .dts file");
+
+	/* Setup IRQ numbers */
+	timers.tmr_irq = irq_of_parse_and_map(node, 0);
+
+	/* Setup IO addresses */
+	timers.tmr_regs = of_iomap(node, 0);
+
+	kona_timer_disable_and_clear(timers.tmr_regs);
+}
+
+static int kona_timer_set_next_event(unsigned long clc,
+				  struct clock_event_device *unused)
+{
+	/*
+	 * timer (0) is disabled by the timer interrupt already
+	 * so, here we reload the next event value and re-enable
+	 * the timer.
+	 *
+	 * This way, we are potentially losing the time between
+	 * timer-interrupt->set_next_event. CPU local timers, when
+	 * they come in should get rid of skew.
+	 */
+
+	uint32_t lsw, msw;
+	uint32_t reg;
+
+	kona_timer_get_counter(timers.tmr_regs, &msw, &lsw);
+
+	/* Load the "next" event tick value */
+	writel(lsw + clc, timers.tmr_regs + KONA_GPTIMER_STCM0_OFFSET);
+
+	/* Enable compare */
+	reg = readl(timers.tmr_regs + KONA_GPTIMER_STCS_OFFSET);
+	reg |= (1 << KONA_GPTIMER_STCS_COMPARE_ENABLE_SHIFT);
+	writel(reg, timers.tmr_regs + KONA_GPTIMER_STCS_OFFSET);
+
+	return 0;
+}
+
+static void kona_timer_set_mode(enum clock_event_mode mode,
+			     struct clock_event_device *unused)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_ONESHOT:
+		/* by default mode is one shot don't do any thing */
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		kona_timer_disable_and_clear(timers.tmr_regs);
+	}
+}
+
+static struct clock_event_device kona_clockevent_timer = {
+	.name = "timer 1",
+	.features = CLOCK_EVT_FEAT_ONESHOT,
+	.set_next_event = kona_timer_set_next_event,
+	.set_mode = kona_timer_set_mode
+};
+
+static void __init kona_timer_clockevents_init(void)
+{
+	kona_clockevent_timer.cpumask = cpumask_of(0);
+	clockevents_config_and_register(&kona_clockevent_timer,
+		arch_timer_rate, 6, 0xffffffff);
+}
+
+static irqreturn_t kona_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = &kona_clockevent_timer;
+
+	kona_timer_disable_and_clear(timers.tmr_regs);
+	evt->event_handler(evt);
+	return IRQ_HANDLED;
+}
+
+static struct irqaction kona_timer_irq = {
+	.name = "Kona Timer Tick",
+	.flags = IRQF_TIMER,
+	.handler = kona_timer_interrupt,
+};
+
+static void __init kona_timer_init(void)
+{
+	kona_timers_init();
+	kona_timer_clockevents_init();
+	setup_irq(timers.tmr_irq, &kona_timer_irq);
+	kona_timer_set_next_event((arch_timer_rate / HZ), NULL);
+}
+
+CLOCKSOURCE_OF_DECLARE(bcm_kona, "bcm,kona-timer",
+	kona_timer_init);
-- 
cgit v0.10.2