staging/lustre/ldlm: high load because of negative timeout

When the time of LRU resizing exceeds waiting period of recalculation, the ldlm daemon will keep on resizing without any interval of rest. That will cause high CPU load. This patch fixes the problem by setting the recalculation timestamp after LRU resizing finishes rather than before it. What is more, an interval of one second is enforced between each recalculation. Signed-off-by: Li Xi <lixi@ddn.com> Reviewed-on: http://review.whamcloud.com/11227 Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-5415 Reviewed-by: Dmitry Eremin <dmitry.eremin@intel.com> Reviewed-by: Bobi Jam <bobijam@gmail.com> Reviewed-by: Lai Siyao <lai.siyao@intel.com> Signed-off-by: Oleg Drokin <oleg.drokin@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Li Xi <lixi@ddn.com> 2015-02-02 02:52:02 (GMT)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2015-02-07 09:27:16 (GMT)
commit: 4d2c7b309d17a5609a5d4ede628d16828262ca00 (patch)
tree: 81cfa5759ce630b393f3805f539f2998f2b48a0a /drivers/staging
parent: c00266e369b945945316dad3869342d07f4be868 (diff)
download: linux-4d2c7b309d17a5609a5d4ede628d16828262ca00.tar.xz
1 files changed, 26 insertions, 6 deletions
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index 142b3dd..d20d277 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -470,6 +470,7 @@ static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl)
 static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
 {
 	time_t recalc_interval_sec;
+	int ret;
 
 	recalc_interval_sec = get_seconds() - pl->pl_recalc_time;
 	if (recalc_interval_sec < pl->pl_recalc_period)
@@ -490,16 +491,15 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
 	 */
 	ldlm_cli_pool_pop_slv(pl);
 
-	pl->pl_recalc_time = get_seconds();
-	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
-			    recalc_interval_sec);
 	spin_unlock(&pl->pl_lock);
 
 	/*
 	 * Do not cancel locks in case lru resize is disabled for this ns.
 	 */
-	if (!ns_connect_lru_resize(ldlm_pl2ns(pl)))
-		return 0;
+	if (!ns_connect_lru_resize(ldlm_pl2ns(pl))) {
+		ret = 0;
+		goto out;
+	}
 
 	/*
 	 * In the time of canceling locks on client we do not need to maintain
@@ -507,7 +507,19 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
 	 * It may be called when SLV has changed much, this is why we do not
 	 * take into account pl->pl_recalc_time here.
 	 */
-	return ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LCF_ASYNC, LDLM_CANCEL_LRUR);
+	ret = ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LCF_ASYNC, LDLM_CANCEL_LRUR);
+
+out:
+	spin_lock(&pl->pl_lock);
+	/*
+	 * Time of LRU resizing might be longer than period,
+	 * so update after LRU resizing rather than before it.
+	 */
+	pl->pl_recalc_time = get_seconds();
+	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
+			    recalc_interval_sec);
+	spin_unlock(&pl->pl_lock);
+	return ret;
 }
 
 /**
@@ -591,6 +603,14 @@ int ldlm_pool_recalc(struct ldlm_pool *pl)
 	}
 	recalc_interval_sec = pl->pl_recalc_time - get_seconds() +
 			      pl->pl_recalc_period;
+	if (recalc_interval_sec <= 0) {
+		/* Prevent too frequent recalculation. */
+		CDEBUG(D_DLMTRACE, "Negative interval(%ld), "
+		       "too short period(%ld)",
+		       recalc_interval_sec,
+		       pl->pl_recalc_period);
+		recalc_interval_sec = 1;
+	}
 
 	return recalc_interval_sec;
 }
author	Li Xi <lixi@ddn.com>	2015-02-02 02:52:02 (GMT)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2015-02-07 09:27:16 (GMT)
commit	4d2c7b309d17a5609a5d4ede628d16828262ca00 (patch)
tree	81cfa5759ce630b393f3805f539f2998f2b48a0a /drivers/staging
parent	c00266e369b945945316dad3869342d07f4be868 (diff)
download	linux-4d2c7b309d17a5609a5d4ede628d16828262ca00.tar.xz