summaryrefslogtreecommitdiff
path: root/fs/ocfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/alloc.c24
-rw-r--r--fs/ocfs2/cluster/heartbeat.c8
-rw-r--r--fs/ocfs2/dlmglue.c105
-rw-r--r--fs/ocfs2/dlmglue.h18
-rw-r--r--fs/ocfs2/ocfs2.h1
5 files changed, 143 insertions, 13 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f72712f..06089be 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7310,13 +7310,24 @@ out:
static int ocfs2_trim_extent(struct super_block *sb,
struct ocfs2_group_desc *gd,
- u32 start, u32 count)
+ u64 group, u32 start, u32 count)
{
u64 discard, bcount;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
bcount = ocfs2_clusters_to_blocks(sb, count);
- discard = le64_to_cpu(gd->bg_blkno) +
- ocfs2_clusters_to_blocks(sb, start);
+ discard = ocfs2_clusters_to_blocks(sb, start);
+
+ /*
+ * For the first cluster group, the gd->bg_blkno is not at the start
+ * of the group, but at an offset from the start. If we add it while
+ * calculating discard for first group, we will wrongly start fstrim a
+ * few blocks after the desried start block and the range can cross
+ * over into the next cluster group. So, add it only if this is not
+ * the first cluster group.
+ */
+ if (group != osb->first_cluster_group_blkno)
+ discard += le64_to_cpu(gd->bg_blkno);
trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);
@@ -7324,7 +7335,7 @@ static int ocfs2_trim_extent(struct super_block *sb,
}
static int ocfs2_trim_group(struct super_block *sb,
- struct ocfs2_group_desc *gd,
+ struct ocfs2_group_desc *gd, u64 group,
u32 start, u32 max, u32 minbits)
{
int ret = 0, count = 0, next;
@@ -7343,7 +7354,7 @@ static int ocfs2_trim_group(struct super_block *sb,
next = ocfs2_find_next_bit(bitmap, max, start);
if ((next - start) >= minbits) {
- ret = ocfs2_trim_extent(sb, gd,
+ ret = ocfs2_trim_extent(sb, gd, group,
start, next - start);
if (ret < 0) {
mlog_errno(ret);
@@ -7441,7 +7452,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
}
gd = (struct ocfs2_group_desc *)gd_bh->b_data;
- cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen);
+ cnt = ocfs2_trim_group(sb, gd, group,
+ first_bit, last_bit, minlen);
brelse(gd_bh);
gd_bh = NULL;
if (cnt < 0) {
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 636abcb..5e8709a 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -2242,13 +2242,13 @@ unlock:
spin_unlock(&o2hb_live_lock);
}
-static ssize_t o2hb_heartbeat_group_threshold_show(struct config_item *item,
+static ssize_t o2hb_heartbeat_group_dead_threshold_show(struct config_item *item,
char *page)
{
return sprintf(page, "%u\n", o2hb_dead_threshold);
}
-static ssize_t o2hb_heartbeat_group_threshold_store(struct config_item *item,
+static ssize_t o2hb_heartbeat_group_dead_threshold_store(struct config_item *item,
const char *page, size_t count)
{
unsigned long tmp;
@@ -2297,11 +2297,11 @@ static ssize_t o2hb_heartbeat_group_mode_store(struct config_item *item,
}
-CONFIGFS_ATTR(o2hb_heartbeat_group_, threshold);
+CONFIGFS_ATTR(o2hb_heartbeat_group_, dead_threshold);
CONFIGFS_ATTR(o2hb_heartbeat_group_, mode);
static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = {
- &o2hb_heartbeat_group_attr_threshold,
+ &o2hb_heartbeat_group_attr_dead_threshold,
&o2hb_heartbeat_group_attr_mode,
NULL,
};
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 77d1632..8dce409 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -532,6 +532,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
init_waitqueue_head(&res->l_event);
INIT_LIST_HEAD(&res->l_blocked_list);
INIT_LIST_HEAD(&res->l_mask_waiters);
+ INIT_LIST_HEAD(&res->l_holders);
}
void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
@@ -749,6 +750,50 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
res->l_flags = 0UL;
}
+/*
+ * Keep a list of processes who have interest in a lockres.
+ * Note: this is now only uesed for check recursive cluster locking.
+ */
+static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
+ struct ocfs2_lock_holder *oh)
+{
+ INIT_LIST_HEAD(&oh->oh_list);
+ oh->oh_owner_pid = get_pid(task_pid(current));
+
+ spin_lock(&lockres->l_lock);
+ list_add_tail(&oh->oh_list, &lockres->l_holders);
+ spin_unlock(&lockres->l_lock);
+}
+
+static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
+ struct ocfs2_lock_holder *oh)
+{
+ spin_lock(&lockres->l_lock);
+ list_del(&oh->oh_list);
+ spin_unlock(&lockres->l_lock);
+
+ put_pid(oh->oh_owner_pid);
+}
+
+static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres)
+{
+ struct ocfs2_lock_holder *oh;
+ struct pid *pid;
+
+ /* look in the list of holders for one with the current task as owner */
+ spin_lock(&lockres->l_lock);
+ pid = task_pid(current);
+ list_for_each_entry(oh, &lockres->l_holders, oh_list) {
+ if (oh->oh_owner_pid == pid) {
+ spin_unlock(&lockres->l_lock);
+ return 1;
+ }
+ }
+ spin_unlock(&lockres->l_lock);
+
+ return 0;
+}
+
static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
int level)
{
@@ -2333,8 +2378,9 @@ int ocfs2_inode_lock_full_nested(struct inode *inode,
goto getbh;
}
- if (ocfs2_mount_local(osb))
- goto local;
+ if ((arg_flags & OCFS2_META_LOCK_GETBH) ||
+ ocfs2_mount_local(osb))
+ goto update;
if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
ocfs2_wait_for_recovery(osb);
@@ -2363,7 +2409,7 @@ int ocfs2_inode_lock_full_nested(struct inode *inode,
if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
ocfs2_wait_for_recovery(osb);
-local:
+update:
/*
* We only see this flag if we're being called from
* ocfs2_read_locked_inode(). It means we're locking an inode
@@ -2497,6 +2543,59 @@ void ocfs2_inode_unlock(struct inode *inode,
ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
}
+/*
+ * This _tracker variantes are introduced to deal with the recursive cluster
+ * locking issue. The idea is to keep track of a lock holder on the stack of
+ * the current process. If there's a lock holder on the stack, we know the
+ * task context is already protected by cluster locking. Currently, they're
+ * used in some VFS entry routines.
+ *
+ * return < 0 on error, return == 0 if there's no lock holder on the stack
+ * before this call, return == 1 if this call would be a recursive locking.
+ */
+int ocfs2_inode_lock_tracker(struct inode *inode,
+ struct buffer_head **ret_bh,
+ int ex,
+ struct ocfs2_lock_holder *oh)
+{
+ int status;
+ int arg_flags = 0, has_locked;
+ struct ocfs2_lock_res *lockres;
+
+ lockres = &OCFS2_I(inode)->ip_inode_lockres;
+ has_locked = ocfs2_is_locked_by_me(lockres);
+ /* Just get buffer head if the cluster lock has been taken */
+ if (has_locked)
+ arg_flags = OCFS2_META_LOCK_GETBH;
+
+ if (likely(!has_locked || ret_bh)) {
+ status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags);
+ if (status < 0) {
+ if (status != -ENOENT)
+ mlog_errno(status);
+ return status;
+ }
+ }
+ if (!has_locked)
+ ocfs2_add_holder(lockres, oh);
+
+ return has_locked;
+}
+
+void ocfs2_inode_unlock_tracker(struct inode *inode,
+ int ex,
+ struct ocfs2_lock_holder *oh,
+ int had_lock)
+{
+ struct ocfs2_lock_res *lockres;
+
+ lockres = &OCFS2_I(inode)->ip_inode_lockres;
+ if (!had_lock) {
+ ocfs2_remove_holder(lockres, oh);
+ ocfs2_inode_unlock(inode, ex);
+ }
+}
+
int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
{
struct ocfs2_lock_res *lockres;
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index d293a22..a7fc18b 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -70,6 +70,11 @@ struct ocfs2_orphan_scan_lvb {
__be32 lvb_os_seqno;
};
+struct ocfs2_lock_holder {
+ struct list_head oh_list;
+ struct pid *oh_owner_pid;
+};
+
/* ocfs2_inode_lock_full() 'arg_flags' flags */
/* don't wait on recovery. */
#define OCFS2_META_LOCK_RECOVERY (0x01)
@@ -77,6 +82,8 @@ struct ocfs2_orphan_scan_lvb {
#define OCFS2_META_LOCK_NOQUEUE (0x02)
/* don't block waiting for the downconvert thread, instead return -EAGAIN */
#define OCFS2_LOCK_NONBLOCK (0x04)
+/* just get back disk inode bh if we've got cluster lock. */
+#define OCFS2_META_LOCK_GETBH (0x08)
/* Locking subclasses of inode cluster lock */
enum {
@@ -170,4 +177,15 @@ void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);
/* To set the locking protocol on module initialization */
void ocfs2_set_locking_protocol(void);
+
+/* The _tracker pair is used to avoid cluster recursive locking */
+int ocfs2_inode_lock_tracker(struct inode *inode,
+ struct buffer_head **ret_bh,
+ int ex,
+ struct ocfs2_lock_holder *oh);
+void ocfs2_inode_unlock_tracker(struct inode *inode,
+ int ex,
+ struct ocfs2_lock_holder *oh,
+ int had_lock);
+
#endif /* DLMGLUE_H */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index e63af7d..594575e 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -172,6 +172,7 @@ struct ocfs2_lock_res {
struct list_head l_blocked_list;
struct list_head l_mask_waiters;
+ struct list_head l_holders;
unsigned long l_flags;
char l_name[OCFS2_LOCK_ID_MAX_LEN];