/* * Copyright (C) 2016 Oracle. All Rights Reserved. * * Author: Darrick J. Wong * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it would be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_error.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_ioctl.h" #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_icache.h" #include "xfs_pnfs.h" #include "xfs_refcount_btree.h" #include "xfs_refcount.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" #include "xfs_bit.h" #include "xfs_alloc.h" #include "xfs_quota_defs.h" #include "xfs_quota.h" #include "xfs_btree.h" #include "xfs_bmap_btree.h" #include "xfs_reflink.h" /* * Copy on Write of Shared Blocks * * XFS must preserve "the usual" file semantics even when two files share * the same physical blocks. This means that a write to one file must not * alter the blocks in a different file; the way that we'll do that is * through the use of a copy-on-write mechanism. 
At a high level, that * means that when we want to write to a shared block, we allocate a new * block, write the data to the new block, and if that succeeds we map the * new block into the file. * * XFS provides a "delayed allocation" (delalloc) mechanism that defers the * allocation of disk blocks to dirty-but-not-yet-mapped file blocks as long as * possible. This reduces fragmentation by enabling the filesystem to ask * for bigger chunks less often, which is exactly what we want for CoW. * * The delalloc mechanism begins when the kernel wants to make a block * writable (write_begin or page_mkwrite). If the offset is not mapped, we * create a delalloc mapping, which is a regular in-core extent, but without * a real startblock. (For delalloc mappings, the startblock encodes both * a flag that this is a delalloc mapping, and a worst-case estimate of how * many blocks might be required to put the mapping into the BMBT.) Delalloc * mappings are a reservation against the free space in the filesystem; * adjacent mappings can also be combined into fewer larger mappings. * * When dirty pages are being written out (typically in writepage), the * delalloc reservations are converted into real mappings by allocating * blocks and replacing the delalloc mappings with real ones. A delalloc * mapping can be replaced by several real ones if the free space is * fragmented. * * We want to adapt the delalloc mechanism for copy-on-write, since the * write paths are similar. The first two steps (creating the reservation * and allocating the blocks) are exactly the same as delalloc except that * the mappings must be stored in a separate CoW fork because we do not want * to disturb the mapping in the data fork until we're sure that the write * succeeded. IO completion in this case is the process of removing the old * mapping from the data fork and moving the new mapping from the CoW fork to * the data fork. This will be discussed shortly. 
* * For now, unaligned directio writes will be bounced back to the page cache. * Block-aligned directio writes will use the same mechanism as buffered * writes. * * CoW remapping must be done after the data block write completes, * because we don't want to destroy the old data fork map until we're sure * the new block has been written. Since the new mappings are kept in a * separate fork, we can simply iterate over these mappings to find the ones * that cover the file blocks that we just CoW'd. For each extent, simply * unmap the corresponding range in the data fork, map the new range into * the data fork, and remove the extent from the CoW fork. * * Since the remapping operation can be applied to an arbitrary file * range, we record the need for the remap step as a flag in the ioend * instead of declaring a new IO type. This is required for direct IO * because we only have one ioend for the whole dio, and we have to be able to * remember the presence of unwritten blocks and CoW blocks with a single * ioend structure. In general, the more ground we can cover with one * ioend, the better. */