Skip to content

Commit 4886ff7

Browse files
adam900710 authored and kdave committed
btrfs: introduce a new helper to submit write bio for repair
Both scrub and read-repair use a special kind of repair write that:

- Only writes back to a single device
  Even for read-repair on RAID56, we only update the corrupted data stripe itself, without triggering the full RMW path.

- Requires a valid @mirror_num
  For the RAID56 case, only @mirror_num == 1 is valid.
  For non-RAID56 cases, we need @mirror_num to locate our stripe.

- Needs no data csum generation

These two call sites still have some differences though:

- Read-repair uses a plain bio
  It doesn't need a full btrfs_bio, and goes through submit_bio_wait().

- The new scrub repair will use a btrfs_bio
  This simplifies both the read and write paths.

So this patch:

- Introduces a common helper, btrfs_map_repair_block()
  Due to the single device nature, we can use an on-stack btrfs_io_stripe to pass the device and its physical bytenr.

- Introduces a new interface, btrfs_submit_repair_write(), for the incoming scrub code

Signed-off-by: Qu Wenruo <[email protected]>
Reviewed-by: David Sterba <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent 4317ff0 commit 4886ff7

File tree

5 files changed

+132
-44
lines changed

5 files changed

+132
-44
lines changed

fs/btrfs/bio.c

Lines changed: 50 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -735,12 +735,9 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
735735
u64 length, u64 logical, struct page *page,
736736
unsigned int pg_offset, int mirror_num)
737737
{
738-
struct btrfs_device *dev;
738+
struct btrfs_io_stripe smap = { 0 };
739739
struct bio_vec bvec;
740740
struct bio bio;
741-
u64 map_length = 0;
742-
u64 sector;
743-
struct btrfs_io_context *bioc = NULL;
744741
int ret = 0;
745742

746743
ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
@@ -749,68 +746,38 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
749746
if (btrfs_repair_one_zone(fs_info, logical))
750747
return 0;
751748

752-
map_length = length;
753-
754749
/*
755750
* Avoid races with device replace and make sure our bioc has devices
756751
* associated to its stripes that don't go away while we are doing the
757752
* read repair operation.
758753
*/
759754
btrfs_bio_counter_inc_blocked(fs_info);
760-
if (btrfs_is_parity_mirror(fs_info, logical, length)) {
761-
/*
762-
* Note that we don't use BTRFS_MAP_WRITE because it's supposed
763-
* to update all raid stripes, but here we just want to correct
764-
* bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
765-
* stripe's dev and sector.
766-
*/
767-
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
768-
&map_length, &bioc, 0);
769-
if (ret)
770-
goto out_counter_dec;
771-
ASSERT(bioc->mirror_num == 1);
772-
} else {
773-
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
774-
&map_length, &bioc, mirror_num);
775-
if (ret)
776-
goto out_counter_dec;
777-
/*
778-
* This happens when dev-replace is also running, and the
779-
* mirror_num indicates the dev-replace target.
780-
*
781-
* In this case, we don't need to do anything, as the read
782-
* error just means the replace progress hasn't reached our
783-
* read range, and later replace routine would handle it well.
784-
*/
785-
if (mirror_num != bioc->mirror_num)
786-
goto out_counter_dec;
787-
}
788-
789-
sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
790-
dev = bioc->stripes[bioc->mirror_num - 1].dev;
791-
btrfs_put_bioc(bioc);
755+
ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num);
756+
if (ret < 0)
757+
goto out_counter_dec;
792758

793-
if (!dev || !dev->bdev ||
794-
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
759+
if (!smap.dev->bdev ||
760+
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state)) {
795761
ret = -EIO;
796762
goto out_counter_dec;
797763
}
798764

799-
bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
800-
bio.bi_iter.bi_sector = sector;
765+
bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
766+
bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
801767
__bio_add_page(&bio, page, length, pg_offset);
802768

803769
btrfsic_check_bio(&bio);
804770
ret = submit_bio_wait(&bio);
805771
if (ret) {
806772
/* try to remap that extent elsewhere? */
807-
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
773+
btrfs_dev_stat_inc_and_print(smap.dev, BTRFS_DEV_STAT_WRITE_ERRS);
808774
goto out_bio_uninit;
809775
}
810776

811777
btrfs_info_rl_in_rcu(fs_info,
812778
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
813-
ino, start, btrfs_dev_name(dev), sector);
779+
ino, start, btrfs_dev_name(smap.dev),
780+
smap.physical >> SECTOR_SHIFT);
814781
ret = 0;
815782

816783
out_bio_uninit:
@@ -820,6 +787,45 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
820787
return ret;
821788
}
822789

790+
/*
791+
* Submit a btrfs_bio based repair write.
792+
*
793+
* If @dev_replace is true, the write would be submitted to dev-replace target.
794+
*/
795+
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace)
796+
{
797+
struct btrfs_fs_info *fs_info = bbio->fs_info;
798+
u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
799+
u64 length = bbio->bio.bi_iter.bi_size;
800+
struct btrfs_io_stripe smap = { 0 };
801+
int ret;
802+
803+
ASSERT(fs_info);
804+
ASSERT(mirror_num > 0);
805+
ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
806+
ASSERT(!bbio->inode);
807+
808+
btrfs_bio_counter_inc_blocked(fs_info);
809+
ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num);
810+
if (ret < 0)
811+
goto fail;
812+
813+
if (dev_replace) {
814+
if (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && btrfs_is_zoned(fs_info)) {
815+
bbio->bio.bi_opf &= ~REQ_OP_WRITE;
816+
bbio->bio.bi_opf |= REQ_OP_ZONE_APPEND;
817+
}
818+
ASSERT(smap.dev == fs_info->dev_replace.srcdev);
819+
smap.dev = fs_info->dev_replace.tgtdev;
820+
}
821+
__btrfs_submit_bio(&bbio->bio, NULL, &smap, mirror_num);
822+
return;
823+
824+
fail:
825+
btrfs_bio_counter_dec(fs_info);
826+
btrfs_bio_end_io(bbio, errno_to_blk_status(ret));
827+
}
828+
823829
int __init btrfs_bioset_init(void)
824830
{
825831
if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,

fs/btrfs/bio.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
9898
#define REQ_BTRFS_CGROUP_PUNT REQ_FS_PRIVATE
9999

100100
void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num);
101+
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace);
101102
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
102103
u64 length, u64 logical, struct page *page,
103104
unsigned int pg_offset, int mirror_num);

fs/btrfs/raid56.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,11 @@ static inline int nr_data_stripes(const struct map_lookup *map)
170170
return map->num_stripes - btrfs_nr_parity_stripes(map->type);
171171
}
172172

173+
static inline int nr_bioc_data_stripes(const struct btrfs_io_context *bioc)
174+
{
175+
return bioc->num_stripes - btrfs_nr_parity_stripes(bioc->map_type);
176+
}
177+
173178
#define RAID5_P_STRIPE ((u64)-2)
174179
#define RAID6_Q_STRIPE ((u64)-1)
175180

fs/btrfs/volumes.c

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8019,3 +8019,76 @@ bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
80198019

80208020
return true;
80218021
}
8022+
8023+
static void map_raid56_repair_block(struct btrfs_io_context *bioc,
8024+
struct btrfs_io_stripe *smap,
8025+
u64 logical)
8026+
{
8027+
int data_stripes = nr_bioc_data_stripes(bioc);
8028+
int i;
8029+
8030+
for (i = 0; i < data_stripes; i++) {
8031+
u64 stripe_start = bioc->full_stripe_logical +
8032+
(i << BTRFS_STRIPE_LEN_SHIFT);
8033+
8034+
if (logical >= stripe_start &&
8035+
logical < stripe_start + BTRFS_STRIPE_LEN)
8036+
break;
8037+
}
8038+
ASSERT(i < data_stripes);
8039+
smap->dev = bioc->stripes[i].dev;
8040+
smap->physical = bioc->stripes[i].physical +
8041+
((logical - bioc->full_stripe_logical) &
8042+
BTRFS_STRIPE_LEN_MASK);
8043+
}
8044+
8045+
/*
8046+
* Map a repair write into a single device.
8047+
*
8048+
* A repair write is triggered by read time repair or scrub, which would only
8049+
* update the contents of a single device.
8050+
* Not update any other mirrors nor go through RMW path.
8051+
*
8052+
* Callers should ensure:
8053+
*
8054+
* - Call btrfs_bio_counter_inc_blocked() first
8055+
* - The range does not cross stripe boundary
8056+
* - Has a valid @mirror_num passed in.
8057+
*/
8058+
int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
8059+
struct btrfs_io_stripe *smap, u64 logical,
8060+
u32 length, int mirror_num)
8061+
{
8062+
struct btrfs_io_context *bioc = NULL;
8063+
u64 map_length = length;
8064+
int mirror_ret = mirror_num;
8065+
int ret;
8066+
8067+
ASSERT(mirror_num > 0);
8068+
8069+
ret = __btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length,
8070+
&bioc, smap, &mirror_ret, true);
8071+
if (ret < 0)
8072+
return ret;
8073+
8074+
/* The map range should not cross stripe boundary. */
8075+
ASSERT(map_length >= length);
8076+
8077+
/* Already mapped to single stripe. */
8078+
if (!bioc)
8079+
goto out;
8080+
8081+
/* Map the RAID56 multi-stripe writes to a single one. */
8082+
if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
8083+
map_raid56_repair_block(bioc, smap, logical);
8084+
goto out;
8085+
}
8086+
8087+
ASSERT(mirror_num <= bioc->num_stripes);
8088+
smap->dev = bioc->stripes[mirror_num - 1].dev;
8089+
smap->physical = bioc->stripes[mirror_num - 1].physical;
8090+
out:
8091+
btrfs_put_bioc(bioc);
8092+
ASSERT(smap->dev);
8093+
return 0;
8094+
}

fs/btrfs/volumes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,9 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
587587
struct btrfs_io_context **bioc_ret,
588588
struct btrfs_io_stripe *smap, int *mirror_num_ret,
589589
int need_raid_map);
590+
int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
591+
struct btrfs_io_stripe *smap, u64 logical,
592+
u32 length, int mirror_num);
590593
struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
591594
u64 logical, u64 *length_ret,
592595
u32 *num_stripes);

0 commit comments

Comments
 (0)