Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect a slow raidz child during reads #16900

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ struct vdev {
boolean_t vdev_ishole; /* is a hole in the namespace */
uint64_t vdev_top_zap;
vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */
hrtime_t vdev_last_latency_check;

/* pool checkpoint related */
space_map_t *vdev_checkpoint_sm; /* contains reserved blocks */
Expand Down Expand Up @@ -432,6 +433,9 @@ struct vdev {
hrtime_t vdev_mmp_pending; /* 0 if write finished */
uint64_t vdev_mmp_kstat_id; /* to find kstat entry */
uint64_t vdev_expansion_time; /* vdev's last expansion time */
uint64_t vdev_outlier_count; /* read outlier amongst peers */
hrtime_t vdev_recent_latency; /* most recent read latency */
hrtime_t vdev_read_sit_out_expire; /* end of sit out period */
list_node_t vdev_leaf_node; /* leaf vdev list */

/*
Expand Down
1 change: 1 addition & 0 deletions include/sys/vdev_raidz.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
struct raidz_row *vdev_raidz_row_alloc(int, zio_t *);
void vdev_raidz_reflow_copy_scratch(spa_t *);
void raidz_dtl_reassessed(vdev_t *);
boolean_t vdev_skip_latency_outlier(vdev_t *, zio_flag_t);

extern const zio_vsd_ops_t vdev_raidz_vsd_ops;

Expand Down
8 changes: 8 additions & 0 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,14 @@ For testing, pause RAID-Z expansion when reflow amount reaches this value.
.It Sy raidz_io_aggregate_rows Ns = Ns Sy 4 Pq ulong
For expanded RAID-Z, aggregate reads that have more rows than this.
.
.It Sy raid_read_sit_out_secs Ns = Ns Sy 600 Ns s Po 10 min Pc Pq ulong
For RAID-Z and dRAID only, this is the slow disk sit out time period in
seconds.
When a slow disk outlier is detected, it is placed in a sit out state
for reads for the duration of this time period.
Defaults to 600 seconds; a value of zero disables slow disk outlier
detection.
.
.It Sy reference_history Ns = Ns Sy 3 Pq int
Maximum reference holders being tracked when reference_tracking_enable is
active.
Expand Down
2 changes: 2 additions & 0 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -4520,6 +4520,8 @@ vdev_clear(spa_t *spa, vdev_t *vd)
vd->vdev_stat.vs_checksum_errors = 0;
vd->vdev_stat.vs_dio_verify_errors = 0;
vd->vdev_stat.vs_slow_ios = 0;
atomic_store_64(&vd->vdev_outlier_count, 0);
vd->vdev_read_sit_out_expire = 0;

for (int c = 0; c < vd->vdev_children; c++)
vdev_clear(spa, vd->vdev_child[c]);
Expand Down
19 changes: 19 additions & 0 deletions module/zfs/vdev_draid.c
Original file line number Diff line number Diff line change
Expand Up @@ -1889,6 +1889,17 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr)
/* Sequential rebuild must do IO at redundancy group boundary. */
IMPLY(zio->io_priority == ZIO_PRIORITY_REBUILD, rr->rr_nempty == 0);

/*
* Count the parity columns that are currently readable; a slow child
* may only sit out a read while at least one of them is available.
*/
int parity_avail = rr->rr_firstdatacol;
for (int p = 0; p < rr->rr_firstdatacol; p++) {
raidz_col_t *rc = &rr->rr_col[p];
if (!vdev_draid_readable(vd->vdev_child[rc->rc_devidx],
rc->rc_offset)) {
parity_avail--;
}
}
/*
* Iterate over the columns in reverse order so that we hit the parity
* last. Any errors along the way will force us to read the parity.
Expand Down Expand Up @@ -1993,6 +2004,14 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr)
rc->rc_force_repair = 1;
rc->rc_allow_repair = 1;
}
} else if (parity_avail > 0 && c >= rr->rr_firstdatacol &&
rr->rr_missingdata == 0 &&
vdev_skip_latency_outlier(cvd, zio->io_flags)) {
rr->rr_missingdata++;
rc->rc_error = SET_ERROR(EAGAIN);
rc->rc_skipped = 1;
parity_avail--;
continue;
}
}

Expand Down
Loading
Loading