Skip to content

Commit f5686ae

Browse files
authored
DAOS-18376 container: Fix inconsistent ds_cont_child state (#17314)
Fix inconsistent ds_cont_child state when ds_cont_csummer_init() yields in md-on-ssd mode: In md-on-ssd mode, ds_cont_csummer_init() persists properties to VOS and calls umem_tx_commit(). Because umem_tx_commit() may yield, ds_cont_child can be left in an inconsistent state: - sc_props_fetched remains false - sc_csummer is already set A concurrent ULT may observe this state and hit an assertion. Signed-off-by: Liang Zhen <[email protected]>
1 parent 1af35d5 commit f5686ae

File tree

6 files changed

+60
-41
lines changed

6 files changed

+60
-41
lines changed

src/container/srv_target.c

Lines changed: 51 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -132,23 +132,25 @@ ds_cont_csummer_init(struct ds_cont_child *cont)
132132
bool dedup_only = false;
133133

134134
D_ASSERT(cont != NULL);
135-
cont_props = &cont->sc_props;
135+
while (cont->sc_csummer_initing) {
136+
ABT_mutex_lock(cont->sc_mutex);
137+
ABT_cond_wait(cont->sc_init_cond, cont->sc_mutex);
138+
ABT_mutex_unlock(cont->sc_mutex);
139+
}
136140

137-
if (cont->sc_props_fetched)
141+
if (cont->sc_csummer_inited)
138142
return 0;
139143

144+
D_ASSERT(cont->sc_csummer == NULL);
145+
cont->sc_csummer_initing = 1;
140146
/** Get the container csum related properties
141147
* Need the pool for the IV namespace
142148
*/
143-
D_ASSERT(cont->sc_csummer == NULL);
149+
cont_props = &cont->sc_props;
144150
rc = ds_cont_get_props(cont_props, cont->sc_pool_uuid, cont->sc_uuid);
145151
if (rc != 0)
146152
goto done;
147153

148-
/* Check again since IV fetch yield */
149-
if (cont->sc_props_fetched)
150-
goto done;
151-
152154
csum_val = cont_props->dcp_csum_type;
153155
if (!daos_cont_csum_prop_is_enabled(csum_val)) {
154156
dedup_only = true;
@@ -178,9 +180,13 @@ ds_cont_csummer_init(struct ds_cont_child *cont)
178180
DP_UUID(cont->sc_uuid), DP_RC(rc));
179181
rc = 0;
180182
}
181-
cont->sc_props_fetched = 1;
182-
183+
D_ASSERT(!cont->sc_csummer_inited); /* nobody else can do this except me */
184+
cont->sc_csummer_inited = 1;
183185
done:
186+
if (cont->sc_csummer_initing) {
187+
cont->sc_csummer_initing = 0;
188+
ABT_cond_broadcast(cont->sc_init_cond);
189+
}
184190
return rc;
185191
}
186192

@@ -218,7 +224,7 @@ cont_aggregate_runnable(struct ds_cont_child *cont, struct sched_request *req,
218224
DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid));
219225
}
220226

221-
if (!cont->sc_props_fetched)
227+
if (!cont->sc_csummer_inited)
222228
ds_cont_csummer_init(cont);
223229

224230
if (cont->sc_props.dcp_dedup_enabled ||
@@ -655,6 +661,25 @@ cont_child_obj(struct daos_llink *llink)
655661
return container_of(llink, struct ds_cont_child, sc_list);
656662
}
657663

664+
static void
665+
cont_child_fini_abt(struct ds_cont_child *cont)
666+
{
667+
if (cont->sc_dtx_resync_cond)
668+
ABT_cond_free(&cont->sc_dtx_resync_cond);
669+
if (cont->sc_scrub_cond)
670+
ABT_cond_free(&cont->sc_scrub_cond);
671+
if (cont->sc_rebuild_cond)
672+
ABT_cond_free(&cont->sc_rebuild_cond);
673+
if (cont->sc_init_cond)
674+
ABT_cond_free(&cont->sc_init_cond);
675+
if (cont->sc_fini_cond)
676+
ABT_cond_free(&cont->sc_fini_cond);
677+
if (cont->sc_mutex)
678+
ABT_mutex_free(&cont->sc_mutex);
679+
if (cont->sc_open_mutex)
680+
ABT_mutex_free(&cont->sc_open_mutex);
681+
}
682+
658683
static int
659684
cont_child_alloc_ref(void *co_uuid, unsigned int ksize, void *po_uuid,
660685
struct daos_llink **link)
@@ -678,34 +703,39 @@ cont_child_alloc_ref(void *co_uuid, unsigned int ksize, void *po_uuid,
678703
rc = ABT_mutex_create(&cont->sc_mutex);
679704
if (rc != ABT_SUCCESS) {
680705
rc = dss_abterr2der(rc);
681-
goto out_open_mutex;
706+
goto out_abt;
682707
}
683708

684709
rc = ABT_cond_create(&cont->sc_dtx_resync_cond);
685710
if (rc != ABT_SUCCESS) {
686711
rc = dss_abterr2der(rc);
687-
goto out_mutex;
712+
goto out_abt;
688713
}
689714
rc = ABT_cond_create(&cont->sc_scrub_cond);
690715
if (rc != ABT_SUCCESS) {
691716
rc = dss_abterr2der(rc);
692-
goto out_resync_cond;
717+
goto out_abt;
693718
}
694719
rc = ABT_cond_create(&cont->sc_rebuild_cond);
695720
if (rc != ABT_SUCCESS) {
696721
rc = dss_abterr2der(rc);
697-
goto out_scrub_cond;
722+
goto out_abt;
723+
}
724+
rc = ABT_cond_create(&cont->sc_init_cond);
725+
if (rc != ABT_SUCCESS) {
726+
rc = dss_abterr2der(rc);
727+
goto out_abt;
698728
}
699729
rc = ABT_cond_create(&cont->sc_fini_cond);
700730
if (rc != ABT_SUCCESS) {
701731
rc = dss_abterr2der(rc);
702-
goto out_rebuild_cond;
732+
goto out_abt;
703733
}
704734

705735
cont->sc_pool = ds_pool_child_lookup(po_uuid);
706736
if (cont->sc_pool == NULL) {
707737
rc = -DER_NO_HDL;
708-
goto out_finish_cond;
738+
goto out_abt;
709739
}
710740

711741
rc = vos_cont_open(cont->sc_pool->spc_hdl, co_uuid, &cont->sc_hdl);
@@ -745,18 +775,8 @@ cont_child_alloc_ref(void *co_uuid, unsigned int ksize, void *po_uuid,
745775
vos_cont_close(cont->sc_hdl);
746776
out_pool:
747777
ds_pool_child_put(cont->sc_pool);
748-
out_finish_cond:
749-
ABT_cond_free(&cont->sc_fini_cond);
750-
out_rebuild_cond:
751-
ABT_cond_free(&cont->sc_rebuild_cond);
752-
out_scrub_cond:
753-
ABT_cond_free(&cont->sc_scrub_cond);
754-
out_resync_cond:
755-
ABT_cond_free(&cont->sc_dtx_resync_cond);
756-
out_mutex:
757-
ABT_mutex_free(&cont->sc_mutex);
758-
out_open_mutex:
759-
ABT_mutex_free(&cont->sc_open_mutex);
778+
out_abt:
779+
cont_child_fini_abt(cont);
760780
out:
761781
D_FREE(cont);
762782
return rc;
@@ -777,14 +797,10 @@ cont_child_free_ref(struct daos_llink *llink)
777797
cont_tgt_track_eph_fini(cont);
778798
vos_cont_close(cont->sc_hdl);
779799
ds_pool_child_put(cont->sc_pool);
780-
daos_csummer_destroy(&cont->sc_csummer);
800+
if (cont->sc_csummer)
801+
daos_csummer_destroy(&cont->sc_csummer);
781802
D_FREE(cont->sc_snapshots);
782-
ABT_cond_free(&cont->sc_dtx_resync_cond);
783-
ABT_cond_free(&cont->sc_scrub_cond);
784-
ABT_cond_free(&cont->sc_rebuild_cond);
785-
ABT_cond_free(&cont->sc_fini_cond);
786-
ABT_mutex_free(&cont->sc_mutex);
787-
ABT_mutex_free(&cont->sc_open_mutex);
803+
cont_child_fini_abt(cont);
788804
D_FREE(cont);
789805
}
790806

src/include/daos_srv/container.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,11 @@ struct ds_cont_child {
7676
ABT_cond sc_scrub_cond;
7777
ABT_cond sc_rebuild_cond;
7878
ABT_cond sc_fini_cond;
79+
ABT_cond sc_init_cond;
7980
uint32_t sc_dtx_resyncing : 1, sc_dtx_reindex : 1, sc_dtx_reindex_abort : 1,
80-
sc_dtx_delay_reset : 1, sc_dtx_registered : 1, sc_props_fetched : 1, sc_stopping : 1,
81-
sc_destroying : 1, sc_vos_agg_active : 1, sc_ec_agg_active : 1,
81+
sc_dtx_delay_reset : 1, sc_dtx_registered : 1, sc_csummer_inited : 1,
82+
sc_csummer_initing : 1, sc_stopping : 1, sc_destroying : 1, sc_vos_agg_active : 1,
83+
sc_ec_agg_active : 1,
8284
/* flag of CONT_CAPA_READ_DATA/_WRITE_DATA disabled */
8385
sc_rw_disabled : 1, sc_scrubbing : 1, sc_rebuilding : 1,
8486
/* flag of sc_ec_agg_eph_boundary valid */

src/include/daos_srv/vos.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1590,7 +1590,7 @@ struct cont_scrub {
15901590
void *scs_cont_src;
15911591
daos_handle_t scs_cont_hdl;
15921592
uuid_t scs_cont_uuid;
1593-
bool scs_props_fetched;
1593+
bool scs_csummer_inited;
15941594
};
15951595

15961596
/*

src/pool/srv_pool_scrub_ult.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ cont_lookup_cb(uuid_t pool_uuid, uuid_t cont_uuid, void *arg,
8282
cont->scs_cont_hdl = cont_child->sc_hdl;
8383
uuid_copy(cont->scs_cont_uuid, cont_uuid);
8484
cont->scs_cont_src = cont_child;
85-
cont->scs_props_fetched = cont_child->sc_props_fetched;
85+
cont->scs_csummer_inited = cont_child->sc_csummer_inited;
8686

8787
ABT_mutex_lock(cont_child->sc_mutex);
8888
cont_child->sc_scrubbing = 1;

src/vos/tests/pool_scrubbing_tests.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/**
22
* (C) Copyright 2020-2022 Intel Corporation.
3+
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
34
*
45
* SPDX-License-Identifier: BSD-2-Clause-Patent
56
*/
@@ -497,7 +498,7 @@ sts_ctx_setup_scrub_ctx(struct sts_context *ctx)
497498
ctx->tsc_scrub_ctx.sc_drain_pool_tgt_fn = fake_target_drain;
498499
ctx->tsc_scrub_ctx.sc_pool = &ctx->tsc_pool;
499500
ctx->tsc_scrub_ctx.sc_dmi = &ctx->tsc_dmi;
500-
ctx->tsc_scrub_ctx.sc_cont.scs_props_fetched = true;
501+
ctx->tsc_scrub_ctx.sc_cont.scs_csummer_inited = true;
501502
}
502503

503504
static void

src/vos/vos_pool_scrub.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,7 @@ cont_iter_is_loaded_cb(daos_handle_t ih, vos_iter_entry_t *entry,
878878
* initialized if csums are enabled
879879
*/
880880
if (!args->args_found_unloaded_container)
881-
args->args_found_unloaded_container = !args->args_ctx->sc_cont.scs_props_fetched;
881+
args->args_found_unloaded_container = !args->args_ctx->sc_cont.scs_csummer_inited;
882882

883883
sc_cont_teardown(ctx);
884884
return 0;

0 commit comments

Comments
 (0)