diff --git a/0001-Coverity-fixes-resources-leaks.patch b/0001-Coverity-fixes-resources-leaks.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d493053c47f74387462b2414c17ae1f8c9b20f3 --- /dev/null +++ b/0001-Coverity-fixes-resources-leaks.patch @@ -0,0 +1,91 @@ +From 8f54ce5b7eb0ca982803e270082e33f50897b9a6 Mon Sep 17 00:00:00 2001 +From: Nigel Croxon +Date: Mon, 4 Nov 2024 11:17:46 -0500 +Subject: [PATCH 01/39] Coverity fixes resources leaks + +Handle variable going out of scope leaks the handle. + +Signed-off-by: Nigel Croxon +--- + Assemble.c | 3 ++- + Incremental.c | 2 +- + bitmap.c | 7 +++++-- + 3 files changed, 8 insertions(+), 4 deletions(-) + +diff --git a/Assemble.c b/Assemble.c +index 37a530ee..f8099cd3 100644 +--- a/Assemble.c ++++ b/Assemble.c +@@ -753,6 +753,7 @@ static int load_devices(struct devs *devices, char *devmap, + tst->ss->free_super(tst); + free(tst); + *stp = st; ++ free(best); + return -1; + } + close(dfd); +@@ -834,7 +835,6 @@ static int load_devices(struct devs *devices, char *devmap, + inargv ? "the list" : + "the\n DEVICE list in mdadm.conf" + ); +- free(best); + *stp = st; + goto error; + } +@@ -857,6 +857,7 @@ error: + close(mdfd); + free(devices); + free(devmap); ++ free(best); + return -1; + + } +diff --git a/Incremental.c b/Incremental.c +index aa5db3bf..9b455a12 100644 +--- a/Incremental.c ++++ b/Incremental.c +@@ -282,7 +282,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c, + * clustering resource agents + */ + if (info.array.state & (1 << MD_SB_CLUSTERED)) +- goto out; ++ goto out_unlock; + + /* Couldn't find an existing array, maybe make a new one */ + mdfd = create_mddev(match ? 
match->devname : NULL, name_to_use, trustworthy, +diff --git a/bitmap.c b/bitmap.c +index c62d18d4..3f8da63d 100644 +--- a/bitmap.c ++++ b/bitmap.c +@@ -260,8 +260,11 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) + return rv; + + info = bitmap_fd_read(fd, brief); +- if (!info) ++ if (!info) { ++ close_fd(&fd); ++ free(info); + return rv; ++ } + sb = &info->sb; + if (sb->magic != BITMAP_MAGIC) { + pr_err("This is an md array. To view a bitmap you need to examine\n"); +@@ -336,7 +339,6 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) + printf(" Cluster name : %-64s\n", sb->cluster_name); + for (i = 0; i < (int)sb->nodes; i++) { + st = NULL; +- free(info); + fd = bitmap_file_open(filename, &st, i, fd); + if (fd < 0) { + printf(" Unable to open bitmap file on node: %i\n", i); +@@ -347,6 +349,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st) + printf(" Unable to read bitmap on node: %i\n", i); + continue; + } ++ free(sb); + sb = &info->sb; + if (sb->magic != BITMAP_MAGIC) + pr_err("invalid bitmap magic 0x%x, the bitmap file appears to be corrupted\n", sb->magic); +-- +2.41.0 + diff --git a/0002-Incremental-Document-workaround.patch b/0002-Incremental-Document-workaround.patch new file mode 100644 index 0000000000000000000000000000000000000000..cd90291f6412b338f02d89264ffe315c915b07c0 --- /dev/null +++ b/0002-Incremental-Document-workaround.patch @@ -0,0 +1,64 @@ +From 7de5dc53bfbc3bae7d43fc81e51c7c56638004f6 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Mon, 4 Nov 2024 11:37:41 +0100 +Subject: [PATCH 02/39] Incremental: Document workaround + +Keep it documented in code. 
+ +Signed-off-by: Mariusz Tkaczyk +--- + Incremental.c | 28 ++++++++++++++++++++++------ + 1 file changed, 22 insertions(+), 6 deletions(-) + +diff --git a/Incremental.c b/Incremental.c +index 9b455a12..60d6f8cb 100644 +--- a/Incremental.c ++++ b/Incremental.c +@@ -1693,15 +1693,30 @@ int Incremental_remove(char *devname, char *id_path, int verbose) + + mdfd = open_dev_excl(ent->devnm); + if (is_fd_valid(mdfd)) { ++ char *array_state_file = "array_state"; ++ ++ /** ++ * This is a workaround for the old issue. ++ * Incremental_remove() triggered from udev rule when disk is removed from OS ++ * tries to set array in auto-read-only mode. This can interrupt rebuild ++ * process which is started automatically, e.g. if array is mounted and ++ * spare disk is available (I/O errors limit might be achieved faster than disk is ++ * removed by mdadm). Prevent Incremental_remove() from setting array ++ * into "auto-read-only", by requiring exclusive open to succeed. ++ */ + close_fd(&mdfd); +- if (sysfs_get_str(&mdi, NULL, "array_state", +- buf, sizeof(buf)) > 0) { +- if (strncmp(buf, "active", 6) == 0 || +- strncmp(buf, "clean", 5) == 0) +- sysfs_set_str(&mdi, NULL, +- "array_state", "read-auto"); ++ ++ if (sysfs_get_str(&mdi, NULL, array_state_file, buf, sizeof(buf)) > 0) { ++ char *str_read_auto = map_num_s(sysfs_array_states, ARRAY_READ_AUTO); ++ char *str_active = map_num_s(sysfs_array_states, ARRAY_ACTIVE); ++ char *str_clean = map_num_s(sysfs_array_states, ARRAY_CLEAN); ++ ++ if (strncmp(buf, str_active, strlen(str_active)) == 0 || ++ strncmp(buf, str_clean, strlen(str_clean)) == 0) ++ sysfs_set_str(&mdi, NULL, array_state_file, str_read_auto); + } + } ++ + mdfd = open_dev(ent->devnm); + if (mdfd < 0) { + if (verbose >= 0) +@@ -1711,6 +1726,7 @@ int Incremental_remove(char *devname, char *id_path, int verbose) + + if (id_path) { + struct map_ent *map = NULL, *me; ++ + me = map_by_devnm(&map, ent->devnm); + if (me) + policy_save_path(id_path, me); +-- +2.41.0 + diff 
--git a/0004-Incremental-Simplify-remove-logic.patch b/0004-Incremental-Simplify-remove-logic.patch new file mode 100644 index 0000000000000000000000000000000000000000..abfe7d48aa5edb8a46817af06f5d280549e025f8 --- /dev/null +++ b/0004-Incremental-Simplify-remove-logic.patch @@ -0,0 +1,259 @@ +From b9888145987e273a7613209721a68f75e060263e Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Tue, 5 Nov 2024 13:07:16 +0100 +Subject: [PATCH 04/39] Incremental: Simplify remove logic + +Incremental_remove() does not execute Manage_subdevs() now. + +Signed-off-by: Mariusz Tkaczyk +--- + Incremental.c | 116 ++++++++++++++++++++++++++++++++------------------ + Manage.c | 10 ++--- + mdadm.h | 1 + + sysfs.c | 23 ++++++++++ + 4 files changed, 102 insertions(+), 48 deletions(-) + +diff --git a/Incremental.c b/Incremental.c +index 60d6f8cb..228d2bdd 100644 +--- a/Incremental.c ++++ b/Incremental.c +@@ -1610,22 +1610,6 @@ release: + return rv; + } + +-static void remove_from_member_array(struct mdstat_ent *memb, +- struct mddev_dev *devlist, int verbose) +-{ +- int subfd = open_dev(memb->devnm); +- +- if (subfd >= 0) { +- /* +- * Ignore the return value because it's necessary +- * to handle failure condition here. +- */ +- Manage_subdevs(memb->devnm, subfd, devlist, verbose, +- 0, UOPT_UNDEFINED, 0); +- close(subfd); +- } +-} +- + /** + * is_devnode_path() - check if the devname passed might be devnode path. + * @devnode: the path to check. +@@ -1646,25 +1630,81 @@ static bool is_devnode_path(char *devnode) + return false; + } + ++/** ++ * Incremental_remove_external() - Remove the device from external container. ++ * @device_devnm: block device to remove. ++ * @container_devnm: the parent container ++ * @mdstat: mdstat file content. ++ * @verbose: verbose flag. ++ * ++ * Fail member device in each subarray and remove member device from external container. ++ * The resposibility of removing member disks from external subararys belongs to mdmon. 
++ */ ++static mdadm_status_t Incremental_remove_external(char *device_devnm, char *container_devnm, ++ struct mdstat_ent *mdstat, int verbose) ++{ ++ mdadm_status_t rv = MDADM_STATUS_SUCCESS; ++ struct mdstat_ent *memb; ++ ++ for (memb = mdstat ; memb ; memb = memb->next) { ++ mdadm_status_t ret = MDADM_STATUS_SUCCESS; ++ int state_fd; ++ ++ if (!is_container_member(memb, container_devnm)) ++ continue; ++ ++ /* ++ * Checking mdstat is pointless because it might be outdated, try open descriptor ++ * instead. If it fails, we are fine with that, device is already gone. ++ */ ++ state_fd = sysfs_open_memb_attr(memb->devnm, device_devnm, "state", O_RDWR); ++ if (!is_fd_valid(state_fd)) ++ continue; ++ ++ ret = sysfs_set_memb_state_fd(state_fd, MEMB_STATE_FAULTY, NULL); ++ if (ret && verbose >= 0) ++ pr_err("Cannot fail member device %s in external subarray %s.\n", ++ device_devnm, memb->devnm); ++ ++ close_fd(&state_fd); ++ ++ /* ++ * Don't remove member device from container if it failed to remove it ++ * from any member array. ++ */ ++ rv |= ret; ++ } ++ ++ if (rv == MDADM_STATUS_SUCCESS) ++ rv = sysfs_set_memb_state(container_devnm, device_devnm, MEMB_STATE_REMOVE); ++ ++ if (rv && verbose >= 0) ++ pr_err("Cannot remove member device %s from container %s.\n", device_devnm, ++ container_devnm); ++ ++ return rv; ++} ++ + /** + * Incremental_remove() - Remove the device from all raid arrays. + * @devname: the device we want to remove, it could be kernel device name or devnode. + * @id_path: optional, /dev/disk/by-path path to save for bare scenarios support. + * @verbose: verbose flag. + * +- * First, fail the device (if needed) and then remove the device from native raid array or external +- * container. If it is external container, the device is removed from each subarray first. ++ * First, fail the device (if needed) and then remove the device. This code is critical for system ++ * functionality and that is why it is kept as simple as possible. 
We do not load devices using ++ * sysfs_read() because any unerelated failure may lead us to abort. We also do not call ++ * Manage_Subdevs(). + */ + int Incremental_remove(char *devname, char *id_path, int verbose) + { ++ mdadm_status_t rv = MDADM_STATUS_SUCCESS; + char *devnm = basename(devname); +- struct mddev_dev devlist = {0}; + char buf[SYSFS_MAX_BUF_SIZE]; + struct mdstat_ent *mdstat; + struct mdstat_ent *ent; + struct mdinfo mdi; +- int rv = 1; +- int mdfd; ++ int mdfd = -1; + + if (strcmp(devnm, devname) != 0) + if (!is_devnode_path(devname)) { +@@ -1733,32 +1773,24 @@ int Incremental_remove(char *devname, char *id_path, int verbose) + map_free(map); + } + +- devlist.devname = devnm; +- devlist.disposition = 'I'; +- /* for a container, we must fail each member array */ + if (is_mdstat_ent_external(ent)) { +- struct mdstat_ent *memb; +- for (memb = mdstat ; memb ; memb = memb->next) { +- if (is_container_member(memb, ent->devnm)) +- remove_from_member_array(memb, +- &devlist, verbose); +- } +- } else { +- /* +- * This 'I' incremental remove is a try-best effort, +- * the failure condition can be safely ignored +- * because of the following up 'r' remove. 
+- */ +- Manage_subdevs(ent->devnm, mdfd, &devlist, +- verbose, 0, UOPT_UNDEFINED, 0); ++ rv = Incremental_remove_external(devnm, ent->devnm, mdstat, verbose); ++ goto out; + } + +- devlist.disposition = 'r'; +- rv = Manage_subdevs(ent->devnm, mdfd, &devlist, +- verbose, 0, UOPT_UNDEFINED, 0); ++ /* Native arrays are handled separatelly to provide more detailed error handling */ ++ rv = sysfs_set_memb_state(ent->devnm, devnm, MEMB_STATE_FAULTY); ++ if (rv && verbose >= 0) ++ pr_err("Cannot fail member device %s in array %s.\n", devnm, ent->devnm); ++ ++ if (rv == MDADM_STATUS_SUCCESS) ++ rv = sysfs_set_memb_state(ent->devnm, devnm, MEMB_STATE_REMOVE); ++ ++ if (rv && verbose >= 0) ++ pr_err("Cannot remove member device %s from %s.\n", devnm, ent->devnm); + +- close_fd(&mdfd); + out: ++ close_fd(&mdfd); + free_mdstat(mdstat); + return rv; + } +diff --git a/Manage.c b/Manage.c +index d618a2f0..034eb00c 100644 +--- a/Manage.c ++++ b/Manage.c +@@ -1381,8 +1381,6 @@ bool is_remove_safe(mdu_array_info_t *array, const int fd, char *devname, const + * 'f' - set the device faulty SET_DISK_FAULTY + * device can be 'detached' in which case any device that + * is inaccessible will be marked faulty. +- * 'I' - remove device by using incremental fail +- * which is executed when device is removed surprisingly. + * 'R' - mark this device as wanting replacement. + * 'W' - this device is added if necessary and activated as + * a replacement for a previous 'R' device. 
+@@ -1544,9 +1542,9 @@ int Manage_subdevs(char *devname, int fd, + + /* This is a kernel-internal name like 'sda1' */ + +- if (!strchr("rfI", dv->disposition)) { +- pr_err("%s only meaningful with -r, -f or -I, not -%c\n", +- dv->devname, dv->disposition); ++ if (!strchr("rf", dv->disposition)) { ++ pr_err("%s only meaningful with -r, -f, not -%c\n", dv->devname, ++ dv->disposition); + goto abort; + } + +@@ -1673,7 +1671,7 @@ int Manage_subdevs(char *devname, int fd, + close_fd(&sysfd); + goto abort; + } +- case 'I': ++ + if (is_fd_valid(sysfd)) { + rv = sysfs_set_memb_state_fd(sysfd, MEMB_STATE_FAULTY, &err); + } else { +diff --git a/mdadm.h b/mdadm.h +index 218056c8..77705b11 100644 +--- a/mdadm.h ++++ b/mdadm.h +@@ -829,6 +829,7 @@ extern mdadm_status_t sysfs_write_descriptor(const int fd, const char *value, + const ssize_t len, int *errno_p); + extern mdadm_status_t write_attr(const char *value, const int fd); + extern mdadm_status_t sysfs_set_memb_state_fd(int fd, memb_state_t state, int *err); ++extern mdadm_status_t sysfs_set_memb_state(char *array_devnm, char *memb_devnm, memb_state_t state); + extern void sysfs_get_container_devnm(struct mdinfo *mdi, char *buf); + + extern int sysfs_open(char *devnm, char *devname, char *attr); +diff --git a/sysfs.c b/sysfs.c +index 60b27459..c030d634 100644 +--- a/sysfs.c ++++ b/sysfs.c +@@ -149,6 +149,29 @@ mdadm_status_t sysfs_set_memb_state_fd(int fd, memb_state_t state, int *err) + return sysfs_write_descriptor(fd, val, strlen(val), err); + } + ++/** ++ * sysfs_set_memb_state() - write to member disk state file. ++ * @array_devnm: kernel name of the array. ++ * @memb_devnm: kernel name of member device. ++ * @state: value to write. ++ * ++ * Function expects that the device exists, error is unconditionally printed. 
++ */ ++mdadm_status_t sysfs_set_memb_state(char *array_devnm, char *memb_devnm, memb_state_t state) ++{ ++ int state_fd = sysfs_open_memb_attr(array_devnm, memb_devnm, "state", O_RDWR); ++ mdadm_status_t rv; ++ ++ if (!is_fd_valid(state_fd)) { ++ pr_err("Cannot open file descriptor to %s in array %s, aborting.\n", ++ memb_devnm, array_devnm); ++ return MDADM_STATUS_ERROR; ++ } ++ rv = sysfs_set_memb_state_fd(state_fd, state, NULL); ++ close_fd(&state_fd); /* close before returning, otherwise the descriptor leaks */ ++ return rv; ++} ++ + /** + * sysfs_get_container_devnm() - extract container device name. + * @mdi: md_info describes member array, with GET_VERSION option. +-- +2.41.0 + diff --git a/0007-mdadm.man-Remove-external-bitmap.patch b/0007-mdadm.man-Remove-external-bitmap.patch new file mode 100644 index 0000000000000000000000000000000000000000..da318a4acec3bc20aad1b0b6747e61796145fd13 --- /dev/null +++ b/0007-mdadm.man-Remove-external-bitmap.patch @@ -0,0 +1,127 @@ +From ef4b6a23189d804bfd8fa81f5038afe6ce825bde Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Tue, 7 Jan 2025 10:09:16 +0100 +Subject: [PATCH 07/39] mdadm.man: Remove external bitmap + +Remove external bitmap support from manual. + +Signed-off-by: Mariusz Tkaczyk +--- + mdadm.8.in | 55 ++++++------------------------------------------------ + 1 file changed, 6 insertions(+), 49 deletions(-) + +diff --git a/mdadm.8.in b/mdadm.8.in +index 2b6f3e50..83c0689f 100644 +--- a/mdadm.8.in ++++ b/mdadm.8.in +@@ -740,27 +740,11 @@ parameter and are stored internally. + .B none + - create array with no bitmap or remove any present bitmap (grow mode). + +-Setting bitmap for file is deprecated and should not be used. The file should not exist unless +-.B \-\-force +-is also given. The same file should be provided when assembling the array. The file name must +-contain at least one slash ('/'). Bitmap files are only known to work on ext2 and ext3. Storing +-bitmap files on other filesystems may result in serious problems. 
+- +-When creating an array on devices which are 100G or larger, +-.I mdadm +-automatically adds an internal bitmap as it will usually be +-beneficial. This can be suppressed with +-.B "\-\-bitmap=none" +-or by selecting a different consistency policy with +-.BR \-\-consistency\-policy . +- + .TP + .BR \-\-bitmap\-chunk= + Set the chunk size of the bitmap. Each bit corresponds to that many + Kilobytes of storage. +-When using a file-based bitmap, the default is to use the smallest +-size that is at least 4 and requires no more than 2^21 chunks. +-When using an ++ + .B internal + bitmap, the chunk size defaults to 64Meg, or larger if necessary to + fit the bitmap into the available space. +@@ -1108,13 +1092,6 @@ are present. This is only needed with + and can be used if the physical connections to devices are + not as reliable as you would like. + +-.TP +-.BR \-b ", " \-\-bitmap= +-Specify the bitmap file that was given when the array was created. If +-an array has an +-.B internal +-bitmap, there is no need to specify this when assembling the array. +- + .TP + .BR \-\-backup\-file= + If +@@ -1614,9 +1591,8 @@ applies to a whole array which is currently active. + + .TP + .BR \-X ", " \-\-examine\-bitmap +-Report information about a bitmap file. +-The argument is either an external bitmap file or an array component +-in case of an internal bitmap. Note that running this on an array ++Report information about a bitmap. ++The argument is an array component. Note that running this on an array + device (e.g. + .BR /dev/md0 ) + does not report the bitmap for that array. +@@ -1774,10 +1750,7 @@ Only meaningful with + this will scan the + .B map + file for arrays that are being incrementally assembled and will try to +-start any that are not already started. If any such array is listed +-in +-.B mdadm.conf +-as requiring an external bitmap, that bitmap will be attached first. ++start any that are not already started. 
+ + .TP + .BR \-\-fail ", " \-f +@@ -2151,15 +2124,7 @@ setting. + .\".B \-\-size + .\"is given, the apparent size of the smallest drive given is used. + +-If the array type supports a write-intent bitmap, and if the devices +-in the array exceed 100G is size, an internal write-intent bitmap +-will automatically be added unless some other option is explicitly +-requested with the +-.B \-\-bitmap +-option or a different consistency policy is selected with the +-.B \-\-consistency\-policy +-option. In any case, space for a bitmap will be reserved so that one +-can be added later with ++Space for a bitmap will be reserved so that one can be added later with + .BR "\-\-grow \-\-bitmap=internal" . + + If the metadata type supports it (currently only 1.x and IMSM metadata), +@@ -2735,11 +2700,6 @@ Also, the size of an array cannot be changed while it has an active + bitmap. If an array has a bitmap, it must be removed before the size + can be changed. Once the change is complete a new bitmap can be created. + +-.PP +-Note: +-.B "--grow --size" +-is not yet supported for external file bitmap. +- + .SS RAID\-DEVICES CHANGES + + A RAID1 array can work with any number of devices from 1 upwards +@@ -2834,10 +2794,7 @@ stored on the device being reshaped. + .SS BITMAP CHANGES + + A write-intent bitmap can be added to, or removed from, an active +-array. Either internal bitmaps, or bitmaps stored in a separate file, +-can be added. Note that if you add a bitmap stored in a file which is +-in a filesystem that is on the RAID array being affected, the system +-will deadlock. The bitmap must be on a separate filesystem. ++array. 
+ + .SS CONSISTENCY POLICY CHANGES + +-- +2.41.0 + diff --git a/0008-Remove-freeze-reshape-logic.patch b/0008-Remove-freeze-reshape-logic.patch new file mode 100644 index 0000000000000000000000000000000000000000..f7de89e6d0b88e43e58f860bffea2d30ce2c1de9 --- /dev/null +++ b/0008-Remove-freeze-reshape-logic.patch @@ -0,0 +1,223 @@ +From cbc1cd589496a4ae16eb226a7fbad71a7d3d842d Mon Sep 17 00:00:00 2001 +From: Mateusz Kusiak +Date: Wed, 16 Oct 2024 10:48:08 +0000 +Subject: [PATCH 08/39] Remove --freeze-reshape logic + +This commit removes --freeze-reshape logic, it basicaly reverts +commit b76b30e0f950 ("Do not continue reshape during initrd phase"). +--freeze-reshape was supposed to be used to restore critical sector in +incremental and assemble operations without starting a reshape process, +but it's meaning has been lost through the years and it is not +currently used. + +A replacement for this logic will be added in incoming patches, so +reshapes won't be started in initrd phrase. + +Signed-off-by: Mateusz Kusiak +--- + Grow.c | 30 +++++++----------------------- + ReadMe.c | 1 - + mdadm.8.in | 37 ------------------------------------- + mdadm.c | 6 ------ + mdadm.h | 2 -- + 5 files changed, 7 insertions(+), 69 deletions(-) + +diff --git a/Grow.c b/Grow.c +index cc1be6cc..0d9e3b53 100644 +--- a/Grow.c ++++ b/Grow.c +@@ -1746,7 +1746,7 @@ static int reshape_array(char *container, int fd, char *devname, + int force, struct mddev_dev *devlist, + unsigned long long data_offset, + char *backup_file, int verbose, int forked, +- int restart, int freeze_reshape); ++ int restart); + static int reshape_container(char *container, char *devname, + int mdfd, + struct supertype *st, +@@ -2341,7 +2341,7 @@ size_change_error: + sync_metadata(st); + rv = reshape_array(container, fd, devname, st, &info, c->force, + devlist, s->data_offset, c->backup_file, +- c->verbose, 0, 0, 0); ++ c->verbose, 0, 0); + frozen = 0; + } + release: +@@ -3000,7 +3000,7 @@ static int reshape_array(char 
*container, int fd, char *devname, + int force, struct mddev_dev *devlist, + unsigned long long data_offset, + char *backup_file, int verbose, int forked, +- int restart, int freeze_reshape) ++ int restart) + { + struct reshape reshape; + int spares_needed; +@@ -3484,14 +3484,6 @@ started: + } + if (restart) + sysfs_set_str(sra, NULL, "array_state", "active"); +- if (freeze_reshape) { +- free(fdlist); +- free(offsets); +- sysfs_free(sra); +- pr_err("Reshape has to be continued from location %llu when root filesystem has been mounted.\n", +- sra->reshape_progress); +- return 1; +- } + + if (!forked) + if (continue_via_systemd(container ?: sra->sys_name, +@@ -3688,7 +3680,7 @@ int reshape_container(char *container, char *devname, + */ + ping_monitor(container); + +- if (!forked && !c->freeze_reshape) ++ if (!forked) + if (continue_via_systemd(container, GROW_SERVICE, NULL)) + return 0; + +@@ -3698,8 +3690,7 @@ int reshape_container(char *container, char *devname, + unfreeze(st); + return 1; + default: /* parent */ +- if (!c->freeze_reshape) +- printf("%s: multi-array reshape continues in background\n", Name); ++ printf("%s: multi-array reshape continues in background\n", Name); + return 0; + case 0: /* child */ + manage_fork_fds(0); +@@ -3797,15 +3788,9 @@ int reshape_container(char *container, char *devname, + + rv = reshape_array(container, fd, adev, st, + content, c->force, NULL, INVALID_SECTORS, +- c->backup_file, c->verbose, 1, restart, +- c->freeze_reshape); ++ c->backup_file, c->verbose, 1, restart); + close(fd); + +- if (c->freeze_reshape) { +- sysfs_free(cc); +- exit(0); +- } +- + restart = 0; + if (rv) + break; +@@ -5220,8 +5205,7 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, + } else + ret_val = reshape_array(NULL, mdfd, "array", st, info, 1, + NULL, INVALID_SECTORS, c->backup_file, +- 0, forked, 1 | info->reshape_active, +- c->freeze_reshape); ++ 0, forked, 1 | info->reshape_active); + + return ret_val; + } +diff --git 
a/ReadMe.c b/ReadMe.c +index 9c29723f..c2415c26 100644 +--- a/ReadMe.c ++++ b/ReadMe.c +@@ -158,7 +158,6 @@ struct option long_options[] = { + {"scan", 0, 0, 's'}, + {"force", 0, 0, Force}, + {"update", 1, 0, 'U'}, +- {"freeze-reshape", 0, 0, FreezeReshape}, + + /* Management */ + {"add", 0, 0, Add}, +diff --git a/mdadm.8.in b/mdadm.8.in +index 83c0689f..45255521 100644 +--- a/mdadm.8.in ++++ b/mdadm.8.in +@@ -880,31 +880,6 @@ different versions of + .I mdadm + are used to add different devices). + +-.TP +-.BR \-\-continue +-This option is complementary to the +-.B \-\-freeze-reshape +-option for assembly. It is needed when +-.B \-\-grow +-operation is interrupted and it is not restarted automatically due to +-.B \-\-freeze-reshape +-usage during array assembly. This option is used together with +-.BR \-G +-, ( +-.BR \-\-grow +-) command and device for a pending reshape to be continued. +-All parameters required for reshape continuation will be read from array metadata. +-If initial +-.BR \-\-grow +-command had required +-.BR \-\-backup\-file= +-option to be set, continuation option will require to have exactly the same +-backup file given as well. +-.IP +-Any other parameter passed together with +-.BR \-\-continue +-option will be ignored. +- + .TP + .BR \-N ", " \-\-name= + Set a +@@ -1302,18 +1277,6 @@ or + and allows the array to be again used on a kernel prior to Linux 5.3. + This option should be used with great caution. + +-.TP +-.BR \-\-freeze\-reshape +-This option is intended to be used in start-up scripts during the initrd boot phase. +-When the array under reshape is assembled during the initrd phase, this option +-stops the reshape after the reshape-critical section has been restored. This happens +-before the file system pivot operation and avoids loss of filesystem context. +-Losing file system context would cause reshape to be broken. +- +-Reshape can be continued later using the +-.B \-\-continue +-option for the grow command. 
+- + .SH For Manage mode: + + .TP +diff --git a/mdadm.c b/mdadm.c +index 7d3b656b..a72058b4 100644 +--- a/mdadm.c ++++ b/mdadm.c +@@ -710,12 +710,6 @@ int main(int argc, char *argv[]) + case O(MANAGE,Force): /* add device which is too large */ + c.force = 1; + continue; +- /* now for the Assemble options */ +- case O(ASSEMBLE, FreezeReshape): /* Freeze reshape during +- * initrd phase */ +- case O(INCREMENTAL, FreezeReshape): +- c.freeze_reshape = 1; +- continue; + case O(CREATE,'u'): /* uuid of array */ + case O(ASSEMBLE,'u'): /* uuid of array */ + if (ident.uuid_set) { +diff --git a/mdadm.h b/mdadm.h +index 77705b11..6062e167 100644 +--- a/mdadm.h ++++ b/mdadm.h +@@ -530,7 +530,6 @@ enum special_options { + RebuildMapOpt, + InvalidBackup, + UdevRules, +- FreezeReshape, + Continue, + OffRootOpt, + Prefer, +@@ -680,7 +679,6 @@ struct context { + int scan; + int SparcAdjust; + int delay; +- int freeze_reshape; + char *backup_file; + int invalid_backup; + char *action; +-- +2.41.0 + diff --git a/0010-mdadm-Do-not-start-reshape-before-switchroot.patch b/0010-mdadm-Do-not-start-reshape-before-switchroot.patch new file mode 100644 index 0000000000000000000000000000000000000000..496363e3c366b9efe21f90be33a484307fe0149d --- /dev/null +++ b/0010-mdadm-Do-not-start-reshape-before-switchroot.patch @@ -0,0 +1,217 @@ +From 8a0d3fea424c1c19c51993c0849ea76ea41e8003 Mon Sep 17 00:00:00 2001 +From: Mateusz Kusiak +Date: Thu, 10 Oct 2024 10:31:06 +0000 +Subject: [PATCH 10/39] mdadm: Do not start reshape before switchroot + +There are numerous issues for --grow --continue in switchroot phrase, +they include: +* Events being missed for restarting grow-continue service. This is + apparent mostly on OS on RAID scenarios. When a checkpoint (next step) + is committed, we have no reliable way to gracefully stop reshape until + it reaches that checkpoint. During boot, there's heavy I/O utilisation, + which causes sync speed drop, and naturally checkpoint takes longer to + reach. 
This further causes systemd to forcefully kill grow-continue + service due to timeouts, which results in udev event being missed for + grow-continue service restart. +* Grow-continue (seemingly) was not designed to be restarted without + reassembly, some things like stopping chunksize (to lower) migration + were straight up not working until recently. +This patch makes grow-continue (actual reshape) start after switchroot +phrase. This way we should not encounter issues related to restarting +the service. + +Add checks not start a reshape if in initrd, let it initialise only. +Change grow-continue udev rule to be triggered whenever there's a +reshape happening in metadata, rely on udev event to kick reshape after +switchroot. Add handle_forking helper function for reshapes to avoid +duplicating code. + +Signed-off-by: Mateusz Kusiak +--- + Grow.c | 81 +++++++++++++++++++++++++++------------ + mdadm_status.h | 3 +- + udev-md-raid-arrays.rules | 3 +- + util.c | 1 + + 4 files changed, 61 insertions(+), 27 deletions(-) + +diff --git a/Grow.c b/Grow.c +index 0d9e3b53..2719346c 100644 +--- a/Grow.c ++++ b/Grow.c +@@ -2995,6 +2995,34 @@ static void catch_term(int sig) + sigterm = 1; + } + ++ ++/** ++ * handle_forking() - Handle reshape forking. ++ * ++ * @forked: if already forked. ++ * @devname: device name. ++ * Returns: MDADM_STATUS_ERROR if fork() failed, ++ * MDADM_STATUS_FORKED if child process (or @forked was already set), ++ * MDADM_STATUS_SUCCESS if job delegated to forked process or systemd. ++ * ++ * This function is a helper function for reshapes for fork handling. 
++ */ ++static mdadm_status_t handle_forking(bool forked, char *devname) ++{ ++ if (forked) ++ return MDADM_STATUS_FORKED; ++ ++ if (devname && continue_via_systemd(devname, GROW_SERVICE, NULL)) ++ return MDADM_STATUS_SUCCESS; ++ ++ switch (fork()) { ++ case -1: return MDADM_STATUS_ERROR; /* error */ ++ case 0: return MDADM_STATUS_FORKED; /* child */ ++ default: return MDADM_STATUS_SUCCESS; /* parent */ ++ } ++ ++} ++ + static int reshape_array(char *container, int fd, char *devname, + struct supertype *st, struct mdinfo *info, + int force, struct mddev_dev *devlist, +@@ -3485,33 +3513,35 @@ started: + if (restart) + sysfs_set_str(sra, NULL, "array_state", "active"); + +- if (!forked) +- if (continue_via_systemd(container ?: sra->sys_name, +- GROW_SERVICE, NULL)) { +- free(fdlist); +- free(offsets); +- sysfs_free(sra); +- return 0; +- } ++ /* Do not run in initrd */ ++ if (in_initrd()) { ++ free(fdlist); ++ free(offsets); ++ sysfs_free(sra); ++ pr_info("Reshape has to be continued from location %llu when root filesystem has been mounted.\n", ++ sra->reshape_progress); ++ return 1; ++ } + + /* Now we just need to kick off the reshape and watch, while + * handling backups of the data... + * This is all done by a forked background process. + */ +- switch(forked ? 0 : fork()) { +- case -1: ++ switch (handle_forking(forked, container ? container : sra->sys_name)) { ++ default: /* Unused, only to satisfy compiler. 
*/ ++ case MDADM_STATUS_ERROR: /* error */ + pr_err("Cannot run child to monitor reshape: %s\n", + strerror(errno)); + abort_reshape(sra); + goto release; +- default: ++ case MDADM_STATUS_FORKED: /* child */ ++ map_fork(); ++ break; ++ case MDADM_STATUS_SUCCESS: /* parent */ + free(fdlist); + free(offsets); + sysfs_free(sra); + return 0; +- case 0: +- map_fork(); +- break; + } + + /* Close unused file descriptor in the forked process */ +@@ -3680,22 +3710,19 @@ int reshape_container(char *container, char *devname, + */ + ping_monitor(container); + +- if (!forked) +- if (continue_via_systemd(container, GROW_SERVICE, NULL)) +- return 0; +- +- switch (forked ? 0 : fork()) { +- case -1: /* error */ ++ switch (handle_forking(forked, container)) { ++ default: /* Unused, only to satisfy compiler. */ ++ case MDADM_STATUS_ERROR: /* error */ + perror("Cannot fork to complete reshape\n"); + unfreeze(st); + return 1; +- default: /* parent */ +- printf("%s: multi-array reshape continues in background\n", Name); +- return 0; +- case 0: /* child */ ++ case MDADM_STATUS_FORKED: /* child */ + manage_fork_fds(0); + map_fork(); + break; ++ case MDADM_STATUS_SUCCESS: /* parent */ ++ printf("%s: multi-array reshape continues in background\n", Name); ++ return 0; + } + + /* close unused handle in child process +@@ -3791,6 +3818,12 @@ int reshape_container(char *container, char *devname, + c->backup_file, c->verbose, 1, restart); + close(fd); + ++ /* Do not run reshape in initrd but let it initialize.*/ ++ if (in_initrd()) { ++ sysfs_free(cc); ++ exit(0); ++ } ++ + restart = 0; + if (rv) + break; +diff --git a/mdadm_status.h b/mdadm_status.h +index 905105e2..e244127c 100644 +--- a/mdadm_status.h ++++ b/mdadm_status.h +@@ -7,7 +7,8 @@ typedef enum mdadm_status { + MDADM_STATUS_SUCCESS = 0, + MDADM_STATUS_ERROR, + MDADM_STATUS_UNDEF, +- MDADM_STATUS_MEM_FAIL ++ MDADM_STATUS_MEM_FAIL, ++ MDADM_STATUS_FORKED + } mdadm_status_t; + + #endif +diff --git a/udev-md-raid-arrays.rules 
b/udev-md-raid-arrays.rules +index 4e64b249..d8de6d00 100644 +--- a/udev-md-raid-arrays.rules ++++ b/udev-md-raid-arrays.rules +@@ -15,7 +15,6 @@ ENV{DEVTYPE}=="partition", GOTO="md_ignore_state" + ATTR{md/metadata_version}=="external:[A-Za-z]*", ATTR{md/array_state}=="inactive", GOTO="md_ignore_state" + TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end" + ATTR{md/array_state}=="clear*|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end" +-ATTR{md/sync_action}=="reshape", ENV{RESHAPE_ACTIVE}="yes" + LABEL="md_ignore_state" + + IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode" +@@ -40,6 +39,6 @@ ENV{MD_LEVEL}=="raid[1-9]*", ENV{SYSTEMD_WANTS}+="mdmonitor.service" + ENV{MD_LEVEL}=="raid[1-9]*", ENV{MD_CONTAINER}=="?*", PROGRAM="/usr/bin/readlink $env{MD_CONTAINER}", ENV{MD_MON_THIS}="%c" + ENV{MD_MON_THIS}=="?*", TEST=="/etc/initrd-release", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@initrd-%c.service" + ENV{MD_MON_THIS}=="?*", TEST!="/etc/initrd-release", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@%c.service" +-ENV{RESHAPE_ACTIVE}=="yes", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdadm-grow-continue@%c.service" ++ENV{MD_RESHAPE_ACTIVE}=="True", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdadm-grow-continue@%c.service" + + LABEL="md_end" +diff --git a/util.c b/util.c +index 6aa44a80..8099852f 100644 +--- a/util.c ++++ b/util.c +@@ -2307,6 +2307,7 @@ int continue_via_systemd(char *devnm, char *service_name, char *prefix) + int pid, status; + char pathbuf[1024]; + ++ dprintf("Start %s service\n", service_name); + /* Simply return that service cannot be started */ + if (check_env("MDADM_NO_SYSTEMCTL")) + return 0; +-- +2.41.0 + diff --git a/0012-Refactor-continue_via_systemd.patch b/0012-Refactor-continue_via_systemd.patch new file mode 100644 index 0000000000000000000000000000000000000000..33ed7efb8610479696225c946cd5543e1acd4c9e 
--- /dev/null +++ b/0012-Refactor-continue_via_systemd.patch @@ -0,0 +1,117 @@ +From 82ccad68d46d4b10a928bc860c0feedf26e483e3 Mon Sep 17 00:00:00 2001 +From: Mateusz Kusiak +Date: Wed, 20 Nov 2024 19:01:30 +0000 +Subject: [PATCH 12/39] Refactor continue_via_systemd() + +Refactor continue_via_systemd() and it's calls to make it more readable. +No functional changes. + +Signed-off-by: Mateusz Kusiak +--- + Grow.c | 2 +- + mdadm.h | 2 +- + util.c | 43 ++++++++++++++++++++++++------------------- + 3 files changed, 26 insertions(+), 21 deletions(-) + +diff --git a/Grow.c b/Grow.c +index 818eb6a4..53b0b387 100644 +--- a/Grow.c ++++ b/Grow.c +@@ -3013,7 +3013,7 @@ static mdadm_status_t handle_forking(bool forked, char *devname) + if (forked) + return MDADM_STATUS_FORKED; + +- if (devname && continue_via_systemd(devname, GROW_SERVICE, NULL)) ++ if (devname && continue_via_systemd(devname, GROW_SERVICE, NULL) == MDADM_STATUS_SUCCESS) + return MDADM_STATUS_SUCCESS; + + switch (fork()) { +diff --git a/mdadm.h b/mdadm.h +index 6062e167..e84c341c 100644 +--- a/mdadm.h ++++ b/mdadm.h +@@ -1694,7 +1694,7 @@ extern int same_dev(char *one, char *two); + extern int compare_paths (char* path1,char* path2); + extern void enable_fds(int devices); + extern void manage_fork_fds(int close_all); +-extern int continue_via_systemd(char *devnm, char *service_name, char *prefix); ++extern mdadm_status_t continue_via_systemd(char *devnm, char *service_name, char *prefix); + + extern void ident_init(struct mddev_ident *ident); + extern mdadm_status_t ident_set_devname(struct mddev_ident *ident, const char *devname); +diff --git a/util.c b/util.c +index 8099852f..8c45f0e1 100644 +--- a/util.c ++++ b/util.c +@@ -1982,7 +1982,7 @@ int start_mdmon(char *devnm) + + if (check_env("MDADM_NO_MDMON")) + return 0; +- if (continue_via_systemd(devnm, MDMON_SERVICE, prefix)) ++ if (continue_via_systemd(devnm, MDMON_SERVICE, prefix) == MDADM_STATUS_SUCCESS) + return 0; + + /* That failed, try running mdmon 
directly */ +@@ -2299,36 +2299,41 @@ void manage_fork_fds(int close_all) + /* In a systemd/udev world, it is best to get systemd to + * run daemon rather than running in the background. + * Returns: +- * 1- if systemd service has been started +- * 0- otherwise ++ * MDADM_STATUS_SUCCESS - if systemd service has been started. ++ * MDADM_STATUS_ERROR - otherwise. + */ +-int continue_via_systemd(char *devnm, char *service_name, char *prefix) ++mdadm_status_t continue_via_systemd(char *devnm, char *service_name, char *prefix) + { + int pid, status; +- char pathbuf[1024]; ++ char pathbuf[PATH_MAX]; + + dprintf("Start %s service\n", service_name); + /* Simply return that service cannot be started */ + if (check_env("MDADM_NO_SYSTEMCTL")) +- return 0; ++ return MDADM_STATUS_SUCCESS; ++ ++ /* Fork in attempt to start services */ + switch (fork()) { +- case 0: +- manage_fork_fds(1); +- snprintf(pathbuf, sizeof(pathbuf), +- "%s@%s%s.service", service_name, prefix ?: "", devnm); +- status = execl("/usr/bin/systemctl", "systemctl", "restart", +- pathbuf, NULL); +- status = execl("/bin/systemctl", "systemctl", "restart", +- pathbuf, NULL); +- exit(1); +- case -1: /* Just do it ourselves. */ ++ case -1: /* Fork failed, just do it ourselves. */ + break; +- default: /* parent - good */ ++ case 0: /* child */ ++ manage_fork_fds(1); ++ snprintf(pathbuf, sizeof(pathbuf), "%s@%s%s.service", ++ service_name, prefix ? prefix : "", devnm); ++ ++ /* Attempt to start service. ++ * On success execl() will "kill" the fork, and return status of systemctl call. 
++ */ ++ execl("/usr/bin/systemctl", "systemctl", "restart", pathbuf, NULL); ++ execl("/bin/systemctl", "systemctl", "restart", pathbuf, NULL); ++ exit(MDADM_STATUS_ERROR); ++ default: /* parent */ ++ /* Check if forked process successfully trigered service */ + pid = wait(&status); + if (pid >= 0 && status == 0) +- return 1; ++ return MDADM_STATUS_SUCCESS; + } +- return 0; ++ return MDADM_STATUS_ERROR; + } + + int in_initrd(void) +-- +2.41.0 + diff --git a/0013-mdadm-raid6check-add-xmalloc.h-to-raid6check.c.patch b/0013-mdadm-raid6check-add-xmalloc.h-to-raid6check.c.patch new file mode 100644 index 0000000000000000000000000000000000000000..d71864c943f166cf4eb49e211ef1eb37709fdb80 --- /dev/null +++ b/0013-mdadm-raid6check-add-xmalloc.h-to-raid6check.c.patch @@ -0,0 +1,32 @@ +From e0df6c4c984d564e9e40913727e916a6cd8f466e Mon Sep 17 00:00:00 2001 +From: Xiao Ni +Date: Fri, 17 Jan 2025 15:15:40 +0800 +Subject: [PATCH 13/39] mdadm/raid6check: add xmalloc.h to raid6check.c + +It reports building error: +raid6check.c:324:26: error: implicit declaration of function xmalloc + +Add xmalloc.h to raid6check.c file to fix this. 
+ +Signed-off-by: Xiao Ni +Link: https://lore.kernel.org/r/20250117071540.4094-1-xni@redhat.com +Signed-off-by: Song Liu +--- + raid6check.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/raid6check.c b/raid6check.c +index 99477761..95533f7d 100644 +--- a/raid6check.c ++++ b/raid6check.c +@@ -23,6 +23,7 @@ + */ + + #include "mdadm.h" ++#include "xmalloc.h" + #include + #include + +-- +2.41.0 + diff --git a/0014-mdopen-add-sbin-path-to-env-PATH-when-call-system-mo.patch b/0014-mdopen-add-sbin-path-to-env-PATH-when-call-system-mo.patch new file mode 100644 index 0000000000000000000000000000000000000000..6c538ab1e0a5ed44339b69d1c5488e5996693371 --- /dev/null +++ b/0014-mdopen-add-sbin-path-to-env-PATH-when-call-system-mo.patch @@ -0,0 +1,56 @@ +From b1ee932b89a16c881a3336f9fd728d46c1f8c65d Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Wed, 22 Jan 2025 23:18:59 +0800 +Subject: [PATCH 14/39] mdopen: add sbin path to env PATH when call + system("modprobe md_mod") + +During the boot process if mdadm is called in udev context, sbin paths +like /sbin, /usr/sbin, /usr/local/sbin normally not defined in PATH env +variable, calling system("modprobe md_mod") in create_named_array() may +fail with 'sh: modprobe: command not found' error message. + +We don't want to move modprobe binary into udev private directory, so +setting the PATH env is a more proper method to avoid the above issue. + +This patch sets PATH env variable with "/sbin:/usr/sbin:/usr/local/sbin" +before calling system("modprobe md_mod"). The change only takes effect +within the udev worker context, not seen by global udev environment. 
+ +Signed-off-by: Coly Li +Signed-off-by: Mariusz Tkaczyk +--- + mdopen.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/mdopen.c b/mdopen.c +index 26f0c716..57252b64 100644 +--- a/mdopen.c ++++ b/mdopen.c +@@ -39,6 +39,24 @@ int create_named_array(char *devnm) + + fd = open(new_array_file, O_WRONLY); + if (fd < 0 && errno == ENOENT) { ++ char buf[PATH_MAX] = {0}; ++ char *env_ptr; ++ ++ env_ptr = getenv("PATH"); ++ /* ++ * When called by udev worker context, path of modprobe ++ * might not be in env PATH. Set sbin paths into PATH ++ * env to avoid potential failure when run modprobe here. ++ */ ++ if (env_ptr) ++ snprintf(buf, PATH_MAX - 1, "%s:%s", env_ptr, ++ "/sbin:/usr/sbin:/usr/local/sbin"); ++ else ++ snprintf(buf, PATH_MAX - 1, "%s", ++ "/sbin:/usr/sbin:/usr/local/sbin"); ++ ++ setenv("PATH", buf, 1); ++ + if (system("modprobe md_mod") == 0) + fd = open(new_array_file, O_WRONLY); + } +-- +2.41.0 + diff --git a/0015-udev-persist-properties-of-MD-devices-after-switch_r.patch b/0015-udev-persist-properties-of-MD-devices-after-switch_r.patch new file mode 100644 index 0000000000000000000000000000000000000000..6f678b67ce12a704be0e760acf46542399ba35cc --- /dev/null +++ b/0015-udev-persist-properties-of-MD-devices-after-switch_r.patch @@ -0,0 +1,40 @@ +From 21e4efb1cd15c4de4a57de26b0ea2e4234aa8ce5 Mon Sep 17 00:00:00 2001 +From: Antonio Alvarez Feijoo +Date: Thu, 23 Jan 2025 19:46:38 +0100 +Subject: [PATCH 15/39] udev: persist properties of MD devices after + switch_root + +dracut installs in the initrd a custom udev rule for MD devices +(59-persistent-storage-md.rules) only to set the db_persist option (see +[1]). The main purpose is that if an MD device is activated in the initrd, +its properties are kept on the udev database after the transition from the +initrd to the rootfs. This was added to fix detection issues when LVM is +on top. 
+ +This patch would allow to remove the custom udev rule shipped by dracut +(63-md-raid-arrays.rules is already being installed in the initrd), and it +will also benefit other initrd generators that do not want to create +custom udev rules. + +[1] https://github.com/dracutdevs/dracut/blob/master/modules.d/90mdraid + +Signed-off-by: Antonio Alvarez Feijoo +--- + udev-md-raid-arrays.rules | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules +index d8de6d00..a8098dc5 100644 +--- a/udev-md-raid-arrays.rules ++++ b/udev-md-raid-arrays.rules +@@ -29,6 +29,7 @@ ENV{DEVTYPE}=="partition", ENV{MD_DEVNAME}=="*[0-9]", SYMLINK+="md/$env{MD_DEVNA + IMPORT{builtin}="blkid" + OPTIONS+="link_priority=100" + OPTIONS+="watch" ++OPTIONS+="db_persist" + ENV{ID_FS_USAGE}=="filesystem|other|crypto", ENV{ID_FS_UUID_ENC}=="?*", SYMLINK+="disk/by-uuid/$env{ID_FS_UUID_ENC}" + ENV{ID_FS_USAGE}=="filesystem|other", ENV{ID_PART_ENTRY_UUID}=="?*", SYMLINK+="disk/by-partuuid/$env{ID_PART_ENTRY_UUID}" + ENV{ID_FS_USAGE}=="filesystem|other", ENV{ID_FS_LABEL_ENC}=="?*", SYMLINK+="disk/by-label/$env{ID_FS_LABEL_ENC}" +-- +2.41.0 + diff --git a/0016-mdadm-fix-grow-with-add-for-linear.patch b/0016-mdadm-fix-grow-with-add-for-linear.patch new file mode 100644 index 0000000000000000000000000000000000000000..e0ed44c586f5b4f9027b7356064f93d4cd9706ff --- /dev/null +++ b/0016-mdadm-fix-grow-with-add-for-linear.patch @@ -0,0 +1,36 @@ +From c09ae8417dc9e11da1d5bf2867c6498050c6ddb9 Mon Sep 17 00:00:00 2001 +From: Yu Kuai +Date: Fri, 27 Dec 2024 14:07:02 +0800 +Subject: [PATCH 16/39] mdadm: fix --grow with --add for linear + +For the case mdadm --grow with --add, the s.btype should not be +initialized yet, hence BitmapUnknown should be checked instead of +BitmapNone. 
+ +Noted that this behaviour should only support by md-linear, which is +removed from kernel, howerver, it turns out md-linear is used widely +in home NAS and we're planning to reintroduce it soon. + +Fixes: 581ba1341017 ("mdadm: remove bitmap file support") +Signed-off-by: Yu Kuai +Signed-off-by: Mariusz Tkaczyk +--- + mdadm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mdadm.c b/mdadm.c +index a72058b4..6200cd0e 100644 +--- a/mdadm.c ++++ b/mdadm.c +@@ -1619,7 +1619,7 @@ int main(int argc, char *argv[]) + if (devs_found > 1 && s.raiddisks == 0 && s.level == UnSet) { + /* must be '-a'. */ + if (s.size > 0 || s.chunk || +- s.layout_str || s.btype != BitmapNone) { ++ s.layout_str || s.btype != BitmapUnknown) { + pr_err("--add cannot be used with other geometry changes in --grow mode\n"); + rv = 1; + break; +-- +2.41.0 + diff --git a/0017-platform-intel-Disable-legacy-option-ROM-scan-on-UEF.patch b/0017-platform-intel-Disable-legacy-option-ROM-scan-on-UEF.patch new file mode 100644 index 0000000000000000000000000000000000000000..56e2af14ce9a393866d6f26f952d5e500afb55dd --- /dev/null +++ b/0017-platform-intel-Disable-legacy-option-ROM-scan-on-UEF.patch @@ -0,0 +1,41 @@ +From 1fc0f290caeb0720aa6c97177ab429953f5bf10f Mon Sep 17 00:00:00 2001 +From: Ross Lagerwall +Date: Wed, 29 Jan 2025 13:31:11 +0000 +Subject: [PATCH 17/39] platform-intel: Disable legacy option ROM scan on UEFI + machines + +The legacy option ROM memory range from 0xc0000-0xeffff is not defined +on UEFI machines so don't attempt to scan it. This avoids lockdown log +spam when Secure Boot is enabled (avoids use of /dev/mem). 
+ +Signed-off-by: Ross Lagerwall +--- + platform-intel.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/platform-intel.c b/platform-intel.c +index 95bc4929..270aef36 100644 +--- a/platform-intel.c ++++ b/platform-intel.c +@@ -607,6 +607,7 @@ const struct imsm_orom *imsm_platform_test(struct sys_dev *hba) + + static const struct imsm_orom *find_imsm_hba_orom(struct sys_dev *hba) + { ++ struct stat st; + unsigned long align; + + if (check_env("IMSM_TEST_OROM")) +@@ -616,6 +617,10 @@ static const struct imsm_orom *find_imsm_hba_orom(struct sys_dev *hba) + if (check_env("IMSM_TEST_AHCI_EFI") || check_env("IMSM_TEST_SCU_EFI")) + return NULL; + ++ /* Skip legacy option ROM scan when EFI booted */ ++ if (stat("/sys/firmware/efi", &st) == 0 && S_ISDIR(st.st_mode)) ++ return NULL; ++ + find_intel_devices(); + + if (intel_devices == NULL) +-- +2.41.0 + diff --git a/0018-super-ddf-Prevent-crash-when-handling-DDF-metadata.patch b/0018-super-ddf-Prevent-crash-when-handling-DDF-metadata.patch new file mode 100644 index 0000000000000000000000000000000000000000..cc27630f0386e86980d6f6f63315b7680d6df02a --- /dev/null +++ b/0018-super-ddf-Prevent-crash-when-handling-DDF-metadata.patch @@ -0,0 +1,61 @@ +From 9e8b3b1492cff63dafb759382c74a479460f49e6 Mon Sep 17 00:00:00 2001 +From: lilinzhe +Date: Mon, 16 Dec 2024 12:00:02 +0800 +Subject: [PATCH 18/39] super-ddf: Prevent crash when handling DDF metadata + +A dummy function is defined because availability of ss->update_super is +not always verified. + +This fix addresses a crash reported when assembling a RAID array using +mdadm with DDF metadata. For more details, see the discussion at: +https://lore.kernel.org/all/ +CALHdMH30LuxR4tz9jP2ykDaDJtZ3P7L3LrZ+9e4Fq=Q6NwSM=Q@mail.gmail.com/ + +The discussion centers on an issue with mdadm where attempting to +assemble a RAID array caused a null pointer dereference. The problem +was traced to a missing update_super() function in super-ddf.c, which +led to a crash in Assemble.c. 
+ +Signed-off-by: lilinzhe +--- + super-ddf.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/super-ddf.c b/super-ddf.c +index 6cd099ab..a06ed435 100644 +--- a/super-ddf.c ++++ b/super-ddf.c +@@ -5195,6 +5195,21 @@ static void default_geometry_ddf(struct supertype *st, int *level, int *layout, + *layout = ddf_level_to_layout(*level); + } + ++static int update_super_ddf_dummy(struct supertype *st, struct mdinfo *info, ++ enum update_opt update, ++ char *devname, int verbose, ++ int uuid_set, char *homehost) ++{ ++ /* ++ * A dummy update_super function is required to ensure ++ * reliable handling of DDF metadata in mdadm. ++ * This implementation acts as a placeholder for cases ++ * where ss->update_super is not verified. ++ */ ++ dprintf("update_super is not implemented in DDF\n"); ++ return 0; ++} ++ + struct superswitch super_ddf = { + .examine_super = examine_super_ddf, + .brief_examine_super = brief_examine_super_ddf, +@@ -5213,6 +5228,8 @@ struct superswitch super_ddf = { + .uuid_from_super= uuid_from_super_ddf, + .getinfo_super = getinfo_super_ddf, + ++ .update_super = update_super_ddf_dummy, ++ + .avail_size = avail_size_ddf, + + .compare_super = compare_super_ddf, +-- +2.41.0 + diff --git a/0019-super-ddf-optimize-DDF-header-search-for-widely-used.patch b/0019-super-ddf-optimize-DDF-header-search-for-widely-used.patch new file mode 100644 index 0000000000000000000000000000000000000000..eec122abb9e332278d20daf7b994cb3e20d60364 --- /dev/null +++ b/0019-super-ddf-optimize-DDF-header-search-for-widely-used.patch @@ -0,0 +1,297 @@ +From f2197b6b6c14af6c788c628acd1fc6d92c268c53 Mon Sep 17 00:00:00 2001 +From: lilinzhe +Date: Mon, 16 Dec 2024 12:11:41 +0800 +Subject: [PATCH 19/39] super-ddf: optimize DDF header search for widely used + RAID controllers + +Implemented fallback logic to search the last 32MB of the device +for the DDF header (magic). If found, proceeds to load the DDF metadata +from the located position. 
+ +When clearing metadata as required by the mdadm --zero (function Kill), +also erase the last 32MB of data; otherwise, it may result in an +infinite loop. + +According to the specification, the Anchor Header should be placed at +the end of the disk. However,some widely used RAID hardware, such as +LSI and PERC, do not position it within the last 512 bytes of the disk. + +Signed-off-by: lilinzhe +--- + super-ddf.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++----- + xmalloc.c | 11 +++ + xmalloc.h | 1 + + 3 files changed, 185 insertions(+), 17 deletions(-) + +diff --git a/super-ddf.c b/super-ddf.c +index a06ed435..6e7db924 100644 +--- a/super-ddf.c ++++ b/super-ddf.c +@@ -272,6 +272,10 @@ struct phys_disk { + #define DDF_ReadErrors 32 + #define DDF_Missing 64 + ++ ++#define SEARCH_BLOCK_SIZE 4096 ++#define SEARCH_REGION_SIZE (32 * 1024 * 1024) ++ + /* The content of the virt_section global scope */ + struct virtual_disk { + be32 magic; /* DDF_VIRT_RECORDS_MAGIC */ +@@ -877,30 +881,180 @@ static void *load_section(int fd, struct ddf_super *super, void *buf, + return buf; + } + +-static int load_ddf_headers(int fd, struct ddf_super *super, char *devname) ++ ++/* ++ * Search for DDF_HEADER_MAGIC in the last 32MB of the device ++ * ++ * According to the specification, the Anchor Header should be placed at ++ * the end of the disk. However,some widely used RAID hardware, such as ++ * LSI and PERC, do not position it within the last 512 bytes of the disk. 
++ * ++ */ ++static int search_for_ddf_headers(int fd, char *devname, ++ unsigned long long *out) + { ++ unsigned long long search_start; ++ unsigned long long search_end; ++ size_t bytes_block_to_read; + unsigned long long dsize; ++ unsigned long long pos; ++ int bytes_current_read; ++ size_t offset; ++ ++ void *buffer = NULL; ++ be32 *magic_ptr = NULL; ++ ++ int result = 0; + + get_dev_size(fd, NULL, &dsize); + +- if (lseek64(fd, dsize - 512, 0) == -1L) { +- if (devname) +- pr_err("Cannot seek to anchor block on %s: %s\n", ++ ++ /* Determine the search range */ ++ if (dsize > SEARCH_REGION_SIZE) ++ search_start = dsize - SEARCH_REGION_SIZE; ++ else ++ search_start = 0; ++ ++ search_end = dsize; ++ pos = search_start; ++ ++ ++ buffer = xmemalign(SEARCH_BLOCK_SIZE, SEARCH_BLOCK_SIZE); ++ ++ if (buffer == NULL) { ++ result = 1; ++ goto cleanup; ++ } ++ ++ while (pos < search_end) { ++ /* Calculate the number of bytes to read in the current block */ ++ bytes_block_to_read = SEARCH_BLOCK_SIZE; ++ if (search_end - pos < SEARCH_BLOCK_SIZE) ++ bytes_block_to_read = search_end - pos; ++ ++ if (lseek64(fd, pos, SEEK_SET) < 0) { ++ pr_err("lseek64 for %s failed %d:%s\n", ++ fd2devnm(fd), errno, strerror(errno)); ++ result = 2; ++ goto cleanup; ++ } ++ ++ /*Read data from the device */ ++ bytes_current_read = read(fd, buffer, bytes_block_to_read); ++ ++ if (bytes_current_read <= 0) { ++ pr_err("Failed to read %s. %d:%s, Position=%llu, Bytes to read=%zu. 
Skipping.\n", ++ fd2devnm(fd), errno, strerror(errno), pos, bytes_block_to_read); ++ pos += SEARCH_BLOCK_SIZE; /* Skip to the next block */ ++ continue; ++ } ++ ++ /* Search for the magic value within the read block */ ++ for (offset = 0; ++ offset + sizeof(be32) <= (size_t)bytes_current_read; ++ offset += sizeof(be32)) { ++ ++ magic_ptr = (be32 *) ((char *)buffer + offset); ++ if (be32_eq(*magic_ptr, DDF_HEADER_MAGIC)) { ++ *out = pos + offset; ++ result = 0; ++ goto cleanup; ++ } ++ } ++ ++ pos += SEARCH_BLOCK_SIZE; ++ } ++ ++cleanup: ++ free(buffer); ++ return result; ++} ++ ++static int load_ddf_headers(int fd, struct ddf_super *super, char *devname) ++{ ++ /* ++ * Load DDF headers from a device. ++ * First, check at dsize - 512, and if not found, search for it. ++ */ ++ unsigned long long dsize = 0; ++ unsigned long long ddfpos = 0; ++ unsigned long long ddffound = 0; ++ bool found_anchor = false; ++ ++ get_dev_size(fd, NULL, &dsize); ++ ++ /* Check the last 512 bytes for the DDF header. 
*/ ++ if (lseek64(fd, dsize - 512, SEEK_SET) == -1L) { ++ if (devname) { ++ pr_err("Cannot seek to last 512 bytes on %s: %s\n", + devname, strerror(errno)); ++ } + return 1; + } +- if (read(fd, &super->anchor, 512) != 512) { +- if (devname) +- pr_err("Cannot read anchor block on %s: %s\n", ++ ++ ++ /* Read the last 512 bytes into the anchor block */ ++ if (read(fd, &super->anchor, 512) == 512) { ++ /* Check if the magic value matches */ ++ if (be32_eq(super->anchor.magic, DDF_HEADER_MAGIC)) ++ found_anchor = true; ++ ++ } else { ++ if (devname) { ++ pr_err("Cannot read last 512 bytes on %s: %s\n", + devname, strerror(errno)); +- return 1; ++ } + } +- if (!be32_eq(super->anchor.magic, DDF_HEADER_MAGIC)) { ++ ++ if (!found_anchor) { ++ /* If not found, perform a full search for DDF headers */ ++ ddffound = search_for_ddf_headers(fd, devname, &ddfpos); ++ if (ddffound != 0) { ++ if (devname) { ++ pr_err ++ ("DDF headers not found during search on %s\n", ++ devname); ++ } ++ return 2; ++ } ++ ++ /* Seek to the found position */ ++ if (lseek64(fd, ddfpos, SEEK_SET) == -1L) { ++ if (devname) { ++ pr_err("Cannot seek to anchor block on %s\n", ++ devname); ++ } ++ return 1; ++ } ++ ++ /* Read the header from the found position */ ++ if (read(fd, &super->anchor, 512) != 512) { ++ if (devname) { ++ pr_err ++ ("Cannot read DDF header at found position on %s: %s\n", ++ devname, strerror(errno)); ++ } ++ return 1; ++ } ++ ++ /* Verify the magic value again */ ++ if (!be32_eq(super->anchor.magic, DDF_HEADER_MAGIC)) { ++ if (devname) { ++ pr_err("Invalid DDF header magic value on %s\n", ++ devname); ++ } ++ return 2; ++ } ++ found_anchor = true; ++ } ++ if (!found_anchor) { ++ + if (devname) +- pr_err("no DDF anchor found on %s\n", +- devname); ++ pr_err("DDF headers not found on %s\n", devname); ++ + return 2; + } ++ + if (!be32_eq(calc_crc(&super->anchor, 512), super->anchor.crc)) { + if (devname) + pr_err("bad CRC on anchor on %s\n", +@@ -3889,16 +4043,17 @@ static int 
store_super_ddf(struct supertype *st, int fd) + dl->fd = ofd; + return ret; + } ++ // this is used for cleanup (Kill). to clean up 512 bytes ++ // at the end of the disk is not enough. ++ // clears SEARCH_REGION_SIZE bytes at the end of the disk. + +- if (posix_memalign(&buf, 512, 512) != 0) +- return 1; +- memset(buf, 0, 512); +- +- if (lseek64(fd, dsize - 512, 0) == -1L) { ++ buf = xmemalign(SEARCH_BLOCK_SIZE, SEARCH_REGION_SIZE); ++ memset(buf, 0, SEARCH_REGION_SIZE); ++ if (lseek64(fd, dsize - SEARCH_REGION_SIZE, 0) == -1L) { + free(buf); + return 1; + } +- rc = write(fd, buf, 512); ++ rc = write(fd, buf, SEARCH_REGION_SIZE); + free(buf); + if (rc < 0) + return 1; +@@ -5208,6 +5363,7 @@ static int update_super_ddf_dummy(struct supertype *st, struct mdinfo *info, + */ + dprintf("update_super is not implemented in DDF\n"); + return 0; ++ + } + + struct superswitch super_ddf = { +diff --git a/xmalloc.c b/xmalloc.c +index e28d3bd6..9472005e 100644 +--- a/xmalloc.c ++++ b/xmalloc.c +@@ -75,3 +75,14 @@ char *xstrdup(const char *str) + + return exit_memory_alloc_failure(); + } ++ ++void *xmemalign(size_t alignment, size_t size) ++{ ++ void *ptr = NULL; ++ int result = posix_memalign(&ptr, alignment, size); ++ ++ if (result == 0) ++ return ptr; ++ ++ return exit_memory_alloc_failure(); ++} +diff --git a/xmalloc.h b/xmalloc.h +index 0904b0ab..789948ae 100644 +--- a/xmalloc.h ++++ b/xmalloc.h +@@ -9,5 +9,6 @@ void *xmalloc(size_t len); + void *xrealloc(void *ptr, size_t len); + void *xcalloc(size_t num, size_t size); + char *xstrdup(const char *str); ++void *xmemalign(size_t alignment, size_t size); + + #endif +-- +2.41.0 + diff --git a/0020-bitmap.h-clear-__KERNEL__-based-headers.patch b/0020-bitmap.h-clear-__KERNEL__-based-headers.patch new file mode 100644 index 0000000000000000000000000000000000000000..12b4137c61cbb607f9e3a58880308826a0208ac8 --- /dev/null +++ b/0020-bitmap.h-clear-__KERNEL__-based-headers.patch @@ -0,0 +1,200 @@ +From 
eb9876f58658a107705a689852110903723e4d3b Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Mon, 3 Feb 2025 11:36:01 +0100 +Subject: [PATCH 20/39] bitmap.h - clear __KERNEL__ based headers + +It is unused for years. Clear it. + +Signed-off-by: Mariusz Tkaczyk +--- + bitmap.h | 169 ------------------------------------------------------- + 1 file changed, 169 deletions(-) + +diff --git a/bitmap.h b/bitmap.h +index 7b1f80f2..2614a14e 100644 +--- a/bitmap.h ++++ b/bitmap.h +@@ -78,63 +78,6 @@ + * + */ + +-#ifdef __KERNEL__ +- +-#define PAGE_BITS (PAGE_SIZE << 3) +-#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) +- +-typedef __u16 bitmap_counter_t; +-#define COUNTER_BITS 16 +-#define COUNTER_BIT_SHIFT 4 +-#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8) +-#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) +- +-#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) +-#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) +-#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) +-#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) +-#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) +-#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) +- +-/* how many counters per page? */ +-#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) +-/* same, except a shift value for more efficient bitops */ +-#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) +-/* same, except a mask value for more efficient bitops */ +-#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) +- +-#define BITMAP_BLOCK_SIZE 512 +-#define BITMAP_BLOCK_SHIFT 9 +- +-/* how many blocks per chunk? 
(this is variable) */ +-#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT) +-#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT) +-#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1) +- +-/* when hijacked, the counters and bits represent even larger "chunks" */ +-/* there will be 1024 chunks represented by each counter in the page pointers */ +-#define PAGEPTR_BLOCK_RATIO(bitmap) \ +- (CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1) +-#define PAGEPTR_BLOCK_SHIFT(bitmap) \ +- (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) +-#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) +- +-/* +- * on-disk bitmap: +- * +- * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap +- * file a page at a time. There's a superblock at the start of the file. +- */ +- +-/* map chunks (bits) to file pages - offset by the size of the superblock */ +-#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3)) +- +-#endif +- +-/* +- * bitmap structures: +- */ +- + #define BITMAP_MAGIC 0x6d746962 + + /* use these for bitmap->flags and bitmap->sb->state bit-fields */ +@@ -176,116 +119,4 @@ typedef struct bitmap_super_s { + * devices. For raid10 it is the size of the array. 
+ */ + +-#ifdef __KERNEL__ +- +-/* the in-memory bitmap is represented by bitmap_pages */ +-struct bitmap_page { +- /* +- * map points to the actual memory page +- */ +- char *map; +- /* +- * in emergencies (when map cannot be allocated), hijack the map +- * pointer and use it as two counters itself +- */ +- unsigned int hijacked; +- /* +- * count of dirty bits on the page +- */ +- int count; +-}; +- +-/* keep track of bitmap file pages that have pending writes on them */ +-struct page_list { +- struct list_head list; +- struct page *page; +-}; +- +-/* the main bitmap structure - one per mddev */ +-struct bitmap { +- struct bitmap_page *bp; +- unsigned long pages; /* total number of pages in the bitmap */ +- unsigned long missing_pages; /* number of pages not yet allocated */ +- +- mddev_t *mddev; /* the md device that the bitmap is for */ +- +- int counter_bits; /* how many bits per block counter */ +- +- /* bitmap chunksize -- how much data does each bit represent? */ +- unsigned long chunksize; +- unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */ +- unsigned long chunks; /* total number of data chunks for the array */ +- +- /* We hold a count on the chunk currently being synced, and drop +- * it when the last block is started. If the resync is aborted +- * midway, we need to be able to drop that count, so we remember +- * the counted chunk.. 
+- */ +- unsigned long syncchunk; +- +- __u64 events_cleared; +- +- /* bitmap spinlock */ +- spinlock_t lock; +- +- struct file *file; /* backing disk file */ +- struct page *sb_page; /* cached copy of the bitmap file superblock */ +- struct page **filemap; /* list of cache pages for the file */ +- unsigned long *filemap_attr; /* attributes associated w/ filemap pages */ +- unsigned long file_pages; /* number of pages in the file */ +- +- unsigned long flags; +- +- /* +- * the bitmap daemon - periodically wakes up and sweeps the bitmap +- * file, cleaning up bits and flushing out pages to disk as necessary +- */ +- mdk_thread_t *daemon; +- unsigned long daemon_sleep; /* how many seconds between updates? */ +- +- /* +- * bitmap write daemon - this daemon performs writes to the bitmap file +- * this thread is only needed because of a limitation in ext3 (jbd) +- * that does not allow a task to have two journal transactions ongoing +- * simultaneously (even if the transactions are for two different +- * filesystems) -- in the case of bitmap, that would be the filesystem +- * that the bitmap file resides on and the filesystem that is mounted +- * on the md device -- see current->journal_info in jbd/transaction.c +- */ +- mdk_thread_t *write_daemon; +- mdk_thread_t *writeback_daemon; +- spinlock_t write_lock; +- struct semaphore write_ready; +- struct semaphore write_done; +- unsigned long writes_pending; +- wait_queue_head_t write_wait; +- struct list_head write_pages; +- struct list_head complete_pages; +- mempool_t *write_pool; +-}; +- +-/* the bitmap API */ +- +-/* these are used only by md/bitmap */ +-int bitmap_create(mddev_t *mddev); +-void bitmap_destroy(mddev_t *mddev); +-int bitmap_active(struct bitmap *bitmap); +- +-char *file_path(struct file *file, char *buf, int count); +-void bitmap_print_sb(struct bitmap *bitmap); +-int bitmap_update_sb(struct bitmap *bitmap); +- +-int bitmap_setallbits(struct bitmap *bitmap); +- +-/* these are exported */ +-void 
bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors); +-void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, +- int success); +-int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks); +-void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted); +-void bitmap_close_sync(struct bitmap *bitmap); +- +-int bitmap_unplug(struct bitmap *bitmap); +-#endif +- + #endif +-- +2.41.0 + diff --git a/0021-bitmap.h-Minor-fixes.patch b/0021-bitmap.h-Minor-fixes.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a4d8cb7b0986f1808d629d1cf8b2b49792fa934 --- /dev/null +++ b/0021-bitmap.h-Minor-fixes.patch @@ -0,0 +1,206 @@ +From 17fed47a64e1890df9820b93548c396b7de54e31 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Wed, 5 Feb 2025 11:34:45 +0100 +Subject: [PATCH 21/39] bitmap.h: Minor fixes + +Move documentation to documentation/bitmap.md. Add Neil's copyrights, +add missing license. Remove unused macros. + +Signed-off-by: Mariusz Tkaczyk +--- + bitmap.h | 94 ++++------------------------------------- + documentation/bitmap.md | 67 +++++++++++++++++++++++++++++ + 2 files changed, 75 insertions(+), 86 deletions(-) + create mode 100644 documentation/bitmap.md + +diff --git a/bitmap.h b/bitmap.h +index 2614a14e..9f3d4f3e 100644 +--- a/bitmap.h ++++ b/bitmap.h +@@ -1,83 +1,19 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++ + /* +- * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 +- * +- * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. ++ * Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 ++ * Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. 
++ * Copyright (C) 2005 Neil Brown + */ ++ ++/* See documentation/bitmap.md */ ++ + #ifndef BITMAP_H + #define BITMAP_H 1 + + #define BITMAP_MAJOR_LO 3 +-/* version 4 insists the bitmap is in little-endian order +- * with version 3, it is host-endian which is non-portable +- */ + #define BITMAP_MAJOR_HI 4 +-#define BITMAP_MAJOR_HOSTENDIAN 3 + #define BITMAP_MAJOR_CLUSTERED 5 +- +-#define BITMAP_MINOR 39 +- +-/* +- * in-memory bitmap: +- * +- * Use 16 bit block counters to track pending writes to each "chunk". +- * The 2 high order bits are special-purpose, the first is a flag indicating +- * whether a resync is needed. The second is a flag indicating whether a +- * resync is active. +- * This means that the counter is actually 14 bits: +- * +- * +--------+--------+------------------------------------------------+ +- * | resync | resync | counter | +- * | needed | active | | +- * | (0-1) | (0-1) | (0-16383) | +- * +--------+--------+------------------------------------------------+ +- * +- * The "resync needed" bit is set when: +- * a '1' bit is read from storage at startup. +- * a write request fails on some drives +- * a resync is aborted on a chunk with 'resync active' set +- * It is cleared (and resync-active set) when a resync starts across all drives +- * of the chunk. +- * +- * +- * The "resync active" bit is set when: +- * a resync is started on all drives, and resync_needed is set. +- * resync_needed will be cleared (as long as resync_active wasn't already set). +- * It is cleared when a resync completes. +- * +- * The counter counts pending write requests, plus the on-disk bit. +- * When the counter is '1' and the resync bits are clear, the on-disk +- * bit can be cleared as well, thus setting the counter to 0. +- * When we set a bit, or in the counter (to start a write), if the fields is +- * 0, we first set the disk bit and set the counter to 1. 
+- * +- * If the counter is 0, the on-disk bit is clear and the stipe is clean +- * Anything that dirties the stipe pushes the counter to 2 (at least) +- * and sets the on-disk bit (lazily). +- * If a periodic sweep find the counter at 2, it is decremented to 1. +- * If the sweep find the counter at 1, the on-disk bit is cleared and the +- * counter goes to zero. +- * +- * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block +- * counters as a fallback when "page" memory cannot be allocated: +- * +- * Normal case (page memory allocated): +- * +- * page pointer (32-bit) +- * +- * [ ] ------+ +- * | +- * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) +- * c1 c2 c2048 +- * +- * Hijacked case (page memory allocation failed): +- * +- * hijacked page pointer (32-bit) +- * +- * [ ][ ] (no page memory allocated) +- * counter #1 (16-bit) counter #2 (16-bit) +- * +- */ +- + #define BITMAP_MAGIC 0x6d746962 + + /* use these for bitmap->flags and bitmap->sb->state bit-fields */ +@@ -105,18 +41,4 @@ typedef struct bitmap_super_s { + __u8 pad[256 - 136]; /* set to zero */ + } bitmap_super_t; + +-/* notes: +- * (1) This event counter is updated before the eventcounter in the md superblock +- * When a bitmap is loaded, it is only accepted if this event counter is equal +- * to, or one greater than, the event counter in the superblock. +- * (2) This event counter is updated when the other one is *if*and*only*if* the +- * array is not degraded. As bits are not cleared when the array is degraded, +- * this represents the last time that any bits were cleared. +- * If a device is being added that has an event count with this value or +- * higher, it is accepted as conforming to the bitmap. +- * (3)This is the number of sectors represented by the bitmap, and is the range that +- * resync happens across. For raid1 and raid5/6 it is the size of individual +- * devices. For raid10 it is the size of the array. 
+- */ +- + #endif +diff --git a/documentation/bitmap.md b/documentation/bitmap.md +new file mode 100644 +index 00000000..bdc146e5 +--- /dev/null ++++ b/documentation/bitmap.md +@@ -0,0 +1,67 @@ ++# Internal bitmap block design ++ ++Use 16 bit block counters to track pending writes to each "chunk". ++The 2 high order bits are special-purpose, the first is a flag indicating ++whether a resync is needed. The second is a flag indicating whether a ++resync is active. This means that the counter is actually 14 bits: ++ ++| resync_needed | resync_active | counter | ++| :----: | :----: | :----: | ++| (0-1) | (0-1) | (0-16383) | ++ ++The `resync_needed` bit is set when: ++- a `1` bit is read from storage at startup; ++- a write request fails on some drives; ++- a resync is aborted on a chunk with `resync_active` set; ++- It is cleared (and `resync_active` set) when a resync starts across all drives of the chunk. ++ ++The `resync_active` bit is set when: ++- a resync is started on all drives, and `resync_needed` is set. ++- `resync_needed` will be cleared (as long as `resync_active` wasn't already set). ++- It is cleared when a resync completes. ++ ++The counter counts pending write requests, plus the on-disk bit. ++When the counter is `1` and the resync bits are clear, the on-disk ++bit can be cleared as well, thus setting the counter to `0`. ++When we set a bit, or in the counter (to start a write), if the fields is ++`0`, we first set the disk bit and set the counter to `1`. ++ ++If the counter is `0`, the on-disk bit is clear and the stipe is clean ++Anything that dirties the stipe pushes the counter to `2` (at least) ++and sets the on-disk bit (lazily). ++If a periodic sweep find the counter at `2`, it is decremented to `1`. ++If the sweep find the counter at `1`, the on-disk bit is cleared and the ++counter goes to `0`. 
++ ++Also, we'll hijack the "map" pointer itself and use it as two 16 bit block ++counters as a fallback when "page" memory cannot be allocated: ++ ++Normal case (page memory allocated): ++ ++page pointer (32-bit) ++ ++ [ ] ------+ ++ | ++ +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) ++ c1 c2 c2048 ++ ++ Hijacked case (page memory allocation failed): ++ ++ hijacked page pointer (32-bit) ++ ++ [ ][ ] (no page memory allocated) ++ counter #1 (16-bit) counter #2 (16-bit) ++ ++ ++## Notes: ++1. bitmap_super_s->events counter is updated before the event counter in the md superblock; ++ When a bitmap is loaded, it is only accepted if this event counter is equal ++ to, or one greater than, the event counter in the superblock. ++2. bitmap_super_s->events is updated when the other one is `if` and `only if` the ++ array is not degraded. As bits are not cleared when the array is degraded, ++ this represents the last time that any bits were cleared. If a device is being ++ added that has an event count with this value or higher, it is accepted ++ as conforming to the bitmap. ++3. bitmap_super_s->chunksize is the number of sectors represented by the bitmap, ++ and is the range that resync happens across. For raid1 and raid5/6 it is the ++ size of individual devices. For raid10 it is the size of the array. +-- +2.41.0 + diff --git a/0024-mdmon-imsm-fix-metadata-corruption-when-managing-new.patch b/0024-mdmon-imsm-fix-metadata-corruption-when-managing-new.patch new file mode 100644 index 0000000000000000000000000000000000000000..58fa3ceeb3debbf80f77dee2b155b33717bb09ef --- /dev/null +++ b/0024-mdmon-imsm-fix-metadata-corruption-when-managing-new.patch @@ -0,0 +1,122 @@ +From 7d29b3823c18a24d6efbb502f08638788f97e04b Mon Sep 17 00:00:00 2001 +From: Junxiao Bi +Date: Tue, 18 Feb 2025 10:48:31 -0800 +Subject: [PATCH 24/39] mdmon: imsm: fix metadata corruption when managing new + array + +When manager thread detects new array, it will invoke manage_new(). 
+For imsm array, it will further invoke imsm_open_new(). Since +commit bbab0940fa75("imsm: write bad block log on metadata sync"), +it preallocates bad block log when opening the array, that requires +increasing the mpb buffer size. +For that, imsm_open_new() invokes function imsm_update_metadata_locally(), +which first uses imsm_prepare_update() to allocate a larger mpb buffer +and store it at "mpb->next_buf", and then invoke imsm_process_update() +to copy the content from current mpb buffer "mpb->buf" to "mpb->next_buf", +and then free the current mpb buffer and set the new buffer as current. + +There is a small race window, when monitor thread is syncing metadata, +it gets current buffer pointer in imsm_sync_metadata()->write_super_imsm(), +but before flushing the buffer to disk, manager thread does above switching +buffer which frees current buffer, then monitor thread will run into +use-after-free issue and could cause on-disk metadata corruption. +If system keeps running, further metadata update could fix the corruption, +because after switching buffer, the new buffer will contain good metadata, +but if panic/power cycle happens while disk metadata is corrupted, +the system will run into bootup failure if array is used as root, +otherwise the array can not be assembled after boot if not used as root. + +This issue will not happen for imsm array with only one member array, +because the member array has not been opened yet, monitor thread will not +do any metadata updates. +This can happen for imsm array with at least two member array, in the +following two scenarios: +1. Restarting mdmon process with at least two member array +This will happen during system boot up or user restart mdmon after mdadm +upgrade +2. Adding new member array to existing imsm array with at least one member +array. + +To fix this, delay the switching buffer operation to monitor thread. 
+ +Fixes: bbab0940fa75 ("imsm: write bad block log on metadata sync") +Signed-off-by: Junxiao Bi +--- + managemon.c | 10 ++++++++-- + super-intel.c | 14 +++++++++++--- + 2 files changed, 19 insertions(+), 5 deletions(-) + +diff --git a/managemon.c b/managemon.c +index d7981328..74b64bfc 100644 +--- a/managemon.c ++++ b/managemon.c +@@ -721,11 +721,12 @@ static void manage_new(struct mdstat_ent *mdstat, + * the monitor. + */ + ++ struct metadata_update *update = NULL; + struct active_array *new = NULL; + struct mdinfo *mdi = NULL, *di; +- int i, inst; +- int failed = 0; + char buf[SYSFS_MAX_BUF_SIZE]; ++ int failed = 0; ++ int i, inst; + + /* check if array is ready to be monitored */ + if (!mdstat->active || !mdstat->level) +@@ -824,9 +825,14 @@ static void manage_new(struct mdstat_ent *mdstat, + /* if everything checks out tell the metadata handler we want to + * manage this instance + */ ++ container->update_tail = &update; + if (!aa_ready(new) || container->ss->open_new(container, new, inst) < 0) { ++ container->update_tail = NULL; + goto error; + } else { ++ if (update) ++ queue_metadata_update(update); ++ container->update_tail = NULL; + replace_array(container, victim, new); + if (failed) { + new->check_degraded = 1; +diff --git a/super-intel.c b/super-intel.c +index cab84198..4988eef1 100644 +--- a/super-intel.c ++++ b/super-intel.c +@@ -8467,12 +8467,15 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev, + return failed; + } + ++static int imsm_prepare_update(struct supertype *st, ++ struct metadata_update *update); + static int imsm_open_new(struct supertype *c, struct active_array *a, + int inst) + { + struct intel_super *super = c->sb; + struct imsm_super *mpb = super->anchor; +- struct imsm_update_prealloc_bb_mem u; ++ struct imsm_update_prealloc_bb_mem *u; ++ struct metadata_update mu; + + if (inst >= mpb->num_raid_devs) { + pr_err("subarry index %d, out of range\n", inst); +@@ -8482,8 +8485,13 @@ static int 
imsm_open_new(struct supertype *c, struct active_array *a, + dprintf("imsm: open_new %d\n", inst); + a->info.container_member = inst; + +- u.type = update_prealloc_badblocks_mem; +- imsm_update_metadata_locally(c, &u, sizeof(u)); ++ u = xmalloc(sizeof(*u)); ++ u->type = update_prealloc_badblocks_mem; ++ mu.len = sizeof(*u); ++ mu.buf = (char *)u; ++ imsm_prepare_update(c, &mu); ++ if (c->update_tail) ++ append_metadata_update(c, u, sizeof(*u)); + + return 0; + } +-- +2.41.0 + diff --git a/0025-Regression-fix-156.patch b/0025-Regression-fix-156.patch new file mode 100644 index 0000000000000000000000000000000000000000..9c6dfe0d4a1a5a4b19f9542b7cb6d5bd43466cbc --- /dev/null +++ b/0025-Regression-fix-156.patch @@ -0,0 +1,217 @@ +From c2fbf66ba0243f499f78ed43fa1207a9bd9361b5 Mon Sep 17 00:00:00 2001 +From: XiaoNi87 +Date: Tue, 18 Mar 2025 08:18:04 +0800 +Subject: [PATCH 25/39] Regression fix (#156) + +Signed-off-by: Xiao Ni +--- + .github/tools/run_mdadm_tests.sh | 2 +- + test | 22 +++++++++++++++++++--- + tests/05r6tor0.broken | 15 +++++++++++++++ + tests/07revert-inplace.broken | 8 ++++++++ + tests/10ddf-create.broken | 5 ----- + tests/10ddf-fail-two-spares.broken | 5 ----- + tests/20raid5journal.broken | 17 +++++++++++++++++ + tests/env-ddf-template | 3 +-- + tests/skiptests | 9 +++++++++ + util.c | 2 +- + 10 files changed, 71 insertions(+), 17 deletions(-) + create mode 100644 tests/05r6tor0.broken + create mode 100644 tests/07revert-inplace.broken + delete mode 100644 tests/10ddf-create.broken + delete mode 100644 tests/10ddf-fail-two-spares.broken + create mode 100644 tests/20raid5journal.broken + create mode 100644 tests/skiptests + +diff --git a/.github/tools/run_mdadm_tests.sh b/.github/tools/run_mdadm_tests.sh +index 456874b5..22d89a8c 100755 +--- a/.github/tools/run_mdadm_tests.sh ++++ b/.github/tools/run_mdadm_tests.sh +@@ -11,7 +11,7 @@ sudo ./test setup + + #sudo ./test --tests=00createnames + +-sudo ./test --skip-broken --no-error --disable-integrity 
--disable-multipath --disable-linear --keep-going ++sudo ./test --skip-broken --no-error --disable-integrity --disable-multipath --disable-linear --keep-going --skip-bigcase + + ret=$? + sudo ./test cleanup +diff --git a/test b/test +index 88e44f18..7fa68177 100755 +--- a/test ++++ b/test +@@ -26,6 +26,10 @@ savelogs=0 + exitonerror=1 + ctrl_c_error=0 + skipbroken=0 ++skipbigcase=0 ++skipfile="skiptests" ++skipcheckfile=$testdir/$skipfile ++checkscript="" + loop=1 + prefix='[0-9][0-9]' + +@@ -192,6 +196,7 @@ do_help() { + --loop=N Run tests N times (0 to run forever) + --skip-broken Skip tests that are known to be broken + --skip-always-broken Skip tests that are known to always fail ++ --skip-bigcase Skip tests that need time than 200 seconds + --dev=loop|lvm|ram|disk Use loop devices (default), LVM, RAM or disk + --disks= Provide a bunch of physical devices for test + --volgroup=name LVM volume group for LVM test +@@ -295,6 +300,9 @@ parse_args() { + --skip-always-broken ) + skipbroken=always + ;; ++ --skip-bigcase ) ++ skipbigcase=all ++ ;; + --disable-multipath ) + unset MULTIPATH + ;; +@@ -369,9 +377,17 @@ main() { + else + for script in $testdir/$prefix $testdir/$prefix*[^~] + do +- case $script in *.broken) ;; +- *) +- do_test $script ++ checkscript="${script##*/}" ++ case $script in ++ *.broken) ++ ;; ++ *) ++ if grep -q "$checkscript" "$skipcheckfile"; then ++ if [ "$skipbigcase" == "all" ]; then ++ continue ++ fi ++ fi ++ do_test $script + esac + done + fi +diff --git a/tests/05r6tor0.broken b/tests/05r6tor0.broken +new file mode 100644 +index 00000000..930a0941 +--- /dev/null ++++ b/tests/05r6tor0.broken +@@ -0,0 +1,15 @@ ++Sometimes ++ +++++ pgrep -f 'mdadm --grow --continue' ++++ [[ '' != '' ]] ++++ break ++++ echo 100 ++++ echo 500 ++++ sleep 2 ++++ check raid5 ++++ case $1 in ++++ grep -sq 'active raid5 ' /proc/mdstat ++++ die 'active raid5 not found' ++++ echo -e '\n\tERROR: active raid5 not found \n' ++ ++ ERROR: active raid5 not found +diff --git 
a/tests/07revert-inplace.broken b/tests/07revert-inplace.broken +new file mode 100644 +index 00000000..73d98a04 +--- /dev/null ++++ b/tests/07revert-inplace.broken +@@ -0,0 +1,8 @@ ++always fails ++ ++Fails with errors: ++ ++ /usr/sbin/mdadm -A /dev/md0 --update=revert-reshape /dev/loop0 /dev/loop1 /dev/loop2 /dev/loop3 /dev/loop4 --backup-file=/tmp/md-backup ++++ rv=1 ++++ case $* in ++++ cat /var/tmp/stderr ++mdadm: failed to RUN_ARRAY /dev/md0: Invalid argument +diff --git a/tests/10ddf-create.broken b/tests/10ddf-create.broken +deleted file mode 100644 +index 0f7d25e5..00000000 +--- a/tests/10ddf-create.broken ++++ /dev/null +@@ -1,5 +0,0 @@ +-Fails due to segmentation fault at assemble. +- +-Too much effort to diagnose this now, marking as broken to make CI clear. +- ++ /usr/sbin/mdadm -A /dev/md/ddf0 /dev/loop8 /dev/loop9 /dev/loop10 /dev/loop11 /dev/loop12 +- ./test: line 76: 101955 Segmentation fault (core dumped) $mdadm "$@" 2> $targetdir/stderr +diff --git a/tests/10ddf-fail-two-spares.broken b/tests/10ddf-fail-two-spares.broken +deleted file mode 100644 +index eeea56d9..00000000 +--- a/tests/10ddf-fail-two-spares.broken ++++ /dev/null +@@ -1,5 +0,0 @@ +-fails infrequently +- +-Fails roughly 1 in 3 with error: +- +- ERROR: /dev/md/vol1 should be optimal in meta data +diff --git a/tests/20raid5journal.broken b/tests/20raid5journal.broken +new file mode 100644 +index 00000000..c7b214af +--- /dev/null ++++ b/tests/20raid5journal.broken +@@ -0,0 +1,17 @@ ++always fail ++ ++++ /usr/sbin/mdadm -I /dev/loop4 ++++ rv=0 ++++ case $* in ++++ cat /var/tmp/stderr ++mdadm: /dev/loop4 attached to /dev/md/0_0, which has been started. 
++++ return 0 ++++ check raid5 ++++ case $1 in ++++ grep -sq 'active raid5 ' /proc/mdstat ++++ die 'active raid5 not found' ++++ echo -e '\n\tERROR: active raid5 not found \n' ++ ++ ERROR: active raid5 not found ++ ++++ save_log fail +diff --git a/tests/env-ddf-template b/tests/env-ddf-template +index 4f4ad0f3..ebc0ebf3 100644 +--- a/tests/env-ddf-template ++++ b/tests/env-ddf-template +@@ -3,8 +3,7 @@ sha1_sum() { + } + + get_rootdev() { +- local part=$(grep ' / ' /proc/mounts | awk '{print $1}') +- local bd=/dev/$(lsblk -no PKNAME $part) ++ local bd=$(grep ' / ' /proc/mounts | awk '{print $1}') + [ -b $bd ] || exit 1 + echo $bd + } +diff --git a/tests/skiptests b/tests/skiptests +new file mode 100644 +index 00000000..fd0893f1 +--- /dev/null ++++ b/tests/skiptests +@@ -0,0 +1,9 @@ ++casename:seconds ++01raid6integ:1732 ++01replace:396 ++07layouts:836 ++11spare-migration:1140 ++12imsm-r0_2d-grow-r0_5d:218 ++13imsm-r0_r0_2d-grow-r0_r0_4d:218 ++13imsm-r0_r0_2d-grow-r0_r0_5d:246 ++19raid6check:268 +diff --git a/util.c b/util.c +index 8c45f0e1..9fe2d227 100644 +--- a/util.c ++++ b/util.c +@@ -2310,7 +2310,7 @@ mdadm_status_t continue_via_systemd(char *devnm, char *service_name, char *prefi + dprintf("Start %s service\n", service_name); + /* Simply return that service cannot be started */ + if (check_env("MDADM_NO_SYSTEMCTL")) +- return MDADM_STATUS_SUCCESS; ++ return MDADM_STATUS_ERROR; + + /* Fork in attempt to start services */ + switch (fork()) { +-- +2.41.0 + diff --git a/0026-super1-Clear-extra-flags-when-initializing-metadata.patch b/0026-super1-Clear-extra-flags-when-initializing-metadata.patch new file mode 100644 index 0000000000000000000000000000000000000000..6d04f57a95e534718db27ede3e5ded1446a8b2a7 --- /dev/null +++ b/0026-super1-Clear-extra-flags-when-initializing-metadata.patch @@ -0,0 +1,72 @@ +From 4e2e208c8d3e9ba0fae88136d7c4cd0292af73b0 Mon Sep 17 00:00:00 2001 +From: Wu Guanghao +Date: Tue, 11 Mar 2025 03:11:55 +0000 +Subject: [PATCH 26/39] super1: 
Clear extra flags when initializing metadata + +When adding a disk to a RAID1 array, the metadata is read from the +existing member disks for sync. However, only the bad_blocks flag are +copied, the bad_blocks records are not copied, so the bad_blocks +records are all zeros. The kernel function super_1_load() detects +bad_blocks flag and reads the bad_blocks record, then sets the bad +block using badblocks_set(). + +After the kernel commit 1726c7746783 (badblocks: improve badblocks_set() +for multiple ranges handling) if the length of a bad_blocks record is 0, +it will return a failure. Therefore the device addition will fail. + +So when adding a new disk, some flags cannot be sync and need to be clead. + +Signed-off-by: Wu Guanghao +--- + super1.c | 3 +++ + tests/05r1-add-badblocks | 24 ++++++++++++++++++++++++ + 2 files changed, 27 insertions(+) + create mode 100644 tests/05r1-add-badblocks + +diff --git a/super1.c b/super1.c +index fe3c4c64..c828b682 100644 +--- a/super1.c ++++ b/super1.c +@@ -1971,6 +1971,9 @@ static int write_init_super1(struct supertype *st) + long bm_offset; + bool raid0_need_layout = false; + ++ /* Clear extra flags */ ++ sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_BAD_BLOCKS | MD_FEATURE_REPLACEMENT); ++ + /* Since linux kernel v5.4, raid0 always has a layout */ + if (has_raid0_layout(sb) && get_linux_version() >= 5004000) + raid0_need_layout = true; +diff --git a/tests/05r1-add-badblocks b/tests/05r1-add-badblocks +new file mode 100644 +index 00000000..6192327a +--- /dev/null ++++ b/tests/05r1-add-badblocks +@@ -0,0 +1,24 @@ ++# ++# create a raid1 with a drive and set badblocks for the drive. ++# add a new drive does not cause an error. 
++# ++ ++# create raid1 ++mdadm -CR $md0 -l1 -n2 -e1.0 $dev1 missing ++testdev $md0 1 $mdsize1a 64 ++sleep 3 ++ ++# set badblocks for the drive ++dev1_name=$(basename $dev1) ++echo "100 100" > /sys/block/md0/md/dev-$dev1_name/bad_blocks ++echo "write_error" > /sys/block/md0/md/dev-$dev1_name/state ++ ++# write badblocks to metadata ++dd if=/dev/zero of=$md0 bs=512 count=200 oflag=direct ++ ++# re-add and recovery ++mdadm $md0 -a $dev2 ++check recovery ++ ++mdadm -S $md0 ++ +-- +2.41.0 + diff --git a/0027-imsm-Fix-RAID0-to-RAID10-migration.patch b/0027-imsm-Fix-RAID0-to-RAID10-migration.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2991bfafdc1778bede5ac1d7a3995f2a159ea60 --- /dev/null +++ b/0027-imsm-Fix-RAID0-to-RAID10-migration.patch @@ -0,0 +1,74 @@ +From 127e38b59cbdf717d1569bcdc75b8d823d8485f3 Mon Sep 17 00:00:00 2001 +From: Blazej Kucman +Date: Mon, 31 Mar 2025 12:46:52 +0200 +Subject: [PATCH 27/39] imsm: Fix RAID0 to RAID10 migration + +Support for RAID10 with +4 disks in IMSM introduced an inconsistency +between the VROC UEFI driver and Linux IMSM. VROC UEFI does not +support RAID10 with +4 disks, therefore appropriate protections were +added to the mdadm IMSM code that results in skipping processing of +such RAID in the UEFI phase. Unfortunately the case of migration +RAID0 2 disks to RAID10 4 disks was omitted, this case requires +maintaining compatibility with the VROC UEFI driver because it is +supported. + +For RAID10 +4 disk the MPB_ATTRIB_RAID10_EXT attribute is set in the +metadata, thanks to which the UEFI driver does not process such RAID. +In the series adding support, a new metadata raid level value +IMSM_T_RAID10 was also introduced. It is not recognized by VROC UEFI. + +The issue is caused by the fact that in the case of the mentioned +migration, IMSM_T_RAID10 is entered into the metadata but attribute +MPB_ATTRIB_RAID10_EXT is not entered, which causes an attempt to +process such RAID in the UEFI phase. 
This situation results in +the platform hang during booting in UEFI phase, this also results in +data loss after failed and interrupted RAID processing in VROC UEFI. + +The above situation is result of the update_imsm_raid_level() +function, for the mentioned migration function is executed on a map +with a not yet updated number of disks. + +The fix is to explicitly handle migration in the function mentioned +above to maintain compatibility with VROC UEFI driver. + +Steps to reproduce: +mdadm -C /dev/md/imsm0 -e imsm -n 2 /dev/nvme[1,2]n1 -R +mdadm -C /dev/md/vol -l 0 -n 2 /dev/nvme[1,2]n1 --assume-clean -R +mdadm -a /dev/md127 /dev/nvme3n1 +mdadm -a /dev/md127 /dev/nvme4n1 +mdadm -G /dev/md126 -l 10 +reboot + +Fixes: 27550b13297a ("imsm: add support for literal RAID 10") +Signed-off-by: Blazej Kucman +--- + super-intel.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/super-intel.c b/super-intel.c +index 4988eef1..b7b030a2 100644 +--- a/super-intel.c ++++ b/super-intel.c +@@ -1327,6 +1327,19 @@ static void update_imsm_raid_level(struct imsm_map *map, int new_level) + return; + } + ++ /* ++ * RAID0 to RAID10 migration. ++ * Due to the compatibility with VROC UEFI must be maintained, this case must be handled ++ * separately, because the map does not have an updated number of disks. 
++ */ ++ if (map->raid_level == IMSM_T_RAID0) { ++ if (map->num_members == 2) ++ map->raid_level = IMSM_T_RAID1; ++ else ++ map->raid_level = IMSM_T_RAID10; ++ return; ++ } ++ + if (map->num_members == 4) { + if (map->raid_level == IMSM_T_RAID10 || map->raid_level == IMSM_T_RAID1) + return; +-- +2.41.0 + diff --git a/0030-Update-tests.yml.patch b/0030-Update-tests.yml.patch new file mode 100644 index 0000000000000000000000000000000000000000..4fbdfb3ac9a2fd25b531096a732bbb7a29935fae --- /dev/null +++ b/0030-Update-tests.yml.patch @@ -0,0 +1,26 @@ +From 07bde560b71a2f6e36de5bc6f24e07e761c1c83b Mon Sep 17 00:00:00 2001 +From: Paul Luse +Date: Thu, 24 Apr 2025 08:38:29 -0700 +Subject: [PATCH 30/39] Update tests.yml + +Signed-off-by: Paul Luse +--- + .github/workflows/tests.yml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml +index 6f8e2b8e..b4c156c0 100644 +--- a/.github/workflows/tests.yml ++++ b/.github/workflows/tests.yml +@@ -65,7 +65,7 @@ jobs: + if: ${{ steps.testing.outcome == 'failure' }} + run: | + cd .. 
+- sudo rm /home/ci/actions-runner/_work/mdadm/logs/*.log ++ sudo rm -rf /home/ci/actions-runner/_work/mdadm/logs/*.log + + - name: "Set failed" + if: ${{ steps.testing.outcome == 'failure' }} +-- +2.41.0 + diff --git a/0031-Update-tests.yml.patch b/0031-Update-tests.yml.patch new file mode 100644 index 0000000000000000000000000000000000000000..51b55c62b8485a33fc66a6288677358bd16079d2 --- /dev/null +++ b/0031-Update-tests.yml.patch @@ -0,0 +1,26 @@ +From affe2168b807ccd48f00dc9e021196a5e2e83870 Mon Sep 17 00:00:00 2001 +From: Paul Luse +Date: Thu, 24 Apr 2025 08:41:15 -0700 +Subject: [PATCH 31/39] Update tests.yml + +Signed-off-by: Paul Luse +--- + .github/workflows/tests.yml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml +index b4c156c0..9b361571 100644 +--- a/.github/workflows/tests.yml ++++ b/.github/workflows/tests.yml +@@ -37,7 +37,7 @@ jobs: + vagrant status + vagrant up + sleep 2 +- vagrant ssh -c "sudo timedatectl set-timezone UTC && \ ++ vagrant ssh -c "uname -r && sudo timedatectl set-timezone UTC && \ + sudo systemctl restart chronyd && sudo chronyc -a makestep && sleep 1" + echo "FYI vagrant time command finished with exit code: $?" 
+ +-- +2.41.0 + diff --git a/0032-Update-tests.yml.patch b/0032-Update-tests.yml.patch new file mode 100644 index 0000000000000000000000000000000000000000..086645a7a48feadff53b2787dc337c4cf455368c --- /dev/null +++ b/0032-Update-tests.yml.patch @@ -0,0 +1,29 @@ +From 5fd2f5da6fe7995190627f8a7bd9f6ff90aad1d4 Mon Sep 17 00:00:00 2001 +From: Paul Luse +Date: Thu, 24 Apr 2025 09:03:32 -0700 +Subject: [PATCH 32/39] Update tests.yml + +Signed-off-by: Paul Luse +--- + .github/workflows/tests.yml | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml +index 9b361571..2556fccf 100644 +--- a/.github/workflows/tests.yml ++++ b/.github/workflows/tests.yml +@@ -37,9 +37,8 @@ jobs: + vagrant status + vagrant up + sleep 2 +- vagrant ssh -c "uname -r && sudo timedatectl set-timezone UTC && \ +- sudo systemctl restart chronyd && sudo chronyc -a makestep && sleep 1" +- echo "FYI vagrant time command finished with exit code: $?" ++ vagrant ssh -c "uname -r" ++ echo "FYI vagrant uname command finished with exit code: $?" + + - name: 'Run tests' + id: testing +-- +2.41.0 + diff --git a/0035-mdadm-Remove-klibc-and-uclibc-support.patch b/0035-mdadm-Remove-klibc-and-uclibc-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..37c5a92c6accea6a509e7bc8df0a833dff496d74 --- /dev/null +++ b/0035-mdadm-Remove-klibc-and-uclibc-support.patch @@ -0,0 +1,183 @@ +From bd648e3bec3d883d2f4addea84ac1ac8790c75e9 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Fri, 7 Mar 2025 11:38:48 +0100 +Subject: [PATCH 35/39] mdadm: Remove klibc and uclibc support + +Klibc compilation is not working for at least 3 years because of +following error: +mdadm.h:1912:15: error: unknown type name 'sighandler_t' + +It will have a conflict with le/be_to_cpu() functions family provided by +asm/byteorder.h which will be included with raid/md_p.h. Therefore we +need to remove support for it. 
Also, remove uclibc because it is not actively +maintained. + +Remove klibc and uclibc targets from Makefile and special klibc code. +Targets can be removed safely because using CC is recommended. + +Signed-off-by: Mariusz Tkaczyk +--- + Makefile | 34 +++------------------------------- + README.md | 3 --- + mdadm.h | 37 +------------------------------------ + 3 files changed, 4 insertions(+), 70 deletions(-) + +diff --git a/Makefile b/Makefile +index bcd092de..387e4a56 100644 +--- a/Makefile ++++ b/Makefile +@@ -31,16 +31,6 @@ + # define "CXFLAGS" to give extra flags to CC. + # e.g. make CXFLAGS=-O to optimise + CXFLAGS ?=-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE +-TCC = tcc +-UCLIBC_GCC = $(shell for nm in i386-uclibc-linux-gcc i386-uclibc-gcc; do which $$nm > /dev/null && { echo $$nm ; exit; } ; done; echo false No uclibc found ) +-#DIET_GCC = diet gcc +-# sorry, but diet-libc doesn't know about posix_memalign, +-# so we cannot use it any more. +-DIET_GCC = gcc -DHAVE_STDINT_H +- +-KLIBC=/home/src/klibc/klibc-0.77 +- +-KLIBC_GCC = gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 + + ifdef COVERITY + COVERITY_FLAGS=-include coverity-gcc-hack.h +@@ -225,8 +215,6 @@ everything: all swap_super test_stripe raid6check \ + mdadm.Os mdadm.O2 man + everything-test: all swap_super test_stripe \ + mdadm.Os mdadm.O2 man +-# mdadm.uclibc doesn't work on x86-64 +-# mdadm.tcc doesn't work.. 
+ + %.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(COVERITY_FLAGS) -o $@ -c $< +@@ -237,13 +225,6 @@ mdadm : $(OBJS) | check_rundir + mdadm.static : $(OBJS) $(STATICOBJS) + $(CC) $(CFLAGS) $(LDFLAGS) -static -o mdadm.static $(OBJS) $(STATICOBJS) $(LDLIBS) + +-mdadm.tcc : $(SRCS) $(INCL) +- $(TCC) -o mdadm.tcc $(SRCS) +- +-mdadm.klibc : $(SRCS) $(INCL) +- rm -f $(OBJS) +- $(CC) -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) $(SRCS) +- + mdadm.Os : $(SRCS) $(INCL) + $(CC) -o mdadm.Os $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -DHAVE_STDINT_H -Os $(SRCS) $(LDLIBS) + +@@ -298,15 +279,6 @@ install : install-bin install-man install-udev + install-static : mdadm.static install-man + $(INSTALL) -D $(STRIP) -m 755 mdadm.static $(DESTDIR)$(BINDIR)/mdadm + +-install-tcc : mdadm.tcc install-man +- $(INSTALL) -D $(STRIP) -m 755 mdadm.tcc $(DESTDIR)$(BINDIR)/mdadm +- +-install-uclibc : mdadm.uclibc install-man +- $(INSTALL) -D $(STRIP) -m 755 mdadm.uclibc $(DESTDIR)$(BINDIR)/mdadm +- +-install-klibc : mdadm.klibc install-man +- $(INSTALL) -D $(STRIP) -m 755 mdadm.klibc $(DESTDIR)$(BINDIR)/mdadm +- + install-man: mdadm.8 md.4 mdadm.conf.5 mdmon.8 + $(INSTALL) -D -m 644 mdadm.8 $(DESTDIR)$(MAN8DIR)/mdadm.8 + $(INSTALL) -D -m 644 mdmon.8 $(DESTDIR)$(MAN8DIR)/mdmon.8 +@@ -354,9 +326,9 @@ test: mdadm mdmon test_stripe swap_super raid6check + + clean : + rm -f mdadm mdmon $(OBJS) $(MON_OBJS) $(STATICOBJS) core *.man \ +- mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt \ +- .merge_file_* mdadm.Os mdadm.O2 mdmon.O2 swap_super init.cpio.gz \ +- mdadm.uclibc.static test_stripe raid6check raid6check.o mdmon mdadm.8 ++ mdadm.static *.orig *.porig *.rej *.alt merge_file_* \ ++ mdadm.Os mdadm.O2 mdmon.O2 swap_super init.cpio.gz \ ++ test_stripe raid6check raid6check.o mdmon mdadm.8 + rm -rf cov-int + + dist : clean +diff --git a/README.md b/README.md +index 029e0ee2..ba611ec5 100644 
+--- a/README.md ++++ b/README.md +@@ -135,9 +135,6 @@ List of installation targets: + + The following targets are deprecated and should not be used: + - `install-static` +-- `install-tcc` +-- `install-uclibc` +-- `install-klibc` + + # License + +diff --git a/mdadm.h b/mdadm.h +index 0ea83ad3..592bd1ba 100644 +--- a/mdadm.h ++++ b/mdadm.h +@@ -43,6 +43,7 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence)); + #include + #include + #include ++#include + #include + #include + #include +@@ -189,7 +190,6 @@ struct dlm_lksb { + ((x) & 0x00000000ff000000ULL) << 8 | \ + ((x) & 0x000000ff00000000ULL) >> 8) + +-#if !defined(__KLIBC__) + #if BYTE_ORDER == LITTLE_ENDIAN + #define __cpu_to_le16(_x) (unsigned int)(_x) + #define __cpu_to_le32(_x) (unsigned int)(_x) +@@ -221,7 +221,6 @@ struct dlm_lksb { + #else + # error "unknown endianness." + #endif +-#endif /* __KLIBC__ */ + + /* + * Partially stolen from include/linux/unaligned/packed_struct.h +@@ -1530,40 +1529,6 @@ extern void sysfsline(char *line); + struct stat64; + #endif + +-#define HAVE_NFTW we assume +-#define HAVE_FTW +- +-#ifdef __UCLIBC__ +-# include +-# ifndef __UCLIBC_HAS_LFS__ +-# define lseek64 lseek +-# endif +-# ifndef __UCLIBC_HAS_FTW__ +-# undef HAVE_FTW +-# undef HAVE_NFTW +-# endif +-#endif +- +-#ifdef __dietlibc__ +-# undef HAVE_NFTW +-#endif +- +-#if defined(__KLIBC__) +-# undef HAVE_NFTW +-# undef HAVE_FTW +-#endif +- +-#ifndef HAVE_NFTW +-# define FTW_PHYS 1 +-# ifndef HAVE_FTW +- struct FTW {}; +-# endif +-#endif +- +-#ifdef HAVE_FTW +-# include +-#endif +- + extern int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s); + + extern int Manage_ro(char *devname, int fd, int readonly); +-- +2.41.0 + diff --git a/0036-mdadm-include-asm-byteorder.h.patch b/0036-mdadm-include-asm-byteorder.h.patch new file mode 100644 index 0000000000000000000000000000000000000000..a19b37c3bbb61cd1a25cd6eab650124771f1fd7c --- /dev/null +++ 
b/0036-mdadm-include-asm-byteorder.h.patch @@ -0,0 +1,96 @@ +From 696207860f408534651db89c5b40133f5903fa25 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Fri, 7 Mar 2025 12:10:58 +0100 +Subject: [PATCH 36/39] mdadm: include asm/byteorder.h + +It will be included by raid/md_p.h anyway. Include it directly and +remove custom functions. It is not a problem now. + +Signed-off-by: Mariusz Tkaczyk +--- + mdadm.h | 55 +------------------------------------------------------ + 1 file changed, 1 insertion(+), 54 deletions(-) + +diff --git a/mdadm.h b/mdadm.h +index 592bd1ba..7471cedc 100644 +--- a/mdadm.h ++++ b/mdadm.h +@@ -34,6 +34,7 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence)); + #endif + + #include ++#include + #include + #include + #include +@@ -85,7 +86,6 @@ struct dlm_lksb { + #endif + + #include +-/*#include */ + #include + #include + #include +@@ -169,59 +169,6 @@ struct dlm_lksb { + #include "msg.h" + #include "mdadm_status.h" + +-#include +-/* Redhat don't like to #include , and +- * some time include isn't enough, +- * and there is no standard conversion function so... */ +-/* And dietlibc doesn't think byteswap is ok, so.. 
*/ +-/* #include */ +-#define __mdadm_bswap_16(x) (((x) & 0x00ffU) << 8 | \ +- ((x) & 0xff00U) >> 8) +-#define __mdadm_bswap_32(x) (((x) & 0x000000ffU) << 24 | \ +- ((x) & 0xff000000U) >> 24 | \ +- ((x) & 0x0000ff00U) << 8 | \ +- ((x) & 0x00ff0000U) >> 8) +-#define __mdadm_bswap_64(x) (((x) & 0x00000000000000ffULL) << 56 | \ +- ((x) & 0xff00000000000000ULL) >> 56 | \ +- ((x) & 0x000000000000ff00ULL) << 40 | \ +- ((x) & 0x00ff000000000000ULL) >> 40 | \ +- ((x) & 0x0000000000ff0000ULL) << 24 | \ +- ((x) & 0x0000ff0000000000ULL) >> 24 | \ +- ((x) & 0x00000000ff000000ULL) << 8 | \ +- ((x) & 0x000000ff00000000ULL) >> 8) +- +-#if BYTE_ORDER == LITTLE_ENDIAN +-#define __cpu_to_le16(_x) (unsigned int)(_x) +-#define __cpu_to_le32(_x) (unsigned int)(_x) +-#define __cpu_to_le64(_x) (unsigned long long)(_x) +-#define __le16_to_cpu(_x) (unsigned int)(_x) +-#define __le32_to_cpu(_x) (unsigned int)(_x) +-#define __le64_to_cpu(_x) (unsigned long long)(_x) +- +-#define __cpu_to_be16(_x) __mdadm_bswap_16(_x) +-#define __cpu_to_be32(_x) __mdadm_bswap_32(_x) +-#define __cpu_to_be64(_x) __mdadm_bswap_64(_x) +-#define __be16_to_cpu(_x) __mdadm_bswap_16(_x) +-#define __be32_to_cpu(_x) __mdadm_bswap_32(_x) +-#define __be64_to_cpu(_x) __mdadm_bswap_64(_x) +-#elif BYTE_ORDER == BIG_ENDIAN +-#define __cpu_to_le16(_x) __mdadm_bswap_16(_x) +-#define __cpu_to_le32(_x) __mdadm_bswap_32(_x) +-#define __cpu_to_le64(_x) __mdadm_bswap_64(_x) +-#define __le16_to_cpu(_x) __mdadm_bswap_16(_x) +-#define __le32_to_cpu(_x) __mdadm_bswap_32(_x) +-#define __le64_to_cpu(_x) __mdadm_bswap_64(_x) +- +-#define __cpu_to_be16(_x) (unsigned int)(_x) +-#define __cpu_to_be32(_x) (unsigned int)(_x) +-#define __cpu_to_be64(_x) (unsigned long long)(_x) +-#define __be16_to_cpu(_x) (unsigned int)(_x) +-#define __be32_to_cpu(_x) (unsigned int)(_x) +-#define __be64_to_cpu(_x) (unsigned long long)(_x) +-#else +-# error "unknown endianness." 
+-#endif +- + /* + * Partially stolen from include/linux/unaligned/packed_struct.h + */ +-- +2.41.0 + diff --git a/md-auto-readd.rule b/md-auto-readd.rule new file mode 100644 index 0000000000000000000000000000000000000000..5ce29fb30556ac7969056eb15f2ca37ef835befd --- /dev/null +++ b/md-auto-readd.rule @@ -0,0 +1,27 @@ +# +# Enable/Disable - default is Disabled +# to disable this rule, GOTO="md_end" should be the first active command. +# to enable this rule, Comment out GOTO="md_end". +GOTO="md_end" + +# Required: MD arrays must have a bitmap for transient devices to +# be added back in the array. +# mdadm -CR /dev/md0 -l1 -n2 /dev/sd[ab] --bitmap=internal + +# Don't process any events if anaconda is running as anaconda brings up +# raid devices manually +ENV{ANACONDA}=="?*", GOTO="md_end" + +# Also don't process disks that are slated to be a multipath device +ENV{DM_MULTIPATH_DEVICE_PATH}=="1", GOTO="md_end" + +# We process add events on block devices (since they are ready as soon as +# they are added to the system) + +ACTION!="add", GOTO="md_end" +ENV{ID_FS_TYPE}!="linux_raid_member", GOTO="md_end" +SUBSYSTEM=="block", RUN{program}+="/usr/sbin/md-auto-readd.sh $devnode" + +# +# Land here to exit cleanly +LABEL="md_end" diff --git a/md-auto-readd.sh b/md-auto-readd.sh new file mode 100644 index 0000000000000000000000000000000000000000..f15c4827f06d858a78649cb2c9aded5c0a9b6b92 --- /dev/null +++ b/md-auto-readd.sh @@ -0,0 +1,17 @@ +#!/usr/bin/bash +MDADM=/sbin/mdadm +DEVNAME=$1 + +export $(${MDADM} --examine --export ${DEVNAME}) +if [ -z "${MD_UUID}" ]; then + exit 1 +fi + +UUID_LINK=$(readlink /dev/disk/by-id/md-uuid-${MD_UUID}) +MD_DEVNAME=${UUID_LINK##*/} +export $(${MDADM} --detail --export /dev/${MD_DEVNAME}) +if [ -z "${MD_METADATA}" ] ; then + exit 1 +fi + +${MDADM} --manage /dev/${MD_DEVNAME} --re-add ${DEVNAME} --verbose diff --git a/mdadm-4.3.tar.xz b/mdadm-4.3.tar.xz deleted file mode 100644 index
de58ca08e0a212f03c304bb9522cb7901e3a97ab..0000000000000000000000000000000000000000 Binary files a/mdadm-4.3.tar.xz and /dev/null differ diff --git a/mdadm-4.4.tar.gz b/mdadm-4.4.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a03cf152c4edddf3e0aa6f9124d2a26d3adf2eb0 Binary files /dev/null and b/mdadm-4.4.tar.gz differ diff --git a/mdadm-check-posix-name-before-setting-name-and-devna.patch b/mdadm-check-posix-name-before-setting-name-and-devna.patch new file mode 100644 index 0000000000000000000000000000000000000000..9d1678a43a02dbacf6c5b68bb1e5cbf1ad84a262 --- /dev/null +++ b/mdadm-check-posix-name-before-setting-name-and-devna.patch @@ -0,0 +1,88 @@ +From bcfe5a4220a0d49f703700033f7e5eaafdbf52ff Mon Sep 17 00:00:00 2001 +From: Xiao Ni +Date: Wed, 30 Apr 2025 08:51:49 -0400 +Subject: [PATCH 39/39] mdadm: check posix name before setting name and devname + +It's good to has limitations for name when creating an array. But the +arrays which were created before patch e2eb503 (mdadm: Follow POSIX +Portable Character Set) can't be assembled. So remove the POSIX check +for assemble mode. + +This can be reproduced: +* build mdadm without patch e2eb503 +* mdadm -CR /dev/md/node1:pv1 -l0 -n2 /dev/loop0 /dev/loop1 +* mdadm -Ss +* build with latest mdadm, and try to assemble it. 
+* mdadm -A /dev/md/node1:pv1 --name node1:pv1 + +Fixes: e2eb503 (mdadm: Follow POSIX Portable Character Set) +Signed-off-by: Xiao Ni +--- + config.c | 8 ++------ + mdadm.c | 12 ++++++++++++ + 2 files changed, 14 insertions(+), 6 deletions(-) + +diff --git a/config.c b/config.c +index 8a8ae5e4..8abdba44 100644 +--- a/config.c ++++ b/config.c +@@ -208,11 +208,6 @@ static mdadm_status_t ident_check_name(const char *name, const char *prop_name, + return MDADM_STATUS_ERROR; + } + +- if (!is_name_posix_compatible(name)) { +- ident_log(prop_name, name, "Not POSIX compatible", cmdline); +- return MDADM_STATUS_ERROR; +- } +- + return MDADM_STATUS_SUCCESS; + } + +@@ -512,7 +507,8 @@ void arrayline(char *line) + + for (w = dl_next(line); w != line; w = dl_next(w)) { + if (w[0] == '/' || strchr(w, '=') == NULL) { +- _ident_set_devname(&mis, w, false); ++ if (is_name_posix_compatible(basename(w))) ++ _ident_set_devname(&mis, w, false); + } else if (strncasecmp(w, "uuid=", 5) == 0) { + if (mis.uuid_set) + pr_err("only specify uuid once, %s ignored.\n", +diff --git a/mdadm.c b/mdadm.c +index 6200cd0e..2147b278 100644 +--- a/mdadm.c ++++ b/mdadm.c +@@ -732,6 +732,11 @@ int main(int argc, char *argv[]) + exit(2); + } + ++ if (mode != ASSEMBLE && ++ !is_name_posix_compatible(basename(optarg))) { ++ pr_err("%s Not POSIX compatible\n", basename(optarg)); ++ exit(2); ++ } + if (ident_set_name(&ident, optarg) != MDADM_STATUS_SUCCESS) + exit(2); + +@@ -1284,11 +1289,18 @@ int main(int argc, char *argv[]) + mode == GROW || (mode == ASSEMBLE && ! 
c.scan)) { + struct stat stb; + int ret; ++ char *bname = basename(devlist->devname); + + if (devs_found < 1) { + pr_err("an md device must be given in this mode\n"); + exit(2); + } ++ ++ if (mode != ASSEMBLE && !is_name_posix_compatible(bname)) { ++ pr_err("%s Not POSIX compatible\n", bname); ++ exit(2); ++ } ++ + if (ident_set_devname(&ident, devlist->devname) != MDADM_STATUS_SUCCESS) + exit(1); + +-- +2.41.0 + diff --git a/mdadm-fix-building-errors.patch b/mdadm-fix-building-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..d1c97cd7426a5417e0a2e7128d389a04d84abeea --- /dev/null +++ b/mdadm-fix-building-errors.patch @@ -0,0 +1,64 @@ +From 46940fbca6df3ddffa71541e459a277d79584fc0 Mon Sep 17 00:00:00 2001 +From: Xiao Ni +Date: Wed, 30 Apr 2025 06:47:08 -0400 +Subject: [PATCH 38/39] mdadm: fix building errors + +This is a rhel-only patch and this patch will be sent to upstream. + +Signed-off-by: Xiao Ni +--- + super-ddf.c | 9 +++++---- + super-intel.c | 2 +- + 2 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/super-ddf.c b/super-ddf.c +index 6e7db924..285d3b8b 100644 +--- a/super-ddf.c ++++ b/super-ddf.c +@@ -1606,9 +1606,9 @@ static void examine_vd(int n, struct ddf_super *sb, char *guid) + map_num(ddf_sec_level, vc->srl) ?: "-unknown-"); + } + printf(" Device Size[%d] : %llu\n", n, +- be64_to_cpu(vc->blocks)/2); ++ (unsigned long long)(be64_to_cpu(vc->blocks)/2)); + printf(" Array Size[%d] : %llu\n", n, +- be64_to_cpu(vc->array_blocks)/2); ++ (unsigned long long)(be64_to_cpu(vc->array_blocks)/2)); + } + } + +@@ -1665,7 +1665,7 @@ static void examine_pds(struct ddf_super *sb) + printf(" %3d %08x ", i, + be32_to_cpu(pd->refnum)); + printf("%8lluK ", +- be64_to_cpu(pd->config_size)>>1); ++ (unsigned long long)be64_to_cpu(pd->config_size)>>1); + for (dl = sb->dlist; dl ; dl = dl->next) { + if (be32_eq(dl->disk.refnum, pd->refnum)) { + char *dv = map_dev(dl->major, dl->minor, 0); +@@ -2901,7 +2901,8 @@ static unsigned int 
find_unused_pde(const struct ddf_super *ddf) + static void _set_config_size(struct phys_disk_entry *pde, const struct dl *dl) + { + __u64 cfs, t; +- cfs = min(dl->size - 32*1024*2ULL, be64_to_cpu(dl->primary_lba)); ++ cfs = min((unsigned long long)dl->size - 32*1024*2ULL, ++ (unsigned long long)be64_to_cpu(dl->primary_lba)); + t = be64_to_cpu(dl->secondary_lba); + if (t != ~(__u64)0) + cfs = min(cfs, t); +diff --git a/super-intel.c b/super-intel.c +index b7b030a2..caa583d8 100644 +--- a/super-intel.c ++++ b/super-intel.c +@@ -2325,7 +2325,7 @@ static void export_examine_super_imsm(struct supertype *st) + printf("MD_LEVEL=container\n"); + printf("MD_UUID=%s\n", nbuf+5); + printf("MD_DEVICES=%u\n", mpb->num_disks); +- printf("MD_CREATION_TIME=%llu\n", __le64_to_cpu(mpb->creation_time)); ++ printf("MD_CREATION_TIME=%llu\n", (unsigned long long)__le64_to_cpu(mpb->creation_time)); + } + + static void detail_super_imsm(struct supertype *st, char *homehost, +-- +2.41.0 + diff --git a/mdadm-udev.patch b/mdadm-udev.patch new file mode 100644 index 0000000000000000000000000000000000000000..a9d4d938a442b5c316913e478ab91164e6f6bedf --- /dev/null +++ b/mdadm-udev.patch @@ -0,0 +1,26 @@ +--- mdadm/udev-md-raid-assembly.rules.orig 2023-01-06 16:37:03.780756100 +0800 ++++ mdadm/udev-md-raid-assembly.rules 2023-01-06 17:04:09.536159980 +0800 +@@ -5,6 +5,9 @@ + ENV{ANACONDA}=="?*", GOTO="md_inc_end" + # assemble md arrays + ++# Also don't process disks that are slated to be a multipath device ++ENV{DM_MULTIPATH_DEVICE_PATH}=="1", GOTO="md_inc_end" ++ + SUBSYSTEM!="block", GOTO="md_inc_end" + + # skip non-initialized devices +@@ -33,6 +36,13 @@ + + LABEL="md_inc" + ++# Make sure we don't handle dm devices when some limits are set. ++# And linux_raid_member only be set when change/remove event happen. ++# So we don't need to consider add event here. 
++KERNEL=="dm-*", ENV{DM_UDEV_RULES_VSN}!="?*", GOTO="md_inc_end" ++KERNEL=="dm-*", ENV{DM_UDEV_DISABLE_OTHER_RULES_FLAG}=="1", GOTO="md_inc_end" ++KERNEL=="dm-*", ENV{DM_SUSPENDED}=="1", GOTO="md_inc_end" ++ + # Bare disks are ready when add event happens, the raid can be assembled. + ACTION=="change", KERNEL!="dm-*|md*", GOTO="md_inc_end" + diff --git a/mdadm-use-standard-libc-nftw.patch b/mdadm-use-standard-libc-nftw.patch new file mode 100644 index 0000000000000000000000000000000000000000..c92cea84515faf02f9a96e5b8385c9dfcce59cb3 --- /dev/null +++ b/mdadm-use-standard-libc-nftw.patch @@ -0,0 +1,53 @@ +commit e549ac6ab2ce5e7ec182310f8f5f2e41c6ac9233 +Author: Xiao Ni +Date: Wed May 7 18:06:59 2025 +0800 + + mdadm: use standard libc nftw + + commit bd648e3bec3d ("mdadm: Remove klibc and uclibc support") removes + macro HAVE_NFTW/HAVE_FTW and uses libc header ftw.h. But it leaves the + codes in lib.c which let mdadm command call nftw defined in lib.c. It + needs to remove these codes. + + The bug can be reproduced by: + mdadm -CR /dev/md0 --level raid5 --metadata=1.1 --chunk=32 --raid-disks 3 + --size 10000 /dev/loop1 /dev/loop2 /dev/loop3 + mdadm /dev/md0 --grow --chunk=64 + mdadm: /dev/md0: cannot open component -unknown- + + Fixes: bd648e3bec3d ("mdadm: Remove klibc and uclibc support") + Signed-off-by: Xiao Ni + +diff --git a/lib.c b/lib.c +index f36ae03a..eb6cc119 100644 +--- a/lib.c ++++ b/lib.c +@@ -245,28 +245,6 @@ int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s) + return 0; + } + +-#ifndef HAVE_NFTW +-#ifdef HAVE_FTW +-int add_dev_1(const char *name, const struct stat *stb, int flag) +-{ +- return add_dev(name, stb, flag, NULL); +-} +-int nftw(const char *path, +- int (*han)(const char *name, const struct stat *stb, +- int flag, struct FTW *s), int nopenfd, int flags) +-{ +- return ftw(path, add_dev_1, nopenfd); +-} +-#else +-int nftw(const char *path, +- int (*han)(const char *name, const struct stat *stb, +- int flag, struct 
FTW *s), int nopenfd, int flags) +-{ +- return 0; +-} +-#endif /* HAVE_FTW */ +-#endif /* HAVE_NFTW */ +- + /* + * Find a block device with the right major/minor number. + * If we find multiple names, choose the shortest. diff --git a/mdadm.spec b/mdadm.spec index a3c8c53e43e45bb2c5b0bfd47c5960896b13154c..24f51e9fef1720275cde5583bb40451395735b04 100644 --- a/mdadm.spec +++ b/mdadm.spec @@ -1,24 +1,55 @@ -%define anolis_release 1 +%define anolis_release 1 Name: mdadm -Version: 4.3 +Version: 4.4 Release: %{anolis_release}%{?dist} Summary: The mdadm program controls Linux md devices (software RAID arrays) URL: http://www.kernel.org/pub/linux/utils/raid/mdadm/ +Source0: https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/snapshot/mdadm-4.4.tar.gz +Source1: raid-check +Source2: mdadm-raid-check-sysconfig +Source3: mdmonitor.service +Source4: mdadm.conf +Source5: mdadm_event.conf +Source6: raid-check.timer +Source7: raid-check.service +Source8: mdcheck +Source9: md-auto-readd.rule +Source10: md-auto-readd.sh License: GPLv2+ -Source: https://mirrors.edge.kernel.org/pub/linux/utils/raid/mdadm/%{name}-%{version}.tar.xz -Source1: raid-check -Source2: mdadm.rules -Source3: mdadm-raid-check-sysconfig -Source4: mdmonitor.service -Source5: mdadm.conf -Source6: mdadm_event.conf -Source7: raid-check.timer -Source8: raid-check.service # Build without -Werror. 
Patch00: disable-Werror.patch +Patch1: 0017-platform-intel-Disable-legacy-option-ROM-scan-on-UEF.patch +Patch2: 0031-Update-tests.yml.patch +Patch3: mdadm-fix-building-errors.patch +Patch4: 0013-mdadm-raid6check-add-xmalloc.h-to-raid6check.c.patch +Patch5: 0020-bitmap.h-clear-__KERNEL__-based-headers.patch +Patch6: 0024-mdmon-imsm-fix-metadata-corruption-when-managing-new.patch +Patch7: mdadm-check-posix-name-before-setting-name-and-devna.patch +Patch8: 0008-Remove-freeze-reshape-logic.patch +Patch9: 0001-Coverity-fixes-resources-leaks.patch +Patch10: 0007-mdadm.man-Remove-external-bitmap.patch +Patch11: 0014-mdopen-add-sbin-path-to-env-PATH-when-call-system-mo.patch +Patch12: 0019-super-ddf-optimize-DDF-header-search-for-widely-used.patch +Patch13: 0002-Incremental-Document-workaround.patch +Patch14: mdadm-use-standard-libc-nftw.patch +Patch15: 0010-mdadm-Do-not-start-reshape-before-switchroot.patch +Patch16: 0018-super-ddf-Prevent-crash-when-handling-DDF-metadata.patch +Patch17: 0027-imsm-Fix-RAID0-to-RAID10-migration.patch +Patch18: 0012-Refactor-continue_via_systemd.patch +Patch19: 0026-super1-Clear-extra-flags-when-initializing-metadata.patch +Patch20: 0030-Update-tests.yml.patch +Patch21: 0016-mdadm-fix-grow-with-add-for-linear.patch +Patch22: 0035-mdadm-Remove-klibc-and-uclibc-support.patch +Patch23: 0004-Incremental-Simplify-remove-logic.patch +Patch24: 0032-Update-tests.yml.patch +Patch25: 0015-udev-persist-properties-of-MD-devices-after-switch_r.patch +Patch26: 0021-bitmap.h-Minor-fixes.patch +Patch27: 0025-Regression-fix-156.patch +Patch28: 0036-mdadm-include-asm-byteorder.h.patch +Patch29: mdadm-udev.patch BuildRequires: make BuildRequires: systemd-rpm-macros binutils-devel gcc systemd-devel @@ -90,6 +121,28 @@ install -m644 %{SOURCE6} %{buildroot}/etc/libreport/events.d /etc/libreport/events.d/* %changelog +* Thu Sep 18 2025 wenyuzifang - 4.4-1 +- Update to 4.4 from 4.3 +- Prevent log spam and unauthorized memory access warnings on UEFI
systems with Secure Boot enabled. +- Fix type-mismatch errors in printf statements for clean compilation across platforms. +- Include xmalloc.h to declare xmalloc, preventing implicit function errors during compilation. +- Fix a race condition in IMSM array management to prevent metadata corruption. +- Ensure legacy arrays with non-POSIX names can be assembled after mdadm updates. +- Fix memory and resource leaks to improve system stability and prevent resource exhaustion. +- Extend PATH to ensure modprobe is found during boot, preventing RAID assembly failures. +- Fix DDF header detection for LSI and PERC RAID controllers; clear 32MB during metadata wipe to prevent infinite loops. +- Use standard nftw for device detection during RAID resize to ensure reliable array growth. +- Improve boot reliability by deferring RAID reshape until after switchroot. +- Safely handle missing update_super function calls to prevent crashes when assembling DDF RAID arrays. +- Ensure compatibility of RAID0 to RAID10 migration with VROC UEFI to prevent boot hangs and data loss. +- Clear unsafe metadata flags when adding disks to RAID1 arrays with bad blocks marked. +- Forcefully remove all .log files and directories in CI failure scenarios for reliable log cleanup. +- Fix incorrect rejection of --grow --add for linear arrays by properly checking uninitialized bitmap state. +- Remove outdated library support (klibc and uclibc) to simplify maintenance and improve build reliability. +- Simplify removal logic to improve reliability and maintainability during unexpected device disconnections. +- Fix regression, improve test reliability, and ensure correct error reporting in systemd-disabled environments. +- Prevent incorrect RAID assembly on multipath or suspended DM devices to avoid system instability. 
+ * Tue Mar 18 2025 mgb01105731 - 4.3-1 - Update to 4.3 from 4.2 diff --git a/mdcheck b/mdcheck new file mode 100644 index 0000000000000000000000000000000000000000..700c3e252e72e526807a12673ee54ddc289f3e3d --- /dev/null +++ b/mdcheck @@ -0,0 +1,166 @@ +#!/bin/bash + +# Copyright (C) 2014-2017 Neil Brown +# +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Author: Neil Brown +# Email: + +# This script should be run periodically to automatically +# perform a 'check' on any md arrays. +# +# It supports a 'time budget' such that any incomplete 'check' +# will be checkpointed when that time has expired. +# A subsequent invocation can allow the 'check' to continue. +# +# Options are: +# --continue Don't start new checks, only continue old ones. +# --duration This is passed to "date --date=$duration" to find out +# when to finish +# +# To support '--continue', arrays are identified by UUID and the 'sync_completed' +# value is stored in /var/lib/mdcheck/$UUID + +# convert a /dev/md name into /sys/.../md equivalent +sysname() { + set `ls -lLd $1` + maj=${5%,} + min=$6 + readlink -f /sys/dev/block/$maj:$min +} + +args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@") +rv=$? 
+if [ $rv -ne 0 ]; then exit $rv; fi + +eval set -- $args + +cont= +endtime= +while [ " $1" != " --" ] +do + case $1 in + --help ) + echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]' + echo >&2 ' time-offset must be understood by "date --date"' + exit 0 + ;; + --continue ) cont=yes ;; + --duration ) shift; dur=$1 + endtime=$(date --date "$dur" "+%s") + ;; + esac + shift +done +shift + +# We need a temp file occasionally... +tmp=/var/lib/mdcheck/.md-check-$$ +trap 'rm -f "$tmp"' 0 2 3 15 + + +# firstly, clean out really old state files +mkdir -p /var/lib/mdcheck +find /var/lib/mdcheck -name "MD_UUID*" -type f -mtime +180 -exec rm {} \; + +# Now look at each md device. +cnt=0 +for dev in /dev/md?* +do + [ -e "$dev" ] || continue + sys=`sysname $dev` + if [ ! -f "$sys/md/sync_action" ] + then # cannot check this array + continue + fi + if [ "`cat $sys/md/sync_action`" != 'idle' ] + then # This array is busy + continue + fi + + mdadm --detail --export "$dev" | grep '^MD_UUID=' > $tmp || continue + source $tmp + fl="/var/lib/mdcheck/MD_UUID_$MD_UUID" + if [ -z "$cont" ] + then + start=0 + logger -p daemon.info mdcheck start checking $dev + elif [ -z "$MD_UUID" -o ! 
-f "$fl" ] + then + # Nothing to continue here + continue + else + start=`cat "$fl"` + logger -p daemon.info mdcheck continue checking $dev from $start + fi + + cnt=$[cnt+1] + eval MD_${cnt}_fl=\$fl + eval MD_${cnt}_sys=\$sys + eval MD_${cnt}_dev=\$dev + echo $start > $fl + echo $start > $sys/md/sync_min + echo check > $sys/md/sync_action +done + +if [ -z "$endtime" ] +then + exit 0 +fi + +while [ `date +%s` -lt $endtime ] +do + any= + for i in `eval echo {1..$cnt}` + do + eval fl=\$MD_${i}_fl + eval sys=\$MD_${i}_sys + eval dev=\$MD_${i}_dev + + if [ -z "$fl" ]; then continue; fi + + if [ "`cat $sys/md/sync_action`" != 'check' ] + then + logger -p daemon.info mdcheck finished checking $dev + eval MD_${i}_fl= + rm -f $fl + continue; + fi + read a rest < $sys/md/sync_completed + echo $a > $fl + any=yes + done + if [ -z "$any" ]; then exit 0; fi + sleep 120 +done + +# We've waited, and there are still checks running. +# Time to stop them. +for i in `eval echo {1..$cnt}` +do + eval fl=\$MD_${i}_fl + eval sys=\$MD_${i}_sys + eval dev=\$MD_${i}_dev + + if [ -z "$fl" ]; then continue; fi + + if [ "`cat $sys/md/sync_action`" != 'check' ] + then + eval MD_${i}_fl= + rm -f $fl + continue; + fi + echo idle > $sys/md/sync_action + cat $sys/md/sync_min > $fl + logger -p daemon.info pause checking $dev at `cat $fl` +done