Index: LVM2.2.02.39/dmeventd/mirror/dmeventd_mirror.c =================================================================== --- LVM2.2.02.39.orig/dmeventd/mirror/dmeventd_mirror.c 2008-01-31 20:19:35.000000000 +0800 +++ LVM2.2.02.39/dmeventd/mirror/dmeventd_mirror.c 2010-04-03 11:46:40.000000000 +0800 @@ -152,7 +152,7 @@ } /* FIXME Is any sanity-checking required on %s? */ - if (CMD_SIZE <= snprintf(cmd_str, CMD_SIZE, "vgreduce --config devices{ignore_suspended_devices=1} --removemissing %s", vg)) { + if (CMD_SIZE <= snprintf(cmd_str, CMD_SIZE, "lvconvert --config devices{ignore_suspended_devices=1} --repair --use-policies %s/%s", vg, lv)) { /* this error should be caught above, but doesn't hurt to check again */ syslog(LOG_ERR, "Unable to form LVM command: Device name too long"); dm_pool_empty(_mem_pool); /* FIXME: not safe with multiple threads */ @@ -161,7 +161,7 @@ r = lvm2_run(_lvm_handle, cmd_str); - dm_pool_empty(_mem_pool); /* FIXME: not safe with multiple threads */ + dm_pool_empty(_mem_pool); /* FIXME: not safe with multiple threads */ return (r == 1) ? 0 : -1; } Index: LVM2.2.02.39/doc/example.conf =================================================================== --- LVM2.2.02.39.orig/doc/example.conf 2010-04-03 11:26:22.000000000 +0800 +++ LVM2.2.02.39/doc/example.conf 2010-04-03 11:26:22.000000000 +0800 @@ -306,8 +306,10 @@ # A disk log ensures that a mirror does not need to be re-synced # (all copies made the same) every time a machine reboots or crashes. # - # In the event of a failure, the specified policy will be used to - # determine what happens: + # In the event of a failure, the specified policy will be used to determine + # what happens. This applies to automatic repairs (when the mirror is being + # monitored by dmeventd) and to manual lvconvert --repair when + # --use-policies is given. # # "remove" - Simply remove the faulty device and run without it. 
If # the log device fails, the mirror would convert to using @@ -327,20 +329,13 @@ # will preserve the mirror characteristic of the device. # This policy acts like "remove" if no suitable device and # space can be allocated for the replacement. - # Currently this is not implemented properly and behaves - # similarly to: # - # "allocate_anywhere" - Operates like "allocate", but it does not - # require that the new space being allocated be on a - # device is not part of the mirror. For a log device - # failure, this could mean that the log is allocated on - # the same device as a mirror device. For a mirror - # device, this could mean that the mirror device is - # allocated on the same device as another mirror device. - # This policy would not be wise for mirror devices - # because it would break the redundant nature of the - # mirror. This policy acts like "remove" if no suitable - # device and space can be allocated for the replacement. + # "allocate_anywhere" - Not yet implemented. Useful to place the log device + # temporarily on same physical volume as one of the mirror + # images. This policy is not recommended for mirror devices + # since it would break the redundant nature of the mirror. This + # policy acts like "remove" if no suitable device and space can + # be allocated for the replacement. 
mirror_log_fault_policy = "allocate" mirror_device_fault_policy = "remove" Index: LVM2.2.02.39/lib/config/defaults.h =================================================================== --- LVM2.2.02.39.orig/lib/config/defaults.h 2010-04-03 11:26:22.000000000 +0800 +++ LVM2.2.02.39/lib/config/defaults.h 2010-04-03 11:26:22.000000000 +0800 @@ -125,4 +125,7 @@ #define DEFAULT_SEGS_SORT "vg_name,lv_name,seg_start" #define DEFAULT_PVSEGS_SORT "pv_name,pvseg_start" +#define DEFAULT_MIRROR_DEVICE_FAULT_POLICY "remove" +#define DEFAULT_MIRROR_LOG_FAULT_POLICY "allocate" + #endif /* _LVM_DEFAULTS_H */ Index: LVM2.2.02.39/man/lvconvert.8 =================================================================== --- LVM2.2.02.39.orig/man/lvconvert.8 2010-04-03 11:26:22.000000000 +0800 +++ LVM2.2.02.39/man/lvconvert.8 2010-04-03 11:26:22.000000000 +0800 @@ -63,8 +63,14 @@ .TP .I \-\-repair Repair a mirror that has suffered a disk failure. The mirror will be brought -back into a consistent state, and if possible, the original number of -mirrors will be restored. +back into a consistent state, and if possible, the original number of mirrors +will be restored, if so desired. By default, lvconvert will prompt you whether +to perform the replacement. If you instead wish to unconditionally replace +missing devices, you may specify \-y on the commandline and if you instead want +no replacement to happen at all, you may provide \-f. Additionally, you may use +"--use-policies" - this option will use the device replacement policy specified +in lvm.conf, specifically "activation/mirror_log_fault_policy" and +"activation/mirror_device_fault_policy". 
.br .TP .I \-s, \-\-snapshot Index: LVM2.2.02.39/tools/args.h =================================================================== --- LVM2.2.02.39.orig/tools/args.h 2010-04-03 11:26:22.000000000 +0800 +++ LVM2.2.02.39/tools/args.h 2010-04-03 11:26:22.000000000 +0800 @@ -50,6 +50,7 @@ arg(corelog_ARG, '\0', "corelog", NULL, 0) arg(mirrorlog_ARG, '\0', "mirrorlog", string_arg, 0) arg(repair_ARG, '\0', "repair", NULL, 0) +arg(use_policies_ARG, '\0', "use-policies", NULL, 0) arg(monitor_ARG, '\0', "monitor", yes_no_arg, 0) arg(config_ARG, '\0', "config", string_arg, 0) arg(trustcache_ARG, '\0', "trustcache", NULL, 0) Index: LVM2.2.02.39/tools/commands.h =================================================================== --- LVM2.2.02.39.orig/tools/commands.h 2010-04-03 11:26:22.000000000 +0800 +++ LVM2.2.02.39/tools/commands.h 2010-04-03 11:26:22.000000000 +0800 @@ -94,7 +94,7 @@ 0, "lvconvert " "[-m|--mirrors Mirrors [{--mirrorlog {disk|core}|--corelog}]]\n" - "\t[--repair]\n" + "\t[--repair [--use-policies]]\n" "\t[-R|--regionsize MirrorLogRegionSize]\n" "\t[--alloc AllocationPolicy]\n" "\t[-b|--background]\n" @@ -117,7 +117,7 @@ alloc_ARG, background_ARG, chunksize_ARG, corelog_ARG, interval_ARG, mirrorlog_ARG, mirrors_ARG, regionsize_ARG, repair_ARG, snapshot_ARG, - test_ARG, zero_ARG) + test_ARG, use_policies_ARG, yes_ARG, force_ARG, zero_ARG) xx(lvcreate, "Create a logical volume", Index: LVM2.2.02.39/tools/lvconvert.c =================================================================== --- LVM2.2.02.39.orig/tools/lvconvert.c 2010-04-03 11:26:22.000000000 +0800 +++ LVM2.2.02.39/tools/lvconvert.c 2010-04-03 11:46:55.000000000 +0800 @@ -408,6 +408,16 @@ if (!(pvl->pv->status & MISSING_PV)) continue; + /* + * Finally, --repair will remove empty PVs. + * But we only want remove these which are output of repair, + * Do not count these which are already empty here. + * FIXME: code should traverse PV in LV not in whole VG. + * FIXME: layer violation? 
should it depend on vgreduce --removemissing? + */ + if (pvl->pv->pe_alloc_count == 0) + continue; + if (!(new_pvl = dm_pool_alloc(vg->vgmem, sizeof(*new_pvl)))) { log_error("Unable to allocate physical volume list."); return_0; @@ -429,6 +439,87 @@ return next_lv; } +/* + * Remove missing and empty PVs from VG, if they are also in the provided list + */ +static void _remove_missing_empty_pv(struct volume_group *vg, struct list *remove_pvs) +{ + struct pv_list *pvl, *pvl_vg, *pvlt; + int removed = 0; + + if (!remove_pvs) + return; + + list_iterate_items(pvl, remove_pvs) { + list_iterate_items_safe(pvl_vg, pvlt, &vg->pvs) { + if (!id_equal(&pvl->pv->id, &pvl_vg->pv->id) || + !(pvl_vg->pv->status & MISSING_PV) || + pvl_vg->pv->pe_alloc_count != 0) + continue; + + /* FIXME: duplication of vgreduce code, move this to library */ + vg->free_count -= pvl_vg->pv->pe_count; + vg->extent_count -= pvl_vg->pv->pe_count; + vg->pv_count--; + list_del(&pvl_vg->list); + + removed++; + } + } + + if (removed) { + if (!vg_write(vg) || !vg_commit(vg)) { + stack; + return; + } + + log_warn("%d missing and now unallocated Physical Volumes removed from VG.", removed); + } +} + +static void _lvconvert_mirrors_repair_ask(struct cmd_context *cmd, + int failed_log, int failed_mirrors, + int *replace_log, int *replace_mirrors) +{ + const char *leg_policy = NULL, *log_policy = NULL; + + int force = arg_count(cmd, force_ARG); + int yes = arg_count(cmd, yes_ARG); + + *replace_log = *replace_mirrors = 1; + + if (arg_count(cmd, use_policies_ARG)) { + leg_policy = find_config_tree_str(cmd, + "activation/mirror_device_fault_policy", + DEFAULT_MIRROR_DEVICE_FAULT_POLICY); + log_policy = find_config_tree_str(cmd, + "activation/mirror_log_fault_policy", + DEFAULT_MIRROR_LOG_FAULT_POLICY); + *replace_mirrors = strcmp(leg_policy, "remove"); + *replace_log = strcmp(log_policy, "remove"); + return; + } + + if (yes) + return; + + if (force != PROMPT) { + *replace_log = *replace_mirrors = 0; + return; + } + + if 
(failed_log && + yes_no_prompt("Attempt to replace failed mirror log? [y/n]: ") == 'n') { + *replace_log = 0; + } + + if (failed_mirrors && + yes_no_prompt("Attempt to replace failed mirror images " + "(requires full device resync)? [y/n]: ") == 'n') { + *replace_mirrors = 0; + } +} + static int lvconvert_mirrors(struct cmd_context * cmd, struct logical_volume * lv, struct lvconvert_params *lp) { @@ -439,7 +530,10 @@ struct logical_volume *original_lv; struct logical_volume *log_lv; int failed_mirrors = 0, failed_log = 0; - struct list* old_pvh, *remove_pvs = NULL; + struct list* old_pvh, *remove_pvs = NULL, *failed_pvs = NULL; + + int repair = arg_count(cmd, repair_ARG); + int replace_log = 1, replace_mirrors = 1; seg = first_seg(lv); existing_mirrors = lv_mirror_count(lv); @@ -447,12 +541,12 @@ /* If called with no argument, try collapsing the resync layers */ if (!arg_count(cmd, mirrors_ARG) && !arg_count(cmd, mirrorlog_ARG) && !arg_count(cmd, corelog_ARG) && !arg_count(cmd, regionsize_ARG) && - !arg_count(cmd, repair_ARG)) { + !repair) { lp->need_polling = 1; return 1; } - if (arg_count(cmd, mirrors_ARG) && arg_count(cmd, repair_ARG)) { + if (arg_count(cmd, mirrors_ARG) && repair) { log_error("You can only use one of -m, --repair."); return 0; } @@ -474,7 +568,7 @@ else lp->mirrors += 1; - if (arg_count(cmd,repair_ARG)) { + if (repair) { cmd->handles_missing_pvs = 1; cmd->partial_activate = 1; lp->need_polling = 0; @@ -485,11 +579,12 @@ if ((failed_mirrors = _count_failed_mirrors(lv)) < 0) return_0; lp->mirrors -= failed_mirrors; - log_error("Mirror status: %d/%d legs failed.", + log_error("Mirror status: %d/%d images failed.", failed_mirrors, existing_mirrors); old_pvh = lp->pvh; if (!(lp->pvh = _failed_pv_list(lv->vg))) return_0; + failed_pvs = lp->pvh; log_lv=first_seg(lv)->log_lv; if (!log_lv || log_lv->status & PARTIAL_LV) failed_log = corelog = 1; @@ -549,6 +644,10 @@ return 0; } + if (repair) + _lvconvert_mirrors_repair_ask(cmd, failed_log, 
failed_mirrors, + &replace_log, &replace_mirrors); + restart: /* * Converting from mirror to linear @@ -566,7 +665,7 @@ */ if (lp->mirrors < existing_mirrors) { /* Reduce number of mirrors */ - if (arg_count(cmd, repair_ARG) || lp->pv_count) + if (repair || lp->pv_count) remove_pvs = lp->pvh; if (!lv_remove_mirrors(cmd, lv, existing_mirrors - lp->mirrors, (corelog || lp->mirrors == 1) ? 1U : 0U, @@ -701,15 +800,21 @@ if (failed_log || failed_mirrors) { lp->pvh = old_pvh; - if (failed_log) + if (failed_log && replace_log) failed_log = corelog = 0; - lp->mirrors += failed_mirrors; + if (replace_mirrors) + lp->mirrors += failed_mirrors; failed_mirrors = 0; existing_mirrors = lv_mirror_count(lv); /* Now replace missing devices. */ - goto restart; + if (replace_log || replace_mirrors) + goto restart; } + /* If repairing and using policies, remove missing PVs from VG */ + if (repair && arg_count(cmd, use_policies_ARG)) + _remove_missing_empty_pv(lv->vg, failed_pvs); + if (!lp->need_polling) log_print("Logical volume %s converted.", lv->name); Index: LVM2.2.02.39/lib/metadata/mirror.c =================================================================== --- LVM2.2.02.39.orig/lib/metadata/mirror.c 2008-06-27 07:05:11.000000000 +0800 +++ LVM2.2.02.39/lib/metadata/mirror.c 2010-04-03 11:26:22.000000000 +0800 @@ -394,7 +394,12 @@ pv_found = 0; list_iterate_items(pvl, removable_pvs) { - if (pv->dev->dev == pvl->pv->dev->dev) { + if (id_equal(&pv->id, &pvl->pv->id)) { + pv_found = 1; + break; + } + if (pvl->pv->dev && pv->dev && + pv->dev->dev == pvl->pv->dev->dev) { pv_found = 1; break; } Index: LVM2.2.02.39/lib/metadata/pv_map.c =================================================================== --- LVM2.2.02.39.orig/lib/metadata/pv_map.c 2008-01-30 22:00:00.000000000 +0800 +++ LVM2.2.02.39/lib/metadata/pv_map.c 2010-04-03 11:26:22.000000000 +0800 @@ -128,6 +128,10 @@ if (!(pvl->pv->status & ALLOCATABLE_PV)) continue; + if (pvl->pv->status & MISSING_PV) + continue; + 
assert(pvl->pv->dev); + pvm = NULL; list_iterate_items(pvm2, pvms) Index: LVM2.2.02.39/lib/metadata/metadata.c =================================================================== --- LVM2.2.02.39.orig/lib/metadata/metadata.c 2010-04-03 11:26:22.000000000 +0800 +++ LVM2.2.02.39/lib/metadata/metadata.c 2010-04-03 11:26:22.000000000 +0800 @@ -1644,6 +1644,26 @@ return ret; } +static void check_reappeared_pv(struct volume_group *correct_vg, + struct physical_volume *pv) +{ + struct pv_list *pvl; + + list_iterate_items(pvl, &correct_vg->pvs) + if (pv->dev == pvl->pv->dev && pvl->pv->status & MISSING_PV) { + log_warn("Missing device %s reappeared, updating " + "metadata for VG %s to version %u.", + pv_dev_name(pvl->pv), pv_vg_name(pvl->pv), + correct_vg->seqno); + if (pvl->pv->pe_alloc_count == 0) { + pv->status &= ~MISSING_PV; + pvl->pv->status &= ~MISSING_PV; + } else + log_warn("Device still marked missing because of alocated data " + "on it, remove volumes and consider vgreduce --removemissing."); + } +} + /* Caller sets consistent to 1 if it's safe for vg_read to correct * inconsistent metadata on disk (i.e. the VG write lock is held). * This guarantees only consistent metadata is returned. @@ -1890,6 +1910,13 @@ log_warn("WARNING: Inconsistent metadata found for VG %s - updating " "to use version %u", vgname, correct_vg->seqno); + /* + * If PV is marked missing but we found it, + * update metadata and remove MISSING flag + */ + list_iterate_items(pvl, &all_pvs) + check_reappeared_pv(correct_vg, pvl->pv); + if (!vg_write(correct_vg)) { log_error("Automatic metadata correction failed"); return NULL;