#!/bin/bash
#
# Stress test for the md "multipath" personality.
#
# Creates a multipath md array on top of several paths to the same device,
# then repeatedly runs a set of checks (toggling paths, hot add/remove,
# reassembly, simulated path failures, ...). After every check all paths must
# be active again, otherwise the run is aborted.
#
# Usage: see PrintHelp below.
#
# Environment:
#   LOGFILE  - log destination (default: /tmp/mp-test.log)
#
# Note: strict mode (set -e) is deliberately not used; many commands in this
# script are expected to fail and their status is evaluated explicitly.

MDADM=/sbin/mdadm
LOCTL=/usr/local/bin/loopctl
LOSETUP=/sbin/losetup
TIOTEST=/root/tiotest

# How often will the tests be run?
IT=2000

LOGFILE=${LOGFILE:-/tmp/mp-test.log}

# Global state shared by the functions below.
declare -a DEVS     # paths (block devices) that make up the array
LOGENTRY=""         # log text buffered until LogFlush
FAILED_PATH=""      # path currently marked as failed, if any
DETAILS=""          # temp file holding the last "mdadm --detail" output
MODE=""             # real | loop | fake
MD=""               # md device under test
DEVNO=0             # number of paths
BACKING_DEV=""      # loop mode only: device the loop devices are bound to
UUID=""             # UUID of the array created by RestartMD

# Print the usage text to stdout.
PrintHelp() {
  cat <<END
Usage:

Real mode:
  $0 real MD_DEVICE NUMBER_OF_PATHS REAL_DEVICE ...

Will run the tests interactively; your help will be required to create and
restore various failure scenarios.

Please specify a md device currently not in use, the number of paths to the
target and the "real devices" at which Linux detects the disk, for example:

  $0 real /dev/md0 2 /dev/sda /dev/sdb

Loop mode:
  $0 loop MD_DEVICE NUMBER_OF_PATHS BACKING_DEVICE

Will run the tests non-interactively; this can be used to verify that md
itself is working correctly.

The real device you specify should be a "backing store" block device; for
example, a SCSI device or just a logical volume. Loop devices will be set
up to simulate two access paths to the same device.

  $0 loop /dev/md0 3 /dev/system/multipath-test

It is recommended to complete a run in loop mode first.

WARNING: Data on the real devices given will be destroyed!
END
}

# Run a command, recording the array status, the command output and its exit
# code in the log buffer (nothing is printed to the terminal).
# Arguments: the command and its arguments, one word each.
# Returns:   the exit code of the command.
Exec() {
  # Keep the words in an array so they are passed on exactly as received
  # instead of being flattened into a string and re-split.
  local -a cmd=("$@")
  local output
  local rc

  LogStatus "Status prior to executing ${cmd[*]}:"
  LogNl "Executing ${cmd[*]}:" no
  output=$("${cmd[@]}" 2>&1)
  rc=$?
  LogNl "$output" no
  LogNl "Exit code $rc" no
  LogNl "---" no
  if ((rc != 0)); then
    LogStatus "Status directly after failed command ${cmd[*]}:"
  fi
  return "$rc"
}

# Append a message followed by the current "mdadm --detail" output of $MD to
# the log buffer.
LogStatus() {
  LOGENTRY=$(
    echo "$LOGENTRY"
    echo "$@"
    echo "---"
    "$MDADM" --detail "$MD" 2>&1
    echo "---"
    echo " "
  )
}

# Append the log buffer to $LOGFILE and empty it.
LogFlush() {
  echo "$LOGENTRY" >>"$LOGFILE"
  LogClear
}

# Discard the log buffer.
LogClear() {
  LOGENTRY=""
}

# Log a message without trailing newline.
# Arguments: $1 - message
#            $2 - if non-empty, only buffer the message, do not print it
Log() {
  if [[ -z "$2" ]]; then
    printf '%s' "$1"
  fi
  LOGENTRY="${LOGENTRY} ${1}"
}

# Log a message followed by a newline.
# Arguments: $1 - message (may be omitted for an empty line)
#            $2 - if non-empty, only buffer the message, do not print it
LogNl() {
  if [[ -z "$2" ]]; then
    printf '%s\n' "$1"
  fi
  # The trailing blank keeps the newline from being the last character.
  LOGENTRY="${LOGENTRY}${1}"$'\n '
}

# Find a loop device (/dev/loop0 .. /dev/loop32) that is neither listed in
# DEVS nor currently bound according to losetup.
# Outputs: the device name on stdout; "NULL" if all candidates are taken.
#          Diagnostics go to stderr, because callers capture stdout.
# Returns: 0 if a device was found, 1 otherwise.
FindFreeLoopDevice() {
  local i
  local loop
  local device
  local in_use

  for ((i = 0; i <= 32; i++)); do
    loop="/dev/loop$i"
    if [[ ! -b "$loop" ]]; then
      echo "$loop is not a blockdevice" >&2
      return 1
    fi

    in_use=0
    for device in "${DEVS[@]}"; do
      if [[ "$device" == "$loop" ]]; then
        in_use=1
        break
      fi
    done

    # "losetup DEVICE" succeeds only if the device is currently bound.
    if ((in_use == 0)) && ! "$LOSETUP" "$loop" >/dev/null 2>&1; then
      echo "$loop"
      return 0
    fi
  done

  echo "NULL"
  return 1
}

# Detach all loop devices listed in DEVS. Always returns 0.
Unbind() {
  local device

  Log "Freeing loop devices: "
  for device in "${DEVS[@]}"; do
    "$LOCTL" clear "$device" >/dev/null 2>&1
    if "$LOSETUP" -d "$device" 2>/dev/null; then
      Log " OK: $device"
    else
      Log " FAIL: $device"
    fi
  done
  LogNl " - DONE"
  return 0
}

# Compare the MD5 sum of the first 128 KiB of every path against the first
# path; aborts via Cleanup if they differ.
VerifyDevices() {
  local i
  local reference_sum=""
  local device_sum=""

  for ((i = 0; i < ${#DEVS[@]}; i++)); do
    device_sum=$(dd if="${DEVS[i]}" bs=1024 count=128 2>/dev/null | md5sum)
    if ((i == 0)); then
      reference_sum=$device_sum
    elif [[ "$reference_sum" != "$device_sum" ]]; then
      Log "${DEVS[i]} doesn't seem to point at the same device ${DEVS[0]} does"
      Cleanup 1
    fi
  done
  return 0
}

# Overwrite the first 128 KiB of the first path with random data and zero any
# md superblock on it.
# Arguments: $1 - "yes" to skip the warning and the grace period
ClearDevices() {
  local force="$1"

  if [[ "$force" != "yes" ]]; then
    LogNl "Destroying ALL DATA on the devices NOW! _5s_ to abort!"
    # Wait as long as the message promises.
    sleep 5
  fi
  dd if=/dev/urandom of="${DEVS[0]}" bs=1024 count=128 2>/dev/null
  "$MDADM" --zero-superblock "${DEVS[0]}"
}

# Bind every loop device in DEVS to $BACKING_DEV; aborts via Cleanup on
# failure.
BindDevices() {
  local i

  for ((i = 0; i < DEVNO; i++)); do
    "$LOCTL" clear "${DEVS[i]}" >/dev/null 2>&1
    if ! "$LOSETUP" "${DEVS[i]}" "$BACKING_DEV"; then
      LogNl "Failure to bind ${DEVS[i]} to $BACKING_DEV"
      Cleanup 1
    fi
  done
}

# Parse the command line and prepare the devices.
# Arguments: mode, md device, number of paths, then either the real devices
#            (real/fake mode) or the backing device (loop mode).
# Globals:   sets MODE, MD, DEVNO, DEVS, DETAILS and (loop mode) BACKING_DEV.
# Returns:   1 if the arguments or the environment are unusable.
SetupBase() {
  local i=0

  LogNl "SETUP PHASE: "

  if (($# <= 3)); then
    LogNl "Too few parameters."
    return 1
  fi

  MODE=$1
  shift
  MD=$1
  shift
  DEVNO=$1
  shift

  case "$MODE" in
    real | loop | fake) ;;
    *)
      return 1
      ;;
  esac

  LogNl "Operating in $MODE mode"

  if [[ ! -x "$MDADM" ]]; then
    LogNl "mdadm utility not found; please install mdadm"
    return 1
  fi

  if [[ ! "$DEVNO" =~ ^[0-9]+$ ]] || ((10#$DEVNO < 1)); then
    LogNl "Invalid number of paths: $DEVNO"
    return 1
  fi
  DEVNO=$((10#$DEVNO))

  case "$MODE" in
    real | fake)
      if ((DEVNO != $#)); then
        LogNl "Incorrect number of devices specified."
        return 1
      fi
      for ((i = 0; i < DEVNO; i++)); do
        DEVS[i]="$1"
        shift
      done
      ;;
    loop)
      BACKING_DEV="$1"
      shift
      if [[ ! -b "$BACKING_DEV" ]]; then
        LogNl "Invalid backing store specified"
        return 1
      fi
      if (($# != 0)); then
        LogNl "Superfluous parameters for loop mode"
        return 1
      fi
      for ((i = 0; i < DEVNO; i++)); do
        if ! DEVS[i]=$(FindFreeLoopDevice); then
          LogNl "Unable to find a free loop device"
          return 1
        fi
      done
      ;;
  esac

  # Created only after all validation passed, so that early returns above do
  # not leave a stale temp file behind.
  if ! DETAILS=$(mktemp "${TMPDIR:-/tmp}/mp-test.XXXXXXX"); then
    DETAILS=""
    LogNl "Unable to create temporary file"
    return 1
  fi

  if [[ "$MODE" == "loop" ]]; then
    LogNl "Loop devices being used: ${DEVS[*]}"
    BindDevices
  fi

  VerifyDevices
  ClearDevices

  LogNl "Setup complete."
  LogNl
}

# Stop the array, wipe the first path and create a fresh multipath array.
# Arguments: $1 - optional number of spare paths (default: all paths active)
# Globals:   sets UUID. Aborts via Cleanup if the array cannot be created.
RestartMD() {
  local spares="$1"
  local actives
  local -a opts

  if [[ -n "$FAILED_PATH" ]]; then
    ClearError skip
  fi

  if [[ -z "$spares" ]]; then
    opts=(-n "$DEVNO")
  else
    if [[ "$spares" -ge "$DEVNO" ]] || [[ "$spares" -eq 0 ]]; then
      LogNl "WARNING: spares $spares and device count $DEVNO fixed up!"
      spares=1
    fi
    actives=$((DEVNO - spares))
    opts=(-n "$actives" -x "$spares")
  fi

  LogNl
  Log "Starting clean md device: "

  # First, remove all left overs
  Exec "$MDADM" "$MD" -S
  if [[ "$MODE" == "loop" ]]; then
    Unbind
    BindDevices
  fi

  # Get rid of any excess data
  ClearDevices yes

  # Start again! ("yes" answers any confirmation prompt of mdadm.)
  if ! yes | Exec "$MDADM" -C "$MD" -l mp "${opts[@]}" "${DEVS[@]}"; then
    LogNl "FAILED"
    Cleanup 1
  fi

  UUID=$("$MDADM" --detail "$MD" | awk '/UUID : / { print $3 }')
  if [[ -z "$UUID" ]]; then
    LogNl "FAILED (UUID could not be read)"
    Cleanup 1
  fi
  Log "(UUID is: $UUID) "
  LogNl "OK"
}

# Stop the array, release loop devices, remove the temp file, flush the log
# and exit.
# Arguments: $1 - exit code
Cleanup() {
  local err=$1

  LogNl
  LogNl "CLEANUP PHASE:"
  # MD is still empty when interrupted before SetupBase parsed the arguments.
  if [[ -n "$MD" ]]; then
    LogNl "Stopping MD device: $MD"
    Exec "$MDADM" "$MD" -S
  fi
  if [[ "$MODE" == "loop" ]]; then
    Unbind
  fi
  if [[ -n "$DETAILS" ]]; then
    rm -f -- "$DETAILS"
  fi
  LogFlush
  exit "${err:-0}"
}

# Store the current "mdadm --detail" output of $MD in $DETAILS.
GetDetails() {
  "$MDADM" --detail "$MD" >"$DETAILS" 2>&1
  return 0
}

# Check that every path in DEVS is reported as active.
# Returns: 1 if at least one path is not active.
CheckIfActive() {
  local path=""
  local err=0

  Log "CHECK: All paths should be active: "
  for path in "${DEVS[@]}"; do
    if ! IsActive "$path" quiet; then
      err=1
    fi
  done

  if ((err != 0)); then
    LogNl "FAILED"
    return 1
  fi
  LogNl "OK"
}

# Make one path fail and remember it in FAILED_PATH. In real mode the
# operator is asked to fail a path; otherwise a random path is failed.
IntroduceError() {
  case "$MODE" in
    real)
      echo "REQUEST: Please fail a path now and tell me which (ie, ${DEVS[0]}):"
      read -r FAILED_PATH
      if dd if="$FAILED_PATH" of=/dev/null bs=1024 count=1024 2>/dev/null; then
        LogNl "ERROR: $FAILED_PATH still operational."
      fi
      ;;
    loop | fake)
      FAILED_PATH=$(SelectOnePath)
      LogNl "(Failing $FAILED_PATH)"
      Exec "$MDADM" "$MD" --fail "$FAILED_PATH"
      ;;
    foo)
      # SetupBase never accepts this mode, so this branch (failure injection
      # through loopctl) is currently not reachable.
      FAILED_PATH=$(SelectOnePath)
      LogNl "(Failing $FAILED_PATH)"
      Exec "$LOCTL" fail "$FAILED_PATH"
      ;;
  esac
  LogNl ""
}

# Succeeds if mdadm reports the path as faulty.
# Arguments: $1 - path
IsFailed() {
  local path="$1"

  if ! GrepDetails "faulty.*$path\$"; then
    LogStatus "FAILED ($path not marked faulty)"
    return 1
  fi
  LogNl "OK"
}

# Succeeds if mdadm reports the path as active.
# Arguments: $1 - path
#            $2 - if non-empty, do not log the result
IsActive() {
  local path="$1"
  local quiet="$2"

  if ! GrepDetails "active.*$path\$"; then
    test -n "$quiet" || LogStatus "FAILED ($path not marked active)"
    return 1
  fi
  test -n "$quiet" || LogNl "OK"
}

# Succeeds if mdadm does not report the path as active.
# Arguments: $1 - path
IsSpare() {
  local path="$1"

  if GrepDetails "active.*$path\$"; then
    LogStatus "FAILED ($path not spare)"
    return 1
  fi
  LogNl "OK"
}

# Flush the buffers of a device and run a short tiotest on it.
# Arguments: $1 - device
DoSomeIO() {
  Log "(doing some IO on $1) "
  blockdev --flushbufs "$1"
  "$TIOTEST" -R -d "$1" -o 100 -r 10 -S >/dev/null 2>&1
}

# Flush the buffers of $MD and run a longer tiotest on the given device.
# Arguments: $1 - device
DoLotsOfIO() {
  blockdev --flushbufs "$MD"
  "$TIOTEST" -R -d "$1" -o 100 -r 40 -S -p 6 >/dev/null 2>&1
}

# Like DoLotsOfIO, but with more requests, tiotest's -c option and visible
# output.
# Arguments: $1 - device
DoLotsOfIOwithCheck() {
  blockdev --flushbufs "$MD"
  "$TIOTEST" -R -d "$1" -o 100 -r 4000 -c -S -p 6
}

# Print one randomly chosen path from DEVS.
SelectOnePath() {
  local pathno=$((RANDOM % DEVNO))

  echo "${DEVS[pathno]}"
}

# Undo the failure introduced by IntroduceError and reset FAILED_PATH.
# Arguments: $1 - if non-empty, only restore the path itself and skip the
#                 mdadm --clean / --active steps
# Returns:   1 if one of the mdadm steps fails.
ClearError() {
  local skip_mdstuff="$1"

  case "$MODE" in
    foo)
      LogNl "Clearing $FAILED_PATH"
      Exec "$LOCTL" clear "$FAILED_PATH"
      ;;
    loop | fake)
      # If mdadm --fail is used, this is a noop
      ;;
    real)
      echo "Please restore $FAILED_PATH now and press enter when ready"
      read -r
      ;;
  esac

  if [[ -n "$skip_mdstuff" ]]; then
    FAILED_PATH=""
    return 0
  fi

  Log "CHECK: Setting device $FAILED_PATH clean: "
  if ! Exec "$MDADM" "$MD" --clean "$FAILED_PATH"; then
    LogNl "FAIL"
    return 1
  fi
  LogNl "OK"

  Log "CHECK: Setting device $FAILED_PATH active: "
  if ! Exec "$MDADM" "$MD" --active "$FAILED_PATH"; then
    LogNl "FAIL"
    return 1
  fi
  LogNl "OK"

  FAILED_PATH=""
}

# Run DoLotsOfIO on every single path.
PerformanceAllDevices() {
  local device

  LogNl "Performance of the single devices:"
  for device in "${DEVS[@]}"; do
    LogNl "$device: "
    DoLotsOfIO "$device"
  done
}

# Refresh $DETAILS and match it against an extended regular expression.
# Arguments: $1 - pattern
# Returns:   the status of grep.
GrepDetails() {
  local pattern=$1

  GetDetails
  grep -qE -- "$pattern" "$DETAILS"
}

# Loop mode only: bind an additional loop device to the backing device, then
# add, enable, disable and remove it as a path.
CheckAdditionalPath() {
  local device

  LogNl "Checking if adding/removing an additional path works. "

  if [[ "$MODE" != "loop" ]]; then
    LogNl "SKIPPED (only in loop mode)"
    return 0
  fi

  if ! device=$(FindFreeLoopDevice); then
    LogNl "FAILED: No free loop device available"
    return 1
  fi

  if ! "$LOSETUP" "$device" "$BACKING_DEV"; then
    LogNl "FAILED: Couldn't bind $device to $BACKING_DEV"
    return 1
  fi

  Log "CHECK: Adding path $device: "
  if ! Exec "$MDADM" "$MD" --add "$device"; then
    LogNl "FAILED"
    return 1
  fi
  LogNl "OK"

  Log "CHECK: Enabling path $device: "
  if ! Exec "$MDADM" "$MD" --active "$device"; then
    LogNl "FAILED"
    return 1
  fi
  LogNl "OK"

  Log "CHECK: Disabling path $device: "
  if ! Exec "$MDADM" "$MD" --inactive "$device"; then
    LogNl "FAILED"
    return 1
  fi
  LogNl "OK"

  Log "CHECK: Removing path $device: "
  if ! Exec "$MDADM" "$MD" --remove "$device"; then
    LogNl "FAILED"
    return 1
  fi
  LogNl "OK"

  Exec "$LOSETUP" -d "$device"
}

# Remove a random existing path and add it back. Removing must be refused
# while the path is still active.
CheckIfHotModWorks() {
  local device

  device=$(SelectOnePath)

  LogNl "Removing/adding an existing path."

  Log "CHECK: Removing path $device fails if active: "
  if Exec "$MDADM" "$MD" --remove "$device"; then
    LogNl "FAILED"
    return 1
  fi
  LogNl "OK"

  Log "CHECK: Setting path $device inactive: "
  if ! Exec "$MDADM" "$MD" --inactive "$device"; then
    LogNl "FAILED"
    return 1
  fi
  LogNl "OK"

  Log "CHECK: Removing path $device: "
  if ! Exec "$MDADM" "$MD" --remove "$device"; then
    LogNl "FAILED"
    return 1
  fi
  LogNl "OK"

  Log "CHECK: Adding path $device again: "
  if ! Exec "$MDADM" "$MD" --add "$device"; then
    LogNl "FAILED"
    return 1
  fi
  LogNl "OK"

  Log "CHECK: Enabling path $device again: "
  if ! Exec "$MDADM" "$MD" --active "$device"; then
    LogNl "FAILED"
    return 1
  fi
  LogNl "OK"
}

# Set a random path inactive and active again, verifying the reported state
# after each step.
CheckIfTogglingWorks() {
  local device

  device=$(SelectOnePath)

  Log "CHECK: Toggling active/inactive status of paths: "

  Exec "$MDADM" "$MD" --inactive "$device"
  if GrepDetails "active.*$device\$"; then
    LogNl "FAILED: $device could not be set inactive"
    return 1
  fi

  Exec "$MDADM" "$MD" --active "$device"
  if ! GrepDetails "active.*$device\$"; then
    LogNl "FAILED: $device could not be set active again"
    return 1
  fi
  LogNl "OK"
}

# Stop the array and assemble it again from the same paths.
CheckReassembly() {
  LogNl "Checking if md can restart the array: "

  Log "CHECK: Stopping $MD: "
  if ! Exec "$MDADM" "$MD" -S; then
    LogNl "FAILED: $MD could not be stopped!"
    return 1
  fi
  LogNl "OK"

  Log "CHECK: Reassembling the array: "
  if ! Exec "$MDADM" -A "$MD" -u "$UUID" "${DEVS[@]}"; then
    LogNl "FAILED (trying to recover)"
    return 1
  fi
  LogNl "OK"
}

# Loop mode only: stop the array, bind a new set of loop devices to the
# backing device, release the old ones and assemble the array from the new
# set. On success DEVS is replaced by the new set.
CheckMovedDevices() {
  local i
  local device
  local -a NEW_DEVS

  if [[ "$MODE" != "loop" ]]; then
    return 0
  fi

  Log "Checking if md can cope with moved devices: "

  if ! Exec "$MDADM" "$MD" -S; then
    LogNl "$MD could not be stopped!"
    return 1
  fi

  for ((i = 0; i < DEVNO; i++)); do
    if ! NEW_DEVS[i]=$(FindFreeLoopDevice); then
      LogNl "Unable to find a free loop device"
      Cleanup 1
    fi
    if ! "$LOSETUP" "${NEW_DEVS[i]}" "$BACKING_DEV"; then
      LogNl "Failure to bind ${NEW_DEVS[i]} to $BACKING_DEV"
      Cleanup 1
    fi
  done

  Unbind >/dev/null 2>&1

  if ! Exec "$MDADM" -A "$MD" -u "$UUID" "${NEW_DEVS[@]}"; then
    for device in "${NEW_DEVS[@]}"; do
      Exec "$LOSETUP" -d "$device"
    done
    LogNl "FAILED"
    return 1
  fi

  LogNl "OK (proceeding at new devices)"
  DEVS=("${NEW_DEVS[@]}")
}

# Set a random path inactive, fail it, then clean and reactivate it.
CheckFailSpare() {
  local spare

  LogNl "Checking if failing a spare device works: "

  spare=$(SelectOnePath)

  Log "CHECK: Marking path $spare spare: "
  Exec "$MDADM" "$MD" --inactive "$spare"
  if ! IsSpare "$spare"; then
    return 1
  fi

  Log "CHECK: Failing spare path $spare: "
  Exec "$MDADM" "$MD" --fail "$spare"
  if ! IsFailed "$spare"; then
    return 1
  fi

  LogNl ""
  Log "CHECK: Reenabling spare: "
  Exec "$MDADM" "$MD" --clean "$spare" --active "$spare"
  if ! IsActive "$spare"; then
    return 1
  fi

  return 0
}

# Fail a path, stop the array, restore the path, reassemble the array and
# reactivate every path that is not active afterwards.
CheckReassemblyAfterFailure() {
  local tmp_path

  LogNl "Checking if md can restart the array after a failure: "

  IntroduceError

  Log "CHECK: Checking whether $FAILED_PATH has been detected: "
  DoSomeIO "$MD"
  if ! IsFailed "$FAILED_PATH"; then
    LogNl "FAILED: Failure was not detected."
    return 1
  fi

  Log "CHECK: Stopping failed $MD: "
  if ! Exec "$MDADM" "$MD" -S; then
    LogNl "FAILED $MD could not be stopped!"
    return 1
  fi
  LogNl "OK"

  # TODO: Can either try to restart with a failed path still present
  # or not, needs to be conditional!
  # If the path was clear at the time of the reload, it will be present
  # but inactive.
  # Otherwise it will be missing.
  if ! ClearError skip; then
    LogNl "FAILED: Could not clear error!"
    return 1
  fi

  Log "CHECK: Reassembling the failed array: "
  if ! Exec "$MDADM" -A "$MD" -u "$UUID" "${DEVS[@]}"; then
    LogNl "FAILED (trying to recover)"
    return 1
  fi
  LogNl "OK"

  # This is necessary because it turns out that after the reenabling, it
  # is possible that a path which was previously active is now inactive
  # and vice versa, 'cause md keeps the _number_ of active paths
  # consistent, not necessarily the paths...
  for tmp_path in "${DEVS[@]}"; do
    if ! IsActive "$tmp_path" yes; then
      Log "CHECK: Reactivating path $tmp_path: "
      if ! Exec "$MDADM" "$MD" --active "$tmp_path"; then
        LogNl "FAILED"
        return 1
      fi
      LogNl "OK"
    fi
  done

  return 0
}

# Succeeds if /proc/mdstat lists the multipath personality.
IsPersonalityLoaded() {
  grep -qE '^Personalities.*multipath' /proc/mdstat
}

# Unless the personality is already loaded, verify that the module can be
# loaded both by name and by its alias md-personality-7, and unloaded again.
# Aborts via Cleanup on failure.
CheckModule() {
  LogNl "CHECKS: Loading personality module"

  if IsPersonalityLoaded; then
    LogNl "SKIPPED - personality already loaded at startup"
    return 0
  fi

  Log "CHECK: Loading multipath module 'manually': "
  Exec modprobe multipath
  if ! IsPersonalityLoaded; then
    LogNl "FAILED: unable to load module multipath, aborting"
    Cleanup 1
  fi
  LogNl "OK"

  Log "CHECK: Unloading module: "
  if ! Exec rmmod multipath; then
    LogNl "FAILED"
    Cleanup 1
  fi
  LogNl "OK"

  Log "CHECK: Loading multipath module by alias md-personality-7: "
  Exec modprobe md-personality-7
  if ! IsPersonalityLoaded; then
    LogNl "FAILED: unable to load module multipath, aborting"
    Cleanup 1
  fi
  LogNl "OK"

  Log "CHECK: Unloading module: "
  if ! Exec rmmod multipath; then
    LogNl "FAILED"
    Cleanup 1
  fi
  LogNl "OK"
}

# Fail a path, verify that md marks it faulty, then restore it.
# Returns: 1 if the failure was not detected or could not be cleared.
CheckError() {
  local rc=0

  IntroduceError
  Log "CHECK: Checking whether $FAILED_PATH has been detected: "
  DoSomeIO "$MD"
  IsFailed "$FAILED_PATH" || rc=1
  # Always try to restore the path, even if detection failed.
  ClearError || rc=1
  return "$rc"
}

#######################################################################
# Main script

trap "Cleanup 0" SIGINT

rm -f -- "$LOGFILE"

if ! SetupBase "$@"; then
  PrintHelp
  exit 1
fi

CheckModule
RestartMD

TESTS=(
  CheckIfTogglingWorks
  CheckAdditionalPath
  CheckIfHotModWorks
  CheckReassembly
  CheckMovedDevices
  CheckError
  CheckReassemblyAfterFailure
  CheckFailSpare
)
# Number of available tests; derived so it cannot get out of sync with TESTS.
TESTCOUNT=${#TESTS[@]}
declare -a TESTS_RUN
testsdone=0
testsfailed=0

LogNl "================================================================="
LogNl "INFO: $IT passes will be run; first all test once, then randomly."
SEED=$(($$ ^ PPID ^ SECONDS))
RANDOM=$SEED
LogNl "Random seed is $SEED"
LogNl "================================================================="
LogNl

while ((testsdone < IT)); do
  if ((testsdone < TESTCOUNT)); then
    # Run through the tests once in order
    testno=$testsdone
  else
    testno=$((RANDOM % TESTCOUNT))
  fi
  testsdone=$((testsdone + 1))

  if [[ -n "${TESTS[testno]}" ]]; then
    TESTS_RUN[testno]=$((${TESTS_RUN[testno]:-0} + 1))
    LogNl "RUNNING TEST $testsdone - ${TESTS[testno]} / ${TESTS_RUN[testno]}"
    # Count failing runs so the summary does not report success regardless.
    if ! "${TESTS[testno]}"; then
      testsfailed=$((testsfailed + 1))
    fi
  else
    LogNl "Generated invalid testnumber $testno"
    LogFlush
    exit 1
  fi
  LogFlush

  # All tests should leave the array in a full enabled mode.
  # This is such an imperative that we don't need to always log it.
  # NOTE(review): on failure the script exits without Cleanup, presumably to
  # keep the array around for inspection - confirm.
  if ! CheckIfActive; then
    LogStatus
    LogFlush
    exit 1
  fi
  LogClear
  LogNl
done

LogNl "================================================================="
LogNl "SUMMARY:"
LogNl "Number of total test runs: $testsdone"
for ((testno = 0; testno < TESTCOUNT; testno++)); do
  LogNl "${TESTS[testno]} ran for ${TESTS_RUN[testno]:-0} times"
done
LogNl "================================================================="
if ((testsfailed == 0)); then
  LogNl "ALL TESTS SUCCESSFUL"
else
  LogNl "$testsfailed TEST RUNS FAILED"
fi
LogNl "================================================================="
LogNl

if [[ "$MODE" == "real" ]]; then
  # PerformanceAllDevices
  # LogNl "Consolidated (this should be higher ;):"
  # DoLotsOfIO $MD
  echo
else
  LogNl "Skipping performance tests"
fi

echo
if ((testsfailed == 0)); then
  Cleanup 0
else
  Cleanup 1
fi