| |
| /pliant/linux/storage/raid.pli |
| |
| 1 |
abstract | |
| 2 |
[The 'raid_convert' function will allow you to change the configuration of a Linux software RAID array with (or with the hope of) no data loss.] ; eol | |
| 3 |
highlight "THIS IS VERY ALPHA CODE: IT MAY DESTROY ALL YOUR DATAS AS WELL." | |
| 4 |
| |
| 5 |
| |
| 6 |
# Copyright Hubert Tonneau hubert.tonneau@pliant.cx | |
| 7 |
# | |
| 8 |
# This program is free software; you can redistribute it and/or | |
| 9 |
# modify it under the terms of the GNU General Public License version 2 | |
| 10 |
# as published by the Free Software Foundation. | |
| 11 |
# | |
| 12 |
# This program is distributed in the hope that it will be useful, | |
| 13 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 14 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 15 |
# GNU General Public License for more details. | |
| 16 |
# | |
| 17 |
# You should have received a copy of the GNU General Public License | |
| 18 |
# version 2 along with this program; if not, write to the Free Software | |
| 19 |
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
| 20 |
| |
| 21 |
# release 6 | |
| 22 |
| |
| 23 |
module "/pliant/language/stream.pli" | |
| 24 |
module "/pliant/language/context.pli" | |
| 25 |
module "/pliant/admin/file.pli" | |
| 26 |
module "/pliant/admin/execute.pli" | |
| 27 |
module "/pliant/language/os.pli" | |
| 28 |
module "/pliant/linux/storage/filesystem.pli" | |
| 29 |
module "/pliant/linux/storage/partition.pli" | |
| 30 |
module "/pliant/linux/misc/warn.pli" | |
| 31 |
# Stop with an error when the module is used on anything but Linux.
if os_api<>"linux" | |
| 32 |
error "This module is dealing only with Linux software RAID arrays" | |
| 33 |
| |
| 34 |
| |
| 35 |
# 'conservative' gates the extra verification steps (consistency check,
# surface test, memory_checkup) in raid_convert and raid_check.
constant conservative true | |
| 36 |
# 'speed_report' gates the final timing / throughput console lines.
constant speed_report true | |
| 37 |
# 'mdadm' is true when querying file:/bin/mdadm reports success; it selects
# the mdadm commands instead of the raidtools ones (raidstart, raidstop, ...).
constant mdadm (file_query "file:/bin/mdadm" standard)=success | |
| 38 |
| |
| 39 |
| |
| 40 |
| |
| 41 |
| |
| 42 |
doc | |
| 43 |
[These are a few classical arithmetic functions on unlimited integers that we will use at a later point.] | |
| 44 |
| |
| 45 |
| |
| 46 |
# Smallest of two unlimited integers ('a' is returned when both are equal).
function min a b -> c
  arg Intn a b c
  if a<=b
    c := a
  else
    c := b
| 49 |
| |
| 50 |
# Largest of two unlimited integers ('a' is returned when both are equal).
function max a b -> c
  arg Intn a b c
  if a>=b
    c := a
  else
    c := b
| 53 |
| |
| 54 |
# Greatest common divisor of 'a' and 'b' (Euclid's algorithm).
# When 'a' is not strictly positive, 'b' is returned unchanged.
function pgcd a b -> g
  arg Intn a b g
  var Intn small := a
  var Intn big := b
  g := big
  while small>0
    # replace (small,big) by (big%small,small) and remember the last divisor
    var Intn rest := big%small
    big := small
    small := rest
    g := big
| 63 |
| |
| 64 |
# Least common multiple of 'a' and 'b', computed as a*b divided by their
# greatest common divisor.
function ppcm a b -> m
  arg Intn a b m
  var Intn divisor := pgcd a b
  m := a*b\divisor
| 67 |
| |
| 68 |
| |
| 69 |
| |
| 70 |
| |
| 71 |
| |
| 72 |
# Description of one software RAID array as handled by this module.
type Raid | |
| 73 |
# name of the md device; 'bind' prefixes it with "device:/" when it contains no ":"
field Str device_name | |
| 74 |
# one stream per member device, opened by 'bind' (and reopened by 'set_safe_mode')
field Array:Stream devices | |
| 75 |
# RAID level; the helpers below handle 0, 1 and 5
field Int level | |
| 76 |
# number of spare disks (emitted as nr-spare-disks / --spare-devices)
field Int sparse <- 0 | |
| 77 |
# RAID5 parity algorithm number as read from /proc/mdstat ('map' handles 0 and 1)
field Int geometry <- 0 | |
| 78 |
# chunk size in bytes (divided by 1024 when passed to the RAID tools)
field Int chunk_size | |
| 79 |
# usable array size in bytes ('query' sets it to 1024 times the mdstat block count)
field Intn size | |
| 80 |
| |
| 81 |
| |
| 82 |
doc | |
| 83 |
[Opens the various Pliant streams used to access the disks of a RAID array.] ; eol | |
| 84 |
[An example could be:] | |
| 85 |
listing | |
| 86 |
(var Raid raid) define "device:/md0" "device:/hda5 device:/hda6 device:/hda7" "level 5" | |
| 87 |
[or, if you prefer a more compact encoding:] | |
| 88 |
listing | |
| 89 |
(var Raid raid) define "md0" "hda5 hda6 hda7" "" | |
| 90 |
| |
| 91 |
# Records the md device name and opens one stream per member device.
# 'devices_names' is a space separated list; any name (array or member)
# that contains no ":" is prefixed with "device:/".
method raid bind device_name devices_names
  arg_rw Raid raid ; arg Str device_name devices_names
  if (device_name search ":" -1)=(-1)
    raid:device_name := "device:/"+device_name
  else
    raid:device_name := device_name
  raid:devices size := 0
  var Str remaining := devices_names
  while remaining<>""
    # split off the first name; when there is no separator left, take it all
    var Str current ; var Str rest
    if (remaining parse any:(var Str head) _ any:(var Str tail))
      current := head
      rest := tail
    else
      current := remaining
      rest := ""
    var Int slot := raid:devices:size
    raid:devices size := slot+1
    raid:devices:slot open (shunt (current search ":" -1)=(-1) "device:/"+current current) in+out+nocache
    remaining := rest
| 106 |
| |
| 107 |
# Binds the array to its devices, then reads the "level", "sparse" and
# "chunk_size" settings from 'options'.
# Defaults: level 5 with more than two devices (1 otherwise), no spare disk,
# 64 KB chunks. Spare disks are refused for anything but RAID 5.
method raid define device_name devices_names options
  arg_rw Raid raid ; arg Str device_name devices_names ; arg Str options
  raid bind device_name devices_names
  var Int default_level := shunt raid:devices:size>2 5 1
  raid level := options option "level" Int default_level
  raid sparse := options option "sparse" Int 0
  raid chunk_size := options option "chunk_size" Int 64*2^10
  if raid:level<>5 and raid:sparse<>0
    error "sparse disks is supported only with RAID 5."
| 115 |
| |
| 116 |
| |
| 117 |
| |
| 118 |
| |
| 119 |
| |
| 120 |
# Writes a raidtools (raidtab) description of the array to 'filename'.
#
# The last 'raid:sparse' devices are the spare disks: 'map' and
# 'data_disks_count' only use the first devices:size-sparse devices as
# array members. They are therefore excluded from nr-raid-disks and
# declared as 'spare-disk' instead of 'raid-disk'.
# With no spare disk (the default) the output is unchanged.
method raid build_configuration_file filename
  arg Raid raid ; arg Str filename
  var Int active := raid:devices:size-raid:sparse
  (var Stream rt) open filename out
  rt writeline "raiddev "+(file_os_name raid:device_name)
  rt writeline " raid-level "+(string raid:level)
  rt writeline " nr-raid-disks "+string:active
  rt writeline " nr-spare-disks "+(string raid:sparse)
  rt writeline " persistent-superblock 1"
  # chunk_size is stored in bytes, raidtab expects kilobytes
  rt writeline " chunk-size "+(string raid:chunk_size\1024)
  for (var Int i) 0 raid:devices:size-1
    rt writeline " device "+(file_os_name raid:devices:i:name)
    if i<active
      rt writeline " raid-disk "+string:i
    else
      rt writeline " spare-disk "+(string i-active)
| 132 |
| |
| 133 |
# Runs one raidtools command against this array.
# A temporary configuration file is generated and passed through "-c";
# 'options' (if any) go before the device name and 'param' (if any) after it.
# The command output is sent to file:/tmp/raid.log.
# Returns success when the command exit code is 0, failure otherwise.
method raid command cmd options param -> status
  arg Raid raid ; arg Str cmd options param ; arg Status status
  var Str temp := file_temporary
  raid build_configuration_file temp
  var Str line := cmd+" -c "+file_os_name:temp
  if options<>""
    line += " "+options
  line += " "+(file_os_name raid:device_name)
  if param<>""
    line += " "+param
  if (execute line output "file:/tmp/raid.log" mixed)=0
    status := success
  else
    status := failure
  file_delete temp
  # pause for two seconds before returning
  sleep 2
| 141 |
| |
| 142 |
# Starts (assembles) the array.
# Uses 'mdadm --assemble' with the array device followed by all member
# devices when mdadm is available, 'raidstart' otherwise.
# Returns success when the command exit code is 0, failure otherwise.
method raid start -> status
  arg Raid raid ; arg Status status
  if mdadm
    var Str cmd := "mdadm --assemble "+(file_os_name raid:device_name)
    for (var Int i) 0 raid:devices:size-1
      cmd += " "+(file_os_name raid:devices:i:name)
    # the success branch must yield 'success', not the still unset result variable
    status := shunt (execute cmd)=0 success failure
  else
    status := raid command "raidstart" "" ""
| 151 |
| |
| 152 |
# Stops the array, using 'mdadm --stop' when mdadm is available and
# 'raidstop' otherwise.
# Returns success when the command exit code is 0, failure otherwise.
method raid stop -> status
  arg Raid raid ; arg Status status
  if mdadm
    # the success branch must yield 'success', not the still unset result variable
    status := shunt (execute "mdadm --stop "+(file_os_name raid:device_name))=0 success failure
  else
    status := raid command "raidstop" "" ""
| 158 |
| |
| 159 |
# Creates the array (writes fresh RAID superblocks) from the current
# description: level, devices, spare count and chunk size.
# The array is stopped first (the result of that stop is ignored),
# then created with 'mdadm --create' when mdadm is available and
# 'mkraid --really-force' otherwise.
# Returns success when the creation command exit code is 0.
method raid initialize -> status
  arg Raid raid ; arg Status status
  # stop through the same tool family that will be used for the creation
  raid stop
  if mdadm
    var Str cmd := "mdadm --create "+(file_os_name raid:device_name)
    # --raid-devices counts active members only; the last 'sparse' devices
    # are handed over as --spare-devices
    cmd += " --level="+(string raid:level)+" --raid-devices="+(string raid:devices:size-raid:sparse)+" --spare-devices="+(string raid:sparse)+" --chunk="+(string raid:chunk_size\1024)
    cmd += " --run"
    for (var Int i) 0 raid:devices:size-1
      cmd += " "+(file_os_name raid:devices:i:name)
    status := shunt (execute cmd)=0 success failure
  else
    var FileInfo mtab := file_query "file:/etc/mtab" standard
    if mtab=undefined # get around the bug in raidtools
      # create an empty /etc/mtab for the duration of the mkraid call
      (var Stream s) open "file:/etc/mtab" out+mkdir+safe
      s close
    status := raid command "mkraid" "--really-force" ""
    if mtab=undefined
      # remove the file created above, and its directory as well
      file_delete "file:/etc/mtab"
      file_delete "file:/etc/"
| 178 |
| |
| 179 |
# Adds the device /dev/<device> to the running array.
# Uses 'mdadm <array> --add' when mdadm is available, 'raidhotadd' otherwise.
# Returns success when the command exit code is 0, failure otherwise.
method raid hotadd device -> status
  arg Raid raid ; arg Str device ; arg Status status
  if mdadm
    # the mdadm long option is '--add', and the success branch must yield 'success'
    status := shunt (execute "mdadm "+(file_os_name raid:device_name)+" --add /dev/"+device)=0 success failure
  else
    status := raid command "raidhotadd" "" "/dev/"+device
| 185 |
| |
| 186 |
| |
| 187 |
| |
| 188 |
| |
| 189 |
| |
| 190 |
doc | |
| 191 |
[When the RAID is set in safe mode, an IO error will not produce an exception. This is very useful during the conversion since, in case of an IO error, we want to continue anyway with the faulty disk disabled.] | |
| 192 |
| |
| 193 |
# Reopens every member device stream under its current name, with the
# 'safe' flag added to the in+out+nocache flags used by 'bind'.
method raid set_safe_mode
  arg_rw Raid raid
  var Int last := raid:devices:size-1
  for (var Int disk) 0 last
    var Str path := raid:devices:disk:name
    raid:devices:disk open path in+out+nocache+safe
| 197 |
| |
| 198 |
| |
| 199 |
doc | |
| 200 |
[How many useful (data) disks we have in the RAID array.] | |
| 201 |
| |
| 202 |
# Number of disks holding distinct data in one row of the array:
# all devices for RAID 0, one for RAID 1, and all devices minus the spare
# ones minus one parity disk for RAID 5.
# Any other level raises an error.
method raid data_disks_count -> count
  arg Raid raid ; arg Int count
  var Int raid_level := raid:level
  if raid_level=5
    count := raid:devices:size-raid:sparse-1
  eif raid_level=1
    count := 1
  eif raid_level=0
    count := raid:devices:size
  else
    error "raid level "+string:raid_level+" is not supported yet."
| 212 |
| |
| 213 |
doc | |
| 214 |
[How many extra disks do we have in the RAID array (spare disks are not counted here, and are more generally completely ignored by this software).] | |
| 215 |
| |
| 216 |
# Number of redundancy disks: none for RAID 0, every device but one for
# RAID 1, exactly one for RAID 5.
# Any other level raises an error.
method raid checksum_disks_count -> count
  arg Raid raid ; arg Int count
  var Int raid_level := raid:level
  if raid_level=5
    count := 1
  eif raid_level=1
    count := raid:devices:size-1
  eif raid_level=0
    count := 0
  else
    error "raid level "+string:raid_level+" is not supported yet."
| 226 |
| |
| 227 |
| |
| 228 |
doc | |
| 229 |
[Parses the /proc/mdstat file in order to extract information about the RAID array. This function is very sensitive to the format used in /proc/mdstat because we prefer to stop with an error rather than attempt a conversion on an array for which some information would be misunderstood.] | |
| 230 |
| |
| 231 |
# Reads /proc/mdstat and fills level, chunk_size, geometry and size from
# the entry whose name matches raid:device_name.
# Raises an error when the number of listed disks differs from the number
# of bound devices, when the size is not a multiple of
# chunk_size*data_disks_count, or with "Unexpected ... status !" when no
# entry was accepted.
method raid query | |
| 232 |
arg_rw Raid raid | |
| 233 |
(var Stream s) open "file:/proc/mdstat" in | |
| 234 |
while not s:atend | |
| 235 |
# expected shape: <name> : active raid<level> <rest of the status>
if (s:readline parse any:(var Str name) ":" word:"active" "raid" (var Int level) any:(var Str stat)) | |
| 236 |
if "/dev/"+name=(file_os_name raid:device_name) | |
| 237 |
var Int disks_count := 0 | |
| 238 |
# consume the leading "xxx[n]" items (no space allowed before the bracket), counting them
while (stat parse any:(var Str drop) "[lb]" (var Int i) "[rb]" _ any:(var Str stat2)) and (drop search " " -1)=(-1) | |
| 239 |
disks_count += 1 | |
| 240 |
stat := stat2 | |
| 241 |
if disks_count<>raid:devices:size | |
| 242 |
error "The number of disks in "+raid:device_name+" does not match your declarations." | |
| 243 |
var CBool ok := false | |
| 244 |
# RAID 0 status: "<blocks> blocks <chunk>k chunks"
if level=0 and (stat parse (var uInt blocks) word:"blocks" (var Int chunk) "k" word:"chunks") | |
| 245 |
raid chunk_size := chunk*1024 | |
| 246 |
ok := true | |
| 247 |
# RAID 1 status: accepted only when the last bracketed string is made of "U" only
eif level=1 and (stat parse (var uInt blocks) word:"blocks" "[lb]" any "[rb]" "[lb]" any:(var Str ups) "[rb]") | |
| 248 |
# RAID 1 has no chunk size in the status; 512 bytes is used
raid chunk_size := 512 | |
| 249 |
ok := ups=(repeat ups:len "U") | |
| 250 |
# RAID 5 status: also carries the chunk size and the parity algorithm number
eif level=5 and (stat parse (var uInt blocks) word:"blocks" word:"level" (var Int level) "," (var Int chunk) "k" word:"chunk" "," word:"algorithm" (var Int algorithm) "[lb]" any "[rb]" "[lb]" any:(var Str ups) "[rb]") | |
| 251 |
raid chunk_size := chunk*1024 | |
| 252 |
raid geometry := algorithm | |
| 253 |
# only algorithms 0 and 1 are accepted, and every disk must be flagged "U"
ok := (algorithm=0 or algorithm=1) and ups=(repeat ups:len "U") | |
| 254 |
if ok | |
| 255 |
raid level := level | |
| 256 |
# size in bytes; presumably mdstat blocks are 1 KB each - TODO confirm
raid size := 1024n*blocks | |
| 257 |
if raid:size%(raid:chunk_size*raid:data_disks_count)<>0 | |
| 258 |
error "The total "+raid:device_name+" size does not match the chunk size and number of devices" | |
| 259 |
return | |
| 260 |
error "Unexpected "+raid:device_name+" status !" | |
| 261 |
| |
| 262 |
| |
| 263 |
doc | |
| 264 |
[This is the central function of the script: it defines on which device, and at which offset, an area is stored. The area is specified by its start position in the array. On return, we also get how many bytes of the area follow on the same device.] | |
| 265 |
| |
| 266 |
# Translates a byte offset in the array into a device index and a byte
# offset on that device.
#   raid_offset   : position in the array, must be in 0 .. raid:size-1
#   parity        : 0 selects the data copy; 1 .. checksum_disks_count selects
#                   a redundancy copy (RAID 1 mirror number, RAID 5 parity chunk)
#   device        : (out) index in raid:devices
#   device_offset : (out) byte offset on that device
# Returns the number of bytes that follow on the same device.
# Raises an error for out of range arguments and for unsupported levels.
method raid map raid_offset parity device device_offset -> continuous_size | |
| 267 |
arg Raid raid ; arg Intn raid_offset ; arg Int parity ; arg_w Int device ; arg_w Intn device_offset ; arg Int continuous_size | |
| 268 |
if raid_offset<0 or raid_offset>=raid:size | |
| 269 |
error "attempted to access raid device outside boundaries" | |
| 270 |
if parity<0 or parity>raid:checksum_disks_count | |
| 271 |
error "attempted to access an nonexistent parity disk" | |
| 272 |
if raid:level=0 | |
| 273 |
# RAID 0: chunks are dealt to the devices in turn, row after row
var Intn chunk_number := raid_offset\raid:chunk_size | |
| 274 |
# bytes left up to the end of the current chunk
continuous_size := (chunk_number+1)*raid:chunk_size-raid_offset | |
| 275 |
var Int chunk_per_row := raid:devices:size | |
| 276 |
var Intn row := chunk_number\chunk_per_row | |
| 277 |
var Int col := chunk_number%chunk_per_row | |
| 278 |
device := col | |
| 279 |
# NOTE(review): this is the offset of the chunk start on the device; the position of raid_offset inside the chunk is not added - confirm callers only pass chunk-aligned offsets
device_offset := row*raid:chunk_size | |
| 280 |
eif raid:level=1 | |
| 281 |
# RAID 1: every device holds the same bytes; 'parity' directly selects the device
device := parity | |
| 282 |
device_offset := raid_offset | |
| 283 |
# at most 64 KB is reported as continuous
continuous_size := min raid:size-raid_offset 64*2^10 | |
| 284 |
eif raid:level=5 | |
| 285 |
var Intn chunk_number := raid_offset\raid:chunk_size | |
| 286 |
continuous_size := (chunk_number+1)*raid:chunk_size-raid_offset | |
| 287 |
# data chunks per row: all devices but the spare ones and the parity one
var Int chunk_per_row := raid:devices:size-raid:sparse-1 | |
| 288 |
var Intn row := chunk_number\chunk_per_row | |
| 289 |
var Int col := chunk_number%chunk_per_row | |
| 290 |
var Int disks := raid:devices:size-raid:sparse | |
| 291 |
# geometry 0: parity starts on the last disk and moves down by one each row
# geometry 1: parity starts on the first disk and moves up by one each row
# any other geometry gives -1
var Int parity_disk := shunt raid:geometry=0 (disks-1)-row%disks raid:geometry=1 row%disks -1 | |
| 292 |
# data columns skip over the parity disk
if col>=parity_disk | |
| 293 |
col += 1 | |
| 294 |
if parity=1 | |
| 295 |
col := parity_disk | |
| 296 |
device := col | |
| 297 |
device_offset := row*raid:chunk_size | |
| 298 |
else | |
| 299 |
error "raid level "+(string raid:level)+" is not supported yet." | |
| 300 |
| |
| 301 |
doc | |
| 302 |
['read' method will read an area in the RAID array. If parity=0 we read the true datas. If parity=1, we read the first extra disk datas (RAID 1 or RAID 5). If parity=2, we read the second extra disk datas (RAID 1). etc.] ; eol | |
| 303 |
['read' will call 'map' in order to determine where each area of the RAID truly stands.] ; eol | |
| 304 |
[ It will also call 'read_using_checksums' in order to deduce the content using extra disks, if the data disk is dead.] | |
| 305 |
| |
| 306 |
# XORs 'size' bytes of 'buffer2' into 'buffer1', one machine word at a time.
# 'size' is expected to be a multiple of Int:size.
function memory_xor buffer1 buffer2 size
  arg Address buffer1 buffer2 ; arg Int size
  # 'for' bounds are inclusive: the last word starts at size-Int:size, so
  # the upper bound must be size-1 or one extra word past the end of both
  # buffers would be read and modified
  for (var Int i) 0 size-1 step Int:size
    (buffer1 translate Byte i) map Int := ((buffer1 translate Byte i) map Int) .xor. ((buffer2 translate Byte i) map Int)
| 310 |
| |
| 311 |
# Rebuilds 'size' bytes of array data starting at 'offset' into 'buffer'
# without reading the disk that normally holds them:
#   RAID 1: the mirrors are tried in turn until one is not crashed
#   RAID 5: the area is the XOR of the same area on all the other active disks
# 'map' is used to split the request into per device pieces.
method raid read_using_checksums offset buffer size
  arg_rw Raid raid ; arg Intn offset ; arg Address buffer ; arg Int size
  var Int done := 0
  while done<size
    var Int step := min (raid map offset+done 0 (var Int d) (var Intn o)) size-done
    if raid:level=1
      var Int p := 1 ; var Int i
      # mirrors are numbered 1 .. checksum_disks_count inclusive; a strict
      # comparison would never try the last one (so none at all with two disks)
      while p<=raid:checksum_disks_count and { i := (d+p)%raid:devices:size; raid:devices:i configure "seek "+string:o ; raid:devices:i raw_read (buffer translate Byte done) step ; raid:devices:i:is_crashed }
        p += 1
    eif raid:level=5
      var Address buf := memory_allocate step null
      memory_clear (buffer translate Byte done) step
      # spare disks (the last raid:sparse devices) are not part of the
      # parity set and must not be XORed in
      for (var Int i) 0 raid:devices:size-raid:sparse-1
        if i<>d
          raid:devices:i configure "seek "+string:o
          raid:devices:i raw_read buf step
          memory_xor (buffer translate Byte done) buf step
      memory_free buf
    done += step
| 330 |
| |
| 331 |
# Reads 'size' bytes starting at array position 'offset' into 'buffer'.
# 'parity' selects what is read: 0 for the data itself, 1 and above for a
# redundancy copy (see 'map').
# When the data disk is flagged as crashed, on a level above 0, the piece
# is rebuilt through 'read_using_checksums'.
method raid read offset parity buffer size
  arg_rw Raid raid ; arg Intn offset ; arg Int parity ; arg Address buffer ; arg Int size
  var Int done := 0
  while done<size
    # locate the piece, and clip it to what is still requested
    var Int chunk := raid map offset+done parity (var Int disk) (var Intn disk_offset)
    if size-done<chunk
      chunk := size-done
    var Address target := buffer translate Byte done
    raid:devices:disk configure "seek "+string:disk_offset ?
    raid:devices:disk raw_read target chunk ?
    if parity=0 and raid:level>0
      if raid:devices:disk:is_crashed
        raid read_using_checksums offset+done target chunk
    done += chunk
| 341 |
| |
| 342 |
| |
| 343 |
doc | |
| 344 |
['write' method will ... write an area in the RAID array.] | |
| 345 |
| |
| 346 |
# Writes 'size' bytes from 'buffer' at array position 'offset'.
# 'parity' selects the target: 0 for the data itself, 1 and above for a
# redundancy copy (see 'map'). Nothing is done here about a crashed target.
method raid write offset parity buffer size
  arg_rw Raid raid ; arg Intn offset ; arg Int parity ; arg Address buffer ; arg Int size
  var Int done := 0
  while done<size
    # locate the piece, and clip it to what is still to be written
    var Int chunk := raid map offset+done parity (var Int disk) (var Intn disk_offset)
    if size-done<chunk
      chunk := size-done
    var Address source := buffer translate Byte done
    raid:devices:disk configure "seek "+string:disk_offset ?
    raid:devices:disk raw_write source chunk ?
    if raid:devices:disk:is_crashed
      # a crashed target is deliberately ignored at this level
      void
    done += chunk
| 356 |
| |
| 357 |
| |
| 358 |
| |
| 359 |
| |
| 360 |
| |
| 361 |
doc | |
| 362 |
[This function is responsible for checking both the RAID array consistency, and the fact that this program is able to read it properly. In case of troubles, it will stop disgracefully, but since we are only reading, it is a good behavior.] ; eol | |
| 363 |
para | |
| 364 |
[We read the datas using three methods:] | |
| 365 |
list | |
| 366 |
item [using the 'map' function, then reading individual disk] | |
| 367 |
item | |
| 368 |
[directly from the /dev/md] ; italic [x] ; [ device] | |
| 369 |
[ (if 'full_check' is true)] | |
| 370 |
item [computing each area from the extra disks, without using the real datas (if 'full_check' is true and the RAID level is 1 or 5)] | |
| 371 |
[If one method does not produce the same data stream as others, we stop.] | |
| 372 |
para | |
| 373 |
[We also recompute all the data on each extra disk, and compare them to the content of the physical disk in order to verify that the RAID array is properly synced, and that this program computes the extra disks content properly (the same way the Linux kernel does it).] ; eol | |
| 374 |
[All this is not necessary for converting a RAID array, but we take maximum precautions before starting the real conversion. Again, a failure at this level will not hurt much.] | |
| 375 |
| |
| 376 |
# Walks through the whole array, one row (chunk_size*data_disks_count bytes)
# at a time, and verifies it.
#   full_check            : also compare with what the md device returns, and
#                           (level above 0) with what 'read_using_checksums' rebuilds
#   continue_if_corrupted : count inconsistent redundancy instead of raising an error
#   correct_if_corrupted  : when continuing, also rewrite the inconsistent
#                           mirror / parity from the data
# Progress and results are reported on the console.
method raid check_checksums full_check continue_if_corrupted correct_if_corrupted | |
| 377 |
arg_rw Raid raid ; arg CBool full_check continue_if_corrupted correct_if_corrupted | |
| 378 |
var DateTime start := datetime | |
| 379 |
# 'unit' is the amount of data in one row of the array
var Int unit := raid:chunk_size*raid:data_disks_count | |
| 380 |
# chunks: data read through 'read'; chunks2: the same data obtained another way
var Address chunks := memory_allocate unit null | |
| 381 |
var Address chunks2 := memory_allocate unit null | |
| 382 |
# checksum1: parity recomputed from the data; checksum2: parity read from disk
var Address checksum1 := memory_allocate raid:chunk_size null | |
| 383 |
var Address checksum2 := memory_allocate raid:chunk_size null | |
| 384 |
if full_check | |
| 385 |
(var Stream md) open raid:device_name in+out+nocache | |
| 386 |
# number of bytes found inconsistent so far
var Intn corrupted := 0 | |
| 387 |
var Intn position := 0 | |
| 388 |
while position<raid:size | |
| 389 |
# report progress each time a megabyte boundary is crossed
if position\2^20<>(position-unit)\2^20 | |
| 390 |
console "checked " position\2^20 " MB (" 100*position\raid:size "% " (cast position\(max (cast (datetime:seconds-start:seconds)*1024 Int) 1) Int) " KB/s)" | |
| 391 |
if corrupted<>0 | |
| 392 |
console " (" corrupted\raid:chunk_size " corrupted chunks)" | |
| 393 |
console " [cr]" | |
| 394 |
# method 1: read the row from the individual disks through 'map'
raid read position 0 chunks unit | |
| 395 |
if full_check | |
| 396 |
# method 2: read the same row from the md device and compare
md configure "seek "+string:position ? | |
| 397 |
md raw_read chunks2 unit ? | |
| 398 |
if (memory_compare chunks unit chunks2 unit)<>compare_equal | |
| 399 |
error "This program is buggy (at position "+string:position+") !" ? | |
| 400 |
if raid:level=1 | |
| 401 |
# RAID 1: every mirror must hold the same bytes as the data copy
for (var Int i) 1 raid:checksum_disks_count | |
| 402 |
raid read position i chunks2 unit | |
| 403 |
if (memory_compare chunks unit chunks2 unit)<>compare_equal | |
| 404 |
if continue_if_corrupted | |
| 405 |
if correct_if_corrupted | |
| 406 |
raid write position i chunks unit | |
| 407 |
corrupted += unit | |
| 408 |
else | |
| 409 |
error "The raid array is corrupted (at position "+string:position+") !" ? | |
| 410 |
eif raid:level=5 | |
| 411 |
# RAID 5: the parity chunk must be the XOR of the data chunks of the row
memory_clear checksum1 raid:chunk_size | |
| 412 |
for (var Int i) 0 unit-1 step raid:chunk_size | |
| 413 |
memory_xor checksum1 (chunks translate Byte i) raid:chunk_size | |
| 414 |
raid read position 1 checksum2 raid:chunk_size | |
| 415 |
if (memory_compare checksum1 raid:chunk_size checksum2 raid:chunk_size)<>compare_equal | |
| 416 |
if continue_if_corrupted | |
| 417 |
if correct_if_corrupted | |
| 418 |
raid write position 1 checksum1 raid:chunk_size | |
| 419 |
corrupted += raid chunk_size | |
| 420 |
else | |
| 421 |
error "The raid array is corrupted (at position "+string:position+") !" ? | |
| 422 |
if full_check and raid:level>0 | |
| 423 |
# method 3: rebuild the row from the redundancy only and compare
raid read_using_checksums position chunks2 unit | |
| 424 |
if (memory_compare chunks unit chunks2 unit)<>compare_equal | |
| 425 |
error "This program is not safe (at position "+string:position+") !" ? | |
| 426 |
position += unit | |
| 427 |
memory_free chunks | |
| 428 |
memory_free chunks2 | |
| 429 |
memory_free checksum1 | |
| 430 |
memory_free checksum2 | |
| 431 |
if speed_report | |
| 432 |
console "check passed (in " (cast datetime:seconds-start:seconds Int) " seconds, at " (cast raid:size\(max (cast (datetime:seconds-start:seconds)*1024 Int) 1) Int) " KB/s) " eol | |
| 433 |
if corrupted<>0 | |
| 434 |
console (shunt correct_if_corrupted "corrected" "found") " " corrupted " bytes corrupted (" corrupted\raid:chunk_size " chunks)" eol | |
| 435 |
| |
| 436 |
doc | |
| 437 |
[This is an additional security: we read all the sectors on the new RAID array, and stop disgracefully if we get a bad sector.] | |
| 438 |
| |
| 439 |
# Reads the beginning of every member device, up to the share of the
# array it holds (raid:size divided by the number of data disks), one row
# sized buffer at a time. The data read is discarded.
# Progress is reported on the console each time a megabyte boundary is crossed.
function raid_check_surfaces raid
  arg_rw Raid raid
  var Int unit := raid:chunk_size*raid:data_disks_count
  var Intn disksize := raid:size\raid:data_disks_count
  var Address buffer := memory_allocate unit null
  for (var Int disk) 0 raid:devices:size-1
    var Intn position := 0
    while position<disksize
      if (position-unit)\2^20<>position\2^20
        console "surface checked " disk+1 "/" raid:devices:size " " position\2^20 " MB (" 100*position\disksize "%) [cr]"
      raid:devices:disk configure "seek "+string:position ?
      raid:devices:disk raw_read buffer unit ?
      position += unit
  memory_free buffer
| 453 |
| |
| 454 |
| |
| 455 |
doc | |
| 456 |
[Now the real conversion.] ; eol | |
| 457 |
[The main idea with this program is that if we convert 'common_unit' bytes at once (read all of them from the old array, then write them to the new array), we get no clash (no data is written to the new array that will be read from the old array at the same physical place at a later point). ] | |
| 458 |
[If the new array will contain more datas per row, we must go forward (we must go backward if it will contain less). ] | |
| 459 |
[If the new array is bigger than the old one, we'll pad the extra space with zeros on all disks (so the value on extra disks will be consistent).] | |
| 460 |
para | |
| 461 |
[Since the Pliant streams that are used to access Linux devices are set in 'safe' mode, an IO failure on one of them will simply disable the device (subsequent IOs will just be dropped) but not stop the program. Since we also write the extra disks information on the fly, it means that a failure on a disk of a RAID5 array should not be fatal.] | |
| 462 |
| |
| 463 |
# Copies the content of the 'old' array layout to the 'new' one, in place.
#
# The data is moved 'common_unit' bytes at a time, where common_unit is a
# common multiple of the row sizes of both layouts (at least 64 KB),
# going forward when the new layout has at least as many data disks as the
# old one and backward otherwise. The redundancy of the new layout (mirrors
# or parity) is written on the fly. When the new array is larger, the extra
# space is then filled with zeros.
#
# Fixes over the previous version:
#  - RAID 1 mirrors were written with 'common_unit' bytes even on a short
#    final block, i.e. past the converted area
#  - RAID 5 parity of a short final block was computed over stale buffer
#    content and could be written past the converted area
#  - the padding loop zeroed the parity of the row that still holds the
#    last converted data
function raid_convert old new
  arg_rw Raid old new
  var Int old_unit := old:chunk_size*old:data_disks_count
  var Int new_unit := new:chunk_size*new:data_disks_count
  var Int common_unit := ppcm old_unit new_unit
  while common_unit<64*2^10
    common_unit*=2
  if common_unit>memory_assigned
    error "The conversion would consume too much memory.[lf]I'd better not attempt it (no data has been modifyed yet)." ?
  var Address chunks := memory_allocate common_unit null
  var Address checksum := memory_allocate new:chunk_size null
  var Intn size := min old:size new:size
  var CBool forward := new:data_disks_count>=old:data_disks_count
  var DateTime start := datetime
  # going backward, start with the last (possibly short) block
  var Intn position := shunt forward 0 (size-1)\common_unit*common_unit
  while (shunt forward position<size position>=0)
    console "converted " (shunt forward position size-position)\2^20 " MB (" 100*(shunt forward position size-position)\size "% " (cast position\(max (cast (datetime:seconds-start:seconds)*1024 Int) 1) Int) " KB/s) [cr]"
    var Int step := min size-position common_unit
    old read position 0 chunks step
    new write position 0 chunks step
    if new:level=1
      for (var Int i) 1 new:checksum_disks_count
        # only 'step' bytes are valid in the buffer
        new write position i chunks step
    eif new:level=5
      if step<common_unit
        # the end of the last row is considered to be zeros
        memory_clear (chunks translate Byte step) common_unit-step
      # 'position' is a multiple of new_unit, so each 'base' is the start of a row
      for (var Int base) 0 step-1 step new_unit
        memory_clear checksum new:chunk_size
        for (var Int i) 0 new_unit-1 step new:chunk_size
          memory_xor checksum (chunks translate Byte base+i) new:chunk_size
        new write position+base 1 checksum new:chunk_size
    position += shunt forward common_unit -common_unit
  # pad what remains of the new array with zeros
  position := size
  memory_clear chunks common_unit
  while position<new:size
    console "cleared " (position-size)\2^20 " MB (" 100*(position-size)\(new:size-size) "%) [cr]"
    var Int step := min new:size-position common_unit
    new write position 0 chunks step
    if new:level=1
      for (var Int i) 1 new:checksum_disks_count
        new write position i chunks step
    eif new:level=5
      for (var Int base) 0 step-1 step new_unit
        # a row that starts below 'size' still holds converted data: its
        # parity has been computed by the conversion loop and must be kept
        if (position+base)\new_unit*new_unit>=size
          new write position+base 1 chunks new:chunk_size
    position += step
  memory_free chunks
  memory_free checksum
  if speed_report
    console "conversion passed (in " (cast datetime:seconds-start:seconds Int) " seconds, at " (cast new:size\(max (cast (datetime:seconds-start:seconds)*1024 Int) 1) Int) " KB/s) " eol
| 510 |
| |
| 511 |
| |
| 512 |
doc | |
| 513 |
[This is just the main function that calls the various ones described above, and displays additional informations and warnings.] ; eol | |
| 514 |
[A sample usage (from the shell prompt) might be:] | |
| 515 |
fixed | |
| 516 |
[pliant module /pliant/admin/raid.pli command 'raid_convert [dq]/dev/md0[dq] [dq]/dev/hda5 /dev/hda6 /dev/hda7[dq] [dq]/dev/hda5 /dev/hda6 /dev/hda7 /dev/hda8[dq] 5 64*2^10'] | |
| 517 |
| |
| 518 |
| |
| 519 |
# Main entry point: converts the array 'raid_device', currently built on
# 'old_devices', to a 'new_raid_level' array with 'new_chunk_size' bytes
# chunks built on 'new_devices'.
# Returns in 'deads' the names of the devices of the new array that crashed
# during the conversion.
#
# Steps: query the running array, optionally check its consistency, stop
# it, optionally test the disks surfaces, convert the data, report crashed
# disks, optionally check the new array, then print the commands required
# to write the new superblocks.
#
# Fixes over the previous version:
#  - the number of disks lost in the NEW array is compared with the
#    redundancy of the new array (it was compared with the old one)
#  - 'raidstart' receives the normalized device name, as 'raidstop' does
function raid_convert raid_device old_devices new_devices new_raid_level new_chunk_size -> deads
  arg Str raid_device old_devices new_devices ; arg Int new_raid_level new_chunk_size ; arg_w Array:Str deads
  (var Raid old) bind raid_device old_devices
  if (execute "raidstart "+(file_os_name old:device_name) quiet)=0
    sleep 5
  old:query ?
  console "[lf]The old RAID" old:level " array is a " old:size\2^20 " MB RAID" old:level " array on " old:devices:size " disks with " old:chunk_size\1024 " K chunks.[lf]"
  (var Raid new) define raid_device new_devices "level "+string:new_raid_level+" chunk_size "+string:new_chunk_size
  # each data disk keeps the same capacity, so the size scales with the number of data disks
  new size := old:size*new:data_disks_count\old:data_disks_count
  console "The new RAID" new:level " array will be a " new:size\2^20 " MB RAID" new:level " array on " new:devices:size " disks with " new:chunk_size\1024 " K chunks.[lf]"
  console "[lf]THIS IS VERY ALPHA CODE, IT MAY DESTROY ALL YOUR DATAS !" eol
  console "You have 10 seconds to press Ctrl+C if you changed your mind." eol
  sleep 10
  console "So, let's go.[lf][lf]"
  filesystem_dismount old:device_name
  if conservative
    console "step 1: checking consistency in the existing array " eol
    console "It is still safe to stop while step 1 is running." eol
    old check_checksums true false false ?
  if (execute "raidstop "+(file_os_name old:device_name) quiet)<>0
    error "Failed to stop RAID array "+raid_device ?
  sleep 5
  if conservative
    console "step 2: testing disks surfaces " eol
    console "It is still safe to stop while step 2 is running." eol
    raid_check_surfaces new ?
  # from now on an IO error disables the faulty device instead of stopping the program
  old set_safe_mode
  new set_safe_mode
  void ?
  if conservative
    memory_checkup ?
  console "step 3: converting datas " eol
  console "IF THIS PROCESS IS STOPPED IN THE MIDDLE, YOUR DATAS WILL BE LOST." eol
  sleep 2
  raid_convert old new
  var Int old_crashed_count := 0
  for (var Int i) 0 old:devices:size-1
    if old:devices:i:is_crashed
      console old:devices:i:name+" IS CRASHED IN THE OLD RAID !" eol
      old_crashed_count += 1
  deads size := 0
  for (var Int i) 0 new:devices:size-1
    if new:devices:i:is_crashed
      console new:devices:i:name+" IS CRASHED IN THE NEW RAID !" eol
      if (new:devices:i:name eparse "file:" any:(var Str device_name))
        deads += device_name
      else
        console "this program is really buggy !!!" eol
        deads += new:devices:i:name
  if old_crashed_count>old:checksum_disks_count
    console "YOU DATAS ARE LOST BECAUSE TOO MANY DISKS CRASHED IN THE OLD RAID ARRAY." eol
    return
  # what the new array can survive depends on its own redundancy
  if deads:size>new:checksum_disks_count
    console "YOU DATAS ARE LOST BECAUSE TOO MANY DISKS CRASHED IN THE NEW RAID ARRAY." eol
    return
  console "The conversion is finished.[lf]"
  if conservative and deads:size=0
    console "step 4: checking consistency in the new array " eol
    console "A failure in the middle of step 4 would not crash your datas:[lf]they are already converted." eol
    new check_checksums false false false ?
    # NOTE(review): assumed to belong to the conservative branch, like the other memory_checkup call - confirm
    memory_checkup
  console "Please UPDATE YOUR /etc/raidtab FILE FIRST,[lf]then run the following commands:" eol
  console " mkraid --force --dangerous-no-resync "+raid_device eol
  for (var Int i) 0 deads:size-1
    console " raidhotremove "+raid_device+" "+deads:i eol
  console "in order to update the raid superblocks." eol
| 585 |
| |
| 586 |
| |
| 587 |
# Checks the array 'raid_device' built on 'old_devices' without modifying it:
# starts it if possible, queries its configuration, optionally verifies its
# redundancy, stops it if it was started here, then optionally reads the
# surface of every disk.
function raid_check raid_device old_devices
  arg Str raid_device old_devices
  (var Raid raid) bind raid_device old_devices
  # remember whether the array was started here, so that only then it gets stopped
  var CBool started := raid:start=success
  raid:query ?
  console "[lf]The RAID" raid:level " array is a " raid:size\2^20 " MB RAID" raid:level " array on " raid:devices:size " disks with " raid:chunk_size\1024 " K chunks.[lf]"
  # use the normalized device name, as raid_convert does
  filesystem_dismount raid:device_name
  if conservative
    console "step 1: checking consistency in the existing array " eol
    raid check_checksums true false false ?
  if started and raid:stop=failure
    error "Failed to stop RAID array "+raid_device ?
  if conservative
    console "step 2: testing disks surfaces " eol
    raid_check_surfaces raid ?
  console raid_device " checked." eol
| 603 |
| |
| 604 |
| |
| 605 |
export Raid '. define' '. build_configuration_file' '. initialize' '. start' '. stop' '. hotadd' | |
| 606 |
export raid_convert raid_check | |
| 607 |
| |
| 608 |
| |
| 609 |
# Sample recovery : | |
| 610 |
# | |
| 611 |
# module "/pliant/linux/storage/raid.pli" | |
| 612 |
# (gvar Raid r) define "md0" "sda1 sdb1" "" | |
| 613 |
# r hotadd "sda1" | |
| 614 |
| |
| 615 |
| |
| 616 |
| |
| 617 |
| |
| 618 |
| |
| 619 |
if false | |
| 620 |
| |
| 621 |
doc | |
| 622 |
para | |
| 623 |
[All the rest of the script is just for building a sample RAID and testing the conversion on it.] | |
| 624 |
['raid_configure' will write the /etc/raidtab file with the provided information so that we can run 'raidstart'.] ; eol | |
| 625 |
[Since this function will destroy existing configurations stored in /etc/raidtab, the first time it is used, /etc/raidtab is copied to /etc/raidtab.backup.] | |
| 626 |
| |
| 627 |
# Write the description of one RAID array to a raidtab style file.
#   raid_device:   the RAID device; "device:/" is prepended when it contains no ':'
#   devices_names: list of the member devices names, each one completed the same way
#   level:         value written on the 'raid-level' line
#   chunksize:     chunk size; it is written divided by 1024 on the 'chunk-size' line
#   filename:      the file receiving the configuration
function raid_configure raid_device devices_names level chunksize filename
  arg Str raid_device devices_names ; arg Int level chunksize ; arg Str filename
  var Str array_url := shunt (raid_device search ":" -1)=(-1) "device:/"+raid_device raid_device
  # take the names one at a time; what could not be split is the last name
  var Array:Str members
  var Str pending := devices_names
  while pending<>""
    var Str current := pending
    var Str remain := ""
    if (pending parse any:(var Str head) _ any:(var Str tail))
      current := head
      remain := tail
    members += shunt (current search ":" -1)=(-1) "device:/"+current current
    pending := remain
  # the array header first, then two lines per member device
  (var Stream tab) open filename out
  tab writeline "raiddev "+(file_os_name array_url)
  tab writeline " raid-level "+(string level)
  tab writeline " nr-raid-disks "+(string members:size)
  tab writeline " nr-spare-disks 0"
  tab writeline " persistent-superblock 1"
  tab writeline " chunk-size "+(string chunksize\1024)
  for (var Int rank) 0 members:size-1
    tab writeline " device "+(file_os_name members:rank)
    tab writeline " raid-disk "+(string rank)
| 649 |
| |
| 650 |
# Write the array description to /etc/raidtab.
# If /etc/raidtab exists and /etc/raidtab.backup does not, the existing file
# is copied to /etc/raidtab.backup first and a message says so.
function raid_configure raid_device devices_names level chunksize
  arg Str raid_device devices_names ; arg Int level chunksize
  var Str raidtab := "file:/etc/raidtab"
  var Str backup := "file:/etc/raidtab.backup"
  if (file_query raidtab standard)=defined
    if (file_query backup standard)=undefined
      file_copy raidtab backup
      console "Your /etc/raidtab file has been saved to /etc/raidtab.backup" eol
  raid_configure raid_device devices_names level chunksize raidtab
| 656 |
| |
| 657 |
doc | |
| 658 |
[Waits for the resync to finish in the kernel.] | |
| 659 |
| |
| 660 |
# Read /proc/mdstat again and again until none of its lines contains
# "resync", calling 'sleep 60' between two readings.
function raid_wait
  while true
    var CBool pending := false
    (var Stream mdstat) open "file:/proc/mdstat" in
    while not mdstat:atend
      var Str line := mdstat readline
      if (line search "resync" -1)<>-1
        pending := true
    if not pending
      return
    mdstat close
    # progress message; it is overwritten once the pause is over
    console "waiting for raid resync[cr]"
    sleep 60
    console " [cr]"
| 673 |
| |
| 674 |
| |
| 675 |
doc | |
| 676 |
[These two functions simply generate a sample file and check its content after the conversion.] | |
| 677 |
| |
| 678 |
# Create the test file: the Int values 1, 2, 3 ... written one after the
# other in raw form, up to 'size' bytes.
#   name: the file to create
#   size: the size of the file, in bytes
function generate_file name size
  arg Str name ; arg Int size
  console "generating a " size\2^20 " MB test file" eol
  # number of Int values that fit in 'size' bytes
  var Int count := size\Int:size
  (var Stream target) open name out
  for (var Int word) 1 count
    target raw_write addressof:word Int:size
  target close
| 685 |
| |
| 686 |
# Check a file written by 'generate_file': it must contain the Int values
# 1, 2, 3 ... in raw form.
#   name: the file to check
#   size: the expected size of the file, in bytes
# The first value that does not match is reported through 'error' with
# 'error_id_corrupted', and the check stops there.
function check_file name size
  arg Str name ; arg Int size
  console "checking the " size\2^20 " MB test file" eol
  (var Stream s) open name in
  for (var Int i) 1 size\Int:size
    s raw_read addressof:(var Int j) Int:size
    if j<>i
      # 'i' counts from 1, so the value number i starts at byte (i-1)*Int:size
      error error_id_corrupted "The test file is corrupted at offset "+(string (i-1)*Int:size)
      # report the first corrupted value only instead of scanning the rest of the file
      s close
      return
  s close
| 694 |
| |
| 695 |
| |
| 696 |
doc | |
| 697 |
[This is my test process. It will build a RAID array, then feed it with sample data, then convert it, and finally check some data on the new array.] ; eol | |
| 698 |
[If you want to use it, you have to update the constants at the beginning in order to match your configuration, then change 'if false' to 'if true'. Don't forget to put back 'if false' at the end because until you do, each load of this module will run the sample, and so erase all your data on the sample raid.] ; eol | |
| 699 |
[If you get the 'sample test passed.' message at the end, then the test succeeded on your configuration. If it stops in the middle, there is a problem with your configuration. If it stops in the middle of step 3, there is a big problem because the data are lost, so please send an email to hubert.tonneau@pliant.cx] | |
| 700 |
| |
| 701 |
# End to end test of 'raid_convert' on a scratch RAID array:
# build the initial array, fill it with files, convert it to the final
# layout, rebuild the superblocks, then check the files again.
# The constants below describe the test site; the function refuses to do
# anything as long as 'doit' is false.
function run_sample_raid_test
  constant raid_device "md0"
  constant initial_devices "hda5 hda6 hda7"
  constant initial_raid_level 5
  constant initial_chunksize 16*2^10
  constant final_devices "hda5 hda6 hda7 hda8"
  constant final_raid_level 5
  constant final_chunksize 64*2^10
  constant test_file_size 64*2^20
  constant doit false
  # safety lock: the constants above must have been reviewed and 'doit'
  # set to true before the test is allowed to touch any disk
  if not doit
    console "You must update the constants at the top of function 'run_sample_raid_test'[lf]before using it." eol
    return
  var Str raid_url := shunt (raid_device search ":" -1)=(-1) "device:/"+raid_device raid_device
  # the operating system name of the array, used on every command line below
  var Str os_dev := file_os_name raid_url
  warn "You are attempting to run raid test reconfigure utility on "+os_dev+"[lf]It will erase all datas on this RAID device.[lf]A bug or misconfiguration in this program might also corrupt all your system:[lf]be sure to have up to date backups !"
  # start from a dismounted and stopped array
  filesystem_dismount "file:/mnt/raid"
  execute "raidstop "+os_dev quiet
  # build the initial array and put the test files on it
  raid_configure raid_device initial_devices initial_raid_level initial_chunksize
  execute "mkraid --really-force "+os_dev quiet
  raid_wait
  format_partition raid_url "name [dq]test[dq]"
  filesystem_mount raid_url "file:/mnt/raid/" ""
  file_copy "/" "file:/mnt/raid/pliant/" extended+recursive
  execute "tar -zc -f /mnt/raid/pliant.tgz /pliant/"
  generate_file "file:/mnt/raid/test" test_file_size
  filesystem_dismount raid_url
  # convert to the final layout, then rewrite the configuration and the superblocks
  var Array:Str crashed := raid_convert raid_device initial_devices final_devices final_raid_level final_chunksize ?
  raid_configure raid_device final_devices final_raid_level final_chunksize
  execute "mkraid --really-force --dangerous-no-resync "+os_dev quiet
  # take out of the array each device returned by the conversion
  for (var Int k) 0 crashed:size-1
    execute "raidhotremove "+os_dev+" "+crashed:k quiet
  # check what is now stored on the converted array
  filesystem_mount raid_url "file:/mnt/raid/" ""
  if (execute "tar -ztv -f /mnt/raid/pliant.tgz" quiet)<>0
    error "The tar file is corrupted" ?
  check_file "file:/mnt/raid/test" test_file_size ?
  # leave the array dismounted and stopped
  filesystem_dismount raid_url
  execute "raidstop "+os_dev quiet
  console "sample test passed." eol
| 746 |
| |
| 747 |
export run_sample_raid_test | |
| |