Re: Starwind 6. Sync over 10 Gbit very slow
Posted: Thu Apr 25, 2019 10:46 pm
I do. You can see in my post above, where I include the application log, that maintenance mode (MM) is noted as being rolled back just after all of the heartbeat links fail.
iSCSI, FCoE, AoE solutions for SMBs and Corporations. VTL, CDP and other advanced features.
https://forums.starwindsoftware.com/
https://forums.starwindsoftware.com/viewtopic.php?f=5&t=5279
Code: Select all
# Configuration for the shutdown script.
# Set SaveVMs to $true to save the virtual machine state;
# set it to $false to shut down the virtual machine guest operating system instead.
$SaveVMs = $true
# Maximum time to wait for all virtual machines to stop (the wait loop counts one-second sleeps)
$StopTimeout = 60
# Temporary file holding node names, used for restart script
$NodeFile = "C:\Scripts\nodes.tmp"
# Get the cluster node names and save them for the ExitMaintenance script to use later.
# Only overwrite the file when at least one name was actually returned: if the cluster
# query fails, piping an empty result to Set-Content would replace a previously saved,
# valid node list with an empty file that the restart script cannot use.
$nodes = @()
$nodes = @((Get-ClusterNode).Name | Where-Object { $_ })
if ($nodes.Count -gt 0) {
    $nodes | Set-Content -Path $NodeFile
} else {
    Write-Host "No cluster node names could be read; leaving $NodeFile unchanged" -foreground red
}
# Pin every clustered VM on this node to this node (so the cluster does not
# live-migrate it elsewhere), then save or shut down the local VMs in the background.
$VMs = Get-VM
$LocalVMResources = Get-ClusterResource | Where-Object {
    ($_.ResourceType -eq "Virtual Machine") -and ($VMs.Name -contains $_.OwnerGroup)
}
Write-Host "Setting cluster node ownership for $(($LocalVMResources).Count) cluster resources"
$ThisNodeName = (Get-WmiObject win32_computersystem).DNSHostName
$LocalVMResources | Set-ClusterOwnerNode -Owners $ThisNodeName

# Pick the log message and the Stop-VM switch according to $SaveVMs.
if ($SaveVMs) {
    $StopMessage = "Saving state of $($VMs.Count) virtual machines"
    $StopArguments = @{ Save = $true }
} else {
    $StopMessage = "Shutting down $($VMs.Count) virtual machines"
    $StopArguments = @{}
}
Write-Host $StopMessage
# Run as background jobs; all output, including errors, is discarded.
$VMs | Stop-VM @StopArguments -AsJob 2>&1 | Out-Null
# Wait until all virtual machine resources on all cluster nodes are offline.
# Polls once per second and gives up after $StopTimeout polls.
Write-Host "Waiting for all virtual machines on all nodes to stop"
$c1 = 0
do {
    $ResourcesOnline = @(Get-ClusterResource | Where-Object { ($_.ResourceType -eq "Virtual Machine") -and ($_.State -ne "Offline") }).Count
    if ($ResourcesOnline -gt 0) { $c1 += 1 ; Start-Sleep -Seconds 1 }
} while (($ResourcesOnline -gt 0) -and ($c1 -lt $StopTimeout))
# If any cluster virtual machine resources remain in a non-offline state, terminate them if they are running on this node
if ($ResourcesOnline -gt 0) {
    # Query the cluster once and keep only the owner group names, instead of
    # re-running Get-ClusterResource for every VM returned by Get-VM.
    $PendingGroupNames = @(
        Get-ClusterResource |
            Where-Object { ($_.ResourceType -eq "Virtual Machine") -and ($_.State -ne "Offline") } |
            ForEach-Object { $_.OwnerGroup.Name }
    )
    $StillRunning = @(Get-VM | Where-Object { $PendingGroupNames -contains $_.Name })
    # The remaining resources may all belong to VMs on another node; only call
    # Stop-VM when there is something local to turn off.
    if ($StillRunning.Count -gt 0) {
        $StillRunning | ForEach-Object { Write-Host "$($_.Name) still not offline ... turning off" }
        $StillRunning | Stop-VM -TurnOff -Force -ErrorAction SilentlyContinue
    }
}
# Randomly cause one node to continue first
$delay = Get-Random -Maximum 5000
Write-Host "Pausing $($delay) miliseconds"
Start-Sleep -Milliseconds $delay
# Take all cluster shared volumes offline.
# The retry loop is bounded: errors from Stop-ClusterResource are silenced, so a
# volume that refuses to go offline would otherwise keep this script (and therefore
# the node shutdown) waiting forever.
Write-Host "Taking cluster shared volumes offline"
# Maximum number of seconds to keep retrying before continuing with the shutdown
$CsvTimeout = 120
$CsvDeadline = (Get-Date).AddSeconds($CsvTimeout)
do {
    Start-Sleep -Seconds 1
    $OnlineCSVs = @(Get-ClusterSharedVolume | Where-Object { $_.State -eq 'Online' })
    $OnlineCSVs | ForEach-Object { $_ | Stop-ClusterResource -ErrorAction SilentlyContinue > $null }
} until (($OnlineCSVs.Count -eq 0) -or ((Get-Date) -ge $CsvDeadline))
if ($OnlineCSVs.Count -gt 0) {
    Write-Host "$($OnlineCSVs.Count) cluster shared volume(s) still online after $CsvTimeout seconds ... continuing" -foreground red
}
Import-Module StarWindX
# Set Starwind devices in maintenance mode
Write-Host "Preparing Starwind devices"
# Start from a known state so the finally block can tell whether a server
# object was ever created.
$SWserver = $null
try {
    # NOTE(review): the management credentials are hard-coded here and look like
    # product defaults - confirm, and consider reading them from a protected store.
    $SWserver = New-SWServer -Host 127.0.0.1 -Port 3261 -User root -Password starwind
    $SWserver.Connect()
    $SWserver.Devices | Where-Object { $_.DeviceType -eq "HA Image" } | ForEach-Object {
        Write-Host "Putting $($_.Name) into maintenance mode"
        $_.SwitchMaintenanceMode($true, $true)
    }
} catch {
    Write-Host $_ -foreground red
} finally {
    # New-SWServer may have failed before $SWserver was assigned; calling
    # Disconnect() on $null would raise a second, misleading error.
    if ($null -ne $SWserver) {
        $SWserver.Disconnect()
    }
}
# Create a scheduled task that runs the exit-maintenance script at startup.
Write-Host "Creating scheduled task to exit maintenance mode"
try {
    # The outer PowerShell exists only so the inner script's output can be redirected to a log file.
    $action = New-ScheduledTaskAction -Execute "Powershell.exe" -Argument '-command "Powershell -ExecutionPolicy Bypass -NoProfile -File C:\Scripts\SW-ExitMaintenance.ps1 > C:\Scripts\SW-ExitMaintenance.log 2>&1"'
    $trigger = New-ScheduledTaskTrigger -AtStartup -RandomDelay 00:00:30
    $settings = New-ScheduledTaskSettingsSet -Compatibility Win8
    $principal = New-ScheduledTaskPrincipal -UserId SYSTEM -LogonType ServiceAccount -RunLevel Highest
    $definition = New-ScheduledTask -Action $action -Principal $principal -Trigger $trigger -Settings $settings -Description "Exit maintenance mode for Starwind HA devices"
    # -Force replaces a task of the same name left over from an earlier run.
    # -ErrorAction Stop turns a registration failure into a terminating error so
    # the catch block below reports it instead of it being discarded.
    Register-ScheduledTask -TaskName "Maintenance Mode Off" -InputObject $definition -Force -ErrorAction Stop | Out-Null
} catch {
    Write-Host $_ -foreground red
}
# Switch the Virtual Machine Management and Cluster services to manual startup,
# then power the node off.
Write-Host "Setting cluster and vmms services to manual startup"
foreach ($svcName in 'vmms', 'ClusSvc') {
    Get-Service -Name $svcName | Set-Service -StartupType Manual
}

# Shut down the node
Write-Host "Stopping cluster node"
Stop-Computer -Force
Code: Select all
$NodeFile = "C:\Scripts\nodes.tmp"
$ServiceTimeout = 120
# Get the cluster node names that were saved by the UPS-Shutdown-Node script (can't use Get-ClusterNode as cluster not started yet).
# Wrap the result in @() so a file with a single line still yields an array
# (otherwise indexing the string would return single characters), and drop blank lines.
$nodes = @(Get-Content -Path $NodeFile -ErrorAction SilentlyContinue | ForEach-Object { $_.Trim() } | Where-Object { $_ })
if ($nodes.Count -eq 0) {
    # Without node names the wait below could never succeed; stop with a clear message.
    Write-Host "No cluster node names found in $NodeFile" -foreground red
    exit 1
}
Import-Module StarWindX
$ServiceName = "StarWindService"
# Poll once per second until the service reports Running on every saved node.
# A node that cannot be queried yields no status and therefore counts as not running.
do {
    Start-Sleep -Seconds 1
    $NodesNotReady = @($nodes | Where-Object {
        (Get-Service -ComputerName $_ -Name $ServiceName -ErrorAction SilentlyContinue).Status -ne "Running"
    })
} until ($NodesNotReady.Count -eq 0)
Write-Host "$ServiceName is running on all nodes"
# Randomly cause one node to continue first
$delay = Get-Random -Maximum 5000
Write-Host "Pausing $($delay) miliseconds"
Start-Sleep -Milliseconds $delay
# Take Starwind devices out of maintenance mode
Write-Host "Preparing Starwind devices"
# Start from a known state so the finally block can tell whether a server
# object was ever created.
$SWserver = $null
try {
    # NOTE(review): the management credentials are hard-coded here and look like
    # product defaults - confirm, and consider reading them from a protected store.
    $SWserver = New-SWServer -Host 127.0.0.1 -Port 3261 -User root -Password starwind
    $SWserver.Connect()
    $SWserver.Devices | Where-Object { $_.DeviceType -eq "HA Image" } | ForEach-Object {
        Write-Host "Taking $($_.Name) out of maintenance mode"
        $_.SwitchMaintenanceMode($false, $true)
    }
} catch {
    Write-Host $_ -foreground red
} finally {
    # New-SWServer may have failed before $SWserver was assigned; calling
    # Disconnect() on $null would raise a second, misleading error.
    if ($null -ne $SWserver) {
        $SWserver.Disconnect()
    }
}
# Set the Cluster and Virtual Machine Management services to automatic, and start the services
Write-Host "Setting cluster and vmms services to automatic startup"
Get-Service -Name vmms | Set-Service -StartupType Automatic
Get-Service -Name vmms | Start-Service
Get-Service -Name ClusSvc | Set-Service -StartupType Automatic
Get-Service -Name ClusSvc | Start-Service

# Wait for both services to be running on every saved node.
# One tracking record is kept per (node, service) pair:
#   Seconds   - number of polls on which the service was not running
#   Running   - result of the latest poll
#   Announced - whether the "running after N seconds" line was already printed,
#               so it is logged once instead of on every poll
$ServiceLabels = [ordered]@{
    vmms    = "Virtual Machine Management service"
    ClusSvc = "Cluster service"
}
$ServiceChecks = @(
    foreach ($node in @($nodes)) {
        foreach ($svc in $ServiceLabels.Keys) {
            [pscustomobject]@{
                Node      = $node
                Service   = $svc
                Label     = $ServiceLabels[$svc]
                Seconds   = 0
                Running   = $false
                Announced = $false
            }
        }
    }
)
do {
    foreach ($check in $ServiceChecks) {
        $status = (Get-Service -ComputerName $check.Node -Name $check.Service).Status
        $check.Running = ($status -eq "Running")
        if (-not $check.Running) {
            $check.Seconds += 1
        } elseif (-not $check.Announced) {
            Write-Host "$($check.Node): $($check.Label) running after $($check.Seconds) seconds"
            $check.Announced = $true
        }
    }
    $NotRunning = @($ServiceChecks | Where-Object { -not $_.Running })
    $TimedOut = @($ServiceChecks | Where-Object { $_.Seconds -gt $ServiceTimeout })
    if (($NotRunning.Count -gt 0) -and ($TimedOut.Count -eq 0)) { Start-Sleep -Seconds 1 }
} until (($NotRunning.Count -eq 0) -or ($TimedOut.Count -gt 0))
if ($TimedOut.Count -gt 0) {
    Write-Host "A service failed to start"
    # Exit with a non-zero code so the caller can see that the script did not finish.
    exit 1
}
# Keep asking the cluster to start every cluster shared volume that is not
# online, once per second, until none are left.
Write-Host "Waiting for all cluster shared volumes online"
while ($true) {
    Start-Sleep -Seconds 1
    $OfflineCSVs = Get-ClusterSharedVolume | Where-Object { $_.State -ne 'Online' }
    foreach ($OfflineCSV in $OfflineCSVs) {
        # Output and errors from the start attempt are discarded; the next pass re-checks the state.
        Start-ClusterResource -Name $OfflineCSV.Name -ErrorAction SilentlyContinue 2>&1 | Out-Null
    }
    if (@($OfflineCSVs).Count -eq 0) { break }
}
# Small pause to ensure cluster recognises that cluster shared volumes are up
Start-Sleep -Seconds 15
# Allow every cluster node to own the virtual machine resources again, bring the
# offline virtual machine resources online, then remove the startup task.
Write-Host "Setting virtual machine possible owners to all nodes"
$AllNodeNames = (Get-ClusterNode).Name
Get-ClusterResource |
    Where-Object { $_.ResourceType -eq "Virtual Machine" } |
    Set-ClusterOwnerNode -Owners $AllNodeNames

Write-Host "Starting virtual machines on all nodes"
Get-ClusterResource |
    Where-Object { $_.ResourceType -eq "Virtual Machine" } |
    Where-Object { $_.State -eq "Offline" } |
    Start-ClusterResource

Write-Host "Unregistering scheduled task"
Unregister-ScheduledTask -TaskName "Maintenance Mode Off" -Confirm:$false -ErrorAction SilentlyContinue