X Tutup
Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

public class KVMHAMonitor extends KVMHABase implements Runnable {

public static final List<StoragePoolType> STORAGE_POOL_TYPES_WITH_HA_SUPPORT = List.of(StoragePoolType.NetworkFilesystem, StoragePoolType.SharedMountPoint);
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HA-supported pool types are now defined in multiple places (e.g., also in CloudStackPrimaryDataStoreDriverImpl), and LibvirtStoragePool depends on KVMHAMonitor just to reuse this constant. Consider centralizing this list in a small shared utility/helper in an appropriate common package for the KVM plugin (or a single authoritative location) to avoid duplication and reduce cross-class coupling.

Copilot uses AI. Check for mistakes.

private final Map<String, HAStoragePool> storagePool = new ConcurrentHashMap<>();
private final boolean rebootHostAndAlertManagementOnHeartbeatTimeout;

Expand Down Expand Up @@ -86,7 +88,7 @@ protected void runHeartBeat() {
Set<String> removedPools = new HashSet<>();
for (String uuid : storagePool.keySet()) {
HAStoragePool primaryStoragePool = storagePool.get(uuid);
if (primaryStoragePool.getPool().getType() == StoragePoolType.NetworkFilesystem) {
if (STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(primaryStoragePool.getPool().getType())) {
checkForNotExistingPools(removedPools, uuid);
if (removedPools.contains(uuid)) {
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ public Answer execute(final CheckVMActivityOnStoragePoolCommand command, final L
final KVMStoragePoolManager storagePoolMgr = libvirtComputingResource.getStoragePoolMgr();

KVMStoragePool primaryPool = storagePoolMgr.getStoragePool(pool.getType(), pool.getUuid());
primaryPool.setType(pool.getType());

if (primaryPool.isPoolSupportHA()){
if (primaryPool.isPoolSupportHA()) {
final HAStoragePool nfspool = monitor.getStoragePool(pool.getUuid());
final KVMHAVMActivityChecker ha = new KVMHAVMActivityChecker(nfspool, command.getHost(), command.getVolumeList(), libvirtComputingResource.getVmActivityCheckPath(), command.getSuspectTimeInSeconds());
final Future<Boolean> future = executors.submit(ha);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ default Long getUsedIops() {

public StoragePoolType getType();

default void setType(StoragePoolType type) {
}

public boolean delete();

PhysicalDiskFormat getDefaultFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ public KVMStoragePool createStoragePool(String name, String host, int port, Stri
private synchronized KVMStoragePool createStoragePool(String name, String host, int port, String path, String userInfo, StoragePoolType type, Map<String, String> details, boolean primaryStorage) {
StorageAdaptor adaptor = getStorageAdaptor(type);
KVMStoragePool pool = adaptor.createStoragePool(name, host, port, path, userInfo, type, details, primaryStorage);
pool.setType(type);

// LibvirtStorageAdaptor-specific statement
if (pool.isPoolSupportHA() && primaryStorage) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import com.cloud.agent.properties.AgentProperties;
import com.cloud.agent.properties.AgentPropertiesFileHandler;
import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool;
import com.cloud.hypervisor.kvm.resource.KVMHAMonitor;
import com.cloud.storage.Storage;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.utils.exception.CloudRuntimeException;
Expand Down Expand Up @@ -320,13 +321,16 @@ public void setDetails(Map<String, String> details) {

@Override
public boolean isPoolSupportHA() {
return type == StoragePoolType.NetworkFilesystem;
return KVMHAMonitor.STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type);
}

public String getHearthBeatPath() {
if (type == StoragePoolType.NetworkFilesystem) {
if (StoragePoolType.NetworkFilesystem.equals(type)) {
String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR);
return Script.findScript(kvmScriptsDir, "kvmheartbeat.sh");
} else if (StoragePoolType.SharedMountPoint.equals(type)) {
String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR);
return Script.findScript(kvmScriptsDir, "kvmsmpheartbeat.sh");
}
return null;
}
Expand Down Expand Up @@ -410,4 +414,9 @@ public Boolean vmActivityCheck(HAStoragePool pool, HostTO host, Duration activit
return true;
}
}

@Override
public void setType(StoragePoolType type) {
this.type = type;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static com.cloud.utils.NumbersUtil.toHumanReadableSize;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

Expand Down Expand Up @@ -100,6 +101,8 @@ public Map<String, String> getCapabilities() {
protected Logger logger = LogManager.getLogger(getClass());
private static final String NO_REMOTE_ENDPOINT_WITH_ENCRYPTION = "No remote endpoint to send command, unable to find a valid endpoint. Requires encryption support: %s";

private static final List<StoragePoolType> STORAGE_POOL_TYPES_WITH_HA_SUPPORT = List.of(StoragePoolType.NetworkFilesystem, StoragePoolType.SharedMountPoint);

@Inject
DiskOfferingDao diskOfferingDao;
@Inject
Expand Down Expand Up @@ -587,7 +590,7 @@ private boolean anyVolumeRequiresEncryption(DataObject ... objects) {

@Override
public boolean isStorageSupportHA(StoragePoolType type) {
return StoragePoolType.NetworkFilesystem == type;
return type != null && STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type);
}

@Override
Expand Down
212 changes: 212 additions & 0 deletions scripts/vm/hypervisor/kvm/kvmsmpheartbeat.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

help() {
printf "Usage: $0
-i identifier (required for CLI compatibility; value ignored by local-only heartbeat)
-p path (required for CLI compatibility; value ignored by local-only heartbeat)
-m mount point (local path where heartbeat will be written)
-h host (host IP/name to include in heartbeat filename)
-r write/read hb log (read-check mode)
-c cleanup (trigger emergency reboot)
-t interval between read hb log\n"
exit 1
}

#set -x
NfsSvrIP=
NfsSvrPath=
MountPoint=
HostIP=
interval=
rflag=0
cflag=0

while getopts 'i:p:m:h:t:rc' OPTION
do
case $OPTION in
i)
NfsSvrIP="$OPTARG"
;; # retained for CLI compatibility but unused for local-only script
p)
NfsSvrPath="$OPTARG"
;; # retained for CLI compatibility but unused for local-only script
m)
MountPoint="$OPTARG"
;;
h)
HostIP="$OPTARG"
;;
r)
rflag=1
;;
t)
interval="$OPTARG"
;;
c)
cflag=1
;;
*)
help
;;
esac
done

# For local-only heartbeat we require a mountpoint
if [ -z "$MountPoint" ]
then
echo "Mount point (-m) is required"
help
fi

# Ensure mount point exists and is writable
if [ ! -d "$MountPoint" ]; then
mkdir -p "$MountPoint" 2>/dev/null
if [ $? -ne 0 ]; then
echo "Failed to create mount point directory: $MountPoint" >&2
exit 1
fi
fi

# Determine a sensible HostIP if not provided
if [ -z "$HostIP" ]; then
# try to get a non-loopback IPv4 address, fallback to hostname
ipaddr=$(hostname -I 2>/dev/null | awk '{print $1}')
if [ -n "$ipaddr" ]; then
HostIP="$ipaddr"
else
HostIP=$(hostname)
fi
fi

#delete VMs on this mountpoint (best-effort)
deleteVMs() {
local mountPoint=$1
vmPids=$(ps aux | grep qemu | grep "$mountPoint" | awk '{print $2}' 2> /dev/null)
if [ $? -gt 0 ]
then
return
fi
Comment on lines +101 to +104
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

$? here reflects only the exit status of the last command in the pipeline (awk), not whether ps/grep matched anything. As written, this check won’t behave as intended and is effectively redundant with the later -z "$vmPids" guard. Consider removing this $? block, or use set -o pipefail (with care for script-wide impact) or replace the pipeline with a more direct PID lookup (e.g., pgrep) and check for empty output.

Suggested change
if [ $? -gt 0 ]
then
return
fi

Copilot uses AI. Check for mistakes.

if [ -z "$vmPids" ]
then
return
fi

for pid in $vmPids
do
kill -9 $pid &> /dev/null
done
}

#checking is there the mount point present under $MountPoint?
mounts=$(cat /proc/mounts | grep "$MountPoint")
if [ $? -gt 0 ]
then
# mount point not present — we don't remount in local-only script
# nothing to do here; keep for compatibility with original flow
:
else
Comment on lines +118 to +124
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

grep "$MountPoint" can produce false positives when $MountPoint is a substring of another mount target (e.g., /mnt/cloudstack1 matching /mnt/cloudstack10). This can incorrectly treat the mount as present and trigger deleteVMs. Consider using a stricter mount check (e.g., matching the mount target field exactly or using findmnt).

Copilot uses AI. Check for mistakes.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
mounts=$(cat /proc/mounts | grep "$MountPoint")
if [ $? -gt 0 ]
then
# mount point not present — we don't remount in local-only script
# nothing to do here; keep for compatibility with original flow
:
else
mounts=$(cat /proc/mounts | grep "$MountPoint ")
if [ $? -gt 0 ]
then
# mount point not present — we don't remount in local-only script
# nothing to do here; keep for compatibility with original flow
:
else

# mount exists; if not in read-check mode, consider deleting VMs similar to original behavior
if [ "$rflag" == "0" ]
then
deleteVMs $MountPoint
fi
fi

hbFolder="$MountPoint/KVMHA/"
hbFile="$hbFolder/hb-$HostIP"

write_hbLog() {
#write the heart beat log
stat "$hbFile" &> /dev/null
if [ $? -gt 0 ]
then
# create a new one
mkdir -p "$hbFolder" &> /dev/null
# touch will be done by atomic write below; ensure folder is writable
if [ ! -w "$hbFolder" ]; then
printf "Folder not writable: $hbFolder" >&2
return 2
fi
fi

timestamp=$(date +%s)
# Write atomically to avoid partial writes (write to tmp then mv)
tmpfile="${hbFile}.$$"
printf "%s\n" "$timestamp" > "$tmpfile" 2>/dev/null
if [ $? -ne 0 ]; then
printf "Failed to write heartbeat to $tmpfile" >&2
return 2
fi
mv -f "$tmpfile" "$hbFile" 2>/dev/null
return $?
}

check_hbLog() {
if [ ! -f "$hbFile" ]; then
# signal large difference if file missing
return 999999
fi
now=$(date +%s)
hb=$(cat "$hbFile" 2>/dev/null)
if [ -z "$hb" ]; then
return 999998
fi
diff=`expr $now - $hb 2>/dev/null`
if [ $? -ne 0 ]
then
return 999997
fi
if [ -z "$interval" ]; then
# if no interval provided, consider 0 as success
if [ $diff -gt 0 ]; then
return $diff
else
return 0
fi
fi
if [ $diff -gt $interval ]
then
return $diff
fi
return 0
}

if [ "$rflag" == "1" ]
then
check_hbLog
diff=$?
if [ $diff == 0 ]
Comment on lines +162 to +195
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return in bash only supports exit codes 0–255; values like 999999 will wrap modulo 256, so diff=$? will not contain the intended “seconds ago” value and can lead to incorrect DEAD/ALIVE reporting. Consider having check_hbLog print the computed diff (e.g., via echo) and use its return code only as success/failure, or store the diff in a global variable and return 0/1.

Suggested change
if [ ! -f "$hbFile" ]; then
# signal large difference if file missing
return 999999
fi
now=$(date +%s)
hb=$(cat "$hbFile" 2>/dev/null)
if [ -z "$hb" ]; then
return 999998
fi
diff=`expr $now - $hb 2>/dev/null`
if [ $? -ne 0 ]
then
return 999997
fi
if [ -z "$interval" ]; then
# if no interval provided, consider 0 as success
if [ $diff -gt 0 ]; then
return $diff
else
return 0
fi
fi
if [ $diff -gt $interval ]
then
return $diff
fi
return 0
}
if [ "$rflag" == "1" ]
then
check_hbLog
diff=$?
if [ $diff == 0 ]
hb_diff=0
if [ ! -f "$hbFile" ]; then
# signal large difference if file missing
hb_diff=999999
return 1
fi
now=$(date +%s)
hb=$(cat "$hbFile" 2>/dev/null)
if [ -z "$hb" ]; then
hb_diff=999998
return 1
fi
diff=`expr $now - $hb 2>/dev/null`
if [ $? -ne 0 ]
then
hb_diff=999997
return 1
fi
if [ -z "$interval" ]; then
# if no interval provided, consider 0 as success
if [ $diff -gt 0 ]; then
hb_diff=$diff
return 1
else
hb_diff=0
return 0
fi
fi
if [ $diff -gt $interval ]
then
hb_diff=$diff
return 1
fi
hb_diff=0
return 0
}
if [ "$rflag" == "1" ]
then
check_hbLog
status=$?
diff="${hb_diff:-0}"
if [ $status -eq 0 ]

Copilot uses AI. Check for mistakes.
then
echo "=====> ALIVE <====="
else
echo "=====> Considering host as DEAD because last write on [$hbFile] was [$diff] seconds ago, but the max interval is [$interval] <======"
fi
Comment on lines +195 to +200
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is doing a numeric comparison but uses the string operator == and unquoted $diff. Use numeric operators (-eq, -ne) and quote variables in [ tests (or switch to [[ ... ]]) to avoid mis-comparisons or test errors if the variable is empty/unset.

Copilot uses AI. Check for mistakes.
exit 0
elif [ "$cflag" == "1" ]
then
/usr/bin/logger -t heartbeat "kvmsmpheartbeat.sh will reboot system because it was unable to write the heartbeat to the storage."
sync &
sleep 5
echo b > /proc/sysrq-trigger
exit $?
else
write_hbLog
exit $?
fi
Loading
X Tutup