From b6eb270bbd443eec0202f09f99e1f60ffaea7247 Mon Sep 17 00:00:00 2001 From: MrD3y5eL Date: Fri, 27 Dec 2024 13:59:53 +1000 Subject: [PATCH] Code refactoring --- custom_components/unraid/api/__init__.py | 3 + .../unraid/{ => api}/disk_mapping.py | 2 +- .../unraid/api/disk_operations.py | 2 +- custom_components/unraid/api/disk_state.py | 2 +- custom_components/unraid/binary_sensor.py | 1126 +---------------- custom_components/unraid/coordinator.py | 118 +- .../unraid/diagnostics/__init__.py | 18 + custom_components/unraid/diagnostics/base.py | 97 ++ custom_components/unraid/diagnostics/const.py | 53 + custom_components/unraid/diagnostics/disk.py | 359 ++++++ .../unraid/diagnostics/parity.py | 601 +++++++++ custom_components/unraid/diagnostics/pool.py | 440 +++++++ custom_components/unraid/diagnostics/ups.py | 177 +++ custom_components/unraid/helpers.py | 34 + custom_components/unraid/manifest.json | 2 +- custom_components/unraid/naming.py | 6 +- custom_components/unraid/sensors/storage.py | 303 +++-- 17 files changed, 2124 insertions(+), 1219 deletions(-) rename custom_components/unraid/{ => api}/disk_mapping.py (99%) create mode 100644 custom_components/unraid/diagnostics/__init__.py create mode 100644 custom_components/unraid/diagnostics/base.py create mode 100644 custom_components/unraid/diagnostics/const.py create mode 100644 custom_components/unraid/diagnostics/disk.py create mode 100644 custom_components/unraid/diagnostics/parity.py create mode 100644 custom_components/unraid/diagnostics/pool.py create mode 100644 custom_components/unraid/diagnostics/ups.py diff --git a/custom_components/unraid/api/__init__.py b/custom_components/unraid/api/__init__.py index a4edbbc..84b4277 100755 --- a/custom_components/unraid/api/__init__.py +++ b/custom_components/unraid/api/__init__.py @@ -9,6 +9,7 @@ from .smart_operations import SmartDataManager from .disk_state import DiskStateManager, DiskState from .disk_utils import is_valid_disk_name +from .disk_mapping import get_unraid_disk_mapping, get_disk_info __all__ = [ "DiskOperationsMixin", @@ -23,4 +24,6 @@ "DiskStateManager", "DiskState", "is_valid_disk_name", + "get_unraid_disk_mapping", + "get_disk_info", ] \ No newline at end of file diff --git a/custom_components/unraid/disk_mapping.py b/custom_components/unraid/api/disk_mapping.py similarity index 99% rename from custom_components/unraid/disk_mapping.py rename to custom_components/unraid/api/disk_mapping.py index 804f55e..b1acc8d 100755 --- a/custom_components/unraid/disk_mapping.py +++ b/custom_components/unraid/api/disk_mapping.py @@ -3,7 +3,7 @@ from typing import Dict, Any import logging -from .helpers import format_bytes +from ..helpers import format_bytes _LOGGER = logging.getLogger(__name__) diff --git a/custom_components/unraid/api/disk_operations.py b/custom_components/unraid/api/disk_operations.py index 79c935b..b1acf8e 100755 --- a/custom_components/unraid/api/disk_operations.py +++ b/custom_components/unraid/api/disk_operations.py @@ -10,7 +10,7 @@ from datetime import datetime from .disk_utils import is_valid_disk_name -from ..disk_mapping import parse_disk_config, parse_disks_ini +from .disk_mapping import parse_disk_config, parse_disks_ini from .smart_operations import SmartDataManager from .disk_state import DiskState, DiskStateManager diff --git a/custom_components/unraid/api/disk_state.py b/custom_components/unraid/api/disk_state.py index c1ac09a..6b3d241 100755 --- a/custom_components/unraid/api/disk_state.py +++ b/custom_components/unraid/api/disk_state.py @@ -194,4 
+194,4 @@ def get_spindown_delay(self, disk_name: str) -> int: return self._spindown_delays.get( disk_name, self._spindown_delays.get("default", 1800) - ) + ) \ No newline at end of file diff --git a/custom_components/unraid/binary_sensor.py b/custom_components/unraid/binary_sensor.py index c5a7e03..af18698 100755 --- a/custom_components/unraid/binary_sensor.py +++ b/custom_components/unraid/binary_sensor.py @@ -1,1064 +1,27 @@ """Binary sensors for Unraid.""" from __future__ import annotations -import asyncio -from datetime import datetime, timezone -from dataclasses import dataclass, field -from typing import Any, Callable, Dict, Optional import logging +from typing import Optional, Dict, Any -from homeassistant.components.binary_sensor import ( # type: ignore - BinarySensorDeviceClass, - BinarySensorEntity, - BinarySensorEntityDescription, -) from homeassistant.config_entries import ConfigEntry # type: ignore -from homeassistant.const import EntityCategory # type: ignore -from homeassistant.core import HomeAssistant, callback # type: ignore +from homeassistant.core import HomeAssistant # type: ignore from homeassistant.helpers.entity_platform import AddEntitiesCallback # type: ignore -from homeassistant.helpers.typing import StateType # type: ignore -from homeassistant.helpers.update_coordinator import CoordinatorEntity # type: ignore -from homeassistant.helpers.entity import DeviceInfo # type: ignore -from .const import ( - DOMAIN, - SpinDownDelay, +from .const import DOMAIN +from .diagnostics import ( + UnraidBinarySensorBase, + UnraidArrayDiskSensor, + UnraidParityDiskSensor, + UnraidPoolDiskSensor, + UnraidUPSBinarySensor, + SENSOR_DESCRIPTIONS, + UnraidParityCheckSensor, ) - from .coordinator import UnraidDataUpdateCoordinator -from .helpers import ( - DiskDataHelperMixin, - format_bytes, - get_disk_identifiers, - get_disk_number, - get_unraid_disk_mapping, -) -from .naming import EntityNaming _LOGGER = logging.getLogger(__name__) -def is_valid_disk_name(disk_name: str) -> bool: - """Determine if a disk name should be monitored. - - Args: - disk_name: The name of the disk to check. - - Returns: - bool: True if the disk should be monitored, False otherwise. - """ - if not disk_name: - return False - - # Array disks (disk1, disk2, etc) - if disk_name.startswith("disk"): - return True - - # Any cache pool (cache, cache2, cacheNVME, etc) - if disk_name.startswith("cache"): - return True - - # Custom pools (fastpool, nvmepool, etc) - # Skip system paths and known special names - invalid_names = {"parity", "flash", "boot", "temp", "user"} - if disk_name.lower() not in invalid_names: - return True - - return False - -@dataclass -class UnraidBinarySensorEntityDescription(BinarySensorEntityDescription): - """Describes Unraid binary sensor entity.""" - - # Add inherited fields that need to be explicitly declared - key: str - name: str | None = None - device_class: BinarySensorDeviceClass | None = None - entity_category: EntityCategory | None = None - icon: str | None = None - - # Custom fields - value_fn: Callable[[dict[str, Any]], bool | None] = field(default=lambda x: None) - has_warning_threshold: bool = False - warning_threshold: float | None = None - -SENSOR_DESCRIPTIONS: tuple[UnraidBinarySensorEntityDescription, ...] 
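The setup code above now imports `is_valid_disk_name` from `api/disk_utils.py` instead of defining it locally. For reference, a standalone copy of the deleted filter together with its expected behavior (the function body follows the removed block; the asserts are illustrative):

```python
def is_valid_disk_name(disk_name: str) -> bool:
    """Return True if a disk name should be monitored (logic from the removed block)."""
    if not disk_name:
        return False
    if disk_name.startswith("disk"):   # array disks: disk1, disk2, ...
        return True
    if disk_name.startswith("cache"):  # any cache pool: cache, cache2, cacheNVME
        return True
    # Custom pools are allowed; system paths and special names are not.
    invalid_names = {"parity", "flash", "boot", "temp", "user"}
    return disk_name.lower() not in invalid_names

assert is_valid_disk_name("disk1")
assert is_valid_disk_name("cacheNVME")
assert is_valid_disk_name("fastpool")
assert not is_valid_disk_name("parity")
assert not is_valid_disk_name("")
```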
= ( - UnraidBinarySensorEntityDescription( - key="ssh_connectivity", - name="Server Connection", # Base class will add Unraid prefix - device_class=BinarySensorDeviceClass.CONNECTIVITY, - entity_category=EntityCategory.DIAGNOSTIC, - value_fn=lambda data: data.get("system_stats") is not None, - icon="mdi:server-network", - ), - UnraidBinarySensorEntityDescription( - key="docker_service", - name="Docker Service", # Base class will add Unraid prefix - device_class=BinarySensorDeviceClass.RUNNING, - entity_category=EntityCategory.DIAGNOSTIC, - value_fn=lambda data: bool(data.get("docker_containers")), - icon="mdi:docker", - ), - UnraidBinarySensorEntityDescription( - key="vm_service", - name="VM Service", # Base class will add Unraid prefix - device_class=BinarySensorDeviceClass.RUNNING, - entity_category=EntityCategory.DIAGNOSTIC, - value_fn=lambda data: bool(data.get("vms")), - icon="mdi:desktop-tower", - ), -) - -class UnraidBinarySensorEntity(CoordinatorEntity, BinarySensorEntity): - """Base entity for Unraid binary sensors.""" - - entity_description: UnraidBinarySensorEntityDescription - - def __init__( - self, - coordinator: UnraidDataUpdateCoordinator, - description: UnraidBinarySensorEntityDescription, - ) -> None: - """Initialize the binary sensor.""" - super().__init__(coordinator) - self.entity_description = description - - # Initialize entity naming - naming = EntityNaming( - domain=DOMAIN, - hostname=coordinator.hostname, - component=description.key.split('_')[0] # First part of key as component - ) - - # Set unique ID and name using naming utility - self._attr_unique_id = naming.get_entity_id(description.key) - self._attr_name = f"{naming.clean_hostname()} {description.name}" - - # All binary sensors belong to main server device - self._attr_device_info = DeviceInfo( - identifiers={(DOMAIN, coordinator.entry.entry_id)}, - name=f"Unraid Server ({coordinator.hostname})", - manufacturer="Lime Technology", - model="Unraid Server", - ) - self._attr_has_entity_name = True - - @property - def available(self) -> bool: - """Return True if entity is available.""" - if self.entity_description.key == "ssh_connectivity": - return True - return self.coordinator.last_update_success - - @property - def is_on(self) -> bool | None: - """Return true if the binary sensor is on.""" - try: - return self.entity_description.value_fn(self.coordinator.data) - except KeyError as err: - _LOGGER.debug( - "Missing key in data for sensor %s: %s", - self.entity_description.key, - err - ) - return None - except TypeError as err: - _LOGGER.debug( - "Type error processing sensor %s: %s", - self.entity_description.key, - err - ) - return None - except AttributeError as err: - _LOGGER.debug( - "Attribute error for sensor %s: %s", - self.entity_description.key, - err - ) - return None - - @callback - def _handle_coordinator_update(self) -> None: - """Handle updated data from the coordinator.""" - self.async_write_ha_state() - -class UnraidDiskHealthSensor(UnraidBinarySensorEntity, DiskDataHelperMixin): - """Binary sensor for individual disk health.""" - - def __init__( - self, - coordinator: UnraidDataUpdateCoordinator, - disk_name: str, - ) -> None: - """Initialize the disk health sensor.""" - self._disk_name = disk_name - self._disk_num = get_disk_number(disk_name) - - # Initialize entity naming - naming = EntityNaming( - domain=DOMAIN, - hostname=coordinator.hostname, - component="disk" - ) - - # Get pretty name using naming utility - component_type = "cache" if disk_name == "cache" else "disk" - pretty_name = 
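Each description above carries a `value_fn` that the base entity's `is_on` applies to `coordinator.data`. A minimal standalone illustration of how the three lambdas evaluate (the sample payload is hypothetical):

```python
# The same lambdas as in SENSOR_DESCRIPTIONS, applied to sample data.
value_fns = {
    "ssh_connectivity": lambda data: data.get("system_stats") is not None,
    "docker_service": lambda data: bool(data.get("docker_containers")),
    "vm_service": lambda data: bool(data.get("vms")),
}

sample = {"system_stats": {}, "docker_containers": ["plex"], "vms": []}
for key, value_fn in value_fns.items():
    print(key, value_fn(sample))
# ssh_connectivity True   (key present, even with an empty dict)
# docker_service True     (non-empty container list)
# vm_service False        (an empty list is falsy)
```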
naming.get_entity_name(disk_name, component_type) - - super().__init__( - coordinator, - UnraidBinarySensorEntityDescription( - key=f"disk_health_{disk_name}", - name=f"{pretty_name} Health", - device_class=BinarySensorDeviceClass.PROBLEM, - entity_category=EntityCategory.DIAGNOSTIC, - icon="mdi:harddisk", - has_warning_threshold=True, - ), - ) - - # Get device and serial from helpers - self._device, self._serial = get_disk_identifiers(coordinator.data, disk_name) - - # Initialize tracking variables - self._last_smart_check = None - self._smart_status = None - self._last_problem_state = None - self._spin_down_delay = self._get_spin_down_delay() - self._last_temperature = None - self._problem_attributes: Dict[str, Any] = {} - - _LOGGER.debug( - "Initialized disk health sensor with device: %s, serial: %s", - self._device or "unknown", - self._serial or "unknown" - ) - - def _get_spin_down_delay(self) -> SpinDownDelay: - """Get spin down delay for this disk.""" - try: - disk_cfg = self.coordinator.data.get("disk_config", {}) - # Get global setting (default to NEVER/0 if not specified) - global_delay = int(disk_cfg.get("spindownDelay", "0")) - # Check for disk-specific setting if this is an array disk - if self._disk_num is not None: - disk_delay = disk_cfg.get(f"diskSpindownDelay.{self._disk_num}") - if disk_delay and disk_delay != "-1": # -1 means use global setting - global_delay = int(disk_delay) - return SpinDownDelay(global_delay) - except (ValueError, TypeError) as err: - _LOGGER.warning( - "Error getting spin down delay for %s: %s. Using default Never.", - self._disk_name, - err - ) - return SpinDownDelay.NEVER - - def _analyze_smart_status(self, disk_data: Dict[str, Any]) -> bool: - """Analyze SMART status and attributes for actual problems.""" - self._problem_attributes = {} - - try: - # Log sanitized disk data (exclude large data structures) - _LOGGER.debug( - "Starting SMART analysis for disk %s with data: %s", - self._disk_name, - {k: v for k, v in disk_data.items() if k not in ['smart_data']} - ) - - # Detailed initial state logging - _LOGGER.debug( - "Disk %s initial state - State: %s, Health: %s, Temperature: %s, Status: %s, Smart Status: %s", - self._disk_name, - disk_data.get("state", "unknown"), - disk_data.get("health", "unknown"), - disk_data.get("temperature", "unknown"), - disk_data.get("status", "unknown"), - disk_data.get("smart_status", "unknown") - ) - - # Check disk state using proper standby detection - disk_state = disk_data.get("state", "unknown").lower() - _LOGGER.debug("Disk %s current state: %s", self._disk_name, disk_state) - - # Only return cached state for confirmed standby - if disk_state == "standby": - _LOGGER.debug( - "Disk %s is in standby, using cached problem state: %s", - self._disk_name, - self._last_problem_state - ) - return self._last_problem_state if self._last_problem_state is not None else False - - has_problem = False - - # Get and validate SMART data - smart_data = disk_data.get("smart_data", {}) - if not smart_data: - _LOGGER.debug("No SMART data available for %s", self._disk_name) - return self._last_problem_state if self._last_problem_state is not None else False - - # Check overall SMART status - smart_status = smart_data.get("smart_status", True) - _LOGGER.debug("Disk %s SMART status: %s", self._disk_name, smart_status) - - if not smart_status: - self._problem_attributes["smart_status"] = "FAILED" - has_problem = True - _LOGGER.warning( - "Disk %s has failed SMART status", - self._disk_name - ) - - # Determine device type - device_type 
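`_get_spin_down_delay` above resolves a per-disk override against the global setting, where `-1` means "inherit the global value". The same rule as a standalone function, with plain ints standing in for the `SpinDownDelay` enum:

```python
def resolve_spindown(disk_cfg: dict, disk_num: int | None) -> int:
    """Per-disk diskSpindownDelay.N overrides spindownDelay unless it is -1."""
    global_delay = int(disk_cfg.get("spindownDelay", "0"))  # 0 == never
    if disk_num is not None:
        disk_delay = disk_cfg.get(f"diskSpindownDelay.{disk_num}")
        if disk_delay and disk_delay != "-1":  # -1 means use global setting
            global_delay = int(disk_delay)
    return global_delay

cfg = {"spindownDelay": "30", "diskSpindownDelay.2": "-1", "diskSpindownDelay.3": "15"}
assert resolve_spindown(cfg, 1) == 30   # no per-disk key -> global
assert resolve_spindown(cfg, 2) == 30   # "-1" -> fall back to global
assert resolve_spindown(cfg, 3) == 15   # explicit override
```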
= "nvme" if smart_data.get("type") == "nvme" else "sata" - _LOGGER.debug( - "Processing %s disk %s", - device_type.upper(), - self._disk_name - ) - - # Device specific checks - if device_type == "nvme": - # NVMe specific health checks - nvme_health = smart_data.get("nvme_smart_health_information_log", {}) - _LOGGER.debug( - "NVMe health data for %s: %s", - self._disk_name, - nvme_health - ) - - # Media errors check - media_errors = nvme_health.get("media_errors", 0) - if int(media_errors) > 0: - self._problem_attributes["media_errors"] = media_errors - has_problem = True - _LOGGER.warning( - "NVMe disk %s has %d media errors", - self._disk_name, - media_errors - ) - - # Critical warning check - if warning := nvme_health.get("critical_warning"): - if warning != 0: # NVMe uses numeric warning flags - self._problem_attributes["critical_warning"] = warning - has_problem = True - _LOGGER.warning( - "NVMe disk %s has critical warning: %d", - self._disk_name, - warning - ) - - # Temperature from NVMe health log - if temp := nvme_health.get("temperature"): - _LOGGER.debug( - "NVMe temperature for %s: %d°C", - self._disk_name, - temp - ) - if temp > 70: # NVMe temperature threshold - self._problem_attributes["temperature"] = f"{temp}°C" - has_problem = True - _LOGGER.warning( - "NVMe disk %s temperature is high: %d°C (threshold: 70°C)", - self._disk_name, - temp - ) - - else: - # SATA disk checks - _LOGGER.debug( - "Processing SATA attributes for %s", - self._disk_name - ) - - attributes = smart_data.get("ata_smart_attributes", {}).get("table", []) - - # Map of critical attributes and their thresholds - critical_attrs = { - "Reallocated_Sector_Ct": 0, - "Current_Pending_Sector": 0, - "Offline_Uncorrectable": 0, - "UDMA_CRC_Error_Count": 100, - "Reallocated_Event_Count": 0, - "Reported_Uncorrect": 0, - "Command_Timeout": 100 - } - - # Process each attribute - for attr in attributes: - name = attr.get("name") - if not name: - continue - - # Check critical attributes - if name in critical_attrs: - raw_value = attr.get("raw", {}).get("value", 0) - threshold = critical_attrs[name] - - _LOGGER.debug( - "Checking %s for %s: value=%s, threshold=%s", - name, - self._disk_name, - raw_value, - threshold - ) - - if int(raw_value) > threshold: - self._problem_attributes[name.lower()] = raw_value - has_problem = True - _LOGGER.warning( - "Disk %s has high %s: %d (threshold: %d)", - self._disk_name, - name, - raw_value, - threshold - ) - - # Temperature check from attributes - elif name == "Temperature_Celsius": - temp = attr.get("raw", {}).get("value") - if temp is not None: - _LOGGER.debug( - "SATA temperature for %s: %d°C", - self._disk_name, - temp - ) - if temp > 55: # SATA temperature threshold - self._problem_attributes["temperature"] = f"{temp}°C" - has_problem = True - _LOGGER.warning( - "SATA disk %s temperature is high: %d°C (threshold: 55°C)", - self._disk_name, - temp - ) - - # Store final state - self._last_problem_state = has_problem - - if has_problem: - _LOGGER.warning( - "Disk %s has problems: %s", - self._disk_name, - self._problem_attributes - ) - else: - _LOGGER.debug( - "No problems found for disk %s", - self._disk_name - ) - - return has_problem - - except Exception as err: - _LOGGER.error( - "SMART analysis failed for %s: %s", - self._disk_name, - err, - exc_info=True - ) - return self._last_problem_state if self._last_problem_state is not None else False - - @property - def is_on(self) -> bool | None: - """Return true if there's a problem with the disk.""" - try: - for disk in 
self.coordinator.data["system_stats"]["individual_disks"]: - if disk["name"] == self._disk_name: - # Update spin down delay if changed - new_delay = SpinDownDelay(disk.get("spin_down_delay", SpinDownDelay.MINUTES_30)) - if new_delay != self._spin_down_delay: - self._spin_down_delay = new_delay - _LOGGER.debug( - "Updated spin down delay for %s to %s", - self._disk_name, - self._spin_down_delay.to_human_readable() - ) - - # Get current state - is_standby = disk.get("state", "unknown").lower() == "standby" - if is_standby: - return self._last_problem_state if self._last_problem_state is not None else False - - current_time = datetime.now(timezone.utc) - should_check_smart = ( - self._smart_status is None # First check - or self._spin_down_delay == SpinDownDelay.NEVER # Never spin down - or ( - self._last_smart_check is not None - and ( - current_time - self._last_smart_check - ).total_seconds() >= self._spin_down_delay.to_seconds() - ) - ) - - if should_check_smart: - self._last_smart_check = current_time - return self._analyze_smart_status(disk) - - return self._last_problem_state if self._last_problem_state is not None else False - - return None - - except (KeyError, AttributeError, TypeError, ValueError) as err: - _LOGGER.debug("Error checking disk health: %s", err) - return self._last_problem_state if self._last_problem_state is not None else None - - @property - def extra_state_attributes(self) -> dict[str, StateType]: - """Return additional state attributes.""" - try: - for disk in self.coordinator.data["system_stats"]["individual_disks"]: - if disk["name"] == self._disk_name: - # Get current disk state and device type - is_standby = disk.get("state", "unknown").lower() == "standby" - is_nvme = "nvme" in str(self._device or "").lower() - - # Get storage attributes - attrs = self._get_storage_attributes( - total=disk.get("total", 0), - used=disk.get("used", 0), - free=disk.get("free", 0), - mount_point=disk.get("mount_point"), - device=self._device, - is_standby=is_standby - ) - - # Add disk serial - disk_map = get_unraid_disk_mapping( - {"system_stats": self.coordinator.data.get("system_stats", {})} - ) - if serial := disk_map.get(self._disk_name, {}).get("serial"): - attrs["disk_serial"] = serial - - # Handle temperature - temp = disk.get("temperature") - if is_nvme: - # NVMe drives always show actual temperature from SMART data - smart_data = disk.get("smart_data", {}) - nvme_temp = ( - smart_data.get("temperature") - or temp - or smart_data.get("nvme_temperature") - ) - if not is_standby and nvme_temp is not None: - self._last_temperature = nvme_temp - else: - # SATA drives - if not is_standby and temp is not None: - self._last_temperature = temp - - attrs["temperature"] = self._get_temperature_str( - self._last_temperature if is_standby else temp, - is_standby - ) - - # Add SMART status - if smart_data := disk.get("smart_data", {}): - attrs["smart_status"] = ( - "Passed" if smart_data.get("smart_status", True) - else "Failed" - ) - - # Add spin down delay - attrs["spin_down_delay"] = self._spin_down_delay.to_human_readable() - - # Add any problem details - if self._problem_attributes: - attrs["problem_details"] = self._problem_attributes - - return attrs - - return {} - - except (KeyError, AttributeError, TypeError) as err: - _LOGGER.debug("Missing key in disk data: %s", err) - return {} - -class UnraidParityDiskSensor(UnraidBinarySensorEntity, DiskDataHelperMixin): - """Binary sensor for parity disk health with enhanced monitoring.""" - - def _get_spin_down_delay(self) -> 
SpinDownDelay: - """Get spin down delay for parity disk with fallback.""" - try: - # Check disk config for parity-specific setting - disk_cfg = self.coordinator.data.get("disk_config", {}) - - # Get parity delay (diskSpindownDelay.0) - delay = disk_cfg.get("diskSpindownDelay.0") - if delay and delay != "-1": - _LOGGER.debug("Using parity-specific spin down delay: %s", delay) - return SpinDownDelay(int(delay)) - - # Use global setting - global_delay = disk_cfg.get("spindownDelay", "0") - _LOGGER.debug("Using global spin down delay: %s", global_delay) - return SpinDownDelay(int(global_delay)) - - except (ValueError, TypeError) as err: - _LOGGER.warning( - "Error getting spin down delay for parity disk: %s. Using default.", - err - ) - return SpinDownDelay.NEVER - - def __init__( - self, - coordinator: UnraidDataUpdateCoordinator, - parity_info: Dict[str, Any] - ) -> None: - """Initialize the parity disk sensor.""" - self._parity_info = parity_info - self._disk_serial = parity_info.get("diskId.0", "") # Get serial number - device = parity_info.get("rdevName.0", "").strip() - - _LOGGER.debug( - "Initializing parity disk sensor with device: %s, info: %s", - device, - {k: v for k, v in parity_info.items() if k != "smart_data"} - ) - - # Initialize entity naming - naming = EntityNaming( - domain=DOMAIN, - hostname=coordinator.hostname, - component="parity" - ) - - description = UnraidBinarySensorEntityDescription( - key="parity_health", - name=f"{naming.get_entity_name('parity', 'parity')} Health", - device_class=BinarySensorDeviceClass.PROBLEM, - entity_category=EntityCategory.DIAGNOSTIC, - icon="mdi:harddisk", - has_warning_threshold=True, - ) - - # Initialize parent class - super().__init__(coordinator, description) - - # Override device info for parity disk - self._device = device - self._attr_name = f"{naming.clean_hostname()} Parity Health" - - # Initialize state variables - self._last_state = None - self._problem_attributes: Dict[str, Any] = {} - self._last_smart_check = None - self._smart_status = None - self._last_temperature = None - self._disk_state = "unknown" # Add disk state initialization - - # Get spin down delay from config - self._spin_down_delay = self._get_spin_down_delay() - - def _get_temperature(self) -> Optional[int]: - """Get current disk temperature.""" - try: - # Get current array state - array_state = self.coordinator.data.get("array_state", {}) - self._disk_state = "active" if array_state.get("state") == "STARTED" else "standby" - - # First check disk data - for disk in self.coordinator.data.get("system_stats", {}).get("individual_disks", []): - if disk.get("name") == "parity" and (temp := disk.get("temperature")) is not None: - _LOGGER.debug("Got parity temperature %d°C from disk data", temp) - self._last_temperature = temp - return temp - - # Try SMART data if available - if self._device: - smart_data = self.coordinator.data.get("smart_data", {}).get(self._device, {}) - if temp := smart_data.get("temperature"): - _LOGGER.debug("Got parity temperature %d°C from SMART data", temp) - self._last_temperature = temp - return temp - - # Return cached temperature if available - if self._disk_state == "standby" and self._last_temperature is not None: - _LOGGER.debug("Using cached temperature for standby parity disk: %d°C", self._last_temperature) - return self._last_temperature - - _LOGGER.debug("No temperature data available for parity disk") - return None - - except Exception as err: - _LOGGER.error("Error getting parity temperature: %s", err) - return None - - def 
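The constructor above consumes a `parity_info` mapping built from `mdcmd status` output, where the `.0` suffix addresses the parity slot. A hypothetical mapping using only the keys this class actually reads (all values are illustrative):

```python
# Hypothetical parity_info; keys match those referenced in the class above.
parity_info = {
    "rdevName.0": "sdb",                # device name -> /dev/sdb
    "diskId.0": "WDC_WD80EFAX-SERIAL",  # disk serial number
    "rdevStatus.0": "DISK_OK",          # health status string
    "diskState.0": "7",                 # 7 == normal operation
    "diskSize.0": "15628053168",        # size in 512-byte sectors
}

device = parity_info.get("rdevName.0", "").strip()
serial = parity_info.get("diskId.0", "")
```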
_analyze_smart_status(self, disk_data: Dict[str, Any]) -> bool: - """Analyze SMART status and attributes for actual problems.""" - self._problem_attributes = {} - previous_state = self._last_state - - try: - _LOGGER.debug( - "Starting SMART analysis for parity disk with data: %s", - {k: v for k, v in disk_data.items() if k not in ['smart_data', 'attributes']} - ) - - _LOGGER.debug( - "Parity disk initial state - State: %s, Temperature: %s°C, Status: %s", - disk_data.get("state", "unknown"), - disk_data.get("temperature", "unknown"), - self._parity_info.get("rdevStatus.0", "unknown") - ) - - has_problem = False - - # Check parity status first - if (status := self._parity_info.get("rdevStatus.0")) != "DISK_OK": - self._problem_attributes["parity_status"] = status - has_problem = True - _LOGGER.warning("Parity disk status issue: %s", status) - - # Check disk state (7 is normal operation) - if (state := self._parity_info.get("diskState.0", "0")) != "7": - self._problem_attributes["disk_state"] = f"Abnormal ({state})" - has_problem = True - _LOGGER.warning("Parity disk state issue: %s", state) - - # Get and validate SMART data - smart_data = disk_data.get("smart_data", {}) - if smart_data: - _LOGGER.debug("Processing SMART data for parity disk") - - # Check overall SMART status - smart_status = smart_data.get("smart_status", True) - if not smart_status: - self._problem_attributes["smart_status"] = "FAILED" - has_problem = True - _LOGGER.warning("Parity disk has failed SMART status") - - # Process SMART attributes - attributes = smart_data.get("ata_smart_attributes", {}).get("table", []) - - # Map of critical attributes and their thresholds - critical_attrs = { - "Reallocated_Sector_Ct": 0, - "Current_Pending_Sector": 0, - "Offline_Uncorrectable": 0, - "UDMA_CRC_Error_Count": 100, - "Reallocated_Event_Count": 0, - "Reported_Uncorrect": 0, - "Command_Timeout": 100 - } - - # Process each attribute - for attr in attributes: - name = attr.get("name") - if not name: - continue - - # Check critical attributes - if name in critical_attrs: - raw_value = attr.get("raw", {}).get("value", 0) - threshold = critical_attrs[name] - - _LOGGER.debug( - "Checking parity disk attribute %s: value=%s, threshold=%s", - name, - raw_value, - threshold - ) - - if int(raw_value) > threshold: - self._problem_attributes[name.lower()] = raw_value - has_problem = True - _LOGGER.warning( - "Parity disk has high %s: %d (threshold: %d)", - name, - raw_value, - threshold - ) - - # Temperature check - elif name == "Temperature_Celsius": - temp = attr.get("raw", {}).get("value") - if temp is not None: - _LOGGER.debug("Parity disk temperature from SMART: %d°C", temp) - if temp > 55: # Temperature threshold - self._problem_attributes["temperature"] = f"{temp}°C" - has_problem = True - _LOGGER.warning( - "Parity disk temperature is high: %d°C (threshold: 55°C)", - temp - ) - - # Log state changes - if previous_state != has_problem: - _LOGGER.info( - "Parity disk health state changed: %s -> %s", - "Problem" if previous_state else "OK", - "Problem" if has_problem else "OK" - ) - - # Store final state - self._last_state = has_problem - - if has_problem: - _LOGGER.warning( - "Parity disk has problems: %s", - self._problem_attributes - ) - else: - _LOGGER.debug("No problems found for parity disk") - - return has_problem - - except Exception as err: - _LOGGER.error( - "SMART analysis failed for parity disk: %s", - err, - exc_info=True - ) - return self._last_state if self._last_state is not None else False - - @property - def 
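Before any SMART data is consulted, the method above applies two non-SMART gates from the mdcmd fields: `rdevStatus.0` must be `DISK_OK` and `diskState.0` must be `7`. The same gates as a standalone function (the failing status string is illustrative):

```python
def parity_mdcmd_problems(info: dict) -> dict:
    """Non-SMART health gates from mdcmd fields, as checked above."""
    problems = {}
    if (status := info.get("rdevStatus.0")) != "DISK_OK":
        problems["parity_status"] = status
    if (state := info.get("diskState.0", "0")) != "7":
        problems["disk_state"] = f"Abnormal ({state})"
    return problems

assert parity_mdcmd_problems({"rdevStatus.0": "DISK_OK", "diskState.0": "7"}) == {}
assert parity_mdcmd_problems({"rdevStatus.0": "DISK_DSBL", "diskState.0": "0"}) == {
    "parity_status": "DISK_DSBL",
    "disk_state": "Abnormal (0)",
}
```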
available(self) -> bool: - """Return True if entity is available.""" - return ( - self.coordinator.last_update_success - and bool(self._device) - and bool(self._parity_info) - ) - - @property - def is_on(self) -> bool | None: - """Return true if there's a problem with the disk.""" - try: - for disk in self.coordinator.data["system_stats"]["individual_disks"]: - if disk["name"] == "parity": - # Update spin down delay if changed - new_delay = SpinDownDelay(disk.get("spin_down_delay", SpinDownDelay.MINUTES_30)) - if new_delay != self._spin_down_delay: - self._spin_down_delay = new_delay - _LOGGER.debug( - "Updated spin down delay for %s to %s", - "parity", - self._spin_down_delay.to_human_readable() - ) - - # Get current state - current_state = disk.get("state", "unknown").lower() - if current_state == "standby": - return self._last_state if self._last_state is not None else False - - current_time = datetime.now(timezone.utc) - should_check_smart = ( - self._smart_status is None # First check - or self._spin_down_delay == SpinDownDelay.NEVER # Never spin down - or ( - self._last_smart_check is not None - and ( - current_time - self._last_smart_check - ).total_seconds() >= self._spin_down_delay.to_seconds() - ) - ) - - if should_check_smart: - # Smart data will be updated by coordinator - self._last_smart_check = current_time - return self._analyze_smart_status(disk) - - # Use cached status - return self._last_state if self._last_state is not None else False - - return None - - except (KeyError, AttributeError, TypeError, ValueError) as err: - _LOGGER.debug("Error checking disk health: %s", err) - return self._last_state if self._last_state is not None else None - - @property - def state(self) -> str: - """Return the state of the sensor.""" - if self.is_on: - return "Problem" - return "OK" - - @property - def extra_state_attributes(self) -> dict[str, StateType]: - """Return additional state attributes.""" - try: - # Get current status from array state - array_state = self.coordinator.data.get("array_state", {}) - disk_status = "active" if array_state.get("state") == "STARTED" else "standby" - - # Build attributes - attrs = { - "device": self._device, - "disk_status": disk_status, - "power_state": disk_status, - "spin_down_delay": self._spin_down_delay.to_human_readable(), - "smart_status": "Failed" if self._last_state else "Passed", - "disk_serial": self._disk_serial - } - - # Add temperature if available - attrs["temperature"] = self._get_temperature_str( - self._get_temperature(), - disk_status == "standby" - ) - - # Add disk size information using cached size - if size := self._parity_info.get("diskSize.0"): - try: - # Get device path - device_path = self._parity_info.get("rdevName.0") - # Use cached size if available, otherwise use sector calculation - if hasattr(self, '_cached_size'): - size_bytes = self._cached_size - else: - size_bytes = int(size) * 512 # Fallback to sector calculation - - attrs["total_size"] = format_bytes(size_bytes) - _LOGGER.debug( - "Added disk size for %s: %s (raw sectors: %s)", - device_path or "unknown", - attrs["total_size"], - size - ) - except (ValueError, TypeError) as err: - _LOGGER.error("Error calculating disk size: %s", err) - size_bytes = int(size) * 512 # Fallback - attrs["total_size"] = format_bytes(size_bytes) - - # Add SMART details if available - if self._device: - smart_data = self.coordinator.data.get("smart_data", {}).get(self._device, {}) - if smart_data: - attrs["smart_details"] = { - "power_on_hours": smart_data.get("power_on_hours"), - 
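`diskSize.0` is reported in 512-byte sectors, which is why the fallback above multiplies by 512 when no cached `lsblk` size exists. A worked example using the sector count of a typical 8 TB drive:

```python
sectors = 15628053168        # illustrative diskSize.0 value for an 8 TB disk
size_bytes = sectors * 512   # the fallback calculation used above
print(size_bytes)            # 8001563222016 bytes, i.e. about 8.0 TB
```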
"status": "Passed" if smart_data.get("smart_status", True) else "Failed", - "device_model": smart_data.get("model_name", "Unknown"), - "serial_number": smart_data.get("serial_number", "Unknown"), - "firmware": smart_data.get("firmware_version", "Unknown") - } - _LOGGER.debug("Added SMART details: %s", attrs["smart_details"]) - - # Add any problem details - if self._problem_attributes: - attrs["problem_details"] = self._problem_attributes - _LOGGER.debug("Added problem details: %s", self._problem_attributes) - - return attrs - - except Exception as err: - _LOGGER.error("Error getting parity attributes: %s", err) - return {} - - async def async_update_disk_size(self) -> None: - """Update disk size asynchronously.""" - try: - if size := self._parity_info.get("diskSize.0"): - device_path = self._parity_info.get("rdevName.0") - if device_path: - result = await self.coordinator.api.execute_command( - f"lsblk -b -d -o SIZE /dev/{device_path} | tail -n1" - ) - if result.exit_status == 0 and result.stdout.strip(): - self._cached_size = int(result.stdout.strip()) - _LOGGER.debug( - "Updated cached disk size for %s: %d bytes", - device_path, - self._cached_size - ) - else: - self._cached_size = int(size) * 512 - else: - self._cached_size = int(size) * 512 - except Exception as err: - _LOGGER.error("Error updating disk size: %s", err) - if size: - self._cached_size = int(size) * 512 - - async def async_added_to_hass(self) -> None: - """When entity is added to hass.""" - await super().async_added_to_hass() - # Initialize disk size - await self.async_update_disk_size() - - @callback - def _handle_coordinator_update(self) -> None: - """Handle updated data from the coordinator.""" - # Schedule disk size update - asyncio.create_task(self.async_update_disk_size()) - super()._handle_coordinator_update() - -class UnraidUPSBinarySensor(UnraidBinarySensorEntity): - """Binary sensor for UPS monitoring.""" - - def __init__(self, coordinator: UnraidDataUpdateCoordinator) -> None: - """Initialize UPS binary sensor.""" - # Initialize entity naming - naming = EntityNaming( - domain=DOMAIN, - hostname=coordinator.hostname, - component="ups" - ) - - super().__init__( - coordinator, - UnraidBinarySensorEntityDescription( - key="ups_status", - name=f"{naming.get_entity_name('ups', 'ups')} Status", - device_class=BinarySensorDeviceClass.POWER, - entity_category=EntityCategory.DIAGNOSTIC, - icon="mdi:battery-medium", - ) - ) - - @property - def available(self) -> bool: - """Return True if entity is available.""" - ups_info = self.coordinator.data.get("system_stats", {}).get("ups_info") - return self.coordinator.last_update_success and bool(ups_info) - - @property - def is_on(self) -> bool | None: - """Return true if the UPS is online.""" - try: - status = self.coordinator.data["system_stats"].get("ups_info", {}).get("STATUS") - if status is None: - return None - return status.upper() in ["ONLINE", "ON LINE"] - except (KeyError, AttributeError, TypeError) as err: - _LOGGER.debug("Error getting UPS status: %s", err) - return None - - @property - def extra_state_attributes(self) -> dict[str, Any]: - """Return additional state attributes.""" - try: - ups_info = self.coordinator.data["system_stats"].get("ups_info", {}) - - # Format numeric values with units - attrs = { - "model": ups_info.get("MODEL", "Unknown"), - "status": ups_info.get("STATUS", "Unknown"), - } - # Add percentage values - if "BCHARGE" in ups_info: - attrs["battery_charge"] = f"{ups_info['BCHARGE']}%" - if "LOADPCT" in ups_info: - attrs["load_percentage"] = 
f"{ups_info['LOADPCT']}%" - # Add time values - if "TIMELEFT" in ups_info: - attrs["runtime_left"] = f"{ups_info['TIMELEFT']} minutes" - # Add power/voltage values - if "NOMPOWER" in ups_info: - attrs["nominal_power"] = f"{ups_info['NOMPOWER']}W" - if "LINEV" in ups_info: - attrs["line_voltage"] = f"{ups_info['LINEV']}V" - if "BATTV" in ups_info: - attrs["battery_voltage"] = f"{ups_info['BATTV']}V" - - return attrs - except (KeyError, AttributeError, TypeError) as err: - _LOGGER.debug("Error getting UPS attributes: %s", err) - return {} - async def _get_parity_info(coordinator: UnraidDataUpdateCoordinator) -> Optional[Dict[str, Any]]: """Get parity disk information from mdcmd status.""" try: @@ -1097,12 +60,12 @@ async def async_setup_entry( ) -> None: """Set up Unraid binary sensors.""" coordinator: UnraidDataUpdateCoordinator = hass.data[DOMAIN][entry.entry_id] - entities: list[UnraidBinarySensorEntity] = [] + entities: list[UnraidBinarySensorBase] = [] processed_disks = set() # Track processed disks # Add base sensors first for description in SENSOR_DESCRIPTIONS: - entities.append(UnraidBinarySensorEntity(coordinator, description)) + entities.append(UnraidBinarySensorBase(coordinator, description)) _LOGGER.debug( "Added binary sensor | description_key: %s | name: %s", description.key, @@ -1112,21 +75,27 @@ async def async_setup_entry( # Add UPS sensor if UPS info is available if coordinator.data.get("system_stats", {}).get("ups_info"): entities.append(UnraidUPSBinarySensor(coordinator)) - _LOGGER.debug( - "Added UPS binary sensor | name: %s", - "UPS Status", - ) + _LOGGER.debug("Added UPS binary sensor") - # Check for and add parity disk sensor + # Check for and add parity-related sensors parity_info = await _get_parity_info(coordinator) if parity_info: - # Store parity info in coordinator data for future updates + # Store parity info in coordinator data coordinator.data["parity_info"] = parity_info + + # Add parity disk sensor entities.append(UnraidParityDiskSensor(coordinator, parity_info)) _LOGGER.debug( - "Added parity disk sensor for device: %s", + "Added parity disk sensor | device: %s", parity_info.get("rdevName.0") ) + + # Add parity check sensor + entities.append(UnraidParityCheckSensor(coordinator)) + _LOGGER.debug( + "Added parity check sensor for %s", + coordinator.hostname + ) # Filter out tmpfs and special mounts ignored_mounts = { @@ -1145,35 +114,50 @@ async def async_setup_entry( ) ] + # First process array disks for disk in valid_disks: disk_name = disk.get("name") - mount_point = disk.get("mount_point", "") - - # Skip if invalid or already processed if not disk_name or disk_name in processed_disks: continue + + if disk_name.startswith("disk"): + try: + entities.append( + UnraidArrayDiskSensor( + coordinator=coordinator, + disk_name=disk_name + ) + ) + processed_disks.add(disk_name) + _LOGGER.info( + "Added array disk sensor: %s", + disk_name + ) + except ValueError as err: + _LOGGER.warning("Skipping invalid array disk %s: %s", disk_name, err) + continue - if is_valid_disk_name(disk_name): - _LOGGER.debug( - "Adding health sensor for disk: %s (mount: %s)", - disk_name, - mount_point - ) + # Then process pool disks + for disk in valid_disks: + disk_name = disk.get("name") + if not disk_name or disk_name in processed_disks: + continue + + if not disk_name.startswith("disk"): try: entities.append( - UnraidDiskHealthSensor( + UnraidPoolDiskSensor( coordinator=coordinator, disk_name=disk_name ) ) processed_disks.add(disk_name) _LOGGER.info( - "Added health sensor for %s 
disk: %s", - "pool" if not (disk_name.startswith("disk") or disk_name == "cache") else "array", + "Added pool disk sensor: %s", disk_name ) except ValueError as err: - _LOGGER.warning("Skipping invalid disk %s: %s", disk_name, err) + _LOGGER.warning("Skipping invalid pool disk %s: %s", disk_name, err) continue - async_add_entities(entities) + async_add_entities(entities) \ No newline at end of file diff --git a/custom_components/unraid/coordinator.py b/custom_components/unraid/coordinator.py index dd1bfc4..502e517 100755 --- a/custom_components/unraid/coordinator.py +++ b/custom_components/unraid/coordinator.py @@ -36,7 +36,7 @@ from .unraid import UnraidAPI from .helpers import get_unraid_disk_mapping from .insights import get_docker_insights -from .disk_mapping import get_disk_info +from .api.disk_mapping import get_disk_info _LOGGER = logging.getLogger(__name__) @@ -396,6 +396,15 @@ async def _async_update_data(self) -> Dict[str, Any]: _LOGGER.error("Error getting system stats: %s", err) data["system_stats"] = {} + # Get array state and parity info + array_state = await self._get_array_state() + if array_state: + data["array_state"] = array_state + _LOGGER.debug( + "Got array state with parity history: %s", + {k: v for k, v in array_state.items() if k != "parity_history"} + ) + # Step 2: Get VMs, Docker Containers, and User Scripts try: # Run these tasks concurrently @@ -713,4 +722,109 @@ async def async_update_docker_insights(self, enabled: bool) -> None: finally: # Ensure we're in a consistent state if not enabled and self.docker_monitor: - self.docker_monitor = None \ No newline at end of file + self.docker_monitor = None + + async def _get_array_state(self) -> Optional[Dict[str, Any]]: + """Get array state information.""" + try: + result = await self.api.execute_command("mdcmd status") + if result.exit_status != 0: + return None + + array_state = {} + for line in result.stdout.splitlines(): + if "=" not in line: + continue + key, value = line.split("=", 1) + array_state[key.strip()] = value.strip() + + # Parse parity history + parity_history = await self._parse_parity_history() + if parity_history: + array_state["parity_history"] = parity_history + + return array_state + + except Exception as err: + _LOGGER.error("Error getting array state: %s", err) + return None + + async def _parse_parity_history(self) -> Optional[Dict[str, Any]]: + """Parse parity check history from the log file.""" + try: + _LOGGER.debug("Attempting to read parity check history") + result = await self.api.execute_command( + "cat /boot/config/parity-checks.log" + ) + if result.exit_status != 0: + _LOGGER.warning( + "Failed to read parity history file: exit_code=%d, stderr='%s'", + result.exit_status, + result.stderr + ) + return None + + _LOGGER.debug("Raw parity history content: %s", result.stdout) + + latest_check = None + for line in result.stdout.splitlines(): + # Format: YYYY MMM DD HH:MM:SS|Duration|Speed|Status|Errors|Type|Size + _LOGGER.debug("Processing parity history line: %s", line) + + fields = line.strip().split("|") + if len(fields) < 7: + _LOGGER.warning("Invalid parity history line (not enough fields): %s", line) + continue + + try: + # Handle the space-separated date format + date_str = fields[0] + _LOGGER.debug("Parsing date: %s", date_str) + check_date = datetime.strptime(date_str, "%Y %b %d %H:%M:%S") + + # Format duration from seconds to readable format + duration_secs = int(fields[1]) + hours = duration_secs // 3600 + minutes = (duration_secs % 3600) // 60 + seconds = duration_secs % 60 + 
duration_str = f"{hours} hours, {minutes} minutes, {seconds} seconds" + + # Format speed in MB/s + speed_bytes = int(fields[2]) + speed_mb = round(speed_bytes / (1024 * 1024), 2) + + check_info = { + "date": check_date.strftime("%Y-%m-%d %H:%M:%S"), + "duration": duration_str, + "speed": f"{speed_mb} MB/s", + "status": "Success" if fields[3] == "0" else f"Failed ({fields[3]} errors)", + "errors": int(fields[4]), + "type": fields[5], + "size": fields[6] + } + + _LOGGER.debug("Processed check info: %s", check_info) + + if not latest_check or check_date > datetime.strptime(latest_check["date"], "%Y-%m-%d %H:%M:%S"): + latest_check = check_info + _LOGGER.debug("Updated latest check info") + + except (ValueError, IndexError) as err: + _LOGGER.warning( + "Error parsing parity history line '%s': %s", + line, + err, + exc_info=True + ) + continue + + _LOGGER.debug("Final latest check info: %s", latest_check) + return latest_check + + except Exception as err: + _LOGGER.error( + "Error reading parity history: %s", + err, + exc_info=True + ) + return None \ No newline at end of file diff --git a/custom_components/unraid/diagnostics/__init__.py b/custom_components/unraid/diagnostics/__init__.py new file mode 100644 index 0000000..aa9d081 --- /dev/null +++ b/custom_components/unraid/diagnostics/__init__.py @@ -0,0 +1,18 @@ +"""Diagnostic sensor implementations for Unraid.""" +from .base import UnraidBinarySensorBase +from .disk import UnraidArrayDiskSensor +from .pool import UnraidPoolDiskSensor +from .parity import UnraidParityDiskSensor, UnraidParityCheckSensor +from .ups import UnraidUPSBinarySensor +from .const import UnraidBinarySensorEntityDescription, SENSOR_DESCRIPTIONS + +__all__ = [ + "UnraidBinarySensorBase", + "UnraidArrayDiskSensor", + "UnraidPoolDiskSensor", + "UnraidParityDiskSensor", + "UnraidParityCheckSensor", + "UnraidUPSBinarySensor", + "UnraidBinarySensorEntityDescription", + "SENSOR_DESCRIPTIONS", +] \ No newline at end of file diff --git a/custom_components/unraid/diagnostics/base.py b/custom_components/unraid/diagnostics/base.py new file mode 100644 index 0000000..05a9632 --- /dev/null +++ b/custom_components/unraid/diagnostics/base.py @@ -0,0 +1,97 @@ +"""Base binary sensor implementations for Unraid.""" +from __future__ import annotations + +import logging + +from homeassistant.components.binary_sensor import ( # type: ignore + BinarySensorEntity, +) +from homeassistant.core import callback # type: ignore +from homeassistant.helpers.update_coordinator import CoordinatorEntity # type: ignore +from homeassistant.helpers.entity import DeviceInfo # type: ignore + +from ..const import DOMAIN +from .const import UnraidBinarySensorEntityDescription +from ..naming import EntityNaming +from ..coordinator import UnraidDataUpdateCoordinator + +_LOGGER = logging.getLogger(__name__) + +class UnraidBinarySensorBase(CoordinatorEntity, BinarySensorEntity): + """Base class for Unraid binary sensors.""" + + entity_description: UnraidBinarySensorEntityDescription + + def __init__( + self, + coordinator: UnraidDataUpdateCoordinator, + description: UnraidBinarySensorEntityDescription, + ) -> None: + """Initialize the binary sensor.""" + super().__init__(coordinator) + self.entity_description = description + self._attr_has_entity_name = True + + # Initialize entity naming + naming = EntityNaming( + domain=DOMAIN, + hostname=coordinator.hostname, + component=description.key.split('_')[0] + ) + + self._attr_unique_id = naming.get_entity_id(description.key) + self._attr_name = 
f"{naming.clean_hostname()} {description.name}" + + _LOGGER.debug( + "Binary Sensor initialized | unique_id: %s | name: %s | description.key: %s", + self._attr_unique_id, + self._attr_name, + description.key + ) + + @property + def device_info(self) -> DeviceInfo: + """Return device information.""" + return DeviceInfo( + identifiers={(DOMAIN, self.coordinator.entry.entry_id)}, + name=f"Unraid Server ({self.coordinator.hostname})", + manufacturer="Lime Technology", + model="Unraid Server", + ) + + @property + def is_on(self) -> bool | None: + """Return true if the binary sensor is on.""" + try: + return self.entity_description.value_fn(self.coordinator.data) + except KeyError as err: + _LOGGER.debug( + "Missing key in data for sensor %s: %s", + self.entity_description.key, + err + ) + return None + except TypeError as err: + _LOGGER.debug( + "Type error processing sensor %s: %s", + self.entity_description.key, + err + ) + return None + except AttributeError as err: + _LOGGER.debug( + "Attribute error for sensor %s: %s", + self.entity_description.key, + err + ) + return None + + @property + def available(self) -> bool: + """Return if entity is available.""" + return self.coordinator.last_update_success + + @callback + def _handle_coordinator_update(self) -> None: + """Handle updated data from the coordinator.""" + self.async_write_ha_state() \ No newline at end of file diff --git a/custom_components/unraid/diagnostics/const.py b/custom_components/unraid/diagnostics/const.py new file mode 100644 index 0000000..f2bb4f3 --- /dev/null +++ b/custom_components/unraid/diagnostics/const.py @@ -0,0 +1,53 @@ +"""Constants for Unraid diagnostic sensors.""" +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Callable, Any + +from homeassistant.components.binary_sensor import ( # type: ignore + BinarySensorDeviceClass, + BinarySensorEntityDescription, +) +from homeassistant.const import EntityCategory # type: ignore + +@dataclass +class UnraidBinarySensorEntityDescription(BinarySensorEntityDescription): + """Describes Unraid binary sensor entity.""" + + key: str + name: str | None = None + device_class: BinarySensorDeviceClass | None = None + entity_category: EntityCategory | None = None + icon: str | None = None + value_fn: Callable[[dict[str, Any]], bool | None] = field(default=lambda x: None) + has_warning_threshold: bool = False + warning_threshold: float | None = None + +SENSOR_DESCRIPTIONS: tuple[UnraidBinarySensorEntityDescription, ...] 
= ( + UnraidBinarySensorEntityDescription( + key="ssh_connectivity", + name="Server Connection", + device_class=BinarySensorDeviceClass.CONNECTIVITY, + entity_category=EntityCategory.DIAGNOSTIC, + value_fn=lambda data: data.get("system_stats") is not None, + icon="mdi:server-network", + ), + UnraidBinarySensorEntityDescription( + key="docker_service", + name="Docker Service", + device_class=BinarySensorDeviceClass.RUNNING, + entity_category=EntityCategory.DIAGNOSTIC, + value_fn=lambda data: bool(data.get("docker_containers")), + icon="mdi:docker", + ), + UnraidBinarySensorEntityDescription( + key="vm_service", + name="VM Service", + device_class=BinarySensorDeviceClass.RUNNING, + entity_category=EntityCategory.DIAGNOSTIC, + value_fn=lambda data: bool(data.get("vms")), + icon="mdi:desktop-tower", + ), +) + +__all__ = ["UnraidBinarySensorEntityDescription", "SENSOR_DESCRIPTIONS"] \ No newline at end of file diff --git a/custom_components/unraid/diagnostics/disk.py b/custom_components/unraid/diagnostics/disk.py new file mode 100644 index 0000000..21e3e67 --- /dev/null +++ b/custom_components/unraid/diagnostics/disk.py @@ -0,0 +1,359 @@ +"""Array disk health monitoring for Unraid.""" +from __future__ import annotations + +import logging +from datetime import datetime, timezone +from typing import Any, Dict + +from homeassistant.components.binary_sensor import BinarySensorDeviceClass # type: ignore +from homeassistant.const import EntityCategory # type: ignore +from homeassistant.helpers.typing import StateType # type: ignore + +from .base import UnraidBinarySensorBase +from .const import UnraidBinarySensorEntityDescription +from ..const import DOMAIN, SpinDownDelay +from ..coordinator import UnraidDataUpdateCoordinator +from ..helpers import ( + DiskDataHelperMixin, + get_disk_identifiers, + get_disk_number, + get_unraid_disk_mapping, +) +from ..naming import EntityNaming + +_LOGGER = logging.getLogger(__name__) + +class UnraidArrayDiskSensor(UnraidBinarySensorBase, DiskDataHelperMixin): + """Binary sensor for array disk health monitoring.""" + + def __init__( + self, + coordinator: UnraidDataUpdateCoordinator, + disk_name: str, + ) -> None: + """Initialize the array disk health sensor.""" + if not disk_name.startswith("disk"): + raise ValueError(f"Not an array disk: {disk_name}") + + self._disk_name = disk_name + self._disk_num = get_disk_number(disk_name) + + if self._disk_num is None: + raise ValueError(f"Invalid array disk number: {disk_name}") + + # Initialize entity naming + naming = EntityNaming( + domain=DOMAIN, + hostname=coordinator.hostname, + component="disk" + ) + + # Get pretty name using naming utility + pretty_name = naming.get_entity_name(disk_name, "disk") + + description = UnraidBinarySensorEntityDescription( + key=f"disk_health_{disk_name}", + name=f"{pretty_name} Health", + device_class=BinarySensorDeviceClass.PROBLEM, + entity_category=EntityCategory.DIAGNOSTIC, + icon="mdi:harddisk", + has_warning_threshold=True, + ) + + super().__init__(coordinator, description) + + # Get device and serial from helpers + self._device, self._serial = get_disk_identifiers(coordinator.data, disk_name) + + # Initialize tracking variables + self._last_smart_check: datetime | None = None + self._smart_status: bool | None = None + self._last_problem_state: bool | None = None + self._spin_down_delay = self._get_spin_down_delay() + self._last_temperature: int | None = None + self._problem_attributes: Dict[str, Any] = {} + + _LOGGER.debug( + "Initialized array disk sensor | disk: %s | device: %s 
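Both guards in the constructor above surface as `ValueError`, which the setup loop catches and logs as a skipped disk. A standalone sketch of the validation (`get_disk_number` is the integration's helper; the digit check here is a stand-in for it):

```python
def validate_array_disk(disk_name: str) -> int:
    """Raise ValueError for non-array names, as the constructor above does."""
    if not disk_name.startswith("disk"):
        raise ValueError(f"Not an array disk: {disk_name}")
    num = disk_name[4:]
    if not num.isdigit():  # stand-in for get_disk_number() returning None
        raise ValueError(f"Invalid array disk number: {disk_name}")
    return int(num)

assert validate_array_disk("disk3") == 3
try:
    validate_array_disk("cache")
except ValueError as err:
    print(err)  # Not an array disk: cache
```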
| serial: %s", + disk_name, + self._device or "unknown", + self._serial or "unknown" + ) + + def _get_spin_down_delay(self) -> SpinDownDelay: + """Get spin down delay for this array disk.""" + try: + disk_cfg = self.coordinator.data.get("disk_config", {}) + # Get global setting (default to NEVER/0 if not specified) + global_delay = int(disk_cfg.get("spindownDelay", "0")) + # Check for disk-specific setting + disk_delay = disk_cfg.get(f"diskSpindownDelay.{self._disk_num}") + if disk_delay and disk_delay != "-1": # -1 means use global setting + global_delay = int(disk_delay) + return SpinDownDelay(global_delay) + except (ValueError, TypeError) as err: + _LOGGER.warning( + "Error getting spin down delay for %s: %s. Using default Never.", + self._disk_name, + err + ) + return SpinDownDelay.NEVER + + def _analyze_smart_status(self, disk_data: Dict[str, Any]) -> bool: + """Analyze SMART status and attributes for array disk problems.""" + self._problem_attributes = {} + + try: + _LOGGER.debug( + "Starting SMART analysis for array disk %s with data: %s", + self._disk_name, + {k: v for k, v in disk_data.items() if k not in ['smart_data']} + ) + + # Check disk state using proper standby detection + disk_state = disk_data.get("state", "unknown").lower() + _LOGGER.debug("Array disk %s current state: %s", self._disk_name, disk_state) + + if disk_state == "standby": + _LOGGER.debug( + "Array disk %s is in standby, using cached state: %s", + self._disk_name, + self._last_problem_state + ) + return self._last_problem_state if self._last_problem_state is not None else False + + has_problem = False + + # Get and validate SMART data + smart_data = disk_data.get("smart_data", {}) + if not smart_data: + _LOGGER.debug("No SMART data available for array disk %s", self._disk_name) + return self._last_problem_state if self._last_problem_state is not None else False + + # Check overall SMART status + smart_status = smart_data.get("smart_status", True) + _LOGGER.debug("Array disk %s SMART status: %s", self._disk_name, smart_status) + + if not smart_status: + self._problem_attributes["smart_status"] = "FAILED" + has_problem = True + _LOGGER.warning( + "Array disk %s has failed SMART status", + self._disk_name + ) + + # Process SMART attributes + attributes = smart_data.get("ata_smart_attributes", {}).get("table", []) + + # Map of critical attributes and their thresholds + critical_attrs = { + "Reallocated_Sector_Ct": 0, + "Current_Pending_Sector": 0, + "Offline_Uncorrectable": 0, + "UDMA_CRC_Error_Count": 100, + "Reallocated_Event_Count": 0, + "Reported_Uncorrect": 0, + "Command_Timeout": 100 + } + + # Process each attribute + for attr in attributes: + name = attr.get("name") + if not name: + continue + + # Check critical attributes + if name in critical_attrs: + raw_value = attr.get("raw", {}).get("value", 0) + threshold = critical_attrs[name] + + _LOGGER.debug( + "Checking %s for array disk %s: value=%s, threshold=%s", + name, + self._disk_name, + raw_value, + threshold + ) + + if int(raw_value) > threshold: + self._problem_attributes[name.lower()] = raw_value + has_problem = True + _LOGGER.warning( + "Array disk %s has high %s: %d (threshold: %d)", + self._disk_name, + name, + raw_value, + threshold + ) + + # Temperature check + elif name == "Temperature_Celsius": + temp = attr.get("raw", {}).get("value") + if temp is not None: + _LOGGER.debug( + "Array disk %s temperature: %d°C", + self._disk_name, + temp + ) + if temp > 55: # Temperature threshold + self._problem_attributes["temperature"] = f"{temp}°C" + 
has_problem = True + _LOGGER.warning( + "Array disk %s temperature is high: %d°C (threshold: 55°C)", + self._disk_name, + temp + ) + + # Store final state + self._last_problem_state = has_problem + + if has_problem: + _LOGGER.warning( + "Array disk %s has problems: %s", + self._disk_name, + self._problem_attributes + ) + else: + _LOGGER.debug( + "No problems found for array disk %s", + self._disk_name + ) + + return has_problem + + except Exception as err: + _LOGGER.error( + "SMART analysis failed for array disk %s: %s", + self._disk_name, + err, + exc_info=True + ) + return self._last_problem_state if self._last_problem_state is not None else False + + @property + def available(self) -> bool: + """Return if entity is available.""" + try: + # Check if disk exists in coordinator data + disks = self.coordinator.data.get("system_stats", {}).get("individual_disks", []) + disk_exists = any(disk["name"] == self._disk_name for disk in disks) + + return self.coordinator.last_update_success and disk_exists + + except Exception as err: + _LOGGER.debug( + "Error checking availability for array disk %s: %s", + self._disk_name, + err + ) + return False + + @property + def is_on(self) -> bool | None: + """Return true if there's a problem with the disk.""" + try: + for disk in self.coordinator.data["system_stats"]["individual_disks"]: + if disk["name"] == self._disk_name: + # Update spin down delay if changed + new_delay = SpinDownDelay(disk.get("spin_down_delay", SpinDownDelay.MINUTES_30)) + if new_delay != self._spin_down_delay: + self._spin_down_delay = new_delay + _LOGGER.debug( + "Updated spin down delay for array disk %s to %s", + self._disk_name, + self._spin_down_delay.to_human_readable() + ) + + # Get current state + is_standby = disk.get("state", "unknown").lower() == "standby" + if is_standby: + return self._last_problem_state if self._last_problem_state is not None else False + + current_time = datetime.now(timezone.utc) + should_check_smart = ( + self._smart_status is None # First check + or self._spin_down_delay == SpinDownDelay.NEVER # Never spin down + or ( + self._last_smart_check is not None + and ( + current_time - self._last_smart_check + ).total_seconds() >= self._spin_down_delay.to_seconds() + ) + ) + + if should_check_smart: + self._last_smart_check = current_time + return self._analyze_smart_status(disk) + + return self._last_problem_state if self._last_problem_state is not None else False + + return None + + except (KeyError, AttributeError, TypeError, ValueError) as err: + _LOGGER.debug("Error checking array disk health: %s", err) + return self._last_problem_state if self._last_problem_state is not None else None + + @property + def extra_state_attributes(self) -> dict[str, StateType]: + """Return additional state attributes.""" + try: + for disk in self.coordinator.data["system_stats"]["individual_disks"]: + if disk["name"] == self._disk_name: + # Get current disk state + is_standby = disk.get("state", "unknown").lower() == "standby" + + # Get storage attributes + attrs = self._get_storage_attributes( + total=disk.get("total", 0), + used=disk.get("used", 0), + free=disk.get("free", 0), + mount_point=disk.get("mount_point"), + device=self._device, + is_standby=is_standby + ) + + # Add disk serial + disk_map = get_unraid_disk_mapping( + {"system_stats": self.coordinator.data.get("system_stats", {})} + ) + if serial := disk_map.get(self._disk_name, {}).get("serial"): + attrs["disk_serial"] = serial + + # Handle temperature + temp = disk.get("temperature") + if not is_standby and 
temp is not None: + self._last_temperature = temp + + attrs["temperature"] = self._get_temperature_str( + self._last_temperature if is_standby else temp, + is_standby + ) + + # Add SMART status + if smart_data := disk.get("smart_data", {}): + attrs["smart_status"] = ( + "Passed" if smart_data.get("smart_status", True) + else "Failed" + ) + + # Add spin down delay + attrs["spin_down_delay"] = self._spin_down_delay.to_human_readable() + + # Add any problem details + if self._problem_attributes: + attrs["problem_details"] = self._problem_attributes + + return attrs + + return {} + + except (KeyError, AttributeError, TypeError) as err: + _LOGGER.debug("Missing key in array disk data: %s", err) + return {} + + @property + def state(self) -> str: + """Return the state of the sensor.""" + if self.is_on: + return "Problem" + return "OK" diff --git a/custom_components/unraid/diagnostics/parity.py b/custom_components/unraid/diagnostics/parity.py new file mode 100644 index 0000000..9586919 --- /dev/null +++ b/custom_components/unraid/diagnostics/parity.py @@ -0,0 +1,601 @@ +"""Parity disk monitoring for Unraid.""" +from __future__ import annotations + +import asyncio +import logging +from datetime import datetime, timezone +from typing import Any, Dict, Optional + +from homeassistant.components.binary_sensor import BinarySensorDeviceClass # type: ignore +from homeassistant.const import EntityCategory # type: ignore +from homeassistant.core import callback # type: ignore +from homeassistant.helpers.typing import StateType # type: ignore +from homeassistant.util import dt as dt_util # type: ignore + +from .base import UnraidBinarySensorBase +from .const import UnraidBinarySensorEntityDescription +from ..const import ( + DOMAIN, + SpinDownDelay, +) +from ..coordinator import UnraidDataUpdateCoordinator +from ..helpers import DiskDataHelperMixin, format_bytes +from ..naming import EntityNaming + +_LOGGER = logging.getLogger(__name__) + +class UnraidParityDiskSensor(UnraidBinarySensorBase, DiskDataHelperMixin): + """Binary sensor for parity disk health with enhanced monitoring.""" + + def __init__( + self, + coordinator: UnraidDataUpdateCoordinator, + parity_info: Dict[str, Any] + ) -> None: + """Initialize the parity disk sensor.""" + self._parity_info = parity_info + self._disk_serial = parity_info.get("diskId.0", "") # Get serial number + self._device = parity_info.get("rdevName.0", "").strip() + + _LOGGER.debug( + "Initializing parity disk sensor | device: %s | info: %s", + self._device, + {k: v for k, v in parity_info.items() if k != "smart_data"} + ) + + # Initialize entity naming + naming = EntityNaming( + domain=DOMAIN, + hostname=coordinator.hostname, + component="parity" + ) + + description = UnraidBinarySensorEntityDescription( + key="parity_health", + name=f"{naming.get_entity_name('parity', 'parity')} Health", + device_class=BinarySensorDeviceClass.PROBLEM, + entity_category=EntityCategory.DIAGNOSTIC, + icon="mdi:harddisk", + has_warning_threshold=True, + ) + + # Initialize parent class + super().__init__(coordinator, description) + + # Override device info for parity disk + self._attr_name = f"{naming.clean_hostname()} Parity Health" + + # Initialize state variables + self._last_state: bool | None = None + self._problem_attributes: Dict[str, Any] = {} + self._last_smart_check: datetime | None = None + self._smart_status: bool | None = None + self._last_temperature: int | None = None + self._disk_state = "unknown" + self._cached_size: int | None = None + + # Get spin down delay from config + 
self._spin_down_delay = self._get_spin_down_delay() + + def _get_spin_down_delay(self) -> SpinDownDelay: + """Get spin down delay for parity disk with fallback.""" + try: + # Check disk config for parity-specific setting + disk_cfg = self.coordinator.data.get("disk_config", {}) + + # Get parity delay (diskSpindownDelay.0) + delay = disk_cfg.get("diskSpindownDelay.0") + if delay and delay != "-1": + _LOGGER.debug("Using parity-specific spin down delay: %s", delay) + return SpinDownDelay(int(delay)) + + # Use global setting + global_delay = disk_cfg.get("spindownDelay", "0") + _LOGGER.debug("Using global spin down delay: %s", global_delay) + return SpinDownDelay(int(global_delay)) + + except (ValueError, TypeError) as err: + _LOGGER.warning( + "Error getting spin down delay for parity disk: %s. Using default.", + err + ) + return SpinDownDelay.NEVER + + def _get_temperature(self) -> Optional[int]: + """Get current disk temperature.""" + try: + # Get current array state + array_state = self.coordinator.data.get("array_state", {}) + self._disk_state = "active" if array_state.get("state") == "STARTED" else "standby" + + # First check disk data + for disk in self.coordinator.data.get("system_stats", {}).get("individual_disks", []): + if disk.get("name") == "parity" and (temp := disk.get("temperature")) is not None: + _LOGGER.debug("Got parity temperature %d°C from disk data", temp) + self._last_temperature = temp + return temp + + # Try SMART data if available + if self._device: + smart_data = self.coordinator.data.get("smart_data", {}).get(self._device, {}) + if temp := smart_data.get("temperature"): + _LOGGER.debug("Got parity temperature %d°C from SMART data", temp) + self._last_temperature = temp + return temp + + # Return cached temperature if available + if self._disk_state == "standby" and self._last_temperature is not None: + _LOGGER.debug( + "Using cached temperature for standby parity disk: %d°C", + self._last_temperature + ) + return self._last_temperature + + _LOGGER.debug("No temperature data available for parity disk") + return None + + except Exception as err: + _LOGGER.error("Error getting parity temperature: %s", err) + return None + + def _analyze_smart_status(self, disk_data: Dict[str, Any]) -> bool: + """Analyze SMART status and attributes for actual problems.""" + self._problem_attributes = {} + previous_state = self._last_state + + try: + _LOGGER.debug( + "Starting SMART analysis for parity disk with data: %s", + {k: v for k, v in disk_data.items() if k not in ['smart_data', 'attributes']} + ) + + has_problem = False + + # Check parity status first + if (status := self._parity_info.get("rdevStatus.0")) != "DISK_OK": + self._problem_attributes["parity_status"] = status + has_problem = True + _LOGGER.warning("Parity disk status issue: %s", status) + + # Check disk state (7 is normal operation) + if (state := self._parity_info.get("diskState.0", "0")) != "7": + self._problem_attributes["disk_state"] = f"Abnormal ({state})" + has_problem = True + _LOGGER.warning("Parity disk state issue: %s", state) + + # Get and validate SMART data + smart_data = disk_data.get("smart_data", {}) + if smart_data: + _LOGGER.debug("Processing SMART data for parity disk") + + # Check overall SMART status + smart_status = smart_data.get("smart_status", True) + if not smart_status: + self._problem_attributes["smart_status"] = "FAILED" + has_problem = True + _LOGGER.warning("Parity disk has failed SMART status") + + # Process SMART attributes + attributes = smart_data.get("ata_smart_attributes", 
{}).get("table", []) + + # Map of critical attributes and their thresholds + critical_attrs = { + "Reallocated_Sector_Ct": 0, + "Current_Pending_Sector": 0, + "Offline_Uncorrectable": 0, + "UDMA_CRC_Error_Count": 100, + "Reallocated_Event_Count": 0, + "Reported_Uncorrect": 0, + "Command_Timeout": 100 + } + + # Process each attribute + for attr in attributes: + name = attr.get("name") + if not name: + continue + + # Check critical attributes + if name in critical_attrs: + raw_value = attr.get("raw", {}).get("value", 0) + threshold = critical_attrs[name] + + _LOGGER.debug( + "Checking parity disk attribute %s: value=%s, threshold=%s", + name, + raw_value, + threshold + ) + + if int(raw_value) > threshold: + self._problem_attributes[name.lower()] = raw_value + has_problem = True + _LOGGER.warning( + "Parity disk has high %s: %d (threshold: %d)", + name, + raw_value, + threshold + ) + + # Temperature check + elif name == "Temperature_Celsius": + temp = attr.get("raw", {}).get("value") + if temp is not None: + _LOGGER.debug("Parity disk temperature from SMART: %d°C", temp) + if temp > 55: # Temperature threshold + self._problem_attributes["temperature"] = f"{temp}°C" + has_problem = True + _LOGGER.warning( + "Parity disk temperature is high: %d°C (threshold: 55°C)", + temp + ) + + # Log state changes + if previous_state != has_problem: + _LOGGER.info( + "Parity disk health state changed: %s -> %s", + "Problem" if previous_state else "OK", + "Problem" if has_problem else "OK" + ) + + # Store final state + self._last_state = has_problem + + if has_problem: + _LOGGER.warning( + "Parity disk has problems: %s", + self._problem_attributes + ) + else: + _LOGGER.debug("No problems found for parity disk") + + return has_problem + + except Exception as err: + _LOGGER.error( + "SMART analysis failed for parity disk: %s", + err, + exc_info=True + ) + return self._last_state if self._last_state is not None else False + + @property + def available(self) -> bool: + """Return True if entity is available.""" + return ( + self.coordinator.last_update_success + and bool(self._device) + and bool(self._parity_info) + ) + + @property + def is_on(self) -> bool | None: + """Return true if there's a problem with the disk.""" + try: + for disk in self.coordinator.data["system_stats"]["individual_disks"]: + if disk["name"] == "parity": + # Update spin down delay if changed + new_delay = SpinDownDelay(disk.get("spin_down_delay", SpinDownDelay.MINUTES_30)) + if new_delay != self._spin_down_delay: + self._spin_down_delay = new_delay + _LOGGER.debug( + "Updated spin down delay for parity to %s", + self._spin_down_delay.to_human_readable() + ) + + # Get current state + current_state = disk.get("state", "unknown").lower() + if current_state == "standby": + return self._last_state if self._last_state is not None else False + + current_time = datetime.now(timezone.utc) + should_check_smart = ( + self._smart_status is None # First check + or self._spin_down_delay == SpinDownDelay.NEVER # Never spin down + or ( + self._last_smart_check is not None + and ( + current_time - self._last_smart_check + ).total_seconds() >= self._spin_down_delay.to_seconds() + ) + ) + + if should_check_smart: + # Smart data will be updated by coordinator + self._last_smart_check = current_time + return self._analyze_smart_status(disk) + + # Use cached status + return self._last_state if self._last_state is not None else False + + return None + + except (KeyError, AttributeError, TypeError, ValueError) as err: + _LOGGER.debug("Error checking parity disk 
health: %s", err) + return self._last_state if self._last_state is not None else None + + @property + def extra_state_attributes(self) -> dict[str, StateType]: + """Return additional state attributes.""" + try: + # Get current status from array state + array_state = self.coordinator.data.get("array_state", {}) + disk_status = "active" if array_state.get("state") == "STARTED" else "standby" + + # Build attributes + attrs = { + "device": self._device, + "disk_status": disk_status, + "power_state": disk_status, + "spin_down_delay": self._spin_down_delay.to_human_readable(), + "smart_status": "Failed" if self._last_state else "Passed", + "disk_serial": self._disk_serial + } + + # Add temperature if available + attrs["temperature"] = self._get_temperature_str( + self._get_temperature(), + disk_status == "standby" + ) + + # Add disk size information using cached size + if size := self._parity_info.get("diskSize.0"): + try: + # Get device path + device_path = self._parity_info.get("rdevName.0") + # Use cached size if available, otherwise use sector calculation + if hasattr(self, '_cached_size'): + size_bytes = self._cached_size + else: + size_bytes = int(size) * 512 # Fallback to sector calculation + + attrs["total_size"] = format_bytes(size_bytes) + _LOGGER.debug( + "Added disk size for %s: %s (raw sectors: %s)", + device_path or "unknown", + attrs["total_size"], + size + ) + except (ValueError, TypeError) as err: + _LOGGER.error("Error calculating disk size: %s", err) + size_bytes = int(size) * 512 # Fallback + attrs["total_size"] = format_bytes(size_bytes) + + # Add SMART details if available + if self._device: + smart_data = self.coordinator.data.get("smart_data", {}).get(self._device, {}) + if smart_data: + attrs["smart_details"] = { + "power_on_hours": smart_data.get("power_on_hours"), + "status": "Passed" if smart_data.get("smart_status", True) else "Failed", + "device_model": smart_data.get("model_name", "Unknown"), + "serial_number": smart_data.get("serial_number", "Unknown"), + "firmware": smart_data.get("firmware_version", "Unknown") + } + _LOGGER.debug("Added SMART details: %s", attrs["smart_details"]) + + # Add any problem details + if self._problem_attributes: + attrs["problem_details"] = self._problem_attributes + _LOGGER.debug("Added problem details: %s", self._problem_attributes) + + return attrs + + except Exception as err: + _LOGGER.error("Error getting parity attributes: %s", err) + return {} + + async def async_update_disk_size(self) -> None: + """Update disk size asynchronously.""" + try: + if size := self._parity_info.get("diskSize.0"): + device_path = self._parity_info.get("rdevName.0") + if device_path: + result = await self.coordinator.api.execute_command( + f"lsblk -b -d -o SIZE /dev/{device_path} | tail -n1" + ) + if result.exit_status == 0 and result.stdout.strip(): + self._cached_size = int(result.stdout.strip()) + _LOGGER.debug( + "Updated cached disk size for %s: %d bytes", + device_path, + self._cached_size + ) + else: + self._cached_size = int(size) * 512 + else: + self._cached_size = int(size) * 512 + except Exception as err: + _LOGGER.error("Error updating disk size: %s", err) + if size: + self._cached_size = int(size) * 512 + + async def async_added_to_hass(self) -> None: + """When entity is added to hass.""" + await super().async_added_to_hass() + # Initialize disk size + await self.async_update_disk_size() + + @callback + def _handle_coordinator_update(self) -> None: + """Handle updated data from the coordinator.""" + # Schedule disk size update + 
+        self.hass.async_create_task(self.async_update_disk_size())
+        super()._handle_coordinator_update()
+
+    def _get_temperature_str(self, temp: int | None, is_standby: bool) -> str:
+        """Format temperature string with standby indication."""
+        if temp is None:
+            return "Unknown"
+
+        base_str = f"{temp}°C"
+        return f"{base_str} (Standby)" if is_standby else base_str
+
+    @property
+    def state(self) -> str:
+        """Return the state of the sensor."""
+        if self.is_on:
+            return "Problem"
+        return "OK"
+
+class UnraidParityCheckSensor(UnraidBinarySensorBase):
+    """Sensor for monitoring Unraid parity check status."""
+
+    def __init__(
+        self,
+        coordinator: UnraidDataUpdateCoordinator,
+    ) -> None:
+        """Initialize the parity check sensor."""
+        # Initialize entity naming
+        naming = EntityNaming(
+            domain=DOMAIN,
+            hostname=coordinator.hostname,
+            component="parity"
+        )
+
+        description = UnraidBinarySensorEntityDescription(
+            key="parity_check",
+            name=f"{naming.clean_hostname()} Parity Check",
+            device_class=BinarySensorDeviceClass.RUNNING,
+            entity_category=EntityCategory.DIAGNOSTIC,
+            icon="mdi:harddisk-plus",
+        )
+
+        super().__init__(coordinator, description)
+
+        self._attr_name = f"{naming.clean_hostname()} Parity Check Status"
+        self._last_state: bool | None = None
+
+    @property
+    def is_on(self) -> bool | None:
+        """Return true if parity check is running."""
+        try:
+            # Check array state first
+            array_state = self.coordinator.data.get("array_state", {})
+            if array_state.get("state") != "STARTED":
+                self._last_state = False
+                return False
+
+            # Check if sync action is running; cache the result so transient
+            # errors below can fall back to the last known state
+            sync_action = array_state.get("mdResyncAction", "")
+            self._last_state = bool(sync_action and sync_action != "IDLE")
+            return self._last_state
+
+        except Exception as err:
+            _LOGGER.debug("Error checking parity status: %s", err)
+            return self._last_state
+
+    @property
+    def extra_state_attributes(self) -> dict[str, StateType]:
+        """Return additional state attributes."""
+        try:
+            array_state = self.coordinator.data.get("array_state", {})
+            _LOGGER.debug("Current array state: %s", array_state)
+
+            attrs = {
+                "status": "Idle",
+                "progress": 0,
+                "speed": "N/A",
+                "errors": 0,
+                "last_check": "N/A",
+                "duration": "N/A",
+                "last_status": "N/A"
+            }
+
+            # Get current sync status if running
+            if sync_action := array_state.get("mdResyncAction"):
+                _LOGGER.debug("Found sync action: %s", sync_action)
+
+                # Clean up status display
+                if sync_action == "check P":
+                    attrs["status"] = "Checking Parity"
+                else:
+                    attrs["status"] = sync_action.capitalize()
+
+                # Calculate progress
+                if (pos := array_state.get("mdResyncPos")) and (
+                    size := array_state.get("mdResyncSize")
+                ):
+                    try:
+                        attrs["progress"] = round((int(pos) / int(size)) * 100, 2)
+                        _LOGGER.debug(
+                            "Calculated progress: %s%% (pos=%s, size=%s)",
+                            attrs["progress"],
+                            pos,
+                            size
+                        )
+                    except (ValueError, ZeroDivisionError) as err:
+                        _LOGGER.warning("Error calculating progress: %s", err)
+                        attrs["progress"] = 0
+
+                # Get speed and format it nicely
+                if speed := array_state.get("mdResyncSpeed"):
+                    try:
+                        speed_mb = round(float(speed) / (1024 * 1024), 2)
+                        attrs["speed"] = f"{speed_mb} MB/s"
+                        _LOGGER.debug("Calculated speed: %s", attrs["speed"])
+                    except (ValueError, TypeError) as err:
+                        _LOGGER.warning("Error calculating speed: %s", err)
+                        attrs["speed"] = "N/A"
+
+                # Get errors
+                attrs["errors"] = int(array_state.get("mdSyncErrs", 0))
+                _LOGGER.debug("Current errors: %s", attrs["errors"])
+
+            # Get last check details from history
+            if history := array_state.get("parity_history"):
+                _LOGGER.debug("Found parity history: %s", history)
+                try:
+                    # Format last
check date to be more readable + if check_date := history.get("date"): + parsed_date = datetime.strptime(check_date, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc) + now = dt_util.now() + time_diff = now - parsed_date + + if time_diff.days == 0: + attrs["last_check"] = f"Today at {parsed_date.strftime('%H:%M')}" + elif time_diff.days == 1: + attrs["last_check"] = f"Yesterday at {parsed_date.strftime('%H:%M')}" + else: + attrs["last_check"] = parsed_date.strftime("%b %d %Y %H:%M") + + _LOGGER.debug("Formatted last check date: %s", attrs["last_check"]) + + # Add other history details + attrs["duration"] = history.get("duration", "N/A") + attrs["last_status"] = history.get("status", "N/A") + attrs["last_speed"] = history.get("speed", "N/A") + + _LOGGER.debug("Added history details: duration=%s, status=%s, speed=%s", + attrs["duration"], + attrs["last_status"], + attrs["last_speed"]) + + except (ValueError, TypeError) as err: + _LOGGER.warning( + "Error formatting history data: %s", + err, + exc_info=True + ) + + _LOGGER.debug("Final attributes: %s", attrs) + return attrs + + except Exception as err: + _LOGGER.error( + "Error getting parity attributes: %s", + err, + exc_info=True + ) + return { + "status": "Unknown", + "progress": 0, + "speed": "N/A", + "errors": 0, + "last_check": "N/A", + "duration": "N/A", + "last_status": "N/A" + } + + @property + def available(self) -> bool: + """Return if entity is available.""" + return self.coordinator.last_update_success diff --git a/custom_components/unraid/diagnostics/pool.py b/custom_components/unraid/diagnostics/pool.py new file mode 100644 index 0000000..ee31dfe --- /dev/null +++ b/custom_components/unraid/diagnostics/pool.py @@ -0,0 +1,440 @@ +"""Pool disk health monitoring for Unraid.""" +from __future__ import annotations + +import logging +from datetime import datetime, timezone +from typing import Any, Dict + +from homeassistant.components.binary_sensor import BinarySensorDeviceClass # type: ignore +from homeassistant.const import EntityCategory # type: ignore +from homeassistant.helpers.typing import StateType # type: ignore + +from .base import UnraidBinarySensorBase +from .const import UnraidBinarySensorEntityDescription +from ..const import DOMAIN, SpinDownDelay +from ..coordinator import UnraidDataUpdateCoordinator +from ..helpers import ( + DiskDataHelperMixin, + get_disk_identifiers, + get_unraid_disk_mapping, +) +from ..naming import EntityNaming + +_LOGGER = logging.getLogger(__name__) + +class UnraidPoolDiskSensor(UnraidBinarySensorBase, DiskDataHelperMixin): + """Binary sensor for pool disk health monitoring.""" + + def __init__( + self, + coordinator: UnraidDataUpdateCoordinator, + disk_name: str, + ) -> None: + """Initialize the pool disk health sensor.""" + # Validate pool disk name + if disk_name.startswith("disk"): + raise ValueError(f"Not a pool disk: {disk_name}") + + # Skip system paths and known special names + invalid_names = {"parity", "flash", "boot", "temp", "user"} + if disk_name.lower() in invalid_names: + raise ValueError(f"Invalid pool name: {disk_name}") + + self._disk_name = disk_name + + # Initialize entity naming + naming = EntityNaming( + domain=DOMAIN, + hostname=coordinator.hostname, + component="disk" + ) + + # Get pretty name using naming utility + # Use "cache" type for cache disk, "pool" for others + component_type = "cache" if disk_name == "cache" else "pool" + pretty_name = naming.get_entity_name(disk_name, component_type) + + description = UnraidBinarySensorEntityDescription( + 
key=f"disk_health_{disk_name}", + name=f"{pretty_name} Health", + device_class=BinarySensorDeviceClass.PROBLEM, + entity_category=EntityCategory.DIAGNOSTIC, + icon="mdi:harddisk", + has_warning_threshold=True, + ) + + super().__init__(coordinator, description) + + # Get device and serial from helpers + self._device, self._serial = get_disk_identifiers(coordinator.data, disk_name) + + # Initialize tracking variables + self._last_smart_check: datetime | None = None + self._smart_status: bool | None = None + self._last_problem_state: bool | None = None + self._spin_down_delay = self._get_spin_down_delay() + self._last_temperature: int | None = None + self._problem_attributes: Dict[str, Any] = {} + self._is_nvme = bool(self._device and "nvme" in self._device.lower()) + + _LOGGER.debug( + "Initialized pool disk sensor | name: %s | type: %s | device: %s | serial: %s", + disk_name, + "NVMe" if self._is_nvme else "SATA", + self._device or "unknown", + self._serial or "unknown" + ) + + def _get_spin_down_delay(self) -> SpinDownDelay: + """Get spin down delay for pool disk.""" + try: + disk_cfg = self.coordinator.data.get("disk_config", {}) + # Use global setting for pools (no per-disk setting) + global_delay = int(disk_cfg.get("spindownDelay", "0")) + return SpinDownDelay(global_delay) + except (ValueError, TypeError) as err: + _LOGGER.warning( + "Error getting spin down delay for pool %s: %s. Using default Never.", + self._disk_name, + err + ) + return SpinDownDelay.NEVER + + def _analyze_nvme_health(self, smart_data: Dict[str, Any], has_problem: bool) -> bool: + """Analyze NVMe specific health data.""" + nvme_health = smart_data.get("nvme_smart_health_information_log", {}) + _LOGGER.debug( + "NVMe health data for pool %s: %s", + self._disk_name, + nvme_health + ) + + # Media errors check + media_errors = nvme_health.get("media_errors", 0) + if int(media_errors) > 0: + self._problem_attributes["media_errors"] = media_errors + has_problem = True + _LOGGER.warning( + "Pool %s NVMe has %d media errors", + self._disk_name, + media_errors + ) + + # Critical warning check + if warning := nvme_health.get("critical_warning"): + if warning != 0: # NVMe uses numeric warning flags + self._problem_attributes["critical_warning"] = warning + has_problem = True + _LOGGER.warning( + "Pool %s NVMe has critical warning: %d", + self._disk_name, + warning + ) + + # Temperature check from NVMe health log + if temp := nvme_health.get("temperature"): + _LOGGER.debug( + "Pool %s NVMe temperature: %d°C", + self._disk_name, + temp + ) + if temp > 70: # NVMe temperature threshold + self._problem_attributes["temperature"] = f"{temp}°C" + has_problem = True + _LOGGER.warning( + "Pool %s NVMe temperature is high: %d°C (threshold: 70°C)", + self._disk_name, + temp + ) + + return has_problem + + def _analyze_sata_health(self, smart_data: Dict[str, Any], has_problem: bool) -> bool: + """Analyze SATA specific health data.""" + _LOGGER.debug( + "Processing SATA attributes for pool %s", + self._disk_name + ) + + attributes = smart_data.get("ata_smart_attributes", {}).get("table", []) + + # Map of critical attributes and their thresholds + critical_attrs = { + "Reallocated_Sector_Ct": 0, + "Current_Pending_Sector": 0, + "Offline_Uncorrectable": 0, + "UDMA_CRC_Error_Count": 100, + "Reallocated_Event_Count": 0, + "Reported_Uncorrect": 0, + "Command_Timeout": 100 + } + + # Process each attribute + for attr in attributes: + name = attr.get("name") + if not name: + continue + + # Check critical attributes + if name in critical_attrs: + 
raw_value = attr.get("raw", {}).get("value", 0) + threshold = critical_attrs[name] + + _LOGGER.debug( + "Checking %s for pool %s: value=%s, threshold=%s", + name, + self._disk_name, + raw_value, + threshold + ) + + if int(raw_value) > threshold: + self._problem_attributes[name.lower()] = raw_value + has_problem = True + _LOGGER.warning( + "Pool %s has high %s: %d (threshold: %d)", + self._disk_name, + name, + raw_value, + threshold + ) + + # Temperature check + elif name == "Temperature_Celsius": + temp = attr.get("raw", {}).get("value") + if temp is not None: + _LOGGER.debug( + "Pool %s SATA temperature: %d°C", + self._disk_name, + temp + ) + if temp > 55: # SATA temperature threshold + self._problem_attributes["temperature"] = f"{temp}°C" + has_problem = True + _LOGGER.warning( + "Pool %s SATA temperature is high: %d°C (threshold: 55°C)", + self._disk_name, + temp + ) + + return has_problem + + def _analyze_smart_status(self, disk_data: Dict[str, Any]) -> bool: + """Analyze SMART status and attributes for pool disk problems.""" + self._problem_attributes = {} + + try: + _LOGGER.debug( + "Starting SMART analysis for pool %s with data: %s", + self._disk_name, + {k: v for k, v in disk_data.items() if k not in ['smart_data']} + ) + + # Check disk state using proper standby detection + disk_state = disk_data.get("state", "unknown").lower() + _LOGGER.debug("Pool %s current state: %s", self._disk_name, disk_state) + + if disk_state == "standby": + _LOGGER.debug( + "Pool %s is in standby, using cached state: %s", + self._disk_name, + self._last_problem_state + ) + return self._last_problem_state if self._last_problem_state is not None else False + + has_problem = False + + # Get and validate SMART data + smart_data = disk_data.get("smart_data", {}) + if not smart_data: + _LOGGER.debug("No SMART data available for pool %s", self._disk_name) + return self._last_problem_state if self._last_problem_state is not None else False + + # Check overall SMART status + smart_status = smart_data.get("smart_status", True) + _LOGGER.debug("Pool %s SMART status: %s", self._disk_name, smart_status) + + if not smart_status: + self._problem_attributes["smart_status"] = "FAILED" + has_problem = True + _LOGGER.warning( + "Pool %s has failed SMART status", + self._disk_name + ) + + # Device specific checks + if self._is_nvme: + has_problem = self._analyze_nvme_health(smart_data, has_problem) + else: + has_problem = self._analyze_sata_health(smart_data, has_problem) + + # Store final state + self._last_problem_state = has_problem + + if has_problem: + _LOGGER.warning( + "Pool %s has problems: %s", + self._disk_name, + self._problem_attributes + ) + else: + _LOGGER.debug( + "No problems found for pool %s", + self._disk_name + ) + + return has_problem + + except Exception as err: + _LOGGER.error( + "SMART analysis failed for pool %s: %s", + self._disk_name, + err, + exc_info=True + ) + return self._last_problem_state if self._last_problem_state is not None else False + + @property + def available(self) -> bool: + """Return if entity is available.""" + try: + # Check if disk exists in coordinator data + disks = self.coordinator.data.get("system_stats", {}).get("individual_disks", []) + disk_exists = any(disk["name"] == self._disk_name for disk in disks) + + return self.coordinator.last_update_success and disk_exists + + except Exception as err: + _LOGGER.debug( + "Error checking availability for pool %s: %s", + self._disk_name, + err + ) + return False + + @property + def is_on(self) -> bool | None: + """Return true if 
there's a problem with the disk.""" + try: + for disk in self.coordinator.data["system_stats"]["individual_disks"]: + if disk["name"] == self._disk_name: + # Update spin down delay if changed + new_delay = SpinDownDelay(disk.get("spin_down_delay", SpinDownDelay.MINUTES_30)) + if new_delay != self._spin_down_delay: + self._spin_down_delay = new_delay + _LOGGER.debug( + "Updated spin down delay for pool %s to %s", + self._disk_name, + self._spin_down_delay.to_human_readable() + ) + + # Get current state + is_standby = disk.get("state", "unknown").lower() == "standby" + if is_standby: + return self._last_problem_state if self._last_problem_state is not None else False + + current_time = datetime.now(timezone.utc) + should_check_smart = ( + self._smart_status is None # First check + or self._spin_down_delay == SpinDownDelay.NEVER # Never spin down + or ( + self._last_smart_check is not None + and ( + current_time - self._last_smart_check + ).total_seconds() >= self._spin_down_delay.to_seconds() + ) + ) + + if should_check_smart: + self._last_smart_check = current_time + return self._analyze_smart_status(disk) + + return self._last_problem_state if self._last_problem_state is not None else False + + return None + + except (KeyError, AttributeError, TypeError, ValueError) as err: + _LOGGER.debug("Error checking pool disk health: %s", err) + return self._last_problem_state if self._last_problem_state is not None else None + + @property + def extra_state_attributes(self) -> dict[str, StateType]: + """Return additional state attributes.""" + try: + for disk in self.coordinator.data["system_stats"]["individual_disks"]: + if disk["name"] == self._disk_name: + # Get current disk state + is_standby = disk.get("state", "unknown").lower() == "standby" + + # Get storage attributes + attrs = self._get_storage_attributes( + total=disk.get("total", 0), + used=disk.get("used", 0), + free=disk.get("free", 0), + mount_point=disk.get("mount_point"), + device=self._device, + is_standby=is_standby + ) + + # Add disk serial + disk_map = get_unraid_disk_mapping( + {"system_stats": self.coordinator.data.get("system_stats", {})} + ) + if serial := disk_map.get(self._disk_name, {}).get("serial"): + attrs["disk_serial"] = serial + + # Handle temperature based on device type + temp = disk.get("temperature") + if self._is_nvme: + # NVMe drives always show actual temperature from SMART data + smart_data = disk.get("smart_data", {}) + nvme_temp = ( + smart_data.get("temperature") + or temp + or smart_data.get("nvme_temperature") + ) + if not is_standby and nvme_temp is not None: + self._last_temperature = nvme_temp + temp = nvme_temp # Use NVMe temp for display + else: + # SATA drives + if not is_standby and temp is not None: + self._last_temperature = temp + + attrs["temperature"] = self._get_temperature_str( + self._last_temperature if is_standby else temp, + is_standby + ) + + # Add SMART status + if smart_data := disk.get("smart_data", {}): + attrs["smart_status"] = ( + "Passed" if smart_data.get("smart_status", True) + else "Failed" + ) + + # Add spin down delay + attrs["spin_down_delay"] = self._spin_down_delay.to_human_readable() + + # Add any problem details + if self._problem_attributes: + attrs["problem_details"] = self._problem_attributes + + # Add pool type + attrs["pool_type"] = "Cache" if self._disk_name == "cache" else "Custom Pool" + attrs["device_type"] = "NVMe" if self._is_nvme else "SATA" + + return attrs + + return {} + + except (KeyError, AttributeError, TypeError) as err: + _LOGGER.debug("Missing key 
in pool disk data: %s", err) + return {} + + @property + def state(self) -> str: + """Return the state of the sensor.""" + if self.is_on: + return "Problem" + return "OK" \ No newline at end of file diff --git a/custom_components/unraid/diagnostics/ups.py b/custom_components/unraid/diagnostics/ups.py new file mode 100644 index 0000000..bb73bbc --- /dev/null +++ b/custom_components/unraid/diagnostics/ups.py @@ -0,0 +1,177 @@ +"""UPS monitoring for Unraid.""" +from __future__ import annotations + +import logging +from typing import Any + +from homeassistant.components.binary_sensor import BinarySensorDeviceClass # type: ignore +from homeassistant.const import EntityCategory # type: ignore + +from .base import UnraidBinarySensorBase +from .const import UnraidBinarySensorEntityDescription +from ..const import DOMAIN +from ..coordinator import UnraidDataUpdateCoordinator +from ..naming import EntityNaming + +_LOGGER = logging.getLogger(__name__) + +class UnraidUPSBinarySensor(UnraidBinarySensorBase): + """Binary sensor for UPS monitoring.""" + + def __init__(self, coordinator: UnraidDataUpdateCoordinator) -> None: + """Initialize UPS binary sensor.""" + # Initialize entity naming + naming = EntityNaming( + domain=DOMAIN, + hostname=coordinator.hostname, + component="ups" + ) + + description = UnraidBinarySensorEntityDescription( + key="ups_status", + name=f"{naming.get_entity_name('ups', 'ups')} Status", + device_class=BinarySensorDeviceClass.POWER, + entity_category=EntityCategory.DIAGNOSTIC, + icon="mdi:battery-medium", + ) + + super().__init__(coordinator, description) + + _LOGGER.debug( + "Initialized UPS binary sensor | name: %s", + self._attr_name + ) + + @property + def available(self) -> bool: + """Return True if entity is available.""" + try: + ups_info = self.coordinator.data.get("system_stats", {}).get("ups_info") + has_ups = bool(ups_info) + + if not has_ups: + _LOGGER.debug("No UPS info available in coordinator data") + + return self.coordinator.last_update_success and has_ups + + except Exception as err: + _LOGGER.error("Error checking UPS availability: %s", err) + return False + + @property + def is_on(self) -> bool | None: + """Return true if the UPS is online.""" + try: + status = self.coordinator.data["system_stats"].get("ups_info", {}).get("STATUS") + if status is None: + _LOGGER.debug("No UPS status available") + return None + + is_online = status.upper() in ["ONLINE", "ON LINE"] + _LOGGER.debug("UPS status: %s (online: %s)", status, is_online) + return is_online + + except (KeyError, AttributeError, TypeError) as err: + _LOGGER.debug("Error getting UPS status: %s", err) + return None + + @property + def extra_state_attributes(self) -> dict[str, Any]: + """Return additional state attributes.""" + try: + ups_info = self.coordinator.data["system_stats"].get("ups_info", {}) + + # Base attributes + attrs = { + "model": ups_info.get("MODEL", "Unknown"), + "status": ups_info.get("STATUS", "Unknown"), + } + + # Add percentage values with validation + for key, attr_name in [ + ("BCHARGE", "battery_charge"), + ("LOADPCT", "load_percentage") + ]: + if value := ups_info.get(key): + try: + # Ensure value is numeric and within range + numeric_value = float(value) + if 0 <= numeric_value <= 100: + attrs[attr_name] = f"{numeric_value}%" + else: + _LOGGER.warning( + "Invalid %s value: %s (expected 0-100)", + key, + value + ) + except (ValueError, TypeError) as err: + _LOGGER.debug( + "Error processing %s value: %s", + key, + err + ) + + # Add time values + if runtime := 
ups_info.get("TIMELEFT"): + try: + # Ensure runtime is numeric and positive + runtime_value = float(runtime) + if runtime_value >= 0: + attrs["runtime_left"] = f"{runtime_value} minutes" + else: + _LOGGER.warning( + "Invalid runtime value: %s (expected >= 0)", + runtime + ) + except (ValueError, TypeError) as err: + _LOGGER.debug( + "Error processing runtime value: %s", + err + ) + + # Add power/voltage values with validation + for key, attr_name, unit in [ + ("NOMPOWER", "nominal_power", "W"), + ("LINEV", "line_voltage", "V"), + ("BATTV", "battery_voltage", "V") + ]: + if value := ups_info.get(key): + try: + # Ensure value is numeric and positive + numeric_value = float(value) + if numeric_value >= 0: + attrs[attr_name] = f"{numeric_value}{unit}" + else: + _LOGGER.warning( + "Invalid %s value: %s (expected >= 0)", + key, + value + ) + except (ValueError, TypeError) as err: + _LOGGER.debug( + "Error processing %s value: %s", + key, + err + ) + + # Additional UPS details if available + if firmware := ups_info.get("FIRMWARE"): + attrs["firmware"] = firmware + if serial := ups_info.get("SERIALNO"): + attrs["serial_number"] = serial + if manufacture_date := ups_info.get("MANDATE"): + attrs["manufacture_date"] = manufacture_date + + _LOGGER.debug("UPS attributes: %s", attrs) + return attrs + + except (KeyError, AttributeError, TypeError) as err: + _LOGGER.debug("Error getting UPS attributes: %s", err) + return {} + + @property + def state(self) -> str: + """Return the state of the sensor.""" + if self.is_on is None: + return "Unknown" + return "Online" if self.is_on else "Offline" diff --git a/custom_components/unraid/helpers.py b/custom_components/unraid/helpers.py index 27a7254..d85be23 100755 --- a/custom_components/unraid/helpers.py +++ b/custom_components/unraid/helpers.py @@ -439,6 +439,40 @@ def extract_fans_data(sensors_data: Dict[str, Dict[str, str]]) -> Dict[str, Any] _LOGGER.error("Error extracting fan data: %s", err, exc_info=True) return {} +def is_solid_state_drive(disk_data: dict) -> bool: + """Determine if a disk is a solid state drive (NVME or SSD).""" + try: + # Guard against None or invalid disk_data + if not disk_data or not isinstance(disk_data, dict): + _LOGGER.debug("Invalid disk_data provided to is_solid_state_drive: %s", disk_data) + return False + + # Check device path for nvme + device = disk_data.get("device") + if device and isinstance(device, str) and "nvme" in device.lower(): + return True + + # Check if it's a cache device + if disk_data.get("name") == "cache": + return True + + # Check smart data for rotation rate (0 indicates SSD) + smart_data = disk_data.get("smart_data", {}) + if isinstance(smart_data, dict): + rotation_rate = smart_data.get("rotation_rate") + if rotation_rate == 0: + return True + + return False + + except (AttributeError, TypeError, ValueError) as err: + _LOGGER.debug( + "Error checking if disk is SSD: %s - Error: %s", + disk_data.get("name", "unknown"), + err + ) + return False + class DiskDataHelperMixin: """Mixin providing common disk data handling methods.""" diff --git a/custom_components/unraid/manifest.json b/custom_components/unraid/manifest.json index 9234c3d..1f77814 100755 --- a/custom_components/unraid/manifest.json +++ b/custom_components/unraid/manifest.json @@ -13,6 +13,6 @@ "aiofiles>=23.2.1" ], "ssdp": [], - "version": "2024.12.24", + "version": "2024.12.23", "zeroconf": [] } \ No newline at end of file diff --git a/custom_components/unraid/naming.py b/custom_components/unraid/naming.py index 092e5bb..37b8031 100755 --- 
a/custom_components/unraid/naming.py +++ b/custom_components/unraid/naming.py @@ -11,9 +11,9 @@ # Patterns for entity name formatting ENTITY_NAME_PATTERNS: Dict[str, Union[str, Callable[[str], str]]] = { "disk": lambda num: f"Array {num}", - "cache": "Cache", - "parity": "Parity", - "pool": lambda name: f"Pool {name.title()}", + "cache": lambda _: "Pool Cache", + "parity": lambda _: "Parity", + "pool": lambda name: f"Pool {name.title()}" if name != "cache" else "Pool Cache", "docker": lambda name: f"Docker {name}", "vm": lambda name: f"VM {name}", } diff --git a/custom_components/unraid/sensors/storage.py b/custom_components/unraid/sensors/storage.py index 04c0dbd..9b242e0 100755 --- a/custom_components/unraid/sensors/storage.py +++ b/custom_components/unraid/sensors/storage.py @@ -15,8 +15,10 @@ from ..coordinator import UnraidDataUpdateCoordinator from ..helpers import ( DiskDataHelperMixin, + format_bytes, get_disk_identifiers, get_pool_info, + is_solid_state_drive, ) from ..naming import EntityNaming @@ -293,12 +295,17 @@ def extra_state_attributes(self) -> dict[str, Any]: return attrs class UnraidPoolSensor(UnraidSensorBase, DiskDataHelperMixin): - """Storage pool sensor for Unraid.""" + """Storage pool and solid state drive sensor for Unraid.""" def __init__(self, coordinator, pool_name: str) -> None: """Initialize the sensor.""" - # Set pool name first as it's needed by value functions + # Set initial values first self._pool_name = pool_name + self._last_value: Optional[float] = None + self._last_temperature: Optional[int] = None + + # Get device and serial using the helper BEFORE using them in get_pool_icon + self._device, self._serial = get_disk_identifiers(coordinator.data, pool_name) # Initialize entity naming naming = EntityNaming( @@ -307,90 +314,125 @@ def __init__(self, coordinator, pool_name: str) -> None: component="pool" ) - description = UnraidSensorEntityDescription( - key=f"pool_{pool_name}_usage", - name=f"{naming.get_entity_name(pool_name, 'pool')} Usage", - native_unit_of_measurement=PERCENTAGE, - device_class=None, - state_class=SensorStateClass.MEASUREMENT, - icon=self._get_pool_icon(), - suggested_display_precision=1, - value_fn=self._get_pool_usage, + # Get pretty name using naming utility + pretty_name = naming.get_entity_name(pool_name, "pool") + + # Initialize base sensor class + super().__init__( + coordinator, + UnraidSensorEntityDescription( + key=f"pool_{pool_name}_usage", + name=f"{pretty_name} Usage", + native_unit_of_measurement=PERCENTAGE, + device_class=None, + state_class=SensorStateClass.MEASUREMENT, + icon=self._get_pool_icon(), # Now _device is available + suggested_display_precision=1, + value_fn=self._get_usage, + ), ) - # Initialize parent class and mixin - super().__init__(coordinator, description) + # Initialize DiskDataHelperMixin DiskDataHelperMixin.__init__(self) def _get_pool_icon(self) -> str: - """Get appropriate icon for pool type.""" + """Get appropriate icon based on device type.""" pool_name = self._pool_name.lower() - if "cache" in pool_name: - return "mdi:harddisk" - elif "nvme" in pool_name: - return "mdi:harddisk" + try: + if (self._device and "nvme" in self._device.lower()) or "nvme" in pool_name: + return "mdi:harddisk" + except AttributeError: + _LOGGER.debug( + "Device not available for pool %s when getting icon", + self._pool_name + ) return "mdi:harddisk" - def _get_pool_usage(self, data: dict) -> float | None: - """Get pool usage percentage.""" - pool_info = get_pool_info(data.get("system_stats", {})) - if 
self._pool_name in pool_info:
-            info = pool_info[self._pool_name]
-            return self._calculate_usage_percentage(
-                info.get("total_size", 0),
-                info.get("used_size", 0)
-            )
-        return None
+    def _get_usage(self, data: dict) -> float | None:
+        """Get usage percentage for the pool or SSD."""
+        try:
+            # First check individual disks for direct device
+            for disk in data.get("system_stats", {}).get("individual_disks", []):
+                if disk.get("name") == self._pool_name:
+                    usage = self._calculate_usage_percentage(
+                        disk.get("total", 0),
+                        disk.get("used", 0)
+                    )
+                    # Cache the value so later read errors can fall back to it
+                    self._last_value = usage
+                    return usage
+
+            # Fallback to pool data
+            pool_info = get_pool_info(data.get("system_stats", {}))
+            if self._pool_name in pool_info:
+                info = pool_info[self._pool_name]
+                usage = self._calculate_usage_percentage(
+                    info.get("total_size", 0),
+                    info.get("used_size", 0)
+                )
+                self._last_value = usage
+                return usage
+
+            return None
+
+        except (TypeError, ValueError) as err:
+            _LOGGER.debug("Error getting usage: %s", err)
+            return self._last_value
 
     @property
     def extra_state_attributes(self) -> dict[str, Any]:
-        """Return additional pool attributes."""
+        """Return additional state attributes."""
         try:
-            pool_info = get_pool_info(self.coordinator.data.get("system_stats", {}))
-            if self._pool_name not in pool_info:
-                return {}
+            # First try to get individual disk data
+            for disk in self.coordinator.data.get("system_stats", {}).get("individual_disks", []):
+                if disk.get("name") == self._pool_name:
+                    device, serial = get_disk_identifiers(
+                        self.coordinator.data,
+                        self._pool_name
+                    )
+
+                    is_standby = disk.get("state") == "standby"
+
+                    attrs = self._get_storage_attributes(
+                        total=disk.get("total", 0),
+                        used=disk.get("used", 0),
+                        free=disk.get("free", 0),
+                        mount_point=disk.get("mount_point"),
+                        device=device,
+                        is_standby=is_standby
+                    )
 
-            info = pool_info[self._pool_name]
-
-            # Base attributes from helper
-            attrs = self._get_storage_attributes(
-                info["total_size"],
-                info["used_size"],
-                info.get("free_size", 0),
-                info.get("mount_point")
-            )
+                    attrs.update({
+                        "device": device or "unknown",
+                        "disk_serial": serial or "unknown",
+                        "power_state": "standby" if is_standby else "active",
+                        "filesystem": disk.get("filesystem", "unknown"),
+                    })
 
-            # For cache pool, get filesystem info from disk data
-            filesystem = info.get("filesystem", "unknown")
-            status = "active"  # Default status for mounted pool
-
-            # Check individual disks for more detailed cache info
-            for disk in self.coordinator.data.get("system_stats", {}).get("individual_disks", []):
-                if disk.get("name") == "cache" and disk.get("mount_point") == info.get("mount_point"):
-                    filesystem = disk.get("filesystem", filesystem)
-                    # If disk is mounted and responding, it's active
-                    status = "active" if disk.get("state") != "standby" else "standby"
-                    break
+                    # Handle temperature
+                    temp = disk.get("temperature")
+                    if not is_standby and temp is not None:
+                        self._last_temperature = temp
+
+                    attrs["temperature"] = self._get_temperature_str(
+                        self._last_temperature if is_standby else temp,
+                        is_standby
+                    )
 
-            attrs.update({
-                "filesystem": filesystem,
-                "device_count": len(info.get("devices", [])),
-                "status": status,
-            })
+                    return attrs
 
-            # Add device details with proper nvme path
-            for i, device in enumerate(info.get("devices", []), 1):
-                # Clean up device path for nvme devices
-                if 'nvme' in device.lower():
-                    device_name = device.split('/')[-1]  # Extract just the device name
-                    attrs[f"device_{i}"] = device_name
-                else:
-                    attrs[f"device_{i}"] = device
+            # Fallback to pool data
+            pool_info =
get_pool_info(self.coordinator.data.get("system_stats", {})) + if self._pool_name in pool_info: + info = pool_info[self._pool_name] + return { + "filesystem": info.get("filesystem", "unknown"), + "device_count": len(info.get("devices", [])), + "mount_point": info.get("mount_point", "unknown"), + "total_size": format_bytes(info.get("total_size", 0)), + "used_space": format_bytes(info.get("used_size", 0)), + "free_space": format_bytes(info.get("free_size", 0)), + } - return attrs + return {} - except (KeyError, TypeError, AttributeError) as err: - _LOGGER.debug("Error getting pool attributes: %s", err) + except Exception as err: + _LOGGER.error("Error getting attributes: %s", err) return {} class UnraidStorageSensors: @@ -403,12 +445,10 @@ def __init__(self, coordinator) -> None: # Add array sensor self.entities.append(UnraidArraySensor(coordinator)) - # Add individual disk sensors try: - # Get disk data with type validation disk_data = coordinator.data.get("system_stats", {}).get("individual_disks", []) if not isinstance(disk_data, list): - _LOGGER.error("Invalid disk data format - expected list, got %s", type(disk_data)) + _LOGGER.error("Invalid disk data format - expected list") disk_data = [] # Define ignored mounts and filesystem types @@ -420,8 +460,11 @@ def __init__(self, coordinator) -> None: # Track processed disks processed_disks = set() - # Sort array disks first + # Sort and process array disks (spinning drives) array_disks = [] + solid_state_disks = [] + + # First, categorize all disks for disk in disk_data: if not isinstance(disk, dict): _LOGGER.warning("Invalid disk entry format: %s", disk) @@ -434,32 +477,29 @@ def __init__(self, coordinator) -> None: # Skip invalid or ignored disks if not disk_name: continue - if disk_name in processed_disks: - continue - if filesystem == "tmpfs": # Direct check for tmpfs + if filesystem == "tmpfs": continue if any(mount in mount_point for mount in ignored_mounts): continue - # Skip parity disk (handled separately) if disk_name == "parity": continue - # Collect array disks for sorting - if disk_name.startswith("disk"): + # Route disk to appropriate list based on type + if is_solid_state_drive(disk): + solid_state_disks.append(disk) + elif disk_name.startswith("disk"): try: - # Use the helper function for disk number extraction disk_num = get_disk_number(disk_name) if disk_num is not None: array_disks.append((disk_num, disk)) - else: - _LOGGER.warning("Invalid disk number format: %s", disk_name) except ValueError: _LOGGER.warning("Invalid disk number format: %s", disk_name) - continue - # Process cache disk immediately - elif disk_name == "cache": - try: + # Process spinning drives with UnraidDiskSensor + for _, disk in sorted(array_disks, key=lambda x: x[0]): + try: + disk_name = disk.get("name", "") + if disk_name not in processed_disks: self.entities.append( UnraidDiskSensor( coordinator=coordinator, @@ -467,69 +507,54 @@ def __init__(self, coordinator) -> None: ) ) processed_disks.add(disk_name) - _LOGGER.debug( - "Added disk sensor for cache: %s", - disk_name - ) - except ValueError as err: - _LOGGER.warning( - "Error adding disk sensor for %s: %s", - disk_name, - err - ) + _LOGGER.debug("Added spinning disk sensor: %s", disk_name) + except ValueError as err: + _LOGGER.warning("Error adding disk sensor: %s", err) - # Process sorted array disks - for _, disk in sorted(array_disks, key=lambda x: x[0]): + # Process pools and SSDs + pool_info = get_pool_info(coordinator.data.get("system_stats", {})) + + # First handle SSDs and NVMEs that 
aren't part of a pool + for disk in solid_state_disks: try: disk_name = disk.get("name", "") - if disk_name not in processed_disks: + if not disk_name or disk_name in processed_disks: + continue + + # Check if this disk is part of a pool + is_pool_member = False + for pool_name, pool_data in pool_info.items(): + if disk.get("device") in pool_data.get("devices", []): + is_pool_member = True + break + + # Only create individual sensor if not part of a pool + if not is_pool_member: self.entities.append( - UnraidDiskSensor( + UnraidPoolSensor( coordinator=coordinator, - disk_name=disk_name + pool_name=disk_name ) ) processed_disks.add(disk_name) - _LOGGER.debug( - "Added array disk sensor for: %s", - disk_name - ) + _LOGGER.debug("Added SSD/NVME sensor: %s", disk_name) except ValueError as err: - _LOGGER.warning( - "Error adding disk sensor for %s: %s", - disk_name, - err - ) - continue + _LOGGER.warning("Error adding SSD sensor: %s", err) - _LOGGER.info( - "Successfully added %d disk sensors", - len(processed_disks) - ) - - except (TypeError, KeyError, AttributeError) as err: - _LOGGER.error( - "Error setting up disk sensors: %s", - err, - exc_info=True - ) - - # Add pool sensors - try: - pool_info = get_pool_info(coordinator.data.get("system_stats", {})) + # Then handle pools for pool_name in pool_info: - self.entities.append(UnraidPoolSensor(coordinator, pool_name)) - _LOGGER.debug("Added pool sensor for: %s", pool_name) - - if pool_info: - _LOGGER.info( - "Successfully added %d pool sensors", - len(pool_info) - ) + try: + if pool_name not in processed_disks: + self.entities.append( + UnraidPoolSensor( + coordinator=coordinator, + pool_name=pool_name + ) + ) + processed_disks.add(pool_name) + _LOGGER.debug("Added pool sensor: %s", pool_name) + except ValueError as err: + _LOGGER.warning("Error adding pool sensor: %s", err) - except (TypeError, KeyError, AttributeError, ValueError) as err: - _LOGGER.error( - "Error setting up pool sensors: %s", - err, - exc_info=True - ) + except Exception as err: + _LOGGER.error("Error setting up sensors: %s", err, exc_info=True)
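-- 
Illustrative note (not part of the patch): the refactored UnraidStorageSensors
setup above routes each disk to either UnraidDiskSensor (spinning array
members) or UnraidPoolSensor (SSD/NVMe devices and pools) based on
is_solid_state_drive(). A minimal standalone sketch of that routing decision,
assuming only the data shapes used in this patch (disk name, device path,
SMART rotation_rate):

    from typing import Any, Dict, List

    def route_disks(disks: List[Dict[str, Any]]) -> Dict[str, List[str]]:
        """Split disks the way the refactored sensor setup does (sketch)."""
        routes: Dict[str, List[str]] = {"array": [], "solid_state": []}
        for disk in disks:
            name = disk.get("name", "")
            # parity and tmpfs mounts are skipped outright in the patch
            if not name or name == "parity" or disk.get("filesystem") == "tmpfs":
                continue
            smart = disk.get("smart_data") or {}
            is_ssd = (
                "nvme" in str(disk.get("device", "")).lower()  # NVMe device path
                or name == "cache"                             # cache device
                or smart.get("rotation_rate") == 0             # 0 RPM marks an SSD
            )
            routes["solid_state" if is_ssd else "array"].append(name)
        return routes

    # disk1 stays a spinning-disk sensor; cache becomes an SSD/pool sensor
    print(route_disks([
        {"name": "disk1", "device": "sdb", "smart_data": {"rotation_rate": 7200}},
        {"name": "cache", "device": "nvme0n1", "smart_data": {"rotation_rate": 0}},
        {"name": "parity", "device": "sda"},
    ]))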