Source code for submarit.io.matlab_io

"""MATLAB file I/O utilities."""

import warnings
from pathlib import Path
from typing import Any, Dict, Optional, Union

import h5py
import numpy as np
import scipy.io as sio
from numpy.typing import NDArray


def load_mat(
    filepath: Union[str, Path],
    variable_names: Optional[list] = None,
    matlab_compatible: bool = True
) -> Dict[str, Any]:
    """Load data from a MATLAB .mat file.

    Handles both old-style (< v7.3) and new-style (>= v7.3) .mat files.
    scipy is tried first; when it raises ``NotImplementedError`` the file
    is read through the HDF5 fallback instead.

    Args:
        filepath: Path to the .mat file
        variable_names: Names of the variables to load (None = load all).
            A single string is treated as one variable name.
        matlab_compatible: Whether to maintain MATLAB compatibility. When
            False, scipy is asked to squeeze unit dimensions
            (``squeeze_me=True``).

    Returns:
        Dictionary of loaded variables

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
    """
    filepath = Path(filepath)

    if not filepath.exists():
        raise FileNotFoundError(f"File not found: {filepath}")

    # A bare string would otherwise be matched by substring below
    # ("a" in "ab" is True), so normalise it to a one-element list.
    if isinstance(variable_names, str):
        variable_names = [variable_names]
    elif variable_names is not None:
        variable_names = list(variable_names)

    try:
        # Try loading with scipy (works for v4, v6, v7 up to v7.2).
        # Forwarding variable_names lets scipy skip unwanted variables
        # instead of reading the whole file and discarding most of it.
        mat_data = sio.loadmat(
            str(filepath),
            squeeze_me=not matlab_compatible,
            mat_dtype=True,
            variable_names=variable_names,
        )
    except NotImplementedError:
        # Fall back to h5py for v7.3 files
        mat_data = _load_mat_hdf5(filepath, variable_names)
    else:
        # Remove metadata keys (__header__, __version__, __globals__)
        mat_data = {k: v for k, v in mat_data.items() if not k.startswith('__')}

    # Filter variables if requested
    if variable_names is not None:
        wanted = set(variable_names)
        mat_data = {k: v for k, v in mat_data.items() if k in wanted}

    return mat_data
def _load_mat_hdf5(
    filepath: Path,
    variable_names: Optional[list] = None
) -> Dict[str, Any]:
    """Load MATLAB v7.3 files using HDF5.

    Args:
        filepath: Path to the HDF5-based .mat file
        variable_names: Top-level names to read (None = read all)

    Returns:
        Dictionary mapping each selected top-level key to its decoded value
    """
    with h5py.File(filepath, 'r') as f:
        return {
            key: _read_hdf5_dataset(f[key])
            for key in f.keys()
            if variable_names is None or key in variable_names
        }


def _read_hdf5_dataset(dataset: h5py.Dataset) -> Any:
    """Read HDF5 dataset handling MATLAB specifics.

    Groups are read recursively into dictionaries. Datasets are read in
    full; arrays with more than one dimension are transposed, and byte
    strings are decoded as UTF-8. Any other object is returned unchanged.

    Args:
        dataset: An h5py dataset or group (or any other h5py object)

    Returns:
        A dict for groups, a str for byte-string data, otherwise the
        (possibly transposed) NumPy value read from the dataset
    """
    if isinstance(dataset, h5py.Group):
        # Handle MATLAB structures
        return {key: _read_hdf5_dataset(dataset[key]) for key in dataset.keys()}

    if not isinstance(dataset, h5py.Dataset):
        return dataset

    data = dataset[()]

    # Scalar variable-length strings come back from h5py as plain Python
    # objects, which have neither .ndim nor .dtype; handle them before the
    # NumPy path so they do not raise AttributeError.
    if not isinstance(data, (np.ndarray, np.generic)):
        if isinstance(data, bytes):
            return data.decode('utf-8')
        return data

    # Handle MATLAB's column-major storage
    if data.ndim > 1:
        data = data.T

    # Handle MATLAB strings
    if data.dtype.type is np.bytes_:
        data = data.tobytes().decode('utf-8')

    return data
def save_mat(
    filepath: Union[str, Path],
    data: Dict[str, Any],
    format: str = '5',
    do_compression: bool = False
) -> None:
    """Save data to a MATLAB .mat file.

    Missing parent directories of ``filepath`` are created first.

    Args:
        filepath: Output file path
        data: Dictionary of variables to save
        format: MATLAB file format ('5' or '7.3'). Any other value falls
            back to '5' and emits a ``UserWarning``.
        do_compression: Whether to compress the data
    """
    filepath = Path(filepath)
    filepath.parent.mkdir(parents=True, exist_ok=True)

    if format == '7.3':
        _save_mat_hdf5(filepath, data, do_compression)
        return

    if format != '5':
        # Keep the historical fallback to v5, but make it visible rather
        # than silently writing a different format than was requested.
        warnings.warn(
            f"Unsupported MATLAB format {format!r}; writing a v5 file instead",
            UserWarning,
            stacklevel=2,
        )

    sio.savemat(
        str(filepath),
        data,
        format='5',
        do_compression=do_compression
    )
def _save_mat_hdf5(
    filepath: Path,
    data: Dict[str, Any],
    do_compression: bool = False
) -> None:
    """Save MATLAB v7.3 files using HDF5.

    Each top-level entry of ``data`` is written through
    ``_write_hdf5_dataset``.

    NOTE(review): only plain HDF5 datasets and groups are written here; no
    MATLAB-specific file header or attributes are added in this function.
    Confirm that MATLAB itself can open the result if that is required.

    Args:
        filepath: Output file path (an existing file is overwritten)
        data: Dictionary of variables to save
        do_compression: Whether to gzip-compress array datasets
    """
    compression = 'gzip' if do_compression else None

    with h5py.File(filepath, 'w') as f:
        for key, value in data.items():
            _write_hdf5_dataset(f, key, value, compression)


def _write_hdf5_dataset(
    group: h5py.Group,
    name: str,
    data: Any,
    compression: Optional[str] = None
) -> None:
    """Write data to HDF5 handling MATLAB specifics.

    Dictionaries become groups (written recursively), arrays and lists
    become datasets (transposed when they have more than one dimension),
    strings become UTF-8 byte-string datasets, and anything else is handed
    to h5py as-is.

    Args:
        group: Parent HDF5 group (or file) to write into
        name: Name of the dataset or subgroup to create
        data: Value to store
        compression: HDF5 compression filter name, or None for no filter.
            Ignored for scalar payloads.
    """
    if isinstance(data, dict):
        # Handle structures
        subgroup = group.create_group(name)
        for key, value in data.items():
            _write_hdf5_dataset(subgroup, key, value, compression)
        return

    if isinstance(data, (np.ndarray, list)):
        payload = np.asarray(data)
        # Convert to column-major for MATLAB
        if payload.ndim > 1:
            payload = payload.T
    elif isinstance(data, str):
        # Encode explicitly: np.bytes_(str) only accepts ASCII, while the
        # reader in this module decodes stored strings as UTF-8.
        payload = np.bytes_(data.encode('utf-8'))
    else:
        # Handle scalars
        payload = data

    # h5py rejects filter options on scalar datasets with TypeError, so
    # compression is only requested for payloads that have dimensions.
    if np.ndim(payload) == 0:
        group.create_dataset(name, data=payload)
    else:
        group.create_dataset(name, data=payload, compression=compression)
def convert_mat_to_npz(
    mat_filepath: Union[str, Path],
    npz_filepath: Union[str, Path],
    compressed: bool = True
) -> None:
    """Convert a .mat file to NumPy .npz format.

    Every variable returned by ``load_mat`` is stored in the archive under
    its own name.

    Args:
        mat_filepath: Input .mat file path
        npz_filepath: Output .npz file path
        compressed: Whether to use compression
    """
    variables = load_mat(mat_filepath)

    # Both writers share the same call signature, so pick one and call it.
    write_archive = np.savez_compressed if compressed else np.savez
    write_archive(npz_filepath, **variables)
def validate_mat_file(filepath: Union[str, Path]) -> Dict[str, Any]:
    """Validate and get information about a .mat file.

    The file is inspected with ``scipy.io.whosmat`` first. If that fails
    for any reason, it is opened as HDF5 instead. Only when both attempts
    fail is the file reported as not loadable.

    Args:
        filepath: Path to the .mat file

    Returns:
        Dictionary with file information:
            filepath: The path as a string
            size_bytes: File size in bytes
            variables: Mapping of variable name to its 'shape' and 'dtype'
                (for HDF5 files only top-level datasets are listed)
            format: 'v5/v7', 'v7.3', or None if scipy failed and the file
                could not be opened as HDF5
            loadable: False if both readers failed
            errors: Messages from the failed readers (empty on success)

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
    """
    filepath = Path(filepath)

    if not filepath.exists():
        raise FileNotFoundError(f"File not found: {filepath}")

    info = {
        'filepath': str(filepath),
        'size_bytes': filepath.stat().st_size,
        'variables': {},
        'format': None,
        'loadable': True,
        'errors': []
    }

    try:
        # Try scipy.io first
        mat_info = sio.whosmat(str(filepath))
    except Exception as scipy_err:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate to the caller.
        try:
            # Try HDF5
            with h5py.File(filepath, 'r') as f:
                info['format'] = 'v7.3'
                for key in f.keys():
                    dataset = f[key]
                    if isinstance(dataset, h5py.Dataset):
                        info['variables'][key] = {
                            'shape': dataset.shape,
                            'dtype': str(dataset.dtype)
                        }
        except Exception as hdf5_err:
            info['loadable'] = False
            # Report both failures: the scipy message is usually the one
            # that explains what is wrong with a non-HDF5 file.
            info['errors'].append(f"scipy: {scipy_err}")
            info['errors'].append(f"h5py: {hdf5_err}")
    else:
        info['format'] = 'v5/v7'
        info['variables'] = {
            name: {'shape': shape, 'dtype': dtype}
            for name, shape, dtype in mat_info
        }

    return info