Source code for radioviz.services.tiff_metadata

#  Copyright 2026 European Union
#  Author: Bulgheroni Antonio (antonio.bulgheroni@ec.europa.eu)
#  SPDX-License-Identifier: EUPL-1.2
"""
TIFF metadata utilities for RadioViz.

This module provides helper functions to extract, normalize, and prepare
TIFF metadata for persistence and display. It focuses on the subset of
TIFF tags that are meaningful for autoradiography images and ensures
values are kept JSON- and HDF5-friendly for workspace serialization.
"""

from __future__ import annotations

from fractions import Fraction
from io import BytesIO
from pathlib import Path
from typing import Any, Iterable, cast

import numpy as np
import tifffile

# Keys are numeric TIFF tag IDs; values are the tag names stored next to each
# tag value in the metadata dictionaries built by this module.
# extract_tiff_metadata and _metadata_from_tag_map iterate this mapping, so a
# tag that is not listed here never reaches the serialized metadata.
TIFF_TAGS_TO_PRESERVE: dict[int, str] = {
    # Image geometry and sample layout.
    256: 'ImageWidth',
    257: 'ImageLength',
    258: 'BitsPerSample',
    259: 'Compression',
    262: 'PhotometricInterpretation',
    # Free-text provenance.
    270: 'ImageDescription',
    271: 'Make',
    # Spatial calibration (interpreted together by pixel_size_from_metadata).
    282: 'XResolution',
    283: 'YResolution',
    296: 'ResolutionUnit',
    # Producing software.
    305: 'Software',
}
"""Mapping of TIFF tag IDs to their canonical names for preserved metadata."""


def extract_tiff_metadata(raw_bytes: bytes) -> dict[str, Any]:
    """
    Read the preserved TIFF tags from an in-memory TIFF file.

    Only the first page of the file is inspected and only the tags listed in
    ``TIFF_TAGS_TO_PRESERVE`` are kept. Each value goes through
    ``_normalize_tag_value`` before being stored.

    :param raw_bytes: Raw TIFF file bytes.
    :type raw_bytes: bytes
    :return: Metadata dictionary suitable for workspace serialization.
    :rtype: dict[str, Any]
    """
    with tifffile.TiffFile(BytesIO(raw_bytes)) as handle:
        first_page = cast(tifffile.TiffPage, handle.pages[0])
        # Tag values are read while the file is still open.
        preserved: dict[str, dict[str, Any]] = {
            str(tag_id): {'name': tag_name, 'value': _normalize_tag_value(found.value)}
            for tag_id, tag_name in TIFF_TAGS_TO_PRESERVE.items()
            if (found := first_page.tags.get(tag_id)) is not None
        }
    return {'tiff': {'tags': preserved}}


def extract_xyz_dat_metadata(
    dat_path: Path,
    image_shape: tuple[int, int] | None = None,
) -> tuple[dict[str, Any], list[str]]:
    """
    Extract TIFF-style metadata from a XYZ DAT sidecar file.

    The DAT format is a sectioned key/value text file (INI-like). This
    function parses the DAT content and maps a subset of fields to TIFF tags
    used by RadioViz for metadata preservation.

    When ``image_shape`` is given it is authoritative for the size tags: any
    dimension found in the DAT file that contradicts it produces a warning and
    the image size is used. Dimensions missing from the DAT file are filled in
    from ``image_shape`` without a warning.

    :param dat_path: Path to the DAT sidecar file.
    :type dat_path: Path
    :param image_shape: Optional image shape (height, width) used to validate
        and complete the size tags.
    :type image_shape: tuple[int, int] | None
    :return: Metadata dictionary and list of warnings.
    :rtype: tuple[dict[str, Any], list[str]]
    """
    sections = _parse_dat_sections(_read_dat_text(dat_path))
    warnings: list[str] = []

    width = _coerce_int(
        _find_dat_value(
            sections,
            section_names=('imageinfo',),
            key_names=('Pixel_X', 'Width', 'PixelX'),
        )
    )
    # NOTE(review): 'Hight' is looked up verbatim; presumably a spelling found
    # in real DAT files - confirm before "correcting" it.
    height = _coerce_int(
        _find_dat_value(
            sections,
            section_names=('imageinfo',),
            key_names=('Pixel_Y', 'Hight', 'Height', 'PixelY'),
        )
    )

    if image_shape is not None:
        shape_height = int(image_shape[0])
        shape_width = int(image_shape[1])
        # Compare each available dimension on its own so that a DAT file that
        # only provides one (wrong) dimension is still detected.
        width_differs = width is not None and width != shape_width
        height_differs = height is not None and height != shape_height
        if width_differs or height_differs:
            warnings.append(
                f'DAT size mismatch: DAT=({width}, {height}) image=({shape_width}, {shape_height}). Using image size.'
            )
        # Matching values are unchanged, missing values are filled in and
        # contradicting values are overridden by the actual image size.
        width = shape_width
        height = shape_height

    bits_per_sample = _coerce_int(
        _find_dat_value(
            sections,
            section_names=('imageinfo',),
            key_names=('BitsPerPixel', 'BitsPerSample'),
        )
    )
    if bits_per_sample is None:
        bits_per_sample = 16  # default used when the DAT file does not say

    photometric_value = _map_photometric(
        _find_dat_value(
            sections,
            section_names=('imageinfo',),
            key_names=('PhotometricInterpretation',),
        )
    )
    software = _find_dat_value(sections, section_names=('Image', 'imageinfo'), key_names=('Software',))
    make = _find_dat_value(
        sections,
        section_names=('Image', 'imageinfo'),
        key_names=('Make', 'Manufacturer', 'Vendor'),
    )

    pixel_size_x = _coerce_float(
        _find_dat_value(
            sections,
            section_names=('imageinfo', 'Image_X'),
            key_names=('PixelSizeX', 'PixelsizeX', 'Pixelsize', 'PixelSize'),
        )
    )
    pixel_size_y = _coerce_float(
        _find_dat_value(
            sections,
            section_names=('imageinfo', 'Image_X'),
            key_names=('PixelSizeY', 'PixelsizeY', 'Pixelsize', 'PixelSize'),
        )
    )
    # _resolution_from_nm yields pixels per centimeter, hence ResolutionUnit 3.
    x_resolution = _resolution_from_nm(pixel_size_x)
    y_resolution = _resolution_from_nm(pixel_size_y)

    description = _build_dat_description(sections)

    tag_map: dict[int, Any] = {
        258: bits_per_sample,
        259: 1,  # Compression: 1 = no compression
        262: photometric_value,
        296: 3,  # ResolutionUnit: 3 = centimeter
    }
    if width is not None:
        tag_map[256] = width
    if height is not None:
        tag_map[257] = height
    if description:
        tag_map[270] = description
    if make:
        tag_map[271] = make
    if software:
        tag_map[305] = software
    if x_resolution is not None:
        tag_map[282] = x_resolution
    if y_resolution is not None:
        tag_map[283] = y_resolution

    return _metadata_from_tag_map(tag_map), warnings


def derive_tiff_metadata(
    parent_metadata: dict[str, Any],
    derivation_note: str | None,
    software_suffix: str,
) -> dict[str, Any]:
    """
    Build the metadata of a child image from its parent's metadata.

    Tags that are not listed in ``TIFF_TAGS_TO_PRESERVE`` are dropped. The
    derivation note is appended to ImageDescription (270) and the Software
    tag (305) is extended with ``software_suffix``.

    :param parent_metadata: Metadata from the parent image controller.
    :type parent_metadata: dict[str, Any]
    :param derivation_note: Description of the derivation to append.
    :type derivation_note: str | None
    :param software_suffix: Software string to append if missing.
    :type software_suffix: str
    :return: Derived metadata dictionary.
    :rtype: dict[str, Any]
    """
    inherited = _tag_map_from_metadata(parent_metadata)
    kept: dict[int, Any] = {}
    for tag_id in TIFF_TAGS_TO_PRESERVE:
        if tag_id in inherited:
            kept[tag_id] = inherited[tag_id]
    return _metadata_from_tag_map(
        _apply_description_and_software(kept, derivation_note, software_suffix)
    )


def build_tiff_extratags(
    metadata: dict[str, Any],
    derivation_note: str | None,
    software_suffix: str,
) -> list[tuple[int, str, int, Any, bool]]:
    """
    Build TIFF extratags for writing derived images.

    Only the Make tag (271) is emitted here. A Make value that is ``None`` or
    blank is skipped so that the literal text ``'None'`` or an empty tag is
    never written to the file.

    :param metadata: Image metadata dictionary.
    :type metadata: dict[str, Any]
    :param derivation_note: Description of the derivation. Not used for the
        Make tag; kept so the signature matches ``build_tiff_kwargs``.
    :type derivation_note: str | None
    :param software_suffix: Software string. Not used for the Make tag; kept
        so the signature matches ``build_tiff_kwargs``.
    :type software_suffix: str
    :return: List of TIFF extratag tuples.
    :rtype: list[tuple[int, str, int, Any, bool]]
    """
    make = _tag_map_from_metadata(metadata).get(271)
    if make is None:
        return []
    make_text = str(make)
    if not make_text.strip():
        return []
    # NOTE(review): tuple layout assumed to be tifffile's
    # (code, dtype, count, value, writeonce) - confirm against tifffile docs.
    return [(271, 's', 0, _ascii_safe(make_text), False)]


def build_tiff_kwargs(
    metadata: dict[str, Any],
    derivation_note: str | None,
    software_suffix: str,
) -> dict[str, Any]:
    """
    Build standard TIFF keyword arguments for writing metadata.

    Malformed optional metadata never prevents the image from being written:
    missing (``None``) values are skipped, a resolution with a zero
    denominator is omitted, and an unparsable ResolutionUnit is left out.

    :param metadata: Image metadata dictionary.
    :type metadata: dict[str, Any]
    :param derivation_note: Description of the derivation to append.
    :type derivation_note: str | None
    :param software_suffix: Software string to append if missing.
    :type software_suffix: str
    :return: Dictionary of tifffile.imwrite keyword arguments.
    :rtype: dict[str, Any]
    """
    tag_map = _tag_map_from_metadata(metadata)
    updated_tags = _apply_description_and_software(tag_map, derivation_note, software_suffix)

    kwargs: dict[str, Any] = {}

    description = updated_tags.get(270)
    if description is not None:  # avoid writing the literal text 'None'
        kwargs['description'] = _ascii_safe(str(description))

    # Tag 305 is always set to a string by _apply_description_and_software.
    if 305 in updated_tags:
        kwargs['software'] = _ascii_safe(str(updated_tags[305]))

    x_raw = updated_tags.get(282)
    y_raw = updated_tags.get(283)
    if x_raw is not None and y_raw is not None:
        x_num, x_den = _coerce_rational(x_raw)
        y_num, y_den = _coerce_rational(y_raw)
        # A zero denominator cannot be expressed as a resolution; skip it
        # instead of raising ZeroDivisionError.
        if x_den != 0 and y_den != 0:
            kwargs['resolution'] = (x_num / x_den, y_num / y_den)

    unit_raw = updated_tags.get(296)
    if unit_raw is not None:
        try:
            kwargs['resolutionunit'] = _coerce_short(unit_raw)
        except (TypeError, ValueError):
            # Best effort: an unparsable unit is left out rather than
            # aborting the write.
            pass

    return kwargs


def metadata_entries(metadata: dict[str, Any]) -> list[tuple[str, str, str]]:
    """
    Build displayable metadata entries from a metadata dictionary.

    Tag identifiers are matched by their string form, so dictionaries keyed by
    integers (``{270: ...}``) are displayed the same way as the serialized
    string-keyed form (``{'270': ...}``).

    :param metadata: Metadata dictionary.
    :type metadata: dict[str, Any]
    :return: List of (tag, name, value) tuples for display.
    :rtype: list[tuple[str, str, str]]
    """
    # ``or {}`` also covers an explicit None stored under 'tiff' or 'tags'.
    tags = (metadata.get('tiff') or {}).get('tags') or {}
    # _sorted_tag_ids returns string ids, so look entries up by string form;
    # otherwise non-string keys would silently lose their value.
    by_text_id = {str(key): entry for key, entry in tags.items()}

    entries: list[tuple[str, str, str]] = []
    for tag_id in _sorted_tag_ids(tags.keys()):
        entry = by_text_id.get(tag_id)
        if isinstance(entry, dict):
            name = entry.get('name', '')
            value = entry.get('value', '')
        else:
            # Bare values (no name/value wrapper) are shown without a name.
            name = ''
            value = entry
        entries.append((str(tag_id), str(name), _format_value(value)))
    return entries


def pixel_size_from_metadata(metadata: dict[str, Any]) -> tuple[float, float] | None:
    """
    Compute the pixel size in meters from TIFF metadata.

    The pixel size is derived from XResolution/YResolution and ResolutionUnit
    when available. ResolutionUnit values follow the TIFF specification:

    - 2: inches
    - 3: centimeters

    If only one of the two resolutions is available it is used for both axes.
    Any missing, unsupported or malformed value results in ``None``; the
    function does not raise for bad metadata.

    :param metadata: Metadata dictionary containing TIFF tags.
    :type metadata: dict[str, Any]
    :return: Tuple of (pixel_size_x_m, pixel_size_y_m) in meters, or None.
    :rtype: tuple[float, float] | None
    """
    tag_map = _tag_map_from_metadata(metadata)
    x_res = _resolution_value(tag_map.get(282))
    y_res = _resolution_value(tag_map.get(283))
    if x_res is None and y_res is None:
        return None

    # A missing unit maps to 0, which _resolution_unit_to_meters rejects.
    try:
        res_unit = _coerce_short(tag_map.get(296, 0))
    except (TypeError, ValueError):
        # e.g. None, an empty list or non-numeric text stored as the unit.
        return None
    unit_m = _resolution_unit_to_meters(res_unit)
    if unit_m is None:
        return None

    if x_res is None:
        x_res = y_res
    if y_res is None:
        y_res = x_res
    if not x_res or not y_res:
        return None

    try:
        pixel_x_m = unit_m / float(x_res)
        pixel_y_m = unit_m / float(y_res)
    except (TypeError, ValueError, ZeroDivisionError, OverflowError):
        return None

    # Chained comparisons are False for NaN, so this rejects NaN, infinite
    # and non-positive sizes in one go.
    infinity = float('inf')
    if not (0 < pixel_x_m < infinity and 0 < pixel_y_m < infinity):
        return None
    return pixel_x_m, pixel_y_m


[docs] def _sorted_tag_ids(keys: Any) -> list[str]: """ Sort tag identifiers numerically when possible. :param keys: Iterable of tag identifiers. :type keys: Any :return: Sorted list of tag identifiers as strings. :rtype: list[str] """ def _key(value: Any) -> tuple[int, str]: try: return int(value), str(value) except (TypeError, ValueError): return 10_000, str(value) return [str(k) for k in sorted(keys, key=_key)]
[docs] def _format_value(value: Any) -> str: """ Format a metadata value for display. :param value: Metadata value to format. :type value: Any :return: String representation of the value. :rtype: str """ if isinstance(value, (list, tuple)): return ', '.join(str(item) for item in value) return str(value)
[docs] def _resolution_value(value: Any) -> float | None: """ Convert a TIFF resolution tag value to a floating-point number. :param value: TIFF tag value. :type value: Any :return: Resolution as float or None when unavailable. :rtype: float | None """ if value in (None, ''): return None if isinstance(value, (int, float, np.number)): try: return float(value) except (TypeError, ValueError): return None if isinstance(value, (list, tuple)): if len(value) == 2 and all(isinstance(v, (int, float, np.number)) for v in value): if value[1] == 0: return None return float(value[0]) / float(value[1]) if value and isinstance(value[0], (list, tuple)) and len(value[0]) == 2: num, den = value[0] if den == 0: return None return float(num) / float(den) return None
[docs] def _resolution_unit_to_meters(unit_value: int) -> float | None: """ Convert a TIFF ResolutionUnit to meters. :param unit_value: TIFF resolution unit code. :type unit_value: int :return: Unit length in meters, or None if unsupported. :rtype: float | None """ if unit_value == 2: return 0.0254 if unit_value == 3: return 0.01 return None
[docs] def _tag_map_from_metadata(metadata: dict[str, Any]) -> dict[int, Any]: """ Extract a tag-id to value mapping from metadata. :param metadata: Metadata dictionary. :type metadata: dict[str, Any] :return: Mapping of TIFF tag IDs to values. :rtype: dict[int, Any] """ tag_map: dict[int, Any] = {} tags = metadata.get('tiff', {}).get('tags', {}) for key, entry in tags.items(): try: tag_id = int(key) except (TypeError, ValueError): continue if isinstance(entry, dict): tag_map[tag_id] = entry.get('value') else: tag_map[tag_id] = entry return tag_map
def _metadata_from_tag_map(tag_map: dict[int, Any]) -> dict[str, Any]:
    """
    Wrap a tag mapping into the serializable metadata structure.

    Only tags listed in ``TIFF_TAGS_TO_PRESERVE`` are emitted, in the order of
    that mapping, and every value is passed through ``_normalize_tag_value``.

    :param tag_map: Mapping of TIFF tag IDs to values.
    :type tag_map: dict[int, Any]
    :return: Metadata dictionary.
    :rtype: dict[str, Any]
    """
    preserved: dict[str, dict[str, Any]] = {
        str(tag_id): {'name': tag_name, 'value': _normalize_tag_value(tag_map[tag_id])}
        for tag_id, tag_name in TIFF_TAGS_TO_PRESERVE.items()
        if tag_id in tag_map
    }
    return {'tiff': {'tags': preserved}}


def _apply_description_and_software(
    tag_map: dict[int, Any],
    derivation_note: str | None,
    software_suffix: str,
) -> dict[int, Any]:
    """
    Return a copy of the tag map with description and software updated.

    The input mapping is not modified. ImageDescription (270) is only touched
    when a non-empty derivation note is given; Software (305) is always set.

    :param tag_map: Tag map to update.
    :type tag_map: dict[int, Any]
    :param derivation_note: Description of the derivation to append.
    :type derivation_note: str | None
    :param software_suffix: Software string to append if missing.
    :type software_suffix: str
    :return: Updated tag map.
    :rtype: dict[int, Any]
    """
    result = dict(tag_map)

    if derivation_note:
        current_description = result.get(270)
        base_description = str(current_description) if current_description else ''
        result[270] = _append_description(base_description, derivation_note)
    # Without a note an existing description is simply kept as copied.

    current_software = result.get(305)
    base_software = str(current_software) if current_software else ''
    result[305] = _append_software(base_software, software_suffix)
    return result


[docs] def _append_description(existing: str, note: str) -> str: """ Append a derivation note to an existing description string. :param existing: Existing description string. :type existing: str :param note: Derivation note to append. :type note: str :return: Updated description string. :rtype: str """ existing = existing.strip() note = note.strip() if not existing: return note if note in existing: return existing return f'{existing}\n{note}'
[docs] def _append_software(existing: str, suffix: str) -> str: """ Append a software suffix if not already present. :param existing: Existing software string. :type existing: str :param suffix: Software suffix to append. :type suffix: str :return: Updated software string. :rtype: str """ existing = existing.strip() suffix = suffix.strip() if not existing: return suffix if suffix in existing: return existing return f'{existing}; {suffix}'
[docs] def _coerce_rational(value: Any) -> tuple[int, int]: """ Coerce a value to a TIFF rational tuple. A TIFF rational is a pair of 32-bit unsigned integers representing a fraction (numerator/denominator). TIFF uses this format to store resolution values and other floating-point quantities with high precision. For example, a resolution of 300 DPI is stored as the rational (300, 1), while 72.5 DPI would be stored as (145, 2). This function accepts various input types and converts them to the rational tuple format expected by TIFF tags: - Fraction objects: extracts numerator and denominator directly - 2-element lists/tuples: treats as (numerator, denominator) - Numeric types (int, float, numpy.number): converts to a reduced fraction :param value: Value to coerce. Can be a Fraction, 2-element sequence, or any numeric type. :type value: Any :return: Rational tuple (numerator, denominator). :rtype: tuple[int, int] Example:: >>> _coerce_rational(72.5) (145, 2) >>> _coerce_rational(Fraction(3, 4)) (3, 4) >>> _coerce_rational([300, 1]) (300, 1) """ if hasattr(value, 'numerator') and hasattr(value, 'denominator'): return int(value.numerator), int(value.denominator) if isinstance(value, (list, tuple)) and len(value) == 2: return int(value[0]), int(value[1]) if isinstance(value, (int, float, np.number)): frac = Fraction(float(value)).limit_denominator() return int(frac.numerator), int(frac.denominator) return (1, 1)
[docs] def _coerce_short(value: Any) -> int: """ Coerce a value to a TIFF short integer. :param value: Value to coerce. :type value: Any :return: Integer suitable for TIFF short tags. :rtype: int """ if isinstance(value, (list, tuple)) and value: return int(value[0]) if isinstance(value, np.generic): return int(value.item()) return int(value)
[docs] def _normalize_tag_value(value: Any) -> Any: """ Normalize tag values to JSON- and HDF5-friendly types. :param value: Tag value to normalize. :type value: Any :return: Normalized tag value. :rtype: Any """ if isinstance(value, bytes): return _decode_text_bytes(value) if hasattr(value, 'numerator') and hasattr(value, 'denominator'): return [int(value.numerator), int(value.denominator)] if isinstance(value, np.generic): return value.item() if isinstance(value, (list, tuple)): normalized = [_normalize_tag_value(v) for v in value] if len(normalized) == 2 and all(hasattr(v, 'numerator') and hasattr(v, 'denominator') for v in value): num0 = int(value[0].numerator) num1 = int(value[1].numerator) den0 = int(value[0].denominator) den1 = int(value[1].denominator) if den0 == 1 and den1 == 1: return [num0, num1] return normalized return value
[docs] def _ascii_safe(value: str) -> str: """ Normalize a string to 7-bit ASCII for TIFF string tags. :param value: Input string to normalize. :type value: str :return: ASCII-safe string with non-ASCII characters replaced. :rtype: str """ replacement_map = { 'µ': 'u', 'ö': 'oe', 'Ö': 'Oe', 'ä': 'ae', 'Ä': 'Ae', 'ü': 'ue', 'Ü': 'Ue', 'ß': 'ss', } for key, repl in replacement_map.items(): value = value.replace(key, repl) return value.encode('ascii', errors='replace').decode('ascii')
def _read_dat_text(dat_path: Path) -> str:
    """
    Load a DAT file and decode it to text.

    The file is read as bytes and decoding is delegated to
    ``_decode_text_bytes``.

    :param dat_path: Path to the DAT file.
    :type dat_path: Path
    :return: Decoded DAT text.
    :rtype: str
    """
    return _decode_text_bytes(dat_path.read_bytes())


[docs] def _decode_text_bytes(raw: bytes) -> str: """ Decode raw bytes preferring UTF-8, falling back to Latin-1. :param raw: Raw byte sequence. :type raw: bytes :return: Decoded string. :rtype: str """ try: return raw.decode('utf-8') except UnicodeDecodeError: try: return raw.decode('latin-1') except UnicodeDecodeError: return raw.decode(errors='replace')
[docs] def _parse_dat_sections(text: str) -> dict[str, dict[str, str]]: """ Parse a DAT sidecar file into sectioned key/value pairs. :param text: Raw DAT file content. :type text: str :return: Mapping of section name to key/value pairs. :rtype: dict[str, dict[str, str]] """ sections: dict[str, dict[str, str]] = {} current: str | None = None for raw_line in text.splitlines(): line = raw_line.strip() if not line: continue if line.startswith((';', '#')): continue if line.startswith('[') and line.endswith(']'): current = line[1:-1].strip() sections.setdefault(current, {}) continue if current is None: continue if '=' not in line: continue key, value = line.split('=', 1) sections[current][key.strip()] = value.strip() return sections
[docs] def _find_dat_value( sections: dict[str, dict[str, str]], section_names: Iterable[str], key_names: Iterable[str], ) -> str | None: """ Find the first matching value in DAT sections. :param sections: Parsed DAT sections. :type sections: dict[str, dict[str, str]] :param section_names: Section names to search. :type section_names: Iterable[str] :param key_names: Key names to search. :type key_names: Iterable[str] :return: The first matching value, if any. :rtype: str | None """ for section_name in section_names: section = sections.get(section_name) if not section: continue for key in key_names: value = section.get(key) if value not in (None, ''): return value return None
[docs] def _coerce_int(value: Any) -> int | None: """ Coerce a value to int when possible. :param value: Input value. :type value: Any :return: Parsed integer or None. :rtype: int | None """ if value in (None, ''): return None try: return int(str(value).strip(), 10) except (TypeError, ValueError): return None
[docs] def _coerce_float(value: Any) -> float | None: """ Coerce a value to float when possible. :param value: Input value. :type value: Any :return: Parsed float or None. :rtype: float | None """ if value in (None, ''): return None try: return float(str(value).strip()) except (TypeError, ValueError): return None
[docs] def _resolution_from_nm(pixel_size_nm: float | None) -> float | None: """ Convert a pixel size in nanometers to pixels per centimeter. :param pixel_size_nm: Pixel size in nanometers. :type pixel_size_nm: float | None :return: Resolution in pixels per centimeter. :rtype: float | None """ if pixel_size_nm is None or pixel_size_nm <= 0: return None return 1.0e7 / pixel_size_nm
[docs] def _map_photometric(value: str | None) -> int: """ Map photometric interpretation names to TIFF codes. :param value: Photometric interpretation string. :type value: str | None :return: TIFF photometric interpretation code. :rtype: int """ if not value: return 1 normalized = value.strip().upper() if normalized == 'MONOCHROME1': return 0 if normalized == 'MONOCHROME2': return 1 return 1
def _build_dat_description(sections: dict[str, dict[str, str]]) -> str | None:
    """
    Compose an AIDA-style ImageDescription from DAT sections.

    The text starts with the device serial (when found), followed by a blank
    line and the acquisition details that are present. Binning and
    EraseEnable are rendered as yes/no.

    :param sections: Parsed DAT sections.
    :type sections: dict[str, dict[str, str]]
    :return: Description string or None.
    :rtype: str | None
    """

    def _mode_value(*keys: str) -> str | None:
        # All acquisition details are searched in the same two sections.
        return _find_dat_value(sections, section_names=('RawImageParamSet', 'Mode'), key_names=keys)

    device_serial = _find_dat_value(
        sections,
        section_names=('imageinfo', 'Image_X', 'Setting'),
        key_names=('ScannerSerial', 'DeviceID'),
    )
    parameter_set = _mode_value('ParamName', 'ModeName_en', 'ModeName')
    pmt_hv = _mode_value('PMT_HV')
    laser = _mode_value('Laser')
    pentaspd = _mode_value('Pentaspd')
    binning = _mode_value('Binning')
    erase_enable = _mode_value('EraseEnable')

    header: list[str] = [f'Device serial: {device_serial}'] if device_serial else []

    details: list[str] = []
    if parameter_set:
        details.append(f'Parameter Set: {parameter_set}')
    if pmt_hv:
        details.append(f'PMT_HV: {pmt_hv}')
    if laser:
        details.append(f'Laser: {laser}')
    if pentaspd:
        details.append(f'Pentaspd: {pentaspd}')
    if binning is not None:
        details.append(f'Binning: {_format_yes_no(binning)}')
    if erase_enable is not None:
        details.append(f'Eraser Enable: {_format_yes_no(erase_enable)}')

    # A blank separator line is only needed when both parts are present.
    if header and details:
        header.append('')
    combined = header + details
    return '\n'.join(combined) if combined else None


[docs] def _format_yes_no(value: Any) -> str: """ Format a boolean-like DAT value as yes/no. :param value: DAT value to interpret. :type value: Any :return: 'yes' or 'no'. :rtype: str """ text = str(value).strip().lower() if text in {'1', 'true', 'yes', 'y', 'on'}: return 'yes' return 'no'