# Copyright 2026 European Union
# Author: Bulgheroni Antonio (antonio.bulgheroni@ec.europa.eu)
# SPDX-License-Identifier: EUPL-1.2
"""
TIFF metadata utilities for RadioViz.
This module provides helper functions to extract, normalize, and prepare
TIFF metadata for persistence and display. It focuses on the subset of
TIFF tags that are meaningful for autoradiography images and ensures
values are kept JSON- and HDF5-friendly for workspace serialization.
"""
from __future__ import annotations
from fractions import Fraction
from io import BytesIO
from pathlib import Path
from typing import Any, Iterable, cast
import numpy as np
import tifffile
# Numeric TIFF tag IDs mapped to their tag names. Within this module,
# build_tiff_kwargs reads back IDs 270, 305, 282, 283 and 296 when it prepares
# keyword arguments for writing; the remaining entries are only named here.
TIFF_TAGS_TO_PRESERVE: dict[int, str] = {
    # Image geometry and pixel layout.
    256: 'ImageWidth',
    257: 'ImageLength',
    258: 'BitsPerSample',
    259: 'Compression',
    262: 'PhotometricInterpretation',
    # Free-text provenance.
    270: 'ImageDescription',
    271: 'Make',
    # Spatial calibration: X/Y resolution values and the unit they refer to.
    282: 'XResolution',
    283: 'YResolution',
    296: 'ResolutionUnit',
    # Name of the software that produced the file.
    305: 'Software',
}
"""Mapping of TIFF tag IDs to their canonical names for preserved metadata."""
[docs]
def build_tiff_kwargs(
    metadata: dict[str, Any],
    derivation_note: str | None,
    software_suffix: str,
) -> dict[str, Any]:
    """
    Build standard TIFF keyword arguments for writing metadata.

    The tag map extracted from ``metadata`` is first updated with the
    derivation note and software suffix, then translated into keyword
    arguments: ``description`` (tag 270), ``software`` (tag 305),
    ``resolution`` (tags 282 and 283, both required) and ``resolutionunit``
    (tag 296). Text values are reduced to 7-bit ASCII.

    A resolution whose stored rational has a zero denominator is treated as
    unavailable and omitted, instead of raising ``ZeroDivisionError``.

    :param metadata: Image metadata dictionary.
    :type metadata: dict[str, Any]
    :param derivation_note: Description of the derivation to append.
    :type derivation_note: str | None
    :param software_suffix: Software string to append if missing.
    :type software_suffix: str
    :return: Dictionary of tifffile.imwrite keyword arguments.
    :rtype: dict[str, Any]
    """
    tag_map = _tag_map_from_metadata(metadata)
    updated_tags = _apply_description_and_software(tag_map, derivation_note, software_suffix)
    kwargs: dict[str, Any] = {}
    if 270 in updated_tags:
        kwargs['description'] = _ascii_safe(str(updated_tags[270]))
    if 305 in updated_tags:
        kwargs['software'] = _ascii_safe(str(updated_tags[305]))
    if 282 in updated_tags and 283 in updated_tags:
        x_num, x_den = _coerce_rational(updated_tags[282])
        y_num, y_den = _coerce_rational(updated_tags[283])
        # A zero denominator carries no usable resolution; skip the keyword
        # rather than crash, matching how _resolution_value reports it.
        if x_den != 0 and y_den != 0:
            kwargs['resolution'] = (x_num / x_den, y_num / y_den)
    if 296 in updated_tags:
        kwargs['resolutionunit'] = _coerce_short(updated_tags[296])
    return kwargs
[docs]
def _sorted_tag_ids(keys: Any) -> list[str]:
"""
Sort tag identifiers numerically when possible.
:param keys: Iterable of tag identifiers.
:type keys: Any
:return: Sorted list of tag identifiers as strings.
:rtype: list[str]
"""
def _key(value: Any) -> tuple[int, str]:
try:
return int(value), str(value)
except (TypeError, ValueError):
return 10_000, str(value)
return [str(k) for k in sorted(keys, key=_key)]
[docs]
def _resolution_value(value: Any) -> float | None:
"""
Convert a TIFF resolution tag value to a floating-point number.
:param value: TIFF tag value.
:type value: Any
:return: Resolution as float or None when unavailable.
:rtype: float | None
"""
if value in (None, ''):
return None
if isinstance(value, (int, float, np.number)):
try:
return float(value)
except (TypeError, ValueError):
return None
if isinstance(value, (list, tuple)):
if len(value) == 2 and all(isinstance(v, (int, float, np.number)) for v in value):
if value[1] == 0:
return None
return float(value[0]) / float(value[1])
if value and isinstance(value[0], (list, tuple)) and len(value[0]) == 2:
num, den = value[0]
if den == 0:
return None
return float(num) / float(den)
return None
[docs]
def _resolution_unit_to_meters(unit_value: int) -> float | None:
"""
Convert a TIFF ResolutionUnit to meters.
:param unit_value: TIFF resolution unit code.
:type unit_value: int
:return: Unit length in meters, or None if unsupported.
:rtype: float | None
"""
if unit_value == 2:
return 0.0254
if unit_value == 3:
return 0.01
return None
[docs]
def _apply_description_and_software(
    tag_map: dict[int, Any],
    derivation_note: str | None,
    software_suffix: str,
) -> dict[int, Any]:
    """
    Return a copy of the tag map with description and software updated.

    The input mapping is not modified. Tag 270 is rewritten only when a
    derivation note is given; tag 305 is always set.

    :param tag_map: Tag map to update.
    :type tag_map: dict[int, Any]
    :param derivation_note: Description of the derivation to append.
    :type derivation_note: str | None
    :param software_suffix: Software string to append if missing.
    :type software_suffix: str
    :return: Updated tag map.
    :rtype: dict[int, Any]
    """
    result = dict(tag_map)

    if derivation_note:
        prior_description = result.get(270)
        description_text = str(prior_description) if prior_description else ''
        result[270] = _append_description(description_text, derivation_note)
    # Without a note, any existing description stays exactly as copied.

    prior_software = result.get(305)
    software_text = str(prior_software) if prior_software else ''
    result[305] = _append_software(software_text, software_suffix)
    return result
[docs]
def _append_description(existing: str, note: str) -> str:
"""
Append a derivation note to an existing description string.
:param existing: Existing description string.
:type existing: str
:param note: Derivation note to append.
:type note: str
:return: Updated description string.
:rtype: str
"""
existing = existing.strip()
note = note.strip()
if not existing:
return note
if note in existing:
return existing
return f'{existing}\n{note}'
[docs]
def _append_software(existing: str, suffix: str) -> str:
"""
Append a software suffix if not already present.
:param existing: Existing software string.
:type existing: str
:param suffix: Software suffix to append.
:type suffix: str
:return: Updated software string.
:rtype: str
"""
existing = existing.strip()
suffix = suffix.strip()
if not existing:
return suffix
if suffix in existing:
return existing
return f'{existing}; {suffix}'
[docs]
def _coerce_rational(value: Any) -> tuple[int, int]:
"""
Coerce a value to a TIFF rational tuple.
A TIFF rational is a pair of 32-bit unsigned integers representing a
fraction (numerator/denominator). TIFF uses this format to store
resolution values and other floating-point quantities with high precision.
For example, a resolution of 300 DPI is stored as the rational (300, 1),
while 72.5 DPI would be stored as (145, 2).
This function accepts various input types and converts them to the
rational tuple format expected by TIFF tags:
- Fraction objects: extracts numerator and denominator directly
- 2-element lists/tuples: treats as (numerator, denominator)
- Numeric types (int, float, numpy.number): converts to a reduced fraction
:param value: Value to coerce. Can be a Fraction, 2-element sequence,
or any numeric type.
:type value: Any
:return: Rational tuple (numerator, denominator).
:rtype: tuple[int, int]
Example::
>>> _coerce_rational(72.5)
(145, 2)
>>> _coerce_rational(Fraction(3, 4))
(3, 4)
>>> _coerce_rational([300, 1])
(300, 1)
"""
if hasattr(value, 'numerator') and hasattr(value, 'denominator'):
return int(value.numerator), int(value.denominator)
if isinstance(value, (list, tuple)) and len(value) == 2:
return int(value[0]), int(value[1])
if isinstance(value, (int, float, np.number)):
frac = Fraction(float(value)).limit_denominator()
return int(frac.numerator), int(frac.denominator)
return (1, 1)
[docs]
def _coerce_short(value: Any) -> int:
"""
Coerce a value to a TIFF short integer.
:param value: Value to coerce.
:type value: Any
:return: Integer suitable for TIFF short tags.
:rtype: int
"""
if isinstance(value, (list, tuple)) and value:
return int(value[0])
if isinstance(value, np.generic):
return int(value.item())
return int(value)
[docs]
def _normalize_tag_value(value: Any) -> Any:
"""
Normalize tag values to JSON- and HDF5-friendly types.
:param value: Tag value to normalize.
:type value: Any
:return: Normalized tag value.
:rtype: Any
"""
if isinstance(value, bytes):
return _decode_text_bytes(value)
if hasattr(value, 'numerator') and hasattr(value, 'denominator'):
return [int(value.numerator), int(value.denominator)]
if isinstance(value, np.generic):
return value.item()
if isinstance(value, (list, tuple)):
normalized = [_normalize_tag_value(v) for v in value]
if len(normalized) == 2 and all(hasattr(v, 'numerator') and hasattr(v, 'denominator') for v in value):
num0 = int(value[0].numerator)
num1 = int(value[1].numerator)
den0 = int(value[0].denominator)
den1 = int(value[1].denominator)
if den0 == 1 and den1 == 1:
return [num0, num1]
return normalized
return value
[docs]
def _ascii_safe(value: str) -> str:
"""
Normalize a string to 7-bit ASCII for TIFF string tags.
:param value: Input string to normalize.
:type value: str
:return: ASCII-safe string with non-ASCII characters replaced.
:rtype: str
"""
replacement_map = {
'µ': 'u',
'ö': 'oe',
'Ö': 'Oe',
'ä': 'ae',
'Ä': 'Ae',
'ü': 'ue',
'Ü': 'Ue',
'ß': 'ss',
}
for key, repl in replacement_map.items():
value = value.replace(key, repl)
return value.encode('ascii', errors='replace').decode('ascii')
[docs]
def _read_dat_text(dat_path: Path) -> str:
    """
    Load a DAT file from disk and decode it to text.

    The file is read as raw bytes and handed to ``_decode_text_bytes``,
    which chooses the encoding.

    :param dat_path: Path to the DAT file.
    :type dat_path: Path
    :return: Decoded DAT text.
    :rtype: str
    """
    return _decode_text_bytes(dat_path.read_bytes())
[docs]
def _decode_text_bytes(raw: bytes) -> str:
"""
Decode raw bytes preferring UTF-8, falling back to Latin-1.
:param raw: Raw byte sequence.
:type raw: bytes
:return: Decoded string.
:rtype: str
"""
try:
return raw.decode('utf-8')
except UnicodeDecodeError:
try:
return raw.decode('latin-1')
except UnicodeDecodeError:
return raw.decode(errors='replace')
[docs]
def _parse_dat_sections(text: str) -> dict[str, dict[str, str]]:
"""
Parse a DAT sidecar file into sectioned key/value pairs.
:param text: Raw DAT file content.
:type text: str
:return: Mapping of section name to key/value pairs.
:rtype: dict[str, dict[str, str]]
"""
sections: dict[str, dict[str, str]] = {}
current: str | None = None
for raw_line in text.splitlines():
line = raw_line.strip()
if not line:
continue
if line.startswith((';', '#')):
continue
if line.startswith('[') and line.endswith(']'):
current = line[1:-1].strip()
sections.setdefault(current, {})
continue
if current is None:
continue
if '=' not in line:
continue
key, value = line.split('=', 1)
sections[current][key.strip()] = value.strip()
return sections
[docs]
def _find_dat_value(
sections: dict[str, dict[str, str]],
section_names: Iterable[str],
key_names: Iterable[str],
) -> str | None:
"""
Find the first matching value in DAT sections.
:param sections: Parsed DAT sections.
:type sections: dict[str, dict[str, str]]
:param section_names: Section names to search.
:type section_names: Iterable[str]
:param key_names: Key names to search.
:type key_names: Iterable[str]
:return: The first matching value, if any.
:rtype: str | None
"""
for section_name in section_names:
section = sections.get(section_name)
if not section:
continue
for key in key_names:
value = section.get(key)
if value not in (None, ''):
return value
return None
[docs]
def _coerce_int(value: Any) -> int | None:
"""
Coerce a value to int when possible.
:param value: Input value.
:type value: Any
:return: Parsed integer or None.
:rtype: int | None
"""
if value in (None, ''):
return None
try:
return int(str(value).strip(), 10)
except (TypeError, ValueError):
return None
[docs]
def _coerce_float(value: Any) -> float | None:
"""
Coerce a value to float when possible.
:param value: Input value.
:type value: Any
:return: Parsed float or None.
:rtype: float | None
"""
if value in (None, ''):
return None
try:
return float(str(value).strip())
except (TypeError, ValueError):
return None
[docs]
def _resolution_from_nm(pixel_size_nm: float | None) -> float | None:
"""
Convert a pixel size in nanometers to pixels per centimeter.
:param pixel_size_nm: Pixel size in nanometers.
:type pixel_size_nm: float | None
:return: Resolution in pixels per centimeter.
:rtype: float | None
"""
if pixel_size_nm is None or pixel_size_nm <= 0:
return None
return 1.0e7 / pixel_size_nm
[docs]
def _map_photometric(value: str | None) -> int:
"""
Map photometric interpretation names to TIFF codes.
:param value: Photometric interpretation string.
:type value: str | None
:return: TIFF photometric interpretation code.
:rtype: int
"""
if not value:
return 1
normalized = value.strip().upper()
if normalized == 'MONOCHROME1':
return 0
if normalized == 'MONOCHROME2':
return 1
return 1
[docs]
def _build_dat_description(sections: dict[str, dict[str, str]]) -> str | None:
    """
    Assemble an AIDA-style ImageDescription from parsed DAT sections.

    The result starts with a ``Device serial`` line when a serial is found,
    followed (after a blank line) by the acquisition details that are
    present: parameter set, PMT_HV, laser, Pentaspd, binning and eraser
    enable. The last two are rendered through ``_format_yes_no``.

    :param sections: Parsed DAT sections.
    :type sections: dict[str, dict[str, str]]
    :return: Description string or None.
    :rtype: str | None
    """
    acquisition_sections = ('RawImageParamSet', 'Mode')

    def _acquisition(*keys: str) -> str | None:
        # All acquisition details live in the same candidate sections.
        return _find_dat_value(sections, section_names=acquisition_sections, key_names=keys)

    serial = _find_dat_value(
        sections,
        section_names=('imageinfo', 'Image_X', 'Setting'),
        key_names=('ScannerSerial', 'DeviceID'),
    )
    header: list[str] = [f'Device serial: {serial}'] if serial else []

    details: list[str] = []
    parameter_set = _acquisition('ParamName', 'ModeName_en', 'ModeName')
    if parameter_set:
        details.append(f'Parameter Set: {parameter_set}')
    pmt_hv = _acquisition('PMT_HV')
    if pmt_hv:
        details.append(f'PMT_HV: {pmt_hv}')
    laser = _acquisition('Laser')
    if laser:
        details.append(f'Laser: {laser}')
    pentaspd = _acquisition('Pentaspd')
    if pentaspd:
        details.append(f'Pentaspd: {pentaspd}')
    binning = _acquisition('Binning')
    if binning is not None:
        details.append(f'Binning: {_format_yes_no(binning)}')
    erase_enable = _acquisition('EraseEnable')
    if erase_enable is not None:
        details.append(f'Eraser Enable: {_format_yes_no(erase_enable)}')

    if header and details:
        # Blank line between the serial and the detail block.
        header.append('')
    combined = header + details
    return '\n'.join(combined) if combined else None