Source code for hsr4hci.hypotheses

"""
Methods for finding hypotheses of the form `(Y, T)`, that is, pixel `Y`
seems to contain a planet signal at time `T`.
"""

# -----------------------------------------------------------------------------
# IMPORTS
# -----------------------------------------------------------------------------

from typing import Dict, Tuple

from sklearn.metrics.pairwise import cosine_similarity
from tqdm.auto import tqdm

import numpy as np

from hsr4hci.forward_modeling import get_time_series_for_position
from hsr4hci.masking import get_positions_from_mask
from hsr4hci.training import get_signal_times


# -----------------------------------------------------------------------------
# FUNCTION DEFINITIONS
# -----------------------------------------------------------------------------

[docs]def get_all_hypotheses(
    roi_mask: np.ndarray,
    residuals: Dict[str, np.ndarray],
    parang: np.ndarray,
    n_signal_times: int,
    frame_size: Tuple[int, int],
    psf_template: np.ndarray,
    minimum_similarity: float = 0.0,
    n_roi_splits: int = 1,
    roi_split: int = 0,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    This is a convenience function which wraps the loop over the ROI
    to call :func:`get_hypothesis_for_position()` for every spatial
    pixel. See there for a full documentation of all parameters.
    """

    # Initialize array for hypotheses and similarities
    hypotheses = np.full(frame_size, np.nan)
    similarities = np.full(frame_size, np.nan)

    # Define positions for which to run (= subset of the ROI)
    positions = get_positions_from_mask(roi_mask)[roi_split::n_roi_splits]

    # Loop over all spatial positions, find the hypothesis and similarity
    # for the current position, and store them
    for position in tqdm(positions, ncols=80):
        signal_time, similarity = get_hypothesis_for_position(
            residuals=residuals,
            position=position,
            parang=parang,
            n_signal_times=n_signal_times,
            frame_size=frame_size,
            psf_template=psf_template,
            minimum_similarity=minimum_similarity,
        )
        hypotheses[position[0], position[1]] = signal_time
        similarities[position[0], position[1]] = similarity

    return hypotheses, similarities


[docs]def get_hypothesis_for_position(
    residuals: Dict[str, np.ndarray],
    position: Tuple[int, int],
    parang: np.ndarray,
    n_signal_times: int,
    frame_size: Tuple[int, int],
    psf_template: np.ndarray,
    minimum_similarity: float = 0.0,
) -> Tuple[float, float]:
    """
    Take a dictionary containing the full training results and find,
    for a given spatial ``position``, the time `T` at which we suspect
    there to be a planet at the position. If there is no time at which
    the residuals look like the contain a planet, return `NaN`.

    Args:
        residuals: A dictionary containing the residuals of all models
            that we have trained. The keys should be `"default"`, `"0"`,
            ..., `"N"`, where the latter are the signal times of the
            temporal grid that was used during training, and each key
            should map to a 3D numpy array consisting of the residuals
            for the respective model.
        position: A tuple `(x, y)` specifying the position for which we
            want to find the best hypothesis (i.e., the best guess for
            the time at which this pixel contains a planet).
        parang: A 1D numpy array of shape `(n_frames, )` containing the
            parallactic angles for each frame.
        n_signal_times: An integer specifying the number of different
            signal times, that is, the size of the temporal grid that
            was used during training.
        frame_size: A tuple `(x_size, y_size)` specifying the size (in
            pixels) of the frames that we are working with.
        psf_template: A 2D numpy array containing the unsaturated PSF
            template for the data set.
        minimum_similarity: Minimum cosine similarity between the
            expected and the observed signal for the signal to be
            counted as a hypothesis.

    Returns:
        A tuple `(signal_time, similarity)`, where `signal_time` is
        the best guess for the `signal_time` of the planet signal in
        `position`, and `score` is the corresponding cosine similarity
        between the expected and the observed signal.
        If the `position` does not seem to contain a planet signal at
        any time, both of these values are `NaN`.
    """

    # Initialize variables in which we store the optimum
    best_signal_time = np.nan
    best_similarity = 0.0

    # Loop over the temporal grid to find the best hypothesis
    for signal_time in get_signal_times(
        n_frames=len(parang), n_signal_times=n_signal_times
    ):

        # Select residual for this position
        residual = np.asarray(
            residuals[str(signal_time)][:, position[0], position[1]]
        )

        # If the residual is NaN, we can't compute the metric function
        if np.isnan(residual).any():
            continue

        # Compute the expected signal time series
        expected_time_series = get_time_series_for_position(
            position=position,
            signal_time=signal_time,
            frame_size=frame_size,
            parang=parang,
            psf_template=psf_template,
        )

        # Compute the cosine similarity, which serves as a metric for how
        # well the expected time series and the residual match
        similarity = cosine_similarity(
            expected_time_series.reshape(1, -1),
            residual.reshape(1, -1),
        )

        # Clip the similarity to [0, 1] (a signal time that leads to a
        # negative cosine similarity should never become a hypothesis);
        # also round to a "reasonable" precision
        similarity = float(np.around(np.clip(similarity, a_min=0, a_max=1), 3))

        # Store the current similarity if it is better than the optimum so far
        if similarity > best_similarity:
            best_similarity = similarity
            best_signal_time = signal_time

    # Return type must be float, because it can also be NaN
    if best_similarity > minimum_similarity:
        return float(best_signal_time), best_similarity
    return np.nan, np.nan