Module `qbiome.forecaster`

Expand source code

import numpy as np

from quasinet import qnet

class Forecaster:
    """Forecast the data week by week by sequentially generating qnet predictions for the next timestamp and using the filled timestamp to update qnet predictions
    """

    def __init__(self, qnet_orchestrator):
        """Initialization

        Args:
            qnet_orchestrator (qbiome.QnetOrchestrator): an instance with a trained qnet model
        """
        self.qnet_orchestrator = qnet_orchestrator
        # The orchestrator's quantizer is reused to attach metadata to the
        # forecasted matrix and to reshape it for plotting.
        self.quantizer = qnet_orchestrator.quantizer

    def forecast_data(self,
                      data,
                      start_week,
                      end_week=None,
                      n_samples=100,
                      subject_id=None):
        """Forecast the data matrix from `start_week` to `end_week`

        Output format:

        |   subject_id | variable         |   week |    value |
        |-------------:|:-----------------|-------:|---------:|
        |            1 | Actinobacteriota |     27 | 0.36665  |
        |            1 | Bacteroidota     |     27 | 0.507248 |
        |            1 | Campilobacterota |     27 | 0.002032 |

        Args:
            data (numpy.ndarray): 2D array of label strings, as produced by `get_qnet_inputs` (not defined on this class)
            start_week (int): start predicting from this week
            end_week (int, optional): end predicting after this week. Defaults to `self.qnet_orchestrator.get_max_timestamp()`.
            n_samples (int, optional): the number of times to sample from qnet predictions for one masked entry. Defaults to 100.
            subject_id (list[str], optional): subject id for each row of data. If None, we assume data has as many rows as quantized originally. This must be set for new patient data.

        Returns:
            pandas.DataFrame: see format above

        Raises:
            ValueError: if `subject_id` is given and its length differs from the number of rows of the forecasted data frame
        """
        if end_week is None:
            end_week = self.qnet_orchestrator.get_max_timestamp()

        # One forecasted row per input row; every row is overwritten below,
        # so the uninitialized contents of np.empty are never read.
        forecasted_matrix = np.empty(data.shape)
        for idx, seq in enumerate(data):
            forecasted_seq = self.qnet_orchestrator.predict_sequentially_by_week(
                seq, start_week, end_week, n_samples=n_samples
            )
            forecasted_matrix[idx] = forecasted_seq

        if subject_id is None:
            df = self.quantizer.add_meta_to_matrix(forecasted_matrix)
        else:
            df = self.quantizer.add_meta_to_matrix(forecasted_matrix, add_subject_id=False)
            if len(subject_id) != df.index.size:
                # Raise a real exception type: raising a bare string is itself
                # a TypeError and would hide this message from the caller.
                raise ValueError('Subject_id list must match in length to number of rows in input data matrix in forecaster')
            df['subject_id'] = subject_id
        # convert to plottable format
        plot_df = self.quantizer.melt_into_plot_format(df)
        return plot_df

Classes

class Forecaster (qnet_orchestrator)

Forecast the data week by week by sequentially generating qnet predictions for the next timestamp and using the filled timestamp to update qnet predictions

Initialization

Args

qnet_orchestrator : qbiome.QnetOrchestrator: an instance with a trained qnet model

Expand source code

class Forecaster:
    """Forecast the data week by week by sequentially generating qnet predictions for the next timestamp and using the filled timestamp to update qnet predictions
    """

    def __init__(self, qnet_orchestrator):
        """Initialization

        Args:
            qnet_orchestrator (qbiome.QnetOrchestrator): an instance with a trained qnet model
        """
        self.qnet_orchestrator = qnet_orchestrator
        # The orchestrator's quantizer is reused to attach metadata to the
        # forecasted matrix and to reshape it for plotting.
        self.quantizer = qnet_orchestrator.quantizer

    def forecast_data(self,
                      data,
                      start_week,
                      end_week=None,
                      n_samples=100,
                      subject_id=None):
        """Forecast the data matrix from `start_week` to `end_week`

        Output format:

        |   subject_id | variable         |   week |    value |
        |-------------:|:-----------------|-------:|---------:|
        |            1 | Actinobacteriota |     27 | 0.36665  |
        |            1 | Bacteroidota     |     27 | 0.507248 |
        |            1 | Campilobacterota |     27 | 0.002032 |

        Args:
            data (numpy.ndarray): 2D array of label strings, as produced by `get_qnet_inputs` (not defined on this class)
            start_week (int): start predicting from this week
            end_week (int, optional): end predicting after this week. Defaults to `self.qnet_orchestrator.get_max_timestamp()`.
            n_samples (int, optional): the number of times to sample from qnet predictions for one masked entry. Defaults to 100.
            subject_id (list[str], optional): subject id for each row of data. If None, we assume data has as many rows as quantized originally. This must be set for new patient data.

        Returns:
            pandas.DataFrame: see format above

        Raises:
            ValueError: if `subject_id` is given and its length differs from the number of rows of the forecasted data frame
        """
        if end_week is None:
            end_week = self.qnet_orchestrator.get_max_timestamp()

        # One forecasted row per input row; every row is overwritten below,
        # so the uninitialized contents of np.empty are never read.
        forecasted_matrix = np.empty(data.shape)
        for idx, seq in enumerate(data):
            forecasted_seq = self.qnet_orchestrator.predict_sequentially_by_week(
                seq, start_week, end_week, n_samples=n_samples
            )
            forecasted_matrix[idx] = forecasted_seq

        if subject_id is None:
            df = self.quantizer.add_meta_to_matrix(forecasted_matrix)
        else:
            df = self.quantizer.add_meta_to_matrix(forecasted_matrix, add_subject_id=False)
            if len(subject_id) != df.index.size:
                # Raise a real exception type: raising a bare string is itself
                # a TypeError and would hide this message from the caller.
                raise ValueError('Subject_id list must match in length to number of rows in input data matrix in forecaster')
            df['subject_id'] = subject_id
        # convert to plottable format
        plot_df = self.quantizer.melt_into_plot_format(df)
        return plot_df

Methods

def forecast_data(self, data, start_week, end_week=None, n_samples=100, subject_id=None)

Forecast the data matrix from start_week to end_week

Output format:

subject_id	variable	week	value
1	Actinobacteriota	27	0.36665
1	Bacteroidota	27	0.507248
1	Campilobacterota	27	0.002032

Args

data : numpy.ndarray: 2D array of label strings, produced by self.get_qnet_inputs
start_week : int: start predicting from this week
end_week : int, optional: end predicting after this week
n_samples : int, optional: the number of times to sample from qnet predictions for one masked entry. Defaults to 100.
subject_id : list[str], optional: subject id for each row of data. If None, we assume data has as many rows as quantized originally. This must be set for new patient data.

Returns

pandas.DataFrame: see format above

Expand source code

def forecast_data(self,
                  data,
                  start_week,
                  end_week=None,
                  n_samples=100,
                  subject_id=None):
    """Forecast the data matrix from `start_week` to `end_week`

    Output format:

    |   subject_id | variable         |   week |    value |
    |-------------:|:-----------------|-------:|---------:|
    |            1 | Actinobacteriota |     27 | 0.36665  |
    |            1 | Bacteroidota     |     27 | 0.507248 |
    |            1 | Campilobacterota |     27 | 0.002032 |

    Args:
        data (numpy.ndarray): 2D array of label strings, as produced by `get_qnet_inputs`
        start_week (int): start predicting from this week
        end_week (int, optional): end predicting after this week. Defaults to `self.qnet_orchestrator.get_max_timestamp()`.
        n_samples (int, optional): the number of times to sample from qnet predictions for one masked entry. Defaults to 100.
        subject_id (list[str], optional): subject id for each row of data. If None, we assume data has as many rows as quantized originally. This must be set for new patient data.

    Returns:
        pandas.DataFrame: see format above

    Raises:
        ValueError: if `subject_id` is given and its length differs from the number of rows of the forecasted data frame
    """
    if end_week is None:
        end_week = self.qnet_orchestrator.get_max_timestamp()

    # One forecasted row per input row; every row is overwritten below,
    # so the uninitialized contents of np.empty are never read.
    forecasted_matrix = np.empty(data.shape)
    for idx, seq in enumerate(data):
        forecasted_seq = self.qnet_orchestrator.predict_sequentially_by_week(
            seq, start_week, end_week, n_samples=n_samples
        )
        forecasted_matrix[idx] = forecasted_seq

    if subject_id is None:
        df = self.quantizer.add_meta_to_matrix(forecasted_matrix)
    else:
        df = self.quantizer.add_meta_to_matrix(forecasted_matrix, add_subject_id=False)
        if len(subject_id) != df.index.size:
            # Raise a real exception type: raising a bare string is itself
            # a TypeError and would hide this message from the caller.
            raise ValueError('Subject_id list must match in length to number of rows in input data matrix in forecaster')
        df['subject_id'] = subject_id
    # convert to plottable format
    plot_df = self.quantizer.melt_into_plot_format(df)
    return plot_df