Variability Statistics¶

This tutorial showcases VASCA’s main statistics computation (get_var_stat) based on synthetic data. The simple simulation framework allows for the variation of data points, flux uncertainties, number of flare events as well as intrinsic variability parameters like amplitude and period.

from dataclasses import dataclass
from pprint import pformat

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.gridspec import GridSpec
from scipy.stats import chi2

def get_var_stat(vals: list[float], vals_err: list[float]) -> dict[str, float]:
    """
    Calculate variability parameters for a set of measurements

    Parameters
    ----------
    vals: array-like
        Variable values (e.g., flux measurements)
    vals_err: array-like
        Variable errors (uncertainties in measurements)

    Returns
    -------
    dict
        Dictionary containing variability statistics:
        - wght_mean: Weighted mean of the measurements
        - wght_mean_err: Error in the weighted mean
        - var: Sample variance
        - nxv: Normalized excess variance
        - rchiq: Reduced chi-square statistic
        - cpval: Chi-square p-value
    """
    vals = np.array(vals)
    vals_err = np.array(vals_err)

    rr = {}
    wght = 1.0 / vals_err**2
    rr["wght_mean"] = np.average(vals, weights=wght)
    rr["wght_mean_err"] = np.sqrt(1.0 / np.sum(wght))

    chiq_el = np.power(vals - rr["wght_mean"], 2) / np.power(vals_err, 2)
    chiq = np.sum(chiq_el)
    nr_vals = len(vals)

    if nr_vals > 1:
        rr["var"] = np.var(vals, ddof=1)
        rr["nxv"] = (rr["var"] - np.mean(vals_err**2)) / (
            rr["wght_mean"] * rr["wght_mean"]
        )
        rr["rchiq"] = chiq / (nr_vals - 1)
        rr["cpval"] = chi2.sf(chiq, nr_vals - 1)
    else:
        rr["var"] = rr["nxv"] = -100
        rr["rchiq"] = rr["cpval"] = -1.0

    return rr

Synthetic Light Curves¶

@dataclass
class SimulationParams:
    """Parameters for stellar variability simulation"""

    n_points: int = 50
    base_flux: float = 1000.0
    measurement_error_fraction: float = 0.05
    # Periodic variation parameters
    period: float | None = None
    amplitude: float | None = None
    # Flare parameters
    n_flares: float | None = None
    flare_amplitude: float | None = None
    flare_decay: float | None = 0.5
    # Trend parameters
    slope: float | None = None

def simulate_stellar_light_curve(params: SimulationParams) -> tuple:
    """
    General function to simulate various types of stellar light curves

    Parameters
    ----------
    params : SimulationParams
        Configuration dataclass containing all simulation parameters

    Returns
    -------
    tuple
        (times, fluxes, flux_errors) arrays
    """
    # Generate time array
    times = np.linspace(0, 100, params.n_points)

    # Initialize base flux and measurement errors
    flux_errors = (
        params.base_flux * params.measurement_error_fraction * np.ones(params.n_points)
    )
    fluxes = params.base_flux * np.ones(params.n_points)

    # Add periodic variations if specified
    if params.period is not None and params.amplitude is not None:
        phase = 2 * np.pi * times / params.period
        fluxes *= 1 + params.amplitude * np.sin(phase)

    # Add flares if specified
    if params.n_flares is not None and params.flare_amplitude is not None:
        flare_indices = np.random.choice(
            params.n_points, params.n_flares, replace=False
        )
        for idx in flare_indices:
            flare_profile = params.flare_amplitude * np.exp(
                -params.flare_decay * np.abs(times - times[idx])
            )
            fluxes += params.base_flux * flare_profile

    # Add linear trend if specified
    if params.slope is not None:
        fluxes += params.slope * times

    # Add measurement noise
    fluxes += np.random.normal(0, flux_errors)

    return times, fluxes, flux_errors

# Predefined scenario configurations
SCENARIOS = {
    "Quiet Star": SimulationParams(measurement_error_fraction=0.05),
    "Pulsating Star": SimulationParams(
        period=15, amplitude=0.20, measurement_error_fraction=0.03
    ),
    "Flare Star": SimulationParams(
        n_flares=3,
        flare_amplitude=4.0,
        measurement_error_fraction=0.04,
        flare_decay=0.5,
    ),
    "Low-Amplitude Variable": SimulationParams(
        period=25, amplitude=0.03, measurement_error_fraction=0.01
    ),
    "Noisy Variable": SimulationParams(
        period=20, amplitude=0.15, measurement_error_fraction=0.10
    ),
    "Secular Trend": SimulationParams(slope=-1.0, measurement_error_fraction=0.03),
}

def create_custom_scenario(scenario_type: str, **kwargs) -> SimulationParams:
    """
    Create a custom scenario by modifying default parameters

    Parameters
    ----------
    scenario_type : str
        Name of the base scenario to modify
    **kwargs : dict
        Parameters to override from the base scenario

    Returns
    -------
    SimulationParams
        Modified simulation parameters
    """
    if scenario_type not in SCENARIOS:
        msg: str = f"Unknown scenario type: {scenario_type}"
        raise ValueError(msg)

    # Create new params based on the scenario
    base_params = SCENARIOS[scenario_type]

    # Create a new instance with updated parameters
    param_dict = {k: v for k, v in base_params.__dict__.items()}
    param_dict.update(kwargs)

    return SimulationParams(**param_dict)

Visualization¶

@dataclass
class AnalysisResult:
    """Container for analysis results of a scenario"""

    scenario_name: str
    times: np.ndarray
    fluxes: np.ndarray
    errors: np.ndarray
    stats: dict
    interpretation: str


def analyze_scenario(scenario_name: str, params: SimulationParams) -> AnalysisResult:
    """
    Analyze a single scenario and provide interpretation
    """
    # Generate data
    times, fluxes, errors = simulate_stellar_light_curve(params)

    # Calculate statistics
    stats = get_var_stat(fluxes, errors)

    # Generate interpretation based on statistics
    interpretation = interpret_statistics(scenario_name, stats)

    return AnalysisResult(
        scenario_name=scenario_name,
        times=times,
        fluxes=fluxes,
        errors=errors,
        stats=stats,
        interpretation=interpretation,
    )


def interpret_statistics(scenario_name: str, stats: dict) -> str:
    """
    Generate human-readable interpretation of the statistics
    """
    interpretations = {
        "Quiet Star": f"Non-variable star showing noisy and constant flux:\n"
        f"• Excess variance ({stats['nxv']:.5f}) near zero as expected\n"
        f"• Reduced χ² ({stats['rchiq']:.2f}) ≈ 1 indicates good fit to constant flux\n"
        f"• High p-value ({stats['cpval']:.2e}) suggests no significant variability",
        "Pulsating Star": f"Clear periodic variability detected:\n"
        f"• Strong excess variance ({stats['nxv']:.3f}) indicates intrinsic variation\n"
        f"• High reduced χ² ({stats['rchiq']:.2f}) confirms deviation from constant flux\n"
        f"• Very low p-value ({stats['cpval']:.2e}) shows high significance",
        "Flare Star": f"Episodic flare behavior detected:\n"
        f"• Extreme excess variance ({stats['nxv']:.3f}) due to flare events\n"
        f"• Very high reduced χ² ({stats['rchiq']:.2f}) from strong flux excursions\n"
        f"• P-value ({stats['cpval']:.2e}) indicates definite variability (numerical zero)",
        "Low-Amplitude Variable": f"Subtle variability detected due to precise measurements:\n"
        f"• Excess variance ({stats['nxv']:.5f}) near zero due to low amplitude\n"
        f"• Reduced χ² ({stats['rchiq']:.2f}) indicates deviation from constant flux\n"
        f"• Low p-value ({stats['cpval']:.2e}) confirms significance",
        "Noisy Variable": f"Variability partially masked by measurement errors:\n"
        f"• Small excess variance ({stats['nxv']:.3f}) due to large noise\n"
        f"• Reduced χ² ({stats['rchiq']:.2f}) allows no clear interpretation\n"
        f"• Marginal p-value ({stats['cpval']:.2e}) suggests weak detection",
        "Secular Trend": f"Systematic brightness change detected:\n"
        f"• Small excess variance ({stats['nxv']:.3f}) from trend\n"
        f"• Reduced χ² ({stats['rchiq']:.2f}) allows no clear interpretation\n"
        f"• Marginal p-value ({stats['cpval']:.2e}) suggests weak detection",
    }

    return interpretations.get(
        scenario_name,
        "No interpretation available yet\nVariabilits Statistics:\n"
        f"{pformat(stats, sort_dicts=False)}",
    )


def visualize_results(
    results: list[AnalysisResult], save_path: str | None = None
) -> pd.DataFrame:
    """
    Create comprehensive visualization of all scenarios
    """
    n_scenarios = len(results)
    fig = plt.figure(figsize=(15, 4 * n_scenarios))
    gs = GridSpec(n_scenarios, 2, width_ratios=[2, 1])

    # Create summary dataframe for statistics
    summary_data = []

    for i, result in enumerate(results):
        # Light curve plot
        ax1 = fig.add_subplot(gs[i, 0])
        ax1.errorbar(
            result.times,
            result.fluxes,
            yerr=result.errors,
            fmt="o",
            color="blue",
            alpha=0.6,
            label="Observations",
        )
        ax1.axhline(
            y=result.stats["wght_mean"],
            color="r",
            linestyle="--",
            label=f'Mean: {result.stats["wght_mean"]:.1f} ± {result.stats["wght_mean_err"]:.1f}',
        )
        ax1.fill_between(
            result.times,
            result.stats["wght_mean"] - result.stats["wght_mean_err"],
            result.stats["wght_mean"] + result.stats["wght_mean_err"],
            color="r",
            alpha=0.2,
        )
        ax1.set_xlabel("Time")
        ax1.set_ylabel("Flux")
        ax1.set_title(f"{result.scenario_name} Light Curve")
        ax1.grid(visible=True, linewidth=0.5, color="k", alpha=0.3, zorder=0)
        ax1.tick_params(axis="y", direction="in", left=True, right=True, which="both")
        ax1.tick_params(axis="x", direction="in", top=True, bottom=True, which="both")
        ax1.legend(loc="upper right")

        # Statistics and interpretation text
        ax2 = fig.add_subplot(gs[i, 1])
        ax2.axis("off")
        ax2.text(
            0,
            0.95,
            result.interpretation,
            verticalalignment="top",
            fontfamily="monospace",
            wrap=True,
        )

        # Collect summary statistics
        summary_data.append(
            {
                "Scenario": result.scenario_name,
                "Weighted Mean": f"{result.stats['wght_mean']:.1f}±{result.stats['wght_mean_err']:.1f}",
                "Excess Variance": f"{result.stats['nxv']:.3f}",
                "Reduced χ²": f"{result.stats['rchiq']:.2f}",
                "P-value": f"{result.stats['cpval']:.2e}",
            }
        )

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, bbox_inches="tight", dpi=300)

    return pd.DataFrame(summary_data)

Run Example¶

# Analyze all predefined scenarios
np.random.seed(42)  # For reproducibility

results = []
for scenario_name, params in SCENARIOS.items():
    results.append(analyze_scenario(scenario_name, params))

# Visualize and get summary
summary_df = visualize_results(results)

# Display summary table
print("\nVariability Analysis Summary:")
print(summary_df.to_string(index=False))

Variability Analysis Summary:
              Scenario Weighted Mean Excess Variance Reduced χ²   P-value
            Quiet Star     988.7±7.1          -0.000       0.87  7.24e-01
        Pulsating Star    1005.4±4.2           0.021      24.09 5.54e-215
            Flare Star    1509.7±5.7           0.577     822.93  0.00e+00
Low-Amplitude Variable     998.2±1.4           0.000       5.31  1.56e-30
        Noisy Variable   1014.4±14.1           0.015       2.54  1.60e-08
         Secular Trend     949.1±4.2           0.001       2.43  8.91e-08

../_images/d48a8b118b2b3cb1ea9776d594a5cf1be9c391c3a6184121f99dd488805dd0f4.png

Custom Light Curves¶

# Example of custom scenario analysis
name = "Flare Star"
custom_params = create_custom_scenario(
    name,
    n_points=1000,
    n_flares=1,
    flare_amplitude=0.75,
    measurement_error_fraction=0.15,
    flare_decay=1,
)
custom_result = analyze_scenario(f"Custom {name}", custom_params)
custom_summary = visualize_results([custom_result])

../_images/714956124c250534d9968139b8770be080dd73612e4fa98ade2c90a5ba861a7d.png

# Example of custom scenario analysis
name = "Pulsating Star"
custom_params = create_custom_scenario(
    name, n_points=500, period=10, amplitude=0.05, measurement_error_fraction=0.03
)
custom_result = analyze_scenario(f"Custom {name}", custom_params)
custom_summary = visualize_results([custom_result])

../_images/eecfe844bc6f329cf7ef2f55a2658759c5902d5796f14636edcc29f6ce6905ac.png

# Example of custom scenario analysis
name = "Secular Trend"
custom_params = create_custom_scenario(
    name, n_points=50, slope=-1.5, measurement_error_fraction=0.04
)
custom_result = analyze_scenario(f"Custom {name}", custom_params)
custom_summary = visualize_results([custom_result])

../_images/535a232164b94359fd7782fd6782e9039aae5db5fb777b7b5039d5d26457c7bd.png