Source code for pysotope.processing

# src/OSIBL_correction
import os

from .corrections.drift_correction import apply_drift_model, build_drift_model, plot_drift_diagnostics, q_drift
from .utils.corrections.linearity import *
from .utils.corrections.methanol import *
from .utils.corrections.vsmow import *
from .utils.outliers.outliers import *
from .utils.queries import *
from .utils.regression import *
from .utils.uncertainty_and_output import *
from .utils.figures import *
from .utils.base_functions import *
from .utils.config import CorrectionConfig
from .utils.corrections.pame import *

[docs] def iso_process(user_linearity_conditions=False, min_area_threshold=None, include_parabolic=False, force_linearity_model=None): """ Main processing pipeline for GC-IRMS isotope data. This function executes the complete correction workflow for compound-specific isotope measurements, including: • Instrument drift correction • Linearity (area-dependent) correction • Reference standard normalization (e.g., VSMOW or VPDB) • Optional methanol/PAME correction • Outlier removal • Replicate averaging with propagated uncertainty • Figure generation and export • Final results export to disk The function is file-based and does not return data objects. All intermediate and final results are written to structured output folders along with diagnostic figures and a processing log. Parameters ---------- user_linearity_conditions : dict or bool, default=False Optional user-defined configuration for linearity correction. If provided, overrides automatically determined regression conditions (e.g., selected standards or model form). min_area_threshold : float or None, default=None Minimum peak area required for inclusion in processing. Peaks below this threshold are removed prior to correction. If None, no area-based filtering is applied. include_parabolic : bool, default=False If True, includes a second-order (parabolic) term in the linearity correction model. If False, a linear model is used. force_linearity_model : str, default = None, Used to force the type of model used for linearity correction. Can be "linear", "decay", "growth", or "parabolic". Outputs ------- This function writes the following to disk: • Corrected (drift, linearity, reference standard, methylation) sample results • Mean of corrected sample results • Final processed dataset with propagated uncertainties • Diagnostic plots (drift, linearity, standards, outliers) • Processing log file documenting all correction steps • Correction Figures No objects are returned. Notes ----- Processing sequence: 1. Isotope system selection 2. Output directory creation 3. Standards loading 4. Data import and classification 5. Area-based filtering (optional) 6. Drift correction (time-based regression) 7. Linearity correction (area-based regression) 8. Reference standard normalization 9. Optional methanol/PAME correction 10. Optional hydrogen or carbon isotope calculation of methanol from PAME 11. Outlier removal 12. Replicate averaging with uncertainty propagation 13. Final export and figure generation Uncertainty Propagation ----------------------- Analytical uncertainty is propagated analytically through: • Drift regression parameter covariance • Linearity regression parameter covariance • Reference standard scaling uncertainty • Methanol correction uncertainty (if applicable) • Replicate variability All variance components are combined using first-order Taylor series propagation and reported in the final dataset. """ import pandas as pd import matplotlib.pyplot as plt import numpy as np import math import os from IPython.display import clear_output from scipy.stats import linregress from matplotlib.dates import date2num from datetime import timedelta import time from scipy.stats import zscore from sklearn.linear_model import HuberRegressor import scipy.stats as stats # Query isotope system isotope = isotope_type() cfg = CorrectionConfig(isotope) # Setup output folder folder_path, fig_path, results_path, loc, log_file_path = create_folder(isotope) #debug # Set standards standards_df = load_standards(isotope)#query_stds(alt_stds, isotope) append_to_log(log_file_path, standards_df.to_string()) # Import data lin_std, drift_std, samples, correction_log, pame = import_data(loc, folder_path, log_file_path, isotope, standards_df) if min_area_threshold is not None: lin_std = lin_std[lin_std["area"]>min_area_threshold] drift_std = drift_std[drift_std["area"]>min_area_threshold] samples = samples[samples["area"]>min_area_threshold] uncorrected_samples = samples.copy() # Run standard plots for area std_plot(lin_std, drift_std, folder_path=folder_path, fig_path=fig_path,isotope=isotope, dD=isotope) # Drift Correction append_to_log(log_file_path, "Drift Correction") choice = q_drift(log_file_path).strip().lower() if choice in ("n", "no", "false", "f"): cfg.drift_applied = False for df in (samples, lin_std, drift_std): df[f"drift_corrected_{isotope}"] = df.get(isotope, np.nan) dD_temp = isotope else: drift_model = build_drift_model( drift_std, target_column=isotope, index_column="time_rel", weight_column="area", group_column="chain", log_file_path=log_file_path, ) plot_drift_diagnostics( drift_model, fig_path=fig_path, figure_name="Drift.png", y_label=f"Centered {isotope} (‰)", x_label="Time", ) print(f"\nWeighted-LS model: {isotope} = {drift_model['slope']:.3f}·t_ctr + {drift_model['intercept']:.3f}") print(f"Adj R² = {drift_model['r_squared']:.3f} | p = {drift_model['p_value']:.3g} | SE = {drift_model['std_error']:.3f}") if pos_response(input("Apply the correction? (Y/N): ")): cfg.drift_applied = True drift_std = apply_drift_model( drift_std, drift_model, target_column=isotope, index_column="time_rel", output_column=f"drift_corrected_{isotope}", error_column="drift_error", log_file_path=log_file_path, label="drift_std", ) lin_std = apply_drift_model( lin_std, drift_model, target_column=isotope, index_column="time_rel", output_column=f"drift_corrected_{isotope}", error_column="drift_error", log_file_path=log_file_path, label="lin_std", ) samples = apply_drift_model( samples, drift_model, target_column=isotope, index_column="time_rel", output_column=f"drift_corrected_{isotope}", error_column="drift_error", log_file_path=log_file_path, label="samples", ) correction_log.loc["Drift", "samples"] = 1 append_to_log(log_file_path, "- User accepted drift correction.") append_to_log( log_file_path, f"- Weighted-LS model: {isotope} = {drift_model['slope']:.3f} t_ctr + {drift_model['intercept']:.3f}", ) append_to_log( log_file_path, f"- Weighted-LS model stats: Adj R² = {drift_model['r_squared']:.3f} | p = {drift_model['p_value']:.3g} | SE = {drift_model['std_error']:.3f}", ) dD_temp = f"drift_corrected_{isotope}" else: cfg.drift_applied = False print("[INFO] Drift correction NOT applied - data reverted.") for df in (samples, lin_std, drift_std): df[f"drift_corrected_{isotope}"] = df.get(isotope, np.nan) append_to_log(log_file_path, "- User rejected drift correction. No changes saved.") dD_temp = isotope # # Show plots again # std_plot(lin_std, drift_std, folder_path=folder_path, fig_path=fig_path, dD=dD_temp,isotope=isotope) # Linearity (area) correction print(cfg) print(dD_temp) print(correction_log) drift_std, correction_log, lin_std, samples = process_linearity_correction( cfg, # configuration file samples, # samples to process drift_std, # drift standards lin_std, # linearity standards dD_temp, # isotope type correction_log, # Corrections applied folder_path, fig_path, isotope, user_linearity_conditions, log_file_path=log_file_path, include_parabolic=include_parabolic, force_linearity_model=force_linearity_model) # Reference standard correction samples, standards = reference_standard_correction(cfg, samples, lin_std, drift_std, correction_log, folder_path, fig_path, log_file_path, isotope, standards_df) # PAME if pame: samples, pame_unknown, pame_derv_meth, pame_derv_meth_unc = calculate_methanol_dD(cfg, samples, isotope, log_file_path) samples, standards = q_methylation(samples, standards, isotope, log_file_path, meth_val = pame_derv_meth, meth_std = pame_derv_meth_unc); # methanol values from PAME else: # methylation correction without PAME samples, standards = q_methylation(samples, standards, isotope, log_file_path); # Remove outliers samples, excluded_samples = outlier_removal(samples, fig_path, log_file_path, isotope) raw_samples = samples # Calculate mean values of replicate analyses samples = mean_values_with_uncertainty(samples, cfg, sample_name_header="Identifier 1", chain_header="chain", iso=isotope) if pame: pame_unknown = mean_values_with_uncertainty(pame_unknown, cfg = cfg, sample_name_header="Identifier 1", chain_header="chain", iso=isotope) else: pame_unknown = None # Final Data Correction and Plot output_results(raw_samples, samples, standards, pame_unknown, folder_path, fig_path, results_path, isotope, pame, log_file_path, cfg)