# src/OSIBL_correction
import os
from .corrections.drift_correction import apply_drift_model, build_drift_model, plot_drift_diagnostics, q_drift
from .utils.corrections.linearity import *
from .utils.corrections.methanol import *
from .utils.corrections.vsmow import *
from .utils.outliers.outliers import *
from .utils.queries import *
from .utils.regression import *
from .utils.uncertainty_and_output import *
from .utils.figures import *
from .utils.base_functions import *
from .utils.config import CorrectionConfig
from .utils.corrections.pame import *
[docs]
def iso_process(user_linearity_conditions=False, min_area_threshold=None, include_parabolic=False, force_linearity_model=None):
"""
Main processing pipeline for GC-IRMS isotope data.
This function executes the complete correction workflow for
compound-specific isotope measurements, including:
• Instrument drift correction
• Linearity (area-dependent) correction
• Reference standard normalization (e.g., VSMOW or VPDB)
• Optional methanol/PAME correction
• Outlier removal
• Replicate averaging with propagated uncertainty
• Figure generation and export
• Final results export to disk
The function is file-based and does not return data objects.
All intermediate and final results are written to structured
output folders along with diagnostic figures and a processing log.
Parameters
----------
user_linearity_conditions : dict or bool, default=False
Optional user-defined configuration for linearity correction.
If provided, overrides automatically determined regression
conditions (e.g., selected standards or model form).
min_area_threshold : float or None, default=None
Minimum peak area required for inclusion in processing.
Peaks below this threshold are removed prior to correction.
If None, no area-based filtering is applied.
include_parabolic : bool, default=False
If True, includes a second-order (parabolic) term in the
linearity correction model. If False, a linear model is used.
force_linearity_model : str, default = None,
Used to force the type of model used for linearity correction.
Can be "linear", "decay", "growth", or "parabolic".
Outputs
-------
This function writes the following to disk:
• Corrected (drift, linearity, reference standard, methylation) sample results
• Mean of corrected sample results
• Final processed dataset with propagated uncertainties
• Diagnostic plots (drift, linearity, standards, outliers)
• Processing log file documenting all correction steps
• Correction Figures
No objects are returned.
Notes
-----
Processing sequence:
1. Isotope system selection
2. Output directory creation
3. Standards loading
4. Data import and classification
5. Area-based filtering (optional)
6. Drift correction (time-based regression)
7. Linearity correction (area-based regression)
8. Reference standard normalization
9. Optional methanol/PAME correction
10. Optional hydrogen or carbon isotope calculation of methanol from PAME
11. Outlier removal
12. Replicate averaging with uncertainty propagation
13. Final export and figure generation
Uncertainty Propagation
-----------------------
Analytical uncertainty is propagated analytically through:
• Drift regression parameter covariance
• Linearity regression parameter covariance
• Reference standard scaling uncertainty
• Methanol correction uncertainty (if applicable)
• Replicate variability
All variance components are combined using first-order
Taylor series propagation and reported in the final dataset.
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
import os
from IPython.display import clear_output
from scipy.stats import linregress
from matplotlib.dates import date2num
from datetime import timedelta
import time
from scipy.stats import zscore
from sklearn.linear_model import HuberRegressor
import scipy.stats as stats
# Query isotope system
isotope = isotope_type()
cfg = CorrectionConfig(isotope)
# Setup output folder
folder_path, fig_path, results_path, loc, log_file_path = create_folder(isotope) #debug
# Set standards
standards_df = load_standards(isotope)#query_stds(alt_stds, isotope)
append_to_log(log_file_path, standards_df.to_string())
# Import data
lin_std, drift_std, samples, correction_log, pame = import_data(loc, folder_path, log_file_path, isotope, standards_df)
if min_area_threshold is not None:
lin_std = lin_std[lin_std["area"]>min_area_threshold]
drift_std = drift_std[drift_std["area"]>min_area_threshold]
samples = samples[samples["area"]>min_area_threshold]
uncorrected_samples = samples.copy()
# Run standard plots for area
std_plot(lin_std, drift_std, folder_path=folder_path, fig_path=fig_path,isotope=isotope, dD=isotope)
# Drift Correction
append_to_log(log_file_path, "Drift Correction")
choice = q_drift(log_file_path).strip().lower()
if choice in ("n", "no", "false", "f"):
cfg.drift_applied = False
for df in (samples, lin_std, drift_std):
df[f"drift_corrected_{isotope}"] = df.get(isotope, np.nan)
dD_temp = isotope
else:
drift_model = build_drift_model(
drift_std,
target_column=isotope,
index_column="time_rel",
weight_column="area",
group_column="chain",
log_file_path=log_file_path,
)
plot_drift_diagnostics(
drift_model,
fig_path=fig_path,
figure_name="Drift.png",
y_label=f"Centered {isotope} (‰)",
x_label="Time",
)
print(f"\nWeighted-LS model: {isotope} = {drift_model['slope']:.3f}·t_ctr + {drift_model['intercept']:.3f}")
print(f"Adj R² = {drift_model['r_squared']:.3f} | p = {drift_model['p_value']:.3g} | SE = {drift_model['std_error']:.3f}")
if pos_response(input("Apply the correction? (Y/N): ")):
cfg.drift_applied = True
drift_std = apply_drift_model(
drift_std,
drift_model,
target_column=isotope,
index_column="time_rel",
output_column=f"drift_corrected_{isotope}",
error_column="drift_error",
log_file_path=log_file_path,
label="drift_std",
)
lin_std = apply_drift_model(
lin_std,
drift_model,
target_column=isotope,
index_column="time_rel",
output_column=f"drift_corrected_{isotope}",
error_column="drift_error",
log_file_path=log_file_path,
label="lin_std",
)
samples = apply_drift_model(
samples,
drift_model,
target_column=isotope,
index_column="time_rel",
output_column=f"drift_corrected_{isotope}",
error_column="drift_error",
log_file_path=log_file_path,
label="samples",
)
correction_log.loc["Drift", "samples"] = 1
append_to_log(log_file_path, "- User accepted drift correction.")
append_to_log(
log_file_path,
f"- Weighted-LS model: {isotope} = {drift_model['slope']:.3f} t_ctr + {drift_model['intercept']:.3f}",
)
append_to_log(
log_file_path,
f"- Weighted-LS model stats: Adj R² = {drift_model['r_squared']:.3f} | p = {drift_model['p_value']:.3g} | SE = {drift_model['std_error']:.3f}",
)
dD_temp = f"drift_corrected_{isotope}"
else:
cfg.drift_applied = False
print("[INFO] Drift correction NOT applied - data reverted.")
for df in (samples, lin_std, drift_std):
df[f"drift_corrected_{isotope}"] = df.get(isotope, np.nan)
append_to_log(log_file_path, "- User rejected drift correction. No changes saved.")
dD_temp = isotope
# # Show plots again
# std_plot(lin_std, drift_std, folder_path=folder_path, fig_path=fig_path, dD=dD_temp,isotope=isotope)
# Linearity (area) correction
print(cfg)
print(dD_temp)
print(correction_log)
drift_std, correction_log, lin_std, samples = process_linearity_correction(
cfg, # configuration file
samples, # samples to process
drift_std, # drift standards
lin_std, # linearity standards
dD_temp, # isotope type
correction_log, # Corrections applied
folder_path, fig_path, isotope, user_linearity_conditions,
log_file_path=log_file_path, include_parabolic=include_parabolic,
force_linearity_model=force_linearity_model)
# Reference standard correction
samples, standards = reference_standard_correction(cfg, samples, lin_std, drift_std, correction_log, folder_path, fig_path, log_file_path, isotope, standards_df)
# PAME
if pame:
samples, pame_unknown, pame_derv_meth, pame_derv_meth_unc = calculate_methanol_dD(cfg, samples, isotope, log_file_path)
samples, standards = q_methylation(samples, standards, isotope, log_file_path,
meth_val = pame_derv_meth, meth_std = pame_derv_meth_unc); # methanol values from PAME
else: # methylation correction without PAME
samples, standards = q_methylation(samples, standards, isotope, log_file_path);
# Remove outliers
samples, excluded_samples = outlier_removal(samples, fig_path, log_file_path, isotope)
raw_samples = samples
# Calculate mean values of replicate analyses
samples = mean_values_with_uncertainty(samples, cfg, sample_name_header="Identifier 1", chain_header="chain", iso=isotope)
if pame:
pame_unknown = mean_values_with_uncertainty(pame_unknown, cfg = cfg, sample_name_header="Identifier 1", chain_header="chain", iso=isotope)
else:
pame_unknown = None
# Final Data Correction and Plot
output_results(raw_samples, samples, standards, pame_unknown, folder_path, fig_path, results_path, isotope, pame, log_file_path, cfg)