| Title: | CONDitional UI for Time Series normalisation |
|---|---|
| Description: | Provide a user interface for conditionally normalising a timeseries. |
| Authors: | Puwasala Gamakumara [aut, cre], Priyanga Dilini Talagala [aut], Rob J. Hyndman [aut] |
| Maintainer: | Puwasala Gamakumara <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 1.0.0 |
| Built: | 2026-05-17 07:47:56 UTC |
| Source: | https://github.com/PuwasalaG/conduits |
This function produces the estimated conditional autocorrelation between $x_t$ and $y_t$ at lag $k$, i.e. $r_k = E(x_t y_{t+k} \mid z_t)$.
## S3 method for class 'conditional_acf' augment(x, ...)## S3 method for class 'conditional_acf' augment(x, ...)
x |
Model object of class "conditional_acf" returned from
|
... |
Additional arguments, not currently used. |
A tibble with information
about data points.
old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, conductance, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean ) fit_c_acf <- old_ts |> tidyr::drop_na() |> conditional_acf( turbidity_upstream ~ splines::ns(level_upstream, df = 5) + splines::ns(conductance_upstream, df = 5), lag_max = 10, fit_mean = fit_mean, fit_var = fit_var, df_correlation = c(5, 5) ) data_inf <- fit_c_acf |> augment()old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, conductance, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean ) fit_c_acf <- old_ts |> tidyr::drop_na() |> conditional_acf( turbidity_upstream ~ splines::ns(level_upstream, df = 5) + splines::ns(conductance_upstream, df = 5), lag_max = 10, fit_mean = fit_mean, fit_var = fit_var, df_correlation = c(5, 5) ) data_inf <- fit_c_acf |> augment()
This function produces the estimated conditional cross-correlation between $x_t$ and $y_t$ at lag $k$, i.e. $r_k = E(x_t y_{t+k} \mid z_t)$.
## S3 method for class 'conditional_ccf' augment(x, ...)## S3 method for class 'conditional_ccf' augment(x, ...)
x |
Model object of class "conditional_ccf" returned from
|
... |
Additional arguments, not currently used. |
A tibble with information
about data points.
old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, conductance, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean_y <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var_y <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean_y ) fit_mean_x <- old_ts |> conditional_mean(turbidity_upstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var_x <- old_ts |> conditional_var( turbidity_upstream ~ s(level_upstream, k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean_x ) fit_c_ccf <- old_ts |> tidyr::drop_na() |> conditional_ccf( I(turbidity_upstream * turbidity_downstream) ~ splines::ns( level_upstream, df = 5 ) + splines::ns(conductance_upstream, df = 5), lag_max = 10, fit_mean_x = fit_mean_x, fit_var_x = fit_var_x, fit_mean_y = fit_mean_y, fit_var_y = fit_var_y, df_correlation = c(5, 5) ) data_inf <- fit_c_ccf |> augment()old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, conductance, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean_y <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var_y <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean_y ) fit_mean_x <- old_ts |> conditional_mean(turbidity_upstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var_x <- old_ts |> conditional_var( 
turbidity_upstream ~ s(level_upstream, k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean_x ) fit_c_ccf <- old_ts |> tidyr::drop_na() |> conditional_ccf( I(turbidity_upstream * turbidity_downstream) ~ splines::ns( level_upstream, df = 5 ) + splines::ns(conductance_upstream, df = 5), lag_max = 10, fit_mean_x = fit_mean_x, fit_var_x = fit_var_x, fit_mean_y = fit_mean_y, fit_var_y = fit_var_y, df_correlation = c(5, 5) ) data_inf <- fit_c_ccf |> augment()
This function produces partial residuals for each predictor, and the estimated conditional means, standard error and confidence limits.
## S3 method for class 'conditional_moment' augment(x, level = 0.95, ...)## S3 method for class 'conditional_moment' augment(x, level = 0.95, ...)
x |
Model object of class "conditional_moment" returned from
|
level |
Confidence level. Default is set to 0.95. |
... |
Additional arguments, not currently used |
A tibble with information
about data points.
data <- NEON_PRIN_5min_cleaned |> dplyr::filter(site == "upstream") |> dplyr::select(Timestamp, turbidity, level, conductance, temperature) fit_mean <- data |> conditional_mean(turbidity ~ s(level, k = 8) + s(conductance, k = 8) + s(temperature, k = 8)) data_inf <- fit_mean |> augment()data <- NEON_PRIN_5min_cleaned |> dplyr::filter(site == "upstream") |> dplyr::select(Timestamp, turbidity, level, conductance, temperature) fit_mean <- data |> conditional_mean(turbidity ~ s(level, k = 8) + s(conductance, k = 8) + s(temperature, k = 8)) data_inf <- fit_mean |> augment()
This function computes the bootstrapped confidence intervals for dt. It resamples the residuals from the various models used in the conditional cross-correlation calculation to generate new data. As the residuals are serially correlated, a sieve bootstrap approach is used to capture the autocorrelation structure in the data.
calc_dt_CI(x, m, new_data = NULL)calc_dt_CI(x, m, new_data = NULL)
x |
Model object of class "conditional_ccf" returned from
|
m |
number of replications for bootstrap confidence intervals |
new_data |
the dataset with some of the predictors set to their median values (if required). Default is set to NULL. |
A tibble with estimated time lag "dt"
Priyanga Dilini Talagala & Puwasala Gamakumara
## Not run: old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean_y <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 5) + s(temperature_upstream, k = 5) ) fit_var_y <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 4) + s(temperature_upstream, k = 4), family = "Gamma", fit_mean = fit_mean_y ) fit_mean_x <- old_ts |> conditional_mean(turbidity_upstream ~ s(level_upstream, k = 5) + s(temperature_upstream, k = 5) ) fit_var_x <- old_ts |> conditional_var( turbidity_upstream ~ s(level_upstream, k = 4) + s(temperature_upstream, k = 4), family = "Gamma", fit_mean = fit_mean_x ) fit_c_ccf <- old_ts |> tidyr::drop_na() |> conditional_ccf( I(turbidity_upstream * turbidity_downstream) ~ splines::ns(level_upstream, df = 3) + splines::ns(temperature_upstream, df = 3), lag_max = 10, fit_mean_x = fit_mean_x, fit_var_x = fit_var_x, fit_mean_y = fit_mean_y, fit_var_y = fit_var_y, df_correlation = c(3, 3) ) df_dt <- fit_c_ccf |> calc_dt_CI(100) # Calculate dt vs an upstream covariate while holding the # remaining upstream covariates at their medians new_data <- fit_c_ccf$data new_data <- new_data |> dplyr::mutate(temperature_upstream = median(temperature_upstream)) df_dt2 <- fit_c_ccf |> calc_dt_CI(100, new_data) ## End(Not run)## Not run: old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean_y <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 5) + s(temperature_upstream, k = 5) ) fit_var_y <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 4) + s(temperature_upstream, k = 4), family = "Gamma", fit_mean = fit_mean_y ) fit_mean_x <- old_ts |> conditional_mean(turbidity_upstream ~ s(level_upstream, k = 5) + 
s(temperature_upstream, k = 5) ) fit_var_x <- old_ts |> conditional_var( turbidity_upstream ~ s(level_upstream, k = 4) + s(temperature_upstream, k = 4), family = "Gamma", fit_mean = fit_mean_x ) fit_c_ccf <- old_ts |> tidyr::drop_na() |> conditional_ccf( I(turbidity_upstream * turbidity_downstream) ~ splines::ns(level_upstream, df = 3) + splines::ns(temperature_upstream, df = 3), lag_max = 10, fit_mean_x = fit_mean_x, fit_var_x = fit_var_x, fit_mean_y = fit_mean_y, fit_var_y = fit_var_y, df_correlation = c(3, 3) ) df_dt <- fit_c_ccf |> calc_dt_CI(100) # Calculate dt vs an upstream covariate while holding the # remaining upstream covariates at their medians new_data <- fit_c_ccf$data new_data <- new_data |> dplyr::mutate(temperature_upstream = median(temperature_upstream)) df_dt2 <- fit_c_ccf |> calc_dt_CI(100, new_data) ## End(Not run)
This function computes the autocorrelation between $x_t$ and $y_{t+k}$ at $k = 1, 2, \dots$ conditional on a set of time series $z_t$.
conditional_acf(data, formula, lag_max, fit_mean, fit_var, df_correlation)conditional_acf(data, formula, lag_max, fit_mean, fit_var, df_correlation)
data |
a tibble containing all the time series including $y^*_t \, y^*_{t-k}$ which are uniquely identified by the corresponding Timestamp. |
formula |
A GAM formula. See |
lag_max |
Maximum lag at which to calculate the conditional acf |
fit_mean |
Model object of class "conditional_moment" returned from
|
fit_var |
Model object of class "conditional_moment" returned from
|
df_correlation |
a vector specifying the degrees of freedom to be considered for each numerical predictor when fitting additive models for conditional auto-correlations. Each component of the vector should correspond to each predictor specified in "z_numeric". |
Suppose $x_t$ and $y_t$ are conditionally normalised with respect
to $z_t$ using conditional_mean and conditional_var. Then
we can estimate the conditional cross-correlation between $x_t$ and $y_t$ at lag $k$, i.e. $r_k = E(x_t y_{t+k} \mid z_t)$
via generalised additive models (GAM). conditional_ccf uses natural splines implemented
in splines package to estimate the conditional cross-correlations between two
time series given a set of time series predictors. Users first need to
normalise $x_t$ and $y_t$ at lag $k$ using conditional_mean and conditional_var
The function returns a list of objects of class
"glm" as described in glm.
old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, conductance, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean ) fit_c_acf <- old_ts |> tidyr::drop_na() |> conditional_acf( turbidity_upstream ~ splines::ns(level_upstream, df = 5) + splines::ns(conductance_upstream, df = 5), lag_max = 10, fit_mean = fit_mean, fit_var = fit_var, df_correlation = c(5, 5) )old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, conductance, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean ) fit_c_acf <- old_ts |> tidyr::drop_na() |> conditional_acf( turbidity_upstream ~ splines::ns(level_upstream, df = 5) + splines::ns(conductance_upstream, df = 5), lag_max = 10, fit_mean = fit_mean, fit_var = fit_var, df_correlation = c(5, 5) )
This function computes the cross-correlation between $x_t$ and $y_{t+k}$ at $k = 1, 2, \dots$ conditional on a set of time series $z_t$.
conditional_ccf( data, formula, lag_max = 10, fit_mean_x, fit_var_x, fit_mean_y, fit_var_y, df_correlation )conditional_ccf( data, formula, lag_max = 10, fit_mean_x, fit_var_x, fit_mean_y, fit_var_y, df_correlation )
data |
a tibble containing all the time series including ystar*xstar which are uniquely identified by the corresponding Timestamp. |
formula |
A GAM formula. The response variable should be in the format of
I(x*y) ~ . See |
lag_max |
Maximum lag at which to calculate the conditional ccf |
fit_mean_x |
Model object of class "conditional_moment" returned from
|
fit_var_x |
Model object of class "conditional_moment" returned from
|
fit_mean_y |
Model object of class "conditional_moment" returned from
|
fit_var_y |
Model object of class "conditional_moment" returned from
|
df_correlation |
a vector specifying the degrees of freedom to be considered for each numerical predictor when fitting additive models for conditional cross-correlations. Each component of the vector should correspond to the degrees of freedom for each predictor. |
Suppose $x_t$ and $y_t$ are conditionally normalised with respect
to $z_t$ using conditional_mean and conditional_var. Then
we can estimate the conditional cross-correlation between $x_t$ and $y_t$ at lag $k$, i.e. $r_k = E(x_t y_{t+k} \mid z_t)$
via generalised additive models (GAM). conditional_ccf uses natural splines implemented
in splines package to estimate the conditional cross-correlations between two
time series given a set of time series predictors. Users first need to
normalise $x_t$ and $y_t$ at lag $k$ using conditional_mean and conditional_var
The function returns a list of objects of class
"glm" as described in glm. The length of the list is equal to lag_max.
old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, conductance, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean_y <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var_y <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean_y ) fit_mean_x <- old_ts |> conditional_mean(turbidity_upstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var_x <- old_ts |> conditional_var( turbidity_upstream ~ s(level_upstream, k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean_x ) fit_c_ccf <- old_ts |> tidyr::drop_na() |> conditional_ccf( I(turbidity_upstream * turbidity_downstream) ~ splines::ns( level_upstream, df = 5 ) + splines::ns(temperature_upstream, df = 5), lag_max = 10, fit_mean_x = fit_mean_x, fit_var_x = fit_var_x, fit_mean_y = fit_mean_y, fit_var_y = fit_var_y, df_correlation = c(5, 5) )old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, conductance, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean_y <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var_y <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean_y ) fit_mean_x <- old_ts |> conditional_mean(turbidity_upstream ~ s(level_upstream, k = 8) + s(conductance_upstream, k = 8) + s(temperature_upstream, k = 8)) fit_var_x <- old_ts |> conditional_var( turbidity_upstream ~ s(level_upstream, 
k = 7) + s(conductance_upstream, k = 7) + s(temperature_upstream, k = 7), family = "Gamma", fit_mean = fit_mean_x ) fit_c_ccf <- old_ts |> tidyr::drop_na() |> conditional_ccf( I(turbidity_upstream * turbidity_downstream) ~ splines::ns( level_upstream, df = 5 ) + splines::ns(temperature_upstream, df = 5), lag_max = 10, fit_mean_x = fit_mean_x, fit_var_x = fit_var_x, fit_mean_y = fit_mean_y, fit_var_y = fit_var_y, df_correlation = c(5, 5) )
This function estimates the means of a time series conditional on a set of other time series via additive models.
conditional_mean(data, formula)conditional_mean(data, formula)
data |
a tibble containing all the time series which are uniquely identified by the corresponding Timestamp. |
formula |
A GAM formula. See |
Suppose $x_t$ is a time series where its
mean is a function of $z_t$. i.e. $E(x_t|z_t) = m_x(z_t)$.
Then $m_x(z_t)$ can be estimated via generalised
additive models (GAM). This function uses
GAMs implemented in mgcv package to estimate
the conditional means of a time series given a set of
time series predictors.
The function returns an object of class
"gam" as described in gamObject.
data <- NEON_PRIN_5min_cleaned |> dplyr::filter(site == "upstream") |> dplyr::select(Timestamp, turbidity, level, conductance, temperature) fit_mean <- data |> conditional_mean(turbidity ~ s(level, k = 8) + s(conductance, k = 8) + s(temperature, k = 8))data <- NEON_PRIN_5min_cleaned |> dplyr::filter(site == "upstream") |> dplyr::select(Timestamp, turbidity, level, conductance, temperature) fit_mean <- data |> conditional_mean(turbidity ~ s(level, k = 8) + s(conductance, k = 8) + s(temperature, k = 8))
This function estimates the variance of a time series conditional on a set of other time series via additive models.
conditional_var(data, formula, family = c("Gamma", "lognormal"), fit_mean)conditional_var(data, formula, family = c("Gamma", "lognormal"), fit_mean)
data |
A tibble containing all the time series which are uniquely identified by the corresponding Timestamp. |
formula |
An object of class "formula": a symbolic description of the model to be fitted. The details of model specification are given under ‘Details’. |
family |
the family to be used in conditional variance model. Currently this can take either "Gamma" or "lognormal". |
fit_mean |
A GAM object returned from |
Suppose $x_t$ is a time series where its
variance is a function of $z_t$. i.e. $Var(x_t|z_t) = v_x(z_t)$.
Then $v_x(z_t)$ can be estimated via generalised
additive models (GAM). This function uses GAMs implemented
in mgcv package to estimate the conditional variance
of a time series given a set of time series predictors.
The function returns an object of class
"gam" as described in gamObject.
data <- NEON_PRIN_5min_cleaned |> dplyr::filter(site == "upstream") |> dplyr::select(Timestamp, turbidity, level, conductance, temperature) fit_mean <- data |> conditional_mean(turbidity ~ s(level, k = 8) + s(conductance, k = 8) + s(temperature, k = 8)) ## Not run: fit_var <- data |> conditional_var( turbidity ~ s(level, k = 7) + s(conductance, k = 7) + s(temperature, k = 7), family = "Gamma", fit_mean = fit_mean ) ## End(Not run)data <- NEON_PRIN_5min_cleaned |> dplyr::filter(site == "upstream") |> dplyr::select(Timestamp, turbidity, level, conductance, temperature) fit_mean <- data |> conditional_mean(turbidity ~ s(level, k = 8) + s(conductance, k = 8) + s(temperature, k = 8)) ## Not run: fit_var <- data |> conditional_var( turbidity ~ s(level, k = 7) + s(conductance, k = 7) + s(temperature, k = 7), family = "Gamma", fit_mean = fit_mean ) ## End(Not run)
Methods and tools for conditional normalisation of time series using additive models. This includes functions to estimate conditional means, conditional variances, conditional autocorrelation functions and conditional cross-correlation functions. Examples show these functions being used to estimate river flow time between two sensor locations in a river system.
Puwasala Gamakumara, Priyanga Dilini Talagala, Rob J Hyndman
This function estimates the time it takes water to flow from an upstream location to a downstream location conditional on the observed water-quality variables from the upstream sensor. That time lag is defined as the lag that gives maximum cross-correlation conditional on upstream water-quality variables.
estimate_dt(x)estimate_dt(x)
x |
Model object of class "conditional_ccf" returned from
|
A tibble with estimated time lag "dt"
and corresponding maximum cross-correlation
Puwasala Gamakumara & Priyanga Dilini Talagala
old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean_y <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 5) + s(temperature_upstream, k = 5)) fit_var_y <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 4) + s(temperature_upstream, k = 4), family = "Gamma", fit_mean = fit_mean_y ) fit_mean_x <- old_ts |> conditional_mean(turbidity_upstream ~ s(level_upstream, k = 5) + s(temperature_upstream, k = 5)) fit_var_x <- old_ts |> conditional_var( turbidity_upstream ~ s(level_upstream, k = 4) + s(temperature_upstream, k = 4), family = "Gamma", fit_mean = fit_mean_x ) fit_c_ccf <- old_ts |> tidyr::drop_na() |> conditional_ccf( I(turbidity_upstream * turbidity_downstream) ~ splines::ns(level_upstream, df = 3) + splines::ns(temperature_upstream, df = 3), lag_max = 10, fit_mean_x = fit_mean_x, fit_var_x = fit_var_x, fit_mean_y = fit_mean_y, fit_var_y = fit_var_y, df_correlation = c(3, 3) ) new_data <- fit_c_ccf |> estimate_dt()old_ts <- NEON_PRIN_5min_cleaned |> dplyr::select( Timestamp, site, turbidity, level, temperature ) |> tidyr::pivot_wider( names_from = site, values_from = turbidity:temperature ) fit_mean_y <- old_ts |> conditional_mean(turbidity_downstream ~ s(level_upstream, k = 5) + s(temperature_upstream, k = 5)) fit_var_y <- old_ts |> conditional_var( turbidity_downstream ~ s(level_upstream, k = 4) + s(temperature_upstream, k = 4), family = "Gamma", fit_mean = fit_mean_y ) fit_mean_x <- old_ts |> conditional_mean(turbidity_upstream ~ s(level_upstream, k = 5) + s(temperature_upstream, k = 5)) fit_var_x <- old_ts |> conditional_var( turbidity_upstream ~ s(level_upstream, k = 4) + s(temperature_upstream, k = 4), family = "Gamma", fit_mean = fit_mean_x ) fit_c_ccf <- old_ts |> tidyr::drop_na() |> conditional_ccf( I(turbidity_upstream * turbidity_downstream) ~ splines::ns(level_upstream, 
df = 3) + splines::ns(temperature_upstream, df = 3), lag_max = 10, fit_mean_x = fit_mean_x, fit_var_x = fit_var_x, fit_mean_y = fit_mean_y, fit_var_y = fit_var_y, df_correlation = c(3, 3) ) new_data <- fit_c_ccf |> estimate_dt()
NEON_PRIN_5min_cleaned consists of anomaly-removed data for water-quality variables from upstream and downstream sensors in Pringle Creek in Texas for the period spanning from 2019-07-01 to 2019-12-31, aggregated at 5-minute intervals.
NEON_PRIN_5min_cleanedNEON_PRIN_5min_cleaned
A data frame with water-quality variables, level and temperature data:
Timestamp: Timestamp
site: site position
conductance: specific conductance
dissolvedOxygen: dissolved oxygen
pH: pH
chlorophyll: chlorophyll
turbidity: turbidity
fDOM: fDOM
level: elevation of surface water
temperature: temperature in surface water
This function produces a normalized series using conditional moments.
normalize(data, y, fit_mean, fit_var)normalize(data, y, fit_mean, fit_var)
data |
a tsibble containing all the time series which are uniquely identified by the corresponding Timestamp. |
y |
The variable name |
fit_mean |
Model object of class "conditional_moment" returned from
|
fit_var |
Model object of class "conditional_moment" returned from
|
A vector of the conditionally normalised series
data <- NEON_PRIN_5min_cleaned |> dplyr::filter(site == "upstream") |> dplyr::select(Timestamp, turbidity, level, conductance, temperature) |> tsibble::as_tsibble(index = Timestamp) fit_mean <- data |> conditional_mean(turbidity ~ s(level, k = 8) + s(conductance, k = 8) + s(temperature, k = 8)) fit_var <- data |> conditional_var( turbidity ~ s(level, k = 7) + s(conductance, k = 7) + s(temperature, k = 7), family = "Gamma", fit_mean = fit_mean ) new_ts <- data |> dplyr::mutate(ystar = conduits::normalize(data, turbidity, fit_mean, fit_var))data <- NEON_PRIN_5min_cleaned |> dplyr::filter(site == "upstream") |> dplyr::select(Timestamp, turbidity, level, conductance, temperature) |> tsibble::as_tsibble(index = Timestamp) fit_mean <- data |> conditional_mean(turbidity ~ s(level, k = 8) + s(conductance, k = 8) + s(temperature, k = 8)) fit_var <- data |> conditional_var( turbidity ~ s(level, k = 7) + s(conductance, k = 7) + s(temperature, k = 7), family = "Gamma", fit_mean = fit_mean ) new_ts <- data |> dplyr::mutate(ystar = conduits::normalize(data, turbidity, fit_mean, fit_var))
This function produces an unnormalized series using conditional moments.
unnormalize(data, ystar, fit_mean, fit_var)unnormalize(data, ystar, fit_mean, fit_var)
data |
a tsibble containing all the time series which are uniquely identified by the corresponding Timestamp. |
ystar |
The normalized variable name |
fit_mean |
Model object of class "conditional_moment" returned from
|
fit_var |
Model object of class "conditional_moment" returned from
|
A tsibble with the conditionally normalised series
data <- NEON_PRIN_5min_cleaned |> dplyr::filter(site == "upstream") |> dplyr::select(Timestamp, turbidity, level, conductance, temperature) |> tsibble::as_tsibble(index = Timestamp) fit_mean <- data |> conditional_mean(turbidity ~ s(level, k = 8) + s(conductance, k = 8) + s(temperature, k = 8)) fit_var <- data |> conditional_var( turbidity ~ s(level, k = 7) + s(conductance, k = 7) + s(temperature, k = 7), family = "Gamma", fit_mean = fit_mean ) new_ts <- data |> dplyr::mutate(ystar = normalize(data, turbidity, fit_mean, fit_var)) # For demonstrative purposes, declare three data points # as missing values. new_ts[3:5, 6] <- NA ## Not run: library(fable) library(dplyr) impute_ts <- new_ts |> model(ARIMA(ystar)) |> interpolate(new_ts) |> rename(y_star_impt = ystar) |> full_join(new_ts, by = "Timestamp") impute_ts <- impute_ts mutate(y = unnormalize(impute_ts, y_star_impt, fit_mean, fit_var)) ## End(Not run)data <- NEON_PRIN_5min_cleaned |> dplyr::filter(site == "upstream") |> dplyr::select(Timestamp, turbidity, level, conductance, temperature) |> tsibble::as_tsibble(index = Timestamp) fit_mean <- data |> conditional_mean(turbidity ~ s(level, k = 8) + s(conductance, k = 8) + s(temperature, k = 8)) fit_var <- data |> conditional_var( turbidity ~ s(level, k = 7) + s(conductance, k = 7) + s(temperature, k = 7), family = "Gamma", fit_mean = fit_mean ) new_ts <- data |> dplyr::mutate(ystar = normalize(data, turbidity, fit_mean, fit_var)) # For demonstrative purposes, declare three data points # as missing values. new_ts[3:5, 6] <- NA ## Not run: library(fable) library(dplyr) impute_ts <- new_ts |> model(ARIMA(ystar)) |> interpolate(new_ts) |> rename(y_star_impt = ystar) |> full_join(new_ts, by = "Timestamp") impute_ts <- impute_ts mutate(y = unnormalize(impute_ts, y_star_impt, fit_mean, fit_var)) ## End(Not run)