The Normality Transformation via Optimized Skewness and Kurtosis (OSKT) is a normalization method that simultaneously evaluates deviations in skewness and kurtosis of non-normal data.
The most recent version of the osktnorm package can be installed from CRAN. If osktnorm has already been installed, load it into the R working environment with the following command:
library(osktnorm)
In the following code snippet, a right-skewed distribution with 300 observations is generated using the rlnorm function of R and then normalized using OSKT. For this purpose, the osktfast function is applied by simply passing the original observation vector to be transformed, and the results are stored in the object res_oskt.
# Fix the RNG state so the simulated sample is reproducible.
set.seed(12)
# Generate right-skewed data: 300 log-normal draws. The arguments are spelled
# out as meanlog/sdlog (rlnorm's actual parameter names) instead of relying on
# partial matching of mean/sd; the generated values are unchanged.
x_orig <- rlnorm(300, meanlog = 0, sdlog = 0.5)
# Run the OSKT normalization on the raw sample and keep the full result object,
# since its other components are read further down.
res_oskt <- osktfast(x_orig)
# The normalized observations are stored in the `transformed` component.
x_transformed <- res_oskt$transformed
# Show the first five normalized values.
head(x_transformed, n = 5L)
[1] -1.802405 1.399281 -1.250515 -1.210836 -2.307431
# Pull the fitted quantities out of the OSKT result. Going by the labels
# printed below, `g` is reported as the optimized skewness, `h` as the
# optimized kurtosis, and `value` as the Anderson-Darling statistic at the
# optimum. g_star and h_star are reused later for the back-transformation.
g_star <- res_oskt$g
h_star <- res_oskt$h
A2 <- res_oskt$value
cat("Optimized skewness: ", g_star, "\n")
Optimized skewness: -0.5909241
cat("Optimized kurtosis: ", h_star, "\n")
Optimized kurtosis: 0.07987875
cat("Anderson-Darling statistic at the optimum: ", A2, "\n")
Anderson-Darling statistic at the optimum: 0.1056021
The code snippet below visualizes the original and normalized observations using histograms and density plots for comparison purposes.
# Before/after figure: density-scaled histograms and kernel density curves of
# the raw and OSKT-normalized samples, with a standard normal curve overlaid.
oldpar <- par(no.readonly = TRUE) # snapshot of the current graphics settings

# Common bin edges covering both samples so the two histograms line up.
breaks <- pretty(range(c(x_orig, x_transformed)), n = 25)

# Kernel density estimates for the overlay curves.
d_orig <- density(x_orig)
d_trans <- density(x_transformed)

# Bin heights computed without drawing; only needed to size the y-axis.
h_orig <- hist(x_orig, breaks = breaks, plot = FALSE)
h_trans <- hist(x_transformed, breaks = breaks, plot = FALSE)

# Tallest thing that will be drawn, including the peak of the normal reference.
ymax <- max(h_orig$density, h_trans$density, d_orig$y, d_trans$y, dnorm(0))

# Original sample: this call opens the plot and fixes the axes.
hist(
  x_orig,
  breaks = breaks, freq = FALSE, ylim = c(0, ymax * 1.05),
  col = rgb(0.2, 0.4, 0.8, 0.4), border = "white",
  main = "Before and After OSKT Transformation", xlab = "Value"
)
lines(d_orig, col = "blue", lwd = 2)

# Transformed sample drawn on top of the existing axes.
hist(
  x_transformed,
  breaks = breaks, freq = FALSE,
  col = rgb(0.8, 0.3, 0.3, 0.4), border = "white", add = TRUE
)
lines(d_trans, col = "red", lwd = 2)

# Standard normal reference
curve(dnorm(x), add = TRUE, lwd = 2, lty = 2, col = "black")

# Thick legend lines stand in for the histogram fills; thin ones for the curves.
legend(
  "topleft",
  legend = c("Original", "Transformed", "Original Density", "OSKT Density", "Standard Normal"),
  col = c(rgb(0.2, 0.4, 0.8, 0.6), rgb(0.8, 0.3, 0.3, 0.6), "blue", "red", "black"),
  lwd = c(10, 10, 2, 2, 2),
  lty = c(1, 1, 1, 1, 2),
  bty = "n"
)
par(oldpar)
Back-transformation can be performed using the backosktfast function, which uses the Brent–Dekker algorithm for efficiency.
# Map the normalized values back to the original scale. The mean and standard
# deviation of the raw sample are passed in together with the g and h values
# obtained from the forward fit.
X_mean <- mean(x_orig)
X_sd <- sd(x_orig)
res_back <- backosktfast(
  Z = x_transformed,
  X_mean = X_mean,
  X_sd = X_sd,
  g = g_star,
  h = h_star,
  method = "brent"
)
# Recovered observations live in the `X_orig` component of the result.
x_recovered <- res_back$X_orig
# Show the first five recovered values.
head(x_recovered, n = 5L)
[1] 0.4759235 2.2021046 0.6189750 0.6304848 0.3670768
oldpar <- par(no.readonly = TRUE)
# Round-trip figure: overlay the original, OSKT-transformed, and
# back-transformed samples as density-scaled histograms on shared bins.

# finite = TRUE keeps the bin range well-defined even if the back-transformation
# produced non-finite values (hist() itself already drops them).
breaks <- pretty(
  range(c(x_orig, x_transformed, x_recovered), finite = TRUE),
  n = 30
)

# One colour per series, shared by the bars and the legend so the swatches
# always match what is drawn.
col_orig <- rgb(0.2, 0.4, 0.8, 0.4)
col_trans <- rgb(0.8, 0.3, 0.3, 0.4)
col_back <- rgb(0.2, 0.8, 0.2, 0.4)

# Tallest bar across all three series. The first hist() call fixes the axes, so
# without an explicit ylim any taller bar added afterwards would be clipped.
ymax_back <- max(vapply(
  list(x_orig, x_transformed, x_recovered),
  function(v) max(hist(v, breaks = breaks, plot = FALSE)$density),
  numeric(1)
))

hist(x_orig,
  breaks = breaks, freq = FALSE, ylim = c(0, ymax_back * 1.05),
  col = col_orig, border = "white",
  main = "OSKT Transformation & Back Transformation", xlab = "Value"
)
hist(x_transformed,
  breaks = breaks, freq = FALSE,
  col = col_trans, border = "white", add = TRUE
)
hist(x_recovered,
  breaks = breaks, freq = FALSE,
  col = col_back, border = "white", add = TRUE
)
legend("topleft",
  legend = c("Original", "Transformed", "Back-transformed"),
  fill = c(col_orig, col_trans, col_back)
)
# Compare the recovered values with the originals at a tolerance of 1e-6.
# all.equal() returns a character description of the discrepancy instead of
# TRUE when the difference exceeds the tolerance, which is what is printed here.
(all.equal(x_orig, x_recovered, tolerance = 1e-6))
[1] "Mean relative difference: 0.0006286436"
par(oldpar)
Diagnostic metrics compare the original and recovered values to verify the numerical accuracy of the inversion.
# Accuracy of the round trip, computed only on pairs where both the original
# and the recovered value are finite.
ok <- is.finite(x_orig) & is.finite(x_recovered)
xo <- x_orig[ok]
xr <- x_recovered[ok]

# Pointwise recovery error and the summary measures derived from it.
err <- xr - xo
COR <- cor(xo, xr)
RMSE <- sqrt(mean(err^2))
MAE <- mean(abs(err))

back_stats <- data.frame(
  RMSE = RMSE,
  MAE = MAE,
  Correlation = COR,
  R2 = COR^2
)
# Transpose so each metric is printed on its own row.
round(t(back_stats), digits = 8)
[,1]
RMSE 0.00089459
MAE 0.00069680
Correlation 1.00000000
R2 1.00000000
Below, we generate a skewed variable using ghdist and compare OSKT with the Box-Cox (BC) and Yeo-Johnson (YJ) transformations.
# Reproducible sample of 300 draws from groupcompare::ghdist with g = -0.49
# and h = 0.
set.seed(12)
x_orig <- groupcompare::ghdist(n = 300, A = 0, B = 1, g = -0.49, h = 0)

# Transform the same sample three ways, keeping only the transformed values.
x_bc <- osktnorm::boxcox(x_orig, makepositive = TRUE)$transformed # Box-Cox
x_yj <- osktnorm::yeojohnson(x_orig)$transformed                  # Yeo-Johnson
x_oskt <- osktfast(x_orig)$transformed                            # OSKT
# Summarise the shape of a numeric sample and how close it is to normal.
#
# Non-finite values (NA, NaN, Inf, -Inf) are dropped before anything is
# computed.
#
# Args:
#   x: numeric vector.
#
# Returns a named numeric vector with elements:
#   Skew - third central moment divided by sd(x)^3
#   Kurt - fourth central moment divided by sd(x)^4, minus 3
#   SW   - p-value of shapiro.test()
#   CVM  - p-value returned by cvmtest()
#   PPM  - the (unnamed) statistic returned by pearsonp()
# cvmtest() and pearsonp() are not defined in this file; presumably they come
# from the osktnorm package loaded earlier -- confirm.
get_stats <- function(x) {
  finite_x <- x[is.finite(x)]

  # Centre once and reuse for both moment ratios.
  centred <- finite_x - mean(finite_x)
  spread <- sd(finite_x)
  skew <- mean(centred^3) / spread^3
  excess_kurt <- mean(centred^4) / spread^4 - 3

  c(
    Skew = skew,
    Kurt = excess_kurt,
    SW = shapiro.test(finite_x)$p.value,
    CVM = cvmtest(finite_x)$p.value,
    PPM = unname(pearsonp(finite_x)$statistic)
  )
}
# Build the comparison table: one row per version of the sample (row names come
# from the list names), one column per statistic returned by get_stats().
samples <- list(ORG = x_orig, BC = x_bc, YJ = x_yj, OSKT = x_oskt)
pval_table <- do.call(rbind, lapply(samples, get_stats))
as.data.frame(round(pval_table, digits = 4))
Skew Kurt SW CVM PPM
ORG -1.8642 5.9759 0.0000 0.0001 9.24
BC -0.2397 -0.5635 0.0047 0.1551 0.82
YJ -0.0454 -0.3109 0.1257 0.3844 0.71
OSKT -0.2130 0.0118 0.0631 0.9100 0.56
options(oldopts)