How to use RSDA 3.3

RSDA Package version 3.3

Oldemar Rodríguez R.

Installing the package

CRAN

install.packages("RSDA", dependencies=TRUE)

Github

devtools::install_github("PROMiDAT/RSDA")

How to read a Symbolic Table from a CSV file with RSDA?

ex3 <- read.sym.table(file = 'tsym1.csv', header=TRUE, sep=';',dec='.', row.names=1)
ex3
#> # A tibble: 7 × 7
#>      F1              F2      F3    F4        F5               F6              F7
#>   <dbl>      <symblc_n> <symbl> <dbl> <symblc_>       <symblc_n>      <symblc_n>
#> 1   2.8   [1.00 : 2.00]  <hist>   6       {a,d}   [0.00 : 90.00]  [9.00 : 24.00]
#> 2   1.4   [3.00 : 9.00]  <hist>   8     {b,c,d} [-90.00 : 98.00]  [-9.00 : 9.00]
#> 3   3.2  [-1.00 : 4.00]  <hist>  -7       {a,b}  [65.00 : 90.00] [65.00 : 70.00]
#> 4  -2.1   [0.00 : 2.00]  <hist>   0   {a,b,c,d}  [45.00 : 89.00] [25.00 : 67.00]
#> 5  -3   [-4.00 : -2.00]  <hist>  -9.5       {b}  [20.00 : 40.00]  [9.00 : 40.00]
#> 6   0.1 [10.00 : 21.00]  <hist>  -1       {a,d}    [5.00 : 8.00]   [5.00 : 8.00]
#> 7   9    [4.00 : 21.00]  <hist>   0.5       {a}    [3.14 : 6.76]   [4.00 : 6.00]

How to save a Symbolic Table in a CSV file with RSDA?

write.sym.table(ex3, file = 'tsymtemp.csv', sep = ';',dec = '.',
                row.names = TRUE, col.names = TRUE)

Symbolic Data Frame Example in RSDA

data(example3)
example3
#> # A tibble: 7 × 7
#>      F1              F2                      F3    F4        F5               F6
#>   <dbl>      <symblc_n>              <symblc_m> <dbl> <symblc_>       <symblc_n>
#> 1   2.8   [1.00 : 2.00] M1:0.10 M2:0.70 M3:0.20   6   {e,g,i,k}   [0.00 : 90.00]
#> 2   1.4   [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10   8   {a,b,c,d} [-90.00 : 98.00]
#> 3   3.2  [-1.00 : 4.00] M1:0.20 M2:0.20 M3:0.60  -7   {2,b,1,c}  [65.00 : 90.00]
#> 4  -2.1   [0.00 : 2.00] M1:0.90 M2:0.00 M3:0.10   0   {a,3,4,c}  [45.00 : 89.00]
#> 5  -3   [-4.00 : -2.00] M1:0.60 M2:0.00 M3:0.40  -9.5 {e,g,i,k}  [20.00 : 40.00]
#> 6   0.1 [10.00 : 21.00] M1:0.00 M2:0.70 M3:0.30  -1     {e,1,i}    [5.00 : 8.00]
#> 7   9    [4.00 : 21.00] M1:0.20 M2:0.20 M3:0.60   0.5   {e,a,2}    [3.14 : 6.76]
#> # ℹ 1 more variable: F7 <symblc_n>
example3[2,]
#> # A tibble: 1 × 7
#>      F1            F2                      F3    F4         F5               F6
#>   <dbl>    <symblc_n>              <symblc_m> <dbl> <symblc_s>       <symblc_n>
#> 1   1.4 [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10     8  {a,b,c,d} [-90.00 : 98.00]
#> # ℹ 1 more variable: F7 <symblc_n>
example3[,3]
#> # A tibble: 7 × 1
#>                        F3
#>                <symblc_m>
#> 1 M1:0.10 M2:0.70 M3:0.20
#> 2 M1:0.60 M2:0.30 M3:0.10
#> 3 M1:0.20 M2:0.20 M3:0.60
#> 4 M1:0.90 M2:0.00 M3:0.10
#> 5 M1:0.60 M2:0.00 M3:0.40
#> 6 M1:0.00 M2:0.70 M3:0.30
#> 7 M1:0.20 M2:0.20 M3:0.60
example3[2:3,5]
#> # A tibble: 2 × 1
#>           F5
#>   <symblc_s>
#> 1  {a,b,c,d}
#> 2  {2,b,1,c}
example3$F1
#> [1]  2.8  1.4  3.2 -2.1 -3.0  0.1  9.0

How to generate a symbolic data table from a classic data table in RSDA?

data(ex1_db2so)
ex1_db2so
#>         state sex county group age
#> 1     Florida   M      2     6   3
#> 2  California   F      4     3   4
#> 3       Texas   M     12     3   4
#> 4     Florida   F      2     3   4
#> 5       Texas   M      4     6   4
#> 6       Texas   F      2     3   3
#> 7     Florida   M      6     3   4
#> 8     Florida   F      2     6   4
#> 9  California   M      2     3   6
#> 10 California   F     21     3   4
#> 11 California   M      2     3   4
#> 12 California   M      2     6   7
#> 13      Texas   F     23     3   4
#> 14    Florida   M      2     3   4
#> 15    Florida   F     12     7   4
#> 16      Texas   M      2     3   8
#> 17 California   F      3     7   9
#> 18 California   M      2     3  11
#> 19 California   M      1     3  11

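The classic.to.sym function converts a traditional (classic) table into a symbolic one. To do this, we must indicate its parameters, as shown in the examples below: the data table (x), the columns that define the concepts (concept), and the columns to be converted into symbolic variables (variables).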

Example 1

result <- classic.to.sym(x = ex1_db2so, 
                         concept = c(state, sex),
                         variables = c(county, group, age))
result
#> # A tibble: 6 × 3
#>           county         group            age
#>       <symblc_n>    <symblc_n>     <symblc_n>
#> 1 [3.00 : 21.00] [3.00 : 7.00]  [4.00 : 9.00]
#> 2  [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3 [2.00 : 12.00] [3.00 : 7.00]  [4.00 : 4.00]
#> 4  [2.00 : 6.00] [3.00 : 6.00]  [3.00 : 4.00]
#> 5 [2.00 : 23.00] [3.00 : 3.00]  [3.00 : 4.00]
#> 6 [2.00 : 12.00] [3.00 : 6.00]  [4.00 : 8.00]

We can add new variables indicating the type we want them to be.

result <- classic.to.sym(x = ex1_db2so, 
                         concept = c("state", "sex"),
                         variables = c(county, group, age),
                         age_hist = sym.histogram(age, breaks = pretty(ex1_db2so$age, 5)))
result
#> # A tibble: 6 × 4
#>     age_hist         county         group            age
#>   <symblc_h>     <symblc_n>    <symblc_n>     <symblc_n>
#> 1     <hist> [3.00 : 21.00] [3.00 : 7.00]  [4.00 : 9.00]
#> 2     <hist>  [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3     <hist> [2.00 : 12.00] [3.00 : 7.00]  [4.00 : 4.00]
#> 4     <hist>  [2.00 : 6.00] [3.00 : 6.00]  [3.00 : 4.00]
#> 5     <hist> [2.00 : 23.00] [3.00 : 3.00]  [3.00 : 4.00]
#> 6     <hist> [2.00 : 12.00] [3.00 : 6.00]  [4.00 : 8.00]

Example 2

data(USCrime)
head(USCrime)
#>   state fold population householdsize racepctblack racePctWhite racePctAsian
#> 1     8    1       0.19          0.33         0.02         0.90         0.12
#> 2    53    1       0.00          0.16         0.12         0.74         0.45
#> 3    24    1       0.00          0.42         0.49         0.56         0.17
#> 4    34    1       0.04          0.77         1.00         0.08         0.12
#> 5    42    1       0.01          0.55         0.02         0.95         0.09
#> 6     6    1       0.02          0.28         0.06         0.54         1.00
#>   racePctHisp agePct12t21 agePct12t29 agePct16t24 agePct65up numbUrban pctUrban
#> 1        0.17        0.34        0.47        0.29       0.32      0.20      1.0
#> 2        0.07        0.26        0.59        0.35       0.27      0.02      1.0
#> 3        0.04        0.39        0.47        0.28       0.32      0.00      0.0
#> 4        0.10        0.51        0.50        0.34       0.21      0.06      1.0
#> 5        0.05        0.38        0.38        0.23       0.36      0.02      0.9
#> 6        0.25        0.31        0.48        0.27       0.37      0.04      1.0
#>   medIncome pctWWage pctWFarmSelf pctWInvInc pctWSocSec pctWPubAsst pctWRetire
#> 1      0.37     0.72         0.34       0.60       0.29        0.15       0.43
#> 2      0.31     0.72         0.11       0.45       0.25        0.29       0.39
#> 3      0.30     0.58         0.19       0.39       0.38        0.40       0.84
#> 4      0.58     0.89         0.21       0.43       0.36        0.20       0.82
#> 5      0.50     0.72         0.16       0.68       0.44        0.11       0.71
#> 6      0.52     0.68         0.20       0.61       0.28        0.15       0.25
#>   medFamInc perCapInc whitePerCap blackPerCap indianPerCap AsianPerCap
#> 1      0.39      0.40        0.39        0.32         0.27        0.27
#> 2      0.29      0.37        0.38        0.33         0.16        0.30
#> 3      0.28      0.27        0.29        0.27         0.07        0.29
#> 4      0.51      0.36        0.40        0.39         0.16        0.25
#> 5      0.46      0.43        0.41        0.28         0.00        0.74
#> 6      0.62      0.72        0.76        0.77         0.28        0.52
#>   OtherPerCap HispPerCap NumUnderPov PctPopUnderPov PctLess9thGrade
#> 1        0.36       0.41        0.08           0.19            0.10
#> 2        0.22       0.35        0.01           0.24            0.14
#> 3        0.28       0.39        0.01           0.27            0.27
#> 4        0.36       0.44        0.01           0.10            0.09
#> 5        0.51       0.48        0.00           0.06            0.25
#> 6        0.48       0.60        0.01           0.12            0.13
#>   PctNotHSGrad PctBSorMore PctUnemployed PctEmploy PctEmplManu PctEmplProfServ
#> 1         0.18        0.48          0.27      0.68        0.23            0.41
#> 2         0.24        0.30          0.27      0.73        0.57            0.15
#> 3         0.43        0.19          0.36      0.58        0.32            0.29
#> 4         0.25        0.31          0.33      0.71        0.36            0.45
#> 5         0.30        0.33          0.12      0.65        0.67            0.38
#> 6         0.12        0.80          0.10      0.65        0.19            0.77
#>   PctOccupManu PctOccupMgmtProf MalePctDivorce MalePctNevMarr FemalePctDiv
#> 1         0.25             0.52           0.68           0.40         0.75
#> 2         0.42             0.36           1.00           0.63         0.91
#> 3         0.49             0.32           0.63           0.41         0.71
#> 4         0.37             0.39           0.34           0.45         0.49
#> 5         0.42             0.46           0.22           0.27         0.20
#> 6         0.06             0.91           0.49           0.57         0.61
#>   TotalPctDiv PersPerFam PctFam2Par PctKids2Par PctYoungKids2Par PctTeen2Par
#> 1        0.75       0.35       0.55        0.59             0.61        0.56
#> 2        1.00       0.29       0.43        0.47             0.60        0.39
#> 3        0.70       0.45       0.42        0.44             0.43        0.43
#> 4        0.44       0.75       0.65        0.54             0.83        0.65
#> 5        0.21       0.51       0.91        0.91             0.89        0.85
#> 6        0.58       0.44       0.62        0.69             0.87        0.53
#>   PctWorkMomYoungKids PctWorkMom NumIlleg PctIlleg NumImmig PctImmigRecent
#> 1                0.74       0.76     0.04     0.14     0.03           0.24
#> 2                0.46       0.53     0.00     0.24     0.01           0.52
#> 3                0.71       0.67     0.01     0.46     0.00           0.07
#> 4                0.85       0.86     0.03     0.33     0.02           0.11
#> 5                0.40       0.60     0.00     0.06     0.00           0.03
#> 6                0.30       0.43     0.00     0.11     0.04           0.30
#>   PctImmigRec5 PctImmigRec8 PctImmigRec10 PctRecentImmig PctRecImmig5
#> 1         0.27         0.37          0.39           0.07         0.07
#> 2         0.62         0.64          0.63           0.25         0.27
#> 3         0.06         0.15          0.19           0.02         0.02
#> 4         0.20         0.30          0.31           0.05         0.08
#> 5         0.07         0.20          0.27           0.01         0.02
#> 6         0.35         0.43          0.47           0.50         0.50
#>   PctRecImmig8 PctRecImmig10 PctSpeakEnglOnly PctNotSpeakEnglWell
#> 1         0.08          0.08             0.89                0.06
#> 2         0.25          0.23             0.84                0.10
#> 3         0.04          0.05             0.88                0.04
#> 4         0.11          0.11             0.81                0.08
#> 5         0.04          0.05             0.88                0.05
#> 6         0.56          0.57             0.45                0.28
#>   PctLargHouseFam PctLargHouseOccup PersPerOccupHous PersPerOwnOccHous
#> 1            0.14              0.13             0.33              0.39
#> 2            0.16              0.10             0.17              0.29
#> 3            0.20              0.20             0.46              0.52
#> 4            0.56              0.62             0.85              0.77
#> 5            0.16              0.19             0.59              0.60
#> 6            0.25              0.19             0.29              0.53
#>   PersPerRentOccHous PctPersOwnOccup PctPersDenseHous PctHousLess3BR MedNumBR
#> 1               0.28            0.55             0.09           0.51      0.5
#> 2               0.17            0.26             0.20           0.82      0.0
#> 3               0.43            0.42             0.15           0.51      0.5
#> 4               1.00            0.94             0.12           0.01      0.5
#> 5               0.37            0.89             0.02           0.19      0.5
#> 6               0.18            0.39             0.26           0.73      0.0
#>   HousVacant PctHousOccup PctHousOwnOcc PctVacantBoarded PctVacMore6Mos
#> 1       0.21         0.71          0.52             0.05           0.26
#> 2       0.02         0.79          0.24             0.02           0.25
#> 3       0.01         0.86          0.41             0.29           0.30
#> 4       0.01         0.97          0.96             0.60           0.47
#> 5       0.01         0.89          0.87             0.04           0.55
#> 6       0.02         0.84          0.30             0.16           0.28
#>   MedYrHousBuilt PctHousNoPhone PctWOFullPlumb OwnOccLowQuart OwnOccMedVal
#> 1           0.65           0.14           0.06           0.22         0.19
#> 2           0.65           0.16           0.00           0.21         0.20
#> 3           0.52           0.47           0.45           0.18         0.17
#> 4           0.52           0.11           0.11           0.24         0.21
#> 5           0.73           0.05           0.14           0.31         0.31
#> 6           0.25           0.02           0.05           0.94         1.00
#>   OwnOccHiQuart RentLowQ RentMedian RentHighQ MedRent MedRentPctHousInc
#> 1          0.18     0.36       0.35      0.38    0.34              0.38
#> 2          0.21     0.42       0.38      0.40    0.37              0.29
#> 3          0.16     0.27       0.29      0.27    0.31              0.48
#> 4          0.19     0.75       0.70      0.77    0.89              0.63
#> 5          0.30     0.40       0.36      0.38    0.38              0.22
#> 6          1.00     0.67       0.63      0.68    0.62              0.47
#>   MedOwnCostPctInc MedOwnCostPctIncNoMtg NumInShelters NumStreet PctForeignBorn
#> 1             0.46                  0.25          0.04         0           0.12
#> 2             0.32                  0.18          0.00         0           0.21
#> 3             0.39                  0.28          0.00         0           0.14
#> 4             0.51                  0.47          0.00         0           0.19
#> 5             0.51                  0.21          0.00         0           0.11
#> 6             0.59                  0.11          0.00         0           0.70
#>   PctBornSameState PctSameHouse85 PctSameCity85 PctSameState85 LandArea PopDens
#> 1             0.42           0.50          0.51           0.64     0.12    0.26
#> 2             0.50           0.34          0.60           0.52     0.02    0.12
#> 3             0.49           0.54          0.67           0.56     0.01    0.21
#> 4             0.30           0.73          0.64           0.65     0.02    0.39
#> 5             0.72           0.64          0.61           0.53     0.04    0.09
#> 6             0.42           0.49          0.73           0.64     0.01    0.58
#>   PctUsePubTrans LemasPctOfficDrugUn ViolentCrimesPerPop
#> 1           0.20                0.32                0.20
#> 2           0.45                0.00                0.67
#> 3           0.02                0.00                0.43
#> 4           0.28                0.00                0.12
#> 5           0.02                0.00                0.03
#> 6           0.10                0.00                0.14
result  <- classic.to.sym(x = USCrime,
                          concept = state, 
                          variables= c(NumInShelters,
                                       NumImmig,
                                       ViolentCrimesPerPop),
                          ViolentCrimesPerPop_hist = sym.histogram(ViolentCrimesPerPop,
                                                                   breaks = pretty(USCrime$ViolentCrimesPerPop,5)))
result
#> # A tibble: 46 × 4
#>    ViolentCrimesPerPop_hist NumInShelters      NumImmig ViolentCrimesPerPop
#>                  <symblc_h>    <symblc_n>    <symblc_n>          <symblc_n>
#>  1                   <hist> [0.00 : 0.32] [0.00 : 0.04]       [0.01 : 1.00]
#>  2                   <hist> [0.01 : 0.18] [0.01 : 0.09]       [0.05 : 0.36]
#>  3                   <hist> [0.00 : 1.00] [0.00 : 0.57]       [0.05 : 0.57]
#>  4                   <hist> [0.00 : 0.08] [0.00 : 0.02]       [0.02 : 1.00]
#>  5                   <hist> [0.00 : 1.00] [0.00 : 1.00]       [0.01 : 1.00]
#>  6                   <hist> [0.00 : 0.68] [0.00 : 0.23]       [0.07 : 0.75]
#>  7                   <hist> [0.00 : 0.79] [0.00 : 0.14]       [0.00 : 0.94]
#>  8                   <hist> [0.01 : 0.01] [0.01 : 0.01]       [0.37 : 0.37]
#>  9                   <hist> [1.00 : 1.00] [0.39 : 0.39]       [1.00 : 1.00]
#> 10                   <hist> [0.00 : 0.52] [0.00 : 1.00]       [0.06 : 1.00]
#> # ℹ 36 more rows

Example 3

data("ex_mcfa1") 
head(ex_mcfa1)
#>   suspect age     hair    eyes    region
#> 1       1  42    h_red e_brown     Bronx
#> 2       2  20  h_black e_green     Bronx
#> 3       3  64  h_brown e_brown  Brooklyn
#> 4       4  55 h_blonde e_brown     Bronx
#> 5       5   4  h_brown e_green Manhattan
#> 6       6  61 h_blonde e_green     Bronx
sym.table <- classic.to.sym(x = ex_mcfa1, 
                            concept = suspect, 
                            variables=c(hair,
                                        eyes,
                                        region),
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 3
#>                  hair              eyes               region
#>            <symblc_s>        <symblc_s>           <symblc_s>
#>  1            {h_red} {e_brown,e_black}              {Bronx}
#>  2 {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8 {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows

Example 4

We can modify the function that is applied by default to the categorical variables by using the default.categorical argument.

sym.table <- classic.to.sym(x = ex_mcfa1, 
                            concept = suspect,
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 4
#>                age               hair              eyes               region
#>         <symblc_n>         <symblc_s>        <symblc_s>           <symblc_s>
#>  1 [22.00 : 42.00]            {h_red} {e_brown,e_black}              {Bronx}
#>  2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3 [29.00 : 64.00]  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4 [14.00 : 55.00]         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5  [4.00 : 47.00]    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 [32.00 : 61.00] {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7 [49.00 : 61.00]    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8  [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10 [50.00 : 68.00]  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows

Converting SODAS 1.0 *.SDS files to RSDA files

hani3101 <- SDS.to.RSDA(file.path = "hani3101.sds")
#> Preprocessing file
#> Converting data to JSON format
#> Processing variable 1: R3101
#> Processing variable 2: RNINO12
#> Processing variable 3: RNINO3
#> Processing variable 4: RNINO4
#> Processing variable 5: RNINO34
#> Processing variable 6: RSOI
hani3101
#> # A tibble: 32 × 6
#>                             R3101                 RNINO12
#>                        <symblc_m>              <symblc_m>
#>  1 X2:0.21 X4:0.18 X3:0.15 X5:... X1:0.17 X2:0.83 X3:0.00
#>  2 X2:0.30 X4:0.14 X3:0.19 X5:... X1:0.00 X2:0.25 X3:0.75
#>  3 X2:0.16 X4:0.12 X3:0.20 X5:... X1:0.67 X2:0.33 X3:0.00
#>  4 X2:0.13 X4:0.15 X3:0.22 X5:... X1:0.17 X2:0.83 X3:0.00
#>  5 X2:0.14 X4:0.14 X3:0.18 X5:... X1:0.42 X2:0.58 X3:0.00
#>  6 X2:0.26 X4:0.06 X3:0.23 X5:... X1:0.00 X2:0.67 X3:0.33
#>  7 X2:0.28 X4:0.14 X3:0.10 X5:... X1:0.00 X2:1.00 X3:0.00
#>  8 X2:0.25 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#>  9 X2:0.20 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#> 10 X2:0.21 X4:0.16 X3:0.31 X5:... X1:0.08 X2:0.92 X3:0.00
#> # ℹ 22 more rows
#> # ℹ 4 more variables: RNINO3 <symblc_m>, RNINO4 <symblc_m>, RNINO34 <symblc_m>,
#> #   RSOI <symblc_m>
# We can save the converted table as a CSV file in RSDA format as follows:
write.sym.table(hani3101,
                file='hani3101.csv',
                sep=';',
                dec='.',
                row.names=TRUE,
                col.names=TRUE)

Converting SODAS 2.0 *.XML files to RSDA files

abalone <- SODAS.to.RSDA("abalone.xml")
#> Processing variable 1: LENGTH
#> Processing variable 2: DIAMETER
#> Processing variable 3: HEIGHT
#> Processing variable 4: WHOLE_WEIGHT
#> Processing variable 5: SHUCKED_WEIGHT
#> Processing variable 6: VISCERA_WEIGHT
#> Processing variable 7: SHELL_WEIGHT
abalone
#> # A tibble: 24 × 7
#>           LENGTH      DIAMETER        HEIGHT  WHOLE_WEIGHT SHUCKED_WEIGHT
#>       <symblc_n>    <symblc_n>    <symblc_n>    <symblc_n>     <symblc_n>
#>  1 [0.28 : 0.66] [0.20 : 0.48] [0.07 : 0.18] [0.08 : 1.37]  [0.03 : 0.64]
#>  2 [0.30 : 0.74] [0.22 : 0.58] [0.02 : 1.13] [0.15 : 2.25]  [0.06 : 1.16]
#>  3 [0.34 : 0.78] [0.26 : 0.63] [0.06 : 0.23] [0.20 : 2.66]  [0.07 : 1.49]
#>  4 [0.39 : 0.82] [0.30 : 0.65] [0.10 : 0.25] [0.26 : 2.51]  [0.11 : 1.23]
#>  5 [0.40 : 0.74] [0.32 : 0.60] [0.10 : 0.24] [0.35 : 2.20]  [0.12 : 0.84]
#>  6 [0.45 : 0.80] [0.38 : 0.63] [0.14 : 0.22] [0.64 : 2.53]  [0.16 : 0.93]
#>  7 [0.49 : 0.72] [0.36 : 0.58] [0.12 : 0.21] [0.68 : 2.12]  [0.16 : 0.82]
#>  8 [0.55 : 0.70] [0.46 : 0.58] [0.18 : 0.22] [1.21 : 1.81]  [0.32 : 0.71]
#>  9 [0.08 : 0.24] [0.06 : 0.18] [0.01 : 0.06] [0.00 : 0.07]  [0.00 : 0.03]
#> 10 [0.13 : 0.58] [0.10 : 0.45] [0.00 : 0.15] [0.01 : 0.89]  [0.00 : 0.50]
#> # ℹ 14 more rows
#> # ℹ 2 more variables: VISCERA_WEIGHT <symblc_n>, SHELL_WEIGHT <symblc_n>
write.sym.table(abalone,
                file='abalone.csv',
                sep=';',
                dec='.',
                row.names = TRUE,
                col.names = TRUE)

Basic statistics

Symbolic Mean

data(example3)
mean(example3$F1)
#> [1] 1.628571
mean(example3[,1])
#> [1] 1.628571
mean(example3$F2)
#> [1] 5
mean(example3[,2])
#> [1] 5
mean(example3$F2,method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]
mean(example3[,2],method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]

Symbolic median

median(example3$F1)
#> [1] 1.4
median(example3[,1])
#> [1] 1.4
median(example3$F2)
#> [1] 1.5
median(example3[,2])
#> [1] 1.5
median(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]
median(example3[,6], method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]

Variance and standard deviation

var(example3[,1])
#> [1] 15.98238
var(example3[,2])
#> [1] 90.66667
var(example3$F6)
#> [1] 1872.358
var(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [2,408.97 : 1,670.51]
var(example3$F6, method = 'billard')
#> [1] 1355.143
sd(example3$F1)
#> [1] 3.997797
sd(example3$F2)
#> [1] 6.733003
sd(example3$F6)
#> [1] 30.59704
sd(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [49.08 : 40.87]
sd(example3$F6, method = 'billard')
#> [1] 36.81226

Symbolic correlation

cor(example3$F1, example3$F4)
#> [1] 0.2864553
cor(example3[,1], example3[,4])
#>           [,1]
#> [1,] 0.2864553
cor(example3$F2, example3$F6, method = 'centers')
#> [1] -0.6693648
cor(example3$F2, example3$F6, method = 'billard')
#> [1] -0.6020041

Radar plot for intervals

library(ggpolypath)
#> Loading required package: ggplot2

data(oils)
oils <- RSDA:::to.v3(RSDA:::to.v2(oils))
sym.radar.plot(oils[2:3,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0, label = round(min(real.value), : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.25, label = inverse.rescale(0.25, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.5, label = inverse.rescale(0.5, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.75, label = inverse.rescale(0.75, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 1, label = round(max(real.value), : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.

sym.radar.plot(oils[2:5,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0, label = round(min(real.value), : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.25, label = inverse.rescale(0.25, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.5, label = inverse.rescale(0.5, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.75, label = inverse.rescale(0.75, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 1, label = round(max(real.value), : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.


res <- interval.histogram.plot(oils[,2],
                               n.bins = 4,
                               col = c(2,3,4,5))

res
#> $frequency
#> [1] 25 49  1 25
#> 
#> $histogram
#>      [,1]
#> [1,]  0.7
#> [2,]  1.9
#> [3,]  3.1
#> [4,]  4.3

res <- interval.histogram.plot(oils[,3],
                               n.bins = 3,
                               main = "Histogram",
                               col = c(2, 3, 4))

res
#> $frequency
#> [1] 50 25 25
#> 
#> $histogram
#>      [,1]
#> [1,]  0.7
#> [2,]  1.9
#> [3,]  3.1

Distances for intervals

Gowda-Diday

data("oils")
DM <- sym.dist.interval(sym.data = oils[,1:4],
                        method = "Gowda.Diday")
model <- hclust(DM)
plot(model, hang = -1)

Ichino

DM <- sym.dist.interval(sym.data= oils[,1:4],
                        method = "Ichino")
model <- hclust(DM)
plot(model, hang = -1)

Hausdorff

DM <- sym.dist.interval(sym.data = oils[,c(1,2,4)],
                        gamma = 0.5,
                        method = "Hausdorff",
                        normalize = FALSE,
                        SpanNormalize = TRUE,
                        euclidea = TRUE,
                        q = 2)
model <- hclust(DM)
plot(model, hang = -1)

Linear regression for intervals

Training

data(int_prost_train)
data(int_prost_test)
res.cm <- sym.lm(formula = lpsa~., sym.data = int_prost_train, method = 'cm')
res.cm
#> 
#> Call:
#> stats::lm(formula = formula, data = centers)
#> 
#> Coefficients:
#> (Intercept)       lcavol      lweight          age         lbph          svi  
#>    0.411537     0.579327     0.614128    -0.018659     0.143918     0.730937  
#>         lcp      gleason        pgg45  
#>   -0.205536    -0.030924     0.009507

Prediction

pred.cm <- sym.predict(model = res.cm, new.sym.data = int_prost_test)

Testing

RMSE.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7229999
RMSE.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7192467
R2.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.501419
R2.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.5058389
deter.coefficient(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.4962964

LASSO regression for intervals

data(int_prost_train)
data(int_prost_test)

Training

res.cm.lasso <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 1,
                        nfolds = 10,
                        grouped = TRUE)

Prediction

pred.cm.lasso <- sym.predict(res.cm.lasso,
                             response = 9,
                             int_prost_test,
                             method = 'cm')

Testing

plot(res.cm.lasso)

plot(res.cm.lasso$glmnet.fit, "lambda", label=TRUE)

RMSE.L(int_prost_test$lpsa,pred.cm.lasso)
#> [1] 0.706699
RMSE.U(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.7033206
R2.L(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.5236074
R2.U(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.5276086
deter.coefficient(int_prost_test$lpsa, pred.cm.lasso)
#> [1] 0.4929714

RIDGE regression for intervals

Training

data(int_prost_train)
data(int_prost_test)

res.cm.ridge <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 0,
                        nfolds = 10,
                        grouped = TRUE)

Prediction

pred.cm.ridge <- sym.predict(res.cm.ridge,
                             response = 9,
                             int_prost_test,
                             method = 'cm')

Testing

plot(res.cm.ridge)

plot(res.cm.ridge$glmnet.fit, "lambda", label=TRUE)

RMSE.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.7003401
RMSE.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.6973955
R2.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5335437
R2.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5369574
deter.coefficient(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.4743088

PCA for intervals

Example 1

data("oils")
res <- sym.pca(oils,'centers')
plot(res, choix = "ind")

plot(res, choix = "var")

Example 2

# Interval PCA on the oils table using the "tops" method.
res <- sym.pca(oils, "tops")
#> Warning in fun(libname, pkgname): couldn't connect to display ":0"
plot(res, choix = "ind")

Example 3

# Interval PCA on the oils table using the "principal.curves" method.
res <- sym.pca(oils, "principal.curves")
plot(res, choix = "ind")

Example 4

# Interval PCA on the oils table using the "optimized.distance" method.
res <- sym.pca(oils, "optimized.distance")

# Individuals first, then variables.
plot(res, choix = "ind")

plot(res, choix = "var")

Example 5

# Interval PCA on the oils table using the "optimized.variance" method.
res <- sym.pca(oils, "optimized.variance")

# Individuals first, then variables.
plot(res, choix = "ind")

plot(res, choix = "var")

Symbolic Multiple Correspondence Analysis

Example 1

# Load the classic (non-symbolic) suspects table and print it.
data(ex_mcfa1)
ex_mcfa1
#>     suspect age     hair    eyes    region
#> 1         1  42    h_red e_brown     Bronx
#> 2         2  20  h_black e_green     Bronx
#> 3         3  64  h_brown e_brown  Brooklyn
#> 4         4  55 h_blonde e_brown     Bronx
#> 5         5   4  h_brown e_green Manhattan
#> 6         6  61 h_blonde e_green     Bronx
#> 7         7  61  h_white e_black    Queens
#> 8         8  32 h_blonde e_brown Manhattan
#> 9         9  39 h_blonde e_black  Brooklyn
#> 10       10  50  h_brown e_brown Manhattan
#> 11       11  41    h_red  e_blue Manhattan
#> 12       12  35 h_blonde e_green  Brooklyn
#> 13       13  56 h_blonde e_brown     Bronx
#> 14       14  52    h_red e_brown    Queens
#> 15       15  55    h_red e_green  Brooklyn
#> 16       16  25  h_brown e_brown    Queens
#> 17       17  52 h_blonde e_brown  Brooklyn
#> 18       18  28    h_red e_brown Manhattan
#> 19       19  21  h_white  e_blue Manhattan
#> 20       20  66  h_black e_black  Brooklyn
#> 21       21  67 h_blonde e_brown    Queens
#> 22       22  13  h_white  e_blue  Brooklyn
#> 23       23  39  h_brown e_green Manhattan
#> 24       24  47  h_black e_green  Brooklyn
#> 25       25  54 h_blonde e_brown     Bronx
#> 26       26  75  h_brown  e_blue  Brooklyn
#> 27       27   3  h_white e_green Manhattan
#> 28       28  40  h_white e_green Manhattan
#> 29       29  58    h_red  e_blue    Queens
#> 30       30  41  h_brown e_green     Bronx
#> 31       31  25  h_white e_black  Brooklyn
#> 32       32  75 h_blonde  e_blue Manhattan
#> 33       33  58  h_white e_brown     Bronx
#> 34       34  61  h_white e_brown Manhattan
#> 35       35  52  h_white  e_blue     Bronx
#> 36       36  19    h_red e_black    Queens
#> 37       37  58    h_red e_black     Bronx
#> 38       38  46  h_black e_green Manhattan
#> 39       39  74  h_brown e_black Manhattan
#> 40       40  26 h_blonde e_brown  Brooklyn
#> 41       41  63 h_blonde  e_blue    Queens
#> 42       42  40  h_brown e_black    Queens
#> 43       43  65  h_black e_brown  Brooklyn
#> 44       44  51 h_blonde e_brown  Brooklyn
#> 45       45  15  h_white e_black  Brooklyn
#> 46       46  32 h_blonde e_brown     Bronx
#> 47       47  68  h_white e_black Manhattan
#> 48       48  51  h_white e_black    Queens
#> 49       49  14    h_red e_green    Queens
#> 50       50  72  h_white e_brown  Brooklyn
#> 51       51   7    h_red  e_blue  Brooklyn
#> 52       52  22    h_red e_brown     Bronx
#> 53       53  52    h_red e_brown  Brooklyn
#> 54       54  62  h_brown e_green     Bronx
#> 55       55  41  h_black e_brown    Queens
#> 56       56  32  h_black e_black Manhattan
#> 57       57  58  h_brown e_brown    Queens
#> 58       58  25  h_black e_brown    Queens
#> 59       59  70 h_blonde e_green  Brooklyn
#> 60       60  64  h_brown  e_blue    Queens
#> 61       61  25  h_white  e_blue     Bronx
#> 62       62  42  h_black e_black  Brooklyn
#> 63       63  56    h_red e_black  Brooklyn
#> 64       64  41 h_blonde e_black  Brooklyn
#> 65       65   8  h_white e_black Manhattan
#> 66       66   7  h_black e_green  Brooklyn
#> 67       67  42  h_white e_brown    Queens
#> 68       68  10  h_white  e_blue Manhattan
#> 69       69  60  h_brown e_black     Bronx
#> 70       70  52 h_blonde e_brown  Brooklyn
#> 71       71  39  h_brown  e_blue Manhattan
#> 72       72  69  h_brown e_green    Queens
#> 73       73  67 h_blonde e_green Manhattan
#> 74       74  46    h_red e_black  Brooklyn
#> 75       75  72  h_black e_black    Queens
#> 76       76  66    h_red  e_blue    Queens
#> 77       77   4  h_black  e_blue Manhattan
#> 78       78  62  h_black e_green  Brooklyn
#> 79       79  10 h_blonde  e_blue     Bronx
#> 80       80  16 h_blonde e_black Manhattan
#> 81       81  59 h_blonde e_brown     Bronx
#> 82       82  63 h_blonde  e_blue Manhattan
#> 83       83  54    h_red  e_blue    Queens
#> 84       84  14  h_brown  e_blue  Brooklyn
#> 85       85  48  h_black e_green Manhattan
#> 86       86  59 h_blonde e_black     Bronx
#> 87       87  73 h_blonde e_black     Bronx
#> 88       88  51  h_brown e_brown     Bronx
#> 89       89  14  h_white e_black     Bronx
#> 90       90  58 h_blonde e_black    Queens
#> 91       91  56    h_red e_green Manhattan
#> 92       92  26    h_red  e_blue  Brooklyn
#> 93       93  59  h_brown e_black Manhattan
#> 94       94  27  h_white e_green Manhattan
#> 95       95  38  h_black e_green Manhattan
#> 96       96   5 h_blonde e_green     Bronx
#> 97       97  14  h_black  e_blue    Queens
#> 98       98  13  h_black e_brown Manhattan
#> 99       99  54  h_white  e_blue  Brooklyn
#> 100     100  66  h_white e_green Manhattan
#> 101       1  22    h_red e_black     Bronx
#> 102       2  57 h_blonde e_black Manhattan
#> 103       3  29  h_white e_green    Queens
#> 104       4  14 h_blonde e_black Manhattan
#> 105       5  47    h_red e_green     Bronx
#> 106       6  32  h_white  e_blue    Queens
#> 107       7  49    h_red  e_blue     Bronx
#> 108       8   8  h_white e_black  Brooklyn
#> 109       9  67  h_white e_brown     Bronx
#> 110      10  68  h_black e_green     Bronx
#> 111      11  15  h_black e_brown Manhattan
#> 112      12  46  h_white e_brown     Bronx
#> 113      13  68  h_white e_black Manhattan
#> 114      14  55 h_blonde  e_blue Manhattan
#> 115      15   7  h_white e_green     Bronx
#> 116      16  10  h_black e_brown  Brooklyn
#> 117      17  49    h_red  e_blue Manhattan
#> 118      18  12  h_brown  e_blue  Brooklyn
#> 119      19  41  h_white  e_blue     Bronx
#> 120      20  10  h_brown  e_blue     Bronx
#> 121      21  12  h_white e_green Manhattan
#> 122      22  53  h_white  e_blue Manhattan
#> 123      23   5  h_black e_black Manhattan
#> 124      24  46  h_brown e_black    Queens
#> 125      25  14  h_brown e_black    Queens
#> 126      26  55  h_white e_green  Brooklyn
#> 127      27  53    h_red e_brown Manhattan
#> 128      28  31  h_black e_brown Manhattan
#> 129      29  31 h_blonde e_brown    Queens
#> 130      30  55  h_brown e_black  Brooklyn
# Aggregate the classic table by `suspect`: each suspect becomes one symbolic
# row, with categorical columns converted via `sym.set`.
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  default.categorical = sym.set
)
sym.table
#> # A tibble: 100 × 4
#>                age               hair              eyes               region
#>         <symblc_n>         <symblc_s>        <symblc_s>           <symblc_s>
#>  1 [22.00 : 42.00]            {h_red} {e_brown,e_black}              {Bronx}
#>  2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3 [29.00 : 64.00]  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4 [14.00 : 55.00]         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5  [4.00 : 47.00]    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 [32.00 : 61.00] {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7 [49.00 : 61.00]    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8  [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10 [50.00 : 68.00]  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows
# Symbolic MCA on columns 2 and 3, plotted using columns 2 and 3 of the result.
res <- sym.mcfa(sym.table, c(2, 3))
mcfa.scatterplot(
  res[, 2], res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3)
)

# Same analysis with column 4 added.
res <- sym.mcfa(sym.table, c(2, 3, 4))
mcfa.scatterplot(
  res[, 2], res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3, 4)
)

Symbolic UMAP

Oils Example

# Bind the `oils` symbolic interval table to `datos` and print it.
datos <- oils
datos
#> # A tibble: 8 × 4
#>             GRA               FRE               IOD               SAP
#> *    <symblc_n>        <symblc_n>        <symblc_n>        <symblc_n>
#> 1 [0.93 : 0.94] [-27.00 : -18.00] [170.00 : 204.00] [118.00 : 196.00]
#> 2 [0.93 : 0.94]   [-5.00 : -4.00] [192.00 : 208.00] [188.00 : 197.00]
#> 3 [0.92 : 0.92]   [-6.00 : -1.00]  [99.00 : 113.00] [189.00 : 198.00]
#> 4 [0.92 : 0.93]   [-6.00 : -4.00] [104.00 : 116.00] [187.00 : 193.00]
#> 5 [0.92 : 0.92] [-25.00 : -15.00]   [80.00 : 82.00] [189.00 : 193.00]
#> 6 [0.91 : 0.92]     [0.00 : 6.00]   [79.00 : 90.00] [187.00 : 196.00]
#> 7 [0.86 : 0.87]   [30.00 : 38.00]   [40.00 : 48.00] [190.00 : 199.00]
#> 8 [0.86 : 0.86]   [22.00 : 32.00]   [53.00 : 77.00] [190.00 : 202.00]
# Run symbolic UMAP on the interval table and print the resulting coordinates.
x <- sym.umap(datos)
x
#>            V1          V2          V3        V4
#> 1   -2.585767  0.68879511 12.25608472 -4.444671
#> 2   -2.731787  0.83482318 12.40253807 -4.298676
#> 3   -2.612730  0.71577486 12.28307965 -4.417711
#> 4   -2.785322  0.88836144 12.45726421 -4.245171
#> 5   -2.673584  0.77663742 12.34394377 -4.356922
#> 6   -2.472002  0.57506048 12.14125741 -4.558478
#> 7   -2.631238  0.73430321 12.30205138 -4.399287
#> 8   -2.764020  0.86708353 12.43541279 -4.266562
#> 9   -7.724020 -7.92523567 -4.18923765 -8.380159
#> 10  -7.660165 -7.98827404 -4.07399969 -8.265870
#> 11  -7.561059 -7.75351401 -4.28568299 -8.480643
#> 12  -7.706022 -7.83889673 -4.21813375 -8.473581
#> 13  -7.537845 -7.98309330 -3.94004793 -8.111801
#> 14  -7.466931 -7.99246148 -3.87223699 -8.036772
#> 15  -7.650829 -7.69241363 -3.92892819 -8.220309
#> 16  -7.603829 -7.75445331 -3.95048011 -8.258781
#> 17  -7.151658 -6.90595628 -3.39486298 -8.112696
#> 18  -7.099121 -7.03749245 -3.42422280 -7.951225
#> 19  -7.386255 -7.27193312 -3.49644728 -7.934284
#> 20  -7.143278 -7.10483695 -3.54026654 -8.135395
#> 21  -7.097380 -6.79966692 -3.18329507 -7.924504
#> 22  -7.336343 -7.02352637 -3.18666989 -7.940248
#> 23  -7.350272 -6.97224292 -3.19505905 -7.830173
#> 24  -7.173078 -6.92953717 -3.25557779 -7.929381
#> 25  -7.383290 -7.30092854 -3.57641310 -8.090767
#> 26  -7.453193 -7.21065084 -3.44738185 -8.403358
#> 27  -7.642228 -7.22628796 -3.74692803 -8.173056
#> 28  -7.299115 -7.21932547 -3.48585279 -8.054155
#> 29  -7.159441 -7.19566618 -3.51864014 -8.054212
#> 30  -7.355388 -7.06952266 -3.57206520 -8.211042
#> 31  -7.530940 -7.10686497 -3.51955207 -8.312959
#> 32  -7.661988 -6.93491520 -3.43447928 -8.251999
#> 33  -1.844555 -1.04290494  1.03803316  2.468671
#> 34  -1.894925 -1.21731165  1.06595489  2.251006
#> 35  -2.030665 -0.74341110  1.21038452  2.778374
#> 36  -1.991029 -0.76281866  1.26834385  2.726389
#> 37  -1.586868 -1.38707402  0.87759006  2.178879
#> 38  -1.552982 -1.30763095  0.99259123  1.949740
#> 39  -1.848414 -1.26586441  1.15924369  2.499329
#> 40  -1.637320 -1.22573433  1.35890467  2.579197
#> 41  -2.768460 -0.96453418 -0.06793758  1.610002
#> 42  -2.836483 -0.89052998 -0.18784656  1.744210
#> 43  -3.105718 -0.66044381 -0.35400205  1.799579
#> 44  -3.203142 -0.58006097 -0.44521436  1.742590
#> 45  -2.622591 -1.10559143 -0.15266826  1.709793
#> 46  -2.657812 -1.08947159  0.11601985  1.640287
#> 47  -2.836352 -1.10281490 -0.47297490  1.427039
#> 48  -2.829936 -1.04011156 -0.29230433  1.530826
#> 49  -1.721788 -1.10815215  1.24248815  2.293656
#> 50  -1.597092 -0.96236510  1.36322001  1.916581
#> 51  -1.646256 -0.91514231  1.35019261  2.473820
#> 52  -1.581155 -0.84104088  1.43849232  2.014578
#> 53  -1.445202 -1.23209979  1.23695469  2.012062
#> 54  -1.347159 -1.12292219  1.28770735  1.721182
#> 55  -1.343237 -1.16456789  1.34044411  2.058444
#> 56  -1.402121 -1.04971562  1.33875057  1.748518
#> 57  -2.159157 -1.16898475  0.56281654  1.874829
#> 58  -2.087334 -0.89802617  0.76577799  1.773909
#> 59  -2.133166 -1.29417973  0.64104079  1.833269
#> 60  -1.976745 -0.96805896  0.79273967  1.642469
#> 61  -1.947080 -1.25979011  0.45455005  1.709792
#> 62  -1.808467 -1.03041947  0.73785109  1.392100
#> 63  -1.880543 -1.38963769  0.68738869  1.563035
#> 64  -1.927964 -1.05505821  0.71240505  1.533037
#> 65  -9.349573 18.51018519  2.84037400  1.734092
#> 66  -9.231415 18.66294911  2.69242855  1.762992
#> 67  -8.796598 17.57164402  3.80224727  2.753445
#> 68  -8.719104 17.45826645  3.91559202  2.775338
#> 69  -9.244395 18.69407041  2.68381062  1.783429
#> 70  -9.117674 18.54942787  2.80599589  1.893048
#> 71  -8.891021 17.68311504  3.69085830  2.684595
#> 72  -8.883562 17.80044003  3.57610410  2.831710
#> 73  -9.207170 18.79559661  2.55611608  1.849539
#> 74  -9.319703 18.70173056  2.56052848  1.633185
#> 75  -8.727636 17.63100559  3.74522302  2.676892
#> 76  -8.822450 17.58109073  3.79189318  2.758334
#> 77  -9.101866 18.83734460  2.58470473  1.828574
#> 78  -9.084235 18.60868851  2.69590841  1.618814
#> 79  -8.726689 17.50998152  3.86184989  2.718337
#> 80  -8.758971 17.70301321  3.66930434  2.622840
#> 81  -2.361431 -0.15720674  1.24516140  3.226076
#> 82  -2.301916 -0.20291600  1.21276081  3.172281
#> 83  -2.576419 -0.08845250  1.25454786  3.410821
#> 84  -2.515827  0.03560128  1.12240090  3.335561
#> 85  -2.298455 -0.29634397  1.34130376  3.187503
#> 86  -2.435769 -0.23741165  1.37208962  3.311329
#> 87  -2.492401 -0.02446727  1.20263512  3.337572
#> 88  -2.668923 -0.09703857  1.26503836  3.475320
#> 89  -3.350443 -0.34481979 -0.19470225  2.087779
#> 90  -3.399585 -0.38192757 -0.29903464  2.070243
#> 91  -3.570085 -0.11626785 -0.15239144  2.320478
#> 92  -3.523809 -0.15551436 -0.04857718  2.296337
#> 93  -3.329781 -0.45517549 -0.47447307  1.899326
#> 94  -3.274401 -0.46200057 -0.31879372  1.992414
#> 95  -3.565785 -0.12303539 -0.13070258  2.314503
#> 96  -3.705151 -0.02676851 -0.26159866  2.386652
#> 97  14.600543 -3.17960153 -2.82343421  2.879040
#> 98  14.388323 -2.98014294 -2.90619573  2.951172
#> 99  14.319133 -3.04201223 -3.10747853  3.079411
#> 100 14.172337 -2.94519010 -3.17391597  3.090219
#> 101 14.634854 -3.11381596 -2.95251766  2.930081
#> 102 14.391315 -2.89886277 -2.87315085  2.872361
#> 103 14.383907 -3.23878408 -3.20547488  3.174793
#> 104 14.159485 -2.95175794 -3.11257035  3.109487
#> 105 14.013361 -2.13966024 -3.17498555  2.855503
#> 106 14.161041 -2.33912999 -3.05716547  2.870218
#> 107 13.863167 -2.22716807 -3.31827973  3.065827
#> 108 13.923721 -2.43908994 -3.32476543  3.085483
#> 109 13.860525 -2.00433353 -3.04261066  2.691761
#> 110 14.216072 -2.27064218 -3.04659495  2.889963
#> 111 13.863109 -2.33504362 -3.11146068  3.185221
#> 112 14.007917 -2.32199983 -3.38876322  3.055431
#> 113 14.726169 -3.11747121 -2.59925841  2.730143
#> 114 14.701482 -2.95403006 -2.54051092  2.643047
#> 115 14.509285 -3.29922201 -2.78246569  2.994653
#> 116 14.555373 -3.08841428 -3.12716302  3.047964
#> 117 14.539144 -2.94932361 -2.56013730  2.327946
#> 118 14.682764 -2.73137698 -2.41029287  2.180093
#> 119 14.332207 -3.25467506 -2.68525452  2.609423
#> 120 14.451270 -3.07446627 -2.60918246  2.487410
#> 121 14.005862 -2.15698375 -2.81826707  2.586575
#> 122 14.184980 -2.17036152 -2.82403428  2.704875
#> 123 13.672441 -1.93502328 -3.25707044  2.645234
#> 124 13.872759 -1.92077205 -3.06882331  2.641074
#> 125 14.156465 -2.23846738 -2.64115038  2.242410
#> 126 14.133397 -2.10861597 -2.73876824  2.268164
#> 127 14.284650 -1.83756950 -2.75209552  2.369839
#> 128 14.227024 -1.89747016 -2.82687325  2.313475
# Plot the UMAP result for the oils data.
plot(x)

Cardiological Example

# Bind the `Cardiological` symbolic interval table to `datos` and print it.
datos <- Cardiological
datos
#> # A tibble: 11 × 3
#>               Pulse              Syst             Diast
#>          <symblc_n>        <symblc_n>        <symblc_n>
#>  1  [44.00 : 68.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  2  [60.00 : 72.00]  [90.00 : 130.00]   [70.00 : 90.00]
#>  3  [56.00 : 90.00] [140.00 : 180.00]  [90.00 : 100.00]
#>  4 [70.00 : 112.00] [110.00 : 142.00]  [80.00 : 108.00]
#>  5  [54.00 : 72.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  6 [70.00 : 100.00] [130.00 : 160.00]  [80.00 : 110.00]
#>  7  [63.00 : 75.00]  [60.00 : 100.00] [140.00 : 150.00]
#>  8 [72.00 : 100.00] [130.00 : 160.00]   [76.00 : 90.00]
#>  9  [76.00 : 98.00] [110.00 : 190.00]  [70.00 : 110.00]
#> 10  [86.00 : 96.00] [138.00 : 180.00]  [90.00 : 110.00]
#> 11 [86.00 : 100.00] [110.00 : 150.00]  [78.00 : 100.00]
# Run symbolic UMAP on the interval table and print the resulting coordinates.
x <- sym.umap(datos)
x
#>             V1          V2           V3
#> 1   0.50482681  0.89569899  4.175064018
#> 2   0.39176790  0.19957824  3.854900350
#> 3   0.42147893  1.11595497  4.086126195
#> 4   0.33696866  0.08401944  3.577035498
#> 5   0.87120897  1.20803008  3.938785434
#> 6   0.25797245  0.19683937  3.183172711
#> 7   0.76160504  1.12654538  4.012255382
#> 8   0.43973166  0.19537999  2.905889688
#> 9   0.71381978  0.78367405  3.493104774
#> 10  0.31362586  0.20339265  2.821073396
#> 11  0.05027448 -0.39167266  1.454325805
#> 12  0.02620461 -0.43514252  1.443707687
#> 13  0.65207558  0.53322224  2.973798111
#> 14  0.66514446  0.33966776  2.664414000
#> 15  0.55638442 -0.89298985  0.735988343
#> 16  0.40192880 -0.71679407  0.967846182
#> 17  0.30708130 -0.86474184  0.493136541
#> 18 -1.20038520  0.54945830 -2.092431004
#> 19  0.16449452 -0.81882373 -0.021537837
#> 20 -0.50177793 -0.61046329 -2.341345837
#> 21  0.54965572 -0.94737083  0.457249079
#> 22 -0.84471891  0.61128043 -1.956073947
#> 23  0.19509924 -0.78605139 -0.129875596
#> 24 -0.23679378 -0.55596409 -2.414222150
#> 25  0.28100924 -0.01882712  2.285162720
#> 26 -2.06549216  1.21283911 -2.565465957
#> 27 -0.05644056 -0.49350624  0.886339905
#> 28 -2.14062426  0.60254461 -2.595513522
#> 29  1.45054823 -1.12332966  0.598217759
#> 30 -1.30458398  1.52275589 -2.188012223
#> 31  0.96257806 -1.12750470  0.104383648
#> 32 -0.90358924  1.11557288 -2.536337745
#> 33  0.34809612  0.67583651  3.926844298
#> 34  0.26728301 -0.00892598  3.653604184
#> 35  0.54915036  0.59362347  4.383613787
#> 36  0.46418569  0.13794701  3.929532511
#> 37  0.87980090  0.83465046  3.694587979
#> 38  0.36748993  0.24321503  2.772446883
#> 39  1.06135985  0.98014233  3.822205087
#> 40  0.13460346  0.27552914  2.599422341
#> 41  0.10134081 -0.69094301  0.982792619
#> 42 -1.92317931  0.89560930 -2.459376076
#> 43 -0.03574900 -0.61503130  0.308236324
#> 44 -1.58215187 -0.03189326 -2.588926664
#> 45  1.25610001 -1.16030190  0.235825579
#> 46 -1.01224475  1.25679673 -2.297400957
#> 47  0.57591307 -0.93720721 -0.169369150
#> 48 -0.53980821  0.42703690 -2.678240831
#> 49  2.73790977 -1.68387774 -0.422815564
#> 50  2.77400463 -1.60742283 -0.401588403
#> 51  2.73103202 -1.69263759 -0.087467949
#> 52  2.59888914 -1.63194637 -0.043184661
#> 53  2.88927404 -1.51765095 -0.313651876
#> 54  3.02308535 -1.84415215 -0.246736613
#> 55  2.71999222 -1.40562069  0.010239102
#> 56  2.90806980 -1.85288715  0.006106137
#> 57  0.16237839 -0.39489404  1.238317464
#> 58 -2.13584373  0.96299000 -2.315591749
#> 59 -0.28476460 -0.45493411  0.232480494
#> 60 -1.77042712  0.14467133 -2.539536502
#> 61  0.36378005 -0.75720641  1.065660877
#> 62 -1.70512086  0.96656283 -2.474564095
#> 63  0.13562691 -0.56577829  0.207870078
#> 64 -1.25772754  0.18336224 -2.780853751
#> 65  0.05757132 -0.09412350  2.325245448
#> 66 -2.13514565  1.40420568 -2.219288077
#> 67 -0.27392183 -0.56068573 -0.338081957
#> 68 -0.83205592 -0.54442424 -2.503592837
#> 69  1.48090171 -1.11265623  0.415930739
#> 70 -1.19926134  1.52816364 -1.954583996
#> 71  0.05158421 -0.63969376 -2.039220273
#> 72 -0.14693637 -0.25989545 -2.707709845
#> 73 -1.01367809  0.49010301 -1.695930790
#> 74 -1.56936264  0.67315310 -2.397082865
#> 75 -0.34653692 -0.74668140 -2.135702987
#> 76 -0.52739111 -0.56870425 -2.624396166
#> 77 -0.48554282  0.80297471 -1.899243944
#> 78 -0.59524673  0.92252843 -2.259596168
#> 79  0.07804262 -0.40726525 -2.331644228
#> 80 -0.15690570 -0.27371741 -2.843896889
#> 81 -1.99099961  1.26459207 -1.563144319
#> 82 -2.21836640  1.31602849 -2.116005441
#> 83 -1.19251848  0.26344962 -1.803760688
#> 84 -1.92133330  0.25894973 -2.648278125
#> 85 -1.12107276  1.33335041 -1.471763428
#> 86 -1.44361994  1.48130259 -2.165821238
#> 87 -0.48950454  0.32765843 -1.841735008
#> 88 -0.83212698  0.70345260 -2.702339232
# Plot the UMAP result for the Cardiological data.
plot(x)

Length of intervals

# Load the oils interval table and compute the length of every interval
# (one row per concept, one column per variable).
data(oils)
datos <- oils
interval.length(datos)
#>      GRA FRE IOD SAP
#> L  0.005   9  34  78
#> P  0.007   1  16   9
#> Co 0.002   5  14   9
#> S  0.006   2  12   6
#> Ca 0.001  10   2   4
#> O  0.005   6  11   9
#> B  0.010   8   8   9
#> H  0.006  10  24  12

PCA Histogram

Hardwood Data

# Load the hardwood histogram table and keep its column and row names around
# for the steps below.
data(hardwoodBrito)
Hardwood.histogram <- hardwoodBrito
Hardwood.cols <- colnames(Hardwood.histogram)
Hardwood.names <- row.names(Hardwood.histogram)
Hardwood.histogram
#> # A tibble: 5 × 4
#>         ANNT       JULT       ANNP       MITM
#> * <symblc_h> <symblc_h> <symblc_h> <symblc_h>
#> 1     <hist>     <hist>     <hist>     <hist>
#> 2     <hist>     <hist>     <hist>     <hist>
#> 3     <hist>     <hist>     <hist>     <hist>
#> 4     <hist>     <hist>     <hist>     <hist>
#> 5     <hist>     <hist>     <hist>     <hist>

# Inspect a single cell: the first histogram of the first column (ANNT).
Hardwood.histogram[[1]][[1]]
#> $breaks
#> [1] -3.9  4.2 10.3 20.6
#> 
#> $props
#> [1] 0.5 0.4 0.1

Weighted Center Matrix

# Weighted-centre matrix of the histogram table.
weighted.center <- weighted.center.Hist.RSDA(Hardwood.histogram)

Bin Matrix

# Constant matrix of 3s with one row per concept and one column per variable.
# It is handed to sym.histogram.pca() below, presumably as the bin count for
# each histogram cell (confirm against the package documentation).
# Both dimensions are stated explicitly instead of being inferred from the
# length of a rep() vector.
BIN.Matrix <- matrix(
  3,
  nrow = length(Hardwood.names),
  ncol = length(Hardwood.cols)
)

PCA

# Histogram PCA of the hardwood table using the bin matrix built above.
pca.hist <- sym.histogram.pca(Hardwood.histogram, BIN.Matrix)
#> Warning: Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
# Print the classic PCA object stored inside the histogram-PCA result.
pca.hist$classic.PCA
#> **Results for the Principal Component Analysis (PCA)**
#> The analysis was performed on 85 individuals, described by 4 variables
#> *The results are available in the following objects:
#> 
#>    name               description                                
#> 1  "$eig"             "eigenvalues"                              
#> 2  "$var"             "results for the variables"                
#> 3  "$var$coord"       "coord. for the variables"                 
#> 4  "$var$cor"         "correlations variables - dimensions"      
#> 5  "$var$cos2"        "cos2 for the variables"                   
#> 6  "$var$contrib"     "contributions of the variables"           
#> 7  "$ind"             "results for the individuals"              
#> 8  "$ind$coord"       "coord. for the individuals"               
#> 9  "$ind$cos2"        "cos2 for the individuals"                 
#> 10 "$ind$contrib"     "contributions of the individuals"         
#> 11 "$ind.sup"         "results for the supplementary individuals"
#> 12 "$ind.sup$coord"   "coord. for the supplementary individuals" 
#> 13 "$ind.sup$cos2"    "cos2 for the supplementary individuals"   
#> 14 "$call"            "summary statistics"                       
#> 15 "$call$centre"     "mean of the variables"                    
#> 16 "$call$ecart.type" "standard error of the variables"          
#> 17 "$call$row.w"      "weights for the individuals"              
#> 18 "$call$col.w"      "weights for the variables"
# Print the symbolic histogram table of principal components (PC.1 to PC.4).
pca.hist$sym.hist.matrix.PCA
#> # A tibble: 5 × 4
#>         PC.1       PC.2       PC.3       PC.4
#> * <symblc_h> <symblc_h> <symblc_h> <symblc_h>
#> 1     <hist>     <hist>     <hist>     <hist>
#> 2     <hist>     <hist>     <hist>     <hist>
#> 3     <hist>     <hist>     <hist>     <hist>
#> 4     <hist>     <hist>     <hist>     <hist>
#> 5     <hist>     <hist>     <hist>     <hist>

Plots

# Plot for the ACER concept only, on the first principal axis.
ACER.p1 <- Sym.PCA.Hist.PCA.k.plot(
  data.sym.df = pca.hist$Bins.df,
  title.graph = " ",
  concepts.name = "ACER",
  title.x = "First Principal Component (84.83%)",
  title.y = "Frequency",
  pca.axes = 1
)

ACER.p1

# Same plot for every concept present in the bins data frame.
ALL.p1 <- Sym.PCA.Hist.PCA.k.plot(
  data.sym.df = pca.hist$Bins.df,
  title.graph = " ",
  concepts.name = unique(pca.hist$Bins.df$Object.Name),
  title.x = "First Principal Component (84.83%)",
  title.y = "Frequency",
  pca.axes = 1
)

ALL.p1
#> Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps

# Quantile representation of the principal-component histograms; the second
# argument (3) is passed through to quantiles.RSDA() unchanged.
Hardwood.quantiles.PCA <- quantiles.RSDA(pca.hist$sym.hist.matrix.PCA, 3)
#> Warning: Setting row names on a tibble is deprecated.

# Labels and selections for the ACER-only arrow plot on the first two
# principal components.
label.name <- "Hard Wood"
Title <- "First Principal Plane"
axes.x.label <- "First Principal Component (84.83%)"
axes.y.label <- "Second Principal Component (9.70%)"
concept.names <- "ACER"
var.names <- c("PC.1", "PC.2")

quantile.ACER.plot <- Percentil.Arrow.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

quantile.ACER.plot

# Same arrow plot, now for every concept (all row names of the quantile table).
label.name <- "Hard Wood"
Title <- "First Principal Plane"
axes.x.label <- "First Principal Component (84.83%)"
axes.y.label <- "Second Principal Component (9.70%)"
concept.names <- row.names(Hardwood.quantiles.PCA)
var.names <- c("PC.1", "PC.2")

quantile.plot <- Percentil.Arrow.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

quantile.plot

# Quantile PCA plot for ACER only, with shortened axis labels.
label.name <- "Hard Wood"
Title <- "First Principal Plane"
axes.x.label <- "PC 1 (84.83%)"
axes.y.label <- "PC 2 (9.70%)"
concept.names <- "ACER"
var.names <- c("PC.1", "PC.2")

plot.3D.HW <- sym.quantiles.PCA.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

plot.3D.HW
# Switch to all concepts and draw the two all-quantiles plots, reusing the
# titles and labels set above.
concept.names <- row.names(Hardwood.quantiles.PCA)
sym.all.quantiles.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
sym.all.quantiles.mesh3D.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

KS

# Quantile table used for the Kolmogorov-Smirnov comparison; the second
# argument (100) is passed through to quantiles.RSDA.KS() unchanged.
Hardwood.quantiles.PCA.2 <- quantiles.RSDA.KS(pca.hist$sym.hist.matrix.PCA, 100)
#> Warning: Setting row names on a tibble is deprecated.

# The five histograms of the first column (PC.1), one per concept.
h <- Hardwood.quantiles.PCA.2[[1]][[1]]
h2 <- Hardwood.quantiles.PCA.2[[1]][[2]]
h3 <- Hardwood.quantiles.PCA.2[[1]][[3]]
h4 <- Hardwood.quantiles.PCA.2[[1]][[4]]
h5 <- Hardwood.quantiles.PCA.2[[1]][[5]]

# Turn each histogram into a callable function (it is evaluated with `v =`
# further down).
tmp <- HistRSDAToEcdf(h)
tmp2 <- HistRSDAToEcdf(h2)
tmp3 <- HistRSDAToEcdf(h3)
tmp4 <- HistRSDAToEcdf(h4)
tmp5 <- HistRSDAToEcdf(h5)

# Common evaluation grid: every distinct break point, in increasing order.
breaks.unique <- unique(c(h$breaks, h2$breaks, h3$breaks, h4$breaks, h5$breaks))
tmp.unique <- sort(breaks.unique, na.last = TRUE)

# Evaluate each function on the grid. From here on tmp..tmp5 hold the
# evaluated values rather than the functions themselves.
tmp <- tmp(v = tmp.unique)
tmp2 <- tmp2(v = tmp.unique)
tmp3 <- tmp3(v = tmp.unique)
tmp4 <- tmp4(v = tmp.unique)
tmp5 <- tmp5(v = tmp.unique)

# Pointwise absolute difference between the first two curves.
abs_dif <- abs(tmp2 - tmp)
# The Kolmogorov-Smirnov distance is the maximum of the absolute differences.
distancia_ks <- max(abs_dif)
distancia_ks
#> [1] 0.05857869
library(tidyr)
# Gather the evaluated curves into one data frame, then reshape to long form:
# one row per (PC.1 value, hardwood) pair with the curve value in `ecdf`.
df.HW <- data.frame(
  PC.1 = tmp.unique,
  ACER = tmp,
  ALNUS = tmp2,
  FRAXINUS = tmp3,
  JUGLANS = tmp4,
  QUERCUS = tmp5
) %>%
  pivot_longer(
    # Every column except the grid itself, i.e. the five hardwood columns.
    cols = -PC.1,
    names_to = "HardWood",
    values_to = "ecdf"
  )

# Line plot of the cumulative-distribution curves in `df.HW`: PC.1 on the
# x axis, the `ecdf` value on the y axis, one coloured line per hardwood,
# legend placed under the panel.
grafico_ecdf <- ggplot(
  data = df.HW,
  aes(x = PC.1, y = ecdf, color = HardWood)
) +
  # `linewidth` replaces `size` for line geoms (`size` is deprecated for lines
  # since ggplot2 3.4.0). The lines are drawn once only: the extra trailing
  # geom_line() layer repeated the same lines and has been dropped.
  geom_line(linewidth = 1) +
  labs(
    color = "Hardwood",
    y = "Empirical Cumulative Distribution "
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 12)
  )

grafico_ecdf