For checking datasets from EDC in clinical trials. Note: the variable names in your dataset should have a postfix (_V1) or a prefix (V1_). Column names should be unique.
For the laboratory check, you need to create an Excel table like the one in the example.
*Column names are given without the prefix or postfix.

| AGELOW | AGEHIGH | SEX | LBTEST | LBORRES | LBNRIND | LBORNRLO | LBORNRHI | 
|---|---|---|---|---|---|---|---|
| 18 | 45 | f\|m | Glucose | GLUC | GLUC_IND | 3.9 | 5.9 | 
| 18 | 45 | m | Aspartate transaminase | AST | AST_IND | 0 | 42 | 
| 18 | 45 | f | Aspartate transaminase | AST | AST_IND | 0 | 39 | 

Example dataset:

| ID | AGE | SEX | V1_GLUC | V1_GLUC_IND | V2_AST | V2_AST_IND | 
|---|---|---|---|---|---|---|
| 01 | 19 | f | 5.5 | norm | 30 | norm | 
| 02 | 20 | m | 4.1 | NA | 48 | norm | 
| 03 | 22 | m | 9.7 | norm | 31 | norm | 
# "norm" and "no" are only examples; use the values of the estimate variable as they appear in your dataset
# parameter is_post has value FALSE because a dataset has a prefix( V1_ ) in the names of variables
refs <- system.file("labs_refer.xlsx", package = "dmtools")
obj_lab <- lab(refs, ID, AGE, SEX, "norm", "no", is_post = FALSE)
obj_lab <- obj_lab %>% check(df)
# ok - analysis, which has a correct estimate of the result
obj_lab %>% choose_test("ok")
#>   ID AGE SEX                 LBTEST LBTESTCD VISIT LBORNRLO LBORNRHI LBORRES
#> 1 01  19   f                Glucose     GLUC   V1_      3.9      5.9     5.5
#> 2 01  19   f Aspartate transaminase      AST   V2_      0.0     39.0      30
#> 3 03  22   m Aspartate transaminase      AST   V2_      0.0     42.0      31
#>   LBNRIND RES_TYPE_NUM IND_EXPECTED
#> 1    norm          5.5         norm
#> 2    norm         30.0         norm
#> 3    norm         31.0         norm
# mis - analysis, which has an incorrect estimate of the result
obj_lab %>% choose_test("mis")
#>   ID AGE SEX                 LBTEST LBTESTCD VISIT LBORNRLO LBORNRHI LBORRES
#> 1 02  20   m Aspartate transaminase      AST   V2_      0.0     42.0      48
#> 2 03  22   m                Glucose     GLUC   V1_      3.9      5.9     9.7
#>   LBNRIND RES_TYPE_NUM IND_EXPECTED
#> 1    norm         48.0           no
#> 2    norm          9.7           no
# skip - analysis, which has an empty value of the estimate
obj_lab %>% choose_test("skip")
#>   ID AGE SEX  LBTEST LBTESTCD VISIT LBORNRLO LBORNRHI LBORRES LBNRIND
#> 1 02  20   m Glucose     GLUC   V1_      3.9      5.9     4.1    <NA>
#>   RES_TYPE_NUM IND_EXPECTED
#> 1          4.1         norm
# all analyses
obj_lab %>% get_result()
#>   ID AGE SEX                 LBTEST LBTESTCD VISIT LBORNRLO LBORNRHI LBORRES
#> 1 01  19   f                Glucose     GLUC   V1_      3.9      5.9     5.5
#> 2 01  19   f Aspartate transaminase      AST   V2_      0.0     39.0      30
#> 3 02  20   m                Glucose     GLUC   V1_      3.9      5.9     4.1
#> 4 02  20   m Aspartate transaminase      AST   V2_      0.0     42.0      48
#> 5 03  22   m                Glucose     GLUC   V1_      3.9      5.9     9.7
#> 6 03  22   m Aspartate transaminase      AST   V2_      0.0     42.0      31
#>   LBNRIND RES_TYPE_NUM IND_EXPECTED IS_RIGHT
#> 1    norm          5.5         norm     TRUE
#> 2    norm         30.0         norm     TRUE
#> 3    <NA>          4.1         norm       NA
#> 4    norm         48.0           no    FALSE
#> 5    norm          9.7           no    FALSE
#> 6    norm         31.0         norm     TRUE

For the dates check, you need to create an Excel table like the one in the example.
VISITNUM values are used to select the columns of a visit, e.g. contains(num_visit).

| VISITNUM | VISIT | MINUS | PLUS | VISITDY | STARTDAT | STARTVISIT | IS_EQUAL | EQUALDAT | 
|---|---|---|---|---|---|---|---|---|
| E1 | screening | 0 | 3 | 0 | screen_date_E1 | date of screening | F | NA | 
| E2 | rand | 0 | 0 | 0 | rand_date_E2 | date of randomization | T | rand_date_E2 | 
| E3 | visit 2 | 1 | 1 | 5 | rand_date_E2 | date of randomization | T | ph_date_E3 | 

Example dataset:

| id | screen_date_E1 | rand_date_E2 | ph_date_E3 | bio_date_E3 | 
|---|---|---|---|---|
| 01 | 1991-03-13 | 1991-03-15 | 1991-03-21 | 1991-03-23 | 
| 02 | 1991-03-07 | 1991-03-11 | 1991-03-16 | 1991-03-16 | 
| 03 | 1991-03-08 | 1991-03-10 | 1991-03-16 | 1991-03-16 | 
# use the str_date parameter to search for columns with dates, default: "DAT"
dates <- system.file("dates.xlsx", package = "dmtools")
obj_date <- date(dates, id, dplyr::contains, dplyr::matches)
obj_date <- obj_date %>% check(df)
# out - dates, which are out of the protocol's timeline
obj_date %>% choose_test("out")
#>   id            STARTVISIT   STARTDAT   VISIT        TERM     VISDAT
#> 1 01 date of randomization 1991-03-15 visit 2 bio_date_E3 1991-03-23
#>                          PLANDAT DAYS_OUT
#> 1 1991-03-19 UTC--1991-03-21 UTC        2
# uneq - dates, which are unequal
obj_date %>% choose_test("uneq")
#>   id   VISIT        TERM     VISDAT   EQUALDAT IS_TIMELINE
#> 1 01 visit 2 bio_date_E3 1991-03-23 1991-03-21       FALSE
# ok - correct dates
obj_date %>% choose_test("ok")
#>    id            STARTVISIT   STARTDAT     VISIT           TERM     VISDAT
#> 1  01     date of screening 1991-03-13 screening screen_date_E1 1991-03-13
#> 2  01 date of randomization 1991-03-15      rand   rand_date_E2 1991-03-15
#> 3  01 date of randomization 1991-03-15   visit 2     ph_date_E3 1991-03-21
#> 4  02     date of screening 1991-03-07 screening screen_date_E1 1991-03-07
#> 5  02 date of randomization 1991-03-11      rand   rand_date_E2 1991-03-11
#> 6  02 date of randomization 1991-03-11   visit 2     ph_date_E3 1991-03-16
#> 7  02 date of randomization 1991-03-11   visit 2    bio_date_E3 1991-03-16
#> 8  03     date of screening 1991-03-08 screening screen_date_E1 1991-03-08
#> 9  03 date of randomization 1991-03-10      rand   rand_date_E2 1991-03-10
#> 10 03 date of randomization 1991-03-10   visit 2     ph_date_E3 1991-03-16
#> 11 03 date of randomization 1991-03-10   visit 2    bio_date_E3 1991-03-16
#>                           PLANDAT   EQUALDAT
#> 1  1991-03-13 UTC--1991-03-16 UTC 1991-03-13
#> 2  1991-03-15 UTC--1991-03-15 UTC 1991-03-15
#> 3  1991-03-19 UTC--1991-03-21 UTC 1991-03-21
#> 4  1991-03-07 UTC--1991-03-10 UTC 1991-03-07
#> 5  1991-03-11 UTC--1991-03-11 UTC 1991-03-11
#> 6  1991-03-15 UTC--1991-03-17 UTC 1991-03-16
#> 7  1991-03-15 UTC--1991-03-17 UTC 1991-03-16
#> 8  1991-03-08 UTC--1991-03-11 UTC 1991-03-08
#> 9  1991-03-10 UTC--1991-03-10 UTC 1991-03-10
#> 10 1991-03-14 UTC--1991-03-16 UTC 1991-03-16
#> 11 1991-03-14 UTC--1991-03-16 UTC 1991-03-16
# all dates
obj_date %>% get_result()
#>    id            STARTVISIT   STARTDAT     VISIT           TERM     VISDAT
#> 1  01     date of screening 1991-03-13 screening screen_date_E1 1991-03-13
#> 2  01 date of randomization 1991-03-15      rand   rand_date_E2 1991-03-15
#> 3  01 date of randomization 1991-03-15   visit 2     ph_date_E3 1991-03-21
#> 4  01 date of randomization 1991-03-15   visit 2    bio_date_E3 1991-03-23
#> 5  02     date of screening 1991-03-07 screening screen_date_E1 1991-03-07
#> 6  02 date of randomization 1991-03-11      rand   rand_date_E2 1991-03-11
#> 7  02 date of randomization 1991-03-11   visit 2     ph_date_E3 1991-03-16
#> 8  02 date of randomization 1991-03-11   visit 2    bio_date_E3 1991-03-16
#> 9  03     date of screening 1991-03-08 screening screen_date_E1 1991-03-08
#> 10 03 date of randomization 1991-03-10      rand   rand_date_E2 1991-03-10
#> 11 03 date of randomization 1991-03-10   visit 2     ph_date_E3 1991-03-16
#> 12 03 date of randomization 1991-03-10   visit 2    bio_date_E3 1991-03-16
#>                           PLANDAT   EQUALDAT IS_TIMELINE IS_EQUAL DAYS_OUT
#> 1  1991-03-13 UTC--1991-03-16 UTC 1991-03-13        TRUE     TRUE        0
#> 2  1991-03-15 UTC--1991-03-15 UTC 1991-03-15        TRUE     TRUE        0
#> 3  1991-03-19 UTC--1991-03-21 UTC 1991-03-21        TRUE     TRUE        0
#> 4  1991-03-19 UTC--1991-03-21 UTC 1991-03-21       FALSE    FALSE        2
#> 5  1991-03-07 UTC--1991-03-10 UTC 1991-03-07        TRUE     TRUE        0
#> 6  1991-03-11 UTC--1991-03-11 UTC 1991-03-11        TRUE     TRUE        0
#> 7  1991-03-15 UTC--1991-03-17 UTC 1991-03-16        TRUE     TRUE        0
#> 8  1991-03-15 UTC--1991-03-17 UTC 1991-03-16        TRUE     TRUE        0
#> 9  1991-03-08 UTC--1991-03-11 UTC 1991-03-08        TRUE     TRUE        0
#> 10 1991-03-10 UTC--1991-03-10 UTC 1991-03-10        TRUE     TRUE        0
#> 11 1991-03-14 UTC--1991-03-16 UTC 1991-03-16        TRUE     TRUE        0
#> 12 1991-03-14 UTC--1991-03-16 UTC 1991-03-16        TRUE     TRUE        0

dplyr::contains - A function that selects the necessary visit or event, e.g. dplyr::starts_with, dplyr::contains. It works like df %>% select(contains("E1")). You can also use dplyr::starts_with, which works like df %>% select(starts_with("V1")).
dplyr::matches - A function that selects the dates from the necessary visit, e.g. dplyr::matches, dplyr::contains. It works like visit_one %>% select(contains("DAT")). Default: dplyr::contains().
Function to rename the dataset, using crfs.