# Connect to DB
datasetName = "GiBleed"
dbms = "sqlite"
datasetLocation <- Eunomia::getDatabaseFile(
datasetName = datasetName,
dbms = dbms,
databaseFile = tempfile(fileext = ".sqlite")
)
connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = dbms, server = datasetLocation)
connection = DatabaseConnector::connect(connectionDetails = connectionDetails)
DatabaseConnector::getTableNames(connection, databaseSchema = 'main')
source("util.R")
insert_visits(connection)
# Create Cohort for AMI
# Create cohort entry event temp table using AMI concept set
myocardial_infraction = 4329847 # Myocardial infarction
old_myocardial_infraction = 314666 # Old myocardial infarction
cdm = "main"
sql <- "
drop table if exists #diagnosis;
SELECT
person_id AS subject_id,
condition_start_date AS cohort_start_date
INTO #diagnoses
FROM @cdm.condition_occurrence
WHERE condition_concept_id IN (
SELECT descendant_concept_id
FROM @cdm.concept_ancestor
WHERE ancestor_concept_id IN(@myocardial_infraction)
)
AND condition_concept_id NOT IN (
SELECT descendant_concept_id
FROM @cdm.concept_ancestor
WHERE ancestor_concept_id IN(@old_myocardial_infraction)
);
"
DatabaseConnector::renderTranslateExecuteSql(
connection = connection,
sql = sql,
cdm = cdm,
myocardial_infraction = myocardial_infraction,
old_myocardial_infraction = old_myocardial_infraction
)
# Create additional inclusion criteria to only include inpatients or ER
visit_concepts_to_include <- c(9201, 9203, 262) # inpatient or ER concept set
sql <- "
INSERT INTO @cdm.cohort (
subject_id,
cohort_start_date,
cohort_definition_id
)
SELECT
subject_id,
cohort_start_date,
CAST (1 AS INT) AS cohort_definition_id
FROM #diagnoses
INNER JOIN @cdm.visit_occurrence v
ON subject_id = person_id
AND cohort_start_date >= v.visit_start_date
AND cohort_start_date <= v.visit_end_date
WHERE visit_concept_id IN (@visit_concepts_to_include);"
DatabaseConnector::renderTranslateExecuteSql(
connection = connection,
sql = sql,
cdm = cdm,
visit_concepts_to_include = visit_concepts_to_include
)Characterizations
Exercise
Use the acute myocardial infraction cohort to do some basic characterizations.
Packages
HADES packages used
- Eunomia for connecting to a synthetic OMOP CDM database.
- DatabaseConnector for connecting to our Eunomia dataset and executing queries.
- FeatureExtraction for generating covariates for our cohort and getting some aggregated stats.
Installation
# Needed to install some HADES packages
install.packages("remotes")
# HADES packages
remotes::install_github("ohdsi/FeatureExtraction")
install.packages("DatabaseConnector")
install.packages("Eunomia")Create AMI Cohort
For this tutorial, we will use the same cohort and Eunomia GiBleed dataset from the cohort generation tutorial. We will generate it real quick here, but see that tutorial for an explanation.
View Subjects
# View results
sql <- "
select * from @cdm.cohort;
"
DatabaseConnector::renderTranslateQuerySql(
connection = connection,
sql = sql,
cdm = cdm
)COHORT_DEFINITION_ID <chr> | SUBJECT_ID <chr> | COHORT_START_DATE <date> | COHORT_END_DATE <date> | |
|---|---|---|---|---|
| 1 | 1491 | 2011-07-12 | <NA> | |
| 1 | 105 | 2005-11-11 | <NA> | |
| 1 | 96 | 1997-01-25 | <NA> | |
| 1 | 3049 | 1986-12-05 | <NA> | |
| 1 | 2692 | 1998-06-29 | <NA> |
Get Aggregated Covariate Data
Specify Covariate Settings
We will just specify some demographic features.
There is a large set of covariate settings and all are false by default, so you must “opt in” to the covariates you want to extract.
You can view the covariate settings by executing this from the RStudio console
?FeatureExtraction::createCovariateSettings
settings <- FeatureExtraction::createCovariateSettings(
useDemographicsGender = T,
useDemographicsAge = T,
useDemographicsAgeGroup = T,
useDemographicsRace = T,
useDemographicsEthnicity = T,
useDemographicsIndexYear = T,
useDemographicsIndexMonth = T,
useDemographicsPriorObservationTime = T,
useDemographicsPostObservationTime = T,
useDemographicsTimeInCohort = T,
useDemographicsIndexYearMonth = T
)Get Covariate Data
Extract those features across the cohort. Aggregated = T means we are considering the cohort as a whole, not on a per-patient basis.
covariateData <- FeatureExtraction::getDbCovariateData(
connectionDetails = connectionDetails,
cdmDatabaseSchema = "main",
cohortDatabaseSchema = "main",
cohortTable = "cohort",
cohortIds = c(1), # We gave our cohort Id a value of 1 when we inserted into the cohort table
covariateSettings = settings,
aggregated = TRUE
)
str(covariateData) # An S4 objectDiscrete Covariates
Well, we have the covariates here and some aggregations, but we need to map the covariateId to an actual description
covariateData$covariatescohortDefinitionId <chr> | covariateId <dbl> | sumValue <dbl> | averageValue <dbl> | |
|---|---|---|---|---|
| 1 | 1007 | 1 | 0.2 | |
| 1 | 2003 | 1 | 0.2 | |
| 1 | 6007 | 1 | 0.2 | |
| 1 | 7007 | 1 | 0.2 | |
| 1 | 8003 | 2 | 0.4 | |
| 1 | 10003 | 1 | 0.2 | |
| 1 | 11007 | 1 | 0.2 | |
| 1 | 12007 | 1 | 0.2 | |
| 1 | 14003 | 1 | 0.2 | |
| 1 | 1986006 | 1 | 0.2 |
Continuous Covariates
Same thing for the continuous covariates, we have the covariateId, not the actual description…
covariateData$covariatesContinuouscohortDefinitionId <chr> | covariateId <dbl> | countValue <dbl> | minValue <dbl> | maxValue <dbl> | averageValue <dbl> | |
|---|---|---|---|---|---|---|
| 1 | 1010 | 5 | NA | NA | NA | |
| 1 | 1009 | 5 | 7 | 11473 | 2300.2 | |
| 1 | 1002 | 5 | 10 | 73 | 43.4 | |
| 1 | 1008 | 5 | 3878 | 26327 | 15808.8 |
Merging with covariateRef
Here is the mapping of covariateId to covariateName
covariateData$covariateRefcovariateId <dbl> | covariateName <chr> | analysisId <dbl> | conceptId <dbl> | |
|---|---|---|---|---|
| 1010 | time (days) between cohort start and end | 10 | 0 | |
| 8507001 | gender = MALE | 1 | 8507 | |
| 1009 | observation time (days) after index | 9 | 0 | |
| 198612011 | index year and month: 198612 | 11 | 0 | |
| 199701011 | index year and month: 199701 | 11 | 0 | |
| 199806011 | index year and month: 199806 | 11 | 0 | |
| 200511011 | index year and month: 200511 | 11 | 0 | |
| 201107011 | index year and month: 201107 | 11 | 0 | |
| 2003 | age group: 10 - 14 | 3 | 0 | |
| 8003 | age group: 40 - 44 | 3 | 0 |
Lets merge the covariates to the covariateRef to get the results along with the covariateName
merged <- merge(covariateData$covariates, covariateData$covariateRef, by = "covariateId")
mergedcovariateId <dbl> | cohortDefinitionId <chr> | sumValue <dbl> | averageValue <dbl> | covariateName <chr> | |
|---|---|---|---|---|---|
| 1007 | 1 | 1 | 0.2 | index month: 1 | |
| 2003 | 1 | 1 | 0.2 | age group: 10 - 14 | |
| 6007 | 1 | 1 | 0.2 | index month: 6 | |
| 7007 | 1 | 1 | 0.2 | index month: 7 | |
| 8003 | 1 | 2 | 0.4 | age group: 40 - 44 | |
| 10003 | 1 | 1 | 0.2 | age group: 50 - 54 | |
| 11007 | 1 | 1 | 0.2 | index month: 11 | |
| 12007 | 1 | 1 | 0.2 | index month: 12 | |
| 14003 | 1 | 1 | 0.2 | age group: 70 - 74 | |
| 1986006 | 1 | 1 | 0.2 | index year: 1986 |
Get Table 1
Here is how you get “table 1”, frequently used in papers.
result <- FeatureExtraction::createTable1(covariateData1 = covariateData)
print(result, row.names = F, right = T) Characteristic % (n = 5) Characteristic % (n = 5)
Age group 50 - 54 20
10 - 14 20 70 - 74 20
40 - 44 40