git package

# Clone the CDMConnector repository into a "CDMConnector" sub-directory of
# this R session's temporary directory.
tempDir <- tempdir()
pathToRepo <- file.path(tempDir, "CDMConnector")

# Requires network access to GitHub; `pathToRepo` is reused below to build
# the PaRe Repository object.
git2r::clone(
  url = "https://github.com/darwin-eu/CDMConnector.git",
  local_path = pathToRepo
)
## cloning into 'C:\Users\MVANKE~1\AppData\Local\Temp\RtmpKgSdK4/CDMConnector'...
## Receiving objects:   1% (34/3319),   63 kb
## Receiving objects:  11% (366/3319),  120 kb
## Receiving objects:  21% (697/3319),  176 kb
## Receiving objects:  31% (1029/3319),  288 kb
## Receiving objects:  41% (1361/3319),  400 kb
## Receiving objects:  51% (1693/3319),  456 kb
## Receiving objects:  61% (2025/3319), 7292 kb
## Receiving objects:  71% (2357/3319), 12223 kb
## Receiving objects:  81% (2689/3319), 12279 kb
## Receiving objects:  91% (3021/3319), 12335 kb
## Receiving objects: 100% (3319/3319), 19820 kb, done.
## Local:    main C:/Users/mvankessel/AppData/Local/Temp/RtmpKgSdK4/CDMConnector
## Remote:   main @ origin (https://github.com/darwin-eu/CDMConnector.git)
## Head:     [764154b] 2023-06-13: bugfix

Objects

Repository

repo <- PaRe::Repository$new(pathToRepo)

Git management

# Put `repo` on the main branch and pull before taking a copy.
repo$gitCheckout("main")
repo$gitPull()
# `dev` is a copy of the Repository object, moved to a fixed commit so it can
# be compared against `repo` further down.
# NOTE(review): `$clone()` presumably copies the R object only, not the
# directory on disk, so `dev` and `repo` may share one working tree -- confirm
# that this checkout does not affect later calls on `repo` that read from disk.
dev <- repo$clone()
dev$gitCheckout("75e7333cb2d9f8ec7f7a77ae21fb611f8141ef0e")
## Switched to: 75e7333cb2d9f8ec7f7a77ae21fb611f8141ef0e
## Re-initializing

Repository properties

repo$getPath()
## [1] "C:\\Users\\mvankessel\\AppData\\Local\\Temp\\RtmpKgSdK4\\CDMConnector"
repo$getName()
## [1] "CDMConnector"
repo$getDescription()
## Package: CDMConnector
## Title: Connect to an OMOP Common Data Model
## Version: 1.0.0
## Authors@R (parsed):
##     * Adam Black <black@ohdsi.org> [aut, cre] (<https://orcid.org/0000-0001-5576-8701>)
##     * Artem Gorbachev <artem.gorbachev@odysseusinc.com> [aut]
##     * Edward Burn <edward.burn@ndorms.ox.ac.uk> [aut]
##     * Marti Catala Sabate <marti.catalasabate@ndorms.ox.ac.uk> [aut]
## Description: Provides tools for working with observational health
##     data in the Observational Medical Outcomes Partnership (OMOP) Common
##     Data Model format with a pipe friendly syntax.  Common data model
##     database table references are stored in a single compound object along
##     with metadata.
## License: Apache License (>= 2)
## URL: https://darwin-eu.github.io/CDMConnector/,
##     https://github.com/darwin-eu/CDMConnector
## BugReports: https://github.com/darwin-eu/CDMConnector/issues
## Depends:
##     R (>= 4.0)
## Imports:
##     checkmate,
##     cli,
##     DBI (>= 0.3.0),
##     dbplyr,
##     dplyr,
##     fs,
##     generics,
##     glue,
##     jsonlite,
##     lifecycle,
##     magrittr,
##     methods,
##     pillar,
##     purrr,
##     readr,
##     rlang,
##     stringr,
##     tidyr,
##     tidyselect,
##     waldo,
##     withr
## Suggests:
##     arrow,
##     bigrquery,
##     covr,
##     DatabaseConnector,
##     duckdb,
##     ggplot2,
##     knitr,
##     lubridate,
##     odbc,
##     rJava,
##     rmarkdown,
##     RPostgres,
##     RSQLite,
##     SqlRender,
##     testthat (>= 3.0.0),
##     tibble,
##     zeallot
## Enhances:
##     CirceR
## VignetteBuilder:
##     knitr
## Additional_repositories: https://OHDSI.github.io/drat
## Config/testthat/edition: 3
## Encoding: UTF-8
## Roxygen: list(markdown = TRUE)
## RoxygenNote: 7.2.3

File

files <- repo$getFiles()
files
## $R
## $R[[1]]
## <File> <Code> <R6>
## Name: cdm.R
## # Lines: 709
## $R[[2]]
## <File> <Code> <R6>
## Name: cdm_reference_methods.R
## # Lines: 40
## $R[[3]]
## <File> <Code> <R6>
## Name: cdm_select_tbl.R
## # Lines: 40
## $R[[4]]
## <File> <Code> <R6>
## Name: CDMConnector-package.R
## # Lines: 13
## $R[[5]]
## <File> <Code> <R6>
## Name: cdmSubset.R
## # Lines: 497
## $R[[6]]
## <File> <Code> <R6>
## Name: compute.R
## # Lines: 362
## $R[[7]]
## <File> <Code> <R6>
## Name: dateadd.R
## # Lines: 235
## $R[[8]]
## <File> <Code> <R6>
## Name: Eunomia.R
## # Lines: 307
## $R[[9]]
## <File> <Code> <R6>
## Name: generateCohortSet.R
## # Lines: 821
## $R[[10]]
## <File> <Code> <R6>
## Name: intersect.R
## # Lines: 105
## $R[[11]]
## <File> <Code> <R6>
## Name: listTables.R
## # Lines: 66
## $R[[12]]
## <File> <Code> <R6>
## Name: summarise_quantile.R
## # Lines: 129
## $R[[13]]
## <File> <Code> <R6>
## Name: utils.R
## # Lines: 109
## $R[[14]]
## <File> <Code> <R6>
## Name: validate.R
## # Lines: 229
## 
## $cpp
## list()
## 
## $o
## list()
## 
## $h
## list()
## 
## $java
## list()
## 
## $sql
## list()
file <- files$R[[1]]
file
## <File> <Code> <R6>
## Name: cdm.R
## # Lines: 709

File properties

file$getName()
## [1] "cdm.R"
file$getNLines()
## [1] 709
file$getType()
## [1] "R"
head(file$getLines())
## [1] "#' Create a CDM reference object from a database connection"                              
## [2] "#'"                                                                                       
## [3] "#' @param con A DBI database connection to a database where an OMOP CDM v5.4 or"          
## [4] "#'   v5.3 instance is located."                                                           
## [5] "#' @param cdm_schema,cdmSchema The schema where the OMOP CDM tables are located. Defaults"
## [6] "#'   to NULL."
tail(file$getLines())
## [1] "  DBI::dbDisconnect(attr(cdm, \"dbcon\"), shutdown = TRUE)"
## [2] "}"                                                         
## [3] ""                                                          
## [4] "#' @rdname cdmDisconnect"                                  
## [5] "#' @export"                                                
## [6] "cdm_disconnect <- cdmDisconnect"
file$getFunctionTable()
##                     name lineStart lineEnd nArgs cycloComp
## 1           cdm_from_con        20     175     7        22
## 2             cdmFromCon       179     193     7         1
## 3     detect_cdm_version       195     245     3        13
## 4                version       265     273     1         2
## 5                cdmName       298     301     1         1
## 6    print.cdm_reference       315     321     2         1
## 7    verify_write_access       326     362     4         3
## 8              tbl_group       397     406     1         1
## 9                   dbms       428     430     1         1
## 10    dbms.cdm_reference       433     435     1         1
## 11    dbms.DBIConnection       438     461     1         4
## 12                  stow       480     511     4         1
## 13        cdm_from_files       523     572     4         3
## 14          cdmFromFiles       577     583     4         1
## 15 collect.cdm_reference       607     613     2         3
## 16              snapshot       641     693     1         3
## 17         cdmDisconnect       700     705     1         2

Function

functions <- file$getFunctions()
functions
## [[1]]
## <Function> <Code> <R6>
## Name: cdm_from_con
## # Lines: 156
## [[2]]
## <Function> <Code> <R6>
## Name: cdmFromCon
## # Lines: 15
## [[3]]
## <Function> <Code> <R6>
## Name: detect_cdm_version
## # Lines: 51
## [[4]]
## <Function> <Code> <R6>
## Name: version
## # Lines: 9
## [[5]]
## <Function> <Code> <R6>
## Name: cdmName
## # Lines: 4
## [[6]]
## <Function> <Code> <R6>
## Name: print.cdm_reference
## # Lines: 7
## [[7]]
## <Function> <Code> <R6>
## Name: verify_write_access
## # Lines: 37
## [[8]]
## <Function> <Code> <R6>
## Name: tbl_group
## # Lines: 10
## [[9]]
## <Function> <Code> <R6>
## Name: dbms
## # Lines: 3
## [[10]]
## <Function> <Code> <R6>
## Name: dbms.cdm_reference
## # Lines: 3
## [[11]]
## <Function> <Code> <R6>
## Name: dbms.DBIConnection
## # Lines: 24
## [[12]]
## <Function> <Code> <R6>
## Name: stow
## # Lines: 32
## [[13]]
## <Function> <Code> <R6>
## Name: cdm_from_files
## # Lines: 50
## [[14]]
## <Function> <Code> <R6>
## Name: cdmFromFiles
## # Lines: 7
## [[15]]
## <Function> <Code> <R6>
## Name: collect.cdm_reference
## # Lines: 7
## [[16]]
## <Function> <Code> <R6>
## Name: snapshot
## # Lines: 53
## [[17]]
## <Function> <Code> <R6>
## Name: cdmDisconnect
## # Lines: 6
# Gather the Function objects of every R file into a single list.
funs <- unlist(lapply(
  X = files$R,
  FUN = function(rFile) rFile$getFunctions()
))

fun <- funs[[1]]
fun
## <Function> <Code> <R6>
## Name: cdm_from_con
## # Lines: 156

Function properties

fun$getName()
## [1] "cdm_from_con"
fun$getNLines()
## [1] 156
fun$getFunction()
##           name lineStart lineEnd nArgs cycloComp
## 1 cdm_from_con        20     175     7        22
head(fun$getLines())
## [1] "cdm_from_con <- function(con,"                  
## [2] "                         cdm_schema = NULL,"    
## [3] "                         write_schema = NULL,"  
## [4] "                         cohort_tables = NULL," 
## [5] "                         cdm_version = \"5.3\","
## [6] "                         cdm_name = NULL) {"
tail(fun$getLines())
## [1] "  # The following attributes can be used to communicate temp table preferences to downstream analytic packages."                 
## [2] "  # This a feature for analytic package developers and users should not need to know about it unless there is an issue to debug."
## [3] "  attr(cdm, \"cohort_as_temp\") <- FALSE"                                                                                        
## [4] "  attr(cdm, \"intermediate_as_temp\") <- TRUE"                                                                                   
## [5] "  return(cdm)"                                                                                                                   
## [6] "}"

ComPaRing main to develop

# Line counts per file type for both checkouts, labelled by branch and
# stacked into one table.
dplyr::bind_rows(
  dplyr::mutate(PaRe::countPackageLines(repo), branch = "main"),
  dplyr::mutate(PaRe::countPackageLines(dev), branch = "V0.1.0")
)
## # A tibble: 2 × 7
##       R   cpp     o     h  java   sql branch
##   <int> <int> <int> <int> <int> <int> <chr> 
## 1  3662     0     0     0     0     0 main  
## 2  2628     0     0     0     0     0 V0.1.0
PaRe::pkgDiagram(repo)
PaRe::pkgDiagram(dev)
# Function-use tables for both checkouts, each tagged with its branch label.
mainFunUse <- dplyr::mutate(PaRe::getFunctionUse(repo), branch = "main")

devFunUse <- dplyr::mutate(PaRe::getFunctionUse(dev), branch = "V0.1.0")

# Number of function-use rows per package and branch.
pkgUse <- dplyr::bind_rows(mainFunUse, devFunUse) %>%
  dplyr::group_by(pkg, branch) %>%
  dplyr::tally()
library(ggplot2)

# Side-by-side bars of the per-package counts, one bar per branch.
ggplot(pkgUse, aes(x = pkg, y = n, fill = branch)) +
  geom_col(position = "dodge") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

# Tables of the functions defined in each checkout.
mainFuns <- PaRe::getDefinedFunctions(repo)
devFuns <- PaRe::getDefinedFunctions(dev)

# Label each table with its branch and stack them for plotting.
defFuns <- dplyr::bind_rows(
  dplyr::mutate(mainFuns, branch = "main"),
  dplyr::mutate(devFuns, branch = "V0.1.0")
)

# Cyclomatic complexity of every defined function, branches side by side.
ggplot(defFuns, aes(x = name, y = cycloComp, fill = branch)) +
  geom_col(position = "dodge") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  labs(
    title = "Cyclomatic Complexity per Function",
    x = "Function",
    y = "Cyclomatic Complexity"
  )

# Length of every defined function (inclusive of its first and last line),
# branches side by side.
ggplot(defFuns, aes(x = name, y = lineEnd - lineStart + 1, fill = branch)) +
  geom_col(position = "dodge") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  labs(
    title = "Number of Lines per Function",
    x = "Function",
    y = "Number of Lines"
  )

# Total number of lines that sit inside function definitions, per file and
# branch.
effectiveCode <- defFuns %>%
  dplyr::group_by(fileName, branch) %>%
  dplyr::summarise(n = sum(lineEnd - lineStart + 1)) %>%
  dplyr::ungroup()
## `summarise()` has grouped output by 'fileName'. You can override using the
## `.groups` argument.
# Lines of function code per file, branches side by side.
ggplot(effectiveCode, aes(x = fileName, y = n, fill = branch)) +
  geom_col(position = "dodge") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  labs(
    title = "Functional code increase",
    x = "File",
    y = "Number of Lines"
  )

mainGraph <- PaRe::getGraphData(repo)
## ✔ Updated metadata database: 5.07 MB in 14 files.
## ℹ Updating metadata database
## ✔ Updating metadata database ... done
devGraph <- PaRe::getGraphData(dev)

library(igraph)
## 
## Attaching package: 'igraph'
## 
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## 
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## 
## The following object is masked from 'package:base':
## 
##     union
# One summary row per branch graph: vertex and edge counts, plus the mean
# degree and the mean of the distance matrix, both rounded to two decimals.
do.call(
  rbind,
  Map(
    function(graph, label) {
      data.frame(
        branch = label,
        countVertices = length(igraph::V(graph)),
        countEdges = length(igraph::E(graph)),
        meanDegree = round(mean(igraph::degree(graph)), 2),
        meanDistance = round(mean(igraph::distances(graph)), 2)
      )
    },
    list(mainGraph, devGraph),
    c("main", "V0.1.0")
  )
)
##   branch countVertices countEdges meanDegree meanDistance
## 1   main            44        173       7.86         2.14
## 2 V0.1.0            44        172       7.82         2.15
messages <- PaRe::lintRepo(repo)
messages2 <- PaRe::lintRepo(dev)
head(messages)
##   filename line_number column_number  type
## 1    cdm.R           5            81 style
## 2    cdm.R           7            81 style
## 3    cdm.R           9            81 style
## 4    cdm.R          11            81 style
## 5    cdm.R          14            81 style
## 6    cdm.R          20             1 style
##                                                                     message
## 1                              Lines should not be more than 80 characters.
## 2                              Lines should not be more than 80 characters.
## 3                              Lines should not be more than 80 characters.
## 4                              Lines should not be more than 80 characters.
## 5                              Lines should not be more than 80 characters.
## 6 Functions should have cyclomatic complexity of less than 15, this has 22.
##                                                                                           line
## 1    #' @param cdm_schema,cdmSchema The schema where the OMOP CDM tables are located. Defaults
## 2  #' @param write_schema,writeSchema An optional schema in the CDM database that the user has
## 3 #' @param cohort_tables,cohortTables A character vector listing the cohort table names to be
## 4        #' @param cdm_version,cdmVersion The version of the OMOP CDM: "5.3" (default), "5.4",
## 5        #' @param cdm_name,cdmName The name of the CDM. If NULL (default) the cdm_source_name
## 6                                                                cdm_from_con <- function(con,
##               linter
## 1 line_length_linter
## 2 line_length_linter
## 3 line_length_linter
## 4 line_length_linter
## 5 line_length_linter
## 6   cyclocomp_linter
# Score each checkout against its own lint results: `messages` was produced
# by linting `repo`, `messages2` by linting `dev`. Passing `messages` for both
# would only rescale main's lint counts by dev's line count.
bind_rows(
  PaRe::lintScore(repo, messages) %>% mutate(branch = "main"),
  PaRe::lintScore(dev, messages2) %>% mutate(branch = "v0.1.0")
)
## # A tibble: 4 × 3
##   type      pct branch
##   <chr>   <dbl> <chr> 
## 1 style   11.8  main  
## 2 warning  2.18 main  
## 3 style   16.4  v0.1.0
## 4 warning  3.04 v0.1.0