• R/O
  • SSH

Commit

Tags
Keine Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#objective-cqt誰得cocoawindowspythonphprubygameguibathyscaphec翻訳omegat計画中(planning stage)frameworktwittertestdombtronvb.netdirectxarduinopreviewerゲームエンジン

Commit MetaInfo

Revision: c2a9e93444f3eed8593f1c8b239fed4913088c59 (tree)
Zeit: 2022-07-06 23:44:29
Autor: Lorenzo Isella <lorenzo.isella@gmai...>
Committer: Lorenzo Isella

Log Message

I improved the function to convert a csv file to an sqlite database.

Ändern Zusammenfassung

Diff

diff -r 9f0bc6a2077c -r c2a9e93444f3 R-codes/stat_lib.R
--- a/R-codes/stat_lib.R Tue Jul 05 16:06:58 2022 +0200
+++ b/R-codes/stat_lib.R Wed Jul 06 16:44:29 2022 +0200
@@ -4733,6 +4733,69 @@
47334733 #' @importFrom readr read_delim read_delim_chunked
47344734 #' @importFrom dplyr %>% select_if mutate_at
47354735 #' @importFrom lubridate is.Date is.POSIXt
4736+## csv_to_sqlite <- function(csv_file, sqlite_file, table_name,
4737+## delim = ",",
4738+## pre_process_size = 1000, chunk_size = 50000,
4739+## show_progress_bar = TRUE, ...) {
4740+## con <- dbConnect(SQLite(), dbname = sqlite_file)
4741+
4742+## # read a first chunk of data to extract the colnames and types
4743+## # to figure out the date and the datetime columns
4744+## df <- read_delim(csv_file, delim = delim, n_max = pre_process_size, ...)
4745+## date_cols <- df %>%
4746+## select_if(is.Date) %>%
4747+## colnames()
4748+## datetime_cols <- df %>%
4749+## select_if(is.POSIXt) %>%
4750+## colnames()
4751+
4752+## # write the first batch of lines to SQLITE table, converting dates to string
4753+## # representation
4754+## df <- df %>%
4755+## mutate_at(.vars = date_cols, .funs = as.character.Date) %>%
4756+## mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt)
4757+## dbWriteTable(con, table_name, df, overwrite = TRUE)
4758+
4759+## # readr chunk functionality
4760+## read_delim_chunked(
4761+## csv_file,
4762+## callback = append_to_sqlite(con = con, table_name = table_name,
4763+## date_cols = date_cols,
4764+## datetime_cols = datetime_cols),
4765+## delim = delim,
4766+## skip = pre_process_size, chunk_size = chunk_size,
4767+## progress = show_progress_bar,
4768+## col_names = colnames(df), ...)
4769+## dbDisconnect(con)
4770+## }
4771+
4772+#' Callback function that appends new sections to the SQLite table.
4773+#' @param con A valid connection to SQLite database.
4774+#' @param table_name Name of the table to store the data table in the sqlite
4775+#' database.
4776+#' @param date_cols Name of columns containing Date objects
4777+#' @param datetime_cols Name of columns containing POSIXt objects.
4778+#'
4779+#' @keywords internal
4780+## append_to_sqlite <- function(con, table_name,
4781+## date_cols, datetime_cols) {
4782+## #' @param x Data.frame we are reading from.
4783+## function(x, pos) {
4784+
4785+## x <- as.data.frame(x)
4786+## x <- x %>%
4787+## mutate_at(.vars = date_cols, .funs = as.character.Date) %>%
4788+## mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt)
4789+## # append data frame to table
4790+## dbWriteTable(con, table_name, x, append = TRUE)
4791+
4792+## }
4793+## }
4794+
4795+
4796+### A more modern version of the functions above
4797+
4798+
47364799 csv_to_sqlite <- function(csv_file, sqlite_file, table_name,
47374800 delim = ",",
47384801 pre_process_size = 1000, chunk_size = 50000,
@@ -4742,18 +4805,29 @@
47424805 # read a first chunk of data to extract the colnames and types
47434806 # to figure out the date and the datetime columns
47444807 df <- read_delim(csv_file, delim = delim, n_max = pre_process_size, ...)
4745- date_cols <- df %>%
4746- select_if(is.Date) %>%
4808+
4809+
4810+ date_cols <- df |>
4811+ select(where(is.Date)) |>
47474812 colnames()
4748- datetime_cols <- df %>%
4749- select_if(is.POSIXt) %>%
4813+
4814+ datetime_cols <- df |>
4815+ select(where(is.POSIXt)) |>
47504816 colnames()
47514817
47524818 # write the first batch of lines to SQLITE table, converting dates to string
47534819 # representation
4754- df <- df %>%
4755- mutate_at(.vars = date_cols, .funs = as.character.Date) %>%
4756- mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt)
4820+ ## df <- df %>%
4821+ ## mutate_at(.vars = date_cols, .funs = as.character.Date) %>%
4822+ ## mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt)
4823+
4824+
4825+ df <- df |>
4826+ mutate(across( all_of(date_cols), \(x) as.character.Date(x))) |>
4827+ mutate(across(all_of(datetime_cols), \(x) as.character.POSIXt(x)))
4828+
4829+
4830+
47574831 dbWriteTable(con, table_name, df, overwrite = TRUE)
47584832
47594833 # readr chunk functionality
@@ -4769,23 +4843,19 @@
47694843 dbDisconnect(con)
47704844 }
47714845
4772-#' Callback function that appends new sections to the SQLite table.
4773-#' @param con A valid connection to SQLite database.
4774-#' @param table_name Name of the table to store the data table in the sqlite
4775-#' database.
4776-#' @param date_cols Name of columns containing Date objects
4777-#' @param datetime_cols Name of columns containing POSIXt objects.
4778-#'
4779-#' @keywords internal
4846+
4847+
4848+
4849+
47804850 append_to_sqlite <- function(con, table_name,
47814851 date_cols, datetime_cols) {
47824852 #' @param x Data.frame we are reading from.
47834853 function(x, pos) {
47844854
47854855 x <- as.data.frame(x)
4786- x <- x %>%
4787- mutate_at(.vars = date_cols, .funs = as.character.Date) %>%
4788- mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt)
4856+ x <- x |>
4857+ mutate(across( all_of(date_cols), \(x) as.character.Date(x))) |>
4858+ mutate(across( all_of(datetime_cols), \(x) as.character.POSIXt(x)))
47894859 # append data frame to table
47904860 dbWriteTable(con, table_name, x, append = TRUE)
47914861
@@ -4793,6 +4863,11 @@
47934863 }
47944864
47954865
4866+
4867+
4868+
4869+
4870+
47964871 #######################################################################
47974872 #######################################################################
47984873 #######################################################################