Revision | c2a9e93444f3eed8593f1c8b239fed4913088c59 (tree) |
---|---|
Zeit | 2022-07-06 23:44:29 |
Autor | Lorenzo Isella <lorenzo.isella@gmai...> |
Commiter | Lorenzo Isella |
I improved the function to convert a csv file to an sqlite database.
@@ -4733,6 +4733,69 @@ | ||
4733 | 4733 | #' @importFrom readr read_delim read_delim_chunked |
4734 | 4734 | #' @importFrom dplyr %>% select_if mutate_at |
4735 | 4735 | #' @importFrom lubridate is.Date is.POSIXt |
4736 | +## csv_to_sqlite <- function(csv_file, sqlite_file, table_name, | |
4737 | +## delim = ",", | |
4738 | +## pre_process_size = 1000, chunk_size = 50000, | |
4739 | +## show_progress_bar = TRUE, ...) { | |
4740 | +## con <- dbConnect(SQLite(), dbname = sqlite_file) | |
4741 | + | |
4742 | +## # read a first chunk of data to extract the colnames and types | |
4743 | +## # to figure out the date and the datetime columns | |
4744 | +## df <- read_delim(csv_file, delim = delim, n_max = pre_process_size, ...) | |
4745 | +## date_cols <- df %>% | |
4746 | +## select_if(is.Date) %>% | |
4747 | +## colnames() | |
4748 | +## datetime_cols <- df %>% | |
4749 | +## select_if(is.POSIXt) %>% | |
4750 | +## colnames() | |
4751 | + | |
4752 | +## # write the first batch of lines to SQLITE table, converting dates to string | |
4753 | +## # representation | |
4754 | +## df <- df %>% | |
4755 | +## mutate_at(.vars = date_cols, .funs = as.character.Date) %>% | |
4756 | +## mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt) | |
4757 | +## dbWriteTable(con, table_name, df, overwrite = TRUE) | |
4758 | + | |
4759 | +## # readr chunk functionality | |
4760 | +## read_delim_chunked( | |
4761 | +## csv_file, | |
4762 | +## callback = append_to_sqlite(con = con, table_name = table_name, | |
4763 | +## date_cols = date_cols, | |
4764 | +## datetime_cols = datetime_cols), | |
4765 | +## delim = delim, | |
4766 | +## skip = pre_process_size, chunk_size = chunk_size, | |
4767 | +## progress = show_progress_bar, | |
4768 | +## col_names = colnames(df), ...) | |
4769 | +## dbDisconnect(con) | |
4770 | +## } | |
4771 | + | |
4772 | +#' Callback function that appends new sections to the SQLite table. | |
4773 | +#' @param con A valid connection to SQLite database. | |
4774 | +#' @param table_name Name of the table to store the data table in the sqlite | |
4775 | +#' database. | |
4776 | +#' @param date_cols Name of columns containing Date objects | |
4777 | +#' @param datetime_cols Name of columns containint POSIXt objects. | |
4778 | +#' | |
4779 | +#' @keywords internal | |
4780 | +## append_to_sqlite <- function(con, table_name, | |
4781 | +## date_cols, datetime_cols) { | |
4782 | +## #' @param x Data.frame we are reading from. | |
4783 | +## function(x, pos) { | |
4784 | + | |
4785 | +## x <- as.data.frame(x) | |
4786 | +## x <- x %>% | |
4787 | +## mutate_at(.vars = date_cols, .funs = as.character.Date) %>% | |
4788 | +## mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt) | |
4789 | +## # append data frame to table | |
4790 | +## dbWriteTable(con, table_name, x, append = TRUE) | |
4791 | + | |
4792 | +## } | |
4793 | +## } | |
4794 | + | |
4795 | + | |
4796 | +### A more modern version of the functions above | |
4797 | + | |
4798 | + | |
4736 | 4799 | csv_to_sqlite <- function(csv_file, sqlite_file, table_name, |
4737 | 4800 | delim = ",", |
4738 | 4801 | pre_process_size = 1000, chunk_size = 50000, |
@@ -4742,18 +4805,29 @@ | ||
4742 | 4805 | # read a first chunk of data to extract the colnames and types |
4743 | 4806 | # to figure out the date and the datetime columns |
4744 | 4807 | df <- read_delim(csv_file, delim = delim, n_max = pre_process_size, ...) |
4745 | - date_cols <- df %>% | |
4746 | - select_if(is.Date) %>% | |
4808 | + | |
4809 | + | |
4810 | + date_cols <- df |> | |
4811 | + select(where(is.Date)) |> | |
4747 | 4812 | colnames() |
4748 | - datetime_cols <- df %>% | |
4749 | - select_if(is.POSIXt) %>% | |
4813 | + | |
4814 | + datetime_cols <- df |> | |
4815 | + select(where(is.POSIXt)) |> | |
4750 | 4816 | colnames() |
4751 | 4817 | |
4752 | 4818 | # write the first batch of lines to SQLITE table, converting dates to string |
4753 | 4819 | # representation |
4754 | - df <- df %>% | |
4755 | - mutate_at(.vars = date_cols, .funs = as.character.Date) %>% | |
4756 | - mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt) | |
4820 | + ## df <- df %>% | |
4821 | + ## mutate_at(.vars = date_cols, .funs = as.character.Date) %>% | |
4822 | + ## mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt) | |
4823 | + | |
4824 | + | |
4825 | + df <- df |> | |
4826 | + mutate(across( all_of(date_cols), \(x) as.character.Date(x))) |> | |
4827 | + mutate(across(all_of(datetime_cols), \(x) as.character.POSIXt(x))) | |
4828 | + | |
4829 | + | |
4830 | + | |
4757 | 4831 | dbWriteTable(con, table_name, df, overwrite = TRUE) |
4758 | 4832 | |
4759 | 4833 | # readr chunk functionality |
@@ -4769,23 +4843,19 @@ | ||
4769 | 4843 | dbDisconnect(con) |
4770 | 4844 | } |
4771 | 4845 | |
4772 | -#' Callback function that appends new sections to the SQLite table. | |
4773 | -#' @param con A valid connection to SQLite database. | |
4774 | -#' @param table_name Name of the table to store the data table in the sqlite | |
4775 | -#' database. | |
4776 | -#' @param date_cols Name of columns containing Date objects | |
4777 | -#' @param datetime_cols Name of columns containint POSIXt objects. | |
4778 | -#' | |
4779 | -#' @keywords internal | |
4846 | + | |
4847 | + | |
4848 | + | |
4849 | + | |
4780 | 4850 | append_to_sqlite <- function(con, table_name, |
4781 | 4851 | date_cols, datetime_cols) { |
4782 | 4852 | #' @param x Data.frame we are reading from. |
4783 | 4853 | function(x, pos) { |
4784 | 4854 | |
4785 | 4855 | x <- as.data.frame(x) |
4786 | - x <- x %>% | |
4787 | - mutate_at(.vars = date_cols, .funs = as.character.Date) %>% | |
4788 | - mutate_at(.vars = datetime_cols, .funs = as.character.POSIXt) | |
4856 | + x <- x |> | |
4857 | + mutate(across( all_of(date_cols), \(x) as.character.Date(x))) |> | |
4858 | + mutate(across( all_of(datetime_cols), \(x) as.character.POSIXt(x))) | |
4789 | 4859 | # append data frame to table |
4790 | 4860 | dbWriteTable(con, table_name, x, append = TRUE) |
4791 | 4861 |
@@ -4793,6 +4863,11 @@ | ||
4793 | 4863 | } |
4794 | 4864 | |
4795 | 4865 | |
4866 | + | |
4867 | + | |
4868 | + | |
4869 | + | |
4870 | + | |
4796 | 4871 | ####################################################################### |
4797 | 4872 | ####################################################################### |
4798 | 4873 | ####################################################################### |