Revision | 149f52fc5c3943c54eda036fdc1a9af05328ac1a (tree) |
---|---|
Zeit | 2023-05-03 20:59:31 |
Autor | Lorenzo Isella <lorenzo.isella@gmai...> |
Committer | Lorenzo Isella |
Now the code can process the large TAM dataset from PL.
@@ -17,14 +17,20 @@ | ||
17 | 17 | |
18 | 18 | read_estat <- 0 |
19 | 19 | |
20 | -df_ini1 <- read_csv("State_aid_in_Poland-2018-2019_above_500K_euro,_2020_above_100K_euro.csv" ## , locale = readr::locale(encoding = "latin1") | |
21 | - ) |> | |
22 | - clean_names() | |
20 | +## df_ini1 <- read_csv("State_aid_in_Poland-2018-2019_above_500K_euro,_2020_above_100K_euro.csv" ## , locale = readr::locale(encoding = "latin1") | |
21 | +## ) |> | |
22 | +## clean_names() | |
23 | 23 | |
24 | -df_ini2 <- read_csv("2021-data/State_aid_in_Poland-2021_above_100K_euro.csv") |> | |
25 | - clean_names() | |
24 | +## df_ini2 <- read_csv("2021-data/State_aid_in_Poland-2021_above_100K_euro.csv") |> | |
25 | +## clean_names() | |
26 | 26 | |
27 | -df_ini <- bind_rows(df_ini1, df_ini2) | |
27 | +## df_ini <- bind_rows(df_ini1, df_ini2) | |
28 | + | |
29 | + | |
30 | +df_ini <- read_csv("all-PL.csv", locale = readr::locale(encoding = "UTF-8" ## "latin1" | |
31 | + )) | |
32 | + | |
33 | +## df_ini <- readRDS("all-PL.RDS") | |
28 | 34 | |
29 | 35 | query <- "ert_bil_eur_a/A.AVG.NAC.PLN" |
30 | 36 |
@@ -112,6 +118,8 @@ | ||
112 | 118 | select(all_of(column_selection)) |> |
113 | 119 | rename_many( new_columns## [1:14] |
114 | 120 | , column_selection) |> |
121 | + mutate(aid_award_created_date=parse_date_time(aid_award_created_date,c("dmy", "ymd"))) |> | |
122 | + mutate(aid_award_granted_date=parse_date_time(aid_award_granted_date, c("dmy", "ymd"))) |> | |
115 | 123 | mutate(year=year(aid_award_granted_date ), |
116 | 124 | beneficiary_country="Poland") |> |
117 | 125 | left_join(y=all_rates_ini, by=c("year"="time_period")) |> |
@@ -164,7 +172,7 @@ | ||
164 | 172 | pull(aid_award_instrument) |> |
165 | 173 | su() |
166 | 174 | |
167 | -nace <- readRDS("../../nace_codes/df_nace.RDS") |> | |
175 | +nace <- readRDS("df_nace.RDS") |> | |
168 | 176 | select(-code2) |
169 | 177 | |
170 | 178 | df_nace <- tibble(macro=seq_fixed_width(1:99,2), |
@@ -199,8 +207,8 @@ | ||
199 | 207 | |
200 | 208 | df_sel2 <- df_sel |> |
201 | 209 | mutate(aid_award_ga_original=df_ini$udzielajacy_nazwa) |> |
202 | - mutate(aid_award_instrument=recode_many(aid_award_instrument, aid_instr, | |
203 | - aid_instr_new)) |> | |
210 | + ## mutate(aid_award_instrument=recode_many(aid_award_instrument, aid_instr, | |
211 | + ## aid_instr_new)) |> | |
204 | 212 | mutate(beneficiary_type=if_else(beneficiary_type %in% c("0","1","2"), |
205 | 213 | "Small and medium-sized enterprises", |
206 | 214 | "Only large enterprises")) |> |
@@ -215,17 +223,20 @@ | ||
215 | 223 | left_join(y=df_nace, by=c("ben_sec2"="macro")) |> |
216 | 224 | mutate(beneficiary_sector=paste(macro2, " - ", beneficiary_sector, |
217 | 225 | sep="")) |> |
218 | - select(-c(ben_sec2, macro2)) | |
226 | + select(-c(ben_sec2, macro2)) |> | |
227 | + mutate(aid_award_granted_date=as.Date(aid_award_granted_date), | |
228 | + aid_award_created_date=as.Date(aid_award_created_date), | |
229 | + national_identification_type=as.character(national_identification_type)) | |
219 | 230 | |
220 | 231 | |
221 | 232 | |
222 | 233 | |
223 | -saveRDS(df_sel2,"tam_PL_matched.RDS") | |
224 | -write_csv(df_sel2, "tam_PL_matched.csv.gz") | |
225 | -save_excel(df_sel2, "tam_PL_matched.xlsx") | |
234 | +## saveRDS(df_sel2,"tam_PL_matched.RDS") | |
235 | +## write_csv(df_sel2, "tam_PL_matched.csv.gz") | |
236 | +## save_excel(df_sel2, "tam_PL_matched.xlsx") | |
226 | 237 | |
227 | 238 | |
228 | -saveRDS(df_sel2,"tam_PL_matched_extended.RDS") | |
239 | +## saveRDS(df_sel2,"tam_PL_matched_extended.RDS") | |
229 | 240 | write_parquet(df_sel2,"tam_PL_matched_extended.parquet") |
230 | 241 | |
231 | 242 |