Revision | 70a74d019c9a1c88ed1473e2f0fd6c473d05dedd (tree) |
---|---|
Zeit | 2020-08-28 00:44:23 |
Autor | Lorenzo Isella <lorenzo.isella@gmai...> |
Commiter | Lorenzo Isella |
Example code illustrating how to rectangle (flatten into a table) a JSON file.
@@ -0,0 +1,84 @@ | ||
## Rectangle a nested JSON document with tidyjson.
##
## NOTE: the former `rm(list = ls())` was dropped on purpose. It only removes
## objects from the global environment (attached packages and options stay),
## so it does not give a clean session, and it silently wipes the workspace
## of anyone who sources this file. Restart R for a fresh session instead.

library(tidyjson)  # gather_array(), enter_object(), spread_values(), jstring()
library(tidyverse) # for %>% and dplyr verbs such as select()
library(jsonlite)  # fromJSON(), used for the comparison parse below

## See the "Introduction to tidyjson" vignette:
## https://cran.r-project.org/web/packages/tidyjson/vignettes/introduction-to-tidyjson.html

## Example document: an array of users, each with an array of purchases,
## each purchase holding a date and an array of items.
purch_json <- '
[
  {
    "name": "bob",
    "purchases": [
      {
        "date": "2014/09/13",
        "items": [
          {"name": "shoes", "price": 187},
          {"name": "belt", "price": 35}
        ]
      }
    ]
  },
  {
    "name": "susan",
    "purchases": [
      {
        "date": "2014/10/01",
        "items": [
          {"name": "dress", "price": 58},
          {"name": "bag", "price": 118}
        ]
      },
      {
        "date": "2015/01/03",
        "items": [
          {"name": "shoes", "price": 115}
        ]
      }
    ]
  }
]'

## For comparison: parse the same JSON with jsonlite into a data.frame.
## The result is kept only for inspection; nothing below depends on it.
purch_df <- jsonlite::fromJSON(purch_json, simplifyDataFrame = TRUE)

## Flatten the document into one row per purchased item.
##
## How the pipeline works:
## 1. The top level is an array of users, and "name" is a simple (scalar)
##    value of each user, so it is enough to gather the array and spread
##    that value.
## 2. "purchases" is itself an array of objects, so it has to be entered
##    first and then gathered. Its "date" is a simple value and is spread
##    right away.
## 3. "items" is the remaining nested array: enter it, gather it, and
##    spread the item name and price.
## In this pipeline every spread_values() call follows the gather_array()
## call that stacked the level it reads from.
purch_items <- purch_json %>%
  gather_array() %>%                              # stack the users
  spread_values(person = jstring("name")) %>%     # extract the user name
  enter_object("purchases") %>%
  gather_array() %>%                              # stack the purchases
  spread_values(purchase.date = jstring("date")) %>% # extract the date
  enter_object("items") %>%
  gather_array() %>%                              # stack the items
  spread_values(                                  # extract name and price
    item.name = jstring("name"),
    item.price = jnumber("price")
  ) %>%
  select(person, purchase.date, item.name, item.price) # keep what is needed

## Sanity check: the document above contains five items in total, so the
## rectangled table must have exactly five rows.
stopifnot(nrow(purch_items) == 5L)

print("So far so good")