This is a package to transform large, deeply nested lists into a more user-friendly format (i.e. a `tibble`) in R. The initial focus is on making the processing of return values from `jsonlite::fromJSON()` queries more seamless, but ideally the package should be useful for deeply nested lists from a wide range of sources.
Key features:

- `roomba()` searches a deeply nested list for the names specified in `cols` (a character vector) and returns a `tibble` with those names as column titles. Nothing further about the nesting hierarchy or depth needs to be specified.
- Empty values are handled gracefully: `NULL` values are replaced with `NA` (or with a user-specified value passed to `default`), and lists are truncated appropriately. If you’re only interested in sniffing out and replacing all the `NULL`s, turn to the `replace_null()` function (see the sketch after this list).
- The `keep` argument gives you the option to keep rows with `any` or `all` of the data from the columns supplied.
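For instance, here is a minimal sketch of the last two features. The toy list and the replacement value are just illustrations, and we assume `replace_null()` can be called with the list as its only argument:

library(roomba)

# A toy list with a couple of buried NULLs (purely illustrative)
messy <- list(
  stuff = list(
    list(name = "Ada", super_power = NULL),
    list(name = NULL, super_power = "teleportation")
  )
)

# Swap every NULL in the nested list for NA
replace_null(messy)

# Or let roomba() make the substitution on the way out,
# filling the gaps with a value of your choosing
roomba(messy, cols = c("name", "super_power"), default = "unknown")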
You can install the development version from GitHub with:
# install.packages("devtools")
devtools::install_github("cstawitz/roomba")
Say we have some JSON from a pesky API.
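For illustration, the JSON might look something like this (a trimmed, hypothetical string whose shape matches the `super_data` list printed in full further down):

json <- '{
  "stuff": {
    "buried": {
      "deep": [
        {
          "location": "here",
          "name": "Laura DeCicco",
          "super_power": "fixing merge conflicts",
          "other_secret_power": []
        }
      ],
      "alsodeep": 2342423234
    }
  }
}'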
The JSON becomes a nested R list,
super_data <- json %>%
jsonlite::fromJSON(simplifyVector = FALSE)
from which we can pull the data we want into columns with `roomba()`.
super_data %>%
roomba(cols = c("name", "super_power", "more_nested_stuff"), keep = any)
#> # A tibble: 5 x 3
#> name super_power more_nested_stuff
#> <chr> <chr> <int>
#> 1 Laura DeCicco fixing merge conflicts NA
#> 2 Amanda Dobbyn flight 4
#> 3 Jim Hester <NA> NA
#> 4 Christine Stawitz invisibility 5
#> 5 Isabella Velasquez teleportation NA
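With `keep = any`, a row survives as long as any of the requested columns were found (Jim Hester makes the cut on `name` alone). A hedged sketch of the stricter option, assuming `keep = all` retains only rows in which every requested column has a value:

super_data %>%
  roomba(cols = c("name", "super_power", "more_nested_stuff"), keep = all)

Judging from the tibble above, only Amanda Dobbyn’s and Christine Stawitz’s rows have values in all three columns, so we would expect just those two to remain.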
Let’s try it with a real-world Twitter example (the `twitter_data` object is included as package data).
roomba(twitter_data, c("created_at", "name"))
#> # A tibble: 24 x 2
#> name created_at
#> <chr> <chr>
#> 1 Code for America Mon Aug 10 18:59:29 +0000 2009
#> 2 Ben Lorica 罗瑞卡 Mon Dec 22 22:06:18 +0000 2008
#> 3 Dan Sholler Thu Apr 03 20:09:24 +0000 2014
#> 4 Code for America Mon Aug 10 18:59:29 +0000 2009
#> 5 FiveThirtyEight Tue Jan 21 21:39:32 +0000 2014
#> 6 Digital Impact Wed Oct 07 21:10:53 +0000 2009
#> 7 Drew Williams Thu Aug 07 18:41:29 +0000 2014
#> 8 joe Fri May 29 13:25:25 +0000 2009
#> 9 Data Analysts 4 Good Wed May 07 16:55:33 +0000 2014
#> 10 Ryan Frederick Sun Mar 01 19:06:53 +0000 2009
#> # ... with 14 more rows
Feast your eyes on the original `super_data` list!
super_data
#> $stuff
#> $stuff$buried
#> $stuff$buried$deep
#> $stuff$buried$deep[[1]]
#> $stuff$buried$deep[[1]]$location
#> [1] "here"
#>
#> $stuff$buried$deep[[1]]$name
#> [1] "Laura DeCicco"
#>
#> $stuff$buried$deep[[1]]$super_power
#> [1] "fixing merge conflicts"
#>
#> $stuff$buried$deep[[1]]$other_secret_power
#> list()
#>
#>
#> $stuff$buried$deep[[2]]
#> $stuff$buried$deep[[2]]$location
#> [1] "here"
#>
#> $stuff$buried$deep[[2]]$name
#> [1] "Amanda Dobbyn"
#>
#> $stuff$buried$deep[[2]]$super_power
#> [1] "flight"
#>
#> $stuff$buried$deep[[2]]$more_nested_stuff
#> [1] 4
#>
#>
#>
#> $stuff$buried$alsodeep
#> [1] 2342423234
#>
#> $stuff$buried$stilldeep
#> $stuff$buried$stilldeep$even_deeper
#> $stuff$buried$stilldeep$even_deeper[[1]]
#> $stuff$buried$stilldeep$even_deeper[[1]]$location
#> [1] "not here"
#>
#> $stuff$buried$stilldeep$even_deeper[[1]]$name
#> [1] "Jim Hester"
#>
#> $stuff$buried$stilldeep$even_deeper[[1]]$super_power
#> list()
#>
#>
#> $stuff$buried$stilldeep$even_deeper[[2]]
#> $stuff$buried$stilldeep$even_deeper[[2]]$location
#> [1] "here"
#>
#> $stuff$buried$stilldeep$even_deeper[[2]]$name
#> [1] "Christine Stawitz"
#>
#> $stuff$buried$stilldeep$even_deeper[[2]]$super_power
#> [1] "invisibility"
#>
#> $stuff$buried$stilldeep$even_deeper[[2]]$more_nested_stuff
#> [1] 5
#>
#>
#> $stuff$buried$stilldeep$even_deeper[[3]]
#> $stuff$buried$stilldeep$even_deeper[[3]]$location
#> [1] "here"
#>
#> $stuff$buried$stilldeep$even_deeper[[3]]$name
#> [1] "Isabella Velasquez"
#>
#> $stuff$buried$stilldeep$even_deeper[[3]]$super_power
#> [1] "teleportation"
And just the first element of the `twitter_data` dataset 😱
twitter_data[[1]]
#> $created_at
#> [1] "Mon May 21 17:58:09 +0000 2018"
#>
#> $id
#> [1] 9.98624e+17
#>
#> $id_str
#> [1] "998623997397876743"
#>
#> $text
#> [1] "Could a program like food stamps have a Cambridge Analytica moment? How do we allow for the innovation that data pl… https://t.co/7tVf1qmNmq"
#>
#> $truncated
#> [1] TRUE
#>
#> $entities
#> $entities$hashtags
#> list()
#>
#> $entities$symbols
#> list()
#>
#> $entities$user_mentions
#> list()
#>
#> $entities$urls
#> $entities$urls[[1]]
#> $entities$urls[[1]]$url
#> [1] "https://t.co/7tVf1qmNmq"
#>
#> $entities$urls[[1]]$expanded_url
#> [1] "https://twitter.com/i/web/status/998623997397876743"
#>
#> $entities$urls[[1]]$display_url
#> [1] "twitter.com/i/web/status/9…"
#>
#> $entities$urls[[1]]$indices
#> $entities$urls[[1]]$indices[[1]]
#> [1] 117
#>
#> $entities$urls[[1]]$indices[[2]]
#> [1] 140
#>
#>
#>
#>
#>
#> $source
#> [1] "<a href=\"https://about.twitter.com/products/tweetdeck\" rel=\"nofollow\">TweetDeck</a>"
#>
#> $in_reply_to_status_id
#> NULL
#>
#> $in_reply_to_status_id_str
#> NULL
#>
#> $in_reply_to_user_id
#> NULL
#>
#> $in_reply_to_user_id_str
#> NULL
#>
#> $in_reply_to_screen_name
#> NULL
#>
#> $user
#> $user$id
#> [1] 64482503
#>
#> $user$id_str
#> [1] "64482503"
#>
#> $user$name
#> [1] "Code for America"
#>
#> $user$screen_name
#> [1] "codeforamerica"
#>
#> $user$location
#> [1] "San Francisco, California"
#>
#> $user$description
#> [1] "Government can work for the people, by the people, in the 21st century. Help us make it so."
#>
#> $user$url
#> [1] "https://t.co/l9lokka0rJ"
#>
#> $user$entities
#> $user$entities$url
#> $user$entities$url$urls
#> $user$entities$url$urls[[1]]
#> $user$entities$url$urls[[1]]$url
#> [1] "https://t.co/l9lokka0rJ"
#>
#> $user$entities$url$urls[[1]]$expanded_url
#> [1] "http://codeforamerica.org"
#>
#> $user$entities$url$urls[[1]]$display_url
#> [1] "codeforamerica.org"
#>
#> $user$entities$url$urls[[1]]$indices
#> $user$entities$url$urls[[1]]$indices[[1]]
#> [1] 0
#>
#> $user$entities$url$urls[[1]]$indices[[2]]
#> [1] 23
#>
#>
#>
#>
#>
#> $user$entities$description
#> $user$entities$description$urls
#> list()
#>
#>
#>
#> $user$protected
#> [1] FALSE
#>
#> $user$followers_count
#> [1] 49202
#>
#> $user$friends_count
#> [1] 1716
#>
#> $user$listed_count
#> [1] 2659
#>
#> $user$created_at
#> [1] "Mon Aug 10 18:59:29 +0000 2009"
#>
#> $user$favourites_count
#> [1] 4490
#>
#> $user$utc_offset
#> [1] -25200
#>
#> $user$time_zone
#> [1] "Pacific Time (US & Canada)"
#>
#> $user$geo_enabled
#> [1] TRUE
#>
#> $user$verified
#> [1] TRUE
#>
#> $user$statuses_count
#> [1] 15912
#>
#> $user$lang
#> [1] "en"
#>
#> $user$contributors_enabled
#> [1] FALSE
#>
#> $user$is_translator
#> [1] FALSE
#>
#> $user$is_translation_enabled
#> [1] FALSE
#>
#> $user$profile_background_color
#> [1] "EBEBEB"
#>
#> $user$profile_background_image_url
#> [1] "http://abs.twimg.com/images/themes/theme7/bg.gif"
#>
#> $user$profile_background_image_url_https
#> [1] "https://abs.twimg.com/images/themes/theme7/bg.gif"
#>
#> $user$profile_background_tile
#> [1] FALSE
#>
#> $user$profile_image_url
#> [1] "http://pbs.twimg.com/profile_images/615534833645678592/iAO_Lytr_normal.jpg"
#>
#> $user$profile_image_url_https
#> [1] "https://pbs.twimg.com/profile_images/615534833645678592/iAO_Lytr_normal.jpg"
#>
#> $user$profile_banner_url
#> [1] "https://pbs.twimg.com/profile_banners/64482503/1497895952"
#>
#> $user$profile_link_color
#> [1] "CF1B41"
#>
#> $user$profile_sidebar_border_color
#> [1] "FFFFFF"
#>
#> $user$profile_sidebar_fill_color
#> [1] "F3F3F3"
#>
#> $user$profile_text_color
#> [1] "333333"
#>
#> $user$profile_use_background_image
#> [1] FALSE
#>
#> $user$has_extended_profile
#> [1] FALSE
#>
#> $user$default_profile
#> [1] FALSE
#>
#> $user$default_profile_image
#> [1] FALSE
#>
#> $user$following
#> [1] TRUE
#>
#> $user$follow_request_sent
#> [1] FALSE
#>
#> $user$notifications
#> [1] FALSE
#>
#> $user$translator_type
#> [1] "none"
#>
#>
#> $geo
#> NULL
#>
#> $coordinates
#> NULL
#>
#> $place
#> NULL
#>
#> $contributors
#> NULL
#>
#> $is_quote_status
#> [1] FALSE
#>
#> $retweet_count
#> [1] 0
#>
#> $favorite_count
#> [1] 0
#>
#> $favorited
#> [1] FALSE
#>
#> $retweeted
#> [1] FALSE
#>
#> $possibly_sensitive
#> [1] FALSE
#>
#> $possibly_sensitive_appealable
#> [1] FALSE
#>
#> $lang
#> [1] "en"
Happy cleaning!