library(tidyverse)
library(httr)
# Fetch the directory listing from WebHDFS and flatten it into a tibble with
# one row per directory entry.
# NOTE(review): `hdfsUri` and `dirUri` are not defined in this part of the
# script; presumably they are set earlier -- confirm.
filelist <-
  # LISTSTATUS is the webHDFS equivalent of `$ ls`
  paste0(hdfsUri, dirUri, "?op=LISTSTATUS") %>%
  GET() %>%
  # Fail loudly on HTTP errors (e.g. missing path, permission denied) instead
  # of silently parsing the error payload as if it were a listing.
  stop_for_status() %>%
  content(type = "application/json") %>%
  # The listing is nested as {"FileStatuses": {"FileStatus": [...]}}; extract
  # it by name rather than by position so the intent is explicit.
  pluck("FileStatuses", "FileStatus") %>%
  # Each entry is a flat named list; turn each into a one-row tibble and
  # row-bind them.
  map_dfr(as_tibble)
# Print a compact, column-wise overview of the listing (sample output below).
filelist %>% glimpse()
# Observations: 16
# Variables: 13
# $ accessTime <dbl> 1.527669e+12, 1.527588e+12, 1.527588e+12, 1.527588...
# $ blockSize <int> 134217728, 134217728, 134217728, 134217728, 134217...
# $ childrenNum <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
# $ fileId <int> 878668, 878659, 878669, 878672, 878670, 878663, 87...
# $ group <chr> "foo.bar", "foo.bar", "foo.bar", "f...
# $ length <int> 6590121, 37314676, 2174591, 7325908, 4599335, 1677...
# $ modificationTime <dbl> 1.527582e+12, 1.527582e+12, 1.527582e+12, 1.527582...
# $ owner <chr> "foo.bar", "foo.bar", "foo.bar", "f...
# $ pathSuffix <chr> "baz_qux_20180524.TXT", "baz_qux_20180525.TXT", ...
# $ permission <chr> "755", "755", "755", "755", "755", "755", "755", "...
# $ replication <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
# $ storagePolicy <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
# $ type <chr> "FILE", "FILE", "FILE", "FILE", "FILE", "FILE", "F...
# The "pathSuffix" column holds the actual file names; extract it as a vector.
pull(filelist, "pathSuffix")
# [1] "baz_qux_20180524.TXT" "baz_qux_20180525.TXT"
# [3] ...