Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
# Upload the local file "tmp.csv" with a PUT request.
# NOTE: `uriWrite` is not defined in this snippet; it must already hold the
# target URL for the upload.
responseWrite <- PUT(uriWrite, body = upload_file("tmp.csv"))
# Abort with an R error if the request did not succeed, instead of carrying on
# silently with a failed upload.
stop_for_status(responseWrite)


List files in a directory

Code Block
theme: RDark
linenumbers: true
library(tidyverse)
library(httr)

# Build a tibble with one row per entry of the HDFS directory.
# NOTE: `hdfsUri` and `dirUri` are not defined in this snippet; they must
# already be set before this runs.
filelist <-
  # LISTSTATUS is the webHDFS equivalent of `$ ls`
  paste0(hdfsUri, dirUri, "?op=LISTSTATUS") %>%
  GET() %>%
  # Raise an R error on a failed request rather than parsing an error payload
  stop_for_status() %>%
  content(type = "application/json") %>%
  # The listing is nested as FileStatuses -> FileStatus in the JSON response;
  # extracting by name avoids silently picking up some other first element
  pluck("FileStatuses", "FileStatus") %>%
  # Each entry is a named list of scalars; stack them into one tibble
  map_dfr(as_tibble)

# Print a compact, column-wise overview of the listing (recorded output below)
filelist %>%
  glimpse()
# Observations: 16
# Variables: 13
# $ accessTime       <dbl> 1.527669e+12, 1.527588e+12, 1.527588e+12, 1.527588...
# $ blockSize        <int> 134217728, 134217728, 134217728, 134217728, 134217...
# $ childrenNum      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
# $ fileId           <int> 878668, 878659, 878669, 878672, 878670, 878663, 87...
# $ group            <chr> "foo.bar", "foo.bar", "foo.bar", "f...
# $ length           <int> 6590121, 37314676, 2174591, 7325908, 4599335, 1677...
# $ modificationTime <dbl> 1.527582e+12, 1.527582e+12, 1.527582e+12, 1.527582...
# $ owner            <chr> "foo.bar", "foo.bar", "foo.bar", "f...
# $ pathSuffix       <chr> "baz_qux_20180524.TXT", "baz_qux_20180525.TXT", ...
# $ permission       <chr> "755", "755", "755", "755", "755", "755", "755", "...
# $ replication      <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
# $ storagePolicy    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
# $ type             <chr> "FILE", "FILE", "FILE", "FILE", "FILE", "FILE", "F...

# The "pathSuffix" column holds the actual file names; extract it as a vector
pull(filelist, "pathSuffix")
#  [1] "baz_qux_20180524.TXT"   "baz_qux_20180525.TXT"  
#  [3] ...