Working with the Hub: Download, Upload, and Share • huggingfaceR

library(huggingfaceR)
library(dplyr)

Inspect your token

Hub write operations require a write-scoped token. hf_whoami() now includes token metadata so you can confirm the active token before creating repositories or uploading files.

hf_whoami() |>
  select(name, token_name, token_role, billing_mode, is_pro)

Download files

Use hf_list_repo_files() to inspect a repository tree, then download specific files with hf_hub_download().

files <- hf_list_repo_files("BAAI/bge-small-en-v1.5", recursive = FALSE)
files
#> # A tibble: 14 × 4
#>    path                              type           size oid                    
#>    <chr>                             <chr>         <int> <chr>                  
#>  1 1_Pooling                         directory         0 b9d00290ba7577bd1709db…
#>  2 onnx                              directory         0 8e8db8abdebe9ba4820469…
#>  3 .gitattributes                    file           1519 a6344aac8c09253b3b630f…
#>  4 README.md                         file          94783 8b8567d75ffa619486d959…
#>  5 config.json                       file            743 3992bf890728a92476c700…
#>  6 config_sentence_transformers.json file            124 dcb0c0d97d09b930d13600…
#>  7 model.safetensors                 file      133466304 a4fef68c99b10468206a9b…
#>  8 modules.json                      file            349 952a9b81c0bfd99800fabf…
#>  9 pytorch_model.bin                 file      133508397 b6e2a796fcfd4513c609fc…
#> 10 sentence_bert_config.json         file             52 ea85692bff64b0d1917833…
#> 11 special_tokens_map.json           file            125 a8b3208c2884c4efb86e49…
#> 12 tokenizer.json                    file         711396 688882a79f44442ddc1f60…
#> 13 tokenizer_config.json             file            366 37fca74771bc76a8e01178…
#> 14 vocab.txt                         file         231508 fb140275c155a9c7c5a3b3…

readme <- hf_hub_download("BAAI/bge-small-en-v1.5", "README.md")
readLines(readme, n = 5)
#> [1] "---"                     "tags:"                  
#> [3] "- sentence-transformers" "- feature-extraction"   
#> [5] "- sentence-similarity"

Dataset and Space repositories use the same interface; set repo_type to choose the repository type:

hf_list_repo_files("stanfordnlp/imdb", repo_type = "dataset", recursive = FALSE)
#> # A tibble: 3 × 4
#>   path           type       size oid                                     
#>   <chr>          <chr>     <int> <chr>                                   
#> 1 plain_text     directory     0 7af61406508d73570a4d4f5247de6593f95a8655
#> 2 .gitattributes file       1174 957b2579c6ef20995a09efd9a17f8fd90606f5ed
#> 3 README.md      file       7809 9efdff48906733be2fa05e4538f017d2fbec7afc

Search Spaces and papers

hf_search_spaces("chat", limit = 5)
#> # A tibble: 5 × 6
#>   space_id                       author        sdk    likes last_modified tags  
#>   <chr>                          <chr>         <chr>  <int> <chr>         <list>
#> 1 ResembleAI/Chatterbox          ResembleAI    gradio  1738 <NA>          <chr> 
#> 2 ysharma/ChatGPT4               ysharma       gradio  1487 <NA>          <chr> 
#> 3 merve/ChatGPT-prompt-generator merve         gradio  1216 <NA>          <chr> 
#> 4 huggingchat/chat-ui            huggingchat   static  1214 <NA>          <chr> 
#> 5 HuggingFaceH4/zephyr-chat      HuggingFaceH4 docker   904 <NA>          <chr>
hf_search_papers("transformers", limit = 5)
#> # A tibble: 5 × 6
#>   paper_id   title                            upvotes published_at authors url  
#>   <chr>      <chr>                              <int> <chr>        <list>  <chr>
#> 1 2606.24888 DiffusionBench: On Holistic Eva…       4 2026-06-23T… <chr>   http…
#> 2 2606.24876 FLAT: Feedforward Latent Triang…      12 2026-06-23T… <chr>   http…
#> 3 2606.24874 FLUX3D: High-Fidelity 3D Gaussi…       0 2026-06-23T… <chr>   http…
#> 4 2606.24855 OpenThoughts-Agent: Data Recipe…      16 2026-06-23T… <chr>   http…
#> 5 2606.24825 L3Cube-MahaPOS: A Marathi Part-…       0 2026-06-23T… <chr>   http…

Choose providers before batch jobs

hf_list_providers() returns router metadata for OpenAI-compatible models, including pricing and capability flags.

providers <- hf_list_providers("Qwen/Qwen2.5-72B-Instruct")

providers |>
  filter(status == "live") |>
  arrange(input_price, output_price)
#> # A tibble: 2 × 12
#>   model_id       provider status context_length input_price output_price is_free
#>   <chr>          <chr>    <chr>           <int>       <dbl>        <dbl> <lgl>  
#> 1 Qwen/Qwen2.5-… novita   live            32000        0.38          0.4 FALSE  
#> 2 Qwen/Qwen2.5-… feather… live               NA       NA            NA   FALSE  
#> # ℹ 5 more variables: supports_tools <lgl>, supports_structured_output <lgl>,
#> #   first_token_latency_ms <dbl>, throughput <dbl>, is_model_author <lgl>

For non-router task models, provider metadata may be unavailable even when the task-specific Inference API works. Use hf_check_inference() as a broader availability check.

hf_check_inference("BAAI/bge-small-en-v1.5")
#> $model_id
#> [1] "BAAI/bge-small-en-v1.5"
#> 
#> $available
#> [1] TRUE
#> 
#> $pipeline_tag
#> [1] "feature-extraction"
#> 
#> $inference_provider
#> [1] "text-embeddings-inference"
#> 
#> $providers
#> # A tibble: 0 × 12
#> # ℹ 12 variables: model_id <chr>, provider <chr>, status <chr>,
#> #   context_length <int>, input_price <dbl>, output_price <dbl>, is_free <lgl>,
#> #   supports_tools <lgl>, supports_structured_output <lgl>,
#> #   first_token_latency_ms <dbl>, throughput <dbl>, is_model_author <lgl>

Guarded writes

Write operations are intentionally explicit. They require confirm = TRUE and a write-scoped token, and they use safe defaults that avoid accidental overwrites.

repo_id <- "your-username/analysis-results"

hf_create_repo(
  repo_id,
  repo_type = "dataset",
  private = TRUE,
  exist_ok = TRUE,
  confirm = TRUE
)

results <- tibble(
  id = 1:3,
  label = c("positive", "neutral", "negative")
)

hf_push_dataset(
  results,
  repo_id,
  path_in_repo = "results.csv",
  commit_message = "Upload analysis results",
  confirm = TRUE
)

Destructive deletes are more heavily guarded: in addition to requiring confirm = TRUE, hf_delete_repo() refuses to run when the CI environment variable is set to true (CI=true).

hf_delete_repo(repo_id, repo_type = "dataset", confirm = TRUE)