- fundamentals

tidy-select

pull() is used to extract a single column from our data frame

df %>%
  slice(1:100) %>%
  pull(Country)
##   [1] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##   [5] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##   [9] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [13] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [17] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [21] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [25] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [29] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [33] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [37] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [41] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [45] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [49] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [53] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [57] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [61] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [65] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [69] "United Kingdom" "United Kingdom" "United Kingdom" "France"        
##  [73] "France"         "France"         "France"         "France"        
##  [77] "France"         "France"         "France"         "France"        
##  [81] "France"         "France"         "France"         "France"        
##  [85] "France"         "France"         "France"         "France"        
##  [89] "France"         "France"         "United Kingdom" "United Kingdom"
##  [93] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [97] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"

The column is returned in the form of a vector.

df %>%
  slice(1:100) %>%
  pull(Country) %>%
  class()
## [1] "character"

Besides the name, we can also use the column's position; negative values count from the right, so -1 is the last column.

df %>%
  slice(1:100) %>%
  pull(-1)
##   [1] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##   [5] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##   [9] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [13] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [17] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [21] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [25] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [29] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [33] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [37] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [41] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [45] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [49] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [53] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [57] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [61] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [65] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [69] "United Kingdom" "United Kingdom" "United Kingdom" "France"        
##  [73] "France"         "France"         "France"         "France"        
##  [77] "France"         "France"         "France"         "France"        
##  [81] "France"         "France"         "France"         "France"        
##  [85] "France"         "France"         "France"         "France"        
##  [89] "France"         "France"         "United Kingdom" "United Kingdom"
##  [93] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [97] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"

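We can use it with some expressions, provided that they evaluate to a single column position (numeric) or column name (character); renaming or expressions returning other types fail.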

df %>%
  slice(1:100) %>%
  pull(dense_rank(Price))
##   [1] "489434" "489434" "489434" "489434" "489434" "489434" "489434" "489434"
##   [9] "489435" "489435" "489435" "489435" "489436" "489436" "489436" "489436"
##  [17] "489436" "489436" "489436" "489436" "489436" "489436" "489436" "489436"
##  [25] "489436" "489436" "489436" "489436" "489436" "489436" "489436" "489437"
##  [33] "489437" "489437" "489437" "489437" "489437" "489437" "489437" "489437"
##  [41] "489437" "489437" "489437" "489437" "489437" "489437" "489437" "489437"
##  [49] "489437" "489437" "489437" "489437" "489437" "489437" "489438" "489438"
##  [57] "489438" "489438" "489438" "489438" "489438" "489438" "489438" "489438"
##  [65] "489438" "489438" "489438" "489438" "489438" "489438" "489438" "489439"
##  [73] "489439" "489439" "489439" "489439" "489439" "489439" "489439" "489439"
##  [81] "489439" "489439" "489439" "489439" "489439" "489439" "489439" "489439"
##  [89] "489439" "489439" "489440" "489440" "489441" "489441" "489441" "489441"
##  [97] "489442" "489442" "489442" "489442"
df %>%
  slice(1:100) %>%
  pull(CustomerID = `Customer ID`)
## Error in `pull()`:
## ! Arguments in `...` must be used.
## ✖ Problematic argument:
## • CustomerID = Customer ID
## ℹ Did you misspell an argument name?
df %>%
  slice(1:100) %>%
  pull(as.Date(InvoiceDate, origin = "1970-01-01"))
## Error in `pull()`:
## ! Can't extract column with `!!enquo(var)`.
## ✖ `!!enquo(var)` must be numeric or character, not a <Date> object.

Selection statements, however, are not supported.

df %>%
  mutate(Country_Factor = as.factor(Country)) %>%
  slice(1:100) %>%
  pull(pick(where(is.factor)))
## Error in `pull()`:
## ! Problem while evaluating `pick(where(is.factor))`.
## Caused by error in `pick()`:
## ! Must only be used inside data-masking verbs like `mutate()`,
##   `filter()`, and `group_by()`.

Without an argument, pull() extracts the last column, on the assumption that it is the most recently manipulated one (for example, mutate() adds new columns at the last position by default) and hence the one we are currently interested in.

df %>%
  slice(1:100) %>%
  pull()
##   [1] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##   [5] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##   [9] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [13] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [17] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [21] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [25] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [29] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [33] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [37] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [41] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [45] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [49] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [53] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [57] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [61] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [65] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [69] "United Kingdom" "United Kingdom" "United Kingdom" "France"        
##  [73] "France"         "France"         "France"         "France"        
##  [77] "France"         "France"         "France"         "France"        
##  [81] "France"         "France"         "France"         "France"        
##  [85] "France"         "France"         "France"         "France"        
##  [89] "France"         "France"         "United Kingdom" "United Kingdom"
##  [93] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [97] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"

- name

With the optional argument name, we can specify another column whose values are used as the names of the vector we extract.

df %>% 
  slice(1:100) %>%
  pull(Country, name = `Customer ID`)
##            13085            13085            13085            13085 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13085            13085            13085            13085 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13085            13085            13085            13085 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13078            13078            13078            13078 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13078            13078            13078            13078 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13078            13078            13078            13078 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13078            13078            13078            13078 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13078            13078            13078            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            15362            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            15362            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            15362            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            15362            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            15362            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            18102            18102 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            18102            18102            18102            18102 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            18102            18102            18102            18102 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            18102            18102            18102            18102 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            18102            18102            18102            12682 
## "United Kingdom" "United Kingdom" "United Kingdom"         "France" 
##            12682            12682            12682            12682 
##         "France"         "France"         "France"         "France" 
##            12682            12682            12682            12682 
##         "France"         "France"         "France"         "France" 
##            12682            12682            12682            12682 
##         "France"         "France"         "France"         "France" 
##            12682            12682            12682            12682 
##         "France"         "France"         "France"         "France" 
##            12682            12682            18087            18087 
##         "France"         "France" "United Kingdom" "United Kingdom" 
##            18087            18087            18087            18087 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13635            13635            13635            13635 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"

But the names can’t come from an external vector; name must select a column of the data frame.

cusnames <- df$`Customer ID`
df %>% 
  pull(Country, name = cusnames)
## Error in `pull()`:
## ! `!!name` must select exactly one column.

The name argument can also be passed by position, without naming it; note, however, that this doesn’t mean that we can extract two columns: the second column is only used to name the vector.

df %>%
  slice(1:100) %>%
  pull(Country, `Customer ID`)
##            13085            13085            13085            13085 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13085            13085            13085            13085 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13085            13085            13085            13085 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13078            13078            13078            13078 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13078            13078            13078            13078 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13078            13078            13078            13078 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13078            13078            13078            13078 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13078            13078            13078            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            15362            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            15362            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            15362            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            15362            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            15362            15362 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            15362            15362            18102            18102 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            18102            18102            18102            18102 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            18102            18102            18102            18102 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            18102            18102            18102            18102 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            18102            18102            18102            12682 
## "United Kingdom" "United Kingdom" "United Kingdom"         "France" 
##            12682            12682            12682            12682 
##         "France"         "France"         "France"         "France" 
##            12682            12682            12682            12682 
##         "France"         "France"         "France"         "France" 
##            12682            12682            12682            12682 
##         "France"         "France"         "France"         "France" 
##            12682            12682            12682            12682 
##         "France"         "France"         "France"         "France" 
##            12682            12682            18087            18087 
##         "France"         "France" "United Kingdom" "United Kingdom" 
##            18087            18087            18087            18087 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" 
##            13635            13635            13635            13635 
## "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"

- with group_by()

Grouping is ignored: pull() returns the same vector as it does on the ungrouped data frame.

df %>%
  slice(1:100) %>%
  group_by(`Customer ID`) %>%
  pull(Country)
##   [1] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##   [5] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##   [9] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [13] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [17] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [21] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [25] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [29] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [33] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [37] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [41] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [45] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [49] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [53] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [57] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [61] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [65] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [69] "United Kingdom" "United Kingdom" "United Kingdom" "France"        
##  [73] "France"         "France"         "France"         "France"        
##  [77] "France"         "France"         "France"         "France"        
##  [81] "France"         "France"         "France"         "France"        
##  [85] "France"         "France"         "France"         "France"        
##  [89] "France"         "France"         "United Kingdom" "United Kingdom"
##  [93] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"
##  [97] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom"