长宽数据转换

207 阅读1分钟

长宽数据转换

在R语言中可以使用tidyr包和reshape2包进行长宽数据转换。

tidyr

宽数据转换为长数据

library(tidyr)
library(reshape2)

data(iris)
head(iris)
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1          5.1         3.5          1.4         0.2  setosa
# 2          4.9         3.0          1.4         0.2  setosa
# 3          4.7         3.2          1.3         0.2  setosa
# 4          4.6         3.1          1.5         0.2  setosa
# 5          5.0         3.6          1.4         0.2  setosa
# 6          5.4         3.9          1.7         0.4  setosa
iris_long <- tidyr::pivot_longer(iris, cols = Sepal.Length:Petal.Width,names_to = 'Varname',values_to = 'Value')
head(iris_long)
#   Species Varname      Value
#   <fct>   <chr>        <dbl>
# 1 setosa  Sepal.Length   5.1
# 2 setosa  Sepal.Width    3.5
# 3 setosa  Petal.Length   1.4
# 4 setosa  Petal.Width    0.2
# 5 setosa  Sepal.Length   4.9
# 6 setosa  Sepal.Width    3  
head(relig_income)
#   religion       `<$10k` `$10-20k` `$20-30k` `$30-40k` `$40-50k` `$50-75k` `$75-100k`
#   <chr>            <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>      <dbl>
# 1 Agnostic            27        34        60        81        76       137        122
# 2 Atheist             12        27        37        52        35        70         73
# 3 Buddhist            27        21        30        34        33        58         62
# 4 Catholic           418       617       732       670       638      1116        949
# 5 Don’t know/re…      15        14        15        11        10        35         21
# 6 Evangelical P…     575       869      1064       982       881      1486        949
relig_income %>%
  pivot_longer(!religion, names_to = "income", values_to = "count")
# A tibble: 180 × 3
#    religion income             count
#    <chr>    <chr>              <dbl>
#  1 Agnostic <$10k                 27
#  2 Agnostic $10-20k               34
#  3 Agnostic $20-30k               60
#  4 Agnostic $30-40k               81
#  5 Agnostic $40-50k               76
#  6 Agnostic $50-75k              137
#  7 Agnostic $75-100k             122
#  8 Agnostic $100-150k            109
#  9 Agnostic >150k                 84
# 10 Agnostic Don't know/refused    96

长数据转换为宽数据

iris_long %>% group_by(Varname) %>% dplyr::mutate(id = 1:n()) %>% 
  pivot_wider(names_from = Varname, values_from = Value)
#    Species    id Sepal.Length Sepal.Width Petal.Length Petal.Width
#    <fct>   <int>        <dbl>       <dbl>        <dbl>       <dbl>
#  1 setosa      1          5.1         3.5          1.4         0.2
#  2 setosa      2          4.9         3            1.4         0.2
#  3 setosa      3          4.7         3.2          1.3         0.2
#  4 setosa      4          4.6         3.1          1.5         0.2
#  5 setosa      5          5           3.6          1.4         0.2
#  6 setosa      6          5.4         3.9          1.7         0.4
#  7 setosa      7          4.6         3.4          1.4         0.3
#  8 setosa      8          5           3.4          1.5         0.2
#  9 setosa      9          4.4         2.9          1.4         0.2
# 10 setosa     10          4.9         3.1          1.5         0.1

reshape2

宽数据转换为长数据

iris_long <- melt(iris,id.vars = 'Species',variable.name = 'Varnames',value.name = 'values')
head(iris_long)
#   Species     Varnames values
# 1  setosa Sepal.Length    5.1
# 2  setosa Sepal.Length    4.9
# 3  setosa Sepal.Length    4.7
# 4  setosa Sepal.Length    4.6
# 5  setosa Sepal.Length    5.0
# 6  setosa Sepal.Length    5.4

长数据转化为宽数据

iris_long %>% group_by(Varname) %>% dplyr::mutate(id = 1:n()) %>% 
  reshape2::dcast(id~Species+Varname, value.var = 'Value')
#  id setosa_Sepal.Length setosa_Sepal.Width setosa_Petal.Length setosa_Petal.Width
# 1  1                 5.1                3.5                 1.4                0.2
# 2  2                 4.9                3.0                 1.4                0.2
# 3  3                 4.7                3.2                 1.3                0.2
# 4  4                 4.6                3.1                 1.5                0.2
# 5  5                 5.0                3.6                 1.4                0.2
# 6  6                 5.4                3.9                 1.7                0.4