长宽数据转换
在R语言中可以使用tidyr包和reshape2包进行长宽数据转换。
tidyr
宽数据转换为长数据
library(tidyr)
library(reshape2)
data(iris)
head(iris)
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1 5.1 3.5 1.4 0.2 setosa
# 2 4.9 3.0 1.4 0.2 setosa
# 3 4.7 3.2 1.3 0.2 setosa
# 4 4.6 3.1 1.5 0.2 setosa
# 5 5.0 3.6 1.4 0.2 setosa
# 6 5.4 3.9 1.7 0.4 setosa
iris_long <- tidyr::pivot_longer(iris, cols = Sepal.Length:Petal.Width,names_to = 'Varname',values_to = 'Value')
head(iris_long)
# Species Varname Value
# <fct> <chr> <dbl>
# 1 setosa Sepal.Length 5.1
# 2 setosa Sepal.Width 3.5
# 3 setosa Petal.Length 1.4
# 4 setosa Petal.Width 0.2
# 5 setosa Sepal.Length 4.9
# 6 setosa Sepal.Width 3
head(relig_income)
# religion `<$10k` `$10-20k` `$20-30k` `$30-40k` `$40-50k` `$50-75k` `$75-100k`
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 Agnostic 27 34 60 81 76 137 122
# 2 Atheist 12 27 37 52 35 70 73
# 3 Buddhist 27 21 30 34 33 58 62
# 4 Catholic 418 617 732 670 638 1116 949
# 5 Don’t know/re… 15 14 15 11 10 35 21
# 6 Evangelical P… 575 869 1064 982 881 1486 949
relig_income %>%
pivot_longer(!religion, names_to = "income", values_to = "count")
# A tibble: 180 × 3
# religion income count
# <chr> <chr> <dbl>
# 1 Agnostic <$10k 27
# 2 Agnostic $10-20k 34
# 3 Agnostic $20-30k 60
# 4 Agnostic $30-40k 81
# 5 Agnostic $40-50k 76
# 6 Agnostic $50-75k 137
# 7 Agnostic $75-100k 122
# 8 Agnostic $100-150k 109
# 9 Agnostic >150k 84
# 10 Agnostic Don't know/refused 96
长数据转换为宽数据
iris_long %>% group_by(Varname) %>% dplyr::mutate(id = 1:n()) %>%
pivot_wider(names_from = Varname, values_from = Value)
# Species id Sepal.Length Sepal.Width Petal.Length Petal.Width
# <fct> <int> <dbl> <dbl> <dbl> <dbl>
# 1 setosa 1 5.1 3.5 1.4 0.2
# 2 setosa 2 4.9 3 1.4 0.2
# 3 setosa 3 4.7 3.2 1.3 0.2
# 4 setosa 4 4.6 3.1 1.5 0.2
# 5 setosa 5 5 3.6 1.4 0.2
# 6 setosa 6 5.4 3.9 1.7 0.4
# 7 setosa 7 4.6 3.4 1.4 0.3
# 8 setosa 8 5 3.4 1.5 0.2
# 9 setosa 9 4.4 2.9 1.4 0.2
# 10 setosa 10 4.9 3.1 1.5 0.1
reshape2
宽数据转换为长数据
iris_long <- melt(iris,id.vars = 'Species',variable.name = 'Varnames',value.name = 'values')
head(iris_long)
# Species Varnames values
# 1 setosa Sepal.Length 5.1
# 2 setosa Sepal.Length 4.9
# 3 setosa Sepal.Length 4.7
# 4 setosa Sepal.Length 4.6
# 5 setosa Sepal.Length 5.0
# 6 setosa Sepal.Length 5.4
长数据转化为宽数据
iris_long %>% group_by(Varname) %>% dplyr::mutate(id = 1:n()) %>%
reshape2::dcast(id~Species+Varname, value.var = 'Value')
# id setosa_Sepal.Length setosa_Sepal.Width setosa_Petal.Length setosa_Petal.Width
# 1 1 5.1 3.5 1.4 0.2
# 2 2 4.9 3.0 1.4 0.2
# 3 3 4.7 3.2 1.3 0.2
# 4 4 4.6 3.1 1.5 0.2
# 5 5 5.0 3.6 1.4 0.2
# 6 6 5.4 3.9 1.7 0.4