使用spotifyr聚类Springsteen专辑

栏目: 编程工具 · 发布时间: 6年前

内容简介：spotifyr包很棒，让我们可以探索音乐的各个方面，如节奏（tempo）、可舞性（danceability）和情绪效价（valence）。在这篇文章中，我们将从相同点和不同点来探讨布鲁斯·斯普林斯汀的专辑。使用get_artist_audio_features()函数很容易获得数据。在这里，我们将从csv文件中加载它并查看。我们只需要做一点清洗，并删除一些非录音室专辑。

spotifyr包很棒，让我们可以探索音乐的各个方面，如节奏（tempo）、可舞性（danceability）和情绪效价（valence）。在这篇文章中，我们将从相同点和不同点来探讨布鲁斯·斯普林斯汀的专辑。

# devtools::install_github('charlie86/spotifyr')

library(spotifyr)
library(tidyverse)
library(magrittr)
library(ggridges)
library(ggcorrplot)
library(viridisLite)
library(factoextra)
library(ggiraphExtra)

使用get_artist_audio_features()函数很容易获得数据。在这里，我们将从csv文件中加载它并查看。

# Live alternative: fetch the audio features directly through spotifyr.
# df <- get_artist_audio_features(artist = "bruce springsteen")

# Load the track-level audio features from the CSV copy instead.
df <- read_csv(
  file = "https://raw.github.com/peerchristensen/Springsteen_album_clusters/master/springsteen_albums.csv"
)

# Print a compact overview of every column and its type.
df %>%
  glimpse()

## Observations: 537
## Variables: 31
## $ artist_name            <chr> "Bruce Springsteen", "Bruce Springsteen...
## $ artist_uri             <chr> "3eqjTLE0HfPfh78zjh6TqT", "3eqjTLE0HfPf...
## $ album_uri              <chr> "0PMasrHdpaoIRuHuhHp72O", "0PMasrHdpaoI...
## $ album_name             <chr> "Born In The U.S.A.", "Born In The U.S....
## $ album_img              <chr> "https://i.scdn.co/image/d002b63ceb5658...
## $ album_type             <chr> "album", "album", "album", "album", "al...
## $ is_collaboration       <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALS...
## $ album_release_date     <chr> "1984-06-04", "1984-06-04", "1984-06-04...
## $ album_release_year     <date> 1984-06-04, 1984-06-04, 1984-06-04, 19...
## $ album_popularity       <dbl> 76, 76, 76, 76, 76, 76, 76, 76, 76, 76,...
## $ track_name             <chr> "Born in the U.S.A.", "Cover Me", "Darl...
## $ track_uri              <chr> "0dOg1ySSI7NkpAe89Zo0b9", "4U7NhC2rQTAh...
## $ track_number           <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ...
## $ disc_number            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ danceability           <dbl> 0.398, 0.535, 0.536, 0.429, 0.544, 0.62...
## $ energy                 <dbl> 0.952, 0.884, 0.982, 0.949, 0.762, 0.44...
## $ key                    <chr> "E", "A", "G", "C", "A#", "C#", "F", "A...
## $ loudness               <dbl> -6.042, -5.499, -4.674, -5.295, -7.289,...
## $ mode                   <chr> "major", "minor", "major", "major", "ma...
## $ speechiness            <dbl> 0.0610, 0.0407, 0.0389, 0.0458, 0.0382,...
## $ acousticness           <dbl> 0.000373, 0.001880, 0.014100, 0.084200,...
## $ instrumentalness       <dbl> 7.75e-05, 1.26e-03, 3.67e-05, 0.00e+00,...
## $ liveness               <dbl> 0.1000, 0.1400, 0.2740, 0.1540, 0.0740,...
## $ valence                <dbl> 0.584, 0.796, 0.963, 0.967, 0.473, 0.86...
## $ tempo                  <dbl> 122.093, 120.555, 119.201, 184.286, 120...
## $ duration_ms            <dbl> 278680, 205987, 288027, 192267, 215427,...
## $ time_signature         <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...
## $ key_mode               <chr> "E major", "A minor", "G major", "C maj...
## $ track_popularity       <dbl> 72, 51, 45, 47, 49, 71, 50, 47, 53, 62,...
## $ track_preview_url      <chr> "https://p.scdn.co/mp3-preview/3b6a5b91...
## $ track_open_spotify_url <chr> "https://open.spotify.com/track/0dOg1yS...

我们只需要做一点清洗和删除一些非录音室的专辑。

# Albums to drop: some contain only one song, others are alternate versions.
remove_albums <- c(
  "Greatest Hits",
  "Hammersmith Odeon, London 75",
  "The Essential Bruce Springsteen (Bonus Disc)",
  "The Ties That Bind: The River Collection",
  "Chapter and Verse",
  "The Promise",
  "Tracks"
)

# Drop the listed albums and anything with "live"/"Live" in its name, then
# tidy the names: title case, strip everything from the first colon onward,
# and shorten three long titles. The case_when() branches are tested in the
# same order as the original sequential replacements.
df <- df %>%
  filter(!(album_name %in% remove_albums | grepl("live|Live", album_name))) %>%
  mutate(
    album_name = str_to_title(album_name),
    album_name = gsub(":.*", "", album_name),
    album_name = case_when(
      grepl("Innocent", album_name)  ~ "The Wild, The Innocent..",
      grepl("Greetings", album_name) ~ "Greetings",
      grepl("Darkness", album_name)  ~ "Darkness",
      TRUE                           ~ album_name
    )
  )

让我们先来看看Springsteen歌曲中最常用的五个调（key）。

# Bar chart of the five most frequent key/mode combinations across all tracks.
df %>%
  # count() with sort = TRUE tallies each key_mode and orders by frequency,
  # returning an ungrouped result.
  count(key_mode, sort = TRUE) %>%
  # Name the weighting column explicitly instead of letting top_n() guess it.
  # Note: ties at the cut-off can yield more than five rows.
  top_n(5, wt = n) %>%
  # Remember the frequency rank so the bars can be ordered by it.
  mutate(ordered = row_number()) %>%
  ggplot(aes(x = reorder(key_mode, desc(ordered)), y = n, fill = n)) +
  geom_col() +
  coord_flip() +
  ggtitle("Five most common keys") +
  # guide = "none" hides the colour bar; the logical form (and `F`, which can
  # be reassigned) is deprecated in current ggplot2.
  scale_fill_viridis_c(option = "B", direction = -1, guide = "none") +
  theme_minimal() +
  labs(y = "n", x = "key")

使用spotifyr聚类Springsteen专辑

正如我们所看到的，spotifyr从Spotify API获取了许多有趣的数据。让我们先来看看每张专辑的可舞性（danceability）。《Born to Run》的可舞性最低，而《Tunnel of Love》的可舞性最高。

# Ridge plot of the danceability distribution for each album, with albums
# ordered on the y axis by release date.
# (ggplot() ignores dplyr grouping, so no group_by() is needed here.)
df %>%
  ggplot(aes(x    = danceability,
             y    = reorder(album_name, desc(album_release_year)),
             fill = reorder(album_name, desc(album_release_year)))) +
  geom_density_ridges(colour = "snow") +
  # guide = "none" suppresses the legend; the logical `F` form is deprecated
  # and `F` itself can be reassigned.
  scale_fill_viridis_d(option = "B", begin = .05, direction = -1,
                       guide = "none") +
  theme_minimal() +
  ggtitle("Danceability") +
  labs(y = "album")

使用spotifyr聚类Springsteen专辑

让我们把所有的特征放在同一个图中。

# Ridge plots of six audio features per album, one facet per feature.
df %>%
  # Reshape to long form: one row per track and feature.
  pivot_longer(
    cols      = c(danceability, energy, loudness, valence, tempo, acousticness),
    names_to  = "feature",
    values_to = "measure"
  ) %>%
  ggplot(aes(x    = measure,
             y    = reorder(album_name, desc(album_release_year)),
             fill = album_release_date)) +
  # `show.legend` is the real parameter name; the original `legend = F` was
  # not a geom parameter and did not hide anything.
  geom_density_ridges(rel_min_height = 0.005, show.legend = FALSE, alpha = .9,
                      size = .2, colour = "snow") +
  # Free scales because the features live on very different ranges.
  facet_wrap(~feature, scales = "free", ncol = 2) +
  scale_fill_viridis_d(option = "B", begin = .05) +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 7)) +
  labs(y = "album name") +
  ggtitle("Springsteen albums in six features",
          subtitle = "Acousticness, danceability, energy, loudness, tempo and valence") +
  # "none" replaces the deprecated guides(fill = FALSE).
  guides(fill = "none")

使用spotifyr聚类Springsteen专辑

将各个特征之间的相关性可视化也会很有趣。energy和loudness呈正相关，而acousticness和loudness呈负相关，这并不令人意外。

# p-values for each pairwise feature correlation; passed to the plot below as
# its p.mat argument.
sign_test <- cor_pmat(
  select(df, acousticness, danceability, energy, loudness, tempo, valence)
)

# Lower-triangle correlation matrix of the six audio features.
ggcorrplot(
  cor(select(df, acousticness, danceability, energy, loudness, tempo, valence)),
  type   = "lower",
  p.mat  = sign_test,
  colors = c(inferno(5)[2], "snow", inferno(5)[4])
) +
  ggtitle("Correlations between features",
          subtitle = "Non-significant correlations marked with X")

使用spotifyr聚类Springsteen专辑

基于这些特性，我们还可以探索专辑在距离矩阵中的相似性。在这幅图中，橙色表示专辑之间的高度差异或很大的“距离”。

# Audio features used to compare albums.
cluster_features <- c("acousticness", "danceability", "energy",
                      "loudness", "tempo", "valence")

# Album-level feature profile: one row per album (album name as row name),
# one column per feature.
dfScale <- df %>%
  select(album_name, all_of(cluster_features)) %>%
  # Standardise each feature across ALL tracks before grouping. Calling
  # scale() inside a grouped summarise() centres every album on its own mean,
  # so mean(scale(x)) is ~0 for every album and the between-album signal is
  # lost.
  mutate(across(all_of(cluster_features), ~ as.numeric(scale(.x)))) %>%
  group_by(album_name) %>%
  summarise(across(all_of(cluster_features), mean)) %>%
  as.data.frame() %>%
  # Move album names into row names so downstream plots label rows by album.
  column_to_rownames(var = "album_name")

# Pairwise distances between albums; stand = TRUE standardises the columns
# before the distances are computed.
df_dist <- get_dist(dfScale, stand = TRUE)

fviz_dist(df_dist,
          gradient = list(low = inferno(5)[2], mid = "white",
                          high = inferno(5)[4])) +
  theme_minimal() +
  ggtitle("Distance matrix",
          subtitle  = "Similarity between albums based on all features") +
  theme(axis.text.x = element_text(hjust = 1, angle = 45),
        axis.title = element_blank())

使用spotifyr聚类Springsteen专辑

为了获得更清晰的图像，我们可以使用ggiraphExtra包中的雷达图来探索专辑和特征之间的模式。

# Radar chart of the feature profile of each album, one facet per album.
dfScale %>%
  # ggRadar needs the album as a column, so copy it out of the row names.
  mutate(albums = row.names(dfScale)) %>%
  ggRadar(aes(group = albums),
          rescale = FALSE, legend.position = "none",
          size = 1, interactive = FALSE, use.label = TRUE) +
  facet_wrap(~albums) +
  scale_y_discrete(breaks = NULL) +
  scale_fill_viridis_d(option = "B") +
  scale_colour_viridis_d(option = "B") +
  # The complete theme must come first: adding theme_minimal() after theme()
  # replaces every earlier tweak, which silently discarded the axis text size.
  theme_minimal() +
  theme(axis.text.x     = element_text(size = 10),
        legend.position = "none")

使用spotifyr聚类Springsteen专辑

最后一步，我们将了解如何使用层次聚类和k-means聚类根据各种特征对专辑进行分组。我们首先使用factoextra包中的fviz_nbclust()函数来估计最优的聚类数量。注意，该函数提供了多种估计聚类数量的方法，默认使用“silhouette”（轮廓系数）方法。

# Estimate the number of clusters for hierarchical clustering. The silhouette
# method is the function's default and is spelled out here for clarity.
fviz_nbclust(x = dfScale, FUNcluster = hcut, method = "silhouette") +
  ggtitle(label = "Optimal Number of Clusters: H-Clustering")

使用spotifyr聚类Springsteen专辑

# Hierarchical clustering of the albums on their standardised feature profiles.
df.hc <- hclust(dist(scale(dfScale)))

# Dendrogram cut into three clusters, each boxed and coloured.
fviz_dend(df.hc, k = 3,
          cex = .9,
          # One colour per cluster: the original supplied two colours for
          # k = 3, leaving the third cluster without a colour of its own.
          k_colors = inferno(10)[c(4, 6, 8)],
          color_labels_by_k = TRUE,
          rect = TRUE) +
  ggtitle("Hierarchical Clustering")

使用spotifyr聚类Springsteen专辑

# Estimate the number of clusters for k-means. The silhouette method is the
# function's default and is spelled out here for clarity.
fviz_nbclust(x = dfScale, FUNcluster = kmeans, method = "silhouette") +
  ggtitle(label = "Optimal Number of Clusters: K-means Clustering")

使用spotifyr聚类Springsteen专辑

# Fix the RNG so the random k-means starts are reproducible.
set.seed(324789)

# Two-cluster k-means on the album profiles, keeping the best of 25 starts.
km.res <- kmeans(dfScale, centers = 2, nstart = 25)

# Plot the clusters with convex hulls around each one.
fviz_cluster(km.res, data = dfScale,
             ellipse.type = "convex",
             # TRUE rather than `T`, which is an ordinary reassignable variable.
             repel = TRUE,
             # One colour per cluster, matching centers = 2.
             palette = inferno(10)[c(4, 6)],
             ggtheme = theme_minimal(),
             main = "K-means Clustering")

使用spotifyr聚类Springsteen专辑

作者：Peer Christensen 原文链接： https://peerchristensen.netlify.com/post/clustering-springsteen-albums-with-spotifyr/

数据人网： 数据人学习，交流和分享的平台，诚邀您创造和分享数据知识，共建和共享数据智库。

以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持码农网

查看所有标签

猜你喜欢:

本站部分资源来源于网络，本站转载出于传递更多信息之目的，版权归原作者或者来源机构所有，如转载稿涉及版权问题，请联系我们。

码农书籍

Beautiful Code

Greg Wilson、Andy Oram / O'Reilly Media / 2007-7-6 / GBP 35.99

In this unique work, leading computer scientists discuss how they found unusual, carefully designed solutions to difficult problems. This book lets the reader look over the shoulder of major coding an......一起来看看《Beautiful Code》这本书的介绍吧!

码农工具

使用spotifyr聚类Springsteen专辑

Beautiful Code

随机密码生成器

SHA 加密

RGB CMYK 转换工具