分享

利用ggfortify包的kmeans详细分析

 生物_医药_科研 2018-12-22
更多精彩至阅读原文


# Cluster the iris measurements with k-means and plot the result via autoplot.
library(ggfortify)
set.seed(1)
# iris[-5] drops the 5th column (Species) so only the numeric columns are
# clustered; 3 is the number of clusters requested from kmeans().
p <- autoplot(kmeans(iris[-5], 3), data = iris)
# plot as it is
p

首先也看看原始数据的排布情况吧:

iris
    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
1            5.1         3.5          1.4         0.2     setosa
2            4.9         3.0          1.4         0.2     setosa
3            4.7         3.2          1.3         0.2     setosa
4            4.6         3.1          1.5         0.2     setosa
................................................................
................................................................
49           5.3         3.7          1.5         0.2     setosa
50           5.0         3.3          1.4         0.2     setosa
51           7.0         3.2          4.7         1.4 versicolor
52           6.4         3.2          4.5         1.5 versicolor
................................................................
................................................................
99           5.1         2.5          3.0         1.1 versicolor
100          5.7         2.8          4.1         1.3 versicolor
101          6.3         3.3          6.0         2.5  virginica
102          5.8         2.7          5.1         1.9  virginica
103          7.1         3.0          5.9         2.1  virginica
................................................................
................................................................
148          6.5         3.0          5.2         2.0  virginica
149          6.2         3.4          5.4         2.3  virginica
150          5.9         3.0          5.1         1.8  virginica

结果显示为:

聚成4类看看效果:

# Same k-means plot as before, but asking for 4 clusters instead of 3.
library(ggfortify)
set.seed(1)
p <- autoplot(kmeans(iris[-5], 4), data = iris)
# plot as it is
p

看看数据被拉开的效果:

改变颜色看看:

# 4-cluster k-means plot, then the same plot with a different colour scale.
library(ggfortify)
set.seed(1)
p <- autoplot(kmeans(iris[-5], 4), data = iris)
# plot as it is
p
# change colour mapping
p + scale_colour_brewer()

显示:


autoplot调用名为ggplot2 :: fortify的通用函数将传递的实例转换为data.frame。 {ggfortify}为所有支持的类定义了fortify函数。 如果想要一个不同类型的图,可以使用fortify来获取data.frame,然后以正常方式调用ggplot。

以下示例显示了计算每个k均值聚类的记录的条形图:


# Convert the k-means result into a data.frame with fortify() instead of
# plotting it directly; the result carries the iris columns plus `cluster`.
library(ggfortify)
set.seed(1)

df <- fortify(kmeans(iris[-5], 3), data = iris)
head(df)

df(在原始数据基础上增加了 cluster 列)的前几行为:

head(df)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species cluster
1          5.1         3.5          1.4         0.2  setosa       1
2          4.9         3.0          1.4         0.2  setosa       1
3          4.7         3.2          1.3         0.2  setosa       1
4          4.6         3.1          1.5         0.2  setosa       1
5          5.0         3.6          1.4         0.2  setosa       1
6          5.4         3.9          1.7         0.4  setosa       1

df:

df
    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species cluster
1            5.1         3.5          1.4         0.2     setosa       1
2            4.9         3.0          1.4         0.2     setosa       1
3            4.7         3.2          1.3         0.2     setosa       1
4            4.6         3.1          1.5         0.2     setosa       1
5            5.0         3.6          1.4         0.2     setosa       1
6            5.4         3.9          1.7         0.4     setosa       1
7            4.6         3.4          1.4         0.3     setosa       1
8            5.0         3.4          1.5         0.2     setosa       1
9            4.4         2.9          1.4         0.2     setosa       1
10           4.9         3.1          1.5         0.1     setosa       1
................................................................
................................................................     1
49           5.3         3.7          1.5         0.2     setosa       1
50           5.0         3.3          1.4         0.2     setosa       1
51           7.0         3.2          4.7         1.4 versicolor       3
................................................................
................................................................    3
99           5.1         2.5          3.0         1.1 versicolor       3
100          5.7         2.8          4.1         1.3 versicolor       3
101          6.3         3.3          6.0         2.5  virginica       2
102          5.8         2.7          5.1         1.9  virginica       3
................................................................
................................................................     3
148          6.5         3.0          5.2         2.0  virginica       2
149          6.2         3.4          5.4         2.3  virginica       2
150          5.9         3.0          5.1         1.8  virginica       3

用柱形图显示

# Bar chart of how many records fall into each k-means cluster.
library(ggfortify)
set.seed(1)
df <- fortify(kmeans(iris[-5], 3), data = iris)
head(df)
# One bar per cluster; fill uses the same column so each bar gets its own colour.
ggplot(df, aes(x = cluster, fill = cluster)) + geom_bar()

呈现:

autoplot 还可以一次绘制多个子图,例如,autoplot.lm 会绘制如下的诊断图:

# Fit a linear model on the trees data and draw its diagnostic plots.
library(ggfortify)
set.seed(1)
res <- lm(Volume ~ Girth, data = trees)
# ncol = 4 requests a 4-column layout for the diagnostic sub-plots.
mp <- autoplot(res, ncol = 4)
mp

原始数据:

trees
   Girth Height Volume
1    8.3     70   10.3
2    8.6     65   10.3
3    8.8     63   10.2
.......................
.......................
27  17.5     82   55.7
28  17.9     80   58.3
29  18.0     80   51.5
30  18.0     80   51.0
31  20.6     87   77.0

呈现:

因为{ggplot2}本身不能在单个实例中处理不同类型的图,{ggfortify}使用名为ggmultiplot的原始类来处理它们,可以使用+运算符来装饰ggmultiplot:


# Draw the lm diagnostic plots and inspect the class of the returned object.
library(ggfortify)
set.seed(1)
res <- lm(Volume ~ Girth, data = trees)
mp <- autoplot(res, ncol = 4)
mp
class(mp)

显示:

class(mp)
[1] "ggmultiplot"
attr(,"package")
[1] "ggfortify"
# Decorate every sub-plot of the ggmultiplot at once with the + operator.
library(ggfortify)
set.seed(1)
res <- lm(Volume ~ Girth, data = trees)
mp <- autoplot(res, ncol = 4)
mp
class(mp)
mp + theme_bw()

呈现:

此外,如果ggplot或ggmultiplot实例作为右侧给出,+运算符会附加图。 以下示例在诊断后附加2个散点图:

# Append two extra scatter plots after the lm diagnostic plots.
library(ggfortify)
set.seed(1)
res <- lm(Volume ~ Girth, data = trees)
mp <- autoplot(res, ncol = 4)
mp
class(mp)
mp + theme_bw()
# With a ggplot on the right-hand side, + appends it as an additional sub-plot.
mp +
  (ggplot(trees, aes(Girth, Volume)) + geom_point()) +
  (ggplot(trees, aes(Girth, Height)) + geom_point())

呈现:

可以使用 [ 和 [[ 运算符来提取子集:

# Build the diagnostic multiplot, then extract a subset of its sub-plots.
library(ggfortify)
set.seed(1)
res <- lm(Volume ~ Girth, data = trees)
mp <- autoplot(res, ncol = 4)
mp
class(mp)
mp + theme_bw()
mp +
  (ggplot(trees, aes(Girth, Volume)) + geom_point()) +
  (ggplot(trees, aes(Girth, Height)) + geom_point())

# Single-bracket subsetting with a range selects sub-plots 2 and 3.
mp[2:3]

呈现:


# Build the diagnostic multiplot, then pull out sub-plots with [ and [[.
library(ggfortify)
set.seed(1)
res <- lm(Volume ~ Girth, data = trees)
mp <- autoplot(res, ncol = 4)
mp
class(mp)
mp + theme_bw()
mp +
  (ggplot(trees, aes(Girth, Volume)) + geom_point()) +
  (ggplot(trees, aes(Girth, Height)) + geom_point())

# Single-bracket subsetting with a range selects sub-plots 2 and 3.
mp[2:3]

# Double-bracket subsetting extracts the first sub-plot on its own.
mp[[1]]

呈现:

# Build the diagnostic multiplot, then restyle only some of its sub-plots.
library(ggfortify)
set.seed(1)
res <- lm(Volume ~ Girth, data = trees)
mp <- autoplot(res, ncol = 4)
mp
class(mp)
mp + theme_bw()
mp +
  (ggplot(trees, aes(Girth, Volume)) + geom_point()) +
  (ggplot(trees, aes(Girth, Height)) + geom_point())

# Single-bracket subsetting with a range selects sub-plots 2 and 3.
mp[2:3]

# Double-bracket subsetting extracts the first sub-plot on its own.
mp[[1]]

# Assign back into the same positions so only sub-plots 2 and 3 get theme_bw().
mp[2:3] <- mp[2:3] + theme_bw()
mp

呈现:


# Fit k-means for several cluster counts and plot all fits side by side.
library(ggfortify)
set.seed(1)
# 3, 4 and 5 are the cluster counts tried; lapply() yields one fit per count.
res <- lapply(c(3, 4, 5), function(x) kmeans(iris[-5], x))
autoplot(res, data = iris[-5], ncol = 3)

呈现:



声明:本文基于参考文摘进行编辑
本文编辑:魏博

微信ID: lxdlxd19900511

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多