1.1 向量 1.2 矩阵 1.3 数据框 2.1 c() 连接单个数据 2.2 ':' 生成1/-1等差向量 2.3 seq() 生成等距向量 2.4 rep() 生成重复数据 3.1 引用行/引用列 3.2 引用单个元素 3.3 引用子矩阵 3.4 变量名引用 4.1 更改工作目录 4.2 read.table 4.3 read.csv 正文 1 数据结构 1.1 向量 > c(1,2,8)#生成包含1,2,8的一维数组(向量) [1] 1 2 8 1.2 矩阵
1.3 数据框 > x <- c(11:20) #其中' <- '是赋值的意思,将向量c(11:20)赋值给对象x > y <- c(1:10) > data.frame(xf = x, yf = y) #将向量x和y合并存储到数据框中,并重命名为xf和yf xf yf 1 11 1 2 12 2 3 13 3 4 14 4 5 15 5 6 16 6 7 17 7 8 18 8 9 19 9 10 20 10 2 生成数据 本节主要讲“c()”、':'、seq、rep等四种数据生成的内容(入门必须学) 2.1 “c” 连接单个数据
2.2 “:” 生成1/-1等差向量 > 1.1:10 #从1.1起每次加1,不超过终点10,故止于9.1 [1] 1.1 2.1 3.1 4.1 5.1 6.1 7.1 8.1 9.1 > 1:10 #递增(步长为1),如x=1:10 [1] 1 2 3 4 5 6 7 8 9 10 > 10:1 #递减(步长为-1),如y=10:1 [1] 10 9 8 7 6 5 4 3 2 1 ![]() 2.3 seq 生成等距向量 ![]()
![]() 2.4 rep(x,n) 重复 ![]() rep(1:3,3) rep(1:3,each=3)
#> rep(1:3,3) #[1] 1 2 3 1 2 3 1 2 3 #> rep(1:3,each = 3) #[1] 1 1 1 2 2 2 3 3 3 3 数据引用(以矩阵为例) ![]() 3.1 行引用/列引用 ![]()
![]() 3.2 行列值引用:数据集[行值,列值] ![]() > iris[1,] #引用第1行数据 Sepal.Length Sepal.Width Petal.Length Petal.Width Species 1 5.1 3.5 1.4 0.2 setosa
> head(iris[,1],5) #引用第1列的数据,其中因数据过长,使用head()函数取前5个数字 [1] 5.1 4.9 4.7 4.6 5.0 ![]() 3.3 引用子矩阵 ![]()
![]() 3.4 变量名引用 ![]() > head(iris$Petal.Length,5) [1] 1.4 1.4 1.3 1.5 1.4 4 读取外部数据(以.csv表为例) 本节主要讲如何读取外部数据(表) (以.csv表为例) ![]() 4.1 设置工作目录 ![]()
![]() ![]() ![]() 4.2 read.table() ![]() read.table(file, header = FALSE, sep = '', quote = "\"'", dec = '.', numerals = c('allow.loss', 'warn.loss', 'no.loss'), row.names, col.names, as.is = !stringsAsFactors, na.strings = 'NA', colClasses = NA, nrows = -1, skip = 0, check.names = TRUE, fill = !blank.lines.skip, strip.white = FALSE, blank.lines.skip = TRUE, comment.char = '#', allowEscapes = FALSE, flush = FALSE, stringsAsFactors = default.stringsAsFactors(), fileEncoding = '', encoding = 'unknown', text, skipNul = FALSE) read.csv(file, header = TRUE, sep = ',', quote = "\"", dec = '.', fill = TRUE, comment.char = '', ...)
read.csv2(file, header = TRUE, sep = ';', quote = "\"", dec = ',', fill = TRUE, comment.char = '', ...)
read.delim(file, header = TRUE, sep = '\t', quote = "\"", dec = '.', fill = TRUE, comment.char = '', ...)
read.delim2(file, header = TRUE, sep = '\t', quote = "\"", dec = ',', fill = TRUE, comment.char = '', ...)
> df<- read.table('data.txt') > df V1 V2 1 x y 2 1 2 3 3 4 4 5 6 > df <- read.table('data.txt',header = T) > df x y 1 1 2 2 3 4 3 5 6
> df <- read.table('data.csv',header = T) #读数+首行表头 > head(df) ID.Sepal.Length.Sepal.Width.Petal.Length.Petal.Width.Species 1 1,5.1,3.5,1.4,0.2,setosa 2 2,4.9,3,1.4,0.2,setosa 3 3,4.7,3.2,1.3,0.2,setosa 4 4,4.6,3.1,1.5,0.2,setosa 5 5,5,3.6,1.4,0.2,setosa 6 6,5.4,3.9,1.7,0.4,setosa
> df <- read.table('data.csv',header = T,sep=',',stringsAsFactors = T) ##读数+首行表头+','逗号分割+字符转因子factor > head(df) ID Sepal.Length Sepal.Width Petal.Length Petal.Width Species 1 1 5.1 3.5 1.4 0.2 setosa 2 2 4.9 3.0 1.4 0.2 setosa 3 3 4.7 3.2 1.3 0.2 setosa 4 4 4.6 3.1 1.5 0.2 setosa 5 5 5.0 3.6 1.4 0.2 setosa 6 6 5.4 3.9 1.7 0.4 setosa
#请注意species结果与样式3中结果的差异 > summary(df) ID Sepal.Length Sepal.Width Petal.Length Min. : 1.00 Min. :4.300 Min. :2.000 Min. :1.000 1st Qu.: 38.25 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 Median : 75.50 Median :5.800 Median :3.000 Median :4.350 Mean : 75.50 Mean :5.843 Mean :3.057 Mean :3.758 3rd Qu.:112.75 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 Max. :150.00 Max. :7.900 Max. :4.400 Max. :6.900 Petal.Width Species Min. :0.100 setosa :50 1st Qu.:0.300 versicolor:50 Median :1.300 virginica :50 Mean :1.199 3rd Qu.:1.800 Max. :2.500 ![]() 4.3 read.csv() ![]()
#实例 > df <- read.csv('data.csv') #在R 4.0.0之前大致相当于df <- read.table('data.csv',header = T,sep=',',stringsAsFactors = T);自R 4.0.0起stringsAsFactors默认为FALSE,需显式写read.csv('data.csv',stringsAsFactors = TRUE)才会把字符列转为因子 > head(df) ID Sepal.Length Sepal.Width Petal.Length Petal.Width Species 1 1 5.1 3.5 1.4 0.2 setosa 2 2 4.9 3.0 1.4 0.2 setosa 3 3 4.7 3.2 1.3 0.2 setosa 4 4 4.6 3.1 1.5 0.2 setosa 5 5 5.0 3.6 1.4 0.2 setosa 6 6 5.4 3.9 1.7 0.4 setosa 喜欢你就转发一下吧 ![]() |
|
来自: 西北望msm66g9f > 《培训》