Lecture 1 Learning R

争子俱乐部 2013-07-15

展开全文

# ---- Lecture 1 ----

# A few useful functions

wd <- getwd()  # remember the current working directory

# Change the working directory. The path is specific to one machine, so only
# switch when the folder exists: an unconditional setwd() on a missing folder
# stops the whole script with an error.
if (dir.exists("E:/S123")) {
  setwd("E:/S123")
} else {
  message("Directory E:/S123 not found; staying in ", wd)
}

x <- rnorm(100)  # 100 draws from the standard normal distribution

rnorm        # a function name without parentheses prints the function
help(rnorm)  # open the help page for rnorm

ls()  # list the objects and functions defined in the current environment

apropos("norm")  # names of visible objects that contain "norm"

identical(1:10, 1:10)  # test two objects for exact equality

# Assignment and arithmetic

z <- rnorm(100, 4, 0.1)  # 100 normal draws with mean 4 and sd 0.1
median(z)

# Assignment can also point to the right with "->": the single statement
# below stores the value of z in y, w and x.
x <- z -> y -> w

# Basic operators: +, -, *, /, ^, %*% (matrix product), %% (modulo),
# %/% (integer division)

# A function name on its own prints the function
round
floor
ceiling

# Basic operations on vectors and sets

x <- 1:100
sample(x, 20)  # 20 values drawn from x without replacement

set.seed(0)  # fix the random seed so the following draws are reproducible
sample(1:10, 3)

z <- sample(1:2000, 100)
z[1:10]  # first ten elements

y <- c(1, 3, 7, 3, 4, 2)
z[y]  # elements of z at the positions listed in y

z <- sample(x, 20, replace = TRUE)  # sampling with replacement
z1 <- unique(z)                     # z with duplicates removed
length(z1)

xz <- setdiff(x, z)  # elements of x that are not in z
is.element(x, z)     # for each element of x: does it occur in z?
x %in% z             # the same test written with the %in% operator

sort(union(xz, z))
setequal(union(xz, z), x)  # do the two sets contain the same elements?
intersect(1:10, 7:50)      # elements common to both vectors

# prob gives the sampling weight of each element
sample(1:100, 20, prob = 1:100)

# Basic arithmetic

pi * 10^2  # see ?"*" for the help page on the arithmetic operators

# Operators are ordinary functions, so the same expression can be written as
# nested function calls
`*`(pi, `^`(10, 2))

x <- pi * 10^2
print(x)

pi^(1:5)  # pi raised to the powers 1 through 5

# Exploring the built-in data frame `cars`

class(cars)
typeof(cars)
names(cars)
summary(cars)
str(cars)
row.names(cars)

plot(dist ~ speed, cars)  # plot dist against speed

head(cars)  # first six rows, same as cars[1:6, ]
tail(cars)  # last six rows

ncol(cars)
nrow(cars)
dim(cars)

lm(dist ~ speed, data = cars)  # linear regression of dist on speed

# cut() converts a numeric vector to a factor: it divides the range of x into
# intervals and codes each value by the interval it falls into. The leftmost
# interval is level one, the next leftmost level two, and so on. Here the
# breaks are the quantiles of speed returned by quantile().
cars$qspeed <- cut(
  cars$speed,
  breaks = quantile(cars$speed),
  include.lowest = TRUE
)

names(cars)
cars[3]         # the third column, still as a data frame
table(cars[3])  # number of observations in each speed interval
is.factor(cars$qspeed)

plot(dist ~ qspeed, data = cars)  # dist by speed interval

a <- lm(dist ~ qspeed, data = cars)  # regression on the factor
summary(a)

# Descriptive statistics

x <- round(runif(20, 0, 20), digits = 2)  # 20 uniform draws on [0, 20]

min(x)
max(x)
median(x)
mean(x)
var(x)
sd(x)         # standard deviation
sqrt(var(x))  # square root of the variance

sort(x)   # values in increasing order
rank(x)   # rank of each element, i.e. its position in the sorted data
order(x)  # for each sorted position, the index of that value in x
sort(x) == x[order(x)]
sort(x, decreasing = TRUE)

sum(x)
length(x)
round(x)

fivenum(x)                     # five-number summary
quantile(x)                    # quantiles at the default probabilities
quantile(x, c(0.33, 0.66, 1))  # the 0.33, 0.66 and 1 quantiles
mad(x)  # median absolute deviation, built on the median of |x - median(x)|

cummax(x)   # running maximum
cummin(x)   # running minimum
cumprod(x)  # running product

cor(x, sin(x / 20))  # correlation between x and sin(x / 20)

# Complex numbers

x <- 2 + 3i

# complex() builds complex numbers from a real and an imaginary part; the
# outer parentheses print the value being assigned
(z <- complex(real = rnorm(10), imaginary = rnorm(10)))
complex(real = rnorm(3), imaginary = rnorm(3))

Re(z)   # real part of z
Im(z)   # imaginary part of z
Mod(z)  # modulus of z: sqrt(a^2 + b^2) for z = a + bi

# Define f(x) = x^3 - 2x - 1
f <- function(x) {
  x^3 - 2 * x - 1
}
f(2)  # value of f at x = 2

# Solve f(x) = 0 for a single root, searching between 0 and 2
uniroot(f, c(0, 2))

y <- uniroot(f, c(0, 2))  # keep the result returned by uniroot()
y$root                    # the root itself

# Redefine f as x^2 + 2x + 1 and search between -2 and 2 for its minimum
f <- function(x) {
  x^2 + 2 * x + 1
}
optimize(f, c(-2, 2))

# Basic plots

x <- rnorm(200)
hist(x, col = "light blue")  # histogram
rug(x)   # add tick marks for the actual data values to the histogram
stem(x)  # stem-and-leaf plot

N <- 500
x <- rnorm(N)
y <- x + rnorm(N)
plot(y ~ x)

a <- lm(y ~ x)                  # store the fitted regression in a
abline(a, col = "red")          # add the fitted line to the plot
abline(lm(y ~ x), col = "red")  # same line, fitting the model inline

paste("x的最小值=", min(x))  # combine a label and a value into one string

demo(graphics)  # built-in demonstration of R graphics

# ---- Distributions and random numbers ----

# Normal distribution
pnorm(1.2, 2, 1)  # cumulative distribution function at 1.2 (mean 2, sd 1)
dnorm(1.2, 2, 1)  # density at 1.2 (mean 2, sd 1)
qnorm(0.5, 0, 1)  # quantile for probability 0.5 (mean 0, sd 1)
rnorm(10, 0, 1)   # 10 random draws from the normal distribution
rnorm(10)

# t distribution; the second argument is the degrees of freedom
pt(1.2, 1)  # cumulative distribution function at 1.2
dt(1.2, 2)
qt(0.7, 1)
rt(10, 1)

# A problem you may run into: assigning a new value into a factor

# The first ten letters of the alphabet as a factor with ten levels
a <- factor(letters[1:10])

# Wrong: "w" is not one of the levels, so this produces a missing value
a[3] <- "w"

# Right: convert to character, modify, then convert back to a factor
a <- as.character(a)
a[3] <- "w"
a <- factor(a)

# Reading and writing data

x <- scan()  # read values typed into R and store them in x

write(x, "test.txt")  # save x to test.txt in the current directory

y <- scan("test.txt")  # read the values back from the file

y <- iris  # the iris flower data set
y[1:5, ]   # first five rows
str(y)     # compact description of the data

# Save y as a text table without row names, then read it back
write.table(y, "test1.txt", row.names = FALSE)
w <- read.table("test1.txt", header = TRUE)
str(w)

# The same round trip through a CSV file
write.csv(y, "test2.csv")
v <- read.csv("test2.csv")
str(v)

# Read a table that was copied to the system clipboard.
# NOTE(review): "clipboard" as a file name is documented for Windows; other
# platforms need a different connection (see ?connections).
data <- read.table("clipboard")

# Write the table back to the clipboard. The data frame must be the first
# argument: write.table("clipboard") would only print the literal string
# "clipboard" as a one-cell table on the console.
write.table(data, "clipboard")

# Sequences and vectors

z <- seq(-1, 10, length.out = 10)  # 10 evenly spaced values from -1 to 10
z <- seq(10, -1, -1)               # from 10 down to -1 in steps of -1

# Wrapping an assignment in parentheses also prints the result
(x <- rep(1:3, 3))             # the whole vector repeated 3 times
(x <- rep(3:5, 1:3))           # 3 once, 4 twice, 5 three times
(x <- rep(c(1, 10), c(4, 5)))  # 1 four times, then 10 five times

w <- c(1, 3, x, z)  # concatenate single values and vectors
w[3]

x <- rep(0, 10)
z <- 1:3
x + z  # z is recycled; R warns because 10 is not a multiple of 3
x * z

rev(x)  # rev provides a reversed version of its argument

z <- c("no cat", "has", "nine", "tails")
z[1] == "no cat"

z <- 1:5
z[7] <- 8  # assigning past the end extends z; the gap is filled with NA

z <- NULL
z[c(1, 3, 5)] <- 1:3  # indexed assignment also works starting from NULL
rnorm(10)[c(2, 5)]    # index the result of a call directly
z[-c(1, 3)]           # negative indices drop elements

z <- sample(1:100, 10)
which(z == max(z))  # position of the largest element

# From vectors to matrices

x <- sample(1:100, 12)

all(x > 0)   # TRUE when every element is positive
all(x != 0)  # TRUE when no element is zero
any(x > 0)   # TRUE when at least one element is positive

# Positions of the positive elements. The vector being indexed must be as
# long as the logical mask: indexing 1:10 with the 12-element mask x > 0
# pads the result with NA.
seq_along(x)[x > 0]

diff(x)           # differences between consecutive elements
diff(x, lag = 2)  # differences between elements two positions apart

x <- matrix(1:20, 4, 5)                # 4 x 5, filled column by column
x <- matrix(1:20, 4, 5, byrow = TRUE)  # filled row by row instead
t(x)                                   # transpose

x <- matrix(sample(1:100, 20), 4, 5)
2 * x  # multiply every entry by 2
x + 5  # add 5 to every entry

y <- matrix(sample(1:100, 20), 5, 4)
x + t(y)        # element-wise sum of two 4 x 5 matrices
(z <- x %*% y)  # matrix product, 4 x 4

z1 <- solve(z)  # inverse of z; solve(a, b) solves the equation a x = b
z1 %*% z
round(z1 %*% z, 14)  # 14 is the number of decimal places

nrow(x)
ncol(x)
dim(x)

x <- matrix(rnorm(24), 4, 6)  # 4 x 6 matrix of standard normal draws

x[c(2, 1), ]      # rows 2 and 1, in that order
x[, c(1, 3)]      # columns 1 and 3
x[x[, 1] > 0, 1]  # the positive entries of column 1
sum(x[, 1] > 0)   # how many entries of column 1 are positive
x[, -c(1, 3)]     # every column except 1 and 3

diag(x)    # diagonal of x
diag(1:5)  # matrix with 1:5 on the diagonal
diag(5)    # 5 x 5 identity matrix

x[-2, -c(1, 3)]  # x without row 2 and without columns 1 and 3

apply(x, 1, mean)  # mean of each row
apply(x, 2, sum)   # sum of each column

# x has to be a matrix for the calls below. rnorm(24, 4, 6) on its own yields
# a plain vector of 24 draws (mean 4, sd 6): diag() of that builds a 24 x 24
# matrix, and the lower.tri() assignment zeroes elements 2..24 of the vector.
x <- matrix(rnorm(24), 4, 6)

diag(x)    # diagonal of x
diag(1:5)  # matrix with 1:5 on the diagonal
diag(5)    # 5 x 5 identity matrix

# Zero the entries below the diagonal (use upper.tri(x) for those above it)
x[lower.tri(x)] <- 0
# diag(x) <- 0  # alternative: zero the diagonal itself

x <- array(runif(24), c(4, 3, 2))  # 4 x 3 x 2 array of uniform draws

is.matrix(x)         # FALSE: x has three dimensions
is.matrix(x[1, , ])  # TRUE: fixing the first index leaves a 3 x 2 matrix
x[c(1, 3), , ]       # slices 1 and 3 along the first dimension

# Missing values

airquality  # print the built-in airquality data set

complete.cases(airquality)          # TRUE for rows without missing values
which(!complete.cases(airquality))  # indices of rows with a missing value
sum(complete.cases(airquality))     # number of complete rows

na.omit(airquality)  # drop the rows that contain missing values

# Combining data

cbind(1:3, 4:6)  # bind the vectors as columns
rbind(1:3, 4:6)  # bind the vectors as rows

x <- 1:10
x[12] <- 3  # x grows to length 12; element 11 becomes NA

x1 <- append(x, 77, after = 5)  # add elements to a vector: 77 after position 5

# Removing duplicated rows from a matrix

# Rows 1 and 4 are both 1:5
(x <- rbind(1:5, runif(5), runif(5), 1:5, 7:11))

x[!duplicated(x), ]  # keep only the first occurrence of each row
unique(x)            # unique rows of x

# Categorical data

x <- c("Yes", "No", "No", "Yes", "Yes")
table(x)   # count of each category
factor(x)  # the same values as a factor

beer <- scan()  # read values typed into R

barplot(beer)                        # one bar per entered value
barplot(table(beer))                 # counts of each distinct value
barplot(table(beer) / length(beer))  # counts divided by the number of values

本站是提供个人知识管理的网络存储空间，所有内容均由用户发布，不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息，谨防诈骗。如发现有害或侵权内容，请点击一键举报。

转藏分享

QQ空间 QQ好友新浪微博微信

献花（0） +1

来自：争子俱乐部 > 《R语言》

举报/认领

0条评论

发表

请遵守用户评论公约

类似文章 更多

争子俱乐部

关注对话

TA的最新馆藏

[转] 一战和二战的各重大事件时间表
[转] 丘吉尔二战励志经典演讲：热血、汗水和眼泪
[转] 中国光大集团股份公司2017年度拟接收毕业生情况公示--中国光大集团股份公司
SQL经典50题 | 附答案
身为数据分析师，要学会保护自己鸭～
中国的银行体系

喜欢该文的人也喜欢更多

热门阅读换一换