统计与R讲座的演示程序

上传人:沈*** 文档编号:82490961 上传时间:2022-04-29 格式:DOC 页数:20 大小:156.02KB
收藏 版权申诉 举报 下载
统计与R讲座的演示程序_第1页
第1页 / 共20页
统计与R讲座的演示程序_第2页
第2页 / 共20页
统计与R讲座的演示程序_第3页
第3页 / 共20页
资源描述:

《统计与R讲座的演示程序》由会员分享,可在线阅读,更多相关《统计与R讲座的演示程序(20页珍藏版)》请在装配图网上搜索。

1、### 统计与R讲座的演示程序。 ########### 演示用的数据 ############ ## 19个学生的身高体重数据。 d.class <- structure(list(name = structure(c(2L, 3L, 5L, 10L, 11L, 12L, 15L, 16L, 17L, 1L, 4L, 6L, 7L, 8L, 9L, 13L, 14L, 18L, 19L), .Label = c("Alfred", "Alice", "Becka", "Duke", "Gail", "Guido", "James", "Jeffrey", "John",

2、"Karen", "Kathy", "Mary", "Philip", "Robert", "Sandy", "Sharon", "Tammy", "Thomas", "William"), class = "factor"), sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("F", "M"), class = "factor"), age = c(13L, 13L, 14L, 12L, 12L, 15L, 11L

3、, 15L, 14L, 14L, 14L, 15L, 12L, 13L, 12L, 16L, 12L, 11L, 15L), height = c(56.5, 65.3, 64.3, 56.3, 59.8, 66.5, 51.3, 62.5, 62.8, 69, 63.5, 67, 57.3, 62.5, 59, 72, 64.8, 57.5, 66.5), weight = c(84, 98, 90, 77, 84.5, 112, 50.5, 112.5, 102.5, 112.5, 102.5, 133, 83, 84, 99.5, 150, 128, 85, 112)

4、), .Names = c("name", "sex", "age", "height", "weight"), class = "data.frame", row.names = c(NA, -19L)) ## 100个学生的语文、数学、英语、物理、化学、生物成绩。主成份和因子分析。 d.scores <- structure(list(学号 = c("225040819", "210050718", "226170534", "226191045", "229120703", "225102012", "210060331", "226090835", "225180

5、415", "225072225", "228192837", "228160809", "226071147", "226191019", "228032508", "205093429", "228121617", "210070122", "226190523", "205154816", "205021825", "228221122", "203050439", "205253820", "228211614", "225052428", "210070332", "203102326", "228200918", "226020307", "205134827",

6、"226121101", "229120707", "205134418", "225092127", "228171508", "221060722", "226080741", "229080931", "228020803", "205034506", "203041406", "226080420", "205063410", "225172323", "225040827", "205134308", "203131832", "205173806", "226190443", "228212122", "226141416", "203020227", "228

7、051109", "205053908", "228070939", "210111213", "229081026", "203021502", "226171716", "225080604", "226081301", "205241919", "203010233", "210070218", "228122620", "225122710", "225051018", "226081020", "226020322", "203071407", "225031614", "203060719", "228182104", "221020221", "205172627

8、", "229091404", "228072710", "205011714", "226191139", "226151322", "205172106", "203060550", "203161626", "203061014", "205233330", "203211013", "205084213", "203112725", "217040329", "225040817", "203121211", "228032313", "225020208", "203142140", "226020335", "205223328", "210091321", "

9、225102218", "225071708"), 语文 = c(97, 88, 93, 81, 89, 98, 82, 97, 93, 84, 84, 105, 92, 88, 85, 75, 87, 77, 94, 83, 92, 103, 84, 88, 94, 79, 80, 92, 87, 102, 73, 85, 75, 87, 79, 103, 80, 86, 80, 107, 76, 93, 79, 83, 94, 93, 84, 83, 83, 81, 81, 100, 94, 88, 79, 99, 72, 98, 73, 89, 96, 98, 92, 9

10、8, 93, 91, 67, 95, 92, 88, 91, 84, 103, 91, 87, 98, 82, 92, 96, 94, 91, 87, 98, 0, 89, 77, 96, 83, 38, 78, 102, 80, 110, 96, 77, 103, 94, 70, 74, 80), 数学 = c(81, 31, 96, 95, 75, 76, 40, 107, 98, 62, 25, 90, 116, 102, 0, 79, 90, 57, 116, 50, 111, 105, 102, 96, 80, 28, 45, 69, 87, 100, 58, 9

11、0, 101, 45, 44, 98, 5, 116, 65, 107, 56, 66, 98, 77, 42, 93, 60, 82, 55, 107, 86, 93, 102, 102, 80, 96, 15, 40, 88, 73, 114, 104, 81, 112, 55, 76, 28, 89, 103, 109, 56, 76, 70, 89, 52, 91, 20, 77, 68, 91, 98, 96, 82, 0, 85, 68, 105, 59, 0, 52, 89, 106, 0, 110, 58, 118, 77, 15, 65, 72), 英语 =

12、c(78.5, 33, 70.5, 74, 60.5, 65.5, 61, 71, 91, 72, 52.5, 82.5, 78, 65, 0, 47.5, 63.5, 41, 70.5, 52, 74, 90.5, 81, 49.5, 91.5, 48, 63.5, 38.5, 82, 82, 71.5, 80, 55.5, 42, 62.5, 71, 46, 94.5, 55.5, 101, 53.5, 72.5, 74, 59.5, 64.5, 79, 37, 58, 43, 70, 58, 71, 97, 84, 44, 95, 28.5, 59, 48, 64.5

13、, 94, 62.5, 72, 90.5, 73.5, 66, 58.5, 85, 79, 71.5, 64.5, 70, 85, 82.5, 79, 70, 57.5, 68.5, 81, 64.5, 75.5, 57, 94.5, 0, 48.5, 29, 56.5, 57, 60.5, 51.5, 82, 57, 0, 90.5, 52, 79.5, 75.5, 28.5, 49, 82), 物理 = c(49, 34, 33, 3, 40, 50, 51, 22, 22, 49, 15, 43, 11, 23, 21, 89, 36, 61, 20, 44, 15,

14、 58, 20, 46, 36, 36, 39, 59, 17, 21, 57, 34, 36, 43, 45, 42, 28, 29, 61, 29, 19, 67, 34, 48, 58, 32, 66, 24, 27, 17, 40, 0, 9, 27, 26, 14, 15, 56, 72, 82, 39, 23, 44, 19, 53, 30, 34, 52, 38, 65, 57, 26, 54, 53, 15, 17, 17, 61, 6, 21, 36, 43, 49, 21, 56, 49, 50, 63, 26, 43, 46, 9, 39, 20, 29,

15、 16, 67, 33, 43, 14 ), 化学 = c(56, 24, 57, 18, 17, 13, 51, 19, 22, 57, 14, 49, 32, 20, 24, 92, 46, 56, 28, 33, 31, 68, 12, 62, 70, 39, 18, 69, 8, 25, 50, 20, 18, 48, 49, 40, 32, 47, 84, 68, 16, 60, 53, 47, 47, 25, 32, 35, 22, 22, 32, 0, 25, 39, 59, 31, 18, 56, 59, 85, 24, 26, 22, 28, 74, 35,

16、 28, 39, 55, 67, 63, 31, 53, 75, 23, 42, 40, 49, 16, 44, 41, 52, 71, 19, 18, 72, 50, 87, 14, 81, 40, 14, 9, 63, 35, 28, 80, 22, 46, 8), 生物 = c(42, 28, 59, 16, 42, 43, 59, 44, 44, 60, 9, 60, 26, 7, 7, 84, 54, 46, 41, 63, 35, 58, 4, 32, 66, 53, 44, 61, 31, 30, 60, 20, 27, 67, 46, 80, 20, 46,

17、 63, 44, 22, 64, 51, 60, 70, 30, 65, 47, 44, 29, 59, 0, 27, 51, 71, 65, 27, 21, 65, 79, 29, 17, 35, 26, 56, 40, 22, 58, 61, 67, 57, 21, 51, 46, 11, 48, 36, 56, 17, 52, 64, 51, 72, 30, 54, 36, 61, 60, 42, 69, 67, 27, 34, 44, 49, 22, 69, 33, 60, 11)), .Names = c("学号", "语文", "数学", "英语", "物理", "

18、化学", "生物"), row.names = c("225040819", "210050718", "226170534", "226191045", "229120703", "225102012", "210060331", "226090835", "225180415", "225072225", "228192837", "228160809", "226071147", "226191019", "228032508", "205093429", "228121617", "210070122", "226190523", "205154816", "20502

19、1825", "228221122", "203050439", "205253820", "228211614", "225052428", "210070332", "203102326", "228200918", "226020307", "205134827", "226121101", "229120707", "205134418", "225092127", "228171508", "221060722", "226080741", "229080931", "228020803", "205034506", "203041406", "226080420

20、", "205063410", "225172323", "225040827", "205134308", "203131832", "205173806", "226190443", "228212122", "226141416", "203020227", "228051109", "205053908", "228070939", "210111213", "229081026", "203021502", "226171716", "225080604", "226081301", "205241919", "203010233", "210070218", "22

21、8122620", "225122710", "225051018", "226081020", "226020322", "203071407", "225031614", "203060719", "228182104", "221020221", "205172627", "229091404", "228072710", "205011714", "226191139", "226151322", "205172106", "203060550", "203161626", "203061014", "205233330", "203211013", "205084

22、213", "203112725", "217040329", "225040817", "203121211", "228032313", "225020208", "203142140", "226020335", "205223328", "210091321", "225102218", "225071708"), class = "data.frame") ## 癌症病人康复的logistic回归。 d.remiss <- structure(list(remiss = c(1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,

23、0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L), cell = c(0.8, 0.9, 0.8, 1, 0.9, 1, 0.95, 0.95, 1, 0.95, 0.85, 0.7, 0.8, 0.2, 1, 1, 0.65, 1, 0.5, 1, 1, 0.9, 1, 0.95, 1, 1, 1), smear = c(0.83, 0.36, 0.88, 0.87, 0.75, 0.65, 0.97, 0.87, 0.45, 0.36, 0.39, 0.76, 0.46, 0.39, 0.9, 0

24、.84, 0.42, 0.75, 0.44, 0.63, 0.33, 0.93, 0.58, 0.32, 0.6, 0.69, 0.73), infil = c(0.66, 0.32, 0.7, 0.87, 0.68, 0.65, 0.92, 0.83, 0.45, 0.34, 0.33, 0.53, 0.37, 0.08, 0.9, 0.84, 0.27, 0.75, 0.22, 0.63, 0.33, 0.84, 0.58, 0.3, 0.6, 0.69, 0.73), li = c(1.9, 1.4, 0.8, 0.7, 1.3, 0.6, 1, 1.9, 0.8,

25、0.5, 0.7, 1.2, 0.4, 0.8, 1.1, 1.9, 0.5, 1, 0.6, 1.1, 0.4, 0.6, 1, 1.6, 1.7, 0.9, 0.7), blast = c(1.1, 0.74, 0.176, 1.053, 0.519, 0.519, 1.23, 1.354, 0.322, 0, 0.279, 0.146, 0.38, 0.114, 1.037, 2.064, 0.114, 1.322, 0.114, 1.072, 0.176, 1.591, 0.531, 0.886, 0.964, 0.398, 0.398), temp = c(0.996

26、, 0.992, 0.982, 0.986, 0.98, 0.982, 0.992, 1.02, 0.999, 1.038, 0.988, 0.982, 1.006, 0.99, 0.99, 1.02, 1.014, 1.004, 0.99, 0.986, 1.01, 1.02, 1.002, 0.988, 0.99, 0.986, 0.986)), .Names = c("remiss", "cell", "smear", "infil", "li", "blast", "temp"), class = "data.frame", row.names = c(NA, -2

27、7L)) ## 工作性质与工作满意度关系。典型相关分析。 d.jobs <- structure(list(Career = c(72L, 63L, 96L, 96L, 84L, 66L, 31L, 45L, 42L, 79L, 39L, 54L, 60L, 63L), Supervis = c(26L, 76L, 31L, 98L, 94L, 10L, 40L, 14L, 18L, 74L, 12L, 35L, 75L, 45L), Finance = c(9L, 7L, 7L, 6L, 6L, 5L, 9L, 2L, 6L, 4L, 2L, 3L, 5L, 5L),

28、Variety = c(10L, 85L, 83L, 82L, 36L, 28L, 64L, 19L, 33L, 23L, 37L, 23L, 45L, 22L ), Feedback = c(11L, 22L, 63L, 75L, 77L, 24L, 23L, 15L, 13L, 14L, 13L, 74L, 58L, 67L), Autonomy = c(70L, 93L, 73L, 97L, 97L, 75L, 75L, 50L, 70L, 90L, 70L, 53L, 83L, 53L)), .Names = c("Career", "Supervis", "Fina

29、nce", "Variety", "Feedback", "Autonomy"), class = "data.frame", row.names = c(NA, -14L)) ## 北京地区1949--1964年洪水受灾面积与成灾面积数据。时间序列。 d.flood <- matrix(c( 1949 , 1 , 331.12 , 243.96 , 1950 , 2 , 380.44 , 293.90 , 1951 , 3 , 59.63 , 59.63, 1952 , 4 , 37.89 , 18.09, 1953 , 5 , 103

30、.66 ,72.92, 1954 , 6 , 316.67 , 244.57, 1955 , 7 , 208.72 , 155.77, 1956 , 8 , 288.79 , 255.22, 1957 , 9 , 25.00 , 0.50, 1958 , 10 , 84.72 , 48.59, 1959 , 11 , 260.89 ,202.96, 1960, 12 , 27.18 ,15.02, 1961, 13 , 20.74 ,17.09, 1962 , 14 , 52.99 ,14.66, 1963 ,

31、15 , 99.25 , 45.61, 1964 , 16 , 55.36 ,41.90), byrow=T, ncol=4, dimnames=list(1949:1964, c("year", "t", "area1", "area2"))) ########### 第一讲 ################# #################################### ### R初步演示。 x1 <- 0:100 x2 <- x1 * 2 * pi / 100 y1 <- sin(x2) plot

32、(x2, y1, type="l") abline(h=0, lwd=2) abline(v=(0:4)/2*pi, lty=3, col="gray") y2 <- cos(x2) lines(x2, y2, lty=2, col="green") demo("graphics") demo("image") cl <- read.csv("class.csv", header=TRUE) summary(cl) mean(cl$height) var(cl$height) lm1 <- lm(weight ~ height + age + sex,

33、 data=cl) print(summary(lm1)) lm2 <- step(lm1, weight ~ height + age + sex, data=cl) sink("myres.txt", split=TRUE) print(A) print(Ai) sink() ### R向量演示。 marks <- c(10, 6, 4, 7, 8) x <- c(1:3, 10:13) x1 <- c(1, 2) x2 <- c(3, 4) x <- c(x1, x2) x x <- c(1, 10) x

34、+ 2 x - 2 x * 2 x / 2 x ^ 2 2 / x 2 ^x x1 <- c(1, 10) x2 <- c(4, 2) x1 + x2 x1 - x2 x1 * x2 x1 / x2 x1 <- c(1, 10) x2 <- c(1, 3, 5, 7) x1 + x2 x1 * x2 sort(c(3, 5, 1)) cl[order(cl$height),] ages <- c("李明"=30, "张聪"=25, "刘颖"=28) ages <- c(30, 25, 28) names(ages) <- c("李明",

35、 "张聪", "刘颖") ### R矩阵演示。 A <- matrix(1:6, nrow=3, ncol=2) B <- matrix(1,-1, 1,1, nrow=2, ncol=2, byrow=TRUE) C1 <- A %*% B C2 <- A + 2 C3 <- A / 2 colnames(A) <- c("x", "y") A[,"y"] ### R函数演示。 fsub <- function(x, y=0){ cat("x=", x, " y=", y, "\n") x - y } fsub(5,3)

36、fsub(5) fsub(x=5, y=3) fsub(y=3, x=5) ### R探索性数据分析演示。 x <- cl$sex print(x) table(x) table(x)/length(x) barplot(table(x)) x <- rnorm(30, 10, 2) summary(x) hist(x) boxplot(x) qqnorm(x);qqline(x) x <- rexp(30) summary(x) hist(x) boxplot(x) qqnorm(x);qqline(x) ##########

37、# 第二讲 ################# #################################### ### 用optim求正态MLE的演示。 objf <- function(theta, x){ mu <- theta[1] s2 <- exp(theta[2]) n <- length(x) res <- n*log(s2) + 1/s2*sum((x - mu)^2) res } sim <- function(n=30){ mu0 <- 20 sigma0 <- 2 x <- rnorm(n, mu0

38、, sigma0) theta0 <- c(0,0) ores <- optim(theta0, objf, x=x) print(ores) theta <- ores$par mu <- theta[1] sigma <- exp(0.5*theta[2]) cat("mu: ", mu, " ==> ", mu0, "\n") cat("sigma: ", sigma, " ==> ", sigma0, "\n") } ### 用optimize求正态MLE的演示。 sim1 <- function(n=30){ mu0

39、 <- 20 sigma0 <- 2 x <- rnorm(n, mu0, sigma0) mu <- mean(x) ss <- sum((x - mu)^2)/length(x) objf <- function(delta,ss) log(delta) + 1/delta*ss ores <- optimize(objf, lower=0.0001, upper=1000, ss=ss) delta <- ores$minimum sigma <- sqrt(delta) print(o

40、res) cat("mu: ", mu, " ==> ", mu0, "\n") cat("sigma: ", sigma, " ==> ", sigma0, "\n") } ### R置信区间演示。 x <- rnorm(100,10,3) mu <- mean(x) sig <- sd(x) p = pnorm(15,mu,sig) - pnorm(5,mu,sig) cat("Prob in [5,15] = ", p, "\n") ### 用t.test求正态均值置信区间。 x <- c(11.67, 9.29, 10.45, 9.01

41、, 12.67, 16.24, 11.64, 7.73, 12.23) t.test(x, conf.level=0.95) ### 单总体双侧均值t检验。 x <- c(490, 506, 508, 502, 498, 511, 510, 515, 512) t.test(x, mu=500, side="two-sided") ### 单总体右侧方差卡方检验。 x <- c(42,65,75,78,59,71,57,68,54,55) n <- length(x) chi2 <- (n-1)*var(x)/80 p <-

42、 1-pchisq(chi2,n-1) cat("Chi-squared = ", chi2, " p-value = ", p, "\n") ### 两样本t检验。 x <- c(20.5, 19.8, 19.7, 20.4, 20.1, 20.0, 19.0, 19.9) y <- c(20.7, 19.8, 19.5, 20.8, 20.4, 19.6, 20.2) t.test(x, y) ### 方差比较。 var.test(x, y) ### 成对t检验。 x <- c(20.5, 18.8, 19.8, 20

43、.9, 21.5, 19.5, 21.0, 21.2) y <- c(17.7, 20.3, 20.0, 18.8, 19.0, 20.1, 20.0, 19.1) t.test(x, y, paired=TRUE) ### 单总体比例检验。 ### 12次中5次成功,与40\%比较。 binom.test(5, 12, 0.4) prop.test(5, 12, 0.4) ### 单总体比例检验。 ### 120次中35次成功,与25\%比较。 prop.test(35, 120, 0.25) ### 两个比例的比较。102中成功23和135中成功25的比例比

44、较。 prop.test(c(23,25), c(102,135)) ########### 第三讲 方差分析 ######## #################################### ## Wilcoxon秩和检验。 x <- c(20.5, 19.8, 19.7, 20.4, 20.1, 20.0, 19.0, 19.9) y <- c(20.7, 19.8, 19.5, 20.8, 20.4, 19.6, 20.2) wilcox.test(x, y) ## 符号检验。 x <- c(20.5, 1

45、8.8, 19.8, 20.9, 21.5, 19.5, 21.0, 21.2) y <- c(17.7, 20.3, 20.0, 18.8, 19.0, 20.1, 20.0, 19.1) t.test(x, y, paired=TRUE) z <- x - y binom.test(sum(z>0), sum(z != 0), p=0.5) ## Wilcoxon 符号秩检验 wilcox.test(x, y, paired=TRUE) ## 单总体拟合优度卡方检验。 x <- c(18, 13, 17, 21, 15, 16)

46、 p <- rep(1/6, 6) chisq.test(x, p) ## 独立性卡方检验 tab <- matrix(c(60, 3, 32, 11), nrow=2, ncol=2, byrow=TRUE, dimnames=list(c("病人", "健康人"), c("吸烟", "不吸烟"))) chisq.test(tab) ## 单因素方差分析 A <- factor(rep(1:5, each=4)) y <- c(25.6, 22.2, 28.0, 29.8,

47、 24.4, 30.0, 29.0, 27.5, 25.0, 27.7, 23.0, 32.2, 28.8, 28.0, 31.5, 25.9, 20.6, 21.2, 22.0, 21.2) d <- data.frame(A, y) plot(y ~ A, data=d) summary(aov(y ~ A, data=d)) tapply(d$y, d$A, mean) ## 控制单次比较错误率的多重比较 pairwise.t.test(y, A, p.adjust="none") ## 控制总错误

48、率的多重比较 pairwise.t.test(y, A) ## 控制错误发现率的多重比较 pairwise.t.test(y, A, p.adjust="fdr") ## 用Tukey同时置信区间做多重比较。 TukeyHSD(aov(y ~ A, data=d)) ## Bartlett方差齐性检验 bartlett.test(y ~ A, data=d) ## Levene方差齐性检验 require(car) leveneTest(y ~ A, data=d) ## 方差不相等情形下单因素方差分析的Welch检验 oneway.test(y ~

49、 A, data=d) ## 非参数单因素方差分析的Kruskal-Wallis检验 kruskal.test(y ~ A, data=d) ## 两因素方差分析 rats <- data.frame( y = c(0.31, 0.45, 0.46, 0.43, # (1,1) 0.82, 1.10, 0.88, 0.72, # (1,2) 0.43, 0.45, 0.63, 0.76, # (1,3) 0.45, 0.71, 0.66, 0.62, # (1,4) 0.36, 0.29, 0.40

50、, 0.23, # (2,1) 0.92, 0.61, 0.49, 1.24, # (2,2) 0.44, 0.35, 0.31, 0.40, # (2,3) 0.56, 1.02, 0.71, 0.38, # (2,4) 0.22, 0.21, 0.18, 0.23, # (3,1) 0.30, 0.37, 0.38, 0.29, # (3,2) 0.23, 0.25, 0.24, 0.22, # (3,3) 0.30, 0.36, 0.31, 0.33), # (3,4) T

51、oxicant=factor(rep(1:3, each=4*4)), Cure=factor(rep(rep(1:4, each=4), 3))) ## 比较各组的盒形图 opar <- par(mfrow=c(1,2)) plot(y ~ Toxicant + Cure, data=rats) par(opar) ## 带交互作用的两因素方差分析 res <- aov(y ~ Toxicant + Cure + Toxicant:Cure, data=rats) summary(res) ## 无交互作用的两因素方差分析 r

52、es2 <- aov(y ~ Toxicant + Cure, data=rats) summary(res2) tapply(rats$y, rats$Toxicant, mean) tapply(rats$y, rats$Cure, mean) ## 协方差分析, 用HH包。 require(HH) ancova(y ~ A + x, data=d) ## 协方差分析,用lm函数。 anova(lm(y ~ A + x, data=d)) ## 三个三水平因素的正交设计。烟灰砖试验 d <- data.frame( A = factor(rep(1:3,

53、 each=3)), B = factor(rep(1:3, 3)), C = factor(c(1,2,3, 2,3,1, 3,1,2)), y = c(16.9, 19.1, 16.7, 19.8, 23.7, 19.0, 25.0, 20.4, 23.1)) ## 画图比较 opar <- par(mfrow=c(1,3)) plot(y ~ A + B + C, data=d) par(opar) ## 方差分析 summary(aov(y ~ A + B + C, data=d)) ############ 第四讲 统计模型 ###

54、############## ############################################## ### 相关与回归 ### ## 生成线性回归模拟数据 nsamp <- 30 x <- runif(nsamp, -10, 10) y <- 20 + 0.5*x + rnorm(nsamp,0,0.5) plot(x, y) ## 生成二次曲线回归模拟数据 y2 <- 0.5*x^2 + rnorm(nsamp,0,2) plot(x, y2) ## 生成指数函数回归模拟数据 y3 <- exp(0.2*(x+10)) + rn

55、orm(nsamp,0,2) plot(x, y3) ## 生成对数函数回归模拟数据 y4 <- log(10*(x+12)) + rnorm(nsamp,0,0.1) plot(x, y4) ## 计算相关系数并检验:线性相关例子 cor(x, y) cor.test(x, y) ## 计算相关系数并检验:二次曲线相关例子 cor(x, y2) cor.test(x, y2) ## 模拟数据的一元回归 plot(x, y) abline(lm(y ~ x), col="red", lwd=2) res <- lm(y ~ x); res su

56、mmary(res) ## 19个学生身高体重的回归。 d <- read.csv("class.csv", header=TRUE) lm1 <- lm(weight ~ height, data=d); summary(lm1) plot(weight ~ height, data=d) abline(lm1, col="red", lwd=2) plot(lm1) ## 样条回归例子。 nsamp <- 30 x <- runif(nsamp, -10, 10) x <- sort(x) y <- 10*sin(x/10*pi)^2 + rnorm(ns

57、amp,0,0.2) plot(x, y) require(splines) res <- smooth.spline(x, y) lines(spline(x, res$y), col="red") ## 体重对身高和年龄的多元回归。 lm2 <- lm(weight ~ height + age, data=d) summary(lm2) ## 只对年龄回归。 lm3 <- lm(weight ~ age, data=d) summary(lm3) ## 逐步回归。 lm4 <- step(lm(weight ~ height + age + sex,

58、data=d)) summary(lm4) ## 以因子为自变量的回归。 lm5 <- lm(weight ~ height + sex, data=d) summary(lm5) ## 癌症病人康复概率的logistic回归。 remiss <- read.csv("remiss.csv", header=TRUE) res1 <- glm(remiss ~ cell+smear+infil+li +blast+temp, family=binomial, data=remiss) summary(res1) ##

59、 逐步剔除不重要的自变量。 res2 <- glm(remiss ~ cell+smear+infil+li +temp, family=binomial, data=remiss) summary(res2) res3 <- glm(remiss ~ cell+infil+li +temp, family=binomial, data=remiss) summary(res3) res4 <- glm(remiss ~ cell+li+temp, family=bino

60、mial, data=remiss) summary(res4) ## step()逐步回归。 ress <- step(glm(remiss ~ cell+smear+infil+li +blast+temp, family=binomial, data=remiss)) summary(ress) ### 多元分析 ### ### 主成份分析。 ## 模拟例子 nsamp <- 30; s <- 1 p01 <- runif(nsamp, -10, 10) p02 <- runif(nsamp, 0,

61、5) x1 <- 0.5*p01 + 0.1*p02 + rnorm(nsamp, 0, s) x2 <- 0.1*p01 - 0.5*p02 + rnorm(nsamp, 0, s) x3 <- 0.5*p01 - 0.1*p02 + rnorm(nsamp, 0, s) x4 <- -0.1*p01 + 0.5*p02 + rnorm(nsamp, 0, s) ## 用协方差阵。 res1 <- princomp(~x1+x2+x3+x4, cor=FALSE, scores=TRUE) summary(res1) ## 用相关阵。 res2 <- princomp(~x

62、1+x2+x3+x4, cor=TRUE) summary(res2) ## 计算主成份得分。 pr1 <- predict(res1) plot(d[,1], pr1[,1]) ### 因子分析。 ## 100个学生的语文、数学、英语、物理、化学、生物成绩。 scores <- read.csv("scores.csv", header=TRUE) dd <- scores[,2:7] res <- factanal(dd, factors=2) res ### 判别分析。 ## 鸢尾花数据的Fisher判别分析。 data(iris) plot(

63、Sepal.Length ~ Species, data=iris) require(MASS) res <- lda(Species ~ Sepal.Length+Sepal.Width +Petal.Length+Petal.Width, data=iris) res ## 对比判别效果。 pr <- predict(res)$class table(iris[,"Species"], pr) ### 聚类分析。 ## 鸢尾花数据的聚类分析。 data(iris) ## 计算距离。 di <- dist(iris[,1:4], method="euc

64、lidean") print(as.matrix(di)[1:10,1:10]) ## 谱系聚类。 res <- hclust(di, method="complete") plot(res, labels=FALSE) ## 在结果树形图中切分出三个类。 rect.hclust(res, k=3) ## 得到分三个类的分类结果。 clus <- cutree(res, k=3) ## 与真实类对比。 table(iris[,"Species"], clus) ### 典型相关分析 ## 鸢尾花数据花萼长、宽与花瓣长、宽的典型相关。 data(iris) r

65、es <- cancor(iris[,1:2], iris[,3:4]) print(res) ## 工作性质与工作满意度的典型相关。 jobs <- read.csv("jobs.csv", header=TRUE) res <- cancor(jobs[,1:3], jobs[,4:6]) print(res) ### 时间序列分析 ### ## 北京地区1949-1964年洪灾受灾和成灾面积数据,年数据。 print(d.flood) flood.area1 <- ts(d.flood[,"area1"], freq

66、uency=1, start=1949) flood.area2 <- ts(d.flood[,"area2"], frequency=1, start=1949) plot(flood.area1) lines(flood.area2, col="red", lty=2) ## 自相关系数函数ACF。 acf(flood.area1) ## 居民用煤消耗季度数据,从1991年到1996年。 coal.consumption <- ts(c( 6878.4 , 5343.7 , 4847.9 , 6421.9 , 6815.4 , 5532.6 , 4745.6 , 6406.2 , 6634.4 , 5658.5 , 4674.8 , 6445.5 , 7130.2 , 5532.6 , 4989.6 , 6642.3 , 74

展开阅读全文
温馨提示:
1: 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等.压缩文件请下载最新的WinRAR软件解压。
2: 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
3.本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
5. 装配图网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
7. 本站不保证下载资源的准确性、安全性和完整性, 同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。

相关资源

更多
正为您匹配相似的精品文档
关于我们 - 网站声明 - 网站地图 - 资源地图 - 友情链接 - 网站客服 - 联系我们

copyright@ 2023-2025  zhuangpeitu.com 装配图网版权所有   联系电话:18123376007

备案号:ICP2024067431-1 川公网安备51140202000466号


本站为文档C2C交易模式,即用户上传的文档直接被用户下载,本站只是中间服务平台,本站所有文档下载所得的收益归上传人(含作者)所有。装配图网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对上载内容本身不做任何修改或编辑。若文档所含内容侵犯了您的版权或隐私,请立即通知装配图网,我们立即给予删除!