Vector and Matrix(向量與矩陣) 羅琪老師
向量形式 > c(2,3,5,2,7,1) # 數字 > 3:10 # 數列 3, 4, .., 10 > c(TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,FALSE) # 邏輯 > c("Canberra","Sydney","Newcastle","Darwin") # 文字
串聯向量 > x <- c(2,3,5,2,7,1) > x [1] 2 3 5 2 7 1 > y <- c(10,15,12) > y [1] 10 15 12 > z <- c(x, y) > z [1] 2 3 5 2 7 1 10 15 12 > x <- c(10.4, 5.6, 3.1, 6.4, 21.7) > y <- c(x, 0, x) > y [1] 10.4 5.6 3.1 6.4 21.7 0.0 10.4 5.6 3.1 6.4 21.7
向量的一部分 > x <- c(3,11,8,15,12) # 將3, 11, 8, 15, 12放到向量x > x[c(2,4)] # 取出向量x中的第2與第4個元素 [1] 11 15 > x <- c(3,11,8,15,12) > x[-c(2,3)] # 將向量x中的第2與第3個元素移除 [1] 3 15 12 > x>10 # 產生一個邏輯向量(元素都是TRUE或FALSE) [1] FALSE TRUE FALSE TRUE TRUE > x[x>10] # 印出x>10的元素 [1] 11 15 12
向量的排序 > x <- c(10.4, 5.6, 3.1, 6.4, 21.7) > sort(x) # 由小到大排序, NA省略 [1] 3.1 5.6 6.4 10.4 21.7 > order(x) # 最小到最大數的位置 [1] 3 2 4 1 5 > x[order(x)] # 由小到大排序, NA放最後 > rev(x) # 將向量x的順序反轉 [1] 21.7 6.4 3.1 5.6 10.4 > rev(sort(x)) # 將向量x由大到小排序 [1] 21.7 10.4 6.4 5.6 3.1
生成規則的數列 1:30 與 c(1, 2, ..., 29, 30) 相同 2*1:15 與 c(2, 4, ..., 28, 30) 相同 > n <- 10 > 1:n-1 [1] 0 1 2 3 4 5 6 7 8 9 > 1:(n-1) [1] 1 2 3 4 5 6 7 8 9 > seq(2,10) # 2到10的數列, 每次增加1 [1] 2 3 4 5 6 7 8 9 10 > seq(1,30) # 1到30的數列, 每次增加1 [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
生成規則的數列 > s3 <- seq(-5, 5, by=.2) # -5到5的數列, 每次增加0.2 > s3 [1] -5.0 -4.8 -4.6 -4.4 -4.2 -4.0 -3.8 -3.6 -3.4 -3.2 -3.0 -2.8 -2.6 -2.4 -2.2 -2.0 -1.8 -1.6 -1.4 -1.2 -1.0 -0.8 [23] -0.6 -0.4 -0.2 0.0 0.2 0.4 0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 [45] 3.8 4.0 4.2 4.4 4.6 4.8 5.0 > s4 <- seq(length=51, from=-5, by=.2) # -5開始每次增加0.2, 共51個數字 > s4
生成規則的數列 > x <- c(3, 1, 4, 2) > s5 <- rep(x, times=5) # 重複x內容5次 > s5 [1] 3 1 4 2 3 1 4 2 3 1 4 2 3 1 4 2 3 1 4 2 > s6 <- rep(x, each=5) # 重複x每個元素5次 > s6 [1] 3 3 3 3 3 1 1 1 1 1 4 4 4 4 4 2 2 2 2 2 > rep(c(2,3,5),c(4,4,8)) [1] 2 2 2 2 3 3 3 3 5 5 5 5 5 5 5 5
[1] 0.09615385 0.17857143 0.32258065 0.15625000 0.04608295 向量的運算 > x <- 1.2 : 6.4 # 從1.2起到6.4, 每次增加1, 然後2.2, 3.2, 4.2, 5.2, 6.2, 最多到6.2 > x [1] 1.2 2.2 3.2 4.2 5.2 6.2 > x * 2 [1] 2.4 4.4 6.4 8.4 10.4 12.4 > x / 2 [1] 0.6 1.1 1.6 2.1 2.6 3.1 > x - 1 [1] 0.2 1.2 2.2 3.2 4.2 5.2
[1] 0.09615385 0.17857143 0.32258065 0.15625000 0.04608295 向量的運算 > (x <- 1 : 5) [1] 1 2 3 4 5 > sqrt(x) [1] 1.000000 1.414214 1.732051 2.000000 2.236068 > round(sqrt(x),digits = 2) # 4捨5入到小數點第2位 [1] 1.00 1.41 1.73 2.00 2.24 > x^2 [1] 1 4 9 16 25
[1] 0.09615385 0.17857143 0.32258065 0.15625000 0.04608295 向量的運算 > (x <- seq(2, 10, by = 2)) [1] 2 4 6 8 10 > (y <- 1 : 5) [1] 1 2 3 4 5 > x/y [1] 2 2 2 2 2 > (x <- 1 : 10) [1] 1 2 3 4 5 6 7 8 9 10 > (y <- 1 : 3) [1] 1 2 3 > x / y [1] 1.0 1.0 1.0 4.0 2.5 2.0 7.0 4.0 3.0 10.0 Warning message: In x/y : longer object length is not a multiple of shorter object length
[1] 0.09615385 0.17857143 0.32258065 0.15625000 0.04608295 向量的運算 2*x重複2.2次, y重複1次,1 重複11次. > x <- c(10.4, 5.6, 3.1, 6.4, 21.7) > 1/x [1] 0.09615385 0.17857143 0.32258065 0.15625000 0.04608295 > y <- c(x, 0, x) > y [1] 10.4 5.6 3.1 6.4 21.7 0.0 10.4 5.6 3.1 6.4 21.7 > v <- 2*x + y + 1 Warning message:警告 In 2 * x + y :長的物件長度不是短的物件長度的倍數 longer object length is not a multiple of shorter object length > v [1] 32.2 17.8 10.3 20.2 66.1 21.8 22.6 12.8 16.9 50.8 43.5
[1] 0.09615385 0.17857143 0.32258065 0.15625000 0.04608295 字符向量 > (s <- c("Florida; a politician's",'nightmare')) # 可用雙引號或單引號 [1] "Florida; a politician's" "nightmare" > paste(s[1], s[2]) [1] "Florida; a politician's nightmare" > paste(s[1], s[2], sep = '-') [1] "Florida; a politician's-nightmare"
[1] 0.09615385 0.17857143 0.32258065 0.15625000 0.04608295 字符向量 > (s <- c("Florida; a politician's",'nightmare')) # 可用雙引號或單引號 [1] "Florida; a politician's" "nightmare" > paste(s[1], s[2]) [1] "Florida; a politician's nightmare" > paste(s[1], s[2], sep = '-') [1] "Florida; a politician's-nightmare"
[1] 0.09615385 0.17857143 0.32258065 0.15625000 0.04608295 字符向量 > labs <- paste(c("X","Y"), 1:10, sep="") > labs [1] "X1" "Y2" "X3" "Y4" "X5" "Y6" "X7" "Y8" "X9" "Y10" > labs1 <- paste(c("X","Y"), rep(1:10, each=2), sep="") > labs1 [1] "X1" "Y1" "X2" "Y2" "X3" "Y3" "X4" "Y4" "X5" "Y5" "X6" "Y6" "X7" "Y7" "X8" "Y8" "X9" "Y9" "X10" [20] "Y10"
遺漏值Missing values > x <- c(1, 20, 2, NA, 22) > order(x) [1] 1 3 2 5 4 > x[order(x)] [1] 1 2 20 22 NA > sort(x) [1] 1 2 20 22
遺漏值Missing values > x <- c(1, 20, 2, NA, 22) > is.na(x) [1] FALSE FALSE FALSE TRUE FALSE > sum(is.na(x)) # 計算向量x中NA的個數 [1] 1 > !is.na(x) [1] TRUE TRUE TRUE FALSE TRUE > sum(!is.na(x)) # 計算向量x中不是NA的個數 [1] 4
> mean(x) [1] NA 遺漏值Missing values > x <- c(1, 20, 2, NA, 22) > y<-x[!is.na(x)] # 將向量x中非NA的資料放到y > y [1] 1 20 2 22 > x[is.na(x)] <- 0 # 將向量x中NA的資料改為0 > x [1] 1 20 2 0 22 > mean(x) [1] 9
遺漏值Missing values > n <- length(x[!is.na(x)]) # 計算向量x中不是NA的個數 > n [1] 4 > new.x<-x[!is.na(x)] > new.x [1] 1 20 2 22 > sum(new.x) [1] 45 > mean<-sum(new.x)/n > mean [1] 11.25 > mean(x, na.rm=TRUE) # 計算去掉NA的平均數
絕對值 > y<-c(1, -2, 3, -4, -5, 6) > y[y < 0] <- -y[y < 0] > y [1] 1 2 3 4 5 6 > y<-abs(y)
矩陣計算 > A<-matrix(c(3,-1,0,-1,2,-1,0,-1,3),3,3) # 建立 3×3 矩陣 > A [,1] [,2] [,3] [1,] 3 -1 0 [2,] -1 2 -1 [3,] 0 -1 3 > B<-cbind(c(3,-2,4),c(1,3,7)) # 將兩向量做為矩陣的2行 > B [,1] [,2] [1,] 3 1 [2,] -2 3 [3,] 4 7 > dim(B) # 矩陣的維度 [1] 3 2
矩陣計算 > D<-matrix(1:6,nrow=3,ncol=2) # 建立 3×2矩陣 > D [,1] [,2] [1,] 1 4 [2,] 2 5 [3,] 3 6 > E<-matrix(1:6,nrow=3,ncol=2,byrow=T) # 矩陣由列填滿 > E [1,] 1 2 [2,] 3 4 [3,] 5 6
矩陣計算 > A%*%B # 矩陣相乘 [,1] [,2] [1,] 11 0 [2,] -11 -2 [3,] 14 18 > B+D #矩陣相加 [1,] 4 5 [2,] 0 8 [3,] 7 13
矩陣計算 > t(B) # 矩陣B的轉置矩陣 [,1] [,2] [,3] [1,] 3 -2 4 [2,] 1 3 7 > sum(diag(A)) # 矩陣A的trace [1] 8 > det(A) # 矩陣A的行列式 [1] 12 > prod(eigen(A)$values) # 用特徵值計算矩陣A的行列式
矩陣計算 > solve(A) # invert a matrix 求矩陣A的反矩陣 [,1] [,2] [,3] [1,] 0.41666667 0.25 0.08333333 [2,] 0.25000000 0.75 0.25000000 [3,] 0.08333333 0.25 0.41666667 > eigen(A) # 矩陣A的特徵值與特徵向量 $values [1] 4 3 1 $vectors [,1] [,2] [,3] [1,] 0.5773503 -7.071068e-01 0.4082483 [2,] -0.5773503 4.710277e-16 0.8164966 [3,] 0.5773503 7.071068e-01 0.4082483
矩陣計算 > p<-eigen(A) # 矩陣A的特徵值與特徵向量 > p$values # 矩陣A的特徵值 [1] 4 3 1 > p$vectors # 矩陣A的特徵向量 [,1] [,2] [,3] [1,] 0.5773503 -7.071068e-01 0.4082483 [2,] -0.5773503 4.710277e-16 0.8164966 [3,] 0.5773503 7.071068e-01 0.4082483
取出矩陣的行、列或元素 > X<-matrix(c(1,2,3,4,5,6,7,8,9),nrow=3,ncol=3) > X [,1] [,2] [,3] [1,] 1 4 7 [2,] 2 5 8 [3,] 3 6 9 > X[1,] # 取得第一列 [1] 1 4 7 > X[,2] # 取得第二行 [1] 4 5 6 > X[-2,] # 刪除第二列 [,1] [,2] [,3] [1,] 1 4 7 [2,] 3 6 9 > X[,-1] # 刪除第一行 [,1] [,2] [1,] 4 7 [2,] 5 8 [3,] 6 9 > X>4 [,1] [,2] [,3] [1,] FALSE FALSE TRUE [2,] FALSE TRUE TRUE [3,] FALSE TRUE TRUE > X[X>4] [1] 5 6 7 8 9
取出矩陣的行、列或元素 > (m <- matrix(1 : 20, ncol = 5, nrow = 4)) # 建立 4×5矩陣 [,1] [,2] [,3] [,4] [,5] [1,] 1 5 9 13 17 [2,] 2 6 10 14 18 [3,] 3 7 11 15 19 [4,] 4 8 12 16 20 > i <- c(2, 3) > j <- 2 : 4 > m[i, j] # 取得第2,3列第 2 到 4 行的元素 [,1] [,2] [,3] [1,] 6 10 14 [2,] 7 11 15
多個矩陣的建立 > v <- 1 : 24 > (a <- array(v, dim = c(3, 5, 2))) # 建立2個3×5矩陣 , , 1 [,1] [,2] [,3] [,4] [,5] [1,] 1 4 7 10 13 [2,] 2 5 8 11 14 [3,] 3 6 9 12 15 , , 2 [,1] [,2] [,3] [,4] [,5] [1,] 16 19 22 1 4 [2,] 17 20 23 2 5 [3,] 18 21 24 3 6
多個矩陣的建立 > x <- 1:24 > dim(x) <- c(2,12) # 建立2×12矩陣 > x [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [1,] 1 3 5 7 9 11 13 15 17 19 21 23 [2,] 2 4 6 8 10 12 14 16 18 20 22 24
多個矩陣的建立 > x <- 1:24 > dim(x) <-c(3,4,2) # 建立2個3×4矩陣 > x , , 1 [,1] [,2] [,3] [,4] [1,] 1 4 7 10 [2,] 2 5 8 11 [3,] 3 6 9 12 , , 2 [,1] [,2] [,3] [,4] [1,] 13 16 19 22 [2,] 14 17 20 23 [3,] 15 18 21 24
附加矩陣一起 > X<-matrix(c(1,2,3,4,5,6,7,8,9),nrow=3,ncol=3) > X [,1] [,2] [,3] [1,] 1 4 7 [2,] 2 5 8 [3,] 3 6 9 > Y<-matrix(c(9,8,7,6,5,4,3,2,1),nrow=3,ncol=3) > Y [1,] 9 6 3 [2,] 8 5 2 [3,] 7 4 1 > cbind(X,Y) # 將兩矩陣行合併為一新矩陣 [,1] [,2] [,3] [,4] [,5] [,6] [1,] 1 4 7 9 6 3 [2,] 2 5 8 8 5 2 [3,] 3 6 9 7 4 1
附加矩陣一起 > X<-matrix(c(1,2,3,4,5,6,7,8,9),nrow=3,ncol=3) > X [,1] [,2] [,3] [1,] 1 4 7 [2,] 2 5 8 [3,] 3 6 9 > Y<-matrix(c(9,8,7,6,5,4,3,2,1), nrow=3,ncol=3) > Y [1,] 9 6 3 [2,] 8 5 2 [3,] 7 4 1 > rbind(X,Y) # 將兩矩陣列合併為一新矩陣 [,1] [,2] [,3] [1,] 1 4 7 [2,] 2 5 8 [3,] 3 6 9 [4,] 9 6 3 [5,] 8 5 2 [6,] 7 4 1
為矩陣編索引 # 取出矩陣中 x[1,3], x[2,2] and x[3,1] 這3個元素 # 將這3個元素改為0 > x <- array(1:20, dim=c(4,5)) # 生成一個4列5行的矩陣 > x [,1] [,2] [,3] [,4] [,5] [1,] 1 5 9 13 17 [2,] 2 6 10 14 18 [3,] 3 7 11 15 19 [4,] 4 8 12 16 20 # 取出矩陣中 x[1,3], x[2,2] and x[3,1] 這3個元素 # 將這3個元素改為0
為矩陣編索引 # x[i]=c(x[1,3],x[2,2],x[3,1]) > i <- array(c(1:3,3:1), dim=c(3,2)) # 建立索引 > i [,1] [,2] [1,] 1 3 [2,] 2 2 [3,] 3 1 > x[i] [1] 9 6 3 # x[i]=c(x[1,3],x[2,2],x[3,1])
為矩陣編索引 > x[i] <- 0 > x[i] [1] 0 0 0 > x [,1] [,2] [,3] [,4] [,5] [1,] 1 5 0 13 17 [2,] 2 0 10 14 18 [3,] 0 7 11 15 19 [4,] 4 8 12 16 20
平均向量, 變異數共變數矩陣, 相關係數矩陣 p=3 variables ( assets資產, net income淨收入, stockholder equity股東權益) for U.S. industrial corporations Find the sample mean vector, sample variance-covariance matrix, and sample correlation matrix. Find the generalized sample variance and the total sample variance.
> data<-read.csv(file="D:/chilo/indust.csv", header=F) V1 V2 V3 1 26.7 3.3 15.8 2 38.4 2.4 19.5 3 19.2 1.7 8.4 4 20.6 1.0 8.2 5 18.9 0.9 9.4 6 14.8 1.0 7.6 7 19.0 2.7 12.6 8 14.2 0.8 7.3 9 13.7 1.1 5.9 10 7.7 0.2 2.9 > names(data) # variable names [1] "V1" "V2" "V3" > names(data)=c("x1","x2","x3") # change variable names to x1 x2 x3 > names(data) [1] "x1" "x2" "x3"
> apply(data,2,mean) # compute mean vector 樣本平均向量, 2代表求行平均, 若用1代表求列平均 x1 x2 x3 19.32 1.51 9.76 > s<-var(data,y=data) # compute variance-covariance matrix 樣本變異數共變數矩陣 > s x1 x2 x3 x1 70.410667 5.8731111 39.065333 x2 5.873111 0.9698889 4.114889 x3 39.065333 4.1148889 24.056000 > cor(data,y=data) # compute correlation matrix 樣本相關係數矩陣 x1 x2 x3 x1 1.0000000 0.7107028 0.9492062 x2 0.7107028 1.0000001 0.8518938 x3 0.9492062 0.8518938 0.9999999
> prod(eigen(s)$values) # compute generalized sample variance [1] 28.85923 > sum(diag(s)) # compute total sample variance [1] 95.43656
付出最多的人,也是收穫最多的人 ~共勉之~