In [1]:
snp <- c("AA Ct GG Ag AA Ct Ga AA tt CC GG AA")
ss <- strsplit(snp,split = " ")
snpM <- matrix(ss[[1]],3,4,byrow = T)
row.names(snpM) <- c("ID1","ID2","ID3")
colnames(snpM) <- c("snp1","snp2","snp3","snp4")
# Genotype:AA Aa aa
snpM
snp1snp2snp3snp4
ID1AACtGGAg
ID2AACtGaAA
ID3ttCCGGAA
In [11]:
library(synbreed)
gp <- create.gpData(geno = snpM)
gpb <- codeGeno(gp,label.heter = "alleleCoding", maf = 0.01, nmiss = 0.1,
                impute = TRUE, impute.type = "random", verbose = TRUE)
agenotype <- gpb$geno
# Genotype 0 1 2
MAF <- agenotype
MAF
   step 1  : 0 marker(s) removed with > 10 % missing values
   step 2  : Recoding alleles
   step 4  : 0 marker(s) removed with maf < 0.01
   step 7  : Imputing of missing values
   step 7d : Random imputing of missing values
   step 8  : No recoding of alleles necessary after imputation
   step 9  : 0 marker(s) removed with maf < 0.01
   step 10 : No duplicated markers removed
   End     : 4 marker(s) remain after the check

     Summary of imputation
    total number of missing values                : 0
    number of random imputations                  : 0
snp1snp2snp3snp4
ID10101
ID20110
ID32000
In [12]:
# Genotype -1 0 2
M <- MAF-1
M
snp1snp2snp3snp4
ID1-1 0-1 0
ID2-1 0 0-1
ID3 1-1-1-1
In [13]:
#对角线为每个个体含有纯合的位点,比如ID1有2个纯合位点
M%*%t(M)
ID1ID2ID3
ID1210
ID2120
ID3004
In [14]:
t(M)%*%M
snp1snp2snp3snp4
snp1 3-10 0
snp2-1 11 1
snp3 0 12 1
snp4 0 11 2
In [15]:
p1 <- c(0.383,0.244,0.167,0.067)
names(p1) <- paste("snp",1:4,sep = "")
p1
p=2*(p1-0.5)
P=matrix(p,byrow=T,nrow=nrow(M),ncol=ncol(M))
P
snp1
0.383
snp2
0.244
snp3
0.167
snp4
0.067
-0.234-0.512-0.666-0.866
-0.234-0.512-0.666-0.866
-0.234-0.512-0.666-0.866
In [16]:
Z=as.matrix(M-P)
Z
snp1snp2snp3snp4
ID1-0.766 0.512-0.334 0.866
ID2-0.766 0.512 0.666-0.134
ID3 1.234-0.488-0.334-0.134
In [17]:
b=1-p1
c=p1*b
d=2*(sum(c))
ZZt=Z%*%t(Z)
G1=(ZZt/d)
G1 = round(G1,3)
G1
ID1ID2ID3
ID1 1.374 0.410-0.964
ID2 0.410 1.053-1.124
ID3-0.964-1.124 1.519
In [18]:
D=1/(ncol(M)*(2*p1*(1-p1)))
G2 = Z%*%(D*t(Z))
G2 =round(G2,3)
G2
ID1ID2ID3
ID1 2.088 0.056-0.801
ID2 0.056 0.922-0.833
ID3-0.801-0.833 1.103
In [19]:
p1=array(0.5,ncol(M))
p=p1
P=matrix(p,byrow=T,nrow=nrow(M),ncol=ncol(M))
Z=as.matrix(M-P)
b=1-p1
c=p1*b
d=2*(sum(c))
ZZt=Z%*%t(Z)
G3=(ZZt/d)
G3
ID1ID2ID3
ID12.52.01.5
ID22.02.51.5
ID31.51.53.5
#

BLUP

In [54]:
library(GeneticsPed)
In [55]:
data(Mrode3.1)
In [56]:
x <- Pedigree(x=Mrode3.1, subject="calf", ascendant=c("sire", "dam"),ascendantSex=c("Male", "Female"), sex="sex")
In [57]:
x
calfsexsiredampwg
S4 Male S1 NA 4.5
S5 FemaleS3 S2 2.9
S6 FemaleS1 S2 3.9
S7 Male S4 S5 3.5
S8 Male S3 S6 5.0
In [60]:
## Observed/measured phenotype records:
y <- x$pwg
y
  1. 4.5
  2. 2.9
  3. 3.9
  4. 3.5
  5. 5
In [65]:
## Design matrix(X) for sex effect:
X <- model.matrix(~x$sex -1)
X
t(X)
x$sexFemalex$sexMale
101
210
310
401
501
12345
x$sexFemale01100
x$sexMale10011
In [67]:
## Design matrix (Z) for additive genetic effect. Note that first three columns do not have indicators since
## these columns are for individuals without phenotype records and apear in the model only through the
## pedigree
Z <- model.matrix(object = x, y=x$pwg, id=x$calf)
Z
S2S1S3S4S5S6S7S8
100010000
200001000
300000100
400000010
500000001
In [68]:
## Left hand side (LHS) of MME without G −1 :
 LHS <- rbind(cbind(t(X) %*% X, t(X) %*% Z),cbind(t(Z) %*% X, t(Z) %*% Z))
## or more efficiently
#LHS <- rbind(cbind(crossprod(X), crossprod(X, Z)),+ cbind(crossprod(Z, X), crossprod(Z)))
LHS
x$sexFemalex$sexMaleS2S1S3S4S5S6S7S8
x$sexFemale2000001100
x$sexMale0300010011
S20000000000
S10000000000
S30000000000
S40100010000
S51000001000
S61000000100
S70100000010
S80100000001
In [71]:
## We want Ainv for all individuals in the pedigree not only individuals with records
x <- extend(x)
x
Ainv <- inverseAdditive(x=x)
Ainv
calfsexsiredampwg
S2 FemaleNA NA NA
S1 Male NA NA NA
S3 Male NA NA NA
S4 Male S1 NA 4.5
S5 FemaleS3 S2 2.9
S6 FemaleS1 S2 3.9
S7 Male S4 S5 3.5
S8 Male S3 S6 5.0
S2S1S3S4S5S6S7S8
S2 2.0 0.5000000 0.5 0.0000000-1.0 -1.0 0 0
S1 0.5 1.8333333 0.0 -0.6666667 0.0 -1.0 0 0
S3 0.5 0.0000000 2.0 0.0000000-1.0 0.5 0 -1
S4 0.0 -0.6666667 0.0 1.8333333 0.5 0.0 -1 0
S5-1.0 0.0000000-1.0 0.5000000 2.5 0.0 -1 0
S6-1.0 -1.0000000 0.5 0.0000000 0.0 2.5 0 -1
S7 0.0 0.0000000 0.0 -1.0000000-1.0 0.0 2 0
S8 0.0 0.0000000-1.0 0.0000000 0.0 -1.0 0 2
In [75]:
sigma2a <- 20
sigma2e <- 40
alpha <- sigma2e / sigma2a
q <- nIndividual(x)
q
p <- nrow(LHS) - q
p
#LHS[(p + 1):(p + q), (p + 1):(p + q)] <- LHS[(p + 1):(p + q), (p + 1):(p + q)] + Ainv * alpha
8
2
In [76]:
LHS[(p + 1):(p + q), (p + 1):(p + q)]
S2S1S3S4S5S6S7S8
S200000000
S100000000
S300000000
S400010000
S500001000
S600000100
S700000010
S800000001
In [77]:
LHS[(p + 1):(p + q), (p + 1):(p + q)] <- LHS[(p + 1):(p + q), (p + 1):(p + q)] + Ainv * alpha
In [78]:
LHS
x$sexFemalex$sexMaleS2S1S3S4S5S6S7S8
x$sexFemale2 0 0 0.000000 0 0.000000 1 1 0 0
x$sexMale0 3 0 0.000000 0 1.000000 0 0 1 1
S20 0 4 1.000000 1 0.000000-2 -2 0 0
S10 0 1 3.666667 0 -1.333333 0 -2 0 0
S30 0 1 0.000000 4 0.000000-2 1 0 -2
S40 1 0 -1.333333 0 4.666667 1 0 -2 0
S51 0 -2 0.000000-2 1.000000 6 0 -2 0
S61 0 -2 -2.000000 1 0.000000 0 6 0 -2
S70 1 0 0.000000 0 -2.000000-2 0 5 0
S80 1 0 0.000000-2 0.000000 0 -2 0 5
In [79]:
## Right hand side (RHS) of MME:
RHS <- rbind(t(X) %*% y, t(Z) %*% y)
## or more efficiently
#RHS <- rbind(crossprod(X, y),crossprod(Z, y))
t(RHS)
x$sexFemalex$sexMaleS2S1S3S4S5S6S7S8
6.813 0 0 0 4.52.93.93.55
In [81]:
## Solution
sol <- solve(LHS) %*% RHS
## or more efficiently
sol <- solve(LHS, RHS)
sol
x$sexFemale 3.404430006
x$sexMale 4.358502330
S2-0.018770099
S1 0.098444576
S3-0.041084203
S4-0.008663123
S5-0.185732099
S6 0.176872088
S7-0.249458555
S8 0.182614688
In [ ]:
###################################################################################
In [3]:
library(GeneticsPed)
Loading required package: MASS

Attaching package: ‘MASS’

The following object is masked from ‘package:dplyr’:

    select


Attaching package: ‘GeneticsPed’

The following object is masked from ‘package:stats’:

    family

In [4]:
data(Mrode3.1)
In [5]:
x <- Pedigree(x=Mrode3.1, subject="calf", ascendant=c("sire", "dam"),ascendantSex=c("Male", "Female"), sex="sex")
In [4]:
x
calfsexsiredampwg
S4 Male S1 NA 4.5
S5 FemaleS3 S2 2.9
S6 FemaleS1 S2 3.9
S7 Male S4 S5 3.5
S8 Male S3 S6 5.0
In [5]:
## Observed/measured phenotype records:
y <- x$pwg
y
  1. 4.5
  2. 2.9
  3. 3.9
  4. 3.5
  5. 5
In [6]:
## Design matrix(X) for sex effect:
X <- model.matrix(~x$sex -1)
X
t(X)
x$sexFemalex$sexMale
101
210
310
401
501
12345
x$sexFemale01100
x$sexMale10011
In [13]:
## Design matrix (Z) for additive genetic effect. Note that first three columns do not have indicators since
## these columns are for individuals without phenotype records and apear in the model only through the
## pedigree
Z <- model.matrix(object = x, y=x$pwg, id=x$calf)
Z
S2S1S3S4S5S6S7S8
100010000
200001000
300000100
400000010
500000001
In [15]:
## Left hand side (LHS) of MME without G −1 :
tzz <- diag(0.001, nrow = nrow(t(Z) %*% Z), ncol = ncol(t(Z) %*% Z)) + t(Z) %*% Z
tzz
LHS <- rbind(cbind(t(X) %*% X, t(X) %*% Z),cbind(t(Z) %*% X, tzz))
## or more efficiently
#LHS <- rbind(cbind(crossprod(X), crossprod(X, Z)),+ cbind(crossprod(Z, X), crossprod(Z)))
LHS
S2S1S3S4S5S6S7S8
S20.0010.0000.0000.0000.0000.0000.0000.000
S10.0000.0010.0000.0000.0000.0000.0000.000
S30.0000.0000.0010.0000.0000.0000.0000.000
S40.0000.0000.0001.0010.0000.0000.0000.000
S50.0000.0000.0000.0001.0010.0000.0000.000
S60.0000.0000.0000.0000.0001.0010.0000.000
S70.0000.0000.0000.0000.0000.0001.0010.000
S80.0000.0000.0000.0000.0000.0000.0001.001
x$sexFemalex$sexMaleS2S1S3S4S5S6S7S8
x$sexFemale2 0 0.0000.0000.0000.0001.0001.0000.0000.000
x$sexMale0 3 0.0000.0000.0001.0000.0000.0001.0001.000
S20 0 0.0010.0000.0000.0000.0000.0000.0000.000
S10 0 0.0000.0010.0000.0000.0000.0000.0000.000
S30 0 0.0000.0000.0010.0000.0000.0000.0000.000
S40 1 0.0000.0000.0001.0010.0000.0000.0000.000
S51 0 0.0000.0000.0000.0001.0010.0000.0000.000
S61 0 0.0000.0000.0000.0000.0001.0010.0000.000
S70 1 0.0000.0000.0000.0000.0000.0001.0010.000
S80 1 0.0000.0000.0000.0000.0000.0000.0001.001
In [16]:
## Right hand side (RHS) of MME:
RHS <- rbind(t(X) %*% y, t(Z) %*% y)
## or more efficiently
#RHS <- rbind(crossprod(X, y),crossprod(Z, y))
t(RHS)
x$sexFemalex$sexMaleS2S1S3S4S5S6S7S8
6.813 0 0 0 4.52.93.93.55
In [18]:
## Solution
sol <- solve(LHS) %*% RHS
## or more efficiently
#sol <- solve(LHS, RHS)
sol
x$sexFemale 3.4000000
x$sexMale 4.3333333
S2 0.0000000
S1 0.0000000
S3 0.0000000
S4 0.1665002
S5-0.4995005
S6 0.4995005
S7-0.8325008
S8 0.6660007

lme4 for blup

In [1]:
library(lme4)
library(dplyr)
library(tidyr)
Loading required package: Matrix

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Attaching package: ‘tidyr’

The following object is masked from ‘package:Matrix’:

    expand

In [6]:
x
calfsexsiredampwg
S4 Male S1 NA 4.5
S5 FemaleS3 S2 2.9
S6 FemaleS1 S2 3.9
S7 Male S4 S5 3.5
S8 Male S3 S6 5.0
In [7]:
## We want Ainv for all individuals in the pedigree not only individuals with records
x <- extend(x)
x
calfsexsiredampwg
S2 FemaleNA NA NA
S1 Male NA NA NA
S3 Male NA NA NA
S4 Male S1 NA 4.5
S5 FemaleS3 S2 2.9
S6 FemaleS1 S2 3.9
S7 Male S4 S5 3.5
S8 Male S3 S6 5.0
In [8]:
xx <- x %>% gather(3:4, key = parents, value = val) %>% arrange(calf, sex) %>% filter(!is.na(pwg))
xx
str(xx)
Warning message:
“attributes are not identical across measure variables;
they will be dropped”
calfsexpwgparentsval
S5 Female2.9 sire S3
S5 Female2.9 dam S2
S6 Female3.9 sire S1
S6 Female3.9 dam S2
S4 Male 4.5 sire S1
S4 Male 4.5 dam NA
S7 Male 3.5 sire S4
S7 Male 3.5 dam S5
S8 Male 5.0 sire S3
S8 Male 5.0 dam S6
'data.frame':	10 obs. of  5 variables:
 $ calf   : Factor w/ 8 levels "S2","S5","S6",..: 2 2 3 3 6 6 7 7 8 8
 $ sex    : Factor w/ 2 levels "Female","Male": 1 1 1 1 2 2 2 2 2 2
 $ pwg    : num  2.9 2.9 3.9 3.9 4.5 4.5 3.5 3.5 5 5
 $ parents: chr  "sire" "dam" "sire" "dam" ...
 $ val    : chr  "S3" "S2" "S1" "S2" ...
In [9]:
temp <- lmer(pwg ~ 0 + sex + (1 | calf), xx)
Warning message in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
“convergence code 3 from bobyqa: bobyqa -- a trust region step failed to reduce q”Warning message in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
“unable to evaluate scaled gradient”Warning message in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
“Model failed to converge: degenerate  Hessian with 1 negative eigenvalues”
In [10]:
print(temp)

anova(temp)  ####求方差

ranef(temp)  ####求随机效应的BLUP值

fixef(temp)  ####求固定效应的BLUE值
Linear mixed model fit by REML ['lmerMod']
Formula: pwg ~ 0 + sex + (1 | calf)
   Data: xx
REML criterion at convergence: -88.6077
Random effects:
 Groups   Name        Std.Dev.
 calf     (Intercept) 4.564e-01
 Residual             1.386e-05
Number of obs: 10, groups:  calf, 5
Fixed Effects:
sexFemale    sexMale
    3.400      4.333
convergence code 3; 2 optimizer warnings; 0 lme4 warnings
DfSum SqMean SqF value
sex2 7.330078e-083.665039e-08190.688
$calf
   (Intercept)
S5  -0.4999989
S6   0.5000011
S4   0.1666662
S7  -0.8333338
S8   0.6666662
sexFemale
3.39999889286255
sexMale
4.33333376121258
In [11]:
model.matrix(temp)
sexFemalesexMale
110
210
310
410
501
601
701
801
901
1001
In [12]:
## X and Z are the fixed- and random-effects model matrices respectively
X1 = getME(temp, "X")
X1
Z1 = getME(temp, "Z")
Z1
sexFemalesexMale
110
210
310
410
501
601
701
801
901
1001
10 x 5 sparse Matrix of class "dgCMatrix"
   S5 S6 S4 S7 S8
1   1  .  .  .  .
2   1  .  .  .  .
3   .  1  .  .  .
4   .  1  .  .  .
5   .  .  1  .  .
6   .  .  1  .  .
7   .  .  .  1  .
8   .  .  .  1  .
9   .  .  .  .  1
10  .  .  .  .  1
In [13]:
LHS <- rbind(cbind(t(X1) %*% X1, t(X1) %*% Z1),cbind(t(Z1) %*% X1, t(Z1) %*% Z1))
LHS
7 x 7 sparse Matrix of class "dgCMatrix"
          sexFemale sexMale S5 S6 S4 S7 S8
sexFemale         4       .  2  2  .  .  .
sexMale           .       6  .  .  2  2  2
S5                2       .  2  .  .  .  .
S6                2       .  .  2  .  .  .
S4                .       2  .  .  2  .  .
S7                .       2  .  .  .  2  .
S8                .       2  .  .  .  .  2
In [14]:
## Left hand side (LHS) of MME without G −1 :
tzz <- diag(0.001, nrow = nrow(t(Z1) %*% Z1), ncol = ncol(t(Z1) %*% Z1)) + t(Z1) %*% Z1
tzz
LHS <- rbind(cbind(t(X1) %*% X1, t(X1) %*% Z1),cbind(t(Z1) %*% X1, tzz))
LHS
5 x 5 sparse Matrix of class "dgCMatrix"
      S5    S6    S4    S7    S8
S5 2.001 .     .     .     .
S6 .     2.001 .     .     .
S4 .     .     2.001 .     .
S7 .     .     .     2.001 .
S8 .     .     .     .     2.001
7 x 7 sparse Matrix of class "dgCMatrix"
          sexFemale sexMale    S5    S6    S4    S7    S8
sexFemale         4       . 2.000 2.000 .     .     .
sexMale           .       6 .     .     2.000 2.000 2.000
S5                2       . 2.001 .     .     .     .
S6                2       . .     2.001 .     .     .
S4                .       2 .     .     2.001 .     .
S7                .       2 .     .     .     2.001 .
S8                .       2 .     .     .     .     2.001
In [15]:
## Observed/measured phenotype records:
y <- xx$pwg
y
  1. 2.9
  2. 2.9
  3. 3.9
  4. 3.9
  5. 4.5
  6. 4.5
  7. 3.5
  8. 3.5
  9. 5
  10. 5
In [16]:
## Right hand side (RHS) of MME:
RHS <- rbind(t(X1) %*% y, t(Z1) %*% y)
## or more efficiently
#RHS <- rbind(crossprod(X, y),crossprod(Z, y))
t(RHS)
1 x 7 Matrix of class "dgeMatrix"
     sexFemale sexMale  S5  S6 S4 S7 S8
[1,]      13.6      26 5.8 7.8  9  7 10
In [17]:
## Solution
sol <- solve(LHS) %*% RHS
## or more efficiently
#sol <- solve(LHS, RHS)
sol
7 x 1 Matrix of class "dgeMatrix"
           [,1]
[1,]  3.4000000
[2,]  4.3333333
[3,] -0.4997501
[4,]  0.4997501
[5,]  0.1665834
[6,] -0.8329169
[7,]  0.6663335
In [18]:
VarCorr(temp)
 Groups   Name        Std.Dev.
 calf     (Intercept) 4.5644e-01
 Residual             1.3864e-05
In [ ]:
##############################################################################
In [28]:
summary(temp)
Linear mixed model fit by REML ['lmerMod']
Formula: pwg ~ 0 + sex + (1 | calf)
   Data: xx

REML criterion at convergence: -88.6

Scaled residuals:
       Min         1Q     Median         3Q        Max
-2.773e-05 -1.664e-05  5.545e-06  1.664e-05  2.218e-05

Random effects:
 Groups   Name        Variance  Std.Dev.
 calf     (Intercept) 2.083e-01 4.564e-01
 Residual             1.922e-10 1.386e-05
Number of obs: 10, groups:  calf, 5

Fixed effects:
          Estimate Std. Error t value
sexFemale   3.4000     0.3227   10.54
sexMale     4.3333     0.2635   16.44

Correlation of Fixed Effects:
        sexFml
sexMale 0.000
convergence code: 3
unable to evaluate scaled gradient
Model failed to converge: degenerate  Hessian with 1 negative eigenvalues