基因组选择中, H矩阵的构建比较关键, 下面介绍一下, 常用的设置H矩阵参数的方法.
1. 基因组选择中H矩阵的构建
这里的1为非测序个体, 2为测序个体, A11, A12, A21, A22, 可以由系谱构建的A矩阵, 提取相应的矩阵即可, G为基因组构建的矩阵, 可以根据上面的公式, 进行H矩阵的构建, 相关代码, 见: 怎么构建H矩阵.
2. 直接构建 H − 1 H^{-1} H−1矩阵
因为, 在一步法混合线性方程组中, 直接利用的是H逆矩阵, 因此可以直接构建H逆矩阵, 更加方便.
H逆矩阵构建方法:
3. 构建H逆矩阵需要考虑的参数
3.1 矫正G矩阵到A22矩阵的水平
原因:
G矩阵构建时依赖的是测序个体的基因频率构建而来, 它的频率可能与A矩阵(可以追溯到基础群体base population)不一样, 这导致G矩阵和A22矩阵存在尺度上的差异, 因此需要矫正.
矫正方法是建立两个方程组:
- G矩阵对角线的平均值与A22对角线平均值一致
- G矩阵非对角线的平均值与A22非对角线平均值一致
3.2 调整G矩阵和A22矩阵的相对权重
原因
对于某些性状, G矩阵可能无法解释所有的变异, 这时可以设定A矩阵可以解释的比例. 另外, 也可以避免构建G矩阵时的奇异矩阵.
3.3 设置tau 和omega
对于某些性状, 适当设置tau 和 omega, 可以矫正无偏性.
4. 完整的H逆矩阵构建方法
5. 构建H逆矩阵代码展示(Julia代码)
Julia是新一代的计算科学语言, 速度非常快, 比R语言要快很多, 内存占用也比较少, 是时候尝试一下新东西了.
代码思路:
- 依赖的包: NamedArray, 主要用于矩阵根据名称进行提取
- 参数介绍:
- id_full: 为所有的ID, A矩阵的id
- id_geno: 为测序的ID, G矩阵的id
- Amatrix: A矩阵
- Gmatrix: G矩阵
- a = 0.95, 表示G矩阵的比例, 默认0.95
- b = 0.05, 表示A22矩阵的比例, 默认1-0.95=0.05
- tau =1, 默认值为1
- omega=1, 默认值为1
- 首先定义一个函数diag, 因为julia中没有diag函数.
- 提取A11, A12, A21, A22
- 根据对角线和非对角线方差, 计算a和b
- 将相关参数加进去, 构建H逆矩阵
function hmatrix_julia_adjust(id_full,id_geno,Amatrix,Gmatrix,a =0.95,b=0.05,tau=1,omega=1)
print("G* = a*G + b*A22, a is ",a,"; b is ",b,"\n")
print("iG = tau*G - omega*A22, tau is ",tau,"; omega is ",omega,"\n\n")
function diag(mat)
dd = [mat[i,i] for i in 1:size(mat,1)]
return dd
end
id1 = id_full
idb = id_geno
A = Amatrix
G = Gmatrix
ida = setdiff(id1,idb)
A = NamedArray(A,(id1,id1))
G = NamedArray(G,(idb,idb))
reb = idb
rea = ida
A22 = A[reb,reb]
diagA22 = diag(A22)
offdiagA22 = []
for i in 1:size(A22,1)
for j in 1:size(A22,2)
if(i != j )
push!(offdiagA22,A22[i,j])
end
end
end
diagG = diag(G)
offdiagG = []
for i in 1:size(G,1)
for j in 1:size(G,2)
if(i != j )
push!(offdiagG,G[i,j])
end
end
end
print("\n\nStatistic of Rel Matrix G\n","\t\t","N","\t","Mean","\t","Min","\t","Max","\t","\n",
"Diagonal\t",length(diagG),"\t",round.(mean(diagG),digits=3),"\t",
round.(minimum(diagG),digits=3),"\t",
round.(maximum(diagG),digits=3),"\t",
"\nOff-diagonal\t",length(offdiagG),"\t",
round.(mean(offdiagG),digits=3),"\t",round.(minimum(offdiagG),digits=3),"\t",
round.(maximum(offdiagG),digits=3),"\t","\n\n")
print("\n\nStatistic of Rel Matrix A22\n","\t\t","N","\t","Mean","\t","Min","\t","Max","\t","\n",
"Diagonal\t",length(diagA22),"\t",round.(mean(diagA22),digits=3),"\t",
round.(minimum(diagA22),digits=3),"\t",
round.(maximum(diagA22),digits=3),"\t",
"\nOff-diagonal\t",length(offdiagA22),"\t",
round.(mean(offdiagA22),digits=3),"\t",round.(minimum(offdiagA22),digits=3),"\t",
round.(maximum(offdiagA22),digits=3),"\t","\n\n")
# 根据方程计算alpha和beta两个参数值
meanoffdiagG=mean(offdiagG)
meandiagG=mean(diagG)
meanoffdiagA22=mean(offdiagA22)
meandiagA22=mean(diagA22)
print("Means_off_diag\t","Means_diag:\n","G\t",meanoffdiagG,"\t",meandiagG,
"\nA22\t",meanoffdiagA22,"\t",meandiagA22,"\n")
beta=(meandiagA22 - meanoffdiagA22)/(meandiagG - meanoffdiagG)
alpha=meandiagA22-meandiagG*beta
print("Adjust G, and the value of alpha and beta is:",alpha,beta,"\n\n\n")
G=alpha .+ beta .* G #
# 在将a和b的参数加上
G = a .* G + b .* A22
iA22 = inv(A22)
iG =inv(G)
iA = inv(A)
x22 = tau*iG - omega*iA22
diagx22 = diag(x22)
offdiagx22 = []
for i in 1:size(x22,1)
for j in 1:size(x22,2)
if(i != j )
push!(offdiagx22,x22[i,j])
end
end
end
print("\n\nStatistic of Rel Matrix x22\n","\t\t","N","\t","Mean","\t","Min","\t","Max","\t","\n",
"Diagonal\t",length(diagx22),"\t",round.(mean(diagx22),digits=3),"\t",
round.(minimum(diagx22),digits=3),"\t",
round.(maximum(diagx22),digits=3),"\t",
"\nOff-diagonal\t",length(offdiagx22),"\t",
round.(mean(offdiagx22),digits=3),"\t",round.(minimum(offdiagx22),digits=3),"\t",
round.(maximum(offdiagx22),digits=3),"\t","\n\n")
# 构建H逆矩阵
iHaa = iA[rea,rea]
iHab = iA[rea,reb]
iHba = iHab'
iHbb = iA[reb,reb] + x22
function cbind(A,B)
X = vcat(A',B')'
return(X)
end
function rbind(A,B)
X = vcat(A,B)
return(X)
end
iH = cbind(rbind(iHaa,iHba),rbind(iHab,iHbb))
return(round.(iH,digits=6))
end
欢迎关注我的公众号: