GitHub link: https://github.com/ZitongYu/CDCN/tree/master/CVPR2020_paper_codes
Central difference convolution implementation:
import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


######################## Central-difference (second order, with 9 parameters and a const theta for 3x3 kernel) 2D Convolution ##############################
## | a1 a2 a3 |   | w1 w2 w3 |
## | a4 a5 a6 | * | w4 w5 w6 | --> output = \sum_{i=1}^{9}(ai * wi) - \sum_{i=1}^{9}wi * a5 --> Conv2d (k=3) - Conv2d (k=1)
## | a7 a8 a9 |   | w7 w8 w9 |
##
## --> output =
## | a1 a2 a3 |   | w1 w2 w3 |
## | a4 a5 a6 | * | w4 w5 w6 | - | a | * | w_sum |   (kernel_size=1x1, padding=0)
## | a7 a8 a9 |   | w7 w8 w9 |
class Conv2d_cd(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1,
                 padding=1, dilation=1, groups=1, bias=False, theta=0.7):
        super(Conv2d_cd, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride,
                              padding=padding, dilation=dilation, groups=groups, bias=bias)
        self.theta = theta  # weight of the central-difference term

    def forward(self, x):
        out_normal = self.conv(x)  # vanilla convolution
        if math.fabs(self.theta - 0.0) < 1e-8:
            return out_normal
        else:
            # Sum each 3x3 kernel into a 1x1 kernel; convolving with it yields \sum_i wi * a5
            [C_out, C_in, kernel_size, kernel_size] = self.conv.weight.shape
            kernel_diff = self.conv.weight.sum(2).sum(2)
            kernel_diff = kernel_diff[:, :, None, None]
            out_diff = F.conv2d(input=x, weight=kernel_diff, bias=self.conv.bias,
                                stride=self.conv.stride, padding=0, groups=self.conv.groups)
            return out_normal - self.theta * out_diff
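A quick sanity check (a minimal sketch, not part of the repository): with theta = 0 the layer reduces to its inner vanilla convolution, and with theta = 0.7 it subtracts 0.7 times a 1x1 convolution whose weights are the summed 3x3 kernels.
# Minimal sanity-check sketch for Conv2d_cd (illustrative only)
x = torch.randn(2, 3, 32, 32)

# theta = 0: the layer is exactly its inner vanilla convolution
cdc0 = Conv2d_cd(3, 8, theta=0.0)
assert torch.allclose(cdc0(x), cdc0.conv(x))

# theta = 0.7: vanilla conv minus 0.7 * (1x1 conv with the summed 3x3 kernels)
cdc = Conv2d_cd(3, 8, theta=0.7)
kernel_diff = cdc.conv.weight.sum(2).sum(2)[:, :, None, None]
assert torch.allclose(cdc(x), cdc.conv(x) - 0.7 * F.conv2d(x, kernel_diff), atol=1e-5)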
Spatial attention module:
class SpatialAttention(nn.Module):
    def __init__(self, kernel=3):
        super(SpatialAttention, self).__init__()
        self.conv1 = nn.Conv2d(2, 1, kernel_size=kernel, padding=kernel // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Channel-wise average and max pooling, then a conv + sigmoid to produce a spatial mask
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)
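A minimal usage sketch (variable names are illustrative, not from the repository): the module returns an (N, 1, H, W) mask that is broadcast-multiplied onto the feature map being attended, which is how CDCNpp re-weights its three block outputs.
# Minimal usage sketch for SpatialAttention (illustrative names)
feat = torch.randn(4, 128, 64, 64)   # e.g. the output of Block1
sa = SpatialAttention(kernel=7)
mask = sa(feat)                      # (4, 1, 64, 64), values in (0, 1)
feat_refined = mask * feat           # broadcast over the channel dimension
print(feat_refined.shape)            # torch.Size([4, 128, 64, 64])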
Network structure (printed model):
CDCNpp(
  (conv1): Sequential(
    (0): Conv2d_cd(
      (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (Block1): Sequential(
    (0): Conv2d_cd(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d_cd(
      (conv): Conv2d(128, 204, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (4): BatchNorm2d(204, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d_cd(
      (conv): Conv2d(204, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (Block2): Sequential(
    (0): Conv2d_cd(
      (conv): Conv2d(128, 153, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): BatchNorm2d(153, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d_cd(
      (conv): Conv2d(153, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d_cd(
      (conv): Conv2d(128, 179, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (7): BatchNorm2d(179, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): Conv2d_cd(
      (conv): Conv2d(179, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (Block3): Sequential(
    (0): Conv2d_cd(
      (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d_cd(
      (conv): Conv2d(128, 153, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (4): BatchNorm2d(153, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d_cd(
      (conv): Conv2d(153, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (lastconv1): Sequential(
    (0): Conv2d_cd(
      (conv): Conv2d(384, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d_cd(
      (conv): Conv2d(128, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (4): ReLU()
  )
  (sa1): SpatialAttention(
    (conv1): Conv2d(2, 1, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)
    (sigmoid): Sigmoid()
  )
  (sa2): SpatialAttention(
    (conv1): Conv2d(2, 1, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
    (sigmoid): Sigmoid()
  )
  (sa3): SpatialAttention(
    (conv1): Conv2d(2, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (sigmoid): Sigmoid()
  )
  (downsample32x32): Upsample(size=(32, 32), mode=bilinear)
)
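The printout explains the 384 input channels of lastconv1: the three attention-refined block outputs (128 channels each) are resized to 32x32 and concatenated before the depth head. A rough sketch of that fusion stage, assuming it mirrors CDCNpp.forward in the repository (variable names are illustrative):
# Rough sketch of the CDCNpp fusion stage (hedged: assumed from the printed
# structure above, not copied verbatim from the repository).
def fuse_blocks(model, x_Block1, x_Block2, x_Block3):
    # Each block output is re-weighted by its spatial attention mask and resized to 32x32
    x1 = model.downsample32x32(model.sa1(x_Block1) * x_Block1)  # (N, 128, 32, 32)
    x2 = model.downsample32x32(model.sa2(x_Block2) * x_Block2)  # (N, 128, 32, 32)
    x3 = model.downsample32x32(model.sa3(x_Block3) * x_Block3)  # (N, 128, 32, 32)
    x_concat = torch.cat((x1, x2, x3), dim=1)                   # (N, 384, 32, 32)
    map_x = model.lastconv1(x_concat)                           # (N, 1, 32, 32) predicted depth map
    return map_x.squeeze(1)                                     # (N, 32, 32)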
Contrastive depth loss L_CDL (Contrastive Depth Loss):
def contrast_depth_conv(input):
    ''' compute contrast depth in both of (out, label) '''
    '''
        input  32x32
        output 8x32x32
    '''
    # Eight 3x3 kernels, one per neighbour direction: neighbour minus center
    kernel_filter_list = [
        [[1, 0, 0], [0, -1, 0], [0, 0, 0]], [[0, 1, 0], [0, -1, 0], [0, 0, 0]], [[0, 0, 1], [0, -1, 0], [0, 0, 0]],
        [[0, 0, 0], [1, -1, 0], [0, 0, 0]], [[0, 0, 0], [0, -1, 1], [0, 0, 0]],
        [[0, 0, 0], [0, -1, 0], [1, 0, 0]], [[0, 0, 0], [0, -1, 0], [0, 1, 0]], [[0, 0, 0], [0, -1, 0], [0, 0, 1]]
    ]
    kernel_filter = np.array(kernel_filter_list, np.float32)
    kernel_filter = torch.from_numpy(kernel_filter).float().cuda()
    # weights (out_channel, in_channel/groups, kernel, kernel)
    kernel_filter = kernel_filter.unsqueeze(dim=1)
    # replicate the single-channel depth map 8 times so each kernel sees its own copy
    input = input.unsqueeze(dim=1).expand(input.shape[0], 8, input.shape[1], input.shape[2])
    contrast_depth = F.conv2d(input, weight=kernel_filter, groups=8)  # depthwise conv
    return contrast_depth
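A quick shape check (a sketch; it needs a CUDA device because the function calls .cuda() internally). Note that F.conv2d is called without padding, so a 32x32 map actually yields 30x30 contrast maps even though the docstring says 8x32x32:
# Quick shape check for contrast_depth_conv (requires a CUDA device)
depth = torch.rand(4, 32, 32).cuda()   # batch of predicted/label depth maps
out = contrast_depth_conv(depth)
print(out.shape)                       # torch.Size([4, 8, 30, 30]): 8 directional contrasts, no padding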
class Contrast_depth_loss(nn.Module):  # Pearson range [-1, 1] so if < 0, abs|loss| ; if > 0, 1 - loss
    def __init__(self):
        super(Contrast_depth_loss, self).__init__()
        return

    def forward(self, out, label):
        '''
        compute contrast depth in both of (out, label),
        then get the loss of them
        tf.atrous_convd match tf-versions: 1.4
        '''
        contrast_out = contrast_depth_conv(out)
        contrast_label = contrast_depth_conv(label)

        criterion_MSE = nn.MSELoss().cuda()
        loss = criterion_MSE(contrast_out, contrast_label)
        #loss = torch.pow(contrast_out - contrast_label, 2)
        #loss = torch.mean(loss)
        return loss
Note: the code does not include a classification loss.
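The predicted depth map is therefore the only supervision target. A sketch of the combined training loss, assuming it follows the repository's training script (pixel-wise MSE on the predicted map plus the contrastive depth loss above):
# Sketch of the overall training objective (assumption: mirrors the repo's train script)
criterion_absolute_loss = nn.MSELoss().cuda()
criterion_contrastive_loss = Contrast_depth_loss().cuda()

def depth_map_loss(map_x, map_label):
    absolute_loss = criterion_absolute_loss(map_x, map_label)       # pixel-wise depth regression
    contrastive_loss = criterion_contrastive_loss(map_x, map_label) # local depth-contrast consistency
    return absolute_loss + contrastive_loss                          # no binary classification term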
Data preprocessing: Spoofing_train
Note: this needs to be rewritten to fit your own data.
Face anti-spoofing (scoring):
# get the inputs
inputs, spoof_label = sample_batched['image_x'].cuda(), sample_batched['spoofing_label'].cuda()
val_maps = sample_batched['val_map_x'].cuda()  # binary map from PRNet

map_score = 0.0
for frame_t in range(inputs.shape[1]):  # loop over the frames of the video clip
    map_x, embedding, x_Block1, x_Block2, x_Block3, x_input = model(inputs[:, frame_t, :, :, :])
    # ratio between the predicted depth map and the PRNet pseudo ground truth
    score_norm = torch.sum(map_x) / torch.sum(val_maps[:, frame_t, :, :])
    map_score += score_norm
map_score = map_score / inputs.shape[1]  # average over frames

map_score_list.append('{} {}\n'.format(map_score, spoof_label[0][0]))
The anti-spoofing score is obtained as the ratio between the predicted depth map and the pseudo ground-truth map generated by PRNet (summed over pixels and averaged over the frames); the exact details still need further verification.
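A hypothetical decision step on top of map_score (the 0.5 threshold is illustrative only; in practice it would be chosen on a validation set): a live face should produce a depth map close to the PRNet pseudo ground truth (map_score near 1), while a flat spoof should produce a near-zero map (map_score near 0).
# Hypothetical live/spoof decision from map_score; the threshold value is illustrative
threshold = 0.5
is_live = float(map_score) >= threshold
print('live' if is_live else 'spoof', float(map_score))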