update code for generator blocks (#9)
* update code for AddResBlock

* add backbones

* update inference

* Update readme
NastyaMittseva authored Jan 11, 2022
1 parent 4a448a7 commit 01d4cc2
Showing 8 changed files with 310 additions and 122 deletions.
17 changes: 11 additions & 6 deletions README.md
@@ -54,13 +54,18 @@
We also provide the training code for the face swap model:
1. Download the [VGGFace2 Dataset](https://www.robots.ox.ac.uk/~vgg/data/vgg_face/).
2. Crop and align faces with our detection model.
-> python preprocess_vgg.py --path_to_dataset ./VggFace2/VGG-Face2/data/preprocess_train --save_path ./VggFace2-crop
+```bash
+python preprocess_vgg.py --path_to_dataset {PATH_TO_DATASET} --save_path {SAVE_PATH}
+```
3. Start training.
-> python train.py --run_name {YOUR_RUN_NAME}
-We provide a lot of different options for the training. More info about each option you can find in `train.py` file. If you would like to use wandb logging of the experiments, you should login to wandb first -- `wandb login`.
+```bash
+python train.py --run_name {YOUR_RUN_NAME}
+```
+We provide many different options for training; more info about each option can be found in `train.py`. If you would like to use wandb logging of the experiments, you should log in to wandb first: `wandb login`.

### Tips:
-1. For first epochs we suggest not to use eye detection loss if you train from scratch
-2. In case of finetuning model you can variate losses coefficients to make result look more like source identity, or vice versa, save features and attributes of target face
+1. For the first epochs, we suggest not using the eye detection loss or the scheduler if you train from scratch.
+2. When fine-tuning the model, you can vary the loss coefficients to make the result look more like the source identity or, vice versa, to preserve the features and attributes of the target face.
+3. You can change the backbone of the attribute encoder and the number of blocks in AddResBlock with the `--backbone` and `--num_blocks` parameters.
+4. For fine-tuning you can use our pretrained generator and discriminator weights from the `weights` folder (see the loading sketch below). We provide weights for models with the unet backbone and 1-3 blocks in AddResBlock.
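
For reference, constructing and loading one of these released generators mirrors `init_models` in `inference.py` below; a minimal sketch, assuming the checkpoints were fetched into `./weights` by `download_models.sh`:

```python
import torch
from network.AEI_Net import AEI_Net

# Generator matching the main released checkpoint: unet backbone, 2 blocks.
G = AEI_Net('unet', num_blocks=2, c_id=512)
G.eval()
G.load_state_dict(torch.load('weights/G_unet_2blocks.pth', map_location='cpu'))
```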

88 changes: 39 additions & 49 deletions SberSwapInference.ipynb

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions download_models.sh
@@ -6,8 +6,16 @@ wget -P ./arcface_model https://github.com/Danyache/sber-swap/releases/download/
wget -P ./insightface_func/models/antelope https://github.com/Danyache/sber-swap/releases/download/antelope/glintr100.onnx
wget -P ./insightface_func/models/antelope https://github.com/Danyache/sber-swap/releases/download/antelope/scrfd_10g_bnkps.onnx

-# load model itself
-wget -P ./weights https://github.com/Danyache/sber-swap/releases/download/sber-swap-v1.0/G_0_035000_init_arch_arcface2.pth
+# load G and D models with 1, 2, 3 blocks
+# model with 2 blocks is main
+wget -P ./weights https://github.com/Danyache/sber-swap/releases/download/sber-swap-v2.0/G_unet_2blocks.pth
+wget -P ./weights https://github.com/Danyache/sber-swap/releases/download/sber-swap-v2.0/D_unet_2blocks.pth
+
+wget -P ./weights https://github.com/Danyache/sber-swap/releases/download/sber-swap-v2.0/G_unet_1block.pth
+wget -P ./weights https://github.com/Danyache/sber-swap/releases/download/sber-swap-v2.0/D_unet_1block.pth
+
+wget -P ./weights https://github.com/Danyache/sber-swap/releases/download/sber-swap-v2.0/G_unet_3blocks.pth
+wget -P ./weights https://github.com/Danyache/sber-swap/releases/download/sber-swap-v2.0/D_unet_3blocks.pth

# load model for eyes loss
wget -P ./AdaptiveWingLoss/AWL_detector https://github.com/Danyache/sber-swap/releases/download/awl_detector/WFLW_4HG.pth
9 changes: 6 additions & 3 deletions inference.py
@@ -23,7 +23,7 @@ def init_models(args):
    app.prepare(ctx_id= 0, det_thresh=0.6, det_size=(640,640))

    # main model for generation
-    G = AEI_Net(c_id=512)
+    G = AEI_Net(args.backbone, num_blocks=args.num_blocks, c_id=512)
    G.eval()
    G.load_state_dict(torch.load(args.G_path, map_location=torch.device('cpu')))
    G = G.cuda()
@@ -127,8 +127,11 @@ def main(args):
if __name__ == "__main__":
    parser = argparse.ArgumentParser()

-    # dataset params
-    parser.add_argument('--G_path', default='weights/G_0_035000_init_arch_arcface2.pth', type=str, help='Path to weights for G')
+    # Generator params
+    parser.add_argument('--G_path', default='weights/G_unet_2blocks.pth', type=str, help='Path to weights for G')
+    parser.add_argument('--backbone', default='unet', const='unet', nargs='?', choices=['unet', 'linknet', 'resnet'], help='Backbone for attribute encoder')
+    parser.add_argument('--num_blocks', default=2, type=int, help='Numbers of AddBlocks at AddResblock')

    parser.add_argument('--batch_size', default=40, type=int)
    parser.add_argument('--crop_size', default=224, type=int, help="Don't change this")
    parser.add_argument('--use_sr', default=False, type=bool, help='True for super resolution on swap images')
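
The checkpoint passed via `--G_path` has to match the `--backbone` and `--num_blocks` flags. A hypothetical pairing, inferred from the checkpoint file names in `download_models.sh` rather than stated anywhere in the repo:

```python
# Assumed mapping of released checkpoints to architecture flags:
CHECKPOINTS = {
    'weights/G_unet_1block.pth':  dict(backbone='unet', num_blocks=1),
    'weights/G_unet_2blocks.pth': dict(backbone='unet', num_blocks=2),  # main model
    'weights/G_unet_3blocks.pth': dict(backbone='unet', num_blocks=3),
}
```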
60 changes: 33 additions & 27 deletions network/AADLayer.py
@@ -3,7 +3,7 @@


class AADLayer(nn.Module):
-    def __init__(self, c_x, attr_c, c_id=256):
+    def __init__(self, c_x, attr_c, c_id):
        super(AADLayer, self).__init__()
        self.attr_c = attr_c
        self.c_id = c_id
@@ -37,41 +37,47 @@ def forward(self, h_in, z_attr, z_id):
        out = (torch.ones_like(M).to(M.device) - M) * A + M * I
        return out



+class AddBlocksSequential(nn.Sequential):
+    def forward(self, *inputs):
+        h, z_attr, z_id = inputs
+        for i, module in enumerate(self._modules.values()):
+            if i%3 == 0 and i > 0:
+                inputs = (inputs, z_attr, z_id)
+            if type(inputs) == tuple:
+                inputs = module(*inputs)
+            else:
+                inputs = module(inputs)
+        return inputs
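
With `num_blocks=2`, the list built in `AAD_ResBlk.__init__` below is `[AADLayer, ReLU, Conv2d]` twice, and `AddBlocksSequential` re-injects `(z_attr, z_id)` at every module index divisible by 3, so only the `AADLayer`s receive the conditioning inputs. A hand-unrolled equivalent, as a sketch (`unrolled_add_blocks` is a hypothetical helper, not repo code):

```python
# Equivalent unrolled forward pass of blk.add_blocks for num_blocks=2:
def unrolled_add_blocks(blk, h, z_attr, z_id):
    aad1, relu1, conv1, aad2, relu2, conv2 = blk.add_blocks  # nn.Sequential iterates in order
    x = conv1(relu1(aad1(h, z_attr, z_id)))  # first triple keeps cin channels
    x = conv2(relu2(aad2(x, z_attr, z_id)))  # last triple maps cin -> cout
    return x
```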


class AAD_ResBlk(nn.Module):
-    def __init__(self, cin, cout, c_attr, c_id=256):
+    def __init__(self, cin, cout, c_attr, c_id, num_blocks):
        super(AAD_ResBlk, self).__init__()
        self.cin = cin
        self.cout = cout

-        self.AAD1 = AADLayer(cin, c_attr, c_id)
-        self.conv1 = nn.Conv2d(cin, cin, kernel_size=3, stride=1, padding=1, bias=False)
-        self.relu1 = nn.ReLU(inplace=True)
-
-        self.AAD2 = AADLayer(cin, c_attr, c_id)
-        self.conv2 = nn.Conv2d(cin, cout, kernel_size=3, stride=1, padding=1, bias=False)
-        self.relu2 = nn.ReLU(inplace=True)
+        add_blocks = []
+        for i in range(num_blocks):
+            out = cin if i < (num_blocks-1) else cout
+            add_blocks.extend([AADLayer(cin, c_attr, c_id),
+                               nn.ReLU(inplace=True),
+                               nn.Conv2d(cin, out, kernel_size=3, stride=1, padding=1, bias=False)
+                               ])
+        self.add_blocks = AddBlocksSequential(*add_blocks)

        if cin != cout:
-            self.AAD3 = AADLayer(cin, c_attr, c_id)
-            self.conv3 = nn.Conv2d(cin, cout, kernel_size=3, stride=1, padding=1, bias=False)
-            self.relu3 = nn.ReLU(inplace=True)
+            last_add_block = [AADLayer(cin, c_attr, c_id),
+                              nn.ReLU(inplace=True),
+                              nn.Conv2d(cin, cout, kernel_size=3, stride=1, padding=1, bias=False)]
+            self.last_add_block = AddBlocksSequential(*last_add_block)


    def forward(self, h, z_attr, z_id):
-        x = self.AAD1(h, z_attr, z_id)
-        x = self.relu1(x)
-        x = self.conv1(x)
-
-        x = self.AAD2(x,z_attr, z_id)
-        x = self.relu2(x)
-        x = self.conv2(x)
-
+        x = self.add_blocks(h, z_attr, z_id)
        if self.cin != self.cout:
-            h = self.AAD3(h, z_attr, z_id)
-            h = self.relu3(h)
-            h = self.conv3(h)
+            h = self.last_add_block(h, z_attr, z_id)
        x = x + h

        return x
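
The `out = cin if i < (num_blocks-1) else cout` line in `__init__` above means every triple preserves the input width except the last, which narrows to `cout`; the shortcut branch (`last_add_block`) then maps `h` to the same width before the residual sum. A quick check with assumed sizes:

```python
# Conv in/out channels chosen by the loop for hypothetical cin=256, cout=128:
num_blocks, cin, cout = 3, 256, 128
dims = [(cin, cin if i < (num_blocks - 1) else cout) for i in range(num_blocks)]
print(dims)  # [(256, 256), (256, 256), (256, 128)]: only the last conv narrows
```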


92 changes: 58 additions & 34 deletions network/AEI_Net.py
@@ -2,6 +2,7 @@
import torch.nn as nn
import torch.nn.functional as F
from .AADLayer import *
+from network.resnet import MLAttrEncoderResnet


def weight_init(m):
@@ -30,31 +31,42 @@ def __init__(self, in_c, out_c, norm=nn.BatchNorm2d):
        self.bn = norm(out_c)
        self.lrelu = nn.LeakyReLU(0.1, inplace=True)

-    def forward(self, input, skip):
+    def forward(self, input, skip, backbone):
        x = self.deconv(input)
        x = self.bn(x)
        x = self.lrelu(x)
-        return torch.cat((x, skip), dim=1)
+        if backbone == 'linknet':
+            return x+skip
+        else:
+            return torch.cat((x, skip), dim=1)
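
This branch is also why the linknet decoder widths below are half the unet ones, and why the `c_attr` arguments in the linknet branch of `AADGenerator` further down are halved: an additive skip keeps the channel count, while concatenation doubles it. A shape sketch with assumed feature sizes:

```python
import torch

# One decoder stage with assumed shapes (illustration only):
x = torch.randn(1, 512, 8, 8)     # upsampled decoder features
skip = torch.randn(1, 512, 8, 8)  # encoder skip at the same scale
print(torch.cat((x, skip), dim=1).shape)  # unet: torch.Size([1, 1024, 8, 8])
print((x + skip).shape)                   # linknet: torch.Size([1, 512, 8, 8])
```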


class MLAttrEncoder(nn.Module):
-    def __init__(self):
+    def __init__(self, backbone):
        super(MLAttrEncoder, self).__init__()
+        self.backbone = backbone
        self.conv1 = conv4x4(3, 32)
        self.conv2 = conv4x4(32, 64)
        self.conv3 = conv4x4(64, 128)
        self.conv4 = conv4x4(128, 256)
        self.conv5 = conv4x4(256, 512)
        self.conv6 = conv4x4(512, 1024)
        self.conv7 = conv4x4(1024, 1024)

-        self.deconv1 = deconv4x4(1024, 1024)
-        self.deconv2 = deconv4x4(2048, 512)
-        self.deconv3 = deconv4x4(1024, 256)
-        self.deconv4 = deconv4x4(512, 128)
-        self.deconv5 = deconv4x4(256, 64)
-        self.deconv6 = deconv4x4(128, 32)
+        if backbone == 'unet':
+            self.deconv1 = deconv4x4(1024, 1024)
+            self.deconv2 = deconv4x4(2048, 512)
+            self.deconv3 = deconv4x4(1024, 256)
+            self.deconv4 = deconv4x4(512, 128)
+            self.deconv5 = deconv4x4(256, 64)
+            self.deconv6 = deconv4x4(128, 32)
+        elif backbone == 'linknet':
+            self.deconv1 = deconv4x4(1024, 1024)
+            self.deconv2 = deconv4x4(1024, 512)
+            self.deconv3 = deconv4x4(512, 256)
+            self.deconv4 = deconv4x4(256, 128)
+            self.deconv5 = deconv4x4(128, 64)
+            self.deconv6 = deconv4x4(64, 32)
        self.apply(weight_init)

def forward(self, Xt):
Expand All @@ -73,29 +85,38 @@ def forward(self, Xt):
        z_attr1 = self.conv7(feat6)
        # 1024x2x2

-        z_attr2 = self.deconv1(z_attr1, feat6)
-        z_attr3 = self.deconv2(z_attr2, feat5)
-        z_attr4 = self.deconv3(z_attr3, feat4)
-        z_attr5 = self.deconv4(z_attr4, feat3)
-        z_attr6 = self.deconv5(z_attr5, feat2)
-        z_attr7 = self.deconv6(z_attr6, feat1)
+        z_attr2 = self.deconv1(z_attr1, feat6, self.backbone)
+        z_attr3 = self.deconv2(z_attr2, feat5, self.backbone)
+        z_attr4 = self.deconv3(z_attr3, feat4, self.backbone)
+        z_attr5 = self.deconv4(z_attr4, feat3, self.backbone)
+        z_attr6 = self.deconv5(z_attr5, feat2, self.backbone)
+        z_attr7 = self.deconv6(z_attr6, feat1, self.backbone)
        z_attr8 = F.interpolate(z_attr7, scale_factor=2, mode='bilinear', align_corners=True)
        return z_attr1, z_attr2, z_attr3, z_attr4, z_attr5, z_attr6, z_attr7, z_attr8


class AADGenerator(nn.Module):
-    def __init__(self, c_id=256):
+    def __init__(self, backbone, c_id=256, num_blocks=2):
        super(AADGenerator, self).__init__()
        self.up1 = nn.ConvTranspose2d(c_id, 1024, kernel_size=2, stride=1, padding=0)
-        self.AADBlk1 = AAD_ResBlk(1024, 1024, 1024, c_id)
-        self.AADBlk2 = AAD_ResBlk(1024, 1024, 2048, c_id)
-        self.AADBlk3 = AAD_ResBlk(1024, 1024, 1024, c_id)
-        self.AADBlk4 = AAD_ResBlk(1024, 512, 512, c_id)
-        self.AADBlk5 = AAD_ResBlk(512, 256, 256, c_id)
-        self.AADBlk6 = AAD_ResBlk(256, 128, 128, c_id)
-        self.AADBlk7 = AAD_ResBlk(128, 64, 64, c_id)
-        self.AADBlk8 = AAD_ResBlk(64, 3, 64, c_id)
+        self.AADBlk1 = AAD_ResBlk(1024, 1024, 1024, c_id, num_blocks)
+        if backbone == 'linknet':
+            self.AADBlk2 = AAD_ResBlk(1024, 1024, 1024, c_id, num_blocks)
+            self.AADBlk3 = AAD_ResBlk(1024, 1024, 512, c_id, num_blocks)
+            self.AADBlk4 = AAD_ResBlk(1024, 512, 256, c_id, num_blocks)
+            self.AADBlk5 = AAD_ResBlk(512, 256, 128, c_id, num_blocks)
+            self.AADBlk6 = AAD_ResBlk(256, 128, 64, c_id, num_blocks)
+            self.AADBlk7 = AAD_ResBlk(128, 64, 32, c_id, num_blocks)
+            self.AADBlk8 = AAD_ResBlk(64, 3, 32, c_id, num_blocks)
+        else:
+            self.AADBlk2 = AAD_ResBlk(1024, 1024, 2048, c_id, num_blocks)
+            self.AADBlk3 = AAD_ResBlk(1024, 1024, 1024, c_id, num_blocks)
+            self.AADBlk4 = AAD_ResBlk(1024, 512, 512, c_id, num_blocks)
+            self.AADBlk5 = AAD_ResBlk(512, 256, 256, c_id, num_blocks)
+            self.AADBlk6 = AAD_ResBlk(256, 128, 128, c_id, num_blocks)
+            self.AADBlk7 = AAD_ResBlk(128, 64, 64, c_id, num_blocks)
+            self.AADBlk8 = AAD_ResBlk(64, 3, 64, c_id, num_blocks)

        self.apply(weight_init)

def forward(self, z_attr, z_id):
Expand All @@ -111,19 +132,22 @@ def forward(self, z_attr, z_id):
        return torch.tanh(y)



class AEI_Net(nn.Module):
-    def __init__(self, c_id=256):
+    def __init__(self, backbone, num_blocks=2, c_id=256):
        super(AEI_Net, self).__init__()
-        self.encoder = MLAttrEncoder()
-        self.generator = AADGenerator(c_id)
+        if backbone in ['unet', 'linknet']:
+            self.encoder = MLAttrEncoder(backbone)
+        elif backbone == 'resnet':
+            self.encoder = MLAttrEncoderResnet()
+        self.generator = AADGenerator(backbone, c_id, num_blocks)

    def forward(self, Xt, z_id):
        attr = self.encoder(Xt)
        Y = self.generator(attr, z_id)
        return Y, attr

    def get_attr(self, X):
        # with torch.no_grad():
        return self.encoder(X)
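
Putting the pieces together, a minimal end-to-end sketch; the 256x256 input size is an assumption based on the `# 1024x2x2` comment in the encoder, and the identity vector is the 512-d ArcFace embedding used by `inference.py`:

```python
import torch
from network.AEI_Net import AEI_Net

G = AEI_Net('unet', num_blocks=2, c_id=512).eval()
Xt = torch.randn(1, 3, 256, 256)   # target face crop (assumed input size)
z_id = torch.randn(1, 512)         # source identity embedding
with torch.no_grad():
    Y, attr = G(Xt, z_id)          # swapped face and multi-scale attribute maps
```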


