
Replace the background of people pictures with a PaddleHub model

Background

https://aistudio.baidu.com/aistudio/projectdetail/377462

I found the video background-replacement feature implemented in this project interesting, so I studied its implementation in detail.

 

This example uses the DeepLabv3+ model to perform one-click matting. In the latest iteration, the authors fuse multi-scale information through an encoder-decoder structure while keeping the original atrous convolutions and the ASPP layer; the backbone network uses the Xception model, which improves both the robustness and the speed of the semantic segmentation, achieving new state-of-the-art performance on the PASCAL VOC 2012 dataset. This PaddleHub Module was trained on a dataset built by Baidu, can be used for portrait segmentation, and accepts images of arbitrary size. Once the one-click matting is done, image compositing completes the background-replacement task.
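The core of the whole example is a single API call. A minimal one-click matting sketch (the paths are illustrative; this is the same PaddleHub 1.x segmentation call that the full script below uses):

import paddlehub as hub

# load the portrait-segmentation module and matte a single frame
module = hub.Module(name="deeplabv3p_xception65_humanseg")
results = module.segmentation(data={"image": ["workspace/frames/0.jpg"]},
                              output_dir="workspace/humans/")
print(results)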

 

Implementation Analysis

The implementation breaks down into the following steps (a minimal round-trip sketch for steps 1 and 5 follows the list):

  1.  Split the video into frame images -- relies on cv2's VideoCapture interface
  2.  Extract the human region from each frame image -- relies on the DeepLabv3+ model
  3.  Generate the background image
  4.  Blend the human images from step 2 into the background generated in step 3, producing new frame images
  5.  Assemble the new frame images into a video -- relies on cv2's VideoWriter interface
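Steps 1 and 5 hinge on cv2's VideoCapture and VideoWriter. A minimal round-trip sketch (file names and sizes are illustrative):

import cv2

# read every frame of a video and write it straight back out
cap = cv2.VideoCapture('in.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('out.mp4', fourcc, 30.0, (1920, 1080))

while True:
    ret, frame = cap.read()
    if not ret:
        break
    out.write(cv2.resize(frame, (1920, 1080)))

cap.release()
out.release()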

 

Installing Dependencies

https://cloud.tencent.com/developer/article/1630639

python3 -m pip install paddlepaddle -i https://pypi.tuna.tsinghua.edu.cn/simple

pip install -i https://mirror.baidu.com/pypi/simple paddlehub
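A quick sanity check that both packages were installed correctly (both expose a __version__ attribute):

import paddle
import paddlehub as hub

print(paddle.__version__)
print(hub.__version__)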

 

Pitfalls and Fixes

When running the sample code, if the model deeplabv3p_xception65_humanseg has not been installed separately, it is installed automatically before execution by default. After that automatic installation, however, the run finished without generating any matting results or the humanseg_output directory.

Normally, when the matting succeeds, printing results should show one result entry per input image.

This can be fixed by installing the model separately and pinning its version:

hub install deeplabv3p_xception65_humanseg==1.0.0

I did not dig into the root cause; the automatically installed version is 1.2.0, so my guess is that it is a model-version incompatibility.
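To guard against this inside the script as well, the version can presumably be pinned at load time too (assuming hub.Module accepts the version keyword documented by PaddleHub):

import paddlehub as hub

# pin the same version as the `hub install` fix above
module = hub.Module(name="deeplabv3p_xception65_humanseg", version="1.0.0")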

 

 

Code (optimized)

https://github.com/fanqingsong/replace_video_background/blob/master/app.py

import cv2
import os
import numpy as np
from PIL import Image
import paddlehub as hub


def split_video_to_frames(video_file_path, frames_folder_path):
    print("call split_video_to_frames")

    if not os.path.exists(video_file_path):
        print(f"video file {video_file_path} do not exist.")
        return

    cap = cv2.VideoCapture(video_file_path)
    index = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # save each decoded frame as a numbered jpg
        cv2.imwrite(f'{frames_folder_path}/{index}.jpg', frame)
        index += 1

    cap.release()
    print(f'video split finished, {index} frames in total')


def turn_frames_to_humans(frames_folder_path, humans_folder_path):
    print("call turn_frames_to_humans")

    print(f"frames_folder_path = {frames_folder_path}")
    print(f"humans_folder_path = {humans_folder_path}")

    print(os.listdir(frames_folder_path))

    # load model
    module = hub.Module(name="deeplabv3p_xception65_humanseg")

    test_img_path = [os.path.join(frames_folder_path, fname) for fname in os.listdir(frames_folder_path)]
    input_dict = {"image": test_img_path}

    results = module.segmentation(data=input_dict, output_dir=humans_folder_path)
    for result in results:
        print(result)


def blend_one_human_with_background(one_human_image_path, background_image_path, one_blended_image_path):
    print("call blend_one_human_with_background")

    background_image = Image.open(background_image_path).convert('RGB')

    one_human_image = Image.open(one_human_image_path).resize(background_image.size)

    # PNG format = RGBA
    one_human_image = np.array(one_human_image)
    print(one_human_image.shape)
    print(one_human_image[0, 0])

    # transparency dimension of A in RGBA
    one_human_image_A = one_human_image[:, :, -1]
    # print(one_human_image_A.shape)
    # print(one_human_image_A[0, 0])
    # print(list(set(one_human_image_A.ravel())))

    # RGB dimension in RGBA
    one_human_image_RGB = one_human_image[:, :, :3]

    scope_map = one_human_image_A / 255
    # print(f"scope_map.shape={scope_map.shape}")
    # print(scope_map[0, 0])
    # print(list(set(scope_map.ravel())))

    scope_map = scope_map[:, :, np.newaxis]
    # print(f"scope_map.shape={scope_map.shape}")
    # print(scope_map[0, 0])

    scope_map = np.repeat(scope_map, repeats=3, axis=2)
    # print(f"scope_map.shape={scope_map.shape}")
    # print(scope_map[0, 0])

    human_layer = np.multiply(scope_map, one_human_image_RGB)
    background_layer = np.multiply((1 - scope_map), np.array(background_image))
    blended_image = human_layer + background_layer
    
    blended_image = Image.fromarray(np.uint8(blended_image))
    blended_image.save(one_blended_image_path)


def blend_humans_with_background(humans_folder_path, background_image_path, frames_blended_folder_path):
    print("call blend_humans_with_background")

    # sort numerically so blended frames keep the original frame order
    # (assumes the matting step preserved the numeric frame names, e.g. "0.png")
    all_human_image_names = sorted(os.listdir(humans_folder_path),
                                   key=lambda name: int(os.path.splitext(name)[0]))

    for i, one_human_image_name in enumerate(all_human_image_names):
        one_human_image_path = f"{humans_folder_path}{one_human_image_name}"
        print(f"one_human_image_path = {one_human_image_path}")

        if not os.path.exists(one_human_image_path):
            print(f"one human image({one_human_image_path}) does not exist.")
            continue

        one_blended_image_path = f"{frames_blended_folder_path}{i}.png"
        print(one_blended_image_path)

        blend_one_human_with_background(one_human_image_path, background_image_path, one_blended_image_path)
   

def init_canvas(width, height, color=(255, 255, 255)):
    print("call init_canvas")

    canvas = np.ones((height, width, 3), dtype="uint8")
    # assign all element with specific color
    canvas[:] = color
    return canvas


def make_background_file(width, height, out_path):
    canvas = init_canvas(width, height, color=(0, 255, 0))
    cv2.imwrite(out_path, canvas)


def concatenate_frames_blended(frames_blended_folder_path, video_blended_file_path, size):
    print("call concatenate_frames_blended")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(video_blended_file_path, fourcc, 3.0, size)
    files = os.listdir(frames_blended_folder_path)

    for i in range(len(files)):
        one_frame_blended = frames_blended_folder_path + '%d.png' % i
        if not os.path.exists(one_frame_blended):
            continue

        img = cv2.imread(one_frame_blended)
        out.write(img)
    out.release()


# Config
video_path = 'workspace/sample.mp4'
video_blended_path = 'workspace/output.mp4'
background_image_path = 'workspace/green.jpg'

frames_folder_path = 'workspace/frames/'
humans_folder_path = 'workspace/humans/'
frames_blended_folder_path = 'workspace/frames_blended/'

background_size = (1920, 1080)

if __name__ == "__main__":
    print("video to frames")
    if not os.path.exists(frames_folder_path):
        os.mkdir(frames_folder_path)
        split_video_to_frames(video_path, frames_folder_path)

    print("frames to humans")
    if not os.path.exists(humans_folder_path):
        os.mkdir(humans_folder_path)
        turn_frames_to_humans(frames_folder_path, humans_folder_path)

    print("make green background")
    if not os.path.exists(background_image_path):
        make_background_file(*background_size, background_image_path)

    print("blend humans with background")
    if not os.path.exists(frames_blended_folder_path):
        os.mkdir(frames_blended_folder_path)
        blend_humans_with_background(humans_folder_path, background_image_path, frames_blended_folder_path)

    print("concatenate frames blended into video")
    if not os.path.exists(video_blended_path):
        concatenate_frames_blended(frames_blended_folder_path, video_blended_path, background_size)
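To run the script, place a sample.mp4 under workspace/; the frames/, humans/ and frames_blended/ folders are created on the first run, and the result is written to workspace/output.mp4. Note that each step only runs when its output folder (or file) does not exist yet, so delete the corresponding folder to force a rerun.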

 

PNG

PNG supports the RGB and RGBA formats.

RGBA consists of two parts:

  • RGB -- the color channels
  • A -- the alpha (transparency) channel

In this example, the human region matted out of each frame is saved as a PNG, which supports RGBA.

Apart from the human region, every other pixel is stored with the fully transparent alpha value 0, so the alpha channel effectively acts as a mask (or filter) that keeps only the selected part of the image.
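A quick way to confirm this on one matting result (the path is illustrative, pointing at an output of the script above):

from PIL import Image
import numpy as np

img = np.array(Image.open("workspace/humans/0.png"))
print(img.shape)                 # (H, W, 4): RGBA
print(np.unique(img[:, :, -1]))  # alpha values; 0 = fully transparent background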

 

https://en.wikipedia.org/wiki/Portable_Network_Graphics#Transparency_of_image

Portable Network Graphics (PNG, officially pronounced /pɪŋ/[2][3] PING, more commonly pronounced /ˌpiːɛnˈdʒiː/[4] PEE-en-JEE) is a raster-graphics file format that supports lossless data compression. PNG was developed as an improved, non-patented replacement for Graphics Interchange Format (GIF).

PNG supports palette-based images (with palettes of 24-bit RGB or 32-bit RGBA colors), grayscale images (with or without alpha channel for transparency), and full-color non-palette-based RGB or RGBA images. The PNG working group designed the format for transferring images on the Internet, not for professional-quality print graphics; therefore non-RGB color spaces such as CMYK are not supported. A PNG file contains a single image in an extensible structure of chunks, encoding the basic pixels and other information such as textual comments and integrity checks documented in RFC 2083.[5]

 

Related code

    # PNG format = RGBA
    one_human_image = np.array(one_human_image)
    print(one_human_image.shape)
    print(one_human_image[0, 0])

    # transparency dimension of A in RGBA
    one_human_image_A = one_human_image[:, :, -1]
    # print(one_human_image_A.shape)
    # print(one_human_image_A[0, 0])
    # print(list(set(one_human_image_A.ravel())))

    # RGB dimension in RGBA
    one_human_image_RGB = one_human_image[:, :, :3]

 

 

numpy.repeat

https://numpy.org/doc/stable/reference/generated/numpy.repeat.html

The alpha values are expanded here: alpha is a single channel, while the RGB data has three channels, so the alpha map is expanded to three channels and then used with np.multiply to weight the foreground and the background.

Repeat elements of an array.

np.repeat(3, 4)
array([3, 3, 3, 3])

x = np.array([[1,2],[3,4]])

np.repeat(x, 2)
array([1, 1, 2, 2, 3, 3, 4, 4])

np.repeat(x, 3, axis=1)
array([[1, 1, 1, 2, 2, 2],
       [3, 3, 3, 4, 4, 4]])

np.repeat(x, [1, 2], axis=0)
array([[1, 2],
       [3, 4],
       [3, 4]])

The relevant part of the code


background_image = Image.open(background_image_path).convert('RGB')

one_human_image = Image.open(one_human_image_path).resize(background_image.size)

# PNG format = RGBA
one_human_image = np.array(one_human_image)
print(one_human_image.shape)
print(one_human_image[0, 0])

# transparency dimension of A in RGBA
one_human_image_A = one_human_image[:, :, -1]
# print(one_human_image_A.shape)
# print(one_human_image_A[0, 0])
# print(list(set(one_human_image_A.ravel())))

# RGB dimension in RGBA
one_human_image_RGB = one_human_image[:, :, :3]

scope_map = one_human_image_A / 255
# print(f"scope_map.shape={scope_map.shape}")
# print(scope_map[0, 0])
# print(list(set(scope_map.ravel())))

scope_map = scope_map[:, :, np.newaxis]
# print(f"scope_map.shape={scope_map.shape}")
# print(scope_map[0, 0])

scope_map = np.repeat(scope_map, repeats=3, axis=2)
# print(f"scope_map.shape={scope_map.shape}")
# print(scope_map[0, 0])

human_layer = np.multiply(scope_map, one_human_image_RGB)
background_layer = np.multiply((1 - scope_map), np.array(background_image))
blended_image = human_layer + background_layer
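As an aside, the explicit np.repeat is not strictly required: NumPy broadcasting expands a trailing axis of size 1 automatically. A self-contained sketch with stand-in arrays:

import numpy as np

h, w = 4, 6
alpha = np.random.rand(h, w)   # stand-in for scope_map
rgb = np.random.rand(h, w, 3)  # stand-in for the human RGB image
bg = np.random.rand(h, w, 3)   # stand-in for the background

# (H, W, 1) broadcasts against (H, W, 3), so no np.repeat is needed
a = alpha[:, :, np.newaxis]
blended = a * rgb + (1 - a) * bg
print(blended.shape)           # (4, 6, 3)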

 

 

How can an Image object be converted to a NumPy array?

A file opened with Image.open returns an Image object, as defined by Pillow.

Why can it be converted to NumPy data?

That is, simply passing the Image object into the np.array() function returns a NumPy array.

    background_image = Image.open(background_image_path).convert('RGB')

    one_human_image = Image.open(one_human_image_path).resize(background_image.size)

    # PNG format = RGBA
    one_human_image = np.array(one_human_image)
    print(one_human_image.shape)
    print(one_human_image[0, 0])

 

https://numpy.org/doc/stable/reference/arrays.interface.html

Because it implements NumPy's array interface protocol:

This approach to the interface consists of the object having an __array_interface__ attribute.

object.__array_interface__

A dictionary of items (3 required and 5 optional). The optional keys in the dictionary have implied defaults if they are not provided.
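This is easy to verify on a Pillow Image directly:

from PIL import Image
import numpy as np

img = Image.new("RGBA", (2, 2), (255, 0, 0, 128))

# Pillow implements __array_interface__, which is what lets
# np.array() consume an Image object
print(sorted(img.__array_interface__.keys()))
print(np.array(img).shape)  # (2, 2, 4)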

 

Other examples using the same technique

Changing the background color of an ID photo

https://aistudio.baidu.com/aistudio/projectdetail/811555?channelType=0&channel=0

 
