
Replace the background of people pictures with a PaddleHub model

Background

https://aistudio.baidu.com/aistudio/projectdetail/377462

I found the video background-replacement feature implemented in this project interesting, so I studied its implementation in detail.

 

This example uses the DeepLabv3+ model to perform one-click matting. In the latest iteration, the authors fuse multi-scale information through an encoder-decoder structure while keeping the original atrous convolutions and the ASPP layer; the backbone network uses the Xception model, which improves both the robustness and the speed of the semantic segmentation, achieving new state-of-the-art performance on the PASCAL VOC 2012 dataset. This PaddleHub Module was trained on a dataset built by Baidu, can be used for portrait segmentation, and accepts images of arbitrary size. Once the one-click matting is done, image compositing completes the background-replacement task.
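The core of the whole example is a single API call. A minimal one-click matting sketch (the paths are illustrative; this is the same PaddleHub 1.x segmentation call that the full script below uses):

import paddlehub as hub

# load the portrait-segmentation module and matte a single frame
module = hub.Module(name="deeplabv3p_xception65_humanseg")
results = module.segmentation(data={"image": ["workspace/frames/0.jpg"]},
                              output_dir="workspace/humans/")
print(results)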

 

Implementation Analysis

The implementation breaks down into the following steps (a minimal round-trip sketch for steps 1 and 5 follows the list):

  1.  Split the video into frame images -- relies on cv2's VideoCapture interface
  2.  Extract the human region from each frame image -- relies on the DeepLabv3+ model
  3.  Generate the background image
  4.  Blend the human images from step 2 into the background generated in step 3, producing new frame images
  5.  Assemble the new frame images into a video -- relies on cv2's VideoWriter interface
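Steps 1 and 5 hinge on cv2's VideoCapture and VideoWriter. A minimal round-trip sketch (file names and sizes are illustrative):

import cv2

# read every frame of a video and write it straight back out
cap = cv2.VideoCapture('in.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('out.mp4', fourcc, 30.0, (1920, 1080))

while True:
    ret, frame = cap.read()
    if not ret:
        break
    out.write(cv2.resize(frame, (1920, 1080)))

cap.release()
out.release()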

 

Installing Dependencies

https://cloud.tencent.com/developer/article/1630639

python3 -m pip install paddlepaddle -i https://pypi.tuna.tsinghua.edu.cn/simple

pip install -i https://mirror.baidu.com/pypi/simple paddlehub
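A quick sanity check that both packages were installed correctly (both expose a __version__ attribute):

import paddle
import paddlehub as hub

print(paddle.__version__)
print(hub.__version__)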

 

Pitfalls and Fixes

When running the sample code, if the model deeplabv3p_xception65_humanseg has not been installed separately, it is installed automatically before execution by default. After that automatic installation, however, the run finished without generating any matting results or the humanseg_output directory.

Normally, when the matting succeeds, printing results should show one result entry per input image.

This can be fixed by installing the model separately and pinning its version:

hub install deeplabv3p_xception65_humanseg==1.0.0

I did not dig into the root cause; the automatically installed version is 1.2.0, so my guess is that it is a model-version incompatibility.
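To guard against this inside the script as well, the version can presumably be pinned at load time too (assuming hub.Module accepts the version keyword documented by PaddleHub):

import paddlehub as hub

# pin the same version as the `hub install` fix above
module = hub.Module(name="deeplabv3p_xception65_humanseg", version="1.0.0")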

 

 

Code (optimized)

https://github.com/fanqingsong/replace_video_background/blob/master/app.py

import cv2
import os
import numpy as np
from PIL import Image
import paddlehub as hub


def split_video_to_frames(video_file_path, frames_folder_path):
    print("call split_video_to_frames")

    if not os.path.exists(video_file_path):
        print(f"video file {video_file_path} do not exist.")
        return

    cap = cv2.VideoCapture(video_file_path)
    index = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # save each decoded frame as a numbered jpg
        cv2.imwrite(f'{frames_folder_path}/{index}.jpg', frame)
        index += 1

    cap.release()
    print(f'video split finished, {index} frames in total')


def turn_frames_to_humans(frames_folder_path, humans_folder_path):
    print("call turn_frames_to_humans")

    print(f"frames_folder_path = {frames_folder_path}")
    print(f"humans_folder_path = {humans_folder_path}")

    print(os.listdir(frames_folder_path))

    # load model
    module = hub.Module(name="deeplabv3p_xception65_humanseg")

    test_img_path = [os.path.join(frames_folder_path, fname) for fname in os.listdir(frames_folder_path)]
    input_dict = {"image": test_img_path}

    results = module.segmentation(data=input_dict, output_dir=humans_folder_path)
    for result in results:
        print(result)


def blend_one_human_with_background(one_human_image_path, background_image_path, one_blended_image_path):
    print("call blend_one_human_with_background")

    background_image = Image.open(background_image_path).convert('RGB')

    one_human_image = Image.open(one_human_image_path).resize(background_image.size)

    # PNG format = RGBA
    one_human_image = np.array(one_human_image)
    print(one_human_image.shape)
    print(one_human_image[0, 0])

    # transparency dimension of A in RGBA
    one_human_image_A = one_human_image[:, :, -1]
    # print(one_human_image_A.shape)
    # print(one_human_image_A[0, 0])
    # print(list(set(one_human_image_A.ravel())))

    # RGB dimension in RGBA
    one_human_image_RGB = one_human_image[:, :, :3]

    scope_map = one_human_image_A / 255
    # print(f"scope_map.shape={scope_map.shape}")
    # print(scope_map[0, 0])
    # print(list(set(scope_map.ravel())))

    scope_map = scope_map[:, :, np.newaxis]
    # print(f"scope_map.shape={scope_map.shape}")
    # print(scope_map[0, 0])

    scope_map = np.repeat(scope_map, repeats=3, axis=2)
    # print(f"scope_map.shape={scope_map.shape}")
    # print(scope_map[0, 0])

    human_layer = np.multiply(scope_map, one_human_image_RGB)
    background_layer = np.multiply((1 - scope_map), np.array(background_image))
    blended_image = human_layer + background_layer
    
    blended_image = Image.fromarray(np.uint8(blended_image))
    blended_image.save(one_blended_image_path)


def blend_humans_with_background(humans_folder_path, background_image_path, frames_blended_folder_path):
    print("call blend_humans_with_background")

    # sort numerically so blended frames keep the original frame order
    # (assumes the matting step preserved the numeric frame names, e.g. "0.png")
    all_human_image_names = sorted(os.listdir(humans_folder_path),
                                   key=lambda name: int(os.path.splitext(name)[0]))

    for i, one_human_image_name in enumerate(all_human_image_names):
        one_human_image_path = f"{humans_folder_path}{one_human_image_name}"
        print(f"one_human_image_path = {one_human_image_path}")

        if not os.path.exists(one_human_image_path):
            print(f"one human image({one_human_image_path}) does not exist.")
            continue

        one_blended_image_path = f"{frames_blended_folder_path}{i}.png"
        print(one_blended_image_path)

        blend_one_human_with_background(one_human_image_path, background_image_path, one_blended_image_path)
   

def init_canvas(width, height, color=(255, 255, 255)):
    print("call init_canvas")

    canvas = np.ones((height, width, 3), dtype="uint8")
    # assign all element with specific color
    canvas[:] = color
    return canvas


def make_background_file(width, height, out_path):
    canvas = init_canvas(width, height, color=(0, 255, 0))
    cv2.imwrite(out_path, canvas)


def concatenate_frames_blended(frames_blended_folder_path, video_blended_file_path, size):
    print("call concatenate_frames_blended")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(video_blended_file_path, fourcc, 3.0, size)
    files = os.listdir(frames_blended_folder_path)

    for i in range(len(files)):
        one_frame_blended = frames_blended_folder_path + '%d.png' % i
        if not os.path.exists(one_frame_blended):
            continue

        img = cv2.imread(one_frame_blended)
        out.write(img)
    out.release()


# Config
video_path = 'workspace/sample.mp4'
video_blended_path = 'workspace/output.mp4'
background_image_path = 'workspace/green.jpg'

frames_folder_path = 'workspace/frames/'
humans_folder_path = 'workspace/humans/'
frames_blended_folder_path = 'workspace/frames_blended/'

background_size = (1920, 1080)

if __name__ == "__main__":
    print("video to frames")
    if not os.path.exists(frames_folder_path):
        os.mkdir(frames_folder_path)
        split_video_to_frames(video_path, frames_folder_path)

    print("frames to humans")
    if not os.path.exists(humans_folder_path):
        os.mkdir(humans_folder_path)
        turn_frames_to_humans(frames_folder_path, humans_folder_path)

    print("make green background")
    if not os.path.exists(background_image_path):
        make_background_file(*background_size, background_image_path)

    print("blend humans with background")
    if not os.path.exists(frames_blended_folder_path):
        os.mkdir(frames_blended_folder_path)
        blend_humans_with_background(humans_folder_path, background_image_path, frames_blended_folder_path)

    print("concatenate frames blended into video")
    if not os.path.exists(video_blended_path):
        concatenate_frames_blended(frames_blended_folder_path, video_blended_path, background_size)
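To run the script, place a sample.mp4 under workspace/; the frames/, humans/ and frames_blended/ folders are created on the first run, and the result is written to workspace/output.mp4. Note that each step only runs when its output folder (or file) does not exist yet, so delete the corresponding folder to force a rerun.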

 

PNG

PNG supports the RGB and RGBA formats.

RGBA consists of two parts:

  • RGB -- the color channels
  • A -- the alpha (transparency) channel

In this example, the human region matted out of each frame is saved as a PNG, which supports RGBA.

Apart from the human region, every other pixel is stored with the fully transparent alpha value 0, so the alpha channel effectively acts as a mask (or filter) that keeps only the selected part of the image.
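A quick way to confirm this on one matting result (the path is illustrative, pointing at an output of the script above):

from PIL import Image
import numpy as np

img = np.array(Image.open("workspace/humans/0.png"))
print(img.shape)                 # (H, W, 4): RGBA
print(np.unique(img[:, :, -1]))  # alpha values; 0 = fully transparent background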

 

https://en.wikipedia.org/wiki/Portable_Network_Graphics#Transparency_of_image

Portable Network Graphics (PNG, officially pronounced /pɪŋ/[2][3] PING, more commonly pronounced /ˌpiːɛnˈdʒiː/[4] PEE-en-JEE) is a raster-graphics file format that supports lossless data compression. PNG was developed as an improved, non-patented replacement for Graphics Interchange Format (GIF).

PNG supports palette-based images (with palettes of 24-bit RGB or 32-bit RGBA colors), grayscale images (with or without alpha channel for transparency), and full-color non-palette-based RGB or RGBA images. The PNG working group designed the format for transferring images on the Internet, not for professional-quality print graphics; therefore non-RGB color spaces such as CMYK are not supported. A PNG file contains a single image in an extensible structure of chunks, encoding the basic pixels and other information such as textual comments and integrity checks documented in RFC 2083.[5]

 

Related code

    # PNG format = RGBA
    one_human_image = np.array(one_human_image)
    print(one_human_image.shape)
    print(one_human_image[0, 0])

    # transparency dimension of A in RGBA
    one_human_image_A = one_human_image[:, :, -1]
    # print(one_human_image_A.shape)
    # print(one_human_image_A[0, 0])
    # print(list(set(one_human_image_A.ravel())))

    # RGB dimension in RGBA
    one_human_image_RGB = one_human_image[:, :, :3]

 

 

numpy.repeat

https://numpy.org/doc/stable/reference/generated/numpy.repeat.html

The alpha values are expanded here: alpha is a single channel, while the RGB data has three channels, so the alpha map is expanded to three channels and then used with np.multiply to weight the foreground and the background.

Repeat elements of an array.

np.repeat(3, 4)
array([3, 3, 3, 3])

x = np.array([[1,2],[3,4]])

np.repeat(x, 2)
array([1, 1, 2, 2, 3, 3, 4, 4])

np.repeat(x, 3, axis=1)
array([[1, 1, 1, 2, 2, 2],
       [3, 3, 3, 4, 4, 4]])

np.repeat(x, [1, 2], axis=0)
array([[1, 2],
       [3, 4],
       [3, 4]])

The relevant part of the code


background_image = Image.open(background_image_path).convert('RGB')

one_human_image = Image.open(one_human_image_path).resize(background_image.size)

# PNG format = RGBA
one_human_image = np.array(one_human_image)
print(one_human_image.shape)
print(one_human_image[0, 0])

# transparency dimension of A in RGBA
one_human_image_A = one_human_image[:, :, -1]
# print(one_human_image_A.shape)
# print(one_human_image_A[0, 0])
# print(list(set(one_human_image_A.ravel())))

# RGB dimension in RGBA
one_human_image_RGB = one_human_image[:, :, :3]

scope_map = one_human_image_A / 255
# print(f"scope_map.shape={scope_map.shape}")
# print(scope_map[0, 0])
# print(list(set(scope_map.ravel())))

scope_map = scope_map[:, :, np.newaxis]
# print(f"scope_map.shape={scope_map.shape}")
# print(scope_map[0, 0])

scope_map = np.repeat(scope_map, repeats=3, axis=2)
# print(f"scope_map.shape={scope_map.shape}")
# print(scope_map[0, 0])

human_layer = np.multiply(scope_map, one_human_image_RGB)
background_layer = np.multiply((1 - scope_map), np.array(background_image))
blended_image = human_layer + background_layer
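As an aside, the explicit np.repeat is not strictly required: NumPy broadcasting expands a trailing axis of size 1 automatically. A self-contained sketch with stand-in arrays:

import numpy as np

h, w = 4, 6
alpha = np.random.rand(h, w)   # stand-in for scope_map
rgb = np.random.rand(h, w, 3)  # stand-in for the human RGB image
bg = np.random.rand(h, w, 3)   # stand-in for the background

# (H, W, 1) broadcasts against (H, W, 3), so no np.repeat is needed
a = alpha[:, :, np.newaxis]
blended = a * rgb + (1 - a) * bg
print(blended.shape)           # (4, 6, 3)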

 

 

How can an Image object be converted to a NumPy array?

A file opened with Image.open returns an Image object, as defined by Pillow.

Why can it be converted to NumPy data?

That is, simply passing the Image object into the np.array() function returns a NumPy array.

    background_image = Image.open(background_image_path).convert('RGB')

    one_human_image = Image.open(one_human_image_path).resize(background_image.size)

    # PNG format = RGBA
    one_human_image = np.array(one_human_image)
    print(one_human_image.shape)
    print(one_human_image[0, 0])

 

https://numpy.org/doc/stable/reference/arrays.interface.html

Because it implements NumPy's array interface protocol:

This approach to the interface consists of the object having an __array_interface__ attribute.

object.__array_interface__

A dictionary of items (3 required and 5 optional). The optional keys in the dictionary have implied defaults if they are not provided.
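This is easy to verify on a Pillow Image directly:

from PIL import Image
import numpy as np

img = Image.new("RGBA", (2, 2), (255, 0, 0, 128))

# Pillow implements __array_interface__, which is what lets
# np.array() consume an Image object
print(sorted(img.__array_interface__.keys()))
print(np.array(img).shape)  # (2, 2, 4)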

 

Other examples using the same technique

Changing the background color of an ID photo

https://aistudio.baidu.com/aistudio/projectdetail/811555?channelType=0&channel=0

 
