基于opencv的图片检索（模仿百度的以图搜图功能）

　　曾经有一篇文章简单地介绍了CBIR（Content-Based Image Retrieval，基于内容的图像检索），详细内容请参见《基于内容的图像检索技术（CBIR）介绍》。CBIR的应用十分广泛：特别是近年来，随着数码设备的迅猛普及，网络上的图片每天以几十亿级的速度猛增；而对于一些摄影爱好者和自拍族来说，电脑和存储设备里更是有数不清的图片。如何从这么多图片中找到自己所需的图片，是困扰很多人的难题，而CBIR正是这一问题的一种很好的解决方式。

本文将介绍用Python和OpenCV创建一个简单的图片搜索引擎。

系统架构：

CBIR系统的构建主要包括：
1）定义图像描述符（图像特征提取）

这一阶段，需要决定描述图像的哪一方面。图像的可描述特征很多，包括颜色、形状、纹理、能量等，而颜色特征又分为很多种，如颜色直方图、颜色矩等。在这一阶段，我们选定要提取的颜色特征，根据应用的不同，选取的颜色特征可以是一种或多种。

2）索引化数据集（存储）

现在有了图像描述符，接着就是将这个图像描述符应用到数据集中的每幅图像，提取这些图像的特征，并将其存储起来（如CSV文件、RDBMS、Redis数据库中），这样后续步骤就能用它们进行比较。

3）定义相似性度量

很好，现在有了许多特征向量。但如何比较这些特征向量呢？比较常用的相似性度量方式有：欧几里德距离、余弦距离、卡方距离、巴氏距离、闵氏距离、相关性等。实际选用哪一种取决于两点：①、数据集；②、提取的特征类型。
4）检索

上面步骤都完成了，剩下的就是根据输入的图片，从图像库中检索相似的图像并返回了。用户会向系统提交一幅需要搜索的图片（例如从上传窗口或通过移动App提交），而你的任务是：1、提取这幅图像的特征；2、使用相似度函数将这幅图像的特征与已经索引化的特征进行比较。这样，只需根据相似度函数的结果，返回相关的图像就可以了。

环境说明：

　　python3.6

　　opencv3

执行说明：

　　1、首先生成index检索文件：python index.py -d d:/picture/test/ -i d:/picture/index.csv

　　2、接着就是搜索相似图片：python Search.py -i d:/picture/index.csv -q d:/picture/test/1.jpg -r d:/picture/test/

数据集如下：

展示的结果：

下面是代码部分：

　　ColorDescriptor.py

import numpy as np
import cv2
 
class ColorDescriptor:
    """Region-based HSV colour histogram descriptor for an image.

    The image is split into five regions: a centred ellipse and the four
    corner quadrants with the ellipse removed. One 3D HSV histogram is
    computed per region and the flattened histograms are concatenated
    into a single feature vector.
    """

    def __init__(self, bins):
        """Store the histogram size.

        Args:
            bins: Number of bins for the H, S and V channels, in that
                order; passed to ``cv2.calcHist`` as ``histSize``.
        """
        self.bins = bins

    def describe(self, image):
        """Compute the feature vector of a BGR image.

        Args:
            image: Image array as returned by ``cv2.imread`` (BGR order).

        Returns:
            A list holding the flattened histograms of the four corner
            regions followed by the histogram of the central ellipse.
        """
        # Work in HSV colour space.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        features = []

        # Image dimensions and centre point.
        (h, w) = image.shape[:2]
        (cx, cy) = (int(w * 0.5), int(h * 0.5))

        # Quadrants as (startX, endX, startY, endY):
        # top-left, top-right, bottom-right, bottom-left.
        segments = [(0, cx, 0, cy), (cx, w, 0, cy), (cx, w, cy, h), (0, cx, cy, h)]

        # Elliptical mask covering the centre of the image. Floor division
        # keeps the axes integral, which cv2.ellipse requires; plain "/"
        # would produce floats under Python 3.
        (axesX, axesY) = (int(w * 0.75) // 2, int(h * 0.75) // 2)
        ellipMask = np.zeros(image.shape[:2], dtype="uint8")
        cv2.ellipse(ellipMask, (cx, cy), (axesX, axesY), 0, 0, 360, 255, -1)

        for (startX, endX, startY, endY) in segments:
            # Quadrant rectangle with the central ellipse cut out of it.
            cornerMask = np.zeros(image.shape[:2], dtype="uint8")
            cv2.rectangle(cornerMask, (startX, startY), (endX, endY), 255, -1)
            cornerMask = cv2.subtract(cornerMask, ellipMask)

            hist = self.histogram(image, cornerMask)
            features.extend(hist)

        # Finally the histogram of the central ellipse itself.
        hist = self.histogram(image, ellipMask)
        features.extend(hist)

        return features

    def histogram(self, image, mask):
        """Return the normalised, flattened 3D HSV histogram of a region.

        Args:
            image: HSV image.
            mask: 8-bit single-channel mask; only pixels where the mask is
                non-zero are counted.

        Returns:
            A 1-D array with one entry per histogram bin.
        """
        hist = cv2.calcHist([image], [0, 1, 2], mask, self.bins, [0, 180, 0, 256, 0, 256])
        # OpenCV 3+ requires an explicit destination array for normalize().
        hist = cv2.normalize(hist, hist).flatten()
        return hist

index.py：

"CBIR(Content-Base Image Retrieval)--Extract Features and Indexing"
import argparse
import glob
import os
import sys

import cv2

import ColorDescriptor

# Command line: the directory of images to index and the CSV file to write.
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="Path to the directory that cntains the images to be indexed")
ap.add_argument("-i", "--index", required=True, help="Path to where the computed index will be stored")
args = vars(ap.parse_args())
cd = ColorDescriptor.ColorDescriptor((8,12,3))

# The context manager closes the index file even if describing an image fails.
with open(args["index"], "w") as output:
    # Visit every .jpg file directly inside the dataset directory.
    for imagePath in glob.glob(os.path.join(args["dataset"], "*.jpg")):
        # The image ID is the bare file name. os.path.basename handles the
        # platform's path separators, whereas searching for "\\" only
        # worked for Windows-style paths.
        imageID = os.path.basename(imagePath)
        image = cv2.imread(imagePath)

        # cv2.imread returns None for files it cannot decode; skip those
        # instead of crashing inside describe().
        if image is None:
            print("skipping unreadable image: %s" % imagePath, file=sys.stderr)
            continue

        # One CSV row per image: the ID followed by the feature values.
        features = [str(f) for f in cd.describe(image)]
        output.write("%s,%s\n" % (imageID, ",".join(features)))

Searcher.py：

"CBIR(Content-Base Image Retrieval)--Similarity and Search"
import numpy as np
# use for processing index.csv
import csv
 
class Searcher:
    """Rank the images of a CSV index by similarity to a query vector.

    Each index row is expected to hold an image ID in the first column
    followed by that image's numeric feature values.
    """

    def __init__(self, indexPath):
        """Remember the path of the CSV index file.

        Args:
            indexPath: Path to the CSV index file.
        """
        self.indexPath = indexPath

    def chi2_distance(self, histA, histB, eps=1e-10):
        """Return the chi-squared distance between two histograms.

        Args:
            histA: First sequence of histogram values.
            histB: Second sequence of histogram values.
            eps: Small constant added to each denominator so that bins
                which are empty in both histograms do not divide by zero.

        Returns:
            ``0.5 * sum((a - b)**2 / (a + b + eps))`` over paired bins.
        """
        a = np.asarray(histA, dtype="float64").ravel()
        b = np.asarray(histB, dtype="float64").ravel()
        # Compare only the overlapping prefix, as pairing with zip() would.
        n = min(a.shape[0], b.shape[0])
        a, b = a[:n], b[:n]
        return 0.5 * np.sum((a - b) ** 2 / (a + b + eps))

    def search(self, queryFeatures, limit=10):
        """Return the index entries closest to the query.

        Args:
            queryFeatures: Feature vector of the query image.
            limit: Maximum number of results to return.

        Returns:
            A list of ``(distance, imageID)`` tuples sorted by ascending
            distance, at most ``limit`` long.
        """
        results = {}
        # Convert the query once rather than once per index row.
        query = np.asarray(queryFeatures, dtype="float64").ravel()

        # newline="" is what the csv module recommends for files it reads.
        with open(self.indexPath, newline="") as f:
            for row in csv.reader(f):
                # Blank lines yield empty rows; they carry no image.
                if not row:
                    continue
                features = [float(x) for x in row[1:]]
                results[row[0]] = self.chi2_distance(features, query)

        # Smaller distance means more similar, so sort ascending.
        results = sorted([(v, k) for (k, v) in results.items()])
        return results[:limit]

Search.py：

"CBIR(Content-Base Image Retrieval)--Search"
import argparse
import os

import cv2

import ColorDescriptor
import Searcher

# Command line: the CSV index, the query image and the directory that
# holds the indexed images.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--index", required=True, help="Path to where the computed index will be stored")
ap.add_argument("-q", "--query", required=True, help="Path to query image")
ap.add_argument("-r", "--result_path", required = True, help="Path to the result Path")
args = vars(ap.parse_args())
cd = ColorDescriptor.ColorDescriptor((8,12,3))

# Load the query image and describe it. cv2.imread returns None when the
# file cannot be read, so report that instead of failing inside describe().
query = cv2.imread(args["query"])
if query is None:
    ap.error("could not read query image: %s" % args["query"])
features = cd.describe(query)

# Perform the search.
searcher = Searcher.Searcher(args["index"])
results = searcher.search(features)

# Display the query.
cv2.imshow("Query", query)

# Show the results one at a time; any key press advances to the next one.
for (score, resultID) in results:
    # Build the path once so the printed path is the one actually loaded
    # (it was previously built from the index CSV path by mistake).
    resultPath = os.path.join(args["result_path"], resultID)
    print(resultPath)
    result = cv2.imread(resultPath)
    if result is None:
        # The indexed file is missing or unreadable; move on to the next hit.
        continue
    cv2.imshow("Result", result)
    cv2.waitKey(0)

参考文章：http://python.jobbole.com/80860/

posted @ 2018-09-04 11:39 消失的白桦林阅读(1299) 评论(0) 收藏举报

刷新页面返回顶部

似水流年

基于opencv的图片检索（模仿百度的以图搜图功能）

公告