# 我是如何使计算时间提速25.6倍的

## 选择使用更合适的函数

### 统计非零元素数量首选 np.count_nonzero(array)

import time
import numpy as np

# Benchmark: np.count_nonzero is the fastest way to count True entries
# of a boolean numpy array — a simple experiment.
def cal_nonzero(size):
    """Compare four ways of counting True entries in a (size, size) bool array.

    Prints each count with its elapsed wall-clock time, and returns the
    four counts so callers can verify that all methods agree.
    """
    a = np.random.randn(size, size)
    a = a > 0
    counts = []

    start = time.time()
    counts.append(np.count_nonzero(a))
    print(counts[-1], time.time() - start)

    start = time.time()
    counts.append(int(np.sum(a)))
    print(counts[-1], time.time() - start)

    start = time.time()
    counts.append(len(np.nonzero(a)[0]))
    print(counts[-1], time.time() - start)

    start = time.time()
    # BUG FIX: len(np.where(a)) is the length of the returned tuple of
    # index arrays (always 2 for a 2-D input), NOT the number of nonzero
    # elements — the first index array must be taken before len().
    counts.append(len(np.where(a)[0]))
    print(counts[-1], time.time() - start)

    return counts


if __name__ == '__main__':
    cal_nonzero(1000)
    # Sample results (count, seconds):
    # 499950 6.723403930664062e-05
    # 499950 0.0006949901580810547
    # 499950 0.007088184356689453


### 更快的交集计算方式

import time
import numpy as np

# For boolean masks, the bitwise operators (&, ~) are much faster than the
# arithmetic equivalents (*, 1 - x), which implicitly promote the masks to
# an integer dtype — a simple experiment.
def cal_andnot(size):
    """Time two formulations of mask intersection on (size, size) bool arrays.

    Prints the elapsed time of the arithmetic formulation and of the
    bitwise formulation, and returns True when both formulations agree
    on both intersections (a AND b, and NOT-a AND NOT-b).
    """
    a = np.random.randn(size, size)
    b = np.random.randn(size, size)
    a = a > 0
    b = b < 0

    start = time.time()
    a_and_b_mul = a * b
    _a_and__b_mul = (1 - a) * (1 - b)
    print(time.time() - start)

    start = time.time()
    a_and_b_and = a & b
    _a_and__b_and = ~a & ~b
    print(time.time() - start)

    # Verify the two formulations actually compute the same thing
    # (the original discarded all four results without checking).
    return bool(np.array_equal(a_and_b_mul, a_and_b_and)
                and np.array_equal(_a_and__b_mul, _a_and__b_and))


if __name__ == '__main__':
    cal_andnot(1000)
    # Sample results (seconds):
    # 0.0036919116973876953
    # 0.0005502700805664062


## 逻辑的改进

    ...
def step(self, pred: np.ndarray, gt: np.ndarray):
    """Record per-image statistics and accumulate the changeable E-measure.

    Normalizes the pair via the module-level _prepare_data helper, caches
    whether the ground truth is all-foreground / all-background along with
    its pixel count, then appends this image's per-threshold E-measure
    curve when the accumulator is enabled.
    """
    pred, gt = _prepare_data(pred=pred, gt=gt)

    # Cache degenerate-mask flags and the pixel count for later calls.
    self.all_fg = np.all(gt)
    self.all_bg = np.all(~gt)
    height, width = gt.shape[0], gt.shape[1]
    self.gt_size = height * width

    if self.changeable_ems is not None:
        self.changeable_ems.append(self.cal_changeable_em(pred, gt))

def cal_adaptive_em(self, pred: np.ndarray, gt: np.ndarray) -> float:

def cal_changeable_em(self, pred: np.ndarray, gt: np.ndarray) -> list:
    """Return the E-measure evaluated at 256 evenly spaced thresholds in [0, 1]."""
    ems = []
    for th in np.linspace(0, 1, 256):
        ems.append(self.cal_em_with_threshold(pred, gt, threshold=th))
    return ems

def cal_em_with_threshold(self, pred: np.ndarray, gt: np.ndarray, threshold: float) -> float:
    """E-measure of `pred` binarized at `threshold` against the mask `gt`.

    Degenerate masks short-circuit: an all-background ground truth rewards
    pixels predicted as background, an all-foreground one rewards pixels
    predicted as foreground; otherwise the full enhanced alignment matrix
    is computed.
    """
    binary = pred >= threshold
    if self.all_bg:
        enhanced = 1 - binary
    elif self.all_fg:
        enhanced = binary
    else:
        enhanced = self.cal_enhanced_matrix(binary, gt)
    height, width = gt.shape[0], gt.shape[1]
    return enhanced.sum() / (height * width - 1 + _EPS)

def cal_enhanced_matrix(self, pred: np.ndarray, gt: np.ndarray) -> np.ndarray:
    """Per-pixel enhanced alignment matrix used by the E-measure.

    Both inputs are demeaned, combined into an alignment term in [-1, 1],
    then mapped to [0, 1] via ((align + 1) ** 2) / 4.
    """
    dp = pred - pred.mean()
    dg = gt - gt.mean()
    numerator = 2 * dg * dp
    denominator = dg ** 2 + dp ** 2 + _EPS
    align = numerator / denominator
    return (align + 1) ** 2 / 4
...


• 这里计算为什么会那么慢?
• 因为涉及到了大量的矩阵元素级的运算, 例如元素级减法、加法、乘法、平方、除法.
• 大量的元素级运算是否可以优化?
• 必须可以
• 如何优化元素级运算?
• 寻找规律性、重复性的计算, 将其合并、消减, 可以联想numpy的稀疏矩阵的思想.
• 规律性、重复性的计算在哪里?
• 去均值实际上是对每个元素减去了相同的一个值, 如果被减数可以优化, 那么这一步就可以被优化
• 元素乘法和平方涉及到两部分, 即 demeaned_gt 和 demeaned_pred, 如果这两个可以被优化, 那么这些运算就都可以被优化
• 这些元素运算的连锁关系导致了只要我们优化了最初的 pred 和 gt, 那么整个流程就都可以被优化
• 如何优化 pred 和 gt 的表示?
• 这里需要从二者本身的属性上入手
• 二者最大的特点是什么?
• 都是二值数组, 只有0和1
• 那如何优化?
• 实际上就借鉴了稀疏矩阵的思想, 既然存在大量的重复性, 那么我们就将数值与位置解耦, 优化表示方式
• 如何解耦?
• 以 gt 为例, 可以表示为 0 和 1 两种数据, 其中 0 对应背景, 1 对应前景, 0 的数量表示背景面积, 1 的数量表示前景面积
• 那如何使用该思想重构前面的计算呢?

# Optimized replacement for the per-pixel demeaning shown commented out
# below: because both arrays are binary, the mean and the demeaned values
# are plain scalars derivable from element counts alone.
# demeaned_pred = pred - pred.mean()
# demeaned_gt = gt - gt.mean()
pred_fg_numel = np.count_nonzero(binarized_pred)
pred_bg_numel = self.gt_size - pred_fg_numel
gt_fg_numel = np.count_nonzero(gt)
gt_bg_numel = self.gt_size - gt_fg_numel

# The mean of a 0/1 array is simply its foreground fraction.
mean_pred_value = pred_fg_numel / self.gt_size
mean_gt_value = gt_fg_numel / self.gt_size

# Each demeaned array takes exactly two values: (1 - mean) on foreground
# pixels and (0 - mean) on background pixels.
demeaned_pred_fg_value = 1 - mean_pred_value
demeaned_pred_bg_value = 0 - mean_pred_value
demeaned_gt_fg_value = 1 - mean_gt_value
demeaned_gt_bg_value = 0 - mean_gt_value


1. pred: fg; gt: fg
2. pred: fg; gt: bg
3. pred: bg; gt: fg
4. pred: bg; gt: bg

# Decompose the image into the four disjoint pred/gt regions; since both
# arrays are binary, each region is fully described by its pixel count.
fg_fg_numel = np.count_nonzero(binarized_pred & gt)
fg_bg_numel = np.count_nonzero(binarized_pred & ~gt)

# bg_fg_numel = np.count_nonzero(~binarized_pred & gt)
# Derived from already-known counts — avoids another full-array pass.
# NOTE(review): self.gt_fg_numel is presumably cached in step() in the
# final version — not shown in this excerpt; confirm against the source.
bg_fg_numel = self.gt_fg_numel - fg_fg_numel
# bg_bg_numel = np.count_nonzero(~binarized_pred & ~gt)
bg_bg_numel = self.gt_size - (fg_fg_numel + fg_bg_numel + bg_fg_numel)

parts_numel = [fg_fg_numel, fg_bg_numel, bg_fg_numel, bg_bg_numel]

# Means of binary arrays reduce to (foreground count) / (total pixels).
mean_pred_value = (fg_fg_numel + fg_bg_numel) / self.gt_size
mean_gt_value = self.gt_fg_numel / self.gt_size

# Each demeaned array takes exactly two values: (1 - mean) on foreground
# pixels and (0 - mean) on background pixels.
demeaned_pred_fg_value = 1 - mean_pred_value
demeaned_pred_bg_value = 0 - mean_pred_value
demeaned_gt_fg_value = 1 - mean_gt_value
demeaned_gt_bg_value = 0 - mean_gt_value

# One (pred value, gt value) pair per region, ordered to match parts_numel.
combinations = [(demeaned_pred_fg_value, demeaned_gt_fg_value), (demeaned_pred_fg_value, demeaned_gt_bg_value),
                (demeaned_pred_bg_value, demeaned_gt_fg_value), (demeaned_pred_bg_value, demeaned_gt_bg_value)]


# Evaluate the per-pixel formula once per region (4 scalar evaluations
# instead of H*W element-wise ones) and weight each result by region size.
results_parts = []
for part_numel, combination in zip(parts_numel, combinations):
    # align_matrix = 2 * (demeaned_gt * demeaned_pred) / (demeaned_gt ** 2 + demeaned_pred ** 2 + _EPS)
    align_matrix_value = 2 * (combination[0] * combination[1]) / \
        (combination[0] ** 2 + combination[1] ** 2 + _EPS)
    # enhanced_matrix = (align_matrix + 1) ** 2 / 4
    enhanced_matrix_value = (align_matrix_value + 1) ** 2 / 4
    results_parts.append(enhanced_matrix_value * part_numel)

# enhanced_matrix = enhanced_matrix.sum()
enhanced_matrix = sum(results_parts)


# Original formulation (shown for comparison): materializes a full H*W
# enhanced matrix and then sums it.
binarized_pred = pred >= threshold
if self.all_bg:
    enhanced_matrix = 1 - binarized_pred
elif self.all_fg:
    enhanced_matrix = binarized_pred
else:
    enhanced_matrix = self.cal_enhanced_matrix(binarized_pred, gt)
em = enhanced_matrix.sum() / (gt.shape[0] * gt.shape[1] - 1 + _EPS)


# Optimized formulation: the degenerate cases reduce to a single
# count_nonzero, and the general case works with a scalar sum directly.
binarized_pred = pred >= threshold

if self.gt_fg_numel == 0:
    # Ground truth is all background: reward background predictions.
    binarized_pred_bg_numel = np.count_nonzero(~binarized_pred)
    enhanced_matrix_sum = binarized_pred_bg_numel
elif self.gt_fg_numel == self.gt_size:
    # Ground truth is all foreground: reward foreground predictions.
    binarized_pred_fg_numel = np.count_nonzero(binarized_pred)
    enhanced_matrix_sum = binarized_pred_fg_numel
else:
    # NOTE(review): in this version cal_enhanced_matrix is expected to
    # return the scalar sum, not the full matrix.
    enhanced_matrix_sum = self.cal_enhanced_matrix(binarized_pred, gt)
em = enhanced_matrix_sum / (self.gt_size - 1 + _EPS)


## 效率对比

• 'base': 503.5014679431915s
• 'best': 19.27734637260437s

posted @ 2020-11-30 10:34  lart  阅读(592)  评论(2编辑  收藏  举报