import scanpy as sc
import scipy
# from scipy.sparse import csr_matrix
# import scipy.sparse
import numpy as np
import time
# Export the sparse matrix stored in 'test.h5ad' to a MatrixMarket
# coordinate file ('main.mtx'), printing progress and the total run time.

# Track the run time.
start = time.perf_counter()
# Read the original file.
anndata = sc.read('test.h5ad')
print('start read data ##############')
# X is accessed through its indptr/indices/data arrays, with indptr used as
# per-row offsets (CSR layout).
# NOTE(review): assumes anndata.X is a CSR sparse matrix - confirm for inputs
# stored as dense or CSC.
matrix = anndata.X
indptr = np.array(matrix.indptr).astype(int)
indices = np.array(matrix.indices).astype(int)
# Values are cast to int to match the "integer" field declared in the header;
# any fractional part is truncated by the cast.
data = np.array(matrix.data).astype(int)
height, width = matrix.shape
total = matrix.data.size
print('start patch main data #############')
# indptr holds one offset per row plus a final end offset, so the matrix has
# (indptr.size - 1) rows.
row_count = indptr.size - 1
with open('main.mtx', 'w') as f:
    # Write the header.
    f.write('%%MatrixMarket matrix coordinate integer general\n%\n')
    # Size line: rows, columns, number of stored entries.
    f.write(str(height) + ' ' + str(width) + ' ' + str(total) + '\n')
    # Main data content. Row numbers are 1-based in the output; the range
    # must include row_count so the last row is written too (the size line
    # above already counts its entries).
    for row_no in range(1, row_count + 1):
        # Entries of this row live in [indptr[row_no - 1], indptr[row_no]).
        begin = indptr[row_no - 1]
        stop = indptr[row_no]
        # Build the whole row in one join instead of repeated concatenation;
        # column indices are shifted to 1-based.
        row_text = ''.join(
            str(row_no) + ' ' + str(col + 1) + ' ' + str(val) + '\n'
            for col, val in zip(indices[begin:stop], data[begin:stop])
        )
        f.write(row_text)
        # Report the number of rows written so far.
        print('progress: ' + str(row_no) + '/' + str(row_count))
end = time.perf_counter()
print("运行时间为", round(end-start), 'seconds')