# 临时处理小记：把NumPy的ndarray二进制文件转换成JSON文件

In [1]:
%%time

import numpy as np


Wall time: 135 ms

In [2]:
%%time

import pandas as pd


Wall time: 351 ms

In [3]:
%%time



Wall time: 910 ms

In [4]:
%%time



Wall time: 1 ms

Out[4]:

0
0 {'email': 'liurh@csdn.net', 'pwd': '9755DD0556...
1 {'email': 'fw19@sina.com', 'pwd': '6BB518D1A42...
2 {'email': 'whcheng@126.com', 'pwd': '0079ABBA6...
3 {'email': 'zh4ang@163.com', 'pwd': 'E23E561F02...
4 {'email': 'johnzhou8888@yahoo.com.cn', 'pwd': ...
5 {'email': 'zaza902@hotmail.com', 'pwd': '9B084...
6 {'email': 'yuping_zhong@163.com', 'pwd': '7D07...
7 {'email': 'annnntning@sina.com', 'pwd': '448A2...
8 {'email': 'sunnydinasun@sohu.com', 'pwd': 'DBF...
9 {'email': 'ysmrose@sohu.com', 'pwd': '22DDD26D...
In [5]:
%%time

# Extract the "email" field of every record in column 0 into an "Email" column.
# Each cell appears to be a dict already (see the Out[4] preview), so it is
# indexed directly instead of being copied with dict() first.
df['Email'] = df[0].map(lambda record: record["email"])
# Extract the "pwd" field the same way; it is stored under the column name "MD5".
df['MD5'] = df[0].map(lambda record: record["pwd"])
# Drop the raw-record column now that both fields have been extracted.
# NOTE: this mutates df in place, so the cell cannot be re-run without
# rebuilding df first (column 0 no longer exists afterwards).
del df[0]


Wall time: 1.05 s

In [6]:
%%time

df.size # Total number of elements (rows x columns), not the number of rows


Wall time: 0 ns

Out[6]:
2097148
In [7]:
%%time

df.shape # (number of rows, number of columns)


Wall time: 0 ns

Out[7]:
(1048574, 2)
In [8]:
%%time



Wall time: 0 ns

Out[8]:

Email MD5
1 fw19@sina.com 6BB518D1A42F22DA5CA62D5EE41C5D4F
2 whcheng@126.com 0079ABBA66856DAFDF2B9A6E0DB23A09
3 zh4ang@163.com E23E561F0202ACECA30B8F07A48AB8E9
4 johnzhou8888@yahoo.com.cn 0EB1A2DB91A2BF3FB6275DE659A25805
5 zaza902@hotmail.com 9B08473C992C07E98389ED1C280A634A
6 yuping_zhong@163.com 7D0710824FF191F6A0086A7E3891641E
7 annnntning@sina.com 448A2BCEE09A3B14C22DC000351216B7
8 sunnydinasun@sohu.com DBFBA02E366BAB58DF605D6475189A51
9 ysmrose@sohu.com 22DDD26D62AF8B1C4A216BE18FDFF5B2
In [9]:
%%time

# Save the frame as JSON keyed by row label: {row: {"Email": ..., "MD5": ...}}.
# orient="index" writes the same layout that transposing first would give,
# without building a transposed copy of the whole frame.
df.to_json("user.json", orient="index")


Wall time: 2.85 s


posted @ 2018-08-01 18:46 鲲逸鹏 阅读(...) 评论(...) 编辑 收藏