Python中依据指定列筛选数据——利用Python的pandas模块
001、
root@PC1:/home/test2# ls
outcome.map  test.map
root@PC1:/home/test2# cat outcome.map    ## 测试数据
1	snp1	0	55910
1	snp2	0	85204
1	snp3	0	122948
1	snp4	0	203750
1	snp5	0	312707
1	snp6	0	356863
1	snp7	0	400518
1	snp8	0	487423
1	snp9	0	578716
1	snp10	0	639876
root@PC1:/home/test2# cat test.map
snp6	7	107059
snp4	7	61125
snp9	7	173615
snp3	7	36884.4
002、
root@PC1:/home/test2# ls
outcome.map  test.map
root@PC1:/home/test2# python    ## 启动python shell
Python 3.7.0 (default, Jun 28 2018, 13:15:42)
[GCC 7.2.0] :: Anaconda, Inc. on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import pandas as pd    ## 导入pandas模块
>>> outcome=pd.read_table('outcome.map', header = -1)    ## 读取数据(header = -1 表示文件没有表头行;较新版本的pandas不再接受负数,需改写为 header = None)
>>> test=pd.read_table('test.map', header = -1)
>>> outcome.head()
   0     1  2       3
0  1  snp1  0   55910
1  1  snp2  0   85204
2  1  snp3  0  122948
3  1  snp4  0  203750
4  1  snp5  0  312707
>>> test.head()
      0  1         2
0  snp6  7  107059.0
1  snp4  7   61125.0
2  snp9  7  173615.0
3  snp3  7   36884.4
>>> result=outcome[outcome[1].isin(test[0])]    ## 依据snp列筛选数据
>>> result.head()
   0     1  2       3
2  1  snp3  0  122948
3  1  snp4  0  203750
5  1  snp6  0  356863
8  1  snp9  0  578716
>>> result.to_csv('xxx.txt', sep='\t')    ## 写出数据
>>> quit()
root@PC1:/home/test2# ls
outcome.map  test.map  xxx.txt
root@PC1:/home/test2# cat xxx.txt    ## 筛选结果
	0	1	2	3
2	1	snp3	0	122948
3	1	snp4	0	203750
5	1	snp6	0	356863
8	1	snp9	0	578716
参考:https://mp.weixin.qq.com/s?__biz=MzIxNzc1Mzk3NQ==&mid=2247492546&idx=1&sn=fe174e559474a95a7fb5b7e87b26af0a&chksm=97f653daa081dacce91c47786854cea073bcffeaa1b621f34e2d7aadebe84cfe266da63264df&mpshare=1&scene=23&srcid=0803H31kgVMVVI4fknb1DnRE&sharer_sharetime=1659470423020&sharer_shareid=50b75c6a886e09824b582fb782a7678b#rd

浙公网安备 33010602011771号