AD统计，排名前十的国家每年的论文统计量

1）获取每个国家每年的论文数量。这里采取的方法是写好 SQL 语句，直接用 SQL 语句在数据库中统计数量，这种方式速度可能会比较慢；另外一种方法是把 id 全部取回来，在本地做统计。

import pymysql
import json
import re  
import collections  
import json

def get_article_from_mysql(sql):
    """Run ``sql`` on the local MySQL server and return a single value.

    A fresh connection to the ``python`` database on localhost:3306 is
    opened for each call.  Both the cursor and the connection are closed
    before returning, even when the query raises.

    Args:
        sql: Complete SQL statement to execute.  It is expected to yield at
            least one row (the callers in this file pass ``SELECT COUNT(*)``
            queries).

    Returns:
        The first column of the first row of the result set.

    Raises:
        IndexError: If the query returns no rows.
    """
    conn = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='',
        db='python',
    )
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            # Only the first row is ever used, so there is no need to
            # fetch the whole result set.
            row = cursor.fetchone()
        finally:
            cursor.close()
    finally:
        # Always release the connection; this function is called once per
        # (country, year) pair, so leaked connections add up quickly.
        conn.close()

    if row is None:
        raise IndexError('query returned no rows: %s' % sql)
    return row[0]


def getsqllist():
    """Build the COUNT(*) queries for every top country and every year.

    For each of the ten hard-coded countries a LIKE pattern ``'%<name>%'``
    is matched against the ``authorinfor`` column, restricted to rows with a
    non-empty ``union_kwd_str`` and a given ``pub_year``.  The list of years
    (1995 through 2016, as strings) is printed to stdout as a side effect.

    Returns:
        dict: Maps each LIKE pattern (e.g. ``'%USA%'``) to a list of SQL
        strings, one per year in ascending order.
    """
    country = ['USA', 'China', 'UK', 'Germany', 'Italy', 'Japan', 'Canada', 'France', 'Spain', 'Australia']
    # NOTE(review): plain substring matching may over-count (for example
    # '%UK%' could also match 'Ukraine' under a case-insensitive collation)
    # -- confirm this is acceptable for the statistics.
    top_country = ['%' + name + '%' for name in country]

    year = [str(y) for y in range(1995, 2017)]
    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid Python 3 syntax.
    print(year)

    str1 = 'SELECT COUNT(*) FROM alzheimer where authorinfor like  '
    str2 = '&& union_kwd_str != \'\' && pub_year = '

    countrydict = {}
    for pattern in top_country:
        countrydict[pattern] = [
            str1 + "'" + pattern + "'" + str2 + "'" + y + "'"
            for y in year
        ]
    return countrydict

def change_with_year():
    """Query the yearly paper counts per country and dump them to a file.

    Runs every query produced by ``getsqllist()`` through
    ``get_article_from_mysql()`` and writes one line per country to
    ``1203_topcountry_article_change_with_year.json``.  Each line is a JSON
    object followed by a comma, so the file as a whole is a list fragment
    rather than a valid JSON document (presumably meant to be pasted into a
    chart ``series`` array -- confirm with the consumer).  The country name
    and its counts are also printed to stdout.
    """
    countrydict = getsqllist()

    countrylist = ['USA', 'China', 'UK', 'Germany', 'Italy', 'Japan', 'Canada', 'France', 'Spain', 'Australia']

    # The context manager guarantees the file is flushed and closed, even if
    # one of the database queries fails part-way through.
    with open('1203_topcountry_article_change_with_year.json', 'w') as fobj:
        for country in countrylist:
            # getsqllist() keys its result by LIKE pattern, not bare name.
            this_country_sql_list = countrydict['%' + country + '%']

            this_year_article_num = [
                get_article_from_mysql(sql) for sql in this_country_sql_list
            ]
            data = {
                'name': country,
                'type': 'line',
                'stack': 'total',
                'areaStyle': '{normal: {}}',
                'data': this_year_article_num
            }
            # Formatted so the output is identical on Python 2 and 3.
            print('%s %s' % (country, this_year_article_num))
            fobj.write(json.dumps(data))
            fobj.write(',')
            fobj.write('\n')

#change_with_year()

def article_each_year():
    """Count all papers per year (1996 through 2016) and save them as JSON.

    For every year one ``SELECT COUNT(*)`` query restricted to rows with a
    non-empty ``union_kwd_str`` is executed via ``get_article_from_mysql()``.
    The resulting list is written to ``1204_article_each_year_num.json``
    under the key ``'article_each_year'`` and printed to stdout.
    """
    str1 = 'SELECT COUNT(*) FROM alzheimer where union_kwd_str != \'\' && pub_year ='

    all_this_year_article_num = [
        get_article_from_mysql(str1 + "'" + str(y) + "'")
        for y in range(1996, 2017)
    ]

    data = {
        'article_each_year': all_this_year_article_num
    }

    # Close the file deterministically so the JSON is fully flushed to disk.
    with open('1204_article_each_year_num.json', 'w') as fobj:
        fobj.write(json.dumps(data))
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(all_this_year_article_num)

# Script entry point: query the yearly totals and write
# 1204_article_each_year_num.json.
article_each_year()

2）获得每个国家每年的论文数量后，求比例。比如：1996年美国的比例 = 1996年美国的论文量 / 1996年全世界的论文量。

from __future__ import division 
import json

# Names of the ten countries, in the same order as the rows of ``l`` below.
country = ['USA', 'China', 'UK', 'Germany', 'Italy', 'Japan', 'Canada', 'France', 'Spain', 'Australia']

# Paper counts: one row per country (order of ``country``), one column per
# year (order of ``year``).
l = [
    [1, 0, 15, 10, 19, 23, 32, 26, 49, 93, 161, 348, 669, 1471, 1669, 1892, 2128, 2762, 3090, 2675, 1458],
    [0, 0, 0, 0, 0, 2, 1, 1, 5, 14, 12, 19, 29, 78, 111, 144, 351, 517, 737, 1062, 899],
    [2, 2, 1, 3, 14, 11, 10, 20, 26, 34, 45, 72, 127, 204, 265, 308, 428, 570, 697, 723, 597],
    [0, 3, 4, 7, 8, 11, 9, 8, 27, 24, 21, 33, 67, 98, 124, 149, 253, 303, 370, 358, 318],
    [0, 1, 5, 5, 3, 7, 3, 10, 12, 27, 22, 28, 44, 67, 94, 106, 187, 285, 330, 322, 280],
    [1, 3, 4, 8, 7, 10, 13, 10, 11, 18, 26, 30, 30, 61, 85, 120, 189, 256, 295, 291, 199],
    [0, 5, 7, 2, 4, 2, 6, 10, 13, 18, 21, 32, 34, 69, 96, 112, 197, 286, 340, 340, 251],
    [0, 4, 2, 6, 7, 5, 9, 10, 13, 12, 17, 17, 29, 44, 65, 63, 126, 155, 189, 209, 184],
    [0, 0, 1, 2, 1, 6, 2, 0, 5, 7, 9, 18, 14, 46, 57, 72, 111, 146, 252, 198, 173],
    [0, 0, 1, 0, 0, 1, 2, 3, 4, 13, 11, 14, 22, 45, 53, 74, 120, 162, 245, 195, 169],
]

# Number of yearly data points per country (sanity check on stdout).
print(len(l[0]))

year = [1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016]

# Total number of papers per year, used as the denominator of each share.
# These are hard-coded values and are NOT the column sums of ``l`` (e.g. the
# first column of ``l`` sums to 4 while the first total is 19).
ltotal = [19, 54, 63, 84, 106, 127, 134, 171, 293, 504, 695, 1306, 2325, 4566, 5415, 5859, 7537, 9603, 11349, 10850, 7211]

# percent_dict[k] holds, for country k, its share of each year's total,
# rounded to two decimals.  True division is required here; it comes from
# the ``from __future__ import division`` line above on Python 2.
percent_dict = {}
for idx, counts in enumerate(l):
    percent_dict[idx] = [
        round(count / total, 2) for count, total in zip(counts, ltotal)
    ]

print(percent_dict)

# Write one JSON object per country, each followed by a comma and newline.
# The file is therefore a list fragment, not a standalone JSON document.
# The ``with`` block ensures the data is flushed and the handle is closed.
with open('1204_top10country_article.json', 'w') as fobj:
    for idx, name in enumerate(country):
        data = {
            'name': name,
            'type': 'line',
            'stack': 'total',
            'areaStyle': '{normal: {}}',
            'data': percent_dict[idx]
        }
        json_data = json.dumps(data)
        fobj.write(json_data)
        fobj.write(',')
        fobj.write('\n')

posted @ 2016-12-14 12:39 zdmlcmepl 阅读(443) 评论(0) 收藏举报

刷新页面返回顶部

life is tough，so are you

AD统计，排名前十的国家每年的论文统计量

公告