# creates: citations.png citations.csv
import os
import datetime
import matplotlib.pyplot as plt
# Upper-case three-letter month abbreviations in calendar order, so that
# ``months.index(name) + 1`` is the calendar month number.
months = [
    'JAN', 'FEB', 'MAR',
    'APR', 'MAY', 'JUN',
    'JUL', 'AUG', 'SEP',
    'OCT', 'NOV', 'DEC',
]
def f(filename):
    """Read a tagged bibliography export and return its papers keyed by DOI.

    The file is scanned line by line, dispatching on the first two
    characters of each line (the tag set looks like an ISI/Web of Science
    tagged export -- TODO confirm against the files actually fed in):

    * ``TI`` starts a record; the title runs from this line up to the line
      before ``SO``, joined with single spaces.
    * ``DI`` holds the DOI.
    * ``PY`` holds the publication year.
    * ``PD`` holds date words: a number below 100 is the day, a larger
      number the year, anything else a month name looked up in ``months``
      (for a range such as ``MAR-APR`` the last month is used).
    * an empty line ends the record.

    Month and day default to 1 and 15 when ``PD`` does not supply them.
    Records without a DOI are skipped and only the first record seen for a
    given DOI is kept.  Empty lines outside a record are ignored, and a
    final record is stored even when no empty line follows it.

    Parameters
    ----------
    filename : str
        Path of the file to parse.

    Returns
    -------
    dict
        Maps DOI string to a ``(datetime.date, title)`` tuple.

    Raises
    ------
    ValueError
        If a ``PD`` word is neither a number nor a name in ``months``, or
        the resulting day/month/year is not a valid date.
    TypeError
        If a record with a DOI has no year (neither ``PY`` nor ``PD``).
    """
    # Read everything up front: the title needs random access to earlier
    # lines, and the context manager closes the handle right away.
    with open(filename) as fd:
        lines = fd.readlines()

    papers = {}
    in_record = False

    # The extra empty line at the end flushes a final record that is not
    # followed by one in the file itself.
    for n, line in enumerate(lines + ['\n']):
        tag = line[:2]
        if tag == 'TI':
            in_record = True
            ntitle = n
            # Provisional title, replaced by the full one at the SO line.
            title = line[3:].rstrip('\n')
            # Reset per-record fields so nothing leaks in from the previous
            # record (DI, PY and PD are expected after TI).
            doi = None
            y = None
            m = 1
            d = 15
        elif tag == 'SO' and in_record:
            title = ' '.join(lines[i][3:-1] for i in range(ntitle, n))
        elif tag == 'DI':
            # rstrip instead of [3:-1]: the last line may lack a newline.
            doi = line[3:].rstrip('\n')
        elif tag == 'PY':
            y = int(line.split()[1])
        elif tag == 'PD':
            for w in line.split()[1:]:
                if w[0].isdigit():
                    w = int(w)
                    if w < 100:
                        d = w
                    else:
                        y = w
                else:
                    if '-' in w:
                        w = w.split('-')[-1]
                    m = months.index(w) + 1
        elif tag == '\n':
            if in_record and doi is not None and doi not in papers:
                papers[doi] = (datetime.date(y, m, d), title)
            in_record = False

    return papers
# Legend label for every tracked paper.  The key is the base name of the
# paper's citation files (``<key>.txt`` and, optionally, ``<key>.bib``);
# the comment above each entry links to the paper itself.
label_bib = {
    # http://doi.org/10.1103/PhysRevB.71.035109
    'gpaw1': 'Mortensen et al., Phys. Rev. B (2005)',
    # http://doi.org/10.1088/0953-8984/22/25/253202
    'gpaw2': 'Enkovaara et al., J. Phys.: Condens. Matter (2010)',
    # http://doi.org/10.1103/PhysRevB.80.195112
    'lcao': 'Larsen et al., Phys. Rev. B (2009)',
    # http://doi.org/10.1063/1.2943138
    'tddft': 'Walter et al., J. Chem. Phys. (2008)',
    # http://doi.org/10.1103/PhysRevB.83.245122
    'response': 'Yan et al., Phys. Rev. B (2011)',
}
# Plot the cumulative number of citing papers for every tracked paper and
# for their union, refresh the per-paper ``<bib>.txt`` caches and write the
# combined list to ``citations.csv``.
plt.figure(figsize=(8, 4))

# DOI -> (date, title) over all tracked papers.  A citing paper that shows
# up in several lists is stored once, so the "Total" curve counts it once.
total = {}

for bib in ['gpaw1', 'tddft', 'lcao', 'gpaw2', 'response']:
    # Citations recorded by earlier runs, one "YYYY-MM-DD doi title" line
    # each (the format written back further down).
    papers = {}
    with open(bib + '.txt') as fd:
        for line in fd:
            if not line.strip():
                # A blank line carries no record and would break the
                # three-way unpacking below.
                continue
            date, doi, title = line.split(' ', 2)
            papers[doi] = (datetime.date(*[int(x) for x in date.split('-')]),
                           title.strip())

    # Merge in a fresh export when one is present; its entries replace
    # cached ones with the same DOI.
    if os.path.isfile(bib + '.bib'):
        papers.update(f(bib + '.bib'))

    # Chronological list of (date, doi, title).
    papers = sorted((date, doi, title)
                    for doi, (date, title) in papers.items())

    plt.plot([paper[0] for paper in papers], range(1, len(papers) + 1),
             '-o', label=label_bib[bib])

    # Check every title *before* the cache file is truncated, so that a bad
    # title cannot leave a half-written ``<bib>.txt`` behind.  Titles end up
    # inside a double-quoted CSV field, which a '"' would terminate early.
    for date, doi, title in papers:
        if '"' in title:
            raise ValueError('title contains a double quote: %s' % title)

    with open(bib + '.txt', 'w') as fd:
        for date, doi, title in papers:
            fd.write('%d-%02d-%02d %s %s\n' % (date.year, date.month,
                                               date.day, doi, title))
            total[doi] = (date, title)

    # Summary: name, entries, distinct DOIs, running size of the union.
    x = {paper[1] for paper in papers}
    print((bib, len(papers), len(x), len(total)))

allpapers = sorted((paper[0], doi, paper[1]) for doi, paper in total.items())
plt.plot([paper[0] for paper in allpapers], range(1, len(allpapers) + 1),
         '-o', label='Total')

# Newest paper first; each row is numbered by the paper's chronological
# position, so the numbers count down to 1.
with open('citations.csv', 'w') as fd:
    for n, (date, doi, title) in reversed(list(enumerate(allpapers, 1))):
        fd.write('%d,":doi:`%s <%s>`"\n' % (n, title, doi))

plt.xlabel('date')
plt.ylabel('number of citations')
plt.legend(loc='upper left')
plt.savefig('citations.png')