File: citations.py

package info (click to toggle)
gpaw 21.1.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 14,492 kB
  • sloc: python: 121,997; ansic: 14,138; sh: 1,125; csh: 139; makefile: 43
file content (110 lines) | stat: -rw-r--r-- 3,334 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# creates: citations.png citations.csv
import os
import datetime

import matplotlib.pyplot as plt


# Upper-case three-letter month abbreviations in calendar order, so that
# ``months.index(abbr) + 1`` is the month number.
months = 'JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC'.split()


def f(filename):
    """Parse a tagged plain-text bibliography export into a dict of papers.

    The first two characters of every line are treated as a field tag
    (the tags look like Web of Science field tags -- unverified):

    * ``TI`` starts a new record and its title,
    * ``SO`` ends the title: the text of all lines from the ``TI`` line up
      to (not including) this one is joined with single spaces,
    * ``DI`` holds the DOI,
    * ``PY`` holds the publication year,
    * ``PD`` holds whitespace-separated date tokens: a number below 100 is
      the day, any other number is the year, and anything else is a month
      abbreviation looked up in ``months`` (for a range such as
      ``JAN-FEB`` the last month is used),
    * a blank line ends the record.

    Month and day default to 1 and 15 when ``PD`` does not provide them.

    Parameters
    ----------
    filename : str
        Path of the file to parse.

    Returns
    -------
    dict
        Maps DOI to ``(datetime.date, title)``.  When a DOI occurs more
        than once the first record wins.  Records without a DOI are
        skipped.

    Raises
    ------
    TypeError
        If a record that has a DOI has no year.
    ValueError
        If a ``PY``/``PD`` token cannot be interpreted.
    """
    # The title is assembled from earlier lines, so keep all lines in
    # memory; the file itself is closed as soon as it has been read.
    with open(filename) as fd:
        lines = fd.readlines()

    papers = {}

    # State of the record currently being read.  Everything is reset when
    # a 'TI' line starts a new record.
    ntitle = None
    title = ''
    doi = None
    y = None
    m = 1
    d = 15

    for n, line in enumerate(lines):
        tag = line[:2]
        if tag == 'TI':
            ntitle = n
            title, doi, y, m, d = '', None, None, 1, 15
        elif tag == 'SO':
            # Ignore a stray 'SO' that is not preceded by a 'TI' line.
            if ntitle is not None:
                title = ' '.join(lines[i][3:-1] for i in range(ntitle, n))
        elif tag == 'DI':
            doi = line[3:-1]
        elif tag == 'PY':
            y = int(line.split()[1])
        elif tag == 'PD':
            for w in line.split()[1:]:
                if w[0].isdigit():
                    number = int(w)
                    if number < 100:
                        d = number
                    else:
                        y = number
                else:
                    # For a month range keep the part after the last '-'.
                    m = months.index(w.split('-')[-1]) + 1
        elif tag == '\n':
            # End of record.  Without a DOI there is no key to store the
            # paper under; this also makes stray blank lines harmless.
            if doi is not None and doi not in papers:
                papers[doi] = (datetime.date(y, m, d), title)
            # Forget the DOI so that a following record lacking 'DI'
            # cannot inherit it.
            doi = None

    return papers


# Legend label for each citation list, keyed by the short name that is
# also used as the base name of the list's data files.  The comment above
# each entry is the DOI link of the paper.
label_bib = dict([
    # http://doi.org/10.1103/PhysRevB.71.035109
    ('gpaw1', 'Mortensen et al., Phys. Rev. B (2005)'),
    # http://doi.org/10.1088/0953-8984/22/25/253202
    ('gpaw2', 'Enkovaara et al., J. Phys.: Condens. Matter (2010)'),
    # http://doi.org/10.1103/PhysRevB.80.195112
    ('lcao', 'Larsen et al., Phys. Rev. B (2009)'),
    # http://doi.org/10.1063/1.2943138
    ('tddft', 'Walter et al., J. Chem. Phys. (2008)'),
    # http://doi.org/10.1103/PhysRevB.83.245122
    ('response', 'Yan et al., Phys. Rev. B (2011)'),
])

# One curve per paper.  For each citation list: read the stored list from
# <bib>.txt, merge in a fresh export from <bib>.bib if that file exists,
# plot the cumulative number of citing papers against date and write the
# merged, date-sorted list back to <bib>.txt.
plt.figure(figsize=(8, 4))
total = {}  # doi -> (date, title), union over all lists
for bib in ['gpaw1', 'tddft', 'lcao', 'gpaw2', 'response']:
    papers = {}
    with open(bib + '.txt') as fd:
        for line in fd:
            if not line.strip():
                # Tolerate blank lines instead of failing on the unpack.
                continue
            # Line format: 'YYYY-MM-DD <doi> <title>'.
            date, doi, title = line.split(' ', 2)
            papers[doi] = (datetime.date(*[int(x) for x in date.split('-')]),
                           title.strip())
    if os.path.isfile(bib + '.bib'):
        # Entries from the export replace stored entries with the same DOI.
        papers.update(f(bib + '.bib'))
    papers = sorted((papers[doi][0], doi, papers[doi][1]) for doi in papers)

    # Titles are later embedded in a double-quoted CSV field.  Check them
    # before <bib>.txt is truncated, so that a bad title cannot leave the
    # stored list half-written.
    for date, doi, title in papers:
        assert '"' not in title, title

    plt.plot([paper[0] for paper in papers], range(1, len(papers) + 1),
             '-o', label=label_bib[bib])

    with open(bib + '.txt', 'w') as fd:
        for date, doi, title in papers:
            fd.write('%d-%02d-%02d %s %s\n' % (date.year, date.month,
                                               date.day, doi, title))
            total[doi] = (date, title)

    # Progress line: list name, number of papers, number of distinct DOIs
    # and the running size of the union.
    ndois = len({doi for _, doi, _ in papers})
    print((bib, len(papers), ndois, len(total)))


# Plot the union of all lists as one more curve, then write it to
# citations.csv, newest paper first.  Each row holds the paper's running
# number (counted from the oldest paper) and a quoted ':doi:' link whose
# text is the title.
allpapers = sorted((paper[0], doi, paper[1]) for doi, paper in total.items())
plt.plot([paper[0] for paper in allpapers], range(1, len(allpapers) + 1),
         '-o', label='Total')

# The with-block closes the file even if a write fails.
with open('citations.csv', 'w') as fd:
    for n, (date, doi, title) in reversed(list(enumerate(allpapers, 1))):
        fd.write('%d,":doi:`%s <%s>`"\n' % (n, title, doi))

# Label the axes of the current figure, add the legend for the curves
# plotted so far and save the result as a PNG file.
ax = plt.gca()
ax.set_xlabel('date')
ax.set_ylabel('number of citations')
ax.legend(loc='upper left')
plt.savefig('citations.png')