File: statistics.py

package info (click to toggle)
rdflib-sqlalchemy 0.5.4%2Bgit99f4689-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 10,544 kB
  • sloc: python: 2,418; sh: 20; makefile: 3
file content (48 lines) | stat: -rw-r--r-- 1,722 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""Statistical summary of store statements mixin"""
from sqlalchemy.orm.session import Session
from sqlalchemy.sql import func


def get_group_by_count(session, group_by_column):
    """
    Run a GROUP BY query and collect the count for each distinct value.

    The query selects ``group_by_column`` together with
    ``COUNT(group_by_column)``, grouped by that same column, and is
    executed immediately in the given session.

    Args:
        session (~sqlalchemy.orm.session.Session): session to query in
        group_by_column (~sqlalchemy.schema.Column): column to group by

    Returns:
        dict: dictionary mapping from value to count
    """
    occurrences = func.count(group_by_column)
    grouped_query = session.query(group_by_column, occurrences).group_by(
        group_by_column
    )
    rows = grouped_query.all()
    # Every result row is a (value, count) pair, so the rows convert
    # directly into a mapping.
    return dict(rows)


class StatisticsMixin:
    """Mixin adding a statistical summary of the statements in a store.

    The class this is mixed into must provide:

    * ``__len__`` -- used for the total number of statements,
    * ``engine`` -- object whose ``connect()`` returns a context-managed
      connection,
    * ``tables`` -- mapping from table name to a table exposing its columns
      through ``.c``.
    """

    def statistics(self, asserted_statements=True, literals=True, types=True):
        """Return store statistics.

        Args:
            asserted_statements (bool): include per-predicate counts from the
                ``asserted_statements`` table
            literals (bool): include per-predicate counts from the
                ``literal_statements`` table
            types (bool): include per-class counts from the
                ``type_statements`` table

        Returns:
            dict: always contains ``"store"`` (with ``total_num_statements``);
            additionally contains ``"asserted_statements"``, ``"literals"``
            and ``"types"`` for each enabled flag, each mapping a distinct
            column value to its count.
        """
        statistics = {
            "store": dict(total_num_statements=len(self)),
        }

        # (flag, result key, table name, column to group by)
        sections = (
            (asserted_statements, "asserted_statements", "asserted_statements", "predicate"),
            (literals, "literals", "literal_statements", "predicate"),
            (types, "types", "type_statements", "klass"),
        )
        requested = [section[1:] for section in sections if section[0]]

        # Nothing to query: do not open a connection at all.
        if not requested:
            return statistics

        with self.engine.connect() as connection:
            session = Session(bind=connection)
            try:
                for key, table_name, column_name in requested:
                    table = self.tables[table_name]
                    group_by_column = getattr(table.c, column_name)
                    statistics[key] = get_group_by_count(session, group_by_column)
            finally:
                # Close the session explicitly while the connection is still
                # open instead of leaving it to garbage collection.
                session.close()

        return statistics