File: cert_desc.py

package info (click to toggle)
golang-github-google-certificate-transparency 0.0~git20160709.0.0f6e3d1~ds1-3
links: PTS, VCS
area: main
in suites: bookworm, bullseye, buster
size: 5,676 kB
sloc: cpp: 35,278; python: 11,838; java: 1,911; sh: 1,885; makefile: 950; xml: 520; ansic: 225
file content (127 lines) | stat: -rw-r--r-- 4,471 bytes
import calendar
import hashlib
import re
import unicodedata

from ct.crypto import cert
from ct.crypto.asn1 import x509_common
from ct.proto import certificate_pb2


def from_cert(certificate):
    """Summarise a parsed certificate as an X509Description proto.

    Every group of fields is extracted inside its own try block, so a
    cert.CertificateError raised while reading one group only leaves that
    group unset (or partially set) and never aborts the whole description.
    The DER encoding and its SHA-256 digest are always stored.

    Args:
        certificate: certificate object exposing to_der(), subject(),
            issuer(), subject_alternative_names(), version(),
            serial_number(), signature(), signature_algorithm(),
            basic_constraint_ca(), not_before() and not_after().

    Returns:
        The populated certificate_pb2.X509Description message.
    """
    description = certificate_pb2.X509Description()
    description.der = certificate.to_der()

    # Subject: collect every (type, value) pair first, so that an error
    # in the middle of the name adds no subject entries at all.
    try:
        subject_entries = []
        for attr_type, attr_value in certificate.subject():
            short_name = attr_type.short_name
            # Only common names are stored with their labels reversed.
            labels = process_name(attr_value.human_readable(),
                                  short_name == 'CN')
            subject_entries.append((short_name, to_unicode('.'.join(labels))))
        for entry_type, entry_value in subject_entries:
            subject_entry = description.subject.add()
            subject_entry.type = entry_type
            subject_entry.value = entry_value
    except cert.CertificateError:
        pass

    # Issuer: same all-or-nothing collection, labels are never reversed.
    try:
        issuer_entries = []
        for attr_type, attr_value in certificate.issuer():
            short_name = attr_type.short_name
            labels = process_name(attr_value.human_readable(), False)
            issuer_entries.append((short_name, to_unicode('.'.join(labels))))
        for entry_type, entry_value in issuer_entries:
            issuer_entry = description.issuer.add()
            issuer_entry.type = entry_type
            issuer_entry.value = entry_value
    except cert.CertificateError:
        pass

    # Subject alternative names are added one at a time; the proto entry
    # is created before its key and value are computed.
    try:
        for alt_name in certificate.subject_alternative_names():
            san_entry = description.subject_alternative_names.add()
            san_key = alt_name.component_key()
            san_labels = process_name(
                alt_name.component_value().human_readable())
            san_text = to_unicode('.'.join(san_labels))
            san_entry.type = san_key
            san_entry.value = san_text
    except cert.CertificateError:
        pass

    try:
        description.version = str(certificate.version())
    except cert.CertificateError:
        pass

    # Serial number is stored as upper-case text with the ':' separators
    # removed.
    try:
        serial_text = certificate.serial_number().human_readable()
        description.serial_number = str(serial_text.upper().replace(':', ''))
    except cert.CertificateError:
        pass

    # Signature algorithm identifiers: first the one returned by
    # signature(), then the one returned by signature_algorithm().
    # Falsy algorithm/parameters values are skipped.
    try:
        signature_sources = (
            ('tbs_signature', certificate.signature),
            ('cert_signature', certificate.signature_algorithm),
        )
        for field_name, fetch in signature_sources:
            algorithm = fetch()["algorithm"]
            if algorithm:
                getattr(description, field_name).algorithm_id = (
                    algorithm.long_name)

            parameters = fetch()["parameters"]
            if parameters:
                getattr(description, field_name).parameters = parameters.value
    except cert.CertificateError:
        pass

    try:
        description.basic_constraint_ca = bool(
            certificate.basic_constraint_ca())
    except cert.CertificateError:
        pass

    # Validity bounds as epoch seconds scaled by 1000; both values are
    # computed before either is stored.
    try:
        not_before_ms = 1000 * int(calendar.timegm(certificate.not_before()))
        not_after_ms = 1000 * int(calendar.timegm(certificate.not_after()))
        description.validity.not_before = not_before_ms
        description.validity.not_after = not_after_ms
    except cert.CertificateError:
        pass

    description.sha256_hash = hashlib.sha256(description.der).digest()

    return description


def to_unicode(value):
    """Decode a byte string to text, preferring UTF-8 over ISO-8859-1.

    The bytes are first decoded as UTF-8 with undecodable sequences
    substituted. If any resulting character has no name in the Unicode
    database (for example control characters or unassigned code points),
    the input is assumed to be ISO-8859-1 that was mislabelled as UTF-8
    and is decoded as such instead.

    Note that undecodable bytes become U+FFFD, which is a named character,
    so invalid UTF-8 alone does not trigger the ISO-8859-1 fallback.

    Works on both Python 2 and Python 3: it does not rely on the
    Python 2-only `unicode` builtin.

    Args:
        value: a byte string (bytes/bytearray; `str` on Python 2), or text
            that is already decoded.

    Returns:
        The decoded text. Already-decoded text is returned unchanged.

    Raises:
        TypeError: if value is neither a byte string nor text.
    """
    # type(u'') is `unicode` on Python 2 and `str` on Python 3.
    if isinstance(value, type(u'')):
        return value
    if not isinstance(value, (bytes, bytearray)):
        raise TypeError('to_unicode() expects bytes or text, got %s'
                        % type(value).__name__)

    decoded = value.decode('utf-8', 'replace')
    for ch in decoded:
        try:
            unicodedata.name(ch)
        except ValueError:
            # Mangled Unicode code-point. Perhaps this is just
            # plain ISO-8859-1 data incorrectly reported as UTF-8.
            return value.decode('iso-8859-1', 'replace')
    return decoded


def process_name(subject, reverse=True):
    """Split a name into DNS labels when it looks like a DNS name.

    A name is treated as a DNS name when it has at least two dot-separated
    labels and every lower-cased label is non-empty and made only of
    letters, digits, hyphens and '*' wildcards (RFC 2818). Full RFC 1034 /
    RFC 1123 / RFC 5280 compliance is deliberately not enforced: labels
    starting with a hyphen and multi-wildcard names are accepted.

    Args:
        subject: the name to process.
        reverse: when true, DNS labels are returned most-significant
            first (e.g. ["com", "example", "mail"]).

    Returns:
        The list of lower-cased labels for a DNS-like name, otherwise a
        single-element list holding the original, unmodified subject.
    """
    not_ldh_or_wildcard = re.compile(r"[^a-z\d\-\*]")
    parts = subject.lower().split(".")

    # Single-label names such as "localhost" are not indexed as DNS names.
    if len(parts) <= 1:
        return [subject]

    # ["John Smith"], ["Trustworthy Certificate Authority"],
    # ["google.com\x00"], etc.
    # TODO(ekasper): figure out what to do (use stringprep as specified
    # by RFC 5280?) to properly handle non-letter-digit-hyphen names.
    if any(not part or not_ldh_or_wildcard.search(part) for part in parts):
        return [subject]

    # ["com", "example", "*"], ["com", "example", "mail"], etc.
    if reverse:
        return parts[::-1]
    return parts