File: binary_symbol_explore.py

package info (click to toggle)
apache-arrow 23.0.1-1
links: PTS
area: main
in suites: sid
size: 76,220 kB
sloc: cpp: 654,608; python: 70,522; ruby: 45,964; ansic: 18,742; sh: 7,365; makefile: 669; javascript: 125; xml: 41
file content (121 lines) | stat: -rw-r--r-- 4,080 bytes
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import subprocess as sbp
import sys

try:
    import pandas as pd
    HAVE_PANDAS = True
except ImportError:
    HAVE_PANDAS = False

# Substring -> replacement table used by filter_symbol() to abbreviate
# demangled C++ symbol names.
#
# NOTE: the entries are applied one after another in insertion order, each on
# the output of the previous one, so the order is significant.  Several keys
# further down are written in terms of the *already abbreviated* text (for
# example 'ac::internal::' only matches once 'arrow::compute::' has become
# 'ac::', and 'ArrayData const&' only matches once 'arrow::ArrayData' has
# become 'ArrayData').  Add new entries with that in mind.
SYMBOL_FILTERS = {
    # Standard library types
    'std::chrono::duration': 'duration',
    'std::__cxx11::basic_string': 'std::string',
    # Arrow classes: drop the 'arrow::' qualifier
    'arrow::ArrayData': 'ArrayData',
    'arrow::ArraySpan': 'ArraySpan',
    'arrow::Datum': 'Datum',
    'arrow::Scalar': 'Scalar',
    'arrow::Status': 'Status',
    'arrow::Type': 'Type',
    # Arrow data type classes: short aliases
    'arrow::TimestampType': 'TsT',
    'arrow::BinaryType': 'BinaryT',
    'arrow::BooleanType': 'BoolT',
    'arrow::StringType': 'StringT',
    'arrow::LargeStringType': 'LStringT',
    'arrow::DoubleType': 'DoubleT',
    'arrow::FloatType': 'FloatT',
    'arrow::Int64Type': 'Int64T',
    'arrow::UInt64Type': 'UInt64T',
    'arrow::LargeListType': 'LListT',
    'arrow::ListType': 'ListT',
    'arrow::FixedSizeListType': 'FSLT',
    # Namespaces: shorten or remove
    'arrow::compute::': 'ac::',
    'ac::internal::': '',
    'arrow::internal::': 'ai::',
    '(anonymous namespace)::': '',
    'internal::applicator::': '',
    'internal::CastFunctor': 'CastFunctor',
    # Parameter types, expressed using the abbreviations produced above
    'ac::KernelContext*': 'C*',
    'ArrayData const&': 'A&',
    'ArraySpan const&': 'A&',
    'ArrayData*': 'O*',
    'Scalar const&': 'S&',
    'Datum const&': 'V&',
    'Datum*': 'O*',
    'ac::ExecBatch const&': 'B&',
    'ac::ExecSpan const&': 'B&',
    'ac::ExecValue const&': 'V&',
    'ac::ExecResult*': 'O*',
    'Type::type': 'T',
}


def filter_symbol(symbol_name, filters=None):
    """Abbreviate a demangled symbol name with substring replacements.

    Every ``token -> replacement`` pair of the mapping is applied as a plain
    ``str.replace`` over the whole name, in the mapping's iteration order.
    Each replacement operates on the result of the previous one, so later
    tokens may match text produced by earlier replacements.

    Parameters
    ----------
    symbol_name : str
        The symbol name to shorten.
    filters : mapping of str to str, optional
        Replacement table to apply.  When omitted (or None), the module-level
        ``SYMBOL_FILTERS`` table is used.

    Returns
    -------
    str
        The symbol name after all replacements have been applied.
    """
    if filters is None:
        filters = SYMBOL_FILTERS
    for token, replacement in filters.items():
        symbol_name = symbol_name.replace(token, replacement)
    return symbol_name


def get_symbols_and_sizes(object_file):
    """Return the sizes of the symbols defined in a binary.

    Runs ``nm --print-size --size-sort`` on ``object_file``, demangles the
    listing with ``c++filt`` and abbreviates each symbol name with
    ``filter_symbol``.

    Parameters
    ----------
    object_file : str
        Path of the object file / library to inspect.

    Returns
    -------
    dict
        Maps the abbreviated symbol name to its size in bytes.  If several
        symbols end up with the same abbreviated name, the one listed last
        by ``nm`` wins.

    Raises
    ------
    subprocess.CalledProcessError
        If ``nm`` or ``c++filt`` exits with a non-zero status.
    """
    # Run both tools from argument lists instead of a shell pipeline: the
    # path is passed verbatim (spaces and shell metacharacters are harmless)
    # and a failure of `nm` itself is reported instead of being masked by
    # the exit status of `c++filt`.
    nm_output = sbp.check_output(
        ['nm', '--print-size', '--size-sort', object_file])
    output = sbp.check_output(['c++filt'], input=nm_output).decode('utf-8')

    symbol_sizes = {}
    for line in output.split('\n'):
        # Expected layout: "<address> <size> <type> <symbol name>".  The
        # symbol name may itself contain spaces, hence maxsplit=3.
        fields = line.split(' ', 3)
        if len(fields) != 4:
            # Blank line or a non-symbol line (e.g. the "member.o:" headers
            # nm prints for archives): nothing to record.
            continue
        _, hex_size, _, symbol_name = fields
        symbol_sizes[filter_symbol(symbol_name)] = int(hex_size, 16)
    return symbol_sizes


# Only symbols whose size changed by more than this many bytes are reported.
SIZE_DIFF_THRESHOLD = 700


def _build_diff_table(base_results, contender_results):
    """Build sorted (symbol, base, contender, diff) rows from two size maps.

    A symbol missing from one side counts as size 0 on that side.
    """
    diff_table = []
    for name in set(base_results) | set(contender_results):
        base_size = base_results.get(name, 0)
        contender_size = contender_results.get(name, 0)
        diff_table.append(
            (name, base_size, contender_size, contender_size - base_size))
    # Break ties on the symbol name: set iteration order changes between
    # runs, and the report should not.
    diff_table.sort(key=lambda row: (row[3], row[0]))
    return diff_table


def _print_plain(rows):
    """Print diff rows as a fixed-width text table (used without pandas)."""
    print(f"{'base':>12} {'contender':>12} {'diff':>12}  symbol")
    for name, base_size, contender_size, size_diff in rows:
        print(f"{base_size:>12} {contender_size:>12} {size_diff:>+12}  {name}")


def main(argv=None):
    """Compare symbol sizes between a base and a contender binary.

    Prints the symbols that shrank by more than SIZE_DIFF_THRESHOLD bytes,
    then the symbols that grew by more than SIZE_DIFF_THRESHOLD bytes.

    Parameters
    ----------
    argv : list of str, optional
        Command line including the program name; defaults to ``sys.argv``.
        ``argv[1]`` is the base binary and ``argv[2]`` the contender; any
        further arguments are ignored.

    Returns
    -------
    int
        Process exit status: 0 on success, 2 if arguments are missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        prog = argv[0] if argv else 'binary_symbol_explore.py'
        print(f"usage: {prog} BASE CONTENDER", file=sys.stderr)
        return 2
    base, contender = argv[1], argv[2]

    base_results = get_symbols_and_sizes(base)
    contender_results = get_symbols_and_sizes(contender)
    diff_table = _build_diff_table(base_results, contender_results)

    if HAVE_PANDAS:
        frame = pd.DataFrame.from_records(diff_table,
                                          columns=['symbol', 'base',
                                                   'contender', 'diff'])
        pd.options.display.max_rows = 1000
        pd.options.display.max_colwidth = 150
        print(frame[frame['diff'] < -SIZE_DIFF_THRESHOLD])
        print(frame[frame['diff'] > SIZE_DIFF_THRESHOLD])
    else:
        # Plain-text fallback so the script is still useful without pandas.
        _print_plain([row for row in diff_table
                      if row[3] < -SIZE_DIFF_THRESHOLD])
        _print_plain([row for row in diff_table
                      if row[3] > SIZE_DIFF_THRESHOLD])
    return 0


if __name__ == '__main__':
    sys.exit(main())