1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
|
#!/bin/sh
# PCP QA Test No. 1794
# Exercise pcp2arrow(1) parquet file generation.
#
# Copyright (c) 2024 Red Hat. All Rights Reserved.
#
# Name of this script without its directory part; used to label the output.
seq=$(basename "$0")
echo "QA output created by $seq"

# Pull in the shared helper file.  It is not visible here; presumably it
# provides $python, _notrun and the other QA variables used below.
. ./common.python

# Bail out via _notrun when a prerequisite is missing: the pcp2arrow
# executable itself, or an importable pandas module.
# $python is left unquoted on purpose in case it carries arguments
# (NOTE(review): confirm against common.python).
test -x "$PCP_BIN_DIR/pcp2arrow" || _notrun "pcp2arrow(1) is not installed"
$python -c "import pandas" >/dev/null 2>&1 || _notrun "pandas is not installed"
# Housekeeping run from the exit/signal trap: change back to $here and
# remove the $tmp scratch path together with every "$tmp.<suffix>" sibling.
# Globals read: here, sudo, tmp.
_cleanup()
{
    cd "$here"
    # With an empty $tmp the pattern "$tmp".* degenerates to the bare glob
    # .* (every dotfile in the current directory), so skip removal then.
    if [ -n "$tmp" ]
    then
        # $sudo stays unquoted: it must vanish when empty and word-split
        # when it holds a command prefix.
        $sudo rm -rf "$tmp" "$tmp".*
    fi
}
# Exit code handed back by the trap below; success (0) unless something
# later in the script assigns a different value.
status=0
# On shell exit (0) and on signals 1, 2, 3 and 15: run _cleanup, then exit
# with the value $status holds at that moment.  The single quotes defer
# expansion of $status until the trap fires.
trap '_cleanup; exit $status' 0 1 2 3 15
# Normalise stdin so the output is stable from run to run:
#   1. every occurrence of the $tmp prefix becomes TMP
#   2. "0", any one character, "0", then one or more digits becomes SMALLNUM
#   3. any remaining "0", any one character, "0" becomes ZERONUM
# Rule 2 has to run before rule 3, otherwise rule 3 would consume the
# leading part of the longer numbers.
# NOTE(review): the "." in rules 2 and 3 is an unescaped regex
# metacharacter, so it matches any character (e.g. "0-05"), not just a
# decimal point.  Left as-is because tightening it may change the
# recorded expected output -- confirm before changing.
# Globals read: tmp.
_filter()
{
    sed "s@$tmp@TMP@g
         s@0.0[0-9][0-9]*@SMALLNUM@g
         s@0.0@ZERONUM@g"
}
# real QA test starts here

# For each archive: run pcp2arrow (-z -t 10) writing to $tmp.<archive>,
# log what was produced to $seq_full, then summarise the parquet data
# with pandas and pass the summary through _filter.
# The pandas summary prints the sorted column names when there are fewer
# than 42 columns (plus the head of the 1 and 15 minute load averages
# when the 1 minute column is present), otherwise only the column count;
# the frame shape is printed in every case.
for archive in viewqa1 viewqa2 viewqa3 moomba.client proc
do
    echo
    echo "=== pcp2arrow $archive"
    pcp2arrow -z -t 10 -o "$tmp.$archive" -a "archives/$archive"
    find "$tmp.$archive" >> "$seq_full"
    # The Python text sits at column 0 because its indentation is
    # significant; the shell expands $tmp.$archive before Python runs.
    $python -c "
import pandas
df = pandas.read_parquet('$tmp.$archive')
if (len(df.columns) < 42):
    print('Columns:', sorted(df.columns))
    # exercise bug in extracting instance values
    if 'kernel.all.load[1 minute]' in df.columns:
        print('1 and 15 minute load averages')
        ldf = df[['kernel.all.load[1 minute]', 'kernel.all.load[15 minute]']]
        print(ldf.head())
else:
    print('Columns:', len(df.columns))
print('Shape:', df.shape)
" | _filter
done
# Test raw mode (pmFetchArchive optimization)
#
# For each archive: run pcp2arrow with -r -s 5 on kernel.all.load and
# disk.all.read, writing to $tmp.raw.<archive>, log what was produced to
# $seq_full, then report the sorted columns, the shape, and whether a
# 'timestamp' column exists.  When it exists and the frame is non-empty,
# the first timestamp is checked to be from 1999 or later, which catches
# values that collapsed to the 1970 epoch.
echo
echo "=== Testing raw mode with --raw flag ==="
for archive in viewqa1 moomba.client
do
    echo
    echo "=== pcp2arrow --raw $archive"
    pcp2arrow -z -r -s 5 -o "$tmp.raw.$archive" -a "archives/$archive" kernel.all.load disk.all.read
    find "$tmp.raw.$archive" >> "$seq_full"
    # The Python text sits at column 0 because its indentation is
    # significant; the shell expands $tmp.raw.$archive before Python runs.
    $python -c "
import pandas
df = pandas.read_parquet('$tmp.raw.$archive')
print('Columns:', sorted(df.columns))
print('Shape:', df.shape)
print('Has timestamp:', 'timestamp' in df.columns)
# Verify timestamps are valid (not 1970 epoch)
if 'timestamp' in df.columns and len(df) > 0:
    ts = df['timestamp'].iloc[0]
    if ts.year >= 1999:
        print('Timestamp valid: True')
    else:
        print('Timestamp valid: False (year=%d)' % ts.year)
" | _filter
done

# success, all done
# The trap installed earlier supplies the real exit code ($status).
exit
|