File: dataframe-select-original.py

package info (click to toggle)
scalene 1.5.51-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 15,528 kB
  • sloc: cpp: 22,930; python: 13,403; javascript: 11,769; ansic: 817; makefile: 196; sh: 45
file content (18 lines) | stat: -rw-r--r-- 565 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import pandas as pd
import numpy as np
import timeit

# Seed NumPy's global RNG so the random frame below is identical on every run.
np.random.seed(1)

# Integer labels 0..9999; each one appears under both top-level column groups.
column_names_example = list(range(10000))

# Two-level column index: all ("left", c) pairs first, then all ("right", c).
index = pd.MultiIndex.from_tuples(
    [(side, c) for side in ("left", "right") for c in column_names_example]
)

# 1000 rows of np.random.rand samples, one column per (side, label) pair.
df = pd.DataFrame(data=np.random.rand(1000, len(index)), columns=index)

def keep_column(left_col, right_col):
    """Return whether ``left_col``'s first valid value exceeds ``right_col``'s last.

    Args:
        left_col: pandas Series; its first non-NA entry is the left operand.
        right_col: pandas Series; its last non-NA entry is the right operand.

    Returns:
        The result of ``first_valid(left_col) > last_valid(right_col)``, or
        ``False`` when either Series has no non-NA entry to compare.
    """
    first_label = left_col.first_valid_index()
    last_label = right_col.last_valid_index()

    # first_valid_index()/last_valid_index() return None for an empty or
    # all-NA Series; indexing with None would raise KeyError, so a column
    # with nothing to compare is simply not kept.
    if first_label is None or last_label is None:
        return False

    # The values above are index labels, so look them up by label explicitly.
    return left_col.loc[first_label] > right_col.loc[last_label]

def do_it():
    """Collect the labels whose "left" column passes ``keep_column``.

    Walks ``column_names_example`` in order and, for each label, compares the
    module-level ``df``'s ``("left", label)`` column against its
    ``("right", label)`` column via ``keep_column``.

    Returns:
        A list of the labels for which ``keep_column`` was truthy, in the
        order they appear in ``column_names_example``.
    """
    # NOTE(review): df["left"] and df["right"] are re-selected on every
    # iteration. Presumably this is the deliberately un-optimized baseline
    # (the file is named "...-original") -- confirm before hoisting them.
    selected = []
    for label in column_names_example:
        left_col = df["left"][label]
        right_col = df["right"][label]
        if keep_column(left_col, right_col):
            selected.append(label)
    return selected

# Run the column selection once when the script executes; the returned list
# is not stored or printed.
do_it()