Basic functions that didn't fit anywhere else

Multiprocessing

Python offers an easy way to run a function over the items of a list in parallel. Unfortunately, the Jupyter environment (on Windows?) does not support this feature (yet?). So, in order to actually use multiprocessing, we have to write a traditional .py script that calls the function below.

run_in_parallel[source]

run_in_parallel(func, iterable, n_workers=6)

def run_in_parallel(func, iterable, n_workers=6):
    """Apply ``func`` to every item of ``iterable`` using a pool of processes.

    Args:
        func: Callable taking a single argument. It is sent to the worker
            processes, so it must be picklable (e.g. a module-level function).
        iterable: Items to process; each one is passed to ``func`` on its own.
        n_workers: Number of worker processes in the pool.

    Returns:
        A list with the result of ``func`` for each item, in input order.
    """
    # The context manager shuts the pool down on exit, so worker processes are
    # not left running after the call. ``map`` blocks until every result is
    # available, which means nothing is lost when the pool is torn down.
    with mp.Pool(processes=n_workers) as pool:
        return pool.map(func, iterable)

Pandas

original = pd.DataFrame([[1,2,3], [4,5,6]], columns=['A','B','C'])
original
A B C
0 1 2 3
1 4 5 6
additional = pd.DataFrame([[1,2,9,10], [4,5,13,14], [4,6,13,14]], columns=['A','B','E','F'])
additional.head()
A B E F
0 1 2 9 10
1 4 5 13 14
2 4 6 13 14

overlapping_cols[source]

overlapping_cols(df1, df2)

merge_with_prefix[source]

merge_with_prefix(original, additional, prefix, left_on=None, right_on=None)

merge_with_prefix(original, additional, 'home_')
A B C home_E home_F
0 1 2 3 9 10
1 4 5 6 13 14
merge_with_prefix(original, additional, 'one_', left_on=['A'], right_on=['A'])
A B C one_B one_E one_F
0 1 2 3 2 9 10
1 4 5 6 5 13 14
2 4 5 6 6 13 14
merge_with_prefix(original, additional, 'two_', left_on=['A', 'B'], right_on=['A', 'B'])
A B C two_E two_F
0 1 2 3 9 10
1 4 5 6 13 14

Translation with SequenceMatcher

actuals = ['Bayern Munich', 'Wolfsburg', 'Schalke', 'Nurnberg', 'Mainz', 'Hansa Rostock', 'B. Monchengladbach', 'Bochum', 'Hannover', 'Kaiserslautern', 'VfB Stuttgart', 'Hamburger SV', 'Freiburg', 'Bayer Leverkusen', 'Dortmund', 'Arminia Bielefeld', 'Hertha Berlin', 'Werder Bremen', 'Duisburg', 'Eintracht Frankfurt', 'Alemannia Aachen', 'Energie Cottbus', 'Karlsruher', 'Hoffenheim', 'Augsburg', 'Greuther Furth', 'Dusseldorf', 'Braunschweig', 'Paderborn', 'Köln']
targets = ['SV Werder', 'Leverkusen', 'FC Bayern', 'Hamburger SV', 'Schalke 04', '1.FC Köln', 'Dortmund', 'Hannover 96', 'Hertha BSC', 'Wolfsburg', 'Duisburg', 'VfB Stuttgart', "K'lautern", 'Mainz 05', 'Bielefeld', 'Frankfurt', 'Nürnberg', "M'gladbach", 'Bochum', 'Cottbus', 'Aachen', 'Karlsruhe', 'Rostock', 'Hoffenheim', 'SC Freiburg', 'St. Pauli', 'Augsburg', 'Düsseldorf', 'Gr. Fürth', 'Braunschweig', 'Paderborn', 'Ingolstadt', 'Darmstadt', 'RB Leipzig', 'Union Berlin']

similar_score[source]

similar_score(a, b)

similar_n[source]

similar_n(actual, targets, n=5)

actuals[0]
'Bayern Munich'
similar_n(actuals[0], targets)
['FC Bayern', 'Braunschweig', 'Paderborn', "K'lautern", 'Hannover 96']

create_translation[source]

create_translation(actuals, targets)

translation = create_translation(actuals, targets)
translation
{'Bayern Munich': 'FC Bayern',
 'Wolfsburg': 'Wolfsburg',
 'Schalke': 'Schalke 04',
 'Nurnberg': 'Nürnberg',
 'Mainz': 'Mainz 05',
 'Hansa Rostock': 'Rostock',
 'B. Monchengladbach': "M'gladbach",
 'Bochum': 'Bochum',
 'Hannover': 'Hannover 96',
 'Kaiserslautern': "K'lautern",
 'VfB Stuttgart': 'VfB Stuttgart',
 'Hamburger SV': 'Hamburger SV',
 'Freiburg': 'SC Freiburg',
 'Bayer Leverkusen': 'Leverkusen',
 'Dortmund': 'Dortmund',
 'Arminia Bielefeld': 'Bielefeld',
 'Hertha Berlin': 'Hertha BSC',
 'Werder Bremen': 'SV Werder',
 'Duisburg': 'Duisburg',
 'Eintracht Frankfurt': 'Frankfurt',
 'Alemannia Aachen': 'Aachen',
 'Energie Cottbus': 'Cottbus',
 'Karlsruher': 'Karlsruhe',
 'Hoffenheim': 'Hoffenheim',
 'Augsburg': 'Augsburg',
 'Greuther Furth': 'Gr. Fürth',
 'Dusseldorf': 'Düsseldorf',
 'Braunschweig': 'Braunschweig',
 'Paderborn': 'Paderborn',
 'Köln': '1.FC Köln'}

reverse_dict[source]

reverse_dict(d)

reverse_dict(translation)
{'FC Bayern': 'Bayern Munich',
 'Wolfsburg': 'Wolfsburg',
 'Schalke 04': 'Schalke',
 'Nürnberg': 'Nurnberg',
 'Mainz 05': 'Mainz',
 'Rostock': 'Hansa Rostock',
 "M'gladbach": 'B. Monchengladbach',
 'Bochum': 'Bochum',
 'Hannover 96': 'Hannover',
 "K'lautern": 'Kaiserslautern',
 'VfB Stuttgart': 'VfB Stuttgart',
 'Hamburger SV': 'Hamburger SV',
 'SC Freiburg': 'Freiburg',
 'Leverkusen': 'Bayer Leverkusen',
 'Dortmund': 'Dortmund',
 'Bielefeld': 'Arminia Bielefeld',
 'Hertha BSC': 'Hertha Berlin',
 'SV Werder': 'Werder Bremen',
 'Duisburg': 'Duisburg',
 'Frankfurt': 'Eintracht Frankfurt',
 'Aachen': 'Alemannia Aachen',
 'Cottbus': 'Energie Cottbus',
 'Karlsruhe': 'Karlsruher',
 'Hoffenheim': 'Hoffenheim',
 'Augsburg': 'Augsburg',
 'Gr. Fürth': 'Greuther Furth',
 'Düsseldorf': 'Dusseldorf',
 'Braunschweig': 'Braunschweig',
 'Paderborn': 'Paderborn',
 '1.FC Köln': 'Köln'}

Export