Basic functions that didn't fit anywhere else
Python offers an easy way to run a function over the items of a list in parallel. Unfortunately, the Jupyter environment (on Windows?) does not support this feature (yet?). So, in order to actually use multiprocessing, we have to write a traditional .py script that uses the function below.
def run_in_parallel(func, iterable, n_workers=6):
    """Apply *func* to every item of *iterable* using a pool of worker processes.

    Args:
        func: Callable taking a single argument. It is sent to the worker
            processes, so it must be picklable (e.g. a module-level function).
        iterable: Items to pass to *func*, one per call.
        n_workers: Number of worker processes to start.

    Returns:
        A list with the results of ``func(item)``, in the order of *iterable*.
    """
    # The context manager shuts the pool down on exit, so worker processes
    # are not leaked on every call. ``map`` blocks until all results are
    # available, so nothing is lost when the pool is torn down.
    with mp.Pool(processes=n_workers) as pool:
        return pool.map(func, iterable)
# --- Demo cells for merge_with_prefix (defined elsewhere) -------------------
# Left-hand frame: two rows, columns A/B/C.
original = pd.DataFrame(
    data=[
        [1, 2, 3],
        [4, 5, 6],
    ],
    columns=['A', 'B', 'C'],
)
# Bare expression: displayed by a notebook, a no-op in a plain script.
original

# Right-hand frame: shares columns A and B with `original`, adds E and F.
additional = pd.DataFrame(
    data=[
        [1, 2, 9, 10],
        [4, 5, 13, 14],
        [4, 6, 13, 14],
    ],
    columns=['A', 'B', 'E', 'F'],
)
additional.head()

# Three calls with different prefixes: first without explicit join keys,
# then joining on A only, then on A and B.
# NOTE(review): exact merge/prefix semantics live in merge_with_prefix — confirm there.
merge_with_prefix(original, additional, 'home_')
merge_with_prefix(
    original,
    additional,
    'one_',
    left_on=['A'],
    right_on=['A'],
)
merge_with_prefix(
    original,
    additional,
    'two_',
    left_on=['A', 'B'],
    right_on=['A', 'B'],
)
# --- Demo cells for similar_n / create_translation / reverse_dict -----------
# Two lists of club names in different spellings.
actuals = [
    'Bayern Munich',
    'Wolfsburg',
    'Schalke',
    'Nurnberg',
    'Mainz',
    'Hansa Rostock',
    'B. Monchengladbach',
    'Bochum',
    'Hannover',
    'Kaiserslautern',
    'VfB Stuttgart',
    'Hamburger SV',
    'Freiburg',
    'Bayer Leverkusen',
    'Dortmund',
    'Arminia Bielefeld',
    'Hertha Berlin',
    'Werder Bremen',
    'Duisburg',
    'Eintracht Frankfurt',
    'Alemannia Aachen',
    'Energie Cottbus',
    'Karlsruher',
    'Hoffenheim',
    'Augsburg',
    'Greuther Furth',
    'Dusseldorf',
    'Braunschweig',
    'Paderborn',
    'Köln',
]
targets = [
    'SV Werder',
    'Leverkusen',
    'FC Bayern',
    'Hamburger SV',
    'Schalke 04',
    '1.FC Köln',
    'Dortmund',
    'Hannover 96',
    'Hertha BSC',
    'Wolfsburg',
    'Duisburg',
    'VfB Stuttgart',
    "K'lautern",
    'Mainz 05',
    'Bielefeld',
    'Frankfurt',
    'Nürnberg',
    "M'gladbach",
    'Bochum',
    'Cottbus',
    'Aachen',
    'Karlsruhe',
    'Rostock',
    'Hoffenheim',
    'SC Freiburg',
    'St. Pauli',
    'Augsburg',
    'Düsseldorf',
    'Gr. Fürth',
    'Braunschweig',
    'Paderborn',
    'Ingolstadt',
    'Darmstadt',
    'RB Leipzig',
    'Union Berlin',
]

# Bare expression: displayed by a notebook, a no-op in a plain script.
actuals[0]

# Look up the first name against the target spellings.
# NOTE(review): similar_n is defined elsewhere — presumably returns the closest matches; confirm.
similar_n(actuals[0], targets)

# Build the mapping between the two lists and keep it for later cells.
# NOTE(review): direction and shape of the mapping are set by create_translation — confirm there.
translation = create_translation(actuals, targets)
translation

reverse_dict(translation)