To collect the list of event data from the API, I perform these steps:
# Authenticate with the Betfair exchange API via betfairlightweight and
# request all soccer events starting within the next `hours_limit` hours.
trading = betfairlightweight.APIClient(
'email',
'password',
app_key='app_key',
# NOTE(review): the parentheses here do NOT create a tuple -- this is a plain
# string; confirm cert_files accepts a single path, or add a trailing comma.
cert_files=('./certs/bf.pem'),
session=requests.Session()
)
trading.login()
# Look-ahead window (hours) used to bound the market start time filter.
hours_limit = 12
limit_hour = (datetime.datetime.utcnow() + datetime.timedelta(hours=hours_limit))
# Soccer (event_type id '1'), restricted to the two market types priced later,
# starting no later than `limit_hour` (naive UTC, formatted ISO-8601 with Z).
market_filter = betfairlightweight.filters.market_filter(
event_type_ids=['1'],
market_type_codes = [
'MATCH_ODDS',
'OVER_UNDER_25'
],
market_start_time={
'to': limit_hour.strftime("%Y-%m-%dT%H:%M:%SZ")
}
)
# Returns a list of EventResult objects (pretty-printed sample shown below).
soccer_events = trading.betting.list_events(
filter=market_filter
)
Output soccer_events:
[<EventResult>, <EventResult>, ...]
Using:
def pretty_print(clas, indent=0):
    """Recursively print an object's attributes, one per line.

    The object's class name is printed as a header, then each attribute in
    ``__dict__`` order.  Attributes that themselves carry a ``__dict__`` are
    printed as a nested, further-indented block headed by *their* class name;
    plain values are printed as ``name: value``.

    NOTE(review): for nested objects only the class name is shown -- the
    attribute name is lost; kept as-is to preserve the original output.
    There is also no cycle guard, so self-referencing objects would recurse
    forever.
    """
    print(' ' * indent + type(clas).__name__ + ':')
    indent += 4
    for name, value in clas.__dict__.items():
        # hasattr() is the idiomatic form of the original "'__dict__' in dir(v)".
        if hasattr(value, '__dict__'):
            pretty_print(value, indent)
        else:
            print(' ' * indent + name + ': ' + str(value))
The output of pretty_print(soccer_events[0]):
EventResult:
elapsed_time: 0.3472568988800049
_datetime_created: 2022-07-03 16:01:07.401756
_datetime_updated: 2022-07-03 16:01:07.401756
_data: {'event': {'id': '31569381', 'name': 'Bayamon FC v Puerto Rico Sol FC', 'countryCode': 'GB', 'timezone': 'GMT', 'openDate': '2022-07-03T23:30:00.000Z'}, 'marketCount': 2}
market_count: 2
Event:
id: 31569381
open_date: 2022-07-03 23:30:00
time_zone: GMT
country_code: GB
name: Bayamon FC v Puerto Rico Sol FC
venue: None
With this list produced, I generate a DataFrame to handle the data:
# Build the events DataFrame in a single pass over `soccer_events` instead of
# eight separate list comprehensions (the original traversed the list once per
# column). Each row is a plain dict; insertion order fixes the column order.
_event_columns = ['event_name', 'event_id', 'event_venue', 'country_code',
                  'time_zone', 'open_date', 'market_count', 'open_local_date']
_rows = []
for obj_event in soccer_events:
    ev = obj_event.event
    _rows.append({
        'event_name': ev.name,
        'event_id': ev.id,
        'event_venue': ev.venue,
        'country_code': ev.country_code,
        'time_zone': ev.time_zone,
        'open_date': ev.open_date,
        'market_count': obj_event.market_count,
        # open_date is naive UTC: attach the UTC tzinfo, then convert to the
        # machine's local timezone (tz=None).
        'open_local_date': ev.open_date.replace(
            tzinfo=datetime.timezone.utc).astimezone(tz=None),
    })
# Passing columns explicitly keeps the schema even when the list is empty.
soccer_events_df = pd.DataFrame(_rows, columns=_event_columns)
Output:
event_name event_id event_venue country_code time_zone open_date market_count open_local_date
0 FC LaPa v PKK-U 31566562 None FI GMT 2022-07-03 13:00:00 2 2022-07-03 10:00:00-03:00
1 BFA Vilnius v FK Banga II 31566818 None LT GMT 2022-07-03 14:00:00 2 2022-07-03 11:00:00-03:00
2 Varbergs BoIS v Varnamo 31540709 None SE GMT 2022-07-03 13:00:00 2 2022-07-03 10:00:00-03:00
3 Bayamon FC v Puerto Rico Sol FC 31569381 None GB GMT 2022-07-03 23:30:00 2 2022-07-03 20:30:00-03:00
4 Norrkoping v Sirius 31540708 None SE GMT 2022-07-03 15:30:00 2 2022-07-03 12:30:00-03:00
.. ... ... ... ... ... ... ... ...
175 Werder Bremen v Karlsruhe 31566873 None None GMT 2022-07-03 13:30:00 2 2022-07-03 10:30:00-03:00
176 San Martin de Formosa v CA Douglas Haig 31566616 None AR GMT 2022-07-03 19:00:00 2 2022-07-03 16:00:00-03:00
177 Club Defensores de P v Sarmiento de Resistencia 31566619 None AR GMT 2022-07-03 18:00:00 2 2022-07-03 15:00:00-03:00
178 Sportivo AC Las Parejas v CD Juventud Unida (G) 31566621 None AR GMT 2022-07-03 19:30:00 2 2022-07-03 16:30:00-03:00
179 CA Liniers v Camioneros 31566623 None AR GMT 2022-07-03 18:00:00 2 2022-07-03 15:00:00-03:00
As there are many games listed, I use multiprocessing to do what I need with each of them; before starting the pool I run the events through some filters:
# Keep only "A v B" events that start between now (UTC) and `limit_hour`,
# then price each remaining event in a worker pool.
events_bf = soccer_events_df.reset_index()
if events_bf.empty:
    trading.logout()
    sys.exit()
events_bf = events_bf[events_bf['event_name'].str.contains(" v ")]
# NOTE(review): these comparisons put a formatted string against the
# 'open_date' column -- this relies on pandas coercing it; confirm the column
# dtype is datetime64 (it fails for object-dtype datetime values).
data_for_compare = (datetime.datetime.utcnow()).strftime("%Y-%m-%d %H:%M")
events_bf = events_bf[events_bf['open_date'] >= data_for_compare]
events_bf = events_bf[events_bf['open_date'] <= limit_hour.strftime("%Y-%m-%d %H:%M")]
# Leave one core free; max() guards the single-core case.
max_process = max(multiprocessing.cpu_count() - 1, 1)
# BUG FIX: the Pool must be created BEFORE the try block. In the original the
# Pool() call sat inside the try, so a failure while creating it made the
# finally clause raise NameError on the still-undefined `pool`.
# NOTE(review): `trading` (an APIClient holding a live session) is shipped to
# every worker here -- confirm it pickles and that sharing one session across
# processes is safe; logging in per worker is the usual pattern.
pool = multiprocessing.Pool(max_process)
try:
    list_pool = pool.map(data_event, zip(repeat(trading), events_bf.iterrows()))
finally:
    pool.close()
    pool.join()
This is the function used in multiprocessing:
def _best_back_prices(trading, market_id, fallback):
    """Fetch one market book and return the last traded price per runner.

    Returns a flat list with one entry per runner ('-' when a runner has no
    last traded price). On any error while reading the runners, returns a
    copy of `fallback` (placeholder '-' entries) instead, matching the
    original per-branch behaviour.
    """
    order_filter = betfairlightweight.filters.ex_best_offers_overrides(
        best_prices_depth=3
    )
    price_filter = betfairlightweight.filters.price_projection(
        price_data=['EX_BEST_OFFERS'],
        ex_best_offers_overrides=order_filter
    )
    market_books = trading.betting.list_market_book(
        market_ids=[market_id],
        price_projection=price_filter
    )
    runners = market_books[0].runners
    try:
        return [runner_book.last_price_traded if runner_book.last_price_traded
                else '-'
                for runner_book in runners]
    except Exception:  # was a bare `except:`; narrowed so SystemExit propagates
        return list(fallback)


def data_event(event_bf) -> list:
    """Worker: price one event's Match Odds and Over/Under 2.5 markets.

    `event_bf` is a 2-tuple of (trading client, (index, event row)) as
    produced by `zip(repeat(trading), events_bf.iterrows())`.

    Returns the concatenation of the Match Odds fields, the Over/Under
    prices and the selection/event ids, or ['off'] when the event has no
    Match Odds data or any error occurs (workers must not crash the pool).
    """
    try:
        trading = event_bf[0]
        _, event_bf = event_bf[1]
        event_id = event_bf['event_id']
        filter_catalog_markets = betfairlightweight.filters.market_filter(
            event_ids=[event_id],
            market_type_codes=[
                'MATCH_ODDS',
                'OVER_UNDER_25'
            ]
        )
        catalog_markets = trading.betting.list_market_catalogue(
            filter=filter_catalog_markets,
            max_results='100',
            sort='FIRST_TO_START',
            market_projection=['RUNNER_METADATA']
        )
        # Single pass over the catalogue (the original used nine separate
        # list comprehensions, one traversal per column).
        rows = []
        for market in catalog_markets:
            runners = market.runners
            rows.append({
                'market_name': market.market_name,
                'market_id': market.market_id,
                'total_matched': market.total_matched,
                'Home': runners[0].runner_name if len(runners) > 0 else '',
                'Home_id': runners[0].selection_id if len(runners) > 0 else 0,
                'Away': runners[1].runner_name if len(runners) > 1 else '',
                'Away_id': runners[1].selection_id if len(runners) > 1 else 0,
                'Draw': runners[2].runner_name if len(runners) > 2 else '',
                'Draw_id': runners[2].selection_id if len(runners) > 2 else 0,
            })
        markets_df = pd.DataFrame(rows)

        match_odds_list = []
        over_under_list = []
        ids_list = []
        for _, market_row in markets_df.iterrows():
            if market_row['market_name'] == 'Match Odds':
                back = _best_back_prices(trading, market_row['market_id'],
                                         ('-', '-'))
                match_start = pd.to_datetime(str(event_bf['open_date']))
                match_odds_list.append(match_start.strftime('%Y-%m-%dT%H:%M:%SZ'))
                match_start_local = pd.to_datetime(str(event_bf['open_local_date']))
                match_odds_list.append(match_start_local.strftime('%Y-%m-%dT%H:%M:%SZ'))
                match_odds_list.append(market_row['Home'] + ' v ' + market_row['Away'])
                match_odds_list.append(market_row['Home'])
                match_odds_list.append(market_row['Away'])
                # Explicit indexing on purpose: with fewer than 3 runners the
                # IndexError falls through to the outer handler and yields
                # ['off'], exactly as in the original.
                match_odds_list.append(back[0])
                match_odds_list.append(back[1])
                match_odds_list.append(back[2])
                ids_list.append(market_row['Home_id'])
                ids_list.append(market_row['Away_id'])
                ids_list.append(event_id)
            elif market_row['market_name'] == 'Over/Under 2.5 Goals':
                back = _best_back_prices(trading, market_row['market_id'],
                                         ('-',))
                over_under_list.append(back[0])
                over_under_list.append(back[1])
        if match_odds_list:
            return match_odds_list + over_under_list + ids_list
        return ['off']
    except Exception as e:
        # Never let a worker exception kill the pool: report and return the
        # 'off' sentinel the caller filters on.
        print(e)
        return ['off']
I would like a review of the methods used to process this list — in particular whether chaining so many list comprehensions to generate the DataFrame is reasonable — and of whether the multiprocessing model I'm using is really the most appropriate for this type of data.