Hello beautiful people!
I have created a script that uses two database models (Feed and Product): a store has a list of Feeds and a list of Products. Every 60 seconds I poll the database for all Feeds and Products and check whether a URL has been added or removed. I have done it like this:
import time
from threading import Thread
from loguru import logger
from lib.database import Feed, Product
from vendors.feed_page import FeedPage
from vendors.product_page import ProductPage
def _sync_monitors(known_urls: set, db_urls, page_cls, store_id, label: str) -> None:
    """Update *known_urls* to match *db_urls* and start a thread per new URL.

    Args:
        known_urls: The live set of URLs currently being monitored. It is
            mutated in place and never rebound.
        db_urls: The set of URLs just read from the database, or ``None``
            when the database helper failed and returned nothing.
        page_cls: Class constructed as ``page_cls(url, store_id, predicate)``
            whose instances expose ``do_request`` (``FeedPage`` or
            ``ProductPage``).
        store_id: Store identifier forwarded to ``page_cls``.
        label: Name used in the log line ("Feed" or "Product").
    """
    if db_urls is None:
        # A failed read must not be treated as "every URL was removed";
        # keep the current state and try again on the next poll.
        logger.warning(f'Skipping {label} sync: database returned no result')
        return

    added = db_urls - known_urls

    # Drop removed URLs first, then add the new ones. Both steps mutate the
    # same set object, because every running monitor was handed the bound
    # `known_urls.__contains__` of this exact set. Presumably the monitors
    # call it to learn whether their URL is still wanted -- confirm in
    # FeedPage / ProductPage.
    known_urls &= db_urls
    known_urls |= db_urls

    for url in added:
        logger.info(f'Adding to {label} monitor: {url}')
        Thread(
            target=page_cls(url, store_id, known_urls.__contains__).do_request
        ).start()


def main(store_id: int = 1, poll_interval: float = 60) -> None:
    """Poll the database forever and start a monitor thread for each new URL.

    Args:
        store_id: Store whose feeds and visible products are monitored.
            Defaults to ``1``, the value the script previously hard-coded.
        poll_interval: Seconds to sleep between two database polls.
    """
    feed_urls: set = set()     # Feed URLs currently monitored
    product_urls: set = set()  # Product URLs currently monitored

    while True:
        _sync_monitors(
            feed_urls, Feed.get_feeds(store_id), FeedPage, store_id, 'Feed'
        )
        _sync_monitors(
            product_urls,
            Product.get_visible_products(store_id),
            ProductPage,
            store_id,
            'Product',
        )
        time.sleep(poll_interval)


if __name__ == '__main__':
    main()
The program gets all the URLs from Feed.get_feeds(store_id) and compares them with feed_urls. Whenever there is a difference, it starts a thread to monitor that specific URL, so that later in the project it can detect whether something is wrong with it (not important for this review).
These are the database calls, which use Peewee as the ORM:
from typing import Optional

from loguru import logger
from peewee import (
    Model,
    TextField,
    BooleanField,
    IntegrityError
)
from playhouse.pool import PooledPostgresqlDatabase

from config import configuration
# Module-wide PostgreSQL connection pool used by the models in this file.
# Connection settings are read from `configuration.postgresql`;
# `stale_timeout` is given in seconds (see the playhouse.pool documentation).
postgres_pool = PooledPostgresqlDatabase(
    database=configuration.postgresql.database,
    user=configuration.postgresql.user,
    password=configuration.postgresql.password,
    host=configuration.postgresql.host,
    stale_timeout=30,
)
# ----------------------------------------------------------------------------------------------- #
class Product(Model):
    """Peewee model for a row of the ``product`` table."""

    store_id = TextField(column_name='store_id')
    url = TextField(column_name='url')
    visible = BooleanField(column_name='visible')

    class Meta:
        database = postgres_pool
        # `table_name` is the Peewee 3 spelling of the deprecated `db_table`.
        table_name = "product"

    @classmethod
    def get_visible_products(cls, store_id: int = 1) -> Optional[set]:
        """Return the URLs of all visible products of one store.

        Args:
            store_id: Store to query. Defaults to ``1``, the value that was
                previously hard-coded in the query.

        Returns:
            A set with one URL per matching row, or ``None`` when the query
            raised ``IntegrityError`` (the error is logged and the
            transaction rolled back).
        """
        try:
            query = cls.select(cls.url).where(
                (cls.store_id == store_id) & cls.visible
            )
            # Each row is a 1-tuple because only `url` is selected.
            return {url for url, in query.tuples().iterator()}
        except IntegrityError as err:
            logger.error(f'[Product -> get_visible_products] threw error {err}')
            postgres_pool.rollback()
            return None
# ----------------------------------------------------------------------------------------------- #
class Feed(Model):
    """Peewee model for a row of the ``feed`` table."""

    store_id = TextField(column_name='store_id')
    url = TextField(column_name='url')

    class Meta:
        database = postgres_pool
        # `table_name` is the Peewee 3 spelling of the deprecated `db_table`.
        table_name = "feed"

    @classmethod
    def get_feeds(cls, store_id: int = 1) -> Optional[set]:
        """Return the URLs of all feeds of one store.

        Args:
            store_id: Store to query. Defaults to ``1``, the value that was
                previously hard-coded in the query.

        Returns:
            A set with one URL per matching row, or ``None`` when the query
            raised ``IntegrityError`` (the error is logged and the
            transaction rolled back).
        """
        try:
            query = cls.select(cls.url).where(cls.store_id == store_id)
            # Each row is a 1-tuple because only `url` is selected.
            return {url for url, in query.tuples().iterator()}
        except IntegrityError as err:
            logger.error(f'[get_feeds] threw error {err}')
            postgres_pool.rollback()
            return None
It is fairly simple, and I hope to get some review of my code. One thing I am not satisfied with is that I do the same kind of check for new or removed URLs in the database twice, once for feeds and once for products; perhaps it is possible to do it in a single function instead? Hopefully I can get an extra pair of eyes on this.
Thanks :)