Added reddit.py to get data from reddit (#5698)

* Rewritten reddit.py * Removed logging module import * Fixed minor bug which was causing extreme rate limiting * Update reddit.py * Update reddit.py * Update reddit.py Co-authored-by: Christian Clauss <[email protected]>
TheAlgorithms · Nov 4, 2021 · b6eb448e63a7eb8b145a600c368419e77872f134 · b6eb448
1 parent 3815a97
commit b6eb448e63a7eb8b145a600c368419e77872f134
Showing with 53 additions and 0 deletions.

+53 −0 web_programming/reddit.py
diff --git a/web_programming/reddit.py b/web_programming/reddit.py
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+import requests
+
+valid_terms = set(
+    """approved_at_utc approved_by author_flair_background_color
+author_flair_css_class author_flair_richtext author_flair_template_id author_fullname
+author_premium can_mod_post category clicked content_categories created_utc downs
+edited gilded gildings hidden hide_score is_created_from_ads_ui is_meta
+is_original_content is_reddit_media_domain is_video link_flair_css_class
+link_flair_richtext link_flair_text link_flair_text_color media_embed mod_reason_title
+name permalink pwls quarantine saved score secure_media secure_media_embed selftext
+subreddit subreddit_name_prefixed subreddit_type thumbnail title top_awarded_type
+total_awards_received ups upvote_ratio url user_reports""".split()
+)
+
+
+def get_subreddit_data(
+    subreddit: str, limit: int = 1, age: str = "new", wanted_data: list | None = None
+) -> dict:
+    """
+    subreddit : Subreddit to query
+    limit : Number of posts to fetch
+    age : ["new", "top", "hot"]
+    wanted_data : Get only the required data in the list
+
+    >>> pass
+    """
+    wanted_data = wanted_data or []
+    if invalid_search_terms := ", ".join(sorted(set(wanted_data) - valid_terms)):
+        raise ValueError(f"Invalid search term: {invalid_search_terms}")
+    response = requests.get(
+        f"https://reddit.com/r/{subreddit}/{age}.json?limit={limit}",
+        headers={"User-agent": "A random string"},
+    )
+    if response.status_code == 429:
+        raise requests.HTTPError
+
+    data = response.json()
+    if not wanted_data:
+        return {id_: data["data"]["children"][id_] for id_ in range(limit)}
+
+    data_dict = {}
+    for id_ in range(limit):
+        data_dict[id_] = {
+            item: data["data"]["children"][id_]["data"][item] for item in wanted_data
+        }
+    return data_dict
+
+
+if __name__ == "__main__":
+    # If you get Error 429, that means you are rate limited.Try after some time
+    print(get_subreddit_data("learnpython", wanted_data=["title", "url", "selftext"]))