I'm trying to download a protected PDF from the New York State Courts NYSCEF website using Python. The URL looks like this:
https://iapps.courts.state.ny.us/nyscef/ViewDocument?docIndex=cdHe_PLUS_DaUdFKcTLzBtSo6zw==
When I try to use requests.get() or even navigate to the page with Selenium, I either get:
- A `403 Forbidden` response (via `requests`)
- Or a blank page with no `<embed>` tag (via Selenium)
Here’s what I’ve tried:
Using requests:
import requests
# Plain HTTP attempt: request the document URL directly with browser-like headers.
url = "https://iapps.courts.state.ny.us/nyscef/ViewDocument?docIndex=..."
headers = {
    "User-Agent": "Mozilla/5.0",
    "Referer": "https://iapps.courts.state.ny.us/nyscef/",
}
# requests applies no timeout by default; without one a stalled connection
# would block this script indefinitely.
response = requests.get(url, headers=headers, timeout=30)
print(response.status_code)  # Always 403
And using SeleniumBase:
from seleniumbase import SB
# Browser attempt: load the page in a visible browser and look for the
# <embed> element that would carry the PDF's source URL.
with SB(headless=False) as sb:
    sb.open(url)
    sb.wait(5)
    try:
        embed_element = sb.find_element("embed")
        embed_src = embed_element.get_attribute("src")
        print(embed_src)
    except Exception as err:
        print("❌ No embed tag found", err)
Nothing works.
Full code for reference:
from seleniumbase import SB
import requests
import os
import time
def download_pdf_with_selenium_and_requests():
    """Open the document page in a browser, then download its PDF via requests.

    Steps:
      1. Open the document URL with SeleniumBase (headless).
      2. Read the ``src`` attribute of the page's ``<embed>`` element.
      3. Copy the browser's cookies into a ``requests`` session.
      4. Download the ``src`` URL and save it as
         ``downloads/NYSCEF_Document.pdf`` under the current working directory.

    Returns:
        None. Progress and every failure (no ``<embed>`` element, empty
        ``src``, network error, non-PDF response) are reported with ``print``.
    """
    # Target document URL
    doc_url = "https://iapps.courts.state.ny.us/nyscef/ViewDocument?docIndex=cdHe_PLUS_DaUdFKcTLzBtSo6zw=="

    # Setup download directory
    download_dir = os.path.join(os.getcwd(), "downloads")
    os.makedirs(download_dir, exist_ok=True)
    filename = os.path.join(download_dir, "NYSCEF_Document.pdf")

    with SB(headless=True) as sb:
        # Step 1: Navigate to the document page (using browser session)
        sb.open(doc_url)
        time.sleep(5)  # Wait for any redirects/cookies to be set

        # Step 2: Grab the actual PDF <embed src>
        try:
            embed = sb.find_element("embed")
            pdf_url = embed.get_attribute("src")
            print(f"Found PDF URL: {pdf_url}")
        except Exception as e:
            print(f"No <embed> tag found: {e}")
            return

        # Step 3: Extract cookies from Selenium session. This must happen
        # while the browser is still open; the download itself does not need it.
        selenium_cookies = sb.driver.get_cookies()

    # An <embed> without a src would otherwise make requests fail on a None URL.
    if not pdf_url:
        print("The <embed> tag has no src attribute; nothing to download.")
        return

    # Step 4: Download PDF using requests with cookies
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Referer": doc_url,
    }
    with requests.Session() as session:
        for cookie in selenium_cookies:
            # Keep each cookie's domain/path so it is only sent where the
            # browser would send it, instead of to every host.
            session.cookies.set(
                cookie["name"],
                cookie["value"],
                domain=cookie.get("domain") or "",
                path=cookie.get("path") or "/",
            )
        try:
            # Explicit timeout: requests waits forever by default.
            response = session.get(pdf_url, headers=headers, timeout=30)
        except requests.RequestException as e:
            print(f"PDF download failed: {e}")
            return

    if response.status_code == 200 and "application/pdf" in response.headers.get("Content-Type", ""):
        with open(filename, "wb") as f:
            f.write(response.content)
        print(f"PDF saved as: {filename}")
    else:
        print(f"PDF download failed. Status: {response.status_code}")
        print(f"Content-Type: {response.headers.get('Content-Type')}")
        print(f"Final URL: {response.url}")
# Run the download only when this file is executed as a script.
if __name__ == "__main__":
    download_pdf_with_selenium_and_requests()
Response:
No <embed> tag found: Message:
Element {embed} was not present after 10 seconds!


The 403 response to your `requests.get()` call clearly tells you that this file is not freely available. If you believe that you have the right to access this file then you need to contact the owners of the website.

[…] `<embed>`, or if there is no `<iframe>` which would need `driver.switch_to()` […] `requests` for download.

[…] `requests` package). If you manage to send exactly the same request as your browser would, the receiver has no way to tell them apart, does it? I will investigate this further; it seems interesting.