This is an update to my earlier question, "From Q to compiler in less than 30 seconds".
As with that version, this Python script automatically downloads the markdown from any question on Code Review and saves it to a local file using Unix-style line endings.
For instance, to fetch the markdown for that older question, one could write:
python fetchQ 124479 fetchquestion.md
I'm interested in a general review including style, error handling or any other thing that could be improved.
This version also has a new feature, which I'll be showing here soon: the script also serves as the companion application to a browser extension that I'm currently testing. In that mode, this same Python script receives two arguments: the path to the native application's manifest and a special tag that identifies the application. See https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/Native_messaging for details on how the messaging works. In that mode, the environment variable AUTOPROJECT_DIR determines the directory into which the file is placed (falling back to /tmp when it is not set), and the file is named after the question number. So this question, for example, would be saved as 234084.md.
This is intended to be used on Linux, with Python 3 only.
fetchQ
#!/usr/bin/env python
""" Code Review question fetcher. Given the number of the question, uses
the StackExchange API version 2.2 to fetch the markdown of the question and
write it to a local file with the name given as the second argument.

When the second argument is the web-extension tag checked in the script
section at the bottom of this file, the question is instead read as a
length-prefixed JSON message from standard input and saved as
``<question_id>.md`` in the directory named by the AUTOPROJECT_DIR
environment variable (``/tmp`` when unset).

In both modes the saved file is finally passed to the ``autoproject``
command. """
import sys
import urllib.request
import urllib.parse
import urllib.error
import io
import os
import gzip
import json
import struct
import html.parser
from subprocess import call
def make_URL(qnumber):
    """Return the Stack Exchange API 2.2 URL for a Code Review question.

    ``qnumber`` is converted with ``str`` and placed after the
    ``/questions/`` path component; the query string (ordering, site and
    filter token) is fixed.
    """
    pieces = (
        'https://api.stackexchange.com/2.2/questions/',
        str(qnumber),
        '/?order=desc&sort=activity&site=codereview',
        '&filter=!)5IYc5cM9scVj-ftqnOnMD(3TmXe',
    )
    return ''.join(pieces)
def fetch_compressed_data(url):
    """Download ``url`` and return its gzip-decompressed body as bytes.

    The response is opened in a ``with`` block so the underlying
    connection is closed as soon as the body has been read, instead of
    being left open until garbage collection.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` when the
            request fails.
        OSError: when the body is not valid gzip data.
        EOFError: when the gzip stream is truncated.
    """
    with urllib.request.urlopen(url) as response:
        compressed = response.read()
    return gzip.decompress(compressed)
def fetch_question_markdown(qnumber):
    """Fetch one question through the API and return its decoded JSON entry.

    Builds the request URL with ``make_URL``, downloads and decompresses
    the response with ``fetch_compressed_data``, parses it as JSON and
    returns the first element of its ``items`` list.

    Every failure is reported on stderr and ends the process with exit
    status 1 (``sys.exit`` with a string argument does both), so this
    function only returns on success.
    """
    url = make_URL(qnumber)
    try:
        data = fetch_compressed_data(url)
    except urllib.error.HTTPError as err:
        # HTTPError is a subclass of URLError and carries both ``code`` and
        # ``reason``, so it has to be handled before the generic URLError.
        sys.exit(f'Error: {err.code}: while fetching data from {url}')
    except urllib.error.URLError as err:
        sys.exit(f'Could not reach server.\nReason: {err.reason}')
    except (OSError, EOFError) as err:
        # Raised by gzip when the body is not (complete) gzip data.
        sys.exit(f'Error: could not decompress data from {url}: {err}')
    try:
        m = json.loads(data)
    except json.JSONDecodeError as err:
        sys.exit(f'Error: {err.msg}')
    try:
        return m['items'][0]
    except (KeyError, IndexError, TypeError):
        # No 'items' key, an empty list, or a non-object JSON document.
        sys.exit(f'Error: no question data for {qnumber} in response from {url}')
def getMessage():
    """Read and decode one length-prefixed JSON message from stdin.

    The message consists of a 4-byte length (unpacked with the native
    ``'@I'`` format) followed by that many bytes of UTF-8 encoded JSON.
    If nothing at all can be read for the length prefix, the process
    exits with status 0. Before the payload is read, a notice stating the
    announced length is sent on stdout via ``sendMessage``.

    Returns the object produced by ``json.loads`` for the payload.
    """
    header = sys.stdin.buffer.read(4)
    if not header:
        sys.exit(0)
    (payload_size,) = struct.unpack('@I', header)
    notice = encodeMessage(f'attempting to read {payload_size} bytes')
    sendMessage(notice)
    payload = sys.stdin.buffer.read(payload_size)
    return json.loads(payload.decode('utf-8'))
def encodeMessage(messageContent):
    """Encode a message for transmission, given its content.

    The content is serialised with ``json.dumps`` and encoded as UTF-8.
    The result is a dict with two ``bytes`` values: ``'length'``, the
    byte count of the encoded content packed with the native ``'@I'``
    format, and ``'content'``, the encoded content itself.
    """
    body = json.dumps(messageContent).encode('utf-8')
    return {
        'length': struct.pack('@I', len(body)),
        'content': body,
    }
def sendMessage(encodedMessage):
    """Send an encoded message to stdout.

    Writes the ``'length'`` entry and then the ``'content'`` entry of
    ``encodedMessage`` to the binary stdout buffer and flushes it.
    """
    out = sys.stdout.buffer
    for part in ('length', 'content'):
        out.write(encodedMessage[part])
    out.flush()
def main():
    """Fetch a question, save it as markdown and run ``autoproject`` on it.

    Expects exactly two command-line arguments. Normally they are the
    question number and the output file name, and the question is fetched
    with ``fetch_question_markdown``. When the second argument equals the
    web-extension tag, the question is instead read from stdin with
    ``getMessage`` and saved as ``<question_id>.md`` in the directory
    named by AUTOPROJECT_DIR (``/tmp`` when unset).

    The saved file starts with a markdown heading that links the question
    title to its Code Review URL, followed by the question body with
    Unix-style line endings.
    """
    if len(sys.argv) != 3:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(f'Usage: {sys.argv[0]} questionnumber mdfilename')
    qnumber, qname = sys.argv[1:3]
    # are we being called as a Web Extension?
    # NOTE(review): this tag literal looks like e-mail-obfuscation residue;
    # confirm it matches the identifier the browser actually passes.
    if qname == '[email protected]':
        msg = getMessage()
        basedir = os.getenv('AUTOPROJECT_DIR', '/tmp')
        qnumber = msg['question_id']
        qname = f'{basedir}/{qnumber}.md'
    else:
        msg = fetch_question_markdown(qnumber)

    md = html.unescape(msg['body_markdown']).replace('\r\n', '\n')
    title = html.unescape(msg['title'])
    # Format as text first and encode afterwards: a bytes literal is never
    # interpolated, so the placeholders would be written out verbatim.
    header = f'# [{title}](https://codereview.stackexchange.com/questions/{qnumber})\n\n'
    with open(qname, 'wb') as f:
        f.write(header.encode('utf-8'))
        f.write(md.encode('utf-8'))
    call(["autoproject", qname])


if __name__ == '__main__':
    main()
make_URL() would be nice