I know there are other Python wiki API classes out there. I'm writing this one because I don't need all the bells and whistles (no edits, no talk pages, etc.); I just need to be able to search for titles and get the wiki markup.
Any advice, suggestions, comments, or a full review would be appreciated.
# -*- coding: utf-8 -*-
import urllib2
import re
import time
import sys
from urllib import quote_plus, _is_unicode
try:
import json
except:
import simplejson as json
def enum(*sequential, **named):
    """Build an ad-hoc enumeration class.

    sequential - member names; they are numbered 0, 1, 2, ... in the
                 order given
    named      - members with explicit values; a keyword wins over a
                 positional name with the same spelling

    Returns a new class called 'Enum' whose class attributes are the
    members.
    """
    members = {}
    for position, label in enumerate(sequential):
        members[label] = position
    # Applied last so explicit values override the automatic numbering.
    members.update(named)
    return type('Enum', (), members)
class Wiki(object):
    """Minimal read-only client for a MediaWiki api.php endpoint.

    Supports exactly two things: searching for page titles and fetching
    the raw wiki markup of a page.
    """

    def __init__(self, api=None):
        """
        api - URL of the wiki's api.php; None selects the English
              Wikipedia endpoint
        """
        if api is None:
            api = "http://en.wikipedia.org/w/api.php"
        self.api = api

    def downloadFile(self, URL=None):
        """A HTTP Request

        URL - The URL to fetch

        Sends a 'Mozilla/5.0' User-agent header and returns the response
        body decoded as strict UTF-8. Errors raised while opening or
        reading the URL, and UnicodeDecodeError for a non-UTF-8 body,
        propagate to the caller.
        """
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(URL)
        try:
            data = response.read()
        finally:
            # Release the connection even when read() fails part-way.
            response.close()
        return data.decode('utf-8')

    @staticmethod
    def _quote(text):
        """Percent-encode text for use as a query-string value.

        text - unicode text, or a byte string that is already UTF-8
        """
        # `bytes` is an alias of `str` on Python 2, so only unicode text
        # is encoded here; quote() is always handed a byte string.
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        return urllib2.quote(text)

    def _fetchQuery(self, queryString):
        """Call the API and return the 'query' member of its JSON reply.

        queryString - everything after the endpoint URL, starting at '?'

        Returns an empty dict when the reply is not a JSON object or has
        no 'query' member, so callers can treat that case as "no results".
        """
        reply = json.loads(self.downloadFile(self.api + queryString))
        if isinstance(reply, dict):
            return reply.get('query') or {}
        return {}

    def search(self, searchString):
        """Search the wiki for titles

        searchString - text to search for (unicode, or UTF-8 byte string)

        Returns a list of unicode titles in the order the API returned
        them (the request asks for at most 10). The list is empty when
        searchString is empty/None or the reply contains no results.
        """
        if not searchString:
            return []
        query = self._fetchQuery(
            "?action=query&list=search&format=json&srlimit=10&srsearch="
            + self._quote(searchString))
        results = []
        for item in query.get('search', []):
            wikiTitle = item['title']
            # Some JSON decoders hand back byte strings; normalise so the
            # caller always receives unicode.
            if isinstance(wikiTitle, bytes):
                wikiTitle = wikiTitle.decode('utf-8')
            results.append(wikiTitle)
        return results

    def searchTop(self, searchString):
        """Search for the top wiki title

        searchString - text to search for

        Returns the first title from search(), or u"" when there is none.
        """
        results = self.search(searchString)
        return results[0] if results else u""

    def getPage(self, title):
        """Get the raw markup for a title

        title - approximate page title; it is resolved through
                searchTop() first

        Returns the content of the first page in the reply that carries
        a revision, or u"" when the title cannot be resolved or no page
        has a revision.
        """
        # Do the best we can to get a valid wiki title
        wikiTitle = self.searchTop(title)
        if not wikiTitle:
            return u""
        # Request the resolved title, not the caller's raw input.
        query = self._fetchQuery(
            "?action=query&prop=revisions&format=json&rvprop=content"
            "&rvlimit=1&titles=" + self._quote(wikiTitle))
        for page in query.get('pages', {}).values():
            if 'revisions' in page:
                return page['revisions'][0]['*']
        return u""