Wiki API getter

Question

I know there are other Python wiki API classes out there. I'm writing this one because I don't need all the bells and whistles, no edits, no talks, etc. I just need to be able to search for titles and get the wiki markup.

Any advice or suggestions or comments or a review or anything really.

# -*- coding: utf-8 -*-

import urllib2
import re
import time
import sys
from urllib import quote_plus, _is_unicode

try:
    import json
except:
    import simplejson as json


def enum(*sequential, **named):
    enums = dict(zip(sequential, range(len(sequential))), **named)
    return type('Enum', (), enums)


class Wiki:
    def __init__(self, api=None):
        if api == None:
            self.api = "http://en.wikipedia.org/w/api.php"
        else:
            self.api = api
        return

    """A HTTP Request"""
    def downloadFile(self, URL=None):
        """     
        URL - The URL to fetch
        """
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        responce = opener.open(URL)
        data = responce.read()
        responce.close()
        return data.decode(encoding='UTF-8',errors='strict')


    """Search the wiki for titles"""
    def search(self, searchString):
        results = []
        if (searchString != u""):
            encoded_searchString = searchString
            if isinstance(encoded_searchString, unicode):
                encoded_searchString = searchString.encode('utf-8')
            url = self.api + "?action=query&list=search&format=json&srlimit=10&srsearch=" + urllib2.quote(encoded_searchString)
            rawData = self.downloadFile(url)
            object = json.loads(rawData)

            if object:
                if 'query' in object:
                    for item in object['query']['search']:
                        wikiTitle = item['title']
                        if isinstance(wikiTitle, str):
                            wikiTitle = wikiTitle.decode(encoding='UTF-8',errors='strict')
                        results.append(wikiTitle)
        return results


    """Search for the top wiki title"""
    def searchTop(self, searchString):
        results = self.search(searchString)
        if len(results) > 0:
            return results[0]
        else:
            return u""


    """Get the raw markup for a title"""
    def getPage(self, title):
        # Do the best we can to get a valid wiki title
        wikiTitle = self.searchTop(title)

        if (wikiTitle != u""):
            encoded_title = wikiTitle
            if isinstance(encoded_title, unicode):
                encoded_title = title.encode('utf-8')
            url = self.api + "?action=query&prop=revisions&format=json&rvprop=content&rvlimit=1&titles=" + urllib2.quote(encoded_title)
            rawData = self.downloadFile(url)
            object = json.loads(rawData)

            for k, v in object['query']['pages'].items():
                if 'revisions' in v:
                    return v['revisions'][0]['*']
        return u""

Adam · Accepted Answer · 2013-03-30 13:53:41Z

class Wiki:
    def __init__(self, api="http://en.wikipedia.org/w/api.php"):
        self.api = api
        return

This return does nothing

    """A HTTP Request"""
    def downloadFile(self, URL=None):

Python convention is to use lowercase_with_underscores for method names

        """     
        URL - The URL to fetch
        """
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]

Why are you pretending to be Mozilla?

        responce = opener.open(URL)

Response is spelled wrong

        data = responce.read()
        responce.close()
        return data.decode(encoding='UTF-8',errors='strict')

This whole function should probably be a free function, not a method.

    """Search the wiki for titles"""
    def search(self, searchString):

Parameters by convention should be named lowercase_with_underscore

        results = []
        if (searchString != u""):

No need for the ( and ). Also you can just do: if searchString:

            encoded_searchString = searchString

Why?

            if isinstance(encoded_searchString, unicode):
                encoded_searchString = searchString.encode('utf-8')
            url = self.api + "?action=query&list=search&format=json&srlimit=10&srsearch=" + urllib2.quote(encoded_searchString)
            rawData = self.downloadFile(url)
            object = json.loads(rawData)

I'd combine these two lines

            if object:

In what circumstance will this be false? If that happens you should probably do something besides pretend that nothing happened.

                if 'query' in object:
                    for item in object['query']['search']:
                        wikiTitle = item['title']
                        if isinstance(wikiTitle, str):
                            wikiTitle = wikiTitle.decode(encoding='UTF-8',errors='strict')
                        results.append(wikiTitle)
        return results


    """Search for the top wiki title"""
    def searchTop(self, searchString):
        results = self.search(searchString)
        if len(results) > 0:
            return results[0]
        else:
            return u""

Do you really want an empty string if your result wasn't found? You should probably throw an exception here. Returning an empty string will just make failures hard to trace.

    """Get the raw markup for a title"""
    def getPage(self, title):
        # Do the best we can to get a valid wiki title
        wikiTitle = self.searchTop(title)

        if (wikiTitle != u""):
            encoded_title = wikiTitle
            if isinstance(encoded_title, unicode):
                encoded_title = title.encode('utf-8')
            url = self.api + "?action=query&prop=revisions&format=json&rvprop=content&rvlimit=1&titles=" + urllib2.quote(encoded_title)
            rawData = self.downloadFile(url)
            object = json.loads(rawData)

            for k, v in object['query']['pages'].items():
                if 'revisions' in v:
                    return v['revisions'][0]['*']
        return u""

Don't default to stupid defaults. If you can't get the requested page throw an error with as much detail as possible, don't just throw me an empty string.

Latty · Accepted Answer · 2012-10-24 10:23:57Z

3

An obvious one that jumps out at me is this:

class Wiki:
    def __init__(self, api=None):
        if api == None:
            self.api = "http://en.wikipedia.org/w/api.php"
        else:
            self.api = api
        return

Can be simplified to this:

class Wiki:
    def __init__(self, api="http://en.wikipedia.org/w/api.php"):
        self.api = api

answered Oct 24, 2012 at 10:23

Latty

8146 silver badges11 bronze badges

Add a comment |

Stack Exchange Network

Wiki API getter

2 Answers 2

You must log in to answer this question.

Hot Network Questions

Wiki API getter

2 Answers 2

You must log in to answer this question.

Related

Hot Network Questions