Fetching code for wakabasmaster
author Dan Kluev <[email protected]>
Tue, 18 Nov 2008 13:17:40 +0000 (18 23:17 +1000)
committer Dan Kluev <[email protected]>
Tue, 18 Nov 2008 13:17:40 +0000 (18 23:17 +1000)
updateArchive.py [new file with mode: 0644]

diff --git a/updateArchive.py b/updateArchive.py
new file mode 100644 (file)
index 0000000..b3ce414
--- /dev/null
@@ -0,0 +1,656 @@
+# coding=utf-8
+from paste.script.command import Command
+from fc.lib.base import *
+from fc.model import *
+from sqlalchemy.orm import eagerload
+from sqlalchemy.orm import class_mapper
+from sqlalchemy.sql import and_, or_, not_
+import sqlalchemy
+import os
+import cgi
+import shutil
+import datetime
+import time
+import Image
+import hashlib
+import re
+from fc.lib.fuser import FUser
+from fc.lib.miscUtils import *
+from fc.lib.constantValues import *
+from fc.lib.settings import *
+from fc.lib.fileHolder import AngryFileHolder
+import sys
+import paste.fixture
+import paste.registry
+import paste.deploy.config
+from paste.deploy import loadapp, appconfig
+from paste.script.command import Command, BadCommand
+from paste.script.filemaker import FileOp
+from paste.script.pluginlib import find_egg_info_dir
+import urllib2
+import httplib
+from lxml import etree
+import StringIO
+from fc.model.arch import *
+import logging
+
+def can_import(name):
+    """Report whether the package/module called ``name`` can be imported.
+
+    Returns True when ``__import__(name)`` succeeds and False when it raises
+    ImportError; any other exception is left to propagate.
+    """
+    try:
+        __import__(name)
+    except ImportError:
+        return False
+    else:
+        return True
+
+def unicodify(text):
+    """Decode a ``str`` value as UTF-8; hand any other value back untouched."""
+    if not isinstance(text, str):
+        return text
+    return text.decode('utf-8')
+
+# Cache written by UpdateArchive.processPost: source post number (Ids[1]) ->
+# [database id of the saved post, Ids[0]].  IBParser.ResolveSecondaryId looks
+# here before it queries the database.
+idList = {}
+# Filter objects keyed by name; UpdateArchive.command registers 'sage' and
+# 'lowres', Thread.__init__ looks names up here.
+GFilters = {}
+
+class DateTimeParser:
+    """Turn textual timestamps into naive ``datetime.datetime`` objects.
+
+    Both parsers return None when the text cannot be understood, so callers
+    only have one failure signal to check.
+    """
+    # One tuple per month in calendar order; each tuple lists the spellings
+    # accepted for that month (the lookup is an exact, case-sensitive match).
+    monthes = [('Янв','Jan','января'),('Фев','Feb','февраля'),('Мар','Mar','марта'),('Апр','Apr','апреля'),('Май','May','мая'),('Июн','Jun','июня'),('Июл','Jul','июля'),('Авг','Aug','августа'),('Сен','Sep','сентября'),('Окт','Oct','октября'),('Ноя','Nov','ноября'),('Дек','Dec','декабря')]
+    # <non-digits> day <month name> year hour:minute:second
+    dateRe = re.compile(r"""[^\d]+(\d+)\s+([^\d\s]+)\s+(\d+)\s+(\d+)\:(\d+)\:(\d+)""")
+    # year-month-day hour:minute:second
+    dateReISO = re.compile(r"""(\d+)\-(\d+)\-(\d+) (\d+)\:(\d+)\:(\d+)""")
+    def getDateTime(self,date):
+        """Parse text shaped like ``Tue, 18 Nov 2008 13:17:40``.
+
+        ``date`` is searched with ``dateRe``; the month word must be one of
+        the spellings in ``monthes``.  Returns a datetime, or None when
+        ``date`` is empty/None, does not match, or names an unknown month.
+        ``datetime.datetime`` may still raise ValueError for out-of-range
+        numbers.
+        """
+        if not date:
+            return None
+        found = self.dateRe.search(date)
+        if found is None:
+            # Previously an IndexError; now the same None as an unknown month.
+            return None
+        day, monthName, year, hour, minute, second = found.groups()
+        for index, names in enumerate(self.monthes):
+            if monthName in names:
+                return datetime.datetime(int(year),index + 1,int(day),int(hour),int(minute),int(second))
+        return None
+    def getDateTimeFromISO8601(self,date):
+        """Parse ``YYYY-MM-DD HH:MM:SS`` text; None when it does not match."""
+        if not date:
+            return None
+        found = self.dateReISO.search(date)
+        if found is None:
+            return None
+        year, month, day, hour, minute, second = [int(part) for part in found.groups()]
+        return datetime.datetime(year,month,day,hour,minute,second)
+    
+# Shared parser instance; used by LoaderHTTP.stat and WakabaParser.parsePost.
+DTP = DateTimeParser()
+
+class IBParser:
+    """Helpers shared by imageboard page parsers.
+
+    GetPostID relies on ``replyIdRe`` and ``postIdRe`` attributes that this
+    class does not define; a subclass has to provide them (WakabaParser in
+    this file does).
+    """
+    def _matchesTag(self,el,tag):
+        """Tell whether ``el`` carries a string tag equal to lower-case ``tag``."""
+        name = getattr(el, 'tag', None)
+        # Only string tags can match.  Nodes whose tag is None or some
+        # non-string object (lxml reports those for comments, see its docs)
+        # are skipped instead of raising AttributeError on ``.lower()``.
+        return hasattr(name, 'lower') and name.lower() == tag
+
+    def _findSibling(self,el,tag,step,skip):
+        """Walk siblings via the method named ``step`` until ``tag`` matches."""
+        tag = tag.lower()
+        node = el
+        if skip:
+            node = getattr(node, step)()
+        # ``is not None`` on purpose: the original also compared against None
+        # rather than relying on element truthiness.
+        while node is not None:
+            if self._matchesTag(node, tag):
+                return node
+            node = getattr(node, step)()
+        return None
+
+    def GetNextTag(self,el,tag,skip=0):
+        """Return ``el`` or the nearest following sibling whose tag is ``tag``.
+
+        The comparison is case-insensitive.  With a true ``skip`` the search
+        starts at the sibling after ``el``.  Returns None when nothing
+        matches, including when there is no sibling to start from.
+        """
+        return self._findSibling(el, tag, 'getnext', skip)
+
+    def GetPreviousTag(self,el,tag,skip=0):
+        """Mirror of GetNextTag that walks towards preceding siblings."""
+        return self._findSibling(el, tag, 'getprevious', skip)
+
+    def ResolveSecondaryId(self,thread,Ids):
+        """Map a source ``[thread number, post number]`` pair to a database id.
+
+        The module-level ``idList`` cache is tried first (keyed by post
+        number).  Otherwise the thread's root post is looked up by
+        ``secondaryIndex`` among posts with ``parentid == -1`` tagged with
+        both ``thread.chanTag`` and ``thread.board``; for a reply the post is
+        then searched among that root's children.  Returns the ``Post.id`` or
+        None when nothing is found.
+        """
+        postNo = int(Ids[1])
+        if postNo in idList:
+            return idList[postNo][0]
+
+        # Normalised like the post number so that '123' and 123 compare equal
+        # and the query always receives an integer.
+        threadNo = int(Ids[0])
+        boardTags = and_(Post.tags.any(tag=thread.chanTag),Post.tags.any(tag=thread.board))
+        rootFilter = and_(Post.secondaryIndex==threadNo,and_(Post.parentid==-1,boardTags))
+        root = meta.Session.query(Post).filter(rootFilter).first()
+        if not root:
+            return None
+        if threadNo == postNo:
+            return root.id
+        reply = meta.Session.query(Post).filter(and_(Post.secondaryIndex==postNo,Post.parentid==root.id)).first()
+        if reply:
+            return reply.id
+        return None
+
+    def GetPostID(self,post):
+        """Return ``[thread number, post number]`` extracted from ``post.href``.
+
+        When ``post.thread`` is set the thread number is ``post.thread.tid``
+        and the post number is the first ``replyIdRe`` match.  Otherwise both
+        come from ``postIdRe``; a missing (or zero) post part falls back to
+        the thread number.  Raises IndexError when the pattern finds nothing.
+        """
+        if post.thread:
+            ids = self.replyIdRe.findall(post.href)
+            return [post.thread.tid,int(ids[0])]
+        first = self.postIdRe.findall(post.href)[0]
+        threadNo = int(first[0])
+        postNo = threadNo
+        if first[2] and int(first[2]):
+            postNo = int(first[2])
+        return [threadNo,postNo]
+
+class Loader:
+    """Common base of the loaders; knows how to take a link apart."""
+    def parseLink(self,link):
+        """Split ``link`` into the six pieces the loaders work with.
+
+        Returns ``[scheme, host, root, directory, filename, path]``:
+        ``scheme`` is the text before ``://`` (None when absent or empty),
+        ``host`` the first ``/``-separated segment after it, ``root`` is
+        scheme + host + ``/``, ``directory`` is the link without its last
+        segment (ending in ``/``), ``filename`` is the last segment and
+        ``path`` is everything after the host with a leading ``/``.
+        """
+        pieces = link.split('://')
+        scheme = None
+        prefix = ''
+        if len(pieces) > 1 and pieces[0]:
+            scheme = pieces[0]
+            prefix = scheme + '://'
+        segments = pieces[-1].split('/')
+        host = segments[0]
+        root = prefix + host + '/'
+        directory = prefix + '/'.join(segments[:-1]) + '/'
+        path = '/' + '/'.join(segments[1:])
+        return [scheme, host, root, directory, segments[-1], path]
+        
+class LoaderLocal(Loader):
+    """Loader for pages and files that are read from the local filesystem."""
+    def __init__(self,link):
+        """Keep the directory part of ``link`` as the base for relative paths."""
+        self.relativeUrl = self.parseLink(link)[3]
+    def stat(self,link):
+        """Return ``[modification time, size]`` for ``link``.
+
+        Returns None when ``os.stat`` raises OSError (e.g. the file is gone).
+        """
+        try:
+            stats = os.stat(link)
+        except OSError:
+            return None
+        # Positions 8 and 6 of the stat tuple are st_mtime and st_size.
+        return [datetime.datetime.fromtimestamp(stats[8]),stats[6]]
+    def get(self,url):
+        """Return the raw bytes of the file at ``url``; IOError propagates."""
+        source = open(url,'rb')
+        try:
+            return source.read()
+        finally:
+            # The original left the handle to the garbage collector.
+            source.close()
+    def getAbsolutePath(self,url):
+        """Prefix ``url`` with the directory remembered at construction."""
+        return self.relativeUrl + url
+    def getFromRelative(self,url):
+        """Read the file addressed by ``url`` relative to the base directory."""
+        return self.get(self.getAbsolutePath(url))
+                            
+class LoaderHTTP(Loader):
+    """Loader that fetches pages and files over HTTP(S)."""
+    def __init__(self,link):
+        """Split ``link`` once and keep scheme, host, site root and directory."""
+        p = self.parseLink(link)
+        self.proto = p[0]
+        self.host = p[1]
+        self.baseUrl = p[2]
+        self.relativeUrl = p[3]
+    def stat(self,link):
+        """Issue a HEAD request for ``link``.
+
+        Returns ``[timestamp, size]`` for a 200 answer, where the timestamp
+        is parsed from Last-Modified (falling back to Date) and the size is
+        Content-Length as an int (0 when the header is absent).  Any other
+        status yields None.
+        """
+        linkp = self.parseLink(link)
+        if linkp[0] == 'https':
+            c = httplib.HTTPSConnection(linkp[1])
+        else:
+            c = httplib.HTTPConnection(linkp[1])
+        try:
+            c.request('HEAD', linkp[5])
+            r = c.getresponse()
+            if r.status != 200:
+                # NOTE(review): every non-200 status (not only 404) ends up
+                # here, and Thread.checkState reports a None result as 404,
+                # which UpdateArchive.UpdateArchive answers by deleting the
+                # entry.  Confirm that is intended for 5xx/3xx answers.
+                return None
+            # int() keeps the size comparable with LoaderLocal.stat.
+            size = int(r.getheader('content-length',0))
+            date = r.getheader('last-modified',r.getheader('date',None))
+        finally:
+            c.close()
+        return [DTP.getDateTime(date),size]
+    def get(self,url):
+        """Download ``url`` with the site root as Referer.
+
+        Returns the response body, or None when urllib2 raises HTTPError.
+        """
+        req = urllib2.Request(url)
+        req.add_header('Referer', self.baseUrl)
+        try:
+            f = urllib2.urlopen(req)
+        except urllib2.HTTPError:
+            return None
+        try:
+            return f.read()
+        finally:
+            f.close()
+    def getAbsolutePath(self,url):
+        """Resolve ``url`` against the thread location.
+
+        URLs that already carry a scheme are returned unchanged, a leading
+        ``/`` means "relative to the site root" and anything else is relative
+        to the directory of the thread page.
+        """
+        if '://' in url:
+            return url
+        if url[:1] == '/':
+            # baseUrl already ends with '/'; avoid producing 'host//path'.
+            return self.baseUrl.rstrip('/') + url
+        return self.relativeUrl + url
+    def getFromRelative(self,url):
+        """Download the resource addressed by ``url`` relative to the thread."""
+        return self.get(self.getAbsolutePath(url))
+class IBFilter:
+    """Base post filter whose verdict is always None."""
+    def filter(self,post):
+        """Return the verdict for ``post``; the base class never objects."""
+        verdict = None
+        return verdict
+class IBFilterSage(IBFilter):
+    """Filter whose verdict is the post's ``sage`` flag."""
+    def filter(self,post):
+        """Hand back ``post.sage`` unchanged."""
+        flagged = post.sage
+        return flagged
+class IBFilterLowres(IBFilter):
+    """Filter that objects to posts whose picture width is below 50."""
+    def filter(self,post):
+        """Return ``post.pic`` itself when it is false, else ``width < 50``."""
+        picture = post.pic
+        if not picture:
+            return picture
+        return picture.width < 50
+        
+class Thread:
+    """A thread being archived: entry data plus the parser and loader for it."""
+    def __init__(self,entry,parsers,directlink=None,forcetype=None):
+        """Copy the fields of ``entry`` and choose parser, filters and loader.
+
+        ``parsers`` maps ``entry.type`` to a parser object.  ``entry.tags``
+        and ``entry.filters`` are comma separated strings; filter names are
+        looked up in the module-level ``GFilters``.  Without ``directlink``
+        the parser builds the thread URL.  A link with a scheme gets a
+        LoaderHTTP, any other link a LoaderLocal.
+        """
+        self.parser = parsers[entry.type]
+        self.tid = entry.tid
+        self.url = entry.url
+        self.board = entry.board
+        self.chanTag = entry.chanTag
+        self.tags = []
+        if entry.tags:
+            self.tags = entry.tags.split(',')
+        self.type = entry.type
+        self.forcetype = forcetype
+        self.lastChanged = entry.lastChanged
+        filterNames = []
+        if entry.filters:
+            filterNames = entry.filters.split(',')
+        self.filters = [GFilters[filterName] for filterName in filterNames]
+
+        self.timeDiff = entry.timeDiff
+        link = directlink
+        if not link:
+            link = self.parser.GetThreadLink(self.url,self.board,self.tid)
+        self.directlink = link
+        if Loader().parseLink(link)[0]:
+            self.loader = LoaderHTTP(link)
+        else:
+            self.loader = LoaderLocal(link)
+    def checkState(self):
+        """Compare the remote/local state with ``lastChanged``.
+
+        Returns ``[404]`` when the loader's stat() gives nothing, otherwise
+        ``[200, timestamp, size]`` if the timestamp is newer than
+        ``lastChanged`` and ``[304, timestamp, size]`` if it is not.
+        """
+        info = self.loader.stat(self.directlink)
+        if not info:
+            return [404]
+        code = 304
+        if info[0] > self.lastChanged:
+            code = 200
+        return [code,info[0],info[1]]
+    def initialize(self):
+        """Load and parse the thread page.
+
+        On success ``document``, ``posts`` and ``threadId`` are set.  Returns
+        True only when the page could be loaded and the parser found posts.
+        """
+        page = self.loader.get(self.directlink)
+        if not page:
+            return False
+        if isinstance(page, str):
+            page = page.decode('utf-8')
+        self.document = etree.parse(StringIO.StringIO(page), etree.HTMLParser())
+        self.posts = self.parser.GetPostsList(self)
+        self.threadId = self.parser.ResolveSecondaryId(self,[self.tid,self.tid])
+        return bool(self.posts)
+    def filter(self,post):
+        """Return the first true verdict among the thread's filters.
+
+        Filters are asked in order and the remaining ones are not consulted
+        once one objects.  With no objection the last (false) verdict is
+        returned, or None when there are no filters.
+        """
+        if not self.filters:
+            return None
+        verdict = None
+        for one in self.filters:
+            if not verdict:
+                verdict = one.filter(post)
+        return verdict
+    def ReplaceReference(self,m):
+        """Regex callback that rewrites a ``>>N`` link to a local link.
+
+        Groups 2 and 3 of ``m`` are the referenced thread and post numbers.
+        When both resolve to database ids the link points at them; otherwise
+        an error line is printed and a ``/secondaryIndex/`` link is produced.
+        """
+        groups = m.groups()
+        threadNo, postNo = groups[1], groups[2]
+        tid = self.parser.ResolveSecondaryId(self,[threadNo,threadNo])
+        if tid:
+            pid = tid
+            if threadNo != postNo:
+                pid = self.parser.ResolveSecondaryId(self,[threadNo,postNo])
+            if pid:
+                return '<a href="/%s#i%s" onclick="highlight(%s)">&gt;&gt;%s</a>' % (tid, pid, pid, postNo)
+        print("ERROR! %s/%s does not exist!" % (threadNo,postNo))
+        return '<a href="/secondaryIndex/%s#i%s" onclick="highlight(%s)">&gt;&gt;%s</a>' % (threadNo, postNo, postNo, postNo)
+
+class WakabaParser(IBParser):
+    """Parser for thread pages with Wakaba-style markup."""
+    # Post number after ">>" in a reflink href.
+    replyIdRe = re.compile(r""">>(\d+)""")
+    # "/<thread>.html" style link with an optional "#<post>" / "#i<post>" part.
+    postIdRe = re.compile(r"""\/(\d+)\.x?h?t?ml?(#i?(\d+))?""")
+    # Serialized ">>N" reference link: optional path, thread number taken from
+    # the href, post number taken from the link text.
+    referenceRe = re.compile("""<a [^>]*href="([^"]*/)?(\d+)\.[^"]+"[^>]*>\&gt\;\&gt\;(\d+)</a>""")
+    def GetThreadLink(self,url,board,thread):
+        """Build ``http://<url>/<board>/res/<thread>.html``."""
+        return 'http://'+url+'/'+board+'/res/'+str(thread)+'.html'
+    def GetPostsList(self,thread):
+        """Create a Post for every reflink anchor found in ``thread.document``.
+
+        Each Post gets ``thread``, ``href``, ``reflink`` (the anchor's parent
+        element), ``Ids`` and ``secondaryIndex``.  Returns the list, or None
+        when the page contains no such anchors.
+        """
+        posts = thread.document.xpath("/html/body/form//*[@class='reflink']/a")
+        postsList = []
+        if posts:
+            for postA in posts:
+                post = Post()
+                post.thread = thread
+                post.href = postA.get('href')
+                post.reflink = postA.getparent()
+                post.Ids = self.GetPostID(post)
+                post.secondaryIndex = int(post.Ids[1])
+                postsList.append(post)
+            return postsList
+        else:
+            return None
+    def GetImgSrc(self,post):
+        """Return the href of the first ``a`` sibling of the label that contains '/src/'.
+
+        With ``post.thread.forcetype`` set only the file name is kept and
+        '../src/' is put in front of it.  Returns None when no such link exists.
+        """
+        # Siblings of the post's <label> are scanned (children of its parent).
+        cont = post.l.getparent()
+        for t in cont:
+            if t.tag.lower() == 'a':
+                href = t.get('href')
+                if href and href.find('/src/') != -1:
+                    if post.thread.forcetype:
+                        return '../src/' + post.thread.loader.parseLink(href)[4]
+                    else:
+                        return href
+        return None
+    
+    def ParseText(self,post):
+        """Return the inner markup of the post's blockquote with references rewritten.
+
+        Returns u'' when the post has no blockquote, and None (after printing
+        a message) when the serialized element is not wrapped in plain
+        ``<blockquote>`` ... ``</blockquote>`` tags.
+        """
+        if post.bq is not None:
+            # Drop the tail so that tostring() emits the element alone.
+            post.bq.tail = ''
+            message = etree.tostring(post.bq, pretty_print=False,encoding='utf-8')
+            # 12 and 13 are the lengths of the opening and closing tag.
+            if message[:12].lower() == '<blockquote>' and message[-13:].lower() == '</blockquote>':
+                message = message[12:-13]
+            else:
+                print "Cant parse this message : '%s'" % message
+                return None
+            message = self.referenceRe.sub(post.thread.ReplaceReference,message)
+            return message
+        else:
+            return u''
+    def parsePost(self,post):
+        """Fill ``post`` with title, sage flag, image source, date and message.
+
+        The blockquote following and the label preceding ``post.reflink`` are
+        located first; the remaining fields are read from fixed child
+        positions of that label.
+        """
+        post.bq = self.GetNextTag(post.reflink,'blockquote')
+        post.l  = self.GetPreviousTag(post.reflink,'label')
+        # Second child of the label holds the title text.
+        post.title = unicodify(post.l[1].text)
+        if not post.title:
+            post.title = u''
+        # Third child of the label; a first-child link whose href mentions
+        # 'sage' marks the post as sage.
+        post.cpn = post.l[2]
+        post.sage = False
+        if len(post.cpn)>0 and post.cpn[0].tag.lower() == 'a':
+            post.cpnHref = post.cpn[0].get('href')
+            if post.cpnHref.find('sage') > -1:
+                post.sage = True       
+        post.src = self.GetImgSrc(post)
+        # The date text is the tail that follows the third child of the label.
+        date = post.l[2].tail.encode('utf-8')
+        date = date.replace("\r",'').replace("\n",'')
+        post.date = DTP.getDateTime(date)
+        post.message = unicodify(self.ParseText(post))
+
+class UpdateArchive(Command):
+    """Paster command that copies imageboard threads into the local database.
+
+    ``--mode`` selects the action: ``update`` (also the default) re-checks
+    every ArchiveList row, ``add`` stores a new ArchiveList row and
+    ``thread`` imports the thread given by ``--thread``, or every thread
+    number listed in the ``--list`` file, using an ArchiveList object built
+    from the command-line options.
+    """
+    # Parser configuration
+    summary = "--NO SUMMARY--"
+    usage = "--NO USAGE--"
+    group_name = "fc"
+    parser = Command.standard_parser(verbose=False)
+    parser.add_option("--mode")
+    parser.add_option("--chan")
+    parser.add_option("--board")
+    parser.add_option("--thread")
+    parser.add_option("--chanTag")
+    parser.add_option("--type")
+    parser.add_option("--tags")
+    parser.add_option("--timeDiff")
+    parser.add_option("--directlink")
+    parser.add_option("--list")
+    parser.add_option("--filters")
+    parser.add_option("--forcetype")
+    # Page parsers keyed by the ``type`` of an archive entry.
+    parsers = {'wakaba':WakabaParser()}
+    def command(self):
+        """Load the application from development.ini, then run the --mode action."""
+        self.verbose = 3
+        config_file = 'development.ini'
+        config_name = 'config:%s' % config_file
+        here_dir = os.getcwd()
+        locs = dict(__name__="pylons-admin")
+        conf = appconfig(config_name, relative_to=here_dir)
+        conf.update(dict(app_conf=conf.local_conf,global_conf=conf.global_conf))
+        paste.deploy.config.CONFIG.push_thread_config(conf)
+        sys.path.insert(0, here_dir)
+        wsgiapp = loadapp(config_name, relative_to=here_dir)
+        test_app = paste.fixture.TestApp(wsgiapp)
+        tresponse = test_app.get('/_test_vars')
+        request_id = int(tresponse.body)
+        test_app.pre_request_hook = lambda self:paste.registry.restorer.restoration_end()
+        test_app.post_request_hook = lambda self:paste.registry.restorer.restoration_begin(request_id)
+        paste.registry.restorer.restoration_begin(request_id)
+        egg_info = find_egg_info_dir(here_dir)
+        f = open(os.path.join(egg_info, 'top_level.txt'))
+        packages = [l.strip() for l in f.readlines() if l.strip() and not l.strip().startswith('#')]
+        f.close()
+        found_base = False
+        for pkg_name in packages:
+            # Import all objects from the base module
+            base_module = pkg_name + '.lib.base'
+            found_base = can_import(base_module)
+            if not found_base:
+                # Minimal template
+                base_module = pkg_name + '.controllers'
+                found_base = can_import(base_module)
+
+            if found_base:
+                break
+
+        if not found_base:
+            raise ImportError("Could not import base module. Are you sure this is a Pylons app?")
+
+        base = sys.modules[base_module]
+        base_public = [__name for __name in dir(base) if not \
+                       __name.startswith('_') or __name == '_']
+        for name in base_public:
+            locs[name] = getattr(base, name)
+        locs.update(dict(wsgiapp=wsgiapp, app=test_app))
+
+        mapper = tresponse.config.get('routes.map')
+        if mapper:
+            locs['mapper'] = mapper
+
+        self.thread = self.options.thread
+        self.chan = self.options.chan
+        self.chanTag = self.options.chanTag
+        self.board = self.options.board
+
+        logging.getLogger('sqlalchemy').setLevel(logging.ERROR)
+        GFilters['sage'] = IBFilterSage()
+        GFilters['lowres'] = IBFilterLowres()
+        if not self.options.mode or self.options.mode == 'update':
+            self.UpdateArchive()
+        elif self.options.mode == 'add':
+            self.AddToArchive()
+        elif self.options.mode == 'thread':
+            if self.options.list:
+                listFile = open(self.options.list,'r')
+                try:
+                    # Blank lines (e.g. a trailing newline) would break int().
+                    tList = [line for line in listFile.readlines() if line.strip()]
+                finally:
+                    listFile.close()
+            else:
+                tList = [self.options.thread]
+            for t in tList:
+                entry = ArchiveList()
+                entry.tid = int(t)
+                entry.url = self.options.chan
+                entry.chanTag = self.options.chanTag
+                entry.board = self.options.board or 'b'
+                entry.tags = self.options.tags or ''
+                entry.type = self.options.type or 'wakaba'
+                entry.filters = self.options.filters or ''
+                entry.timeDiff = self.options.timeDiff or 0
+                entry.lastChanged = datetime.datetime.fromtimestamp(0)
+                print "Processing %s %s %s %s" % (entry.tid,entry.url,entry.chanTag,entry.board)
+                thread = Thread(entry,self.parsers,self.options.directlink,self.options.forcetype)
+                self.processThread(thread)
+
+    def LoadPage(self,thread,chan='2ch.ru',board='b'):
+        """Download a thread page (or the board index when ``thread`` is false).
+
+        ``thread`` is concatenated into the URL, so it must be a string.
+        Sets ``self.host``, ``self.path`` and ``self.url`` as a side effect
+        and returns the response body.
+        """
+        self.host = 'http://'+chan
+        if thread:
+            self.path = '/'+board+'/res/'
+            self.url = self.host+self.path+thread+'.html'
+        else:
+            self.path = '/'+board+'/'
+            self.url = self.host+self.path
+        print self.url
+        req = urllib2.Request(self.url)
+        req.add_header('Referer', self.host+'/'+board+'/')
+        f = urllib2.urlopen(req)
+        try:
+            res = f.read()
+        finally:
+            f.close()
+        return res
+
+    def getTags(self,tagsList):
+        """Return a Tag object per name, reusing stored tags where they exist."""
+        tags = []
+        for tagName in tagsList:
+            tag = meta.Session.query(Tag).filter(Tag.tag==tagName).first()
+            if tag:
+                tags.append(tag)
+            else:
+                tags.append(Tag(tagName))
+        return tags
+
+    def processPost(self,post):
+        """Parse one post, fetch its picture and store it unless it is filtered out.
+
+        A post whose two ids are equal is stored as a thread root
+        (``parentid == -1``) and becomes ``post.thread.post``; any other post
+        is attached to that root, bumping it unless the reply is sage.
+        Saved posts are recorded in the module-level ``idList``.
+        """
+        post.thread.parser.parsePost(post)
+        post.pic = False
+        if post.src:
+            post.pic = self.LoadImage(post)
+            # -1 is processFile's marker for a failed thumbnail.
+            if post.pic == -1:
+                post.pic = None
+            if post.pic:
+                post.picid = post.pic.id
+        print "Thread %s Post %s (Image:%s %s %sx%s) at %s, sage : %s" % (post.Ids[0],post.Ids[1],post.src,post.pic and post.pic.id or 0,post.pic and post.pic.width or 0,post.pic and post.pic.height or 0,post.date,post.sage)
+        if (post.thread.filter(post)):
+            print "Filtered out"
+            print "----------------------"
+            return
+        if post.Ids[0] == post.Ids[1]:
+            post.parentid = -1
+            post.replyCount = 0
+            post.bumpDate = post.date
+            post.tags = self.getTags([post.thread.chanTag,post.thread.board]+post.thread.tags)
+            post.thread.post = post
+        else:
+            # Thread objects only get a ``post`` attribute once a root post
+            # is known (loaded in processThread or saved above).  Without it,
+            # e.g. when the root itself was filtered out, the reply cannot be
+            # attached; skip it instead of failing with AttributeError.
+            root = getattr(post.thread, 'post', None)
+            if root is None:
+                print "Skipped, thread %s has no stored root post" % post.Ids[0]
+                print "----------------------"
+                return
+            post.parentid = root.id
+            if not post.sage:
+                root.bumpDate = post.date
+            root.replyCount += 1
+        post.uidNumber = 1
+        meta.Session.save(post)
+        meta.Session.commit()
+        idList[post.Ids[1]]=[post.id,post.Ids[0]]
+        print "Saved in DB as %s/%s" % (post.id,post.parentid)
+        print "----------------------"
+
+    def processThread(self,thread):
+        """Import the posts of ``thread`` that are not stored yet.
+
+        Posts whose number is not greater than the highest number already
+        stored for the thread are skipped.  Returns True when the thread page
+        was processed and False when it could not be loaded or held no posts.
+        """
+        if not thread.initialize():
+            # Nothing was parsed, so there is no post list to walk.  The
+            # original continued and failed on the missing data.
+            print "Thread %s could not be loaded" % thread.directlink
+            return False
+        if thread.threadId:
+            thread.post = meta.Session.query(Post).get(thread.threadId)
+            lastPost = meta.Session.query(Post).filter(Post.parentid==thread.post.id).filter(Post.secondaryIndex>0).order_by(Post.secondaryIndex.desc()).first()
+            if lastPost:
+                lastId = lastPost.secondaryIndex
+            else:
+                lastId = int(thread.tid)
+        else:
+            lastId = 0
+        skipped = 0
+        for post in thread.posts:
+            if int(post.Ids[1]) > lastId:
+                if skipped:
+                    print "Skipped %s out of %s posts" % (skipped,len(thread.posts))
+                    skipped=0
+                self.processPost(post)
+            else:
+                skipped += 1
+        if skipped:
+            print "Skipped %s out of %s posts" % (skipped,len(thread.posts))
+        return True
+
+    def LoadImage(self,post):
+        """Download the file referenced by ``post.src`` and register it.
+
+        The download is written into ``g.OPT.uploadPath`` and handed to
+        processFile with a thumbnail size of 200.  Returns the first element
+        of processFile's result (a Picture or -1), False when processFile
+        returned nothing, or None when the download itself gave nothing.
+        """
+        url = post.thread.loader.getAbsolutePath(post.src)
+        fileName = post.thread.loader.parseLink(url)[4]
+        res = post.thread.loader.getFromRelative(post.src)
+        if not res:
+            return None
+        localFilePath = os.path.join(g.OPT.uploadPath, fileName)
+        localFile = open(localFilePath,'wb')
+        try:
+            localFile.write(res)
+        finally:
+            localFile.close()
+        file = FieldStorageLike(fileName,localFilePath)
+        fileDescriptors = self.processFile(file, 200)
+        pic = False
+        fileHolder = None
+        if fileDescriptors:
+            pic = fileDescriptors[0]
+            fileHolder = fileDescriptors[1]
+        if pic and pic != -1 and fileHolder:
+            fileHolder.disableDeletion()
+        return pic
+
+    def processFile(self, file, thumbSize=250):
+        """Store an uploaded file and create the Picture row for it.
+
+        Returns ``[Picture, AngryFileHolder]`` for a newly stored file,
+        ``[Picture, False]`` when a picture with the same md5 already exists,
+        ``[-1, AngryFileHolder]`` when thumbnail creation failed, ``''`` when
+        the file name has no extension, and False when ``file`` is not a
+        FieldStorage/FieldStorageLike or its extension is not registered.
+        """
+        if not (isinstance(file, cgi.FieldStorage) or isinstance(file,FieldStorageLike)):
+            return False
+        # Stored files are named after the current time, not the upload name.
+        name = str(long(time.time() * 10**7))
+        ext  = file.filename.rsplit('.',1)[:0:-1]
+        if not ext:
+            # Panic, no extension found
+            return ''
+        ext = ext[0].lstrip(os.sep)
+
+        # Make sure it is something we want to have
+        extParams = meta.Session.query(Extension).filter(Extension.ext==ext).first()
+        if not extParams:
+            return False
+
+        localFilePath = os.path.join(g.OPT.uploadPath, name + '.' + ext)
+        localFile = open(localFilePath,'w+b')
+        shutil.copyfileobj(file.file, localFile)
+        localFile.seek(0)
+        md5 = hashlib.md5(localFile.read()).hexdigest()
+        file.file.close()
+        localFile.close()
+
+        # An identical file is already known: reuse it and drop the new copy.
+        pic = meta.Session.query(Picture).filter(Picture.md5==md5).first()
+        if pic:
+            os.unlink(localFilePath)
+            return [pic, False]
+
+        try:
+            if extParams.type == 'image':
+                thumbFilePath = name + 's.' + ext
+                size = self.makeThumbnail(localFilePath, os.path.join(g.OPT.uploadPath,thumbFilePath), (thumbSize,thumbSize))
+            elif extParams.type == 'image-jpg':
+                thumbFilePath = name + 's.jpg'
+                size = self.makeThumbnail(localFilePath, os.path.join(g.OPT.uploadPath,thumbFilePath), (thumbSize,thumbSize))
+            else:
+                thumbFilePath = extParams.path
+                size = [0, 0, extParams.thwidth, extParams.thheight]
+        except Exception:
+            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
+            # and SystemExit.
+            return [-1, AngryFileHolder(localFilePath)]
+
+        pic = Picture()
+        pic.path = name + '.' + ext
+        pic.thumpath = thumbFilePath
+        pic.width = size[0]
+        pic.height = size[1]
+        pic.thwidth = size[2]
+        pic.thheight = size[3]
+        pic.extid = extParams.id
+        pic.size = os.stat(localFilePath)[6]
+        pic.md5 = md5
+        meta.Session.save(pic)
+        meta.Session.commit()
+        return [pic, AngryFileHolder(localFilePath, pic)]
+
+    def makeThumbnail(self, source, dest, maxSize):
+        """Write a thumbnail of ``source`` to ``dest`` fitting inside ``maxSize``.
+
+        Returns the original size followed by the thumbnail size as one
+        4-tuple ``(width, height, thumbWidth, thumbHeight)``.
+        """
+        sourceImage = Image.open(source)
+        size = sourceImage.size
+        if sourceImage:
+            sourceImage.thumbnail(maxSize,Image.ANTIALIAS)
+            sourceImage.save(dest)
+            return size + sourceImage.size
+        else:
+            return []
+    def AddToArchive(self):
+        """Store a new ArchiveList row built from the command-line options.
+
+        Requires --thread, --chan and --chanTag; the board defaults to 'b'.
+        Prints a message instead when the thread is already listed or the
+        required options are missing.
+        """
+        if self.options.thread and self.options.chan and self.options.chanTag:
+            if not self.options.board:
+                self.options.board = 'b'
+            entry = meta.Session.query(ArchiveList).filter(ArchiveList.tid==self.options.thread).filter(ArchiveList.url==self.options.chan).filter(ArchiveList.board==self.options.board).first()
+            if entry:
+                print "Thread is already in the list"
+            else:
+                entry = ArchiveList()
+                entry.tid = self.options.thread
+                entry.url = self.options.chan
+                entry.chanTag = self.options.chanTag
+                entry.board = self.options.board
+                entry.tags = self.options.tags or ''
+                entry.type = self.options.type or 'wakaba'
+                entry.filters = self.options.filters or ''
+                entry.timeDiff = self.options.timeDiff or 0
+                entry.lastChanged = datetime.datetime.fromtimestamp(0)
+                meta.Session.save(entry)
+                meta.Session.commit()
+        else:
+            print "Bad parameters"
+    def UpdateArchive(self):
+        """Check every ArchiveList row and import the threads that changed.
+
+        A row whose thread reports 404 is deleted; a changed thread (200) is
+        processed and its ``lastChanged`` is advanced once processing
+        succeeded.
+        """
+        archiveList = meta.Session.query(ArchiveList).all()
+        for entry in archiveList:
+            thread = Thread(entry,self.parsers)
+            state = thread.checkState()
+            print "*** Thread %s HTTP %s" % (thread.directlink,state[0])
+            if state[0] == 404:
+                meta.Session.delete(entry)
+                meta.Session.commit()
+            elif state[0] == 200:
+                # Leave lastChanged alone when nothing could be imported so
+                # the thread is retried on the next run.
+                if self.processThread(thread):
+                    entry.lastChanged = state[1]
+                    meta.Session.commit()