Utente:IPork/Sandbox/10: differenze tra le versioni

Contenuto cancellato Contenuto aggiunto
IPork (discussione | contributi)
m Nessun oggetto della modifica
snellito e migliorato
Riga 784:
# Distributed under the terms of the MIT license.
#
 
import pickle, bz2
import wikipedia, catlib
import datetime
 
 
class CategoryDatabase:
    '''
    Temporary knowledge base that stores, for each category, its
    subcategories, its articles and its supercategories, so that category
    pages do not need to be loaded over and over again.

    Attributes:
        catContentDB -- dict mapping a category to a 2-tuple
                        (list of subcategories, list of articles).
        superclassDB -- dict mapping a category to the list of its
                        supercategories.
    '''

    def __init__(self, rebuild=False, filename='category.dump.bz2'):
        '''
        Load the database from a bz2-compressed pickle dump, or start empty.

        rebuild  -- if true, ignore any dump on disk and start with empty
                    dictionaries.
        filename -- path of the dump file to read.

        If the dump cannot be read for any reason (missing file, corrupt
        data, missing keys), the database silently starts empty.
        '''
        if rebuild:
            self.rebuild()
            return
        try:
            f = bz2.BZ2File(filename, 'r')
            try:
                wikipedia.output(u'Reading dump from %s' % filename)
                # NOTE(security): unpickling can execute arbitrary code, so
                # the dump file must come from a trusted source (normally a
                # previous call to dump()).
                databases = pickle.load(f)
            finally:
                # Close the file even when unpickling fails.
                f.close()
            # keys are categories, values are 2-tuples with lists as entries.
            self.catContentDB = databases['catContentDB']
            # like the above, but for supercategories
            self.superclassDB = databases['superclassDB']
        except Exception:
            # Best effort: if anything goes wrong, just start with an empty
            # database. Catching Exception (not a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            self.rebuild()

    def rebuild(self):
        '''Reset both dictionaries to empty.'''
        self.catContentDB = {}
        self.superclassDB = {}

    def _getCatContent(self, cat):
        '''
        Return the (subcategories, articles) tuple for cat, querying the
        category object and storing the result on the first request.
        '''
        if cat not in self.catContentDB:
            subcatlist = cat.subcategoriesList()
            articlelist = cat.articlesList()
            # add to dictionary
            self.catContentDB[cat] = (subcatlist, articlelist)
        return self.catContentDB[cat]

    def getSubcats(self, supercat):
        '''
        For a given supercategory, return the list of all its subcategories.

        The list is saved in catContentDB so that it does not have to be
        requested from the category object again next time.
        '''
        return self._getCatContent(supercat)[0]

    def getArticles(self, cat):
        '''
        For a given category, return the list of all its articles.

        The list is saved in catContentDB so that it does not have to be
        requested from the category object again next time.
        '''
        return self._getCatContent(cat)[1]

    def getSupercats(self, subcat):
        '''
        For a given category, return the list of all its supercategories.

        The list is saved in superclassDB so that it does not have to be
        requested from the category object again next time.
        '''
        if subcat not in self.superclassDB:
            # add to dictionary
            self.superclassDB[subcat] = subcat.supercategoriesList()
        return self.superclassDB[subcat]

    def dump(self, filename='category.dump.bz2'):
        '''
        Save the contents of the dictionaries superclassDB and catContentDB
        to disk as a bz2-compressed pickle.

        filename -- path of the dump file to write.

        A PicklingError is reported to the user instead of being raised, so
        a failed dump does not abort the caller.
        '''
        wikipedia.output(u'Dumping to %s, please wait...' % filename)
        f = bz2.BZ2File(filename, 'w')
        try:
            databases = {
                'catContentDB': self.catContentDB,
                'superclassDB': self.superclassDB,
            }
            # store dump to disk in binary format
            try:
                pickle.dump(databases, f, protocol=pickle.HIGHEST_PROTOCOL)
            except pickle.PicklingError:
                # Keep the original best-effort behaviour, but tell the user
                # that the dump is unusable instead of failing silently.
                wikipedia.output(
                    u'Could not pickle the category database; '
                    u'the dump in %s is incomplete.' % filename)
        finally:
            # Close the file even when pickling fails unexpectedly.
            f.close()
 
class CategoryListifyRobot:
Line 1 051 ⟶ 961:
 
try:
catDB = CategoryDatabase()
action = None
restore = False
Line 1 077 ⟶ 986:
 
finally:
catDBwikipedia.dumpstopme()
</source>