Progetto:Bot/Programmi in Python per i bot/jp2todjvu.py: differenze tra le versioni

Contenuto cancellato Contenuto aggiunto
avevo invertito la logica rispetto alla versione precedente, correggo
aggiungo opzione per i pbm
Riga 13:
 
# Dependencies:
#* PIL (Pillow version: http://pillow.readthedocs.io/en/3.1.x/installation.html) built with OpenJPEG support (for JPEG2000)
#* internetarchive: https://pypi.python.org/pypi/internetarchive
#* DjVuLibre: http://djvu.sourceforge.net/
Riga 40:
def path2url(path):
    """Convert a local filesystem path into a ``file:`` URL.

    The path is first turned into URL path syntax with
    ``urllib.pathname2url`` and the result is then joined onto the
    ``file:`` scheme with ``urlparse.urljoin``.

    Args:
        path: Filesystem path to convert.

    Returns:
        Whatever ``urlparse.urljoin`` yields for the ``file:`` base and the
        converted path.
    """
    return urlparse.urljoin('file:', urllib.pathname2url(path))
 
def cleanfolder(dirpath):
if not os.path.isdir(dirpath):
os.mkdir(dirpath)
 
for filename in os.listdir(dirpath):
filepath = os.path.join(dirpath, filename)
Riga 54:
def dezip(zipf):
    """Extract every ``.jp2`` member of an archive in ``input`` into ``jp2``.

    The ``jp2`` folder is passed to ``cleanfolder`` before extraction.
    Members are written flat: only the last ``/``-separated component of
    each member name is used as the output file name, so any directory
    structure inside the archive is discarded.

    Args:
        zipf: File name of the zip archive, relative to the ``input`` folder.
    """
    cleanfolder("jp2")
    # Context managers close the archive and each output file even when a
    # read or write fails part-way through; the original leaked both.
    with zipfile.ZipFile(os.path.join("input", zipf)) as archive:
        for member in archive.namelist():
            jp2 = member.split("/").pop()
            if not jp2.endswith(".jp2"):
                continue
            with open(os.path.join("jp2", jp2), "wb") as out:
                out.write(archive.read(member))
            # One pre-formatted string prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s  saved" % jp2)
 
def downloadItem(IAid):
Riga 67:
download(IAid,glob_pattern="*_jp2.zip",destdir="input", verbose=True,no_directory=True)
 
def jp2tojpg(fileformat="jpg"):
    """Convert the JPEG 2000 pages in ``jp2`` to the intermediate format.

    Each ``*.jp2`` file is opened with ``Image.open`` and saved as
    ``<fileformat>/<basename>.<fileformat>``.

    Args:
        fileformat: Intermediate image format, ``"jpg"`` or ``"pbm"``.  It is
            used both as the output folder name and as the file extension.

    Raises:
        ValueError: If ``fileformat`` is not ``"jpg"`` or ``"pbm"``.
    """
    if fileformat not in ("jpg", "pbm"):
        raise ValueError("Formato file intermedio non supportato")
    # NOTE(review): both intermediate folders are prepared here, as the
    # cleanfolder("jpg") / cleanfolder("pbm") calls suggest -- confirm that
    # resetting the folder of the format not in use is intended.
    cleanfolder("jpg")
    cleanfolder("pbm")
    for index, name in enumerate(sorted(os.listdir("jp2"))):
        if not name.endswith(".jp2"):
            continue
        fout = "%s.%s" % (name[0:-4], fileformat)
        image = Image.open(os.path.join("jp2", name))
        # Only the first entry of the sorted listing is enlarged, and only
        # when it is narrower than 1000 px: it is scaled to a width of about
        # 1024 px with the same factor applied to the height.
        if index == 0 and image.size[0] < 1000:
            fattore = 1024.0 / image.size[0]
            image = image.resize((int(image.size[0] * fattore),
                                  int(image.size[1] * fattore)))
        image.save(os.path.join(fileformat, fout))
        # One pre-formatted string prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("%s  salvata" % fout)
 
def jpgtodjvu(fileformat="jpg"):
    """Encode every intermediate image as a single-page DjVu file.

    Each ``<fileformat>/<name>.<fileformat>`` file is passed to the external
    ``c44`` command, writing ``djvu/<name>.djvu``.  The ``djvu`` folder is
    passed to ``cleanfolder`` first.  The exit status and the command line
    are printed for every file.

    Args:
        fileformat: Intermediate image format, ``"jpg"`` or ``"pbm"``; it is
            both the input folder name and the file extension to look for.

    Raises:
        ValueError: If ``fileformat`` is not ``"jpg"`` or ``"pbm"``.
    """
    if fileformat not in ("jpg", "pbm"):
        raise ValueError("Formato file intermedio non supportato")
    cleanfolder("djvu")
    suffix = "." + fileformat
    for f in os.listdir(fileformat):
        if not f.endswith(suffix):
            continue
        comando = "c44 %s %s" % (
            os.path.join(fileformat, f),
            os.path.join("djvu", f[:-len(suffix)] + ".djvu"))
        res = os.system(comando)
        # One pre-formatted string prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (res, comando))
def merge(pathdjvu="djvu"):
cleanfolder("output")
Line 105 ⟶ 108:
if len(lista)>7500:
break
 
djvuBundled=os.path.join("output",listaDjvu[0].replace("_0000.djvu",".djvu"))
comando="djvm -c %s %s" % (djvuBundled,lista)
Line 156 ⟶ 159:
if stringa[idip:idfp].count(x)>stringa[idip:idfp].count(idf):
idfp=stringa.find(idf,idfp)+len(idf)
 
if dc==0:
vvalore=stringa[idip+len(idi):idfp-len(idf)]
Line 189 ⟶ 192:
f.close()
print "Variabile salvata nel file "+nome_file
 
def main(IAid, down=True, fileformat="jpg"):
    """Run the conversion pipeline for one Internet Archive item.

    Steps, in order: optional download, unzip of ``<IAid>_jp2.zip``,
    conversion of the JPEG 2000 pages to the intermediate format, DjVu
    encoding of each page, merge, and ``caricaTesto``.

    Args:
        IAid: Internet Archive item identifier.
        down: When true, ``downloadItem`` is called before processing.
        fileformat: Intermediate image format forwarded to ``jp2tojpg`` and
            ``jpgtodjvu``.
    """
    # NOTE(review): only the download is treated as conditional here; the
    # archive is always unpacked.  Confirm this matches the intended
    # behaviour of the "no download" mode.
    if down:
        downloadItem(IAid)
    dezip(IAid + "_jp2.zip")
    jp2tojpg(fileformat=fileformat)
    jpgtodjvu(fileformat=fileformat)
    merge()
    caricaTesto(IAid)
 
 
def splitObject(IAid):
    """Split ``input/<IAid>_djvu.xml`` into header, OBJECT list and footer.

    The file content is split on the literal ``<OBJECT``.  Every piece after
    the first is stripped of surrounding whitespace and gets ``"<OBJECT "``
    put back in front.  The closing ``\\n</BODY>\\n</DjVuXML>`` is removed
    from the last piece and returned separately as the footer.

    Args:
        IAid: Internet Archive item identifier used to build the file name.

    Returns:
        A ``(header, objects, footer)`` tuple: ``header`` is the text before
        the first ``<OBJECT`` plus a trailing newline, ``objects`` is the
        list of ``<OBJECT ...`` strings, and ``footer`` is the constant
        closing markup.
    """
    xmlFile = os.path.join("input", IAid) + "_djvu.xml"
    # Close the file deterministically instead of leaking the handle.
    with open(xmlFile) as f:
        xml = f.read()
    fs = xml.split("<OBJECT")
    fs[1:] = ["<OBJECT " + chunk.strip() for chunk in fs[1:]]
    # When the file holds no OBJECT at all, the last piece is the header
    # itself, so the closing markup is removed from it instead.
    fs[-1] = fs[-1].replace("\n</BODY>\n</DjVuXML>", "")
    footer = "\n</BODY>\n</DjVuXML>"
    header = fs.pop(0) + "\n"
    return (header, fs, footer)
 
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Crea un file DjVu a partire dall'Internet Archive.")
Line 221 ⟶ 224:
parser.add_argument('--no-download', dest='download',
action='store_false', help='non scaricare il file')
parser.add_argument('--pbm', dest='pbm',
action='store_true', help='usa PBM come formato intermedio (non compresso)')
 
args = parser.parse_args()
 
main(args.id, down=args.download, fileformat=("pbm" if args.pbm else "jpg"))
 
 
# passo 1: estrazione immagini