Utente:Alex brollo/OCR.js
Nota: dopo aver pubblicato, potrebbe essere necessario pulire la cache del proprio browser per vedere i cambiamenti.
- Firefox / Safari: tieni premuto il tasto delle maiuscole Shift e fai clic su Ricarica, oppure premi Ctrl-F5 o Ctrl-R (⌘-R su Mac)
- Google Chrome: premi Ctrl-Shift-R (⌘-Shift-R su un Mac)
- Internet Explorer / Edge: tieni premuto il tasto Ctrl e fai clic su Aggiorna, oppure premi Ctrl-F5
- Opera: premi Ctrl-F5.
/* Local copy of wikisource.org/MediaWiki:OCR.js, taken on 8/11/2014.
See the discussion on the original Talk page.
*/
/*jshint boss:true*/
/*global $, mw*/
/*
* Query an OCR for a given Page:. First try to get the hOCR text layer, as it is available
* for most books, fast, and of better quality. If that fails, fall back to the older and
* slower OCR method. hOCR fails for around 1 in 5000 books; OCR should never fail, as it
* uses the image visible on the Page:.
*/
// alex change to use test routines
// Loads the companion script. According to the notes in hocr_callback it currently
// provides analisiPagina(); NOTE(review): leggiBox() is presumably defined there too - confirm.
importScript("User:Alex brollo/hOCRlab.js");
// Language code sent to the OCR services in the "lang" query parameter.
// fraktur_ocr() temporarily overrides it with 'de-f'.
var lang = mw.config.get( 'wgContentLanguage' );
/**
 * Lock or unlock the OCR user interface while a request is in flight.
 *
 * Locking removes the click handlers of both OCR buttons, disables the edit
 * box and installs an ESC key handler that unlocks again. Unlocking binds the
 * click handlers back and re-enables the edit box.
 *
 * @param {boolean} set true to lock the input, false to unlock it.
 */
function disable_input(set)
{
    // A namespaced event lets us remove exactly our own ESC handler. Without
    // this, every lock added one more permanent keyup handler, and each later
    // ESC press stacked additional click handlers on the buttons.
    var escEvent = 'keyup.wsOcrEscape';
    $(document).off(escEvent);
    if (set) {
        $(document).on(escEvent, function(e) {
            // 27 is the key code of ESC.
            if (e.which === 27) { disable_input(false); }
        });
        $('#wsOcr1').off('click');
        $('#wsOcr2').off('click');
    } else {
        $('#wsOcr1').on('click', do_hocr);
        // The Fraktur handler belongs to the second button; it used to be
        // bound to #wsOcr1, which made the first button trigger both OCR
        // paths and left the Fraktur button without a handler.
        $('#wsOcr2').on('click', fraktur_ocr);
    }
    $('#wpTextbox1').prop('disabled', set);
}
/* Original Phe callback function
function hocr_callback(data) {
if (data.error) {
// Fallback to the slow way.
disable_input(false);
do_ocr();
return;
} else {
// Checking if tb is disabled is required with chrome as ESC doesn't kill
// the query.
var tb = document.getElementById("wpTextbox1");
if (tb.disabled) {
localStorage.ws_hOCR = data.text;
var text = $(data.text).text();
// Ugly as hell.
*/
// text = text.replace(/[ ]*\n[ ]*/g, '\n')
/* .replace(/\n\n\n\n/g, '@_@_@_@_@_@')
.replace(/\n\n/g, '\n')
.replace(/@_@_@_@_@_@/g, '\n\n')
.replace(/\n\n\n/g, '\n\n');
tb.value = $.trim(text);
}
}
disable_input(false);
}
*/
/**
 * Handle the JSON answer of the slow OCR service (see do_ocr).
 *
 * On error the message carried in data.text is shown with alert(); otherwise
 * the text is written into the edit box, but only while the box is still
 * disabled. The input is unlocked in both cases.
 *
 * @param {Object} data answer object; the fields read here are error and text.
 */
function ocr_callback(data) {
    console.log("chiamata a ocr_callback");
    if (!data.error) {
        // The box is only filled while it is still disabled: with Chrome, ESC
        // does not kill the query, so an answer can arrive after the user
        // has already unlocked the box.
        var editBox = document.getElementById("wpTextbox1");
        if (editBox.disabled) {
            editBox.value = data.text;
        }
    } else {
        alert(data.text);
    }
    disable_input(false);
}
// local modified callback function
/**
 * Locally modified callback for the hOCR request (see do_hocr / do_hocr_page).
 *
 * On error it falls back to the slow OCR path. Otherwise, while the edit box
 * is still disabled, it stores the raw hOCR and the page title in
 * localStorage, runs analisiPagina() and posts the resulting text into the
 * edit box when the page is new or leggiBox() returns an empty string.
 * Finally it unlocks the input and adds a "textSelect" link to the toolbox.
 *
 * @param {Object} data answer object; the fields read here are error and text.
 */
function hocr_callback(data) {
    console.log("chiamata a hocr_callback");
    if (data.error) {
        console.log(JSON.stringify("Errore hOCR: "+data.error));
        // Fallback to the slow way.
        disable_input(false);
        do_ocr();
        return;
    }
    try {
        // Checking if tb is disabled is required with chrome as ESC doesn't kill
        // the query.
        var tb = document.getElementById("wpTextbox1");
        if (tb.disabled) {
            localStorage.ws_hOCR = data.text;
            // alex change: linking hOCR to a specific page, needed for persistent data
            localStorage.ws_hOCR_pageName = mw.config.get("wgTitle");
            // analisiPagina() presently lives in User:Alex brollo/hOCRlab.js; per the
            // original author's note it also builds a mw.pagina object and saves it
            // into localStorage.ws_hOCR_page.
            var text = analisiPagina();
            console.log("chiamata a analisiPagina");
            // Post the text into the edit box only if the page is new (or leggiBox()
            // is empty); otherwise the data is only kept in localStorage.
            // NOTE(review): leggiBox() presumably returns the current edit box content - confirm.
            if (mw.config.get("wgArticleId") === 0 || leggiBox() === "") {
                tb.value = $.trim(text);
            }
            if (mw.ocr_show !== undefined) {
                console.log("chiamata a mw.ocr_show()");
                mw.ocr_show();
                mw.ocr_showAllLines();
            }
        }
    } finally {
        // Always unlock, even when one of the helpers above throws; otherwise
        // the edit box would stay disabled until the user presses ESC.
        disable_input(false);
    }
    // Add the toolbox link only once: the callback runs on every OCR request
    // and used to append a duplicate <li id="t-crop"> each time.
    if ($("#t-crop").length === 0) {
        $("#p-tb ul").append($('<li id="t-crop"><a href="javascript:mediaWiki.textSelect()">textSelect</a></li>'));
    }
}
/**
 * Callback for the hOCR request issued by do_hocr_view.
 *
 * Errors are ignored silently. On success the raw hOCR and the page title are
 * stored in localStorage and analisiPagina() is run; its return value is not
 * used here.
 *
 * @param {Object} data answer object; the fields read here are error and text.
 */
function hocr_callback_view(data) {
    if (data.error) {
        return;
    }
    localStorage.ws_hOCR = data.text;
    // alex change: linking hOCR to a specific page, needed for persistent data
    localStorage.ws_hOCR_pageName = mw.config.get("wgTitle");
    // analisiPagina() presently lives in User:Alex brollo/hOCRlab.js; per the
    // original author's note it also builds a mw.pagina object and saves it
    // into localStorage.ws_hOCR_page.
    analisiPagina();
}
/**
 * Request the hOCR text layer of the current page and hand the answer to
 * hocr_callback. The input is locked first unless the page is being viewed.
 */
function do_hocr() {
    var locked = mw.config.get("wgAction") !== "view";
    if (locked) {
        disable_input(true);
    }
    // Every value is percent-encoded: a user name or title containing
    // '&', '#' or '+' would otherwise corrupt the query string.
    var request_url = '//tools.wmflabs.org/phetools/hocr_cgi.py?cmd=hocr&book='
        + encodeURIComponent(mw.config.get('wgTitle'))
        + '&lang=' + encodeURIComponent(lang)
        + '&user=' + encodeURIComponent(mw.config.get('wgUserName'));
    $.getJSON(request_url).done(hocr_callback).fail(function () {
        // A failed request never reaches hocr_callback, so undo the lock
        // here instead of leaving the edit box disabled.
        if (locked) {
            disable_input(false);
        }
    });
}
/**
 * Request the hOCR text layer of the current page without touching the input
 * state, and hand the answer to hocr_callback_view.
 */
function do_hocr_view() {
    // Every value is percent-encoded: a user name or title containing
    // '&', '#' or '+' would otherwise corrupt the query string.
    var request_url = '//tools.wmflabs.org/phetools/hocr_cgi.py?cmd=hocr&book='
        + encodeURIComponent(mw.config.get('wgTitle'))
        + '&lang=' + encodeURIComponent(lang)
        + '&user=' + encodeURIComponent(mw.config.get('wgUserName'));
    $.getJSON(request_url).done(hocr_callback_view);
}
/**
 * Request the hOCR text layer of an arbitrary page and hand the answer to
 * hocr_callback. The input is locked while the request is running.
 *
 * @param {string} page page title; the first "Pagina:" or "Page:" found in it
 *     is removed before the title is sent as the "book" parameter.
 */
function do_hocr_page(page) {
    disable_input(true);
    var book = page.replace(/Pagina:|Page:/,"");
    // Every value is percent-encoded: a user name or title containing
    // '&', '#' or '+' would otherwise corrupt the query string.
    var request_url = '//tools.wmflabs.org/phetools/hocr_cgi.py?cmd=hocr&book='
        + encodeURIComponent(book)
        + '&lang=' + encodeURIComponent(lang)
        + '&user=' + encodeURIComponent(mw.config.get('wgUserName'));
    $.getJSON(request_url).done(hocr_callback).fail(function () {
        // A failed request never reaches hocr_callback, so undo the lock
        // here instead of leaving the edit box disabled.
        disable_input(false);
    });
}
/**
 * Slow OCR path: send the URL of the page image shown on the page to the OCR
 * service and hand the answer to ocr_callback. Does nothing when the page has
 * no '.prp-page-image img' element or that element has no src.
 */
function do_ocr() {
    var $image = $( '.prp-page-image img' );
    if (!$image.length) {
        return;
    }
    var src = $image.attr('src');
    if (!src) {
        // Nothing to OCR; do not lock the input for a request that cannot work.
        return;
    }
    disable_input(true);
    // server side can't use protocol relative url, request it as https:
    // Only a protocol-relative src needs the scheme; an already absolute src
    // used to become "https:https://...".
    var url_image = src.indexOf('//') === 0 ? 'https:' + src : src;
    // NOTE(review): the url value is passed through unencoded, as before,
    // because the server's expectation is not visible here - confirm whether
    // it can be wrapped in encodeURIComponent as well.
    var request_url = "//tools.wmflabs.org/phetools/ocr.php?cmd=ocr&url=" + url_image
        + "&lang=" + encodeURIComponent(lang)
        + "&user=" + encodeURIComponent(mw.config.get('wgUserName'));
    $.getJSON( request_url ).done( ocr_callback ).fail( function () {
        // A failed request never reaches ocr_callback, so undo the lock
        // here instead of leaving the edit box disabled.
        disable_input(false);
    } );
}
/**
 * Run the slow OCR with the Fraktur language code.
 *
 * The shared lang variable is switched to 'de-f' only while do_ocr() builds
 * its request, then set back to the wiki content language.
 */
function fraktur_ocr()
{
    lang = 'de-f';
    try {
        // For fraktur we need to use the slow way, all hocr for 'de'
        // are done with non-fraktur.
        do_ocr();
    } finally {
        // Restore even when do_ocr() throws, so later requests do not keep
        // using the Fraktur language code.
        lang = mw.config.get( 'wgContentLanguage' );
    }
}
/**
 * Add one button to the WikiEditor toolbar ('advanced' section, 'format'
 * group) and set the width of the element whose rel attribute is the button id.
 *
 * @param {Object} b button description; the fields read here are imageId,
 *     speedTip, imageFile and onClick.
 */
function addButtonToWikiEditorToolbar( b ){
    var buttonSpec = {
        label: b.speedTip,
        type: 'button',
        icon: b.imageFile,
        action: {
            type: 'callback',
            execute: b.onClick
        }
    };
    // The tool map is keyed by the button id.
    var toolMap = {};
    toolMap[ b.imageId ] = buttonSpec;
    var toolbarEntry = {
        section: 'advanced',
        group: 'format',
        tools: toolMap
    };
    $( '#wpTextbox1' ).wikiEditor( 'addToToolbar', toolbarEntry );
    var relSelector = '[rel="' + b.imageId + '"]';
    $( relSelector ).width( 42 );
}
/**
 * Add one button to the classic toolbar, replace its click handlers with one
 * that calls b.onClick and returns false, and set its width.
 *
 * @param {Object} b button description; the fields read here are imageFile,
 *     speedTip, imageId and onClick.
 */
function addButtonToClassicToolbar( b ){
    var buttonDef = {
        imageFile: b.imageFile,
        speedTip: b.speedTip,
        imageId: b.imageId
    };
    mw.toolbar.addButton( buttonDef );

    // Returning false from the handler is kept from the original code.
    function handleClick() {
        b.onClick();
        return false;
    }

    var $button = $( '#' + b.imageId );
    $button.off( 'click' ).click( handleClick ).width( 46 );
}
/**
 * Add the OCR button(s) to whichever edit toolbar the user has enabled.
 *
 * Picks the WikiEditor or the classic toolbar from the user options (or does
 * nothing when neither is enabled), waits for the needed modules and for DOM
 * ready, then adds the main OCR button. On a wiki whose content language is
 * 'de' the button gets a German label and a second Fraktur button is added.
 */
function customizeToolbar()
{
    var userOptions = mw.user.options;
    var requiredModules, defaultIcon, addButton;

    // This can be the string "0" if the user disabled the preference
    // ([[bugzilla:52542#c3]]), hence the loose comparison with 1.
    if ( userOptions.get( 'usebetatoolbar' ) == 1 ) {
        requiredModules = [ 'ext.wikiEditor', 'ext.proofreadpage.page.edit' ];
        defaultIcon = '//upload.wikimedia.org/wikipedia/commons/c/c9/Toolbaricon_OCR.png';
        addButton = addButtonToWikiEditorToolbar;
    } else if ( userOptions.get( 'showtoolbar' ) == 1 ) {
        requiredModules = 'mediawiki.toolbar';
        defaultIcon = '//upload.wikimedia.org/wikipedia/commons/e/e0/Button_ocr.png';
        addButton = addButtonToClassicToolbar;
    } else {
        return;
    }

    var whenReady = $.when( mw.loader.using( requiredModules ), $.ready );
    whenReady.then( function () {
        var isGermanWiki = mw.config.get( 'wgContentLanguage' ) === 'de';

        addButton( {
            imageFile: defaultIcon,
            speedTip: isGermanWiki ? 'Normale OCR' : 'Get the text by OCR',
            imageId: 'wsOcr1',
            onClick: do_hocr
        } );

        if ( isGermanWiki ) {
            addButton( {
                imageFile: '//upload.wikimedia.org/wikipedia/commons/a/af/Button_Fractur_OCR.png',
                speedTip: 'Fraktur OCR',
                imageId: 'wsOcr2',
                onClick: fraktur_ocr
            } );
        }
    } );
}
// Entry point: install the OCR toolbar buttons only when a page of the Page
// namespace is being edited or submitted, and only when OCR has not been
// switched off through self.proofreadpage_disable_ocr.
( function () {
    if ( mw.config.get( 'wgCanonicalNamespace' ) !== 'Page' ) {
        return;
    }
    if ( $.inArray( mw.config.get( 'wgAction' ), [ 'edit', 'submit' ] ) === -1 ) {
        return;
    }
    if ( self.proofreadpage_disable_ocr ) {
        return;
    }
    // The toolbar choice depends on the user options, so wait for them first.
    mw.loader.using( 'user.options' ).done( customizeToolbar );
}() );