User:Anomie/linkclassifier.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/* If you want to use this script, simply add the following line to your [[Special:Mypage/monobook.js]]:

importScript('User:Anomie/linkclassifier.js'); // Linkback: [[User:Anomie/linkclassifier.js]]

* (Please keep the comment so I can see how many people use this). You will also want to
* add some CSS classes, such as those at [[User:Anomie/linkclassifier.css]].
*/

/* If you want this to run "on demand" instead of on every page, set "LinkClassifierOnDemand=true" and
 * use addPortletLink() or the like to add a button calling LinkClassifier.onDemand().
 */

var LinkClassifier = {
	/* This object maps classes to the categories for which to apply them. Values may be an array of strings or a regex.
	 * Arrays must be kept sorted (default Array#sort order): callback() walks each array in step with the
	 * sorted category list of a page, so an unsorted array can silently miss matches.
	 */
	cats: {
		deletion: [
			'Category:All articles proposed for deletion',
			'Category:All books proposed for deletion',
			'Category:All categories for discussion',
			'Category:All disputed non-free Wikipedia files',
			'Category:All files proposed for deletion',
			'Category:All orphaned non-free use Wikipedia files',
			'Category:All redirects for discussion',
			'Category:All replaceable non-free use Wikipedia files',
			'Category:All Wikipedia files with no non-free use rationale',
			'Category:All Wikipedia files with unknown copyright status',
			'Category:All Wikipedia files with unknown source',
			'Category:Articles for deletion',
			'Category:Articles for deletion using wrong syntax',
			'Category:Articles on deletion review',
			'Category:Articles to be merged after an Articles for deletion discussion',
			'Category:Candidates for speedy deletion',
			'Category:Candidates for undeletion',
			'Category:Categories for conversion',
			'Category:Categories for deletion',
			'Category:Categories for listifying',
			'Category:Categories for merging',
			'Category:Categories for renaming',
			'Category:Categories for speedy renaming',
			'Category:Categories to be listified then deleted',
			'Category:Empty categories awaiting deletion',
			'Category:Items pending OTRS confirmation of permission for over 30 days',
			'Category:Miscellaneous pages for deletion',
			'Category:Templates for deletion',
			'Category:Templates for merging',
			'Category:Wikipedia files for discussion'
		].sort(),
		disambiguation: [
			'Category:All disambiguation pages'
		].sort(),
		'set-index': [
			'Category:All set index articles'
		].sort(),
		'featured-content': [
			'Category:Featured articles',
			'Category:Featured lists',
			'Category:Featured pictures',
			'Category:Featured sounds',
			'Category:Featured videos',
			'Category:Featured portals'
		].sort(),
		'good-content': [
			'Category:Good articles'
		].sort(),
		'soft-redirect-cats': [
			'Category:Wikipedia soft redirected categories'
		].sort(),
		'spoken-articles': [
			'Category:Spoken articles'
		].sort(),
		stubcls: /^Category:.* stubs$/,
		'nonfree-media': [
			'Category:All non-free media'
		].sort(),
		unprintworthy: [
			'Category:Unprintworthy redirects',
			'Category:Middle-earth redirects from redundant titles'
		].sort(),
		'unprintworthy-shortcut': [
			'Category:Redirects from shortcuts'
		].sort(),
		'incorrect-title': [
			'Category:Redirects from incorrect disambiguation',
			'Category:Redirects from incorrect names',
			'Category:Redirects from miscapitalisations',
			'Category:Redirects from misspellings'
		].sort()
	},

	/* This object maps page props to CSS classes for which to apply them. Values may be an array of strings or a function returning such.
	 * A function is called as fn( propValue, propName, pageTitle ).
	 */
	props: {
		disambiguation: [
			'disambiguation'
		]
	},

	/* This regex matches page titles to be marked as intentional links to disambiguation pages */
	intentionaldab: / \(disambiguation\)$/,

	/* Was it run already? Set by classifyChildren(), read by rerun(). */
	wasRun: false,

	/**
	 * Error handler shared by every API request issued by this script; logs and otherwise ignores the failure.
	 *
	 * @param {Object} xhr Request object (unused)
	 * @param {string} textStatus Status text passed by jQuery
	 * @param {string} errorThrown Error text passed by jQuery
	 */
	onAjaxError: function ( xhr, textStatus, errorThrown ) {
		mw.log.error( 'AJAX error: ' + textStatus + ' ' + errorThrown );
	},

	/**
	 * Success handler for the main classification query.
	 *
	 * Must be invoked with `this` set to the $.ajax settings object, because the original
	 * query is read back from `this.rawdata`. Re-issues the request when the API asks for
	 * continuation, issues a follow-up query for the redirect pages themselves, and then
	 * adds CSS classes to every prepared link in the content area.
	 *
	 * @param {Object} r Decoded API response
	 * @throws {Error} On a bad response when no console is available, or when no content node exists
	 */
	callback: function ( r ) {
		var i, j, k, k2, v, node, alist, q, prefix, seen, cls, page,
			// Title-keyed maps have no prototype, so that titles such as "constructor" or
			// "toString" (possible on case-sensitive wikis) never hit inherited properties.
			redir = Object.create( null ),
			redirlist = [],
			cats = Object.create( null ),
			missing = Object.create( null ),
			classes = Object.create( null );

		if ( !r || !r.query ) {
			if ( !window.console || typeof window.console.error !== 'function' ) {
				throw new Error( 'Bad response' );
			}
			window.console.error( 'Bad response', r );
			return;
		}
		if ( r['query-continue'] ) {
			// Merge the continuation parameters into the original query and send it again;
			// the data already present in this response is still processed below.
			q = this.rawdata;
			for ( k in r['query-continue'] ) {
				for ( k2 in r['query-continue'][k] ) {
					q[k2] = r['query-continue'][k][k2];
				}
			}
			$.ajax( {
				url: mw.util.wikiScript( 'api' ),
				dataType: 'json',
				type: 'POST',
				data: q,
				rawdata: this.rawdata,
				success: LinkClassifier.callback,
				error: LinkClassifier.onAjaxError
			} );
		}
		r = r.query;

		node = document.getElementById( 'wikiPreview' );
		if ( !node ) {
			node = document.getElementById( 'bodyContent' );
		}
		if ( !node ) {
			throw new Error( 'Huh? No body content?' );
		}
		alist = node.getElementsByTagName( 'A' );
		if ( alist.length === 0 ) {
			return;
		}

		if ( r.redirects ) {
			for ( i = r.redirects.length - 1; i >= 0; i-- ) {
				redir[r.redirects[i].from] = r.redirects[i].to;
				redirlist.push( r.redirects[i].from );
			}
		}
		if ( redirlist.length > 0 ) {
			// The main query resolved redirects, so its page data describes the targets.
			// Ask again, without `redirects`, for the redirect pages themselves.
			q = {
				format: 'json',
				action: 'query',
				titles: redirlist.join( '|' ),
				prop: 'categories|info',
				inprop: 'protection',
				cllimit: 'max',
				rawcontinue: 1
			};
			$.ajax( {
				url: mw.util.wikiScript( 'api' ),
				dataType: 'json',
				type: 'POST',
				data: q,
				rawdata: q,
				success: LinkClassifier.callback,
				error: LinkClassifier.onAjaxError
			} );
		}

		// Responses to the follow-up query (no `redirects` parameter) get prefixed protection classes.
		prefix = this.rawdata.redirects ? '' : 'redir-';
		if ( r.pages ) {
			for ( i in r.pages ) {
				page = r.pages[i];
				classes[page.title] = [];
				missing[page.title] = page.missing !== undefined;
				if ( page.categories ) {
					cats[page.title] = page.categories.map( function ( c ) {
						return c.title;
					} ).sort();
				}
				if ( page.pageprops ) {
					for ( k in page.pageprops ) {
						// Own-property check: a prop named like an Object.prototype member must not match.
						if ( !Object.prototype.hasOwnProperty.call( LinkClassifier.props, k ) || !LinkClassifier.props[k] ) {
							continue;
						}
						v = LinkClassifier.props[k];
						if ( typeof v === 'function' ) {
							v = v( page.pageprops[k], k, page.title );
						}
						classes[page.title].push.apply( classes[page.title], v );
					}
				}
				if ( page.protection ) {
					// `seen` de-duplicates classes when several protection entries map to the same one.
					seen = {};
					for ( j = page.protection.length - 1; j >= 0; j-- ) {
						cls = prefix + 'protection-' + page.protection[j].type + '-' + page.protection[j].level;
						if ( !seen[cls] ) {
							seen[cls] = 1;
							classes[page.title].push( cls );
						}
						if ( page.protection[j].expiry === 'infinity' ) {
							cls += '-indef';
							if ( !seen[cls] ) {
								seen[cls] = 1;
								classes[page.title].push( cls );
							}
						}
					}
				}
				if ( page.flagged ) {
					if ( page.lastrevid !== page.flagged.stable_revid ) {
						classes[page.title].push( 'needs-review' );
					}
				}
			}
		}
		Array.prototype.forEach.call( alist, function ( a ) {
			var cns, cls, m, i, j, pageCats, matchCats,
				$a = $( a );

			// Links that classifyChildren() never prepared (e.g. added later) are left alone.
			if ( a.wikipage === undefined ) {
				return;
			}
			if ( redir[a.wikipage] ) {
				$a.addClass( 'redirect' );
				// From here on the link is tracked under its target; origwikipage keeps the linked title.
				a.wikipage = redir[a.wikipage];
				a.title = a.wikipage;
				cns = mw.config.get( 'wgCanonicalNamespace' );
				if ( a.wikipage === ( cns ? cns + ':' : '' ) + mw.config.get( 'wgTitle' ) ) {
					$a.addClass( 'self-redirect' );
				}
				if ( missing[a.wikipage] ) {
					$a.addClass( 'broken-redirect' );
				}
			}
			m = a.href.match( /#.*/ );
			if ( m && m[0].slice( 0, 10 ) !== '#cite_note' ) {
				try {
					// Modern MediaWiki doesn't normally do the dot-encoding thing anymore, but humans/scripts sometimes still do.
					a.title = a.title.replace( /#.*/, '' ) + decodeURIComponent( m[0].replace( /_/g, ' ' ).replace( /\.([0-9A-F][0-9A-F])/g, '%$1' ) );
				} catch ( e ) {
					// Malformed UTF8? Decode it as bytes.
					a.title = a.title.replace( /#.*/, '' ) + m[0].replace( /_/g, ' ' ).replace( /\.([0-9A-F][0-9A-F])/g, function ( x, n ) {
						return String.fromCharCode( parseInt( n, 16 ) );
					} );
				}
			}
			if ( LinkClassifier.intentionaldab.test( a.origwikipage ) ) {
				$a.addClass( 'intentional-disambiguation' );
			}
			if ( classes[a.wikipage] ) {
				for ( j = classes[a.wikipage].length - 1; j >= 0; j-- ) {
					$a.addClass( classes[a.wikipage][j] );
				}
			}
			if ( a.wikipage !== a.origwikipage && classes[a.origwikipage] ) {
				for ( j = classes[a.origwikipage].length - 1; j >= 0; j-- ) {
					$a.addClass( classes[a.origwikipage][j] );
				}
			}

			// Categories of the target and, for redirects, of the redirect page itself.
			pageCats = [];
			if ( cats[a.wikipage] ) {
				pageCats = pageCats.concat( cats[a.wikipage] );
			}
			if ( a.wikipage !== a.origwikipage && cats[a.origwikipage] ) {
				pageCats = pageCats.concat( cats[a.origwikipage] );
			}
			if ( pageCats.length > 0 ) {
				pageCats = pageCats.sort();
				for ( cls in LinkClassifier.cats ) {
					i = pageCats.length - 1;
					matchCats = LinkClassifier.cats[cls];
					if ( matchCats instanceof RegExp ) {
						while ( i >= 0 ) {
							if ( matchCats.test( pageCats[i] ) ) {
								$a.addClass( cls );
								break;
							}
							i--;
						}
					} else {
						// Both lists are sorted: walk them from the end in step, stopping at the first common entry.
						j = matchCats.length - 1;
						while ( i >= 0 && j >= 0 ) {
							if ( pageCats[i] === matchCats[j] ) {
								$a.addClass( cls );
								break;
							}
							if ( pageCats[i] > matchCats[j] ) {
								--i;
							} else {
								--j;
							}
						}
					}
				}
			}
		} );
	},

	/**
	 * Success handler for the "does a draft exist?" query: adds class `has-draft` to every
	 * prepared link for whose title a `Draft:` page was reported as existing.
	 *
	 * @param {Object} r Decoded API response
	 * @throws {Error} On a bad response when no console is available, or when no content node exists
	 */
	draftsCallback: function ( r ) {
		var i, node, alist,
			found = Object.create( null );

		if ( !r || !r.query ) {
			if ( !window.console || typeof window.console.error !== 'function' ) {
				throw new Error( 'Bad response' );
			}
			window.console.error( 'Bad response', r );
			return;
		}
		r = r.query;

		node = document.getElementById( 'wikiPreview' );
		if ( !node ) {
			node = document.getElementById( 'bodyContent' );
		}
		if ( !node ) {
			throw new Error( 'Huh? No body content?' );
		}
		alist = node.getElementsByTagName( 'A' );
		if ( alist.length === 0 ) {
			return;
		}

		if ( r.pages ) {
			for ( i in r.pages ) {
				// Titles the API cannot parse are flagged `invalid` rather than `missing`;
				// neither kind is an existing draft.
				found[r.pages[i].title] = r.pages[i].missing === undefined && r.pages[i].invalid === undefined;
			}
		}
		Array.prototype.forEach.call( alist, function ( a ) {
			if ( a.wikipage !== undefined && found['Draft:' + a.origwikipage] ) {
				$( a ).addClass( 'has-draft' );
			}
		} );
	},

	/**
	 * Extract the page title a URL points to.
	 *
	 * Recognises `/wiki/Title` and `/w/index.php?...title=Title` forms. Underscores become
	 * spaces and the `Image:` / `Image talk:` prefixes are rewritten to `File:` / `File talk:`.
	 *
	 * @param {string} url
	 * @return {string} The title, or '' when the URL has no recognisable title, the title
	 *  cannot be percent-decoded, or it is in the Special: namespace
	 */
	getPageName: function ( url ) {
		var t, m = url.match( /\/wiki\/([^?#]+)/ );
		if ( !m ) {
			m = url.match( /\/w\/index\.php\?(?:.*&)?title=([^&#]+)/ );
		}
		if ( !m ) {
			return '';
		}
		try {
			t = decodeURIComponent( m[1] );
		} catch ( e ) {
			// A stray "%" makes decodeURIComponent throw; skip this one link
			// instead of letting the exception abort the whole classification pass.
			return '';
		}
		t = t.replace( /_/g, ' ' );
		if ( t.slice( 0, 6 ) === 'Image:' ) {
			t = 'File:' + t.slice( 6 );
		}
		if ( t.slice( 0, 11 ) === 'Image talk:' ) {
			// Drop the whole 11-character prefix; cutting only 6 left "talk:" in the result.
			t = 'File talk:' + t.slice( 11 );
		}
		if ( t.slice( 0, 8 ) === 'Special:' ) {
			t = '';
		}
		return t;
	},

	/**
	 * Prepare every link below `node` and query the API for the data needed to classify them.
	 *
	 * Each anchor gets `wikipage` and (for non-external links) `origwikipage` properties, and
	 * non-image links get the class `nonimage`. Titles are then sent to the API in batches;
	 * the responses are handled by callback() and draftsCallback().
	 *
	 * @param {Element} node Element whose descendant links are to be classified
	 */
	classifyChildren: function ( node ) {
		mw.loader.using( [ 'mediawiki.util', 'mediawiki.user' ], function () {
			var alist, titles, draftTitles, re, self, props, i, k;

			LinkClassifier.wasRun = true;
			alist = node.getElementsByTagName( 'A' );
			if ( !alist.length ) {
				return;
			}
			self = LinkClassifier.getPageName( location.href );
			titles = Array.prototype.map.call( alist, function ( a ) {
				a.wikipage = '';
				if ( /(^|\s)(external|extiw)(\s|$)/.test( a.className ) ) {
					return '';
				}
				if ( !/(^|\s)(image|mw-file-description)(\s|$)/.test( a.className ) ) {
					a.className += ' nonimage';
				}
				a.wikipage = LinkClassifier.getPageName( a.href );
				// Links to the current page are not classified.
				if ( a.wikipage === self ) {
					a.wikipage = '';
				}
				a.origwikipage = a.wikipage;
				return a.wikipage;
			} ).sort().filter( function ( title, idx, sorted ) {
				// Drop empty entries and, the list being sorted, adjacent duplicates.
				return title !== '' && ( idx === 0 || sorted[idx - 1] !== title );
			} );

			// Only titles without a namespace prefix are checked for a matching Draft: page.
			re = [];
			for ( k in mw.config.get( 'wgNamespaceIds' ) ) {
				if ( k !== '' ) {
					// Escape the name so regex metacharacters in it are matched literally.
					re.push( mw.util.escapeRegExp( k.replace( /_/g, ' ' ) ) );
				}
			}
			re = new RegExp( '^(' + re.join( '|' ) + '):', 'i' );
			draftTitles = [];
			for ( i = titles.length - 1; i >= 0; i-- ) {
				if ( !re.test( titles[i] ) ) {
					draftTitles.push( 'Draft:' + titles[i] );
				}
			}

			props = [];
			for ( k in LinkClassifier.props ) {
				props.push( k );
			}

			// Send both title lists to the API, at most `limit` titles per request.
			function processLinks( limit ) {
				var q;
				while ( titles.length > 0 ) {
					q = {
						format: 'json',
						action: 'query',
						titles: titles.splice( 0, limit ).join( '|' ),
						prop: 'categories|pageprops|info|flagged',
						redirects: 1,
						cllimit: 'max',
						inprop: 'protection',
						rawcontinue: 1
					};
					// With more prop names than `limit`, no filter is sent and all page props are requested.
					if ( props.length <= limit ) {
						q.ppprop = props.join( '|' );
					}
					$.ajax( {
						url: mw.util.wikiScript( 'api' ),
						dataType: 'json',
						type: 'POST',
						data: q,
						rawdata: q,
						success: LinkClassifier.callback,
						error: LinkClassifier.onAjaxError
					} );
				}

				while ( draftTitles.length > 0 ) {
					q = {
						format: 'json',
						action: 'query',
						titles: draftTitles.splice( 0, limit ).join( '|' ),
						rawcontinue: 1
					};
					$.ajax( {
						url: mw.util.wikiScript( 'api' ),
						dataType: 'json',
						type: 'POST',
						data: q,
						rawdata: q,
						success: LinkClassifier.draftsCallback,
						error: LinkClassifier.onAjaxError
					} );
				}
			}

			if ( titles.length <= 100 ) {
				// Not worth querying the API to see if the user has apihighlimits
				processLinks( 50 );
			} else {
				// Note mw.user.getRights queries the API
				mw.user.getRights( function ( rights ) {
					processLinks( ( rights.indexOf( 'apihighlimits' ) >= 0 ) ? 500 : 50 );
				} );
			}
		} );
	},

	/**
	 * Document-ready entry point. Does nothing in on-demand mode; otherwise registers with
	 * AJAXPreview (when present) and classifies the current page.
	 */
	onLoad: function () {
		if ( window.LinkClassifierOnDemand ) {
			return;
		}
		if ( window.AJAXPreview ) {
			window.AJAXPreview.AddOnLoadHook( LinkClassifier.classifyChildren );
		}
		LinkClassifier.onDemand();
	},

	/**
	 * Fire the 'LinkClassifier' hook (passing this object to its handlers) and classify the
	 * links in the preview area or, failing that, the body content.
	 */
	onDemand: function () {
		// Reference the object by name rather than `this`, so the method also works when
		// it is handed over unbound, e.g. as a button's click handler.
		mw.hook( 'LinkClassifier' ).fire( LinkClassifier );
		var node = document.getElementById( 'wikiPreview' );
		if ( !node ) {
			node = document.getElementById( 'bodyContent' );
		}
		if ( node ) {
			LinkClassifier.classifyChildren( node );
		}
	},

	/**
	 * Classify again, but only if a classification was already performed on this page.
	 */
	rerun: function () {
		if ( LinkClassifier.wasRun ) {
			LinkClassifier.onDemand();
		}
	}
};

/* Unless on-demand mode was requested, classify the page's links once the DOM is ready. */
if ( !window.LinkClassifierOnDemand ) {
	$( LinkClassifier.onLoad );
}