User:Proteins/striparticlelinks.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
//<pre>
// Strip document hyperlinks (esp. wikilinks), leaving only their text; useful for FireVox screen reader
// Also fix bug 11555 (order of section title and edit link) and double caption
//
// To use this script, add "importScript('User:Proteins/striparticlelinks.js');" to your monobook.js subpage 
// under your user page, as you can see at User:Proteins/monobook.js

// Make the current page friendlier to screen readers such as FireVox.
//
// Works on the element with id "bodyContent" and performs, in this order:
//   1. detection of the Main Page (presence of an element with id "mp-topbanner");
//   2. merging of adjacent sibling lists of the same type (UL, OL, DL);
//   3. the bug 11555 fix: the "editsection" child of every H2-H5 header is removed
//      (or, if eliminate_edit_section_links is switched off, moved behind the
//      "mw-headline" child);
//   4. unwrapping of hyperlinks: the <a> element is replaced by its own child nodes,
//      unless should_keep_link() decides the link is navigational and must stay;
//   5. prefixing the ALT text of every image larger than 5000 square pixels
//      with "Image i of n: ".
//
// Takes no arguments and returns nothing; the results are reported to the user
// through window.alert().
function stripHyperlinks() {
	// Behaviour switches
	var eliminate_edit_section_links = true; // true: delete edit-section links; false: only move them behind the heading text
	var delete_line_breaks_in_mp_topbanner = false; // optional Main Page tweak, per Graham87

	var header_tag_strings = [ "H2", "H3", "H4", "H5" ];
	var significant_image_pixels = 5000; // images with a larger area are "significant", smaller ones are treated as icons

	var alert_string = "";
	var diagnostic_string = "";
	var on_main_page = false;

	var body_content;

	var hyperlinks;
	var hyperlink_index = 0;
	var temp_hyperlink;
	var parent_node;
	var num_hyperlinks_removed = 0;

	var num_redlinks = 0;
	var num_redlinks_removed = 0;
	var redlink_index = 0;
	var redlink_names = [];

	var num_list_mergers = 0;
	var total_num_header_swaps = 0;

	var images;
	var temp_image;
	var image_index = 0;
	var image_counter = 0;
	var num_significant_images = 0;

	// Read node[property]; yields undefined instead of throwing when the node is missing.
	function read(node, property) {
		return node ? node[property] : undefined;
	}

	// Copy a node list into a plain array, so that removing nodes from the document
	// while iterating cannot shift the entries still to be visited.
	function snapshot(node_list) {
		var copy = [];
		var index;
		for (index = 0; index < node_list.length; index++) {
			copy.push(node_list[index]);
		}
		return copy;
	}

	// Main Page only: append the text of the two nodes preceding-and-including
	// "articlecount" to the node before them, then remove both, which gets rid of
	// two line breaks in the top banner.
	// NOTE(review): this path is switched off by default and assumes the two
	// siblings are elements carrying innerHTML; confirm against the live Main Page
	// before enabling it.
	function merge_topbanner_lines() {
		var last_node = document.getElementById('articlecount');
		var middle_node = last_node ? last_node.previousSibling : null;
		var first_node = middle_node ? middle_node.previousSibling : null;
		var container_node;

		if (!first_node) { return; }
		container_node = last_node.parentNode;

		// There is no document subtree, just the text in two subsequent DIVs
		first_node.appendChild(document.createTextNode(middle_node.innerHTML));
		first_node.appendChild(document.createTextNode(last_node.innerHTML));

		container_node.removeChild(middle_node);
		container_node.removeChild(last_node);
	}

	// Merge every list of type tag_name ("UL", "OL" or "DL") into the list of the same
	// type that directly precedes it as a sibling. Only whitespace text and
	// non-element, non-text nodes may lie between the two. Returns the number of mergers.
	function merge_adjacent_lists(tag_name) {
		var lists = snapshot(body_content.getElementsByTagName(tag_name));
		var mergers = 0;
		var list_index;
		var current_list;
		var previous_node;
		var list_parent;

		// Walk backwards so that a run of adjacent lists collapses upwards into the first one
		for (list_index = lists.length - 1; list_index >= 0; list_index--) {
			current_list = lists[list_index];

			// Look for the previous Element sibling, but stop at any non-empty text
			previous_node = current_list.previousSibling;
			while ((previous_node) && (previous_node.nodeType != 1)) {
				if ((previous_node.nodeType == 3) && (previous_node.data.replace(/\s/g, "").length > 0)) { break; }
				previous_node = previous_node.previousSibling;
			}
			if ((!previous_node) || (previous_node.nodeName != tag_name)) { continue; }

			list_parent = current_list.parentNode;
			while (current_list.childNodes.length > 0) {
				previous_node.appendChild(current_list.childNodes[0]);
			}
			previous_node.normalize();
			list_parent.removeChild(current_list);
			list_parent.normalize();
			mergers++;
		}
		return mergers;
	}

	// Fix bug 11555 for screen readers in all H2-H5 headers of the document.
	// Returns the number of headers that were changed. Every header is examined,
	// including the first one of each level; headers without an "editsection"
	// child (e.g. a table-of-contents title) are left untouched.
	function fix_section_headers() {
		var changed = 0;
		var tag_index;
		var headers;
		var header_index;
		var temp_header;
		var child_index;
		var child_node;
		var editsection_node;
		var mw_headline_node;

		for (tag_index = 0; tag_index < header_tag_strings.length; tag_index++) {
			headers = snapshot(document.getElementsByTagName(header_tag_strings[tag_index]));
			for (header_index = 0; header_index < headers.length; header_index++) {
				temp_header = headers[header_index];

				editsection_node = null;
				mw_headline_node = null;
				for (child_index = 0; child_index < temp_header.childNodes.length; child_index++) {
					child_node = temp_header.childNodes[child_index];
					if (child_node.className == "editsection") {
						editsection_node = child_node;
					} else if (child_node.className == "mw-headline") {
						mw_headline_node = child_node;
					}
				}
				if (editsection_node == null) { continue; }

				if (eliminate_edit_section_links) {
					temp_header.removeChild(editsection_node);
					changed++;
				} else if (mw_headline_node != null) {
					temp_header.insertBefore(mw_headline_node, editsection_node);
					changed++;
				}
			}
		}
		return changed;
	}

	// Decide whether a link must stay a link. Returns true to keep it, false to unwrap it.
	function should_keep_link(link) {
		var link_class = link.className;
		var link_parent;
		var grandparent_node;
		var greatgrandparent_node;
		var greatgreatgrandparent_node;
		var parent_name;
		var parent_class;
		var grandparent_class;
		var greatgrandparent_class;
		var next_sibling_node;

		if (!link.title) { return true; } // only links with a title (wikilinks) are candidates
		if (link.title.match(/^User:/)) { return true; } // keep user names
		if (link.title.match(/^User\stalk:/)) { return true; } // keep user talk pages
		if (link.getAttribute("accesskey")) { return true; } // avoid command links
		if (link_class === "image") { return true; } // keep images
		if (link_class === "internal") { return true; } // keep Enlarge buttons
		if (link_class === "external text") { return true; } // keep geotags, etc.

		link_parent = link.parentNode;
		grandparent_node = read(link_parent, "parentNode");
		greatgrandparent_node = read(grandparent_node, "parentNode");
		greatgreatgrandparent_node = read(greatgrandparent_node, "parentNode");

		parent_name = read(link_parent, "nodeName");
		parent_class = read(link_parent, "className");
		grandparent_class = read(grandparent_node, "className");
		greatgrandparent_class = read(greatgrandparent_node, "className");

		if (on_main_page) {
			// Save all bold links on the Main Page
			if (parent_name === "B") { return true; }

			// Save links in the mp-strapline
			if ((read(greatgreatgrandparent_node, "id") === "mp-strapline") || (read(read(greatgreatgrandparent_node, "parentNode"), "id") === "mp-strapline")) { return true; }

			// Save "Recently featured:" links: their parent is a P whose second-next
			// sibling (the first one is skipped, presumably a whitespace text node)
			// is a DIV of class "noprint". This is a comparison; it must not assign the class.
			if (parent_name === "P") {
				next_sibling_node = link_parent.nextSibling;
				if ((next_sibling_node) && (next_sibling_node.nextSibling)) {
					next_sibling_node = next_sibling_node.nextSibling;
					if ((next_sibling_node.nodeName === "DIV") && (next_sibling_node.className === "noprint")) { return true; }
				}
			}
		} else {
			// keep links within most lists per Graham87's suggestion, but not References and Notes,
			// and not redlinks; the anchor may be wrapped once (e.g. in italics)
			if (((parent_name === "LI") || (read(grandparent_node, "nodeName") === "LI")) && (grandparent_class !== "references") && (greatgrandparent_class !== "references-small") && (link_class !== "new")) { return true; }
		}

		// keep section edit buttons
		if (parent_class === "editsection") { return true; }

		// keep sidebar buttons
		if (greatgrandparent_class === "pBody") { return true; }

		// keep category links
		if ((greatgrandparent_class === "catlinks") || (grandparent_class === "catlinks")) { return true; }

		// keep disambiguations
		if ((parent_class === "dablink") || (grandparent_class === "dablink") || (greatgrandparent_class === "dablink")) { return true; }

		// keep "Main article" links
		if (grandparent_class === "noprint relarticle mainarticle") { return true; }

		// keep "Further details" links
		if ((grandparent_class === "boilerplate seealso") || (grandparent_class === "boilerplate further")) { return true; }

		// keep protected and semi-protected icons
		if (grandparent_class === "metadata plainlinks") { return true; }

		// keep links in sound samples
		if ((parent_class === "medialist listenlist") || (grandparent_class === "medialist listenlist") || (greatgrandparent_class === "medialist listenlist")) { return true; }

		return false;
	}

	// An image counts as significant (not a tiny icon) when its area exceeds the threshold
	function is_significant_image(image) {
		return (image.width * image.height) > significant_image_pixels;
	}


	// Everything below needs the article body; stop cleanly when the page has none
	body_content = document.getElementById("bodyContent");
	if (!body_content) {
		window.alert("Could not find the article body (element with id \"bodyContent\"); nothing was changed.\n");
		return;
	}

	// Check whether we're on the Main Page
	if (document.getElementById("mp-topbanner")) {
		on_main_page = true;
		if (delete_line_breaks_in_mp_topbanner) {
			merge_topbanner_lines();
		}
	}

	// Merge adjacent lists of the same type
	num_list_mergers += merge_adjacent_lists("UL");
	num_list_mergers += merge_adjacent_lists("OL");
	num_list_mergers += merge_adjacent_lists("DL");
	if (num_list_mergers == 1) {
		alert_string += "\nThere was one list merger.\n";
	} else {
		alert_string += "\nThere were " + num_list_mergers + " list mergers.\n";
	}

	// Fix bug 11555 for screen readers
	total_num_header_swaps = fix_section_headers();
	if (eliminate_edit_section_links) {
		if (total_num_header_swaps == 1) {
			alert_string += "Eliminated the edit-section link of one header.\n";
		} else {
			alert_string += "Eliminated the edit-section link of " + total_num_header_swaps + " headers.\n";
		}
	} else {
		if (total_num_header_swaps == 1) {
			alert_string += "Swapped text and edit link in one header.\n";
		} else {
			alert_string += "Swapped text and edit link in " + total_num_header_swaps + " headers.\n";
		}
	}

	// Main work of the script: eliminating hyperlinks
	hyperlinks = snapshot(body_content.getElementsByTagName("a"));
	for (hyperlink_index = 0; hyperlink_index < hyperlinks.length; hyperlink_index++) {
		temp_hyperlink = hyperlinks[hyperlink_index];

		// Count the redlinks, whether or not they end up being removed
		if (temp_hyperlink.className == "new") { num_redlinks++; }

		parent_node = temp_hyperlink.parentNode;
		if ((!parent_node) || (should_keep_link(temp_hyperlink))) { continue; }

		// Graft the link's subtree back into the document in place of the link,
		// which preserves nested markup such as italics
		while (temp_hyperlink.childNodes.length > 0) {
			parent_node.insertBefore(temp_hyperlink.childNodes[0], temp_hyperlink);
		}
		parent_node.removeChild(temp_hyperlink);
		num_hyperlinks_removed++;

		// Count the redlinks removed
		if (temp_hyperlink.className == "new") {
			redlink_names.push(temp_hyperlink.title);
			num_redlinks_removed++;
		}

		// Merge blocks of text that are adjacent in the document tree, prevent screen reader pauses
		parent_node.normalize();
	}

	// Acknowledgment of redlinks
	if (num_redlinks == 1) {
		alert_string += "Counted one redlink in the main article, unlinked " + num_redlinks_removed + ".\n";
	} else {
		alert_string += "Counted " + num_redlinks + " redlinks in the main article, unlinked " + num_redlinks_removed + ".\n";
	}
	if (num_redlinks_removed == 1) {
		alert_string += "   " + redlink_names[0] + "\n";
	} else if (num_redlinks_removed > 1) {
		// Longer lists get their own alerts, 40 names per alert
		diagnostic_string = "Removed " + num_redlinks_removed + " redlinks:\n\n";
		for (redlink_index = 1; redlink_index <= num_redlinks_removed; redlink_index++) {
			if ((redlink_index % 40 == 1) && (redlink_index > 1)) {
				window.alert(diagnostic_string);
				diagnostic_string = "List of " + num_redlinks_removed + " redlinks continued...\n\n";
			}
			diagnostic_string += redlink_index + "   " + redlink_names[redlink_index - 1] + "\n";
		}
		window.alert(diagnostic_string);
	}

	if (num_hyperlinks_removed == 1) {
		alert_string += "Removed one hyperlink from this article.\n";
	} else {
		alert_string += "Removed " + num_hyperlinks_removed + " hyperlinks from this article.\n";
	}

	// Count number of significant images first, so that each ALT text can say "i of n"
	images = document.images;
	for (image_index = 0; image_index < images.length; image_index++) {
		if (is_significant_image(images[image_index])) { num_significant_images++; }
	}

	// Amend ALT text of significant images, initially to avoid double reading of captions
	for (image_index = 0; image_index < images.length; image_index++) {
		temp_image = images[image_index];
		if (!is_significant_image(temp_image)) { continue; }

		image_counter++;
		if (temp_image.alt != "") { // preface existing ALT text with the image number
			temp_image.alt = "Image " + image_counter + " of " + num_significant_images + ": " + temp_image.alt;
		} else if (temp_image.src) { // no ALT text: fall back to the file name
			temp_image.alt = "Image " + image_counter + " of " + num_significant_images + ": " + temp_image.src.split('/').pop();
		}
	}
	if (image_counter == 1) {
		alert_string += "Modified ALT text of one image.\n";
	} else {
		alert_string += "Modified ALT text of " + image_counter + " images.\n";
	}

	// Print combined alert string
	window.alert(alert_string);

} // closes function stripHyperlinks()

// Register the "–links" tab (access key "s") in the p-cactions portlet; clicking it runs stripHyperlinks().
// The mediawiki.util module is requested explicitly because mw.util is not guaranteed to be
// loaded when a user script runs, and jQuery's DOM-ready wrapper replaces the deprecated
// addOnloadHook() of the legacy wikibits library.
mw.loader.using('mediawiki.util', function () {
	$(function () {
		mw.util.addPortletLink('p-cactions', 'javascript:stripHyperlinks()', '–links', 'ca-nolinks', 'Strips links for screen readers like FireVox', 's', '');
	});
});

//</pre>