// User:Ed Poor/hiragana.js

// From Wikipedia, the free encyclopedia
// Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// Converts Hiragana text to Romaji

// Includes an optional self-test

// Self-test failure flag: assertEquals() sets it to true when a comparison
// fails, and test() prints "Okay" only while it is still false.
var bugs = false;

// Optional self-test. Runs every expectation below through assertEquals(),
// which compares case-insensitively and throws on the first mismatch, then
// prints "Okay" if the failure flag is still clear.
function test() {

    // [single kana, expected plain romaji]
    var singleKanaCases = [
        ['か', 'ka'],
        ['と', 'to']
    ];

    // [hiragana text, expected romanization]
    var wordCases = [
        ['ちば', 'Chiba'],
        ['まって', 'matte'],        // small tsu doubles the next consonant
        ['いっぱい', 'ippai'],
        ['いっ', 'iっ'],            // trailing small tsu is passed through
        ['きょこ', 'Kyoko'],        // i-row kana + small yo
        ['しゃい', 'shai'],
        ['いらっしゃい', 'irasshai'],      // NOTE(review): was flagged "buggy" by the author - confirm whether it still fails
        ['まっちがった', 'matchigatta']    // NOTE(review): was flagged "buggy" by the author - confirm whether it still fails
    ];

    var i;

    for (i = 0; i < singleKanaCases.length; i++) {
        assertEquals( kanaToRomaji(singleKanaCases[i][0]), singleKanaCases[i][1] );
    }

    for (i = 0; i < wordCases.length; i++) {
        assertEquals( hiraganaToRomaji(wordCases[i][0]), wordCases[i][1] );
    }

    if (bugs) return;
    debugPrint("Okay");
}

// Romanizes a string of hiragana, one step at a time from left to right.
// Every syllable that has a conversion is converted; any character without
// one is passed through unchanged.
//
// kanaText: the text to convert.
// Returns the romanized string.
function hiraganaToRomaji(kanaText) {

    var output = "";
    var position = 0;

    while (position < kanaText.length) {

        var current = kanaText.charAt(position);
        var next = kanaText.charAt(position + 1);   // '' when current is the last character
        var piece;
        var consumed = 1;

        if (isSokuonPair(current, next)) {

            // SMALL TSU (the 'sokuon') doubles the next kana's consonant.
            // Only that consonant is emitted here (T for TE, P for PA, ...);
            // the next kana itself is converted on the following pass.
            piece = getSokuonConsonant(next);

        } else if (isComboSyllable(current, next)) {

            // Contracted syllable: the leader's first letter followed by the
            // small ya/yu/yo reading; both kana are consumed together.
            piece = kanaToRomaji(current).charAt(0) + kanaToRomaji(next);
            consumed = 2;

        } else {

            piece = kanaToRomaji(current);
        }

        // Drop the kanaToHepburn() call (use piece directly) if you don't
        // want Hepburn romanization.
        output += kanaToHepburn(piece);

        position += consumed;
    }

    return output;
}

// Tells whether leader + follower form a contracted syllable: the leader's
// romaji must end in the vowel 'i' and the follower must be a small ya/yu/yo.
function isComboSyllable(leader, follower) {

    var reading = kanaToRomaji(leader);
    var endsInI = reading.charAt(reading.length - 1) == 'i';

    return endsInI ? isSmallYaYuYo(follower) : false;
}

// Tells whether kana is exactly one of the small kana ゃ, ゅ or ょ.
//
// kana: a single character (hiraganaToRomaji passes charAt() results, which
//       are '' past the end of the text).
// Returns true only for ゃ, ゅ or ょ; false for anything else, including ''.
function isSmallYaYuYo(kana) {

    // indexOf('') is 0 for every string, so an empty "kana" (the follower of
    // the last character) would otherwise count as a match and make a final
    // i-row kana look like the start of a contracted syllable.
    if (typeof kana !== 'string' || kana.length !== 1) return false;

    var littleVowels = 'ゃゅょ';
    return littleVowels.indexOf(kana) !== -1;
}

// Tells whether leader is a small tsu that doubles the consonant of follower.
// That requires a non-empty follower whose romaji starts with K, T, S or P.
function isSokuonPair(leader, follower) {

    var startsWithSmallTsu = (leader == 'っ');

    if (!startsWithSmallTsu || follower.length == 0) {
        return false;
    }

    var doublable = 'ktsp';
    var firstLetter = kanaToRomaji(follower).charAt(0);

    return doublable.indexOf(firstLetter) > -1;
}

// Returns the romaji of kana with its first letter doubled in front,
// e.g. the reading 'te' becomes 'tte'.
function getSokuonSyllable(kana) {

    var reading = kanaToRomaji(kana);

    return reading.charAt(0) + reading;
}

// Returns only the first letter of kana's romaji, i.e. the letter a
// preceding small tsu contributes to the output.
function getSokuonConsonant(kana) {

    return kanaToRomaji(kana).charAt(0);
}

// Convert one kana to its plain romaji equivalent

function kanaToRomaji(text) {

    // text: a single kana character.
    // Returns its plain (pre-Hepburn) romaji such as 'si', 'ti' or 'hu';
    // kanaToHepburn() turns those into 'shi', 'chi', 'fu'.
    // Anything without an entry is returned unchanged.
    //
    // Small tsu ('っ') is deliberately absent: hiraganaToRomaji handles it
    // through isSokuonPair() instead.

    var table = {

        'ぁ': 'a',
        'あ': 'a',
        'ぃ': 'i',
        'い': 'i',
        'ぅ': 'u',
        'う': 'u',
        'ぇ': 'e',
        'え': 'e',
        'ぉ': 'o',
        'お': 'o',
        'か': 'ka',
        'が': 'ga',
        'き': 'ki',
        'ぎ': 'gi',
        'く': 'ku',
        'ぐ': 'gu',
        'け': 'ke',
        'げ': 'ge',
        'こ': 'ko',
        'ご': 'go',
        'さ': 'sa',
        'ざ': 'za',
        'し': 'si',
        'じ': 'zi',
        'す': 'su',
        'ず': 'zu',
        'せ': 'se',
        'ぜ': 'ze',
        'そ': 'so',
        'ぞ': 'zo',
        'た': 'ta',
        'だ': 'da',
        'ち': 'ti',
        'ぢ': 'di',
        'つ': 'tu',
        'づ': 'du',
        'て': 'te',
        'で': 'de',
        'と': 'to',
        'ど': 'do',
        'な': 'na',
        'に': 'ni',
        'ぬ': 'nu',
        'ね': 'ne',
        'の': 'no',
        'は': 'ha',
        'ば': 'ba',
        'ぱ': 'pa',
        'ひ': 'hi',
        'び': 'bi',
        'ぴ': 'pi',
        'ふ': 'hu',
        'ぶ': 'bu',
        'ぷ': 'pu',
        'へ': 'he',
        'べ': 'be',
        'ぺ': 'pe',
        'ほ': 'ho',
        'ぼ': 'bo',
        'ぽ': 'po',
        'ま': 'ma',
        'み': 'mi',
        'む': 'mu',
        'め': 'me',
        'も': 'mo',
        'ゃ': 'ya',
        'や': 'ya',
        'ゅ': 'yu',
        'ゆ': 'yu',
        'ょ': 'yo',
        'よ': 'yo',
        'ら': 'ra',
        'り': 'ri',
        'る': 'ru',
        'れ': 're',
        'ろ': 'ro',
        'ゎ': 'wa',
        'わ': 'wa',
        'ゐ': 'wi',
        'ゑ': 'we',
        'を': 'wo',
        'ん': 'n',
        'ゔ': 'vu',   // hiragana VU (U+3094)
        'ヴ': 'vu'    // katakana VU, kept for backward compatibility
    };

    // Own-property check so that inputs such as 'constructor' or 'toString'
    // are passed through instead of hitting Object.prototype.
    if (Object.prototype.hasOwnProperty.call(table, text)) {
        return table[text];
    }

    return text;
}

    // These "eat" the Y from YA, YU, YO.
    // Declared with var: a bare assignment would create an implicit global
    // and throws a ReferenceError in strict mode.
    // NOTE(review): nothing in the visible code reads this list - confirm it
    // is still needed.
    var hungryYoonArray = [

        'SHI',
        'CHI',
        'JI'
    ];


// Converts one plain romaji chunk to its Hepburn spelling.
//
// romaji: a chunk as built by hiraganaToRomaji - a single kana reading
//         ('si'), a contracted syllable ('sya') or a lone doubled consonant.
// Returns the Hepburn variant when the whole chunk has one ('shi', 'sha');
// otherwise returns the chunk unchanged.
function kanaToHepburn(romaji) {

    var hepburn = {

        'si': 'shi',
        'zi': 'ji',
        'ti': 'chi',
        'di': 'ji',
        'tu': 'tsu',
        'du': 'zu',
        'hu': 'fu',
        'sya': 'sha',
        'syu': 'shu',
        'syo': 'sho',
        'zya': 'ja',
        'zyu': 'ju',
        'zyo': 'jo',
        'tya': 'cha',
        'tyu': 'chu',
        'tyo': 'cho',
        // Contracted forms of 'di', matching its 'ji' mapping above.
        'dya': 'ja',
        'dyu': 'ju',
        'dyo': 'jo'
    };

    // Own-property check keeps inherited names such as 'constructor' from
    // being treated as table entries.
    if (Object.prototype.hasOwnProperty.call(hepburn, romaji)) {
        return hepburn[romaji];
    }

    return romaji;
}

// Tells whether haystack.indexOf() finds needle anywhere in haystack.
function isNeedleInHaystack(needle, haystack) {

    var position = haystack.indexOf(needle);

    if (position == -1) {
        return false;
    }
    return true;
}

//////////// UTILITIES ////////////////////////////////

// Writes blurb into the document, followed by an HTML line break.
function debugPrint(blurb) {

    var line = blurb + '<BR>';
    document.write(line);
}

// Checks that two strings are equal, ignoring case.
// On a mismatch it sets the global failure flag, writes
// "<first> should equal <second>" to the document and throws an Error
// carrying the same message.
function assertEquals(first, second) {

    var matches = first.toUpperCase() == second.toUpperCase();

    if (matches) {
        return;
    }

    bugs = true;

    var problem = first + " should equal " + second;

    document.write(problem + "<BR>");

    throw new Error(problem);
}

// Fails loudly when myBool is falsy.
//
// myBool: the condition that is expected to hold.
// Throws an Error with the message "Error found" when it does not; the
// failure branch used to be a bare string expression that did nothing.
function assert(myBool) {

    if (myBool) return;

    throw new Error("Error found");
}