merge_rules.js (3332B)
import { getLead, getPadchim, getVowel, hasHiddenPadchim, join, matchGeulja } from "./hangul.js";

/**
 * Helper function for defining common contractions between a character without
 * a padchim and a character that starts with 'ᄋ', e.g. ㅐ + ㅕ -> ㅐ when
 * applied to 해 + 였 yields 했.
 *
 * The returned rule replaces the last character of `x` and the first character
 * of `y` with a single character built from the lead of the former, the
 * contracted vowel, and the padchim of the latter.
 *
 * @param {string} vowel1 Vowel required on the last character of the left operand.
 * @param {string} vowel2 Vowel required on the first character of the right operand.
 * @param {string} new_vowel Vowel of the contracted character.
 * @returns {Rule} The created rule; it yields `[description, merged]` on a
 *   match and `null` otherwise.
 */
function vowelContraction(vowel1, vowel2, new_vowel) {
  function rule(x, y) {
    const last = x[x.length - 1];
    const first = y[0];
    // NOTE(review): `null` presumably means "no padchim" and `undefined`
    // "any value" for matchGeulja -- confirm against hangul.js.
    if (matchGeulja(last, undefined, vowel1, null) && matchGeulja(first, "ᄋ", vowel2, undefined)) {
      const contracted = join(getLead(last), new_vowel, getPadchim(first));
      return [
        `vowel contraction [${vowel1} + ${vowel2} -> ${new_vowel}]`,
        x.substring(0, x.length - 1) + contracted + y.substring(1),
      ];
    }
    return null;
  }
  return rule;
}

/**
 * Helper function for defining merges where a character will take the padchim
 * of a merged character if the first character doesn't already have a padchim,
 * e.g. 습 -> 가 + 습니다 -> 갑니다.
 *
 * @param {string[]} geuljas Leading characters of `y` that trigger the rule.
 * @returns {Rule} The created rule; it yields `["borrow padchim", merged]` on
 *   a match and `null` otherwise.
 */
function noPadchimRule(geuljas) {
  function rule(x, y) {
    const last = x[x.length - 1];
    const first = y[0];
    if (!getPadchim(last) && geuljas.includes(first)) {
      const merged = join(getLead(last), getVowel(last), getPadchim(first));
      return ["borrow padchim", x.substring(0, x.length - 1) + merged + y.substring(1)];
    }
    return null;
  }
  return rule;
}

/**
 * Ordered list of merge rules. Each rule takes the two strings to merge and
 * returns `[description, merged]` when it applies or `null` when it does not.
 * The last entry always applies, so order matters: more specific rules come
 * first.
 */
export const merge_rules = [
  noPadchimRule(["을", "습", "읍", "는", "음"]),
  // Rule: a final ㄹ padchim followed by 음 fuses into the double padchim ᆱ.
  (x, y) => {
    const last = x[x.length - 1];
    if (getPadchim(last) === "ᆯ" && y.startsWith("음")) {
      const fused = join(getLead(last), getVowel(last), "ᆱ");
      // Only the leading 음 is absorbed; anything after it in y is kept, as in
      // the other merging rules.
      return ["ㄹ + ㅁ -> ᆱ", x.slice(0, x.length - 1) + fused + y.substring(1)];
    }
    return null;
  },
  vowelContraction("ㅐ", "ㅓ", "ㅐ"),
  vowelContraction("ㅡ", "ㅓ", "ㅓ"),
  vowelContraction("ㅜ", "ㅓ", "ㅝ"),
  vowelContraction("ㅗ", "ㅏ", "ㅘ"),
  vowelContraction("ㅚ", "ㅓ", "ㅙ"),
  vowelContraction("ㅙ", "ㅓ", "ㅙ"),
  vowelContraction("ㅘ", "ㅓ", "ㅘ"),
  vowelContraction("ㅝ", "ㅓ", "ㅝ"),
  vowelContraction("ㅏ", "ㅏ", "ㅏ"),
  vowelContraction("ㅡ", "ㅏ", "ㅏ"),
  vowelContraction("ㅣ", "ㅓ", "ㅕ"),
  vowelContraction("ㅓ", "ㅓ", "ㅓ"),
  vowelContraction("ㅓ", "ㅣ", "ㅐ"),
  vowelContraction("ㅏ", "ㅣ", "ㅐ"),
  vowelContraction("ㅑ", "ㅣ", "ㅒ"),
  vowelContraction("ㅒ", "ㅓ", "ㅒ"),
  vowelContraction("ㅔ", "ㅓ", "ㅔ"),
  vowelContraction("ㅕ", "ㅓ", "ㅕ"),
  vowelContraction("ㅏ", "ㅕ", "ㅐ"),
  vowelContraction("ㅖ", "ㅓ", "ㅖ"),
  vowelContraction("ㅞ", "ㅓ", "ㅞ"),
  // Rule: Don't append 으 to ㄹ irregulars.
  (x, y) => {
    if (getPadchim(x[x.length - 1]) === "ᆯ" && y.startsWith("면")) {
      return ["join", x.toString() + y.toString()];
    }
    return null;
  },
  // Rule: 으 insertion.
  (x, y) => {
    const geuljas = ["면", "세", "십"];
    const last = x[x.length - 1];
    if ((hasHiddenPadchim(last) || getPadchim(last)) && geuljas.includes(y[0])) {
      return ["padchim + consonant -> insert 으", `${x}으${y}`];
    }
    return null;
  },
  // Default rule: Just append the contents.
  (x, y) => ["join", x.toString() + y.toString()],
];