Add Tiberian Transcription Schema

charlesLoder commented 1 year ago

See discussion here

Will definitely need a test under test/schemas.

johnlockejrr commented 1 year ago

I tried with tiberian schema (hebrew-transliteration/dist/schemas/tiberianKhan.js), still working on it, much to do:

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.tiberianKhan = void 0;
const additionalFeatureTransliteration = require("../rules").additionalFeatureTransliteration;
exports.tiberianKhan = {
    VOCAL_SHEVA: "ǝ",
    HATAF_SEGOL: "ɛ",
    HATAF_PATAH: "a",
    HATAF_QAMATS: "o",
    HIRIQ: "i",
    TSERE: "e",
    SEGOL: "ɛ",
    PATAH: "a",
    QAMATS: "ɔ",
    HOLAM: "o",
    QUBUTS: "u",
    DAGESH: "",
    DAGESH_CHAZAQ: true,
    MAQAF: "-",
    PASEQ: "",
    SOF_PASUQ: "",
    QAMATS_QATAN: "ɔ",
    FURTIVE_PATAH: "a",
    HIRIQ_YOD: "i:",
    TSERE_YOD: "e:",
    SEGOL_YOD: "ɛ:",
    SHUREQ: "u:",
    HOLAM_VAV: "o:",
    QAMATS_HE: "ɔ:",
    SEGOL_HE: "ɛ:",
    TSERE_HE: "e:",
    MS_SUFX: "ɔw",
    ALEF: "ʔ",
    BET: "v",
    BET_DAGESH: "b",
    GIMEL: "ʁ",
    GIMEL_DAGESH: "g",
    DALET: "ð",
    DALET_DAGESH: "d",
    HE: "h",
    VAV: "v",
    ZAYIN: "z",
    HET: "ħ",
    TET: "tˁ",
    YOD: "j",
    FINAL_KAF: "χ",
    KAF: "χ",
    KAF_DAGESH: "kʰ",
    LAMED: "l",
    FINAL_MEM: "m",
    MEM: "m",
    FINAL_NUN: "n",
    NUN: "n",
    SAMEKH: "s",
    AYIN: "ʕ",
    FINAL_PE: "f",
    PE: "f",
    PE_DAGESH: "pʰ",
    FINAL_TSADI: "sˁ",
    TSADI: "sˁ",
    QOF: "q̟",
    RESH: "ʀ̟",
    SHIN: "ʃ",
    SIN: "s",
    TAV: "θ",
    TAV_DAGESH: "tʰ",
    DIVINE_NAME: "yhwh",
    STRESS_MARKER: { location: "before-syllable", mark: "ˈ" },
    /*ADDITIONAL_FEATURES: [
            { FEATURE: "syllable", HEBREW: "[\u05D0]$", TRANSLITERATION: "" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B4]$", TRANSLITERATION: "i:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B5]$", TRANSLITERATION: "e:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B6]$", TRANSLITERATION: "ɛ:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B7]$", TRANSLITERATION: "a:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B8]$", TRANSLITERATION: "ɔ:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B9]$", TRANSLITERATION: "o:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05BB]$", TRANSLITERATION: "u:" },
            /////{ FEATURE: "cluster", HEBREW: "[\u05B1]", TRANSLITERATION: "ɛ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B4", TRANSLITERATION: "iʔi" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B5", TRANSLITERATION: "eʔe" },
            //{ FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B6", TRANSLITERATION: "ɛʔɛ" }, // !!! //
            //{ FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B7", TRANSLITERATION: "aʔa" }, // !!! //
            { FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B8", TRANSLITERATION: "ɔʔɔ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B9", TRANSLITERATION: "oʔo" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D0\u05BB", TRANSLITERATION: "uʔu" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B4", TRANSLITERATION: "ihi" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B5", TRANSLITERATION: "ehe" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B6", TRANSLITERATION: "ɛhɛ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B7", TRANSLITERATION: "aha" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B8", TRANSLITERATION: "ɔhɔ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B9", TRANSLITERATION: "oho" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05BB", TRANSLITERATION: "uhu" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B4", TRANSLITERATION: "iħi" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B5", TRANSLITERATION: "eħe" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B6", TRANSLITERATION: "ɛħɛ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B7", TRANSLITERATION: "aħa" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B8", TRANSLITERATION: "ɔħɔ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B9", TRANSLITERATION: "oħo" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05BB", TRANSLITERATION: "uħu" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B4", TRANSLITERATION: "iʕi" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B5", TRANSLITERATION: "eʕe" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B6", TRANSLITERATION: "ɛʕɛ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B7", TRANSLITERATION: "aʕa" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B8", TRANSLITERATION: "ɔʕɔ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B9", TRANSLITERATION: "oʕo" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05BB", TRANSLITERATION: "uʕu" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D9", TRANSLITERATION: "i:" }
    ],*/
    ADDITIONAL_FEATURES: [
      {
        FEATURE: "cluster",
        HEBREW: "\u05B0",
        TRANSLITERATION: (cluster, transliteration, schema) => {
          const shewa = new RegExp(transliteration, "u");
          const clusterText = cluster.text;
          /**
           * @type {Cluster}
           */
          const next = cluster.next;
          const gutturalYodVowel = /[אהחעי]([\u{05B1}-\u{05BB}\u{05C7}])/u;
          const match = next.text.match(gutturalYodVowel);
          if (shewa.test(clusterText) && match) {
            return additionalFeatureTransliteration(clusterText, shewa, match[1], schema);
          }
          return clusterText;
        }
      }
    ],
    longVowels: false,
    qametsQatan: false,
    sqnmlvy: true,
    wawShureq: false,
    article: true,
    allowNoNiqqud: false,
    strict: true
};

johnlockejrr commented 1 year ago

Sample for what we should accomplish: Genesis 1:1-4

baʀ̟eːˈʃiːiθ bɔːˈʀ̟ɔː ʔɛloːˈhiːim ˈʔeːeθ haʃʃɔːˈmaːjim veˈʔeːeθ hɔːˈʔɔːʀ̟ɛsˁ
vɔhɔːˈʔɔːʀ̟ɛsˁ hɔːɔjˈθɔː ˈθoːhuː vɔːˈvoːhuː voˈħoːʃɛχ ʕal-pʰaˈneː θoˈhoːom vaˈʀ̟uːwaħ ʔɛloːˈhiːim maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneː hamˈmɔːjim
vaɟˈɟoːmɛʀ̟ ʔɛloːˈhiːim jiˈhiː ˈʔoːoʀ̟ ˌvaˑjhiː-ˈʔoːoʀ
vaɟˈɟaːaʀ̟ ʔɛloːˈhiːim ʔɛθ-hɔːˈʔoːoʀ̟ kʰiː-ˈtˁoːov vaɟɟavˈdeːel ʔɛloːˈhiːim beːen hɔːˈʔoːoʀ̟ wuˈveːen haːˈħoːʃɛχ

charlesLoder commented 1 year ago

@johnlockejrr

Thanks for all this!

In the branch with the new callback function for additional features, the callback gives access to the Word, Syllable, or Cluster objects and their newly added properties in v0.13.x.

Right now, I'm running into a bit of a wall. Calling something like syllable.vowelName could return something that matches a schema property. I was envisioning it being used like this:

{
  FEATURE: "syllable",
  HEBREW: "\u{05B0}",
  TRANSLITERATION: (syllable, hebrew, schema) => {
    const next = syllable.next;
    if(next && next.vowelName) {
      // renamed function below from additionalFeatureTransliteration
      return replaceAndTransliterate(syllable.text, new Regex(hebrew, "u"), schema[next.vowelName], schema);
    }
  }
}

The problem, however, is this schema[next.vowelName] which lacks type safety...

Not totally sure how to resolve other than merging these two packages into a monorepo or heavily refactoring the schema interface — probably the latter

johnlockejrr commented 1 year ago

Probably the latter I think too.

charlesLoder commented 1 year ago

Refactoring allows for something a little more elegant:

const heb = require("./dist/index");
const rules = require("./dist/rules");

const result = heb.transliterate("בְּרֵאשִׁ֖ית וַיַּבְדֵּל", {
  ADDITIONAL_FEATURES: [
    {
      // matches any sheva in a syllable that is NOT preceded by a vowel character
      HEBREW: "(?<![\u{05B1}-\u{05BB}\u{05C7}].*)\u{05B0}",
      FEATURE: "syllable",
      // Swaps the matched sheva for the schema value of the next syllable's vowel.
      TRANSLITERATION: function (syllable, _hebrew, schema) {
        const next = syllable.next;
        // Check for a missing `next` BEFORE reading `next.vowelName`; the
        // previous order made the null check unreachable and threw instead.
        if (!next) {
          return syllable.text;
        }

        // discrepancy here: in havarotjs SHEVA is simply the character
        // whereas transliteration is concerned with a specific sheva, a vocal sheva
        const nextVowel = next.vowelName === "SHEVA" ? "VOCAL_SHEVA" : next.vowelName;

        if (nextVowel) {
          const vowel = schema[nextVowel] || "";
          // replaceAndTransliterate is an internal helper function
          return rules.replaceAndTransliterate(syllable.text, new RegExp("\u{05B0}", "u"), vowel, schema);
        }

        return syllable.text;
      }
    }
  ]
});

// bērēʾšît wayyabdēl

Though the regex is a little more complicated, it ensures that the sheva being matched is likely a vocal one.

thinking out loud: the ADDITIONAL_FEATURES property was originally designed with orthographic features in mind. Perhaps an ADDITIONAL_RULES could be a possible future property where the rule could match on something simpler like syl.vowelName === "SHEVA"

johnlockejrr commented 1 year ago

bērēʾšît wayyabdēl would be wrong, because shewa is a short vowel and the b in the second word is spirantized to v. In the Tiberian transcription proposed by Khan we should have baʀ̟eːˈʃiːiθ waɟɟav'deːel or, if you prefer, something like barē'šît wayyav'dēl

charlesLoder commented 1 year ago

@johnlockejrr

Checkout this branch for tiberian.

If you could look through the tests, and let me know what is incorrect.

Feel free to push changes or just comment here

johnlockejrr commented 1 year ago

Ok. I'll do that

On Thu, 16 Feb 2023 at 03:27, Charles Loder @.***> wrote:

@johnlockejrr https://github.com/johnlockejrr

Checkout this branch for tiberian https://github.com/charlesLoder/hebrew-transliteration/tree/tiberian.

If you could look through the tests, and let me know what is incorrect.

Feel free to push changes or just comment here

— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1432382166, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHVWAGUJDXVGCI6ZOU3WXWGAHANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>

johnlockejrr commented 1 year ago

Quite close!!! Need some more little work but we are almost there:

hebrew-transliteration output:

bǝʀ̟eʔʃi:θ bɔˈʀ̟ɔʔ ʔɛloˈhi:m ˈʔeθ haʃʃˈmajim vǝˈʔeθ hɔʔɔʀ̟ɛsˁ
vǝhɔˈʔɔʀ̟ɛsˁ hɔjˈθɔ: ˈθohu: vɔˈvohu: vǝˈħoʃɛχ ʕal-pʰǝˈne: θǝˈho:m vǝʀ̟u:aħ ʔɛloˈhi:m mǝʀ̟aˈħɛfɛθ ʕal-pʰǝˈne: hammɔjim
vaˈjjoʔmɛʀ̟ ʔɛloˈhi:m jǝˈhi: ˈʔo:ʀ̟ vajǝhi:-ʔo:ʀ̟
vaˈjjaʀ̟ʔ ʔɛloˈhi:m ʔɛθ-hɔˈʔo:ʀ̟ kʰi:-ˈtˁo:v vajjavˈdel ʔɛloˈhi:m ˈbe:n hɔˈʔo:ʀ̟ u:ˈve:n haħoʃɛχ
vajjiq̟ˈʀ̟ɔʔ ʔɛloˈhi:m lɔˈʔo:ʀ̟ ˈjo:m vǝlaˈħoʃɛχ ˈq̟ɔʀ̟ɔʔ ˈlɔjlɔ: vajǝhi:-ˈʕɛʀ̟ɛv vajǝhi:-ˈvoq̟ɛʀ̟ ˈjo:m ʔɛħɔð

Geoffrey Khan:

baʀ̟eːˈʃiːiθ bɔːˈʀ̟ɔː ʔɛloːˈhiːim ˈʔeːeθ haʃʃɔːˈmaːjim veˈʔeːeθ hɔːˈʔɔːʀ̟ɛsˁ
vɔhɔːˈʔɔːʀ̟ɛsˁ hɔːɔjˈθɔː ˈθoːhuː vɔːˈvoːhuː voˈħoːʃɛχ ʕal-pʰaˈneː θoˈhoːom vaˈʀ̟uːwaħ ʔɛloːˈhiːim maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneː hamˈmɔːjim
vaɟˈɟoːmɛʀ̟ ʔɛloːˈhiːim jiˈhiː ˈʔoːoʀ̟ ˌvaˑjhiː-ˈʔoːoʀ
vaɟˈɟaːaʀ̟ ʔɛloːˈhiːim ʔɛθ-hɔːˈʔoːoʀ̟ kʰiː-ˈtˁoːov vaɟɟavˈdeːel ʔɛloːˈhiːim beːen hɔːˈʔoːoʀ̟ wuˈveːen haːˈħoːʃɛχ
vaɟɟiqˈʀ̟ɔː ʔɛloːˈhiːim lɔːˈʔoːoʀ̟ ˈjoːom valaːˈħoːʃɛχ ˈq̟ɔʀ̟ɔː ˈlɔːɔjlɔː ˌvaˑjhiː-ˈʕɛːʀ̟ɛv ˌvaˑjhiː-ˈvoːqɛ̟ʀ̟ ˈjoːom ʔɛːˈħɔːɔð

NOTES:

We should:

amend in the schema VOCAL_SHEVA: "ǝ" to VOCAL_SHEVA: "a" (my bad!)
YOD with DAGGESH is pronounced ɟɟ and not jj
in Tiberian Hebrew vocalization the vowel signs represent qualitative distinctions, not quantitative ones; a vowel is long when it is (i) in a stressed syllable or (ii) in an open unstressed syllable. That's why vocal SHEVA even exists: it is a full vowel that can't really be accented or made long (it can't even form a syllable by itself - NOTE: we can make syllables with it, but as a strict Tiberian rule we shouldn't). E.g., if vocal shewa hadn't been invented, they would have written בְּרֵאשִׁית as בַּרֵאשִׁית, but with the rule of vowel lengthening that would have given the reciter something like ba:ʀ̟eːˈʃiːiθ, with a long PATACH in an open syllable; with SHEVA we have baʀ̟eːˈʃiːiθ.
in an closed accentuated syllable the vowel is extra long (iːi in ʔɛloːˈhiːim or eːe in ˈʔeːeθ), in Khan's words "when a long vowel occurs in a closed syllable, an epenthetic vowel is inserted after the long vowel before the syllable final consonant", e.g. דָּבָר [dɔːˈvɔ:ɔʀ̟], [ʃɔːˈmɑːɑʀ̟].
the epenthetic vowel in glide was pronounced like: רוּחַ [ˈʀ̟uːwaħ], שִׂיחַ [ˈsiːjaħ] etc.
quiescent ALEPH in bǝʀ̟eʔʃi:θ bɔˈʀ̟ɔʔ (and elsewhere) should be dropped.
not lastly the rules of SHEVA, I quote Khan:

The shewa (שְׁוָא) sign (אְ) in the Tiberian vocalization system was read either as a vowel or as zero When shewa was read as vocalic, its quality in the Tiberian tradition was by default the same as that of the pataḥ vowel sign, i.e., the maximally low vowel [a] e.g. תְּכַסֶּה [tʰaχasˈsɛː] "you (ms) cover" מְדַּבְּרִים [maðabbaˈʀ̟iːim] "speaking (mpl)"

In the Tiberian tradition, when vocalic shewa occurs before a guttural consonant or the letter yod, it was realized with a different quality through an assimilatory process (i) before a guttural (אהחע) it was realized as a short vowel with the quality of the vowel on the guttural e.g. בְּעֶרְכְּךָ [bɛʕɛʀ̟kʰaˈχɔː] "by your evaluation" וְהָיָה [vɔhɔːˈjɔː] "and it became" בְּאֵר [beˈʔeːeʀ̟] "well" מְאוֹד [moˈʔoːoð] "very" מְחִיר [miˈħiːiʀ̟] "price" מְעוּכָה [muʕuːˈχɔː] "pressed" (ii) before yod, it was realized as a short vowel with the quality of short ḥireq [i] e.g. בְּיוֹם [biˈjoːom] "on the day" לְיִשְׂרָאֵל [lijisrˁɔːˈʔeːel] "to Israel" תְּדַמְּיוּן [tʰaðammiˈjuːun] "you liken (mpl)"

The shewa sign is combined with some of the basic vowel signs to form the so-called ḥaṭef signs (i) ḥaṭef pataḥ (אֲ) [a] (ii) ḥaṭef segol (אֱ) [ɛ] (iii) ḥaṭef qameṣ (אֳ) [ɔ] In such signs the vocalic reading of the shewa is made explicit and also its quality The default pronunciation of vocalic shewa with the quality of [a] was equivalent to that of the ḥaṭef pataḥ sign (אֲ) Both the vocalic shewa and the vowels expressed by ḥaṭef signs were short vowels that, in principle, had the same quantity as short vowels in closed unstressed syllables, which were represented in standard Tiberian vocalization by a simple vowel sign.

charlesLoder commented 1 year ago

Let me take these a little at a time.

ammend in the schema VOCAL_SHEVA: "ǝ" to VOCAL_SHEVA: "a" (my bad!)

Ok, that one is easy.

YOD with DAGGESH is pronounced ɟɟ and not jj

I think I got this correct, see test

https://github.com/charlesLoder/hebrew-transliteration/blob/5d8c053c63e356092797ad7d4a307dcd3b53e959/test/schemas/tiberian.test.ts#L71

quiescent ALEPH in bǝʀ̟eʔʃi:θ bɔˈʀ̟ɔʔ (and elsewhere) should be dropped.

That makes sense. See tests on the following lines, and let me know if they're correct at least in regards to the aleph:

https://github.com/charlesLoder/hebrew-transliteration/blob/251b4cedab6ff4c232b577b233db692661345173/test/schemas/tiberian.test.ts#L69

and

https://github.com/charlesLoder/hebrew-transliteration/blob/251b4cedab6ff4c232b577b233db692661345173/test/schemas/tiberian.test.ts#L103

The rest will take a little more time to get to.

johnlockejrr commented 1 year ago

Yes, you are correct!

Forgot about DIVINE_NAME: "yhwh", it was pronounced according to the vowels written: יֱהוִה֙ [ʔɛloːˈhiːim] יְהוָֹ֤ה [ʔaðo:ˈnɔ:j]

charlesLoder commented 1 year ago

the epenthetic vowel in glide was pronounced like: רוּחַ [ˈʀ̟uːwaħ], שִׂיחַ [ˈsiːjaħ] etc.

See test:

https://github.com/charlesLoder/hebrew-transliteration/blob/da4095664a1544442ec93d60be653846b168bbae/test/schemas/tiberian.test.ts#L56

Forgot about DIVINE_NAME: "yhwh", it was pronounced according to the vowels written:

That one is easy enough:

https://github.com/charlesLoder/hebrew-transliteration/blob/da4095664a1544442ec93d60be653846b168bbae/src/schemas/tiberian.ts#L66-L67

Still have to work on the long vowels and sheva.

Had a baby a few months ago, hence the stop-and-go work on this

johnlockejrr commented 1 year ago

Splendid! Now we are even closer. Good work @charlesLoder

Congratulations on the baby!

charlesLoder commented 1 year ago

Just realizing I forgot to add a test for שִׂיחַ [ˈsiːjaħ]

charlesLoder commented 1 year ago

Take a look at all these, and let me know if I'm missing something.

https://github.com/charlesLoder/hebrew-transliteration/blob/16480b501202b10c1c499f4708beff5bfc8ab36e/test/schemas/tiberian.test.ts#L52-L64

What about a vav/yod before a he (not even sure if that happens)?

johnlockejrr commented 1 year ago

All seem right, besides the long vowels of course. גָּבֹ֗הַּ gɔˈvo:ah כִּשְׁמֹ֤עַ kʰiʃˈmo:aʕ נֹ֖חַ ˈno:aħ

Summary:

SHEVA never long, never accented
any vowel long when accented even if the syllable is closed
any vowel long when in open syllable NOTE: A vowel in an unstressed closed syllable was, in principle, short. If, however, it was followed by a series of contiguous consonants of relatively weak articulation (e.g. אהעחינל ʾhʿḥynl), then the vowel was sometimes lengthened, even when not stressed. This occurred in certain prefixes of the verbs היה hyh ‘be’ and חיה ḥyh ‘live’, namely the ḥireq of prefixes before he or ḥet, e.g. יִהְיֶ֫ה [jiːhˈjɛː] ‘he will be’, and the pataḥ of the conjunctive prefix וַ wa- before yod, e.g. וַיְהִ֫י [vaːjˈhiː] ‘and it was’. Such lengthening is occasionally found elsewhere and is marked by the gaʿya sign, e.g. הֲשָׁ֣מַֽע עָם֩ [haˈʃɔːmaːʕ ˈʕɔːm] ‘did any people hear?’ (Deut. 4.33), שְׁמַֽע־נָ֤א [ʃamaːʕ-ˈnɔː] ‘listen’ (1 Sam. 28.22). The intention of the lengthening of the unstressed vowel in such contexts was, it seems, to ensure that adjacent weak letters were not elided in the reading. When a long vowel occurs in a closed syllable, an epenthetic vowel is inserted after the long vowel before the syllable final consonant e.g. דָּבָר [dɔːˈvɔ:ɔʀ̟] שָׁמַר [ʃɔːˈmɑːɑʀ̟]

What about a vav/yod before a he (not even sure if that happens)? Not sure I'm following.

NOTE:

Many words carry a secondary stress in addition to the main stress (fortunately this is noted with the cantillation marks), e.g. הָ֣אָדָ֔ם [ˌhɔːʔɔːˈðɔːm] ‘the man’ (Gen. 2.19), נִֽתְחַכְּמָ֖ה [ˌniːθḥakkaˈmɔː] ‘let us deal wisely’ (Exod. 1.10).

charlesLoder commented 1 year ago

What about a vav/yod before a he (not even sure if that happens)? Not sure I'm following.

The furtive patach tests have a vav or yod before a chet or ayin. I'm trying to think if there are any words with a furtive patach before a he (e.g. גָּבֹ֗הַּ), where the he is preceded by a vav or yod.

Many words carry a secondary stress in addition to the main stress (fortunatelly this is noted with the cantillation marks), e.g. הָ֣אָדָ֔ם [ˌhɔːʔɔːˈðɔːm] ‘the man’ (Gen. 2.19), נִֽתְחַכְּמָ֖ה [ˌniːθḥakkaˈmɔː] ‘let us deal wisely’ (Exod. 1.10).

This would be a feature to build out. I also really need to update the isAccented property on the Syllable object.

Will look at vowel length next

johnlockejrr commented 1 year ago

What about a vav/yod before a he (not even sure if that happens)? It happens: מַגְבִּ֥יהַּ תַּגְבִּ֣יהַּ יַגִּ֥יהַּ יַגְבִּ֣יהַּ אֱלֹ֨והַּ I will try to find with vav too, I think there are. EDIT: found in BHS only אֱלֹ֨והַּ. Other patach furtives: מָנֹ֜וחַ לָשׂ֥וּחַ יֵשׁ֡וּעַ אֲבִישׁ֥וּעַ וּמַלְכִּישׁ֑וּעַ שְׁלִ֔יחַ רֵ֣יח

johnlockejrr commented 1 year ago

Also, don't forget about the SHEVA rules when you got time.

In the Tiberian tradition, when vocalic shewa occurs before a guttural consonant or the letter yod, it was realized with a different quality through an assimilatory process (i) before a guttural (אהחע) it was realized as a short vowel with the quality of the vowel on the guttural e.g. בְּעֶרְכְּךָ [bɛʕɛʀ̟kʰaˈχɔː] "by your evaluation" וְהָיָה [vɔhɔːˈjɔː] "and it became" בְּאֵר [beˈʔeːeʀ̟] "well" מְאוֹד [moˈʔoːoð] "very" מְחִיר [miˈħiːiʀ̟] "price" מְעוּכָה [muʕuːˈχɔː] "pressed" (ii) before yod, it was realized as a short vowel with the quality of short ḥireq [i] e.g. בְּיוֹם [biˈjoːom] "on the day" לְיִשְׂרָאֵל [lijisrˁɔːˈʔeːel] "to Israel" תְּדַמְּיוּן [tʰaðammiˈjuːun] "you liken (mpl)"

charlesLoder commented 1 year ago

@johnlockejrr

Another round of work.

More furtive tests Take a look at these. They should be correct in terms of being preceded by a vav or yod. The long vowels aren't correct in this commit https://github.com/charlesLoder/hebrew-transliteration/commit/db97c62c3c693dc26d4e26dc36b98eaa762292bb

Epenthetic vowel

When a long vowel occurs in a closed syllable, an epenthetic vowel is inserted after the long vowel before the syllable final consonant

These long vowels are going to be tricky....

See the updated tests here https://github.com/charlesLoder/hebrew-transliteration/commit/37bbdc1c04148a8625d84dbcc46190e98c222f2b

Could you comment on each line whether it is correct or not. A simple 👍 if it's correct, and if it's not correct, then comment with the correct value.

johnlockejrr commented 1 year ago

I have commented on not correct ones, I hope I didn't make any mistakes, I could ask Khan to correct but maybe a little later.

johnlockejrr commented 1 year ago

What's the latest branch with Tiberian Schema?

johnlockejrr commented 1 year ago

Tried on the latest. Genesis 1

baʀ̟eʃiːθ bɔˈʀ̟ɔ ʔɛloˈhiːm ˈʔeθ haʃʃˈmajim vaˈʔeθ hɔʔɔʀ̟ɛsˁ
vahɔˈʔɔʀ̟ɛsˁ hɔjˈθɔː ˈθoˈhuː vɔˈvohuː vaˈħoʃɛχ ʕal-pʰaˈneː θaˈhoːm vaˈʀ̟uːwaħ ʔɛloˈhiːm maʀ̟aˈħɛfɛθ ʕal-pʰaˈneː hammɔjim
vaˈɟɟo֥mɛʀ̟ ʔɛloˈhiːm jaˈhiː ˈʔoːʀ̟ vajahiː-ʔoːʀ̟
vaˈɟɟa֧ʀ̟ ʔɛloˈhiːm ʔɛθ-hɔˈʔoːʀ̟ kʰiː-ˈtˁoːv vaɟɟavˈdel ʔɛloˈhiːm ˈbeːn hɔˈʔoːʀ̟ uːˈveːn haħoʃɛχ
vaɟɟiq̟ˈʀ̟ɔ ʔɛloˈhiːm lɔˈʔoːʀ̟ ˈjoːm valaˈħoʃɛχ ˈq̟ɔʀ̟ɔ ˈlɔjlɔː vajahiː-ˈʕɛʀ̟ɛv vajahiː-ˈvoq̟ɛʀ̟ ˈjoːm ʔɛħɔð

Khan:

baʀ̟eːˈʃiːiθ bɔːˈʀ̟ɔː ʔɛloːˈhiːim ˈʔeːeθ haʃʃɔːˈmaːjim veˈʔeːeθ hɔːˈʔɔːʀ̟ɛsˁ
vɔhɔːˈʔɔːʀ̟ɛsˁ hɔːɔjˈθɔː ˈθoːhuː vɔːˈvoːhuː voˈħoːʃɛχ ʕal-pʰaˈneː θoˈhoːom vaˈʀ̟uːwaħ ʔɛloːˈhiːim maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneː hamˈmɔːjim
vaɟˈɟoːmɛʀ̟ ʔɛloːˈhiːim jiˈhiː ˈʔoːoʀ̟ ˌvaˑjhiː-ˈʔoːoʀ
vaɟˈɟaːaʀ̟ ʔɛloːˈhiːim ʔɛθ-hɔːˈʔoːoʀ̟ kʰiː-ˈtˁoːov vaɟɟavˈdeːel ʔɛloːˈhiːim beːen hɔːˈʔoːoʀ̟ wuˈveːen haːˈħoːʃɛχ
vaɟɟiqˈʀ̟ɔː ʔɛloːˈhiːim lɔːˈʔoːoʀ̟ ˈjoːom valaːˈħoːʃɛχ ˈq̟ɔʀ̟ɔː ˈlɔːɔjlɔː ˌvaˑjhiː-ˈʕɛːʀ̟ɛv ˌvaˑjhiː-ˈvo:q̟ɛʀ̟ ˈjoːom ʔɛːˈħɔːɔð

charlesLoder commented 1 year ago

@johnlockejrr

Just updated the branch.

I'm struggling a bit with the vowel length stuff.

The most recent commit for Gen 1:1-5 produces:

baʀ̟eːˈʃiːijθ bɔːˈʀ̟ɔːɔ ʔɛːloːˈhiːijm ˈʔeːeθ haʃɔːˈmaːjim vaˈʔeːeθ hɔːʔɔːˈʀ̟ɛːɛsˁ
vahɔːˈʔɔːʀ̟ɛsˁ hɔjˈθɔːh ˈθoːˈhuː vɔːˈvoːhuː vaˈħoːʃɛχ ʕal-pʰaˈneːj θaˈhoːovm vaˈʀ̟uːwaħ ʔɛːloːˈhiːijm maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneːj hamɔːˈjiːim
vaˈɟɟoː֥mɛʀ̟ ʔɛːloːˈhiːijm jaˈhiːj ˈʔoːovʀ̟ vaːjahiːj-ˈʔoːovʀ̟
vaˈɟɟa֧ʀ̟ ʔɛːloːˈhiːijm ʔɛθ-hɔːˈʔoːovʀ̟ kʰiːj-ˈtˁoːovv vaɟɟavˈdeːel ʔɛːloːˈhiːijm ˈbeːejn hɔːˈʔoːovʀ̟ uːˈveːejn haːħoːˈʃɛχ
vaɟɟiq̟ˈʀ̟ɔːɔ ʔɛːloːˈhiːijm lɔːˈʔoːovʀ̟ ˈjoːovm valaːˈħoːʃɛχ ˈq̟ɔːʀ̟ɔ ˈlɔjlɔːh vaːjahiːj-ˈʕɛːʀ̟ɛv vaːjahiːj-ˈvoːq̟ɛʀ̟ ˈjoːovm ʔɛːˈħɔːɔð

In some ways it's closer; in other ways it's way off

johnlockejrr commented 1 year ago

Yes, way closer! We are on the right path :)

johnlockejrr commented 1 year ago

Same branch gave me this for Gen 1:1-5:

baʀ̟eːʃiːθ bɔːˈʀ̟ɔːɔ ʔɛːloːˈhiːijm ˈʔeːeθ haʃʃˈmaːjim vaˈʔeːeθ hɔːʔɔːʀ̟ɛsˁ
vahɔːˈʔɔːʀ̟ɛsˁ hɔjˈθɔːh ˈθoːˈhuː vɔːˈvoːhuː vaˈħoːʃɛχ ʕal-pʰaˈneːj θaˈhoːovm vaˈʀ̟uːwaħ ʔɛːloːˈhiːijm maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneːj hamɔːjim
vaˈɟɟoː֥mɛʀ̟ ʔɛːloːˈhiːijm jaˈhiːj ˈʔoːovʀ̟ vaːjahiːj-ʔoːʀ̟
vaˈɟɟa֧ʀ̟ ʔɛːloːˈhiːijm ʔɛθ-hɔːˈʔoːovʀ̟ kʰiːj-ˈtˁoːovv vaɟɟavˈdeːel ʔɛːloːˈhiːijm ˈbeːejn hɔːˈʔoːovʀ̟ uːˈveːejn haːħoːʃɛχ
vaɟɟiq̟ˈʀ̟ɔːɔ ʔɛːloːˈhiːijm lɔːˈʔoːovʀ̟ ˈjoːovm valaːˈħoːʃɛχ ˈq̟ɔːʀ̟ɔ ˈlɔjlɔːh vaːjahiːj-ˈʕɛːʀ̟ɛv vaːjahiːj-ˈvoːq̟ɛʀ̟ ˈjoːovm ʔɛːħɔð

One note (or two), the prolonged vowel appears only in accented closed syllable so not in hɔːʔɔːˈʀ̟ɛːɛsˁ that should be hɔː'ʔɔːʀ̟ɛsˁ, in bɔːˈʀ̟ɔːɔ we should have only bɔːˈʀ̟ɔː because Aleph is quiescent so it doesn't prolong the already long vowel. We should also get rid of the Yod as mater e.g. uːˈveːejn that should be wuˈveːen or ʔɛːloːˈhiːijm that should be ʔɛloːˈhiːim etc. Also the quality of the Sheva before gutturals and Yod: not vaˈħoːʃɛχ but voˈħoːʃɛχ, not vaˈʔeːeθ but veˈʔeːeθ etc.

charlesLoder commented 1 year ago

Ok, some more progress is being made, but now I'm hitting up against some deeper issues related to the syllabification package:

And some other issues I'm still trying to figure out.

I'm going to remove this from the v2.4.0 milestone so I can create another release and update the site.

Once I make more substantial changes to the syllabification package, I'll return to this.

It is, however, getting much closer! For Gen 1:1-5 I'm seeing a lot of the same issues occur, so much of it should be resolved soon.

I'm also working on a book project soon so that may take time away from this (too many irons in the fire! 🔥 )

charlesLoder commented 1 year ago

@johnlockejrr

I'm confused by "הָיְתָ֥ה" which is transcribed as "hɔːɔjˈθɔː". He says regarding the first vowel:

Insertion of epenthetic in closed syllable with a long vowel: §I.2.4.

In that section he says:

When long vowels with the main stress occur in closed syllables, there is evidence that an epenthetic with the same quality as that of the long vowel occurred before the final consonant in its phonetic realization

The first vowel, however, does not take the main stress.

Thoughts?

johnlockejrr commented 1 year ago

I'm confused too; I will ask Khan. If I'm not mistaken, it should be "hɔjˈθɔː": the first vowel should be short because it is in a closed unstressed syllable… what do you think?

On Thu, 27 Apr 2023 at 22:20, Charles Loder @.***> wrote:

@johnlockejrr https://github.com/johnlockejrr

I'm confused by "הָיְתָ֥ה" which is transcribed as "hɔːɔjˈθɔː". He says regarding the first vowel:

Insertion of epenthetic in closed syllable with a long vowel: §I.2.4.

In that section he says:

When long vowels with the main stress occur in closed syllables, there is evidence that an epenthetic with the same quality as that of the long vowel occurred before the final consonant in its phonetic realization

The first vowel, however, does not take the main stress.

Thoughts?

— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1526368498, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHVUXFDK6ZPFP6FGQ63XDLIJVANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>

charlesLoder commented 1 year ago

The latest commit produces "hɔjˈθɔː", which I think it should be

charlesLoder commented 1 year ago

@johnlockejrr

Any response from Khan?

I've locally been slowly updating the tests to match the output, but only when I can infer that they are correct.

Finishing some projects this next week, then I can shift some attention back to this

johnlockejrr commented 1 year ago

Can you refresh my memory?

On Sat, 3 Jun 2023 at 13:56, Charles Loder @.***> wrote:

@johnlockejrr https://github.com/johnlockejrr

Any response from Khan?

I've locally been slowly updating the tests to match the output, but only when I can infer that they are correct.

Finishing some projects this next week, then I can shift some attention back to this

— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1574891954, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHQPIVFXRF4B3P7MJNLXJMQ5ZANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>

charlesLoder commented 1 year ago

@johnlockejrr ,

This issue is "הָיְתָ֥ה" which is transcribed as "hɔːɔjˈθɔː" in the book, but definitely seems like it should be "hɔjˈθɔː"

johnlockejrr commented 1 year ago

I'm 100% sure it is an error or a typo in the book.

On Sat, 10 Jun 2023 at 19:56, Charles Loder @.***> wrote:

@johnlockejrr https://github.com/johnlockejrr ,

This issue is "הָיְתָ֥ה" which is transcribed as "hɔːɔjˈθɔː" in the book, but definitely seems like it should be "hɔjˈθɔː"

— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1585754955, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHVSCSNE6C3XDH45VMLXKSYNTANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>

charlesLoder commented 1 year ago

Ok, I'll keep updating the tests

charlesLoder commented 1 year ago

@johnlockejrr

I made some updates to the tests, primarily with regard to vowel length.

There are, of course, some remaining issues:

stress markers in wrong place with doubled consonants
- for a word like וַיֹּ֥אמֶר it is transliterated as vaˈɟɟoː֥mɛʀ̟
- it should be vaɟˈɟoːmɛʀ̟
- a fix is possible, I just haven't quite figured it out
possible typos in transcriptions
- we've already discussed הָיְתָ֥ה being transliterated hɔjˈθɔː instead of the transcribed hɔːɔjˈθɔː
- another is וַֽיְהִי being transliterated as vaːjihiː instead of the transcribed vaˑjhiː
- both of these issues contain a yod, so I'll research more
- another is קָ֣רָא being transliterated as ˈq̟ɔːʀ̟ɔː instead of the transcribed ˈq̟ɔʀ̟ɔː
- the resh may be an exception

Other than that, it's getting close!

johnlockejrr commented 1 year ago

Yes, very close! I will test them, very nice job!

On Sun, 25 Jun 2023 at 20:16, Charles Loder @.***> wrote:

@johnlockejrr https://github.com/johnlockejrr

I made some updates to the tests, primarily with regard to vowel length.

There are, of course, some remaining issues:

stress markers in wrong place with doubled consonants

for a word like וַיֹּ֥אמֶר it is transliterated as vaˈɟɟoː֥mɛʀ̟

it should be vaɟˈɟoːmɛʀ̟

a fix is possible, I just haven't quite figured it out

possbile typos in transcriptions

we've already discussed הָיְתָ֥ה being transliterated hɔjˈθɔː instead of the transcribed hɔːɔjˈθɔː

another is וַֽיְהִי being transliterated as vaːjihiː instead of the transcribed vaˑjhiː

both of these issues contain a yod, so I'll research more

another is קָ֣רָא being transliterated as ˈq̟ɔːʀ̟ɔː instead of the transcribed ˈq̟ɔʀ̟ɔː

the resh may be an expection

Other than that, it's getting close!

— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1606208849, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHRQCJSDWSUH6WFY37LXNB57VANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>

charlesLoder commented 1 year ago

At the root of the repo, I have a test file like this:

const heb = require("./dist/index");
const tiberian = require("./dist/schemas/tiberian").tiberian;

// Khan's transcription of the first 5 verses of Gen 1, one string per verse,
// flattened into a single list of space-separated words.
const khan = [
  "baʀ̟eːˈʃiːiθ bɔːˈʀ̟ɔː ʔɛloːˈhiːim ˈʔeːeθ haʃʃɔːˈmaːjim veˈʔeːeθ hɔːˈʔɔːʀ̟ɛsˁ",
  "vɔhɔːˈʔɔːʀ̟ɛsˁ hɔːɔjˈθɔː ˈθoːhuː vɔːˈvoːhuː voˈħoːʃɛχ ʕal-pʰaˈneː θoˈhoːom vaˈʀ̟uːwaħ ʔɛloːˈhiːim maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneː hamˈmɔːjim",
  "vaɟˈɟoːmɛʀ̟ ʔɛloːˈhiːim jiˈhiː ˈʔoːoʀ̟ ˌvaˑjhiː-ˈʔoːoʀ",
  "vaɟˈɟaːaʀ̟ ʔɛloːˈhiːim ʔɛθ-hɔːˈʔoːoʀ̟ kʰiː-ˈtˁoːov vaɟɟavˈdeːel ʔɛloːˈhiːim beːen hɔːˈʔoːoʀ̟ wuˈveːen haːˈħoːʃɛχ",
  "vaɟɟiqˈʀ̟ɔː ʔɛloːˈhiːim lɔːˈʔoːoʀ̟ ˈjoːom valaːˈħoːʃɛχ ˈq̟ɔʀ̟ɔː ˈlɔːɔjlɔː ˌvaˑjhiː-ˈʕɛːʀ̟ɛv ˌvaˑjhiː-ˈvo:q̟ɛʀ̟ ˈjoːom ʔɛːˈħɔːɔð"
].flatMap((verse) => verse.split(" "));

// The Hebrew text of the same verses, flattened into a single list of
// space-separated words so each lines up by index with `khan`.
const inputs = [
  "בְּרֵאשִׁ֖ית בָּרָ֣א אֱלֹהִ֑ים אֵ֥ת הַשָּׁמַ֖יִם וְאֵ֥ת הָאָֽרֶץ׃",
  "וְהָאָ֗רֶץ הָיְתָ֥ה תֹ֨הוּ֙ וָבֹ֔הוּ וְחֹ֖שֶׁךְ עַל־פְּנֵ֣י תְה֑וֹם וְר֣וּחַ אֱלֹהִ֔ים מְרַחֶ֖פֶת עַל־פְּנֵ֥י הַמָּֽיִם׃",
  "וַיֹּ֥אמֶר אֱלֹהִ֖ים יְהִ֣י א֑וֹר וַֽיְהִי־אֽוֹר׃",
  "וַיַּ֧רְא אֱלֹהִ֛ים אֶת־הָא֖וֹר כִּי־ט֑וֹב וַיַּבְדֵּ֣ל אֱלֹהִ֔ים בֵּ֥ין הָא֖וֹר וּבֵ֥ין הַחֹֽשֶׁךְ׃",
  "וַיִּקְרָ֨א אֱלֹהִ֤ים לָאוֹר֙ י֔וֹם וְלַחֹ֖שֶׁךְ קָ֣רָא לָ֑יְלָה וַֽיְהִי־עֶ֥רֶב וַֽיְהִי־בֹ֖קֶר י֥וֹם אֶחָֽד׃"
].flatMap((verse) => verse.split(" "));

// Transliterate every input word with the tiberian schema and collect only the
// words whose output differs from the expected transcription at the same index.
const results = [];
inputs.forEach((word, index) => {
  const received = heb.transliterate(word, tiberian);
  const expected = khan[index];
  if (received === expected) return;
  results.push({ text: word, expected, received });
});

// Print the mismatches for manual inspection.
console.log(results);

Which is helpful for finding all the incorrect ones:


[
  { text: 'הָיְתָ֥ה', expected: 'hɔːɔjˈθɔː', received: 'hɔjˈθɔː' },
  { text: 'תֹ֨הוּ֙', expected: 'ˈθoːhuː', received: 'ˈθoːˈhuː' },
  {
    text: 'הַמָּֽיִם׃',
    expected: 'hamˈmɔːjim',
    received: 'haˈmmɔːjim'
  },
// ... etc.
]

johnlockejrr commented 1 year ago

I think קָ֣רָא should be transliterated as q̟ɔː'ʀ̟ɔː

On Sun, 25 Jun 2023 at 20:16, Charles Loder @.***> wrote:

@johnlockejrr https://github.com/johnlockejrr

I made some updates to the tests, primarily with regard to vowel length.

There are, of course, some remaining issues:

stress markers in wrong place with doubled consonants

for a word like וַיֹּ֥אמֶר it is transliterated as vaˈɟɟoː֥mɛʀ̟

it should be vaɟˈɟoːmɛʀ̟

a fix is possible, I just haven't quite figured it out

possible typos in transcriptions

we've already discussed הָיְתָ֥ה being transliterated hɔjˈθɔː instead of the transcribed hɔːɔjˈθɔː

another is וַֽיְהִי being transliterated as vaːjihiː instead of the transcribed vaˑjhiː

both of these issues contain a yod, so I'll research more

another is קָ֣רָא being transliterated as ˈq̟ɔːʀ̟ɔː instead of the transcribed ˈq̟ɔʀ̟ɔː

the resh may be an exception

Other than that, it's getting close!

— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1606208849, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHRQCJSDWSUH6WFY37LXNB57VANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>

charlesLoder commented 1 year ago

I thought that too, but ˈq̟ɔʀ̟ɔː is the transcription at the end of the book...

I may reach out to Khan soon once I try to tackle the stress marker

johnlockejrr commented 1 year ago

I'm sure it's a typo, because any open syllable lengthens the vowel; otherwise it should be a shewa. We should consult Khan anyway.

On Sun, 25 Jun 2023 at 23:51, Charles Loder @.***> wrote:

I thought that too, but ˈq̟ɔʀ̟ɔː is the transcription at the end of the book...

I may reach out to Khan soon once I try to tackle the stress marker

— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1606273743, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHV44UZUIA2E36AKSHTXNCXGPANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>

charlesLoder commented 1 year ago

@johnlockejrr

Ok, so huge progress!

Using the script above, there are only a handful of different ones!:

[
  { text: 'הָיְתָ֥ה', expected: 'hɔːɔjˈθɔː', received: 'hɔjˈθɔː' },
  { text: 'תֹ֨הוּ֙', expected: 'ˈθoːhuː', received: 'ˈθoːˈhuː' },
  { text: 'בֵּ֥ין', expected: 'beːen', received: 'ˈbeːen' },
  { text: 'קָ֣רָא', expected: 'ˈq̟ɔʀ̟ɔː', received: 'ˈq̟ɔːʀ̟ɔː' }
]

There are 2 categories.

potential typos

We've already discussed them, but הָיְתָ֥ה and קָ֣רָא may potentially be typos in TPTBH and the results from the package are actually correct.

accents

The havarotjs package doesn't mark stress in the most rigorous way. If the syllable has a taam, it is marked as accented (i.e. stressed). This is why תֹ֨הוּ֙ has two stress markers as ˈθoːˈhuː and why בֵּ֥ין has a stress marker.

next steps

I'm willing to let the accents be less than perfect for now.

I'll test a little more, push a dev release of this, make a preview site, and ask Khan for some feedback.

charlesLoder commented 1 year ago

May actually be able to fix the "תֹ֨הוּ֙" in this

johnlockejrr commented 1 year ago

Wonderful! I'll do the tests myself too. Very good work @charlesLoder!

On Sun, 2 Jul 2023 at 02:48, Charles Loder @.***> wrote:

@johnlockejrr https://github.com/johnlockejrr

Ok, so huge progress!

Using the script above https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1606222671 , there are only a handful of different ones!:

[ { text: 'הָיְתָ֥ה', expected: 'hɔːɔjˈθɔː', received: 'hɔjˈθɔː' }, { text: 'תֹ֨הוּ֙', expected: 'ˈθoːhuː', received: 'ˈθoːˈhuː' }, { text: 'בֵּ֥ין', expected: 'beːen', received: 'ˈbeːen' }, { text: 'קָ֣רָא', expected: 'ˈq̟ɔʀ̟ɔː', received: 'ˈq̟ɔːʀ̟ɔː' }]

There are 2 categories. potential typos

We've already discussed them, but הָיְתָ֥ה and קָ֣רָא may potentially be typos in TPTBH and the results from the package are actually correct. accents

The havarotjs package doesn't mark stress in the most rigorous way. If the syllable has a taam, it is marked as accented (i.e. stressed). This is why תֹ֨הוּ֙ has two stress markers as ˈθoːˈhuː and why בֵּ֥ין has a stress marker. next steps

I'm willing to let the accents be less than perfect for now.

I'll test a little more, push a dev release of this, make a preview site, and ask Khan for some feedback.

— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1616243154, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHUWLSJYR2ALNU5AL5LXODANRANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>

charlesLoder commented 1 year ago

The next major hurdle has been the resh. There are two realizations — the standard "ʀ̟" and the pharyngealized "rˁ".

I have the pharyngealized almost figured out, except for one case when the resh

is in the same syllable, or at least the same foot, as a preceding alveolar p229

He does not give an example, however, of a word like תְּפַר where the resh is in a different syllable but same foot as an alveolar, but the resh is not directly preceded by an alveolar. I believe this should be a ʀ̟ but I'm awaiting confirmation

charlesLoder commented 1 year ago

I'm awaiting confirmation

They said they were unaware.

Given the examples and explanations in the book, I'm assuming he means that the resh must be in direct contact with the alveolar. I'll research a little more, but working off the assumption it should be a ʀ̟ for now

johnlockejrr commented 1 year ago

Yes, I'm aware of those two realisations of resh, but I'm completely puzzled about it because I didn't find anything like that in other Semitic languages (other than normal R and “emphatic” R in some contexts).

On Tue, 11 Jul 2023 at 04:23, Charles Loder @.***> wrote:

The next major hurdle has been the resh. There are two realizations — the standard "ʀ̟" and the pharyngealized "rˁ".

I have the pharyngealized almost figured out, except for one case when the resh

is in the same syllable, or at least the same foot, as a preceding alveolar p229

He does not give an example, however, of a word like תְּפַר where the resh is in a different syllable but same foot as an alveolar, but the resh is not directly preceded by an alveolar. I believe this should be a ʀ̟ but I'm awaiting confirmation

— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1629953188, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHUDQZAXOL5QIZN3MC3XPSTKDANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>

charlesLoder commented 1 year ago

Ok, so I got the resh figured out.

The next step is figuring out the patach (I.2.1.2), which is very confusing. He says:

The back quality [ɑ] would have been induced in particular by the environment of consonants involving retraction of the tongue root, especially pharyngeals and pharyngealized consonants.

which makes sense at first glance.

But then on p249, he gives the transcription of [ˈbɑːʕaˌra] for בָּעֲרָ֥ה where the qamets, not patach, is transcribed with [ɑ]. Perhaps another typo? Are those transcriptions from Morag?

Then in a similar word on p621, בַּעֲצַת, he transcribes it as [baːʕɑˈsˁɑːɑθ] with the hatef-patach as [ɑ]. Is the ayin or the tsade causing the conditioned realization?

For the transcription of מַזְרִ֣יעַ on p618–19, he has [mɑzˈrˁiːjaʕ], with a note:

Pataḥ is pronounced as back [ɑ] in the environment of pharyngealized consonants: §I.2.1.3.

The first patach isn't even in the same syllable or foot as the resh, so why not [a], and the second patach precedes a pharyngeal, so why not [ɑ]???

I'm completely lost on this one...

johnlockejrr commented 1 year ago

I'm sure the second example is a typo.

On Sun, 16 Jul 2023 at 03:19, Charles Loder @.***> wrote:

Ok, so I got the resh figured out.

The next step is figuring out the patach (I.2.1.2), which is very confusing. He says:

The back quality [ɑ] would have been induced in particular by the environment of consonants involving retraction of the tongue root, especially pharyngeals and pharyngealized consonants.

which makes sense at first glance.

But then on p249, he gives the transcription of [ˈbɑːʕaˌra] for בָּעֲרָ֥ה where the qamets, not patach, is transcribed with [ɑ]. Perhaps another typo? Are those transcriptions from Morag?

For the transcription of מַזְרִ֣יעַ on p618–19, he has [mɑzˈrˁiːjaʕ], with a note:

Pataḥ is pronounced as back [ɑ] in the environment of pharyngealized consonants: §I.2.1.3.

The first patach isn't even in the same syllable or foot as the resh, so why not [a], and the second patach precedes a pharyngeal, so why not [ɑ]???

I'm completely lost on this one...

— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1636926379, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHTC2OZIVETPKAUSXJTXQMXP3ANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>

charlesLoder commented 1 year ago

So Khan said:

If I transcribed בָּעֲרָ֥ה as [ˈbɑːʕaˌra], this is a mistake. The transcription should be [bɔːʕaˌrɔː].

Which makes sense with regard to the qamets, but I still would have expected an [ɑ].

I asked a follow up regarding מַזְרִ֣יעַ but have not received an answer back.

charlesLoder / hebrew-transliteration