158 lines
5.6 KiB
C#
158 lines
5.6 KiB
C#
using System.Linq;
|
|
|
|
namespace I2.Loc
|
|
{
|
|
|
|
public class HindiFixer
|
|
{
|
|
|
|
// Needs to also implement: Hindi: https://www.microsoft.com/typography/OpenTypeDev/devanagari/intro.htm
|
|
//https://social.msdn.microsoft.com/Forums/windows/en-US/9883ff08-bd72-499b-9543-ed424167281d/converting-hindi-text-to-english-text?forum=winforms
|
|
internal static string Fix(string text)
|
|
{
|
|
while (true)
|
|
{
|
|
char[] arr = text.ToCharArray();
|
|
bool changed = false;
|
|
|
|
for (int i = 0; i < arr.Length; ++i)
|
|
{
|
|
// interchange the order of "i" vowel
|
|
if (arr[i] == 2367 && !char.IsWhiteSpace(arr[i - 1]) && arr[i - 1]!=0)
|
|
{
|
|
arr[i] = arr[i - 1];
|
|
arr[i - 1] = (char)2367;
|
|
changed = true;
|
|
}
|
|
|
|
if (i == arr.Length - 1)
|
|
continue;
|
|
|
|
// letter "I" + Nukta forms letter vocalic "L"
|
|
if (arr[i] == 2311)
|
|
{
|
|
if (arr[i + 1] == 2364)
|
|
{
|
|
arr[i] = (char)2316;
|
|
arr[i + 1] = (char)0;
|
|
changed = true;
|
|
}
|
|
}
|
|
|
|
// vowel sign vocalic "R" + sign Nukta forms vowel sign vocalic "Rr"
|
|
if (arr[i] == 2371)
|
|
{
|
|
if (arr[i + 1] == 2364)
|
|
{
|
|
arr[i] = (char)2372;
|
|
arr[i + 1] = (char)0;
|
|
changed = true;
|
|
}
|
|
}
|
|
|
|
// Candrabindu + sign Nukta forms Om
|
|
if (arr[i] == 2305)
|
|
{
|
|
if (arr[i + 1] == 2364)
|
|
{
|
|
arr[i] = (char)2384;
|
|
arr[i + 1] = (char)0;
|
|
changed = true;
|
|
}
|
|
}
|
|
|
|
// letter vocalic "R" + sign Nukta forms letter vocalic "Rr"
|
|
if (arr[i] == 2315)
|
|
{
|
|
if (arr[i + 1] == 2364)
|
|
{
|
|
arr[i] = (char)2400;
|
|
arr[i + 1] = (char)0;
|
|
changed = true;
|
|
}
|
|
}
|
|
|
|
// letter "Ii" + sign Nukta forms letter vocalic "LI"
|
|
if (arr[i] == 2312)
|
|
{
|
|
if (arr[i + 1] == 2364)
|
|
{
|
|
arr[i] = (char)2401;
|
|
arr[i + 1] = (char)0;
|
|
changed = true;
|
|
}
|
|
}
|
|
|
|
// vowel sign "I" + sign Nukta forms vowel sign vocalic "L"
|
|
if (arr[i] == 2367)
|
|
{
|
|
if (arr[i + 1] == 2364)
|
|
{
|
|
arr[i] = (char)2402;
|
|
arr[i + 1] = (char)0;
|
|
changed = true;
|
|
}
|
|
}
|
|
|
|
// vowel sign "Ii" + sign Nukta forms vowel sign vocalic "LI"
|
|
if (arr[i] == 2368)
|
|
{
|
|
if (arr[i + 1] == 2364)
|
|
{
|
|
arr[i] = (char)2403;
|
|
arr[i + 1] = (char)0;
|
|
changed = true;
|
|
}
|
|
}
|
|
|
|
// Danda + sign Nukta forms sign Avagraha
|
|
if (arr[i] == 2404)
|
|
{
|
|
if (arr[i + 1] == 2364)
|
|
{
|
|
arr[i] = (char)2365;
|
|
arr[i + 1] = (char)0;
|
|
changed = true;
|
|
}
|
|
}
|
|
|
|
// consonant + Halant + Halant + consonant forms consonant + Halant + ZWNJ + consonant
|
|
//if (arr[i] == 2381)
|
|
//{
|
|
// if (arr[i + 1] == 2381)
|
|
// {
|
|
// arr[i+1] = (char)8204; //
|
|
// }
|
|
//}
|
|
|
|
// consonant + Halant + Nukta + consonant forms consonant + Halant + ZWJ + Consonant
|
|
//if (arr[i] == 2364)
|
|
//{
|
|
// if (arr[i + 1] == 2381)
|
|
// {
|
|
// arr[i] = (char)2381; //
|
|
// arr[i+1] = (char)8205; //
|
|
// }
|
|
//}
|
|
/*if (arr[i] == 0x938 && arr[i + 1] == 0x94D)//थ')
|
|
{
|
|
arr[i] = (char)0x930;
|
|
arr[i + 1] = (char)0;
|
|
}*/
|
|
}
|
|
|
|
if (!changed)
|
|
{
|
|
return text;
|
|
}
|
|
|
|
var newText = new string(arr.Where(x => x != 0).ToArray());
|
|
if (newText == text)
|
|
return newText;
|
|
text = newText;
|
|
return text; // remove this later to allow for several passes
|
|
}
|
|
}
|
|
}
|
|
}
|