Zadatak: napisati C program kojim se vrši konverzija između kodiranja zapisa teksta u računaru: iz UTF-32 u ISO-8859-2 (Latin-2) i obrnuto.
Očekuje se da program radi na realnim primerima. Prilikom konverzije iz UTF-32 u ISO-8859-2 treba sve karaktere koji se ne mogu kodirati u ciljnom kodiranju zameniti karakterom '?'.

Rešenje:

/* U programu koristimo dve funkcije, iso_to_utf i utf32_to_iso */

#include<stdio.h>
#define MAX_IME 1024
#define nula 0
#define jedan 0x01
#define dva 0x02

/* Writes one code point to izlaz as a four-byte little-endian UTF-32 unit. */
static void iso_to_utf_put_unit(unsigned long code_point, FILE *izlaz)
{
    int i;

    for (i = 0; i < 4; i++) {
        fputc((int)((code_point >> (8 * i)) & 0xFFu), izlaz);
    }
}

/*
 * Converts an ISO-8859-2 (Latin-2) byte stream to UTF-32 little-endian.
 *
 * ulaz  - input stream; read byte by byte until EOF.
 * izlaz - output stream; receives the byte order mark FF FE 00 00 followed
 *         by one four-byte little-endian unit per input byte.
 *
 * Every ISO-8859-2 byte has a Unicode code point, so no character is ever
 * replaced by '?'. Write errors are not reported (the function returns void).
 * On platforms that distinguish text and binary streams, both streams should
 * be opened in binary mode.
 */
void iso_to_utf(FILE *ulaz, FILE *izlaz)
{
    /*
     * Unicode code points of ISO-8859-2 bytes 0xA0..0xFF, indexed by
     * (byte - 0xA0). Bytes 0x00..0x9F map to the identical code point and
     * therefore need no table.
     */
    static const unsigned short high_half[96] = {
        /* 0xA0 */ 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
        /* 0xA8 */ 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
        /* 0xB0 */ 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
        /* 0xB8 */ 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
        /* 0xC0 */ 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
        /* 0xC8 */ 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
        /* 0xD0 */ 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
        /* 0xD8 */ 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
        /* 0xE0 */ 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
        /* 0xE8 */ 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
        /* 0xF0 */ 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
        /* 0xF8 */ 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
    };
    int c; /* int, so that EOF stays distinguishable from every byte value */

    /* U+FEFF written little-endian is the UTF-32LE byte order mark. */
    iso_to_utf_put_unit(0xFEFFul, izlaz);

    while ((c = fgetc(ulaz)) != EOF) {
        unsigned long code_point;

        if (c < 0xA0) {
            /* ASCII and the C0/C1 control ranges are identical in both. */
            code_point = (unsigned long)c;
        } else {
            code_point = high_half[c - 0xA0];
        }
        iso_to_utf_put_unit(code_point, izlaz);
    }
}

/*
 * Reads one four-byte UTF-32 unit from ulaz into *code_point.
 * Returns 1 on success, 0 on end of file at a unit boundary, and -1 when the
 * stream ends in the middle of a unit.
 */
static int utf32_to_iso_read_unit(FILE *ulaz, int big_endian, unsigned long *code_point)
{
    unsigned long value = 0;
    int i;

    for (i = 0; i < 4; i++) {
        int byte = fgetc(ulaz); /* int, so 0xFF data is not mistaken for EOF */

        if (byte == EOF) {
            return (i == 0) ? 0 : -1;
        }
        if (big_endian) {
            value = (value << 8) | (unsigned long)byte;
        } else {
            value |= (unsigned long)byte << (8 * i);
        }
    }
    *code_point = value;
    return 1;
}

/*
 * Returns the ISO-8859-2 byte for a Unicode code point, or -1 when the code
 * point has no representation in ISO-8859-2.
 */
static int utf32_to_iso_encode(unsigned long code_point)
{
    /* Code points of ISO-8859-2 bytes 0xA0..0xFF, indexed by (byte - 0xA0). */
    static const unsigned short high_half[96] = {
        /* 0xA0 */ 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
        /* 0xA8 */ 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
        /* 0xB0 */ 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
        /* 0xB8 */ 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
        /* 0xC0 */ 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
        /* 0xC8 */ 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
        /* 0xD0 */ 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
        /* 0xD8 */ 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
        /* 0xE0 */ 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
        /* 0xE8 */ 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
        /* 0xF0 */ 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
        /* 0xF8 */ 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
    };
    unsigned int i;

    /* ASCII and the C0/C1 control ranges are encoded by the same value. */
    if (code_point < 0xA0ul) {
        return (int)code_point;
    }
    for (i = 0; i < 96; i++) {
        if (high_half[i] == code_point) {
            return 0xA0 + (int)i;
        }
    }
    return -1;
}

/*
 * Converts a UTF-32 stream to ISO-8859-2 (Latin-2).
 *
 * ulaz  - input stream holding four-byte UTF-32 units. A leading byte order
 *         mark is consumed: FF FE 00 00 selects little-endian, 00 00 FE FF
 *         selects big-endian. Without a mark the data is read as
 *         little-endian, which is the form iso_to_utf produces.
 * izlaz - output stream; receives one byte per input unit.
 *
 * Code points that ISO-8859-2 cannot represent are written as '?'. A
 * truncated final unit is also written as '?'.
 *
 * Returns the number of characters that were replaced by '?'.
 */
int utf32_to_iso(FILE *ulaz, FILE *izlaz)
{
    int brojac = 0;
    int big_endian = 0;
    int first_unit = 1;

    for (;;) {
        unsigned long code_point = 0;
        int encoded;
        int status = utf32_to_iso_read_unit(ulaz, big_endian, &code_point);

        if (status == 0) {
            break;
        }
        if (status < 0) {
            /* Fewer than four bytes were left: not a decodable character. */
            fputc('?', izlaz);
            ++brojac;
            break;
        }

        if (first_unit) {
            first_unit = 0;
            if (code_point == 0x0000FEFFul) {
                continue; /* little-endian byte order mark */
            }
            if (code_point == 0xFFFE0000ul) {
                /* A big-endian mark read as little-endian looks like this. */
                big_endian = 1;
                continue;
            }
        }

        encoded = utf32_to_iso_encode(code_point);
        if (encoded < 0) {
            encoded = '?';
            ++brojac;
        }
        fputc(encoded, izlaz);
    }

    return brojac;
}

 

/* void print_bits_char(char x)
{
unsigned mask = 1 << 7;

while(mask)
{
putchar(x & mask ? '1' : '0');
mask >>= 1;
}
} */

 

 

 

 

int main()
{

/* Ucitavamo ime ulaznog fajla */
printf("Uneti ime ulaznog fajla: ");
scanf("%s", ime_fajla);

/* Otvaramo ulazni fajl za citanje */
ulaz = fopen(ime_fajla, "r");

/* da li je doslo do greske prilikom otvaranja */
if(ulaz == NULL)
{
printf("Greska prilikom otvaranja fajla\n");
return 1;
}

/* Ucitavamo ime izlaznog fajla */
printf("Uneti ime izlaznog fajla: ");
scanf("%s", ime_fajla);

/* Otvaramo izlazni fajl za upisivanje */
izlaz = fopen(ime_fajla, "w");

/* da li je doslo do greske prilikom otvaranja */
if(izlaz == NULL)
{
printf("Greska prilikom otvaranja fajla\n");
return 1;
}

/* printf("Ne moze se iskodirati ukupno %d , karaktera! \n" , utf32_to_iso( ulaz, izlaz)); */

printf("Unesite '1' za kodiranje utf32->iso8859-2 \n pricemu ulazni fajl mora biti sacuvan sa utf32 enkodiranjem \n ili '2' za kodiranje is8859-2 -> utf32 \n pri cemu ulazni fajl mora biti sacuvan sa iso8859-2 enkodiranjem: \n" );
scanf("%d", &unos);

switch (unos){

case 1 :
printf("Ne moze se iskodirati ukupno %d , karaktera! \n" , utf32_to_iso( ulaz, izlaz));
break;
case 2 :
iso_to_utf(ulaz, izlaz);
break;
default :
printf("Pogresan unos!\n");
}

/* Zatvaramo fajlove */
fclose(ulaz);
fclose(izlaz);

return 0;


}