#include <stdio.h>

/** 
 * Code fragments for properly encoding UTF-8 strings in cdrdao TOCs
 * 
 * Andreas Ruge, Jan 2012
 */
 
/* This is taken from cdrdao 1.2.3; it shows how text fields in TOCs
   are escaped (cdrdao runs under the C locale):
  
    out << " \"";
    for (i = 0; i < dataLen_ - 1; i++) {
      if (data_[i] == '"') {
        out << "\\\"";
      }
      else if (isprint(data_[i])) {
        out << data_[i];
      }
      else {
        sprintf(buf, "\\%03o", (unsigned int)data_[i]);
        out << buf;
      }
    }

    out << "\"";
  }
*/




/**
 * Write a string to fp in the quoted form used for CD text strings in
 * cdrdao TOC files.
 *
 * The output is a single space followed by the string in double quotes,
 * where each input byte is rendered as follows:
 *      a) a double quote becomes backslash + double quote
 *      b) printable ASCII (0x20 - 0x7E) other than the backslash is
 *         copied unchanged
 *      c) every other byte, the backslash included, becomes a backslash
 *         followed by its three digit octal value
 *
 * s    NUL-terminated string to print
 * fp   stream to write to
 *
 * Andreas Ruge, 2012
 */
void toc_print_string(const char *s, FILE *fp)
{
    const char *cursor = s;

    /* leading blank and opening quote */
    fputs(" \"", fp);

    while (*cursor != '\0')
    {
        const char ch = *cursor++;
        const int is_plain = (ch >= 0x20) && (ch <= 0x7E) && (ch != '\\');

        if (ch == '"')
        {
            fputs("\\\"", fp);
        }
        else if (is_plain)
        {
            fputc(ch, fp);
        }
        else
        {
            /* go through unsigned char so bytes >= 0x80 print as 200..377 */
            fprintf(fp, "\\%03o", (unsigned int)(unsigned char)ch);
        }
    }

    /* closing quote */
    fputc('"', fp);
}


/**
 * Translate a NUL-terminated UTF-8 string to ISO 8859-1 (latin1)
 *
 *  At most dest_len bytes are written to dest, the terminating NUL
 *  included. The output is never longer than the input, so a dest
 *  buffer as large as the src string (including its NUL) always
 *  suffices.
 *
 *  Only two kinds of input are translated:
 *      - 7-bit bytes, which are copied unchanged
 *      - two byte sequences led by 0xC2 or 0xC3 (U+0080 - U+00FF)
 *  Every other byte is skipped and reported through the return value.
 *  The lead bytes 0xC0 and 0xC1 are rejected because they can only
 *  start overlong encodings of 7-bit values, which are invalid UTF-8.
 *
 * Parameters
 *  dest      output buffer; left NUL-terminated whenever it is usable
 *  dest_len  size of dest in bytes
 *  src       NUL-terminated UTF-8 input
 *
 * Return
 *  0   on success
 *  1   when a unicode sequence was found which can't be represented in
 *      ISO 8859-1, when the unicode string is invalid, when the output
 *      had to be truncated to fit dest, or when dest is NULL, dest_len
 *      is not positive, or src is NULL
 *
 * Andreas Ruge, 2012
 */
int utf8_to_latin1(unsigned char *dest, int dest_len, unsigned char *src)
{
    int ret = 0;

    if (dest == NULL || dest_len <= 0)
    {
        /* nowhere to store even the terminating NUL */
        return 1;
    }
    if (src == NULL)
    {
        *dest = '\0';
        return 1;
    }

    /* stop at dest_len == 1 so one byte stays free for the NUL */
    while (*src != '\0' && dest_len > 1)
    {
        if (!(*src & 0x80))
        {
            /* 7-bit => ASCII range */
            *dest++ = *src++;
            dest_len--;
        }
        else if ((*src == 0xC2 || *src == 0xC3) && (src[1] & 0xC0) == 0x80)
        {
            /* bit pattern 1100001x  10xxxxxx,
               a two byte UTF-8 sequence with exactly 8 data bits used,
               i.e. can be translated straight to ISO 8859-1.
               src[1] is safe to read: src[0] is not the terminator. */
            *dest++ = (unsigned char)(((src[0] & 0x03) << 6) | (src[1] & 0x3F));
            src += 2;
            dest_len--;
        }
        else
        {
            /* part of a UTF-8 multi-byte sequence which can't be
               translated to ISO 8859-1, or an invalid byte */
            ret = 1;
            src++;
        }
    }

    if (*src != '\0')
    {
        /* dest filled up before all of src was consumed */
        ret = 1;
    }

    *dest = '\0';
    return ret;
}


/* Test function, to be used on a UTF-8 terminal.
   Converts the first command line argument from UTF-8 to ISO 8859-1 and
   prints the result escaped for a cdrdao TOC file.
   Returns 0 after printing, 1 when no argument was given. */
int main(int argc, char *argv[]) {

    unsigned char buf[100];
    int ret;

    if (argc < 2) {
        /* argv[1] is NULL without an argument; don't pass it on */
        fprintf(stderr, "Usage: %s <utf-8 string>\n",
                argc > 0 ? argv[0] : "utf8_to_latin1");
        return 1;
    }

    /* the converter works on unsigned bytes, argv holds plain char */
    ret = utf8_to_latin1(buf, (int)sizeof buf, (unsigned char *)argv[1]);
    if (ret == 0) {
        printf("All characters converted to ISO 8859-1\n");
    } else {
        printf("Warning: some characters could not be converted to ISO 8859-1\n");
    }
    printf("Encoded for cdrdao TOC:");
    toc_print_string((const char *)buf, stdout);
    printf("\n");
    return 0;

}
