D
D
Danny Belchenko2017-11-11 16:14:17
C++ / C#
Danny Belchenko, 2017-11-11 16:14:17

How to make iconv work?

Hello everyone!
I'm writing a converter from the ancient Lexer format and found a library that is friendly with it. It returns multiple events for each character in the .lex file. Here is one of the events - this is a character, in the original it is stored in the CP866 encoding, I wrote a small function that converts the current character to utf, but iconv simply ignores half of the characters and cannot read it :(
The code is Frankinstein, so please do not scold for it

#include <stdio.h>
#include <stdint.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

#include "liblex.h"
#include <iconv.h>
#include <errno.h>
#include <err.h>

#define ROW_SIZE 512


void parse_callback( struct parser_event *pe, void *userdata )
{
    iconv_t foo = iconv_open("CP866", "UTF-8");
    if((int) foo == -1) {
        if (errno == EINVAL) {
            fprintf(stderr,
                    "Conversion is not supported");
        } else {
            fprintf(stderr, "Initialization failure:\n");
        }
    }
    // calloc fills memory with 0 bytes. we alloc two -
    // one for the 'ö' and one for the ending delimeter
    char *iso = calloc(2, sizeof(char));

    // the converted string can be four times larger
    // then the original, as the largest known char width is 4 bytes.
    char *converted = calloc(5, sizeof(char));

    // we need to store an additional pointer that targets the
    // start of converted. (iconv modifies the original 'converted')
    char *converted_start = converted;

    size_t ibl = 2; // len of iso
    size_t obl = 5; // len of converted

    // do it!

    switch ( pe->event ) {

        case EVENT_RUNE:
            iso[0] = pe->rune;
            int ret = iconv(foo, &iso, &ibl, &converted, &obl);

            // if iconv fails it returns -1
            if(ret == (iconv_t)-1) {
                perror("iconv");
                iconv_close(foo);
            } else {
                // other wise the number of converted bytes
                printf("%i bytes converted\n", ret);
                printf("result: '%s'\n", converted_start);
                iconv_close(foo);
            }
            fprintf( stdout, "debug: event = RUNE, data = %c\n", iso);
            break;

        case EVENT_FONT:
            fprintf( stderr, "FUCK FONT\n");
            fprintf( stdout, "debug: event = FONT, data = %d\n", pe->font_id );
            break;

        case EVENT_ERROR:
            fprintf( stderr, "FUCK ERROR\n");
            fprintf( stdout, "debug: event = ERROR\n" );
            break;

        default:
            fprintf( stdout, "debug: event = UNKNOWN\n" );
    }
}


int main()
{
    int err = 0;
    struct parser *p;
    struct parser_event pe;
    int fd;
    uint8_t row[ROW_SIZE];
    ssize_t size;
    fprintf( stderr, "FUCK IM STARTED BITCH\n");
    /* Create new parser */
    err = parser_create( &p );
    if ( err ) {
        fprintf( stderr, "error: Unable to create parser\n" );
        goto out;
    }

    fprintf( stderr, "FUCK PARSER\n");
    fd = open("example.lex", O_RDONLY);
    if ( fd == -1 ) {
        fprintf( stderr, "error: Unable to open stream\n" );
        goto out;
    }
    fprintf( stderr, "FUCK FILES\n");
    err = parser_set_callback( p, parse_callback, NULL );
    if ( err ) {
        fprintf( stderr, "error: Unable to setup callback\n" );
        goto out;
    }
    fprintf( stderr, "FUCK IT CALLBACK\n");
    for ( ;5; ) {

        /* Read */
        size = read( fd, row, ROW_SIZE );
        if ( size == 0 ) {
            goto out;
        }
        if ( size == -1 ) { /* Some error when reading */
            fprintf( stderr, "error: Unable to read bytes\n" );
            goto out;
        }

        /* Parse */
        err = parser_parse( p, row, size );
        if ( err ) {
            fprintf(stderr, "error: Unable to parsing row\n");
            goto out;
        }

    }

  out:

    /* Close stream */
    if ( fd != -1 ) {
        close( fd );
    }

    /* Dispose reader */
    parser_dispose( p );

    return err;
}

Answer the question

In order to leave comments, you need to log in

1 answer(s)
J
jcmvbkbc, 2017-11-12
@belchenko

How to make iconv work?

I see two errors in your code and some strange places. Bugs:
- you create a converter for each call to parse_callback, but close it only on the EVENT_RUNE event.
- you allocate memory (two callocs in parse_callback) and don't free it. Despite the fact that you do not need dynamic memory here at all.
Strange places:
- in converted_start -- utf8... -printf("result: '%s'\n", converted_start);
fprintf(stdout, "debug: event = RUNE, data = %c\n", iso);
iso is firstly an array and secondly an array of CP866 characters.
At least one (and most likely both) of these printfs will not print what you expected.
parse_callback can be rewritten like this:
void parse_callback( struct parser_event *pe, void *userdata )
{
    switch ( pe->event ) {

        case EVENT_RUNE: {
            char *iso = &pe->rune;
            char out[10] = {0};
            char *converted = out;
            size_t ibl = 1;
            size_t obl = sizeof(out);
            iconv_t foo = iconv_open("CP866", "UTF-8");
            int ret;

            if((int) foo == -1) {
                if (errno == EINVAL) {
                    fprintf(stderr,
                            "Conversion is not supported");
                } else {
                    fprintf(stderr, "Initialization failure:\n");
                }
                break;
            }
            ret = iconv(foo, &iso, &ibl, &converted, &obl);

            // if iconv fails it returns -1
            if(ret == (iconv_t)-1) {
                perror("iconv");
            } else {
                // otherwise the number of converted bytes
                printf("%i bytes converted\n", ret);
                printf("result: '%s'\n", out);
            }
            iconv_close(foo);
            fprintf(stdout, "debug: event = RUNE, data = %c\n", pe->rune);
            break;
        }

        case EVENT_FONT:
            fprintf( stderr, "FUCK FONT\n");
            fprintf( stdout, "debug: event = FONT, data = %d\n", pe->font_id );
            break;

        case EVENT_ERROR:
            fprintf( stderr, "FUCK ERROR\n");
            fprintf( stdout, "debug: event = ERROR\n" );
            break;

        default:
            fprintf( stdout, "debug: event = UNKNOWN\n" );
    }
}

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question