// File: fix.c


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Size in bytes of the read buffer. The expansion is parenthesized so the
// macro behaves as a single value inside any expression (e.g. x % CAPACITY),
// and uses unsigned long literals so the product cannot overflow a narrow int.
#define BUFFER_CAPACITY     (1024UL * 1024UL * 25UL)

// Number of field start indices a line must have to be written to the output
// (the line start counts as the first one, every '|' adds another).
#define INDICES_EXPECTED    32
// Upper bound on the number of field start indices recorded for one line;
// further separators on the same line are ignored.
#define INDICES_MAXIMUM     40
// Number of fields copied to the output for every accepted line.
#define INDICES_WRITE       16

// A progress message is printed every 2^PRINT_FREQUENCY written lines.
#define PRINT_FREQUENCY     16

int     main (int argc, char* argv[])
{
    unsigned char*  bufferArray;
    unsigned long   bufferSize;
    unsigned long   indicesArray[INDICES_MAXIMUM + 1];
    unsigned long   indicesCount;
    unsigned long   indicesWrite[INDICES_WRITE] = {0, 1, 14, 7, 8, 16, 10, 12, 13, 11, 9, 25, 26, 27, 17, 18};
    FILE*           streamWriter;
    FILE*           streamReader;
    unsigned char   pipe = '|';
    unsigned long   ln;
    unsigned long   i;
    unsigned long   j;

    if (argc < 3)
    {
        printf ("usage: %s <input> <output>\n", argv[0]);

        return 0;
    }

    streamReader = fopen (argv[1], "rb");

    if (!streamReader)
    {
        fprintf (stderr, "cannot open input file \"%s\" for reading\n", argv[1]);

        return 1;
    }

    streamWriter = fopen (argv[2], "wb");

    if (!streamWriter)
    {
        fprintf (stderr, "cannot open output file \"%s\" for writing\n", argv[2]);
        fclose (streamReader);

        return 1;
    }

    bufferArray = malloc (sizeof (*bufferArray) * BUFFER_CAPACITY);
    bufferSize = 0;
    indicesArray[0] = 0;
    indicesCount = 1;
    ln = 0;

    printf ("starting process...\n");

    for (i = 0; i < bufferSize || !feof (streamReader); ++i)
    {
        // End of buffer reached: content must be shifted to the left before
        // buffer is populated with incoming data from input stream
        if (i == bufferSize)
        {
            i = bufferSize - indicesArray[0];

            memmove (bufferArray, bufferArray + indicesArray[0], i * sizeof (*bufferArray));

            bufferSize = i + fread (bufferArray + i, sizeof (*bufferArray), BUFFER_CAPACITY - i, streamReader);

            for (j = indicesCount; j--; )
                indicesArray[j] -= indicesArray[0];

            if (i >= bufferSize)
                break;
        }

        // End of item found: save starting index of the next one
        if (bufferArray[i] == '|')
        {
            if (indicesCount < INDICES_MAXIMUM)
                indicesArray[indicesCount++] = i + 1;
        }

        // End of line found: write required indices and flush list
        else if (bufferArray[i] < ' ')
        {
            if (indicesCount == INDICES_EXPECTED)
            {
                indicesArray[indicesCount] = i + 1;

                for (j = 0; j + 1 < INDICES_WRITE; ++j)
                {  
                    fwrite (bufferArray + indicesArray[indicesWrite[j]], sizeof (*bufferArray), indicesArray[indicesWrite[j] + 1] - indicesArray[indicesWrite[j]] - 1, streamWriter);
                    fwrite (&pipe, sizeof (*bufferArray), 1, streamWriter);
                }

                if (j < INDICES_WRITE)
                    fwrite (bufferArray + indicesArray[indicesWrite[j]], sizeof (*bufferArray), indicesArray[indicesWrite[j] + 1] - indicesArray[indicesWrite[j]] - 1, streamWriter);

                if ((++ln & (((unsigned)1 << PRINT_FREQUENCY) - 1)) == 0)
                    printf ("writing line %lu...\n", ln);
            }

            fwrite (bufferArray + i, sizeof (*bufferArray), 1, streamWriter);

            indicesArray[0] = i + 1;
            indicesCount = 1;
        }
    }

    if ((ln & (((unsigned)1 << PRINT_FREQUENCY) - 1)) != 0)
        printf ("writing line %lu...\n", ln);

    printf ("done.\n");

    fclose (streamWriter);
    fclose (streamReader);
    free (bufferArray);

    return 0;
}