State

Occasionally, state objects are passed to the routines that perform conversions. These can do things such as hold onto shift state or carry flags for special processing (e.g., for assuming that the input for a given function is valid). In general, the state parameter comes last and is a pointer to a state the user creates (or, if using the typed conversion functions, one may be created and used for you). The function therefore usually ends up in the form:

#include <uchar.h>

/* State object handed to a conversion routine and kept between calls. */
struct my_state {
        /* Flag for special processing: treat the input as already valid. */
        bool assume_valid_input;
        /* Storage for data held inside the state between calls. */
        char accumulation[2];
};
typedef struct my_state my_state;

/* Example shape of a single-conversion routine that accepts a state object.
 * The state is the final parameter and is passed as a pointer to a my_state
 * that the caller creates.
 * NOTE(review): p_output is declared as a pointer to `const char32_t*` even
 * though its name suggests it is the destination for converted data --
 * confirm whether the const qualifier is intended. */
cnc_mcerr my_single_conversion_to_utf32(size_t* p_output_size,
        const char32_t** p_output,
        size_t* p_input_size,
        const char** p_input,
        my_state* p_state);

As described by the indivisible unit of work, reading input can change the state internally, or even accumulate data in the state while outputting 0 characters. Because state may influence whether or not a conversion is complete (e.g., it has accumulated data inside of it and must empty it out), complex state objects should add a visible inspection function to check if the state is “complete” (that is, it has nothing left to output and is awaiting no further characters):

#include <uchar.h>

/* Conversion state that can accumulate data across calls. */
struct my_conversion_state {
        /* Storage for data accumulated inside the state. */
        char accumulation[2];
        /* Counter kept alongside `accumulation`. */
        unsigned char accumulation_count;
        /* Flag for special processing: treat the input as already valid. */
        bool assume_valid_input;
};
typedef struct my_conversion_state my_conversion_state;

/* Inspection function for my_conversion_state: reports whether the state is
 * "complete", i.e. it has nothing left to output and is awaiting no further
 * characters. Takes the state by pointer-to-const. */
bool my_state_is_complete(const my_conversion_state* p_state);

/* Example shape of a single-conversion routine that accepts a state object.
 * The state is the final parameter and is passed as a pointer to a
 * my_conversion_state that the caller creates.
 * NOTE(review): p_output is declared as a pointer to `const char32_t*` even
 * though its name suggests it is the destination for converted data --
 * confirm whether the const qualifier is intended. */
cnc_mcerr my_single_conversion_to_utf32(size_t* p_output_size,
        const char32_t** p_output,
        size_t* p_input_size,
        const char** p_input,
        my_conversion_state* p_state);

The cnc_mcstate_t type has a similar function named cnc_mcstate_is_complete(), which is used for this purpose. As a completely made-up example for a completely made-up encoding:

 1#include <ztd/cuneicode.h>
 2
 3#include <ztd/idk/size.h>
 4#include <ztd/idk/restrict.h>
 5
 6#include <stdio.h>
 7#include <string.h>
 8#include <stddef.h>
 9
10cnc_mcerr my_bulk_mcsnrtoc8sn(size_t* output_size, unsigned char** output,
11     size_t* input_size, const char** restrict input, cnc_mcstate_t* state) {
12	cnc_mcstate_t invocation_unique_internal_state;
13	if (state == NULL) {
14		invocation_unique_internal_state = (cnc_mcstate_t) { 0 };
15		state                            = &invocation_unique_internal_state;
16	}
17	if (input == NULL || *input == NULL) {
18		return cnc_mcnrtoc8n(output_size, output, input_size, input, state);
19	}
20	for (;;) {
21		cnc_mcerr err
22		     = cnc_mcnrtoc8n(output_size, output, input_size, input, state);
23		if (err != cnc_mcerr_ok) {
24			return err;
25		}
26		if (*input_size > 0) {
27			continue;
28		}
29		bool state_finished = cnc_mcstate_is_complete(state);
30		if (!state_finished) {
31			continue;
32		}
33		return err;
34	}
35}
36
37int main() {
38	const char input[]              = "abc";
39	const char* input_ptr           = input;
40	const size_t initial_input_size = ztdc_c_array_size(input);
41	size_t input_size               = initial_input_size;
42
43	unsigned char output[CNC_C8_MAX * ztdc_c_array_size(input)] = { 0 };
44	unsigned char* output_ptr                                  = output;
45	const size_t initial_output_size = ztdc_c_array_size(output);
46	size_t output_size               = initial_output_size;
47
48	cnc_mcerr err = my_bulk_mcsnrtoc8sn(
49	     &output_size, &output_ptr, &input_size, &input_ptr, NULL);
50	const size_t output_written = initial_output_size - output_size;
51	const size_t input_read     = initial_input_size - input_size;
52	if (err != cnc_mcerr_ok) {
53		fprintf(stderr, "The conversion failed with an unexpected error of %s.",
54		     cnc_mcerr_to_str(err));
55		return 1;
56	}
57
58	const unsigned char expected_output[] = "abc";
59
60	if (memcmp(output, expected_output, output_written) != 0) {
61		fprintf(stderr,
62		     "The expected intput was not the same as the expected output.");
63		return 2;
64	}
65
66	fprintf(stdout, "Read: %zu units\nWrote: %zu units\nWritten value: %s\n",
67	     input_read, output_written, (const char*)output);
68
69	return 0;
70}