-
Notifications
You must be signed in to change notification settings - Fork 0
/
ucprint.c
130 lines (117 loc) · 3.35 KB
/
ucprint.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/* UCPrint: Print a sequence of Unicode code points.
*
* Original algorithm:
* <https://stackoverflow.com/a/4609989/2954547>
*
* ISC License:
* Copyright 2021 Greg Werbin
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
* IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <errno.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
/* Status codes returned by print_unicode() and used as the process exit status. */
enum retcode {
    RET_OK      = 0,  /* success */
    RET_USAGE   = 1,  /* malformed command-line argument (e.g. an empty one) */
    RET_INVALID = 2,  /* value is not a valid Unicode code point */
    RET_MALLOC  = 9,  /* memory allocation failure */
};
/*
 * Encode one Unicode code point as UTF-8 and write it to stdout.
 *
 * codepoint: the scalar value to print. Valid values are 0..0x10FFFF,
 *            excluding the surrogate range 0xD800..0xDFFF.
 *
 * Returns 0 on success. For a surrogate or an out-of-range value, prints a
 * diagnostic to stderr, writes nothing to stdout and returns RET_INVALID.
 *
 * The encoded bytes are written with fwrite() using an explicit length, so
 * the buffer needs no NUL terminator and U+0000 is emitted as a single
 * zero byte rather than being dropped.
 */
int print_unicode(unsigned long codepoint) {
    /* A UTF-8 sequence is at most 4 bytes, so no heap allocation is needed. */
    unsigned char utf8[4];
    size_t len;

    /* Reject UTF-16 surrogates and anything beyond the last code point. */
    if ((codepoint >= 0xD800 && codepoint <= 0xDFFF) || codepoint > 0x10FFFF) {
        fprintf(stderr, "Invalid Unicode code point: 0x%lx\n", codepoint);
        return RET_INVALID;
    }

    if (codepoint < 0x80) {
        /* 0xxxxxxx */
        utf8[0] = (unsigned char)codepoint;
        len = 1;
    } else if (codepoint < 0x800) {
        /* 110xxxxx 10xxxxxx */
        utf8[0] = (unsigned char)(0xC0 | (codepoint >> 6));
        utf8[1] = (unsigned char)(0x80 | (codepoint & 0x3F));
        len = 2;
    } else if (codepoint < 0x10000) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        utf8[0] = (unsigned char)(0xE0 | (codepoint >> 12));
        utf8[1] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3F));
        utf8[2] = (unsigned char)(0x80 | (codepoint & 0x3F));
        len = 3;
    } else {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        utf8[0] = (unsigned char)(0xF0 | (codepoint >> 18));
        utf8[1] = (unsigned char)(0x80 | ((codepoint >> 12) & 0x3F));
        utf8[2] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3F));
        utf8[3] = (unsigned char)(0x80 | (codepoint & 0x3F));
        len = 4;
    }

    /* Write exactly len bytes; "%s" would read past the unterminated data. */
    fwrite(utf8, 1, len, stdout);
    return 0;
}
/*
 * Entry point. Each command-line argument is parsed as a hexadecimal
 * integer and printed as the UTF-8 encoding of that code point.
 *
 * Arguments that are not a complete, in-range, non-negative hexadecimal
 * integer are echoed to stdout unchanged. A trailing newline is printed
 * after all arguments have been processed.
 *
 * Returns 0 on success (including when no arguments are given), RET_USAGE
 * for an empty argument, or the non-zero status from print_unicode().
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        return 0;
    }

    // Set the locale to "C" to prohibit excessively fancy number
    // representations. Once is enough: nothing below changes the locale.
    setlocale(LC_ALL, "C");

    // argc is an int, so the index is an int too (no signed/unsigned mix).
    for (int argi = 1; argi < argc; argi++) {
        const char *arg = argv[argi];
        char *parse_end = NULL;
        long codepoint;
        int retval;

        if (*arg == '\0') {
            fprintf(stderr, "Empty argument.\n");
            return RET_USAGE;
        }

        // strtol reports range errors only through errno, so clear it first.
        errno = 0;
        codepoint = strtol(arg, &parse_end, 16);

        if (*parse_end != '\0' || errno != 0 || codepoint < 0) {
            // Not a usable hexadecimal code point: pass the text through.
            printf("%s", arg);
            continue;
        }

        retval = print_unicode((unsigned long)codepoint);
        if (retval != 0) {
            return retval;
        }
    }

    printf("\n");
    return 0;
}