Volksdata 1.0b7
RDF library
Loading...
Searching...
No Matches
codec.c
Go to the documentation of this file.
1#include "volksdata/codec.h"
2
3
8#define LIT_ECHAR "\t\b\n\r\f\"\'\\"
9
10
/// Decode exactly `n` hex digits starting at `hex` into `*cp`; non-zero if any byte is not a hex digit.
static int
hex_to_codepoint (const uint8_t *hex, size_t n, uint32_t *cp)
{
    uint32_t val = 0;
    for (size_t k = 0; k < n; k++) {
        const uint8_t c = hex[k];
        uint32_t digit;
        if (c >= '0' && c <= '9') digit = (uint32_t) (c - '0');
        else if (c >= 'a' && c <= 'f') digit = (uint32_t) (c - 'a') + 10;
        else if (c >= 'A' && c <= 'F') digit = (uint32_t) (c - 'A') + 10;
        else return -1;
        // At most 8 digits are ever passed, so this cannot overflow 32 bits.
        val = (val << 4) | digit;
    }
    *cp = val;

    return 0;
}


/** @brief Replace `\uxxxx` and `\Uxxxxxxxx` escapes with their UTF-8 bytes.
 *
 * Any other backslash-escaped character is passed through #unescape_char.
 * All remaining bytes are copied verbatim.
 *
 * @param[in] esc_str Escaped input. Only the first `size` bytes are read;
 *  a NUL terminator is not required.
 *
 * @param[in] size Number of bytes to read from `esc_str`.
 *
 * @return Newly allocated, NUL-terminated buffer that the caller must
 *  free(). NULL if memory could not be allocated, if the input ends in the
 *  middle of an escape sequence, if a `\u` / `\U` escape contains a
 *  non-hexadecimal digit, or if the code point cannot be encoded.
 */
uint8_t *unescape_unicode (const uint8_t *esc_str, size_t size)
{
    // Output will not be longer than the escaped sequence: a 6-byte \uxxxx
    // or a 10-byte \Uxxxxxxxx always shrinks when encoded as UTF-8.
    uint8_t *data = malloc (size + 1);
    if (UNLIKELY (!data)) return NULL;

    size_t len = 0; // Size of output string.
    for (size_t i = 0; i < size;) {
        if (esc_str[i] != '\\') {
            data[len++] = esc_str[i++];
            continue;
        }

        i++; // Skip over '\\'
        if (i >= size) {
            // Nothing follows the backslash: do not read past the input.
            log_error ("Truncated escape sequence at end of input.");
            goto fail;
        }

        size_t esc_len; // Number of hex digits in the escape sequence.
        if (esc_str[i] == 'u') {
            // 4-hex (2 bytes) sequence.
            esc_len = 4;
        } else if (esc_str[i] == 'U') {
            // 8-hex (4 bytes) sequence.
            esc_len = 8;
        } else {
            // Unescape other escaped characters.
            data[len++] = unescape_char (esc_str[i++]);
            continue;
        }

        i++; // Skip over 'u' / 'U'

        if (size - i < esc_len) {
            log_error ("Truncated Unicode escape sequence.");
            goto fail;
        }

        uint32_t cp;
        if (hex_to_codepoint (esc_str + i, esc_len, &cp) != 0) {
            log_error (
                    "Invalid Unicode escape sequence: %.*s",
                    (int) esc_len, (const char *) (esc_str + i));
            goto fail;
        }

        // Scratch buffer for the encoded bytes, sized like the buffer the
        // encoder has always been handed here.
        uint8_t utf8[9];
        int cp_len = utf8_encode (cp, utf8);
        if (cp_len <= 0) {
            log_error (
                    "Error encoding sequence: %.*s",
                    (int) esc_len, (const char *) (esc_str + i));
            goto fail;
        }

        // Copy bytes into destination.
        memcpy (data + len, utf8, (size_t) cp_len);
        len += (size_t) cp_len;
        i += esc_len;
    }

    data[len++] = '\0';

    // Compact result. If shrinking fails, the original buffer is still
    // valid and complete, so hand that back instead of leaking it.
    uint8_t *ret = realloc (data, len);

    return ret ? ret : data;

fail:
    free (data);

    return NULL;
}
78
79
81escape_lit (const char *in, char **out_p)
82{
83 size_t out_size = strlen (in) + 1;
84
85 // Expand output string size to accommodate escape characters.
86 for (
87 size_t i = strcspn (in, LIT_ECHAR);
88 i < strlen (in);
89 i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
90 out_size ++;
91 }
92
93 char *out = calloc (1, out_size);
94 if (UNLIKELY (!out)) return VOLK_MEM_ERR;
95
96 size_t boundary;
97 boundary = strcspn (in, LIT_ECHAR);
98 for (size_t i = 0, j = 0;;) {
99 out = strncat (out, in + i, boundary);
100
101 i += boundary;
102 j += boundary;
103 if (i >= strlen (in)) break;
104
105 out[j++] = '\\';
106 out[j++] = escape_char (in[i++]);
107 boundary = strcspn (in + i, LIT_ECHAR);
108 }
109
110 *out_p = out;
111 return VOLK_OK;
112}
113
114
115char *
116fmt_header (char *pfx)
117{
118 char *body = "Generated by Volksdata v" VOLK_VERSION " on ";
119 time_t now = time (NULL);
120 char date[16];
121 strftime (date, sizeof (date), "%m/%d/%Y", gmtime (&now));
122
123 char *out = malloc (strlen (pfx) + strlen (body) + strlen (date) + 2);
124 if (UNLIKELY (!out)) return NULL;
125
126 sprintf (out, "%s%s%s\n", pfx, body, date);
127
128 return out;
129}
130
131/*
132 * Extern inline functions.
133 */
134
135char escape_char (const char c);
136char unescape_char (const char c);
137uint8_t *uint8_dup (const uint8_t *str);
138uint8_t *uint8_ndup (const uint8_t *str, size_t size);
char escape_char(const char c)
#define LIT_ECHAR
List of characters to be escaped in serialized literals.
Definition codec.c:8
#define UNLIKELY(x)
Definition core.h:39
uint8_t * uint8_ndup(const uint8_t *str, size_t size)
strndup() for unsigned char.
Definition codec.h:82
char * fmt_header(char *pfx)
Format an informational header.
Definition codec.c:116
char unescape_char(const char c)
Unescape a single character.
Definition codec.h:128
VOLK_rc escape_lit(const char *in, char **out_p)
Add escape character (backslash) to illegal literal characters.
Definition codec.c:81
uint8_t * uint8_dup(const uint8_t *str)
strdup() for unsigned char.
Definition codec.h:72
uint8_t * unescape_unicode(const uint8_t *esc_str, size_t size)
Replace \uxxxx and \Uxxxxxxxx with Unicode bytes.
Definition codec.c:11
#define VOLK_VERSION
Definition core.h:53
#define LOG_TRACE(...)
Definition core.h:276
int utf8_encode(const uint32_t utf, unsigned char *out)
Encode a code point using UTF-8.
Definition core.h:445
#define VOLK_MEM_ERR
Memory allocation error.
Definition core.h:144
#define VOLK_OK
Generic success return code.
Definition core.h:83
int VOLK_rc
Definition core.h:79