Volksdata 1.0b7
RDF library
Loading...
Searching...
No Matches
codec_ttl.c
Go to the documentation of this file.
2
3
13typedef struct {
14 const VOLK_Codec * codec;
15 const VOLK_Graph * gr;
17 size_t s_cur;
19 char * s_str;
20 char * p_str;
21 char * o_str;
23
24
25/* * * Codec functions. * * */
26
27static VOLK_rc
28term_to_ttl (const VOLK_Term *term, char **out_p)
29{
30 VOLK_rc rc;
31 char
32 *tmp = NULL,
33 *out = NULL,
34 *metadata = NULL;
35 size_t buf_len;
36
37 VOLK_rc md_rc = VOLK_NORESULT;
38 switch (term->type) {
40 if (strcmp (term->data, VOLK_RDF_TYPE) == 0) {
41 // Shorten RDF type
42 buf_len = 2;
43 out = realloc (*out_p, 2);
44 if (UNLIKELY (!out)) return VOLK_MEM_ERR;
45 out[0] = 'a';
46 out[1] = '\0';
47 } else {
48 md_rc = VOLK_nsmap_denormalize_uri (term->data, &tmp);
49 PRCCK (md_rc);
50 if (md_rc == VOLK_NORESULT) {
51 // If URI could not be shortened, add `<>`
52 // and copy term from the original.
53 out = realloc (*out_p, strlen (term->data) + 3);
54 if (UNLIKELY (!out)) return VOLK_MEM_ERR;
55 sprintf (out, "<%s>", term->data);
56 } else {
57 // If URI was shortened, write it out without `<>` and
58 // use previously allocated data from denormalization.
59 // Free previous output pointer
60 free (*out_p);
61 out = tmp;
62 }
63 }
64 rc = VOLK_OK;
65 break;
66
68 // Calculate string length.
69 if (escape_lit (term->data, &tmp) != VOLK_OK)
70 return VOLK_ERROR;
71 buf_len = strlen (tmp) + 3; // Room for "" and terminator
72
73 // Data type.
74 bool shorten = false;
75 if (
76 term->datatype != 0
78 ) {
80 term->datatype->data, &metadata);
81 RCCK (md_rc);
82 unsigned padding = 0;
83
84 // Shorten numeric and boolean types.
85 if (strcmp (metadata, "xsd:integer") == 0) {
86 // TODO check for valid format.
87 shorten = true;
88 } else if (strcmp (metadata, "xsd:double") == 0) {
89 // TODO check for valid format.
90 shorten = true;
91 } else if (strcmp (metadata, "xsd:decimal") == 0) {
92 // TODO check for valid format.
93 shorten = true;
94 } else if (strcmp (metadata, "xsd:boolean") == 0) {
95 // TODO check for valid format.
96 shorten = true;
97 } else {
98 // Room for `^^<>` for FQURI, `^^` for NS URI
99 padding = md_rc == VOLK_NORESULT ? 4 : 2;
100 }
101 buf_len += strlen (metadata) + padding;
102 }
103
104 out = realloc (*out_p, buf_len);
105 if (UNLIKELY (!out)) return VOLK_MEM_ERR;
106
107 if (shorten) {
108 strcpy (out, tmp);
109 } else if (metadata) {
110 char *fmt = (
111 md_rc == VOLK_NORESULT ? "\"%s\"^^<%s>"
112 : "\"%s\"^^%s");
113 sprintf (out, fmt, tmp, metadata);
114 }
115 else {
116 sprintf (out, "\"%s\"", tmp);
117 }
118 free (tmp);
119
120 rc = VOLK_OK;
121
122 break;
123
125 // Calculate string length.
126 if (escape_lit (term->data, &tmp) != VOLK_OK)
127 return VOLK_ERROR;
128 buf_len = strlen (tmp) + 3; // Room for "" and terminator
129
130 if (term->lang[0] != '\0') {
131 metadata = strndup (term->lang, sizeof (VOLK_LangTag));
132 buf_len += strlen (metadata) + 1; // Room for @
133 }
134
135 out = realloc (*out_p, buf_len);
136 if (UNLIKELY (!out)) return VOLK_MEM_ERR;
137
138 sprintf (out, "\"%s\"", tmp);
139 free (tmp);
140
141 // Add lang.
142 if (metadata) out = strcat (strcat (out, "@"), metadata);
143
144 rc = VOLK_OK;
145
146 break;
147
148 case VOLK_TERM_BNODE:
149 out = realloc (*out_p, strlen (term->data) + 3);
150 if (UNLIKELY (!out)) return VOLK_MEM_ERR;
151
152 sprintf (out, "_:%s", term->data);
153 rc = VOLK_OK;
154
155 break;
156
157 default:
158 out = *out_p; // This is considered garbage.
159 log_error ("Invalid term type: %d", term->type);
160 rc = VOLK_PARSE_ERR;
161 }
162 free (metadata);
163
164 *out_p = out;
165 return rc;
166}
167
168
169static void *
170gr_to_ttl_init (const VOLK_Graph *gr)
171{
173 CALLOC_GUARD (it, NULL);
174
175 it->codec = &ttl_codec;
176 it->gr = gr;
178 // Sets the condition to build the prolog on 1st iteration.
179 it->rc = VOLK_NORESULT;
180
181 return it;
182}
183
184
186static VOLK_rc
187build_prolog (VOLK_TTLCodecIterator *it, char **res_p)
188{
189 char *res = fmt_header ("# ");
190
191 const char ***nsm = VOLK_nsmap_dump ();
192 char *ns_tpl = "@prefix %s: <%s> .\n";
193
194 // Prefix map.
195 for (size_t i = 0; nsm[i]; i++) {
196 const char **ns = nsm[i];
197 size_t old_len = strlen (res);
198 size_t ns_len = strlen (ns[0]) + strlen (ns[1]) + strlen (ns_tpl);
199 char *tmp = realloc (res, old_len + ns_len + 1);
200 if (UNLIKELY (!tmp)) return VOLK_MEM_ERR;
201 res = tmp;
202
203 sprintf (res + old_len, ns_tpl, ns[0], ns[1]);
204 free (ns);
205 }
206 free (nsm);
207
208 // Base.
209 char *base_uri_str = VOLK_graph_uri (it->gr)->data;
210 char *base_stmt_tpl = "\n@base <%s> .\n\n";
211 char *base_stmt = malloc (
212 strlen (base_stmt_tpl) + strlen (base_uri_str) + 1);
213 if (!UNLIKELY (base_stmt)) return VOLK_MEM_ERR;
214 sprintf (base_stmt, base_stmt_tpl, base_uri_str);
215 res = realloc (res, strlen (res) + strlen (base_stmt) + 1);
216 if (!UNLIKELY (res)) return VOLK_MEM_ERR;
217 res = strcat (res, base_stmt);
218 free (base_stmt);
219
220 *res_p = res;
221 it->rc = VOLK_OK;
222
223 return VOLK_OK;
224}
225
226
228static VOLK_rc
229gr_to_ttl_iter (void *h, char **res_p) {
230 VOLK_TTLCodecIterator *it = h;
231
232 if (it->rc == VOLK_NORESULT) return build_prolog (it, res_p);
233
234 VOLK_Term *s = NULL;
235 char *res = *res_p; // Result string will be reallocated.
236 VOLK_rc rc = VOLK_term_set_next (it->subjects, &it->s_cur, &s);
237 if (rc == VOLK_END) return rc; // Return without logging error.
238 RCCK (rc); // Log error or warning for anything else.
239
240 term_to_ttl (s, &res);
241
243 it->gr, s, VOLK_LINK_OUTBOUND);
244
245 VOLK_LinkMapIterator *lmit = VOLK_link_map_iter_new (lmap);
246 VOLK_Term *p = NULL;
247 VOLK_TermSet *o_ts = NULL;
248 char *p_join = "\n "; // Newline & indent after subject.
249 // Begin predicate loop.
250 while (VOLK_link_map_next (lmit, &p, &o_ts) != VOLK_END) {
251 // Add predicate representation.
252 RCCK (term_to_ttl (p, &it->p_str));
253 char *tmp = realloc (
254 res, strlen (res) + strlen (it->p_str) + strlen (p_join) + 1);
255 if (UNLIKELY (!tmp)) goto memfail;
256 res = strcat (strcat (tmp, p_join), it->p_str);
257
258 p_join = " ;\n ";
259
260 // Add objects for predicate.
261 size_t i = 0;
262 VOLK_Term *o = NULL;
263 char *o_join = " ";
264 while (VOLK_term_set_next (o_ts, &i, &o) != VOLK_END) {
265 it->rc = term_to_ttl (o, &it->o_str);
266 RCCK (it->rc);
267 char *tmp = realloc (
268 res, strlen (res) + strlen (it->o_str) + strlen (o_join) + 1);
269 if (UNLIKELY (!tmp)) goto memfail;
270 res = strcat (strcat (tmp, o_join), it->o_str);
271 o_join = " ,\n "; // Double indent for objects.
272 }
273 }
274
275 char *s_sep = "\n.\n\n"; // Period goes on its own line for visibility.
276 char *tmp = realloc (res, strlen (res) + strlen (s_sep) + 1);
277 if (UNLIKELY (!tmp)) goto memfail;
278
279 *res_p = strcat (tmp, s_sep);
280
282 VOLK_link_map_free (lmap);
283
284 return it->rc;
285
286memfail:
287 free (res);
288 *res_p = NULL;
289 return VOLK_MEM_ERR;
290}
291
292
293static void
294gr_to_ttl_done (void *h)
295{
296 VOLK_TTLCodecIterator *it = h;
298 free (it->s_str);
299 free (it->p_str);
300 free (it->o_str);
301 free (it);
302}
303
304
305const VOLK_Codec ttl_codec = {
306 .name = "Turtle",
307 .mimetype = "text/turtle",
308 .extension = "ttl",
309
310 .encode_term = term_to_ttl,
311
312 .encode_graph_init = gr_to_ttl_init,
313 .encode_graph_iter = gr_to_ttl_iter,
314 .encode_graph_done = gr_to_ttl_done,
315
316 //.decode_term = VOLK_ttl_parse_term,
317 .decode_graph = VOLK_ttl_parse_doc,
318};
const VOLK_Codec ttl_codec
Turtle codec.
Definition codec_ttl.c:305
#define UNLIKELY(x)
Definition core.h:39
@ TRP_POS_S
Definition buffer.h:20
char * fmt_header(char *pfx)
Format an informational header.
Definition codec.c:116
VOLK_rc escape_lit(const char *in, char **out_p)
Add escape character (backslash) to illegal literal characters.
Definition codec.c:81
VOLK_LinkMap * VOLK_graph_connections(const VOLK_Graph *gr, const VOLK_Term *t, const VOLK_LinkType type)
Get term pairs connected to a term in a graph.
Definition graph.c:697
VOLK_TermSet * VOLK_graph_unique_terms(const VOLK_Graph *gr, VOLK_TriplePos pos)
Get all unique subjects, predicates, or objects in a graph.
Definition graph.c:791
const VOLK_Term * VOLK_graph_uri(const VOLK_Graph *gr)
Read-only graph URI.
Definition graph.c:263
const char *** VOLK_nsmap_dump(void)
Dump all entries of the namespace map.
Definition namespace.c:191
VOLK_rc VOLK_nsmap_denormalize_uri(const char *fq_uri, char **pfx_uri_p)
Convert a FQ URI string to a prefixed string if the prefix is found.
Definition namespace.c:153
#define CALLOC_GUARD(var, rc)
Allocate one pointer with calloc and return rc if it fails.
Definition core.h:381
char * strndup(const char *src, size_t max)
Replacement for GNU strndup.
Definition core.c:92
#define RCCK(exp)
Return exp return value if it is of VOLK_rc type and nonzero.
Definition core.h:323
#define PRCCK(exp)
Return exp return value if it is of VOLK_rc type and negative (=error).
Definition core.h:334
#define VOLK_ERROR
Generic error return code.
Definition core.h:123
#define VOLK_MEM_ERR
Memory allocation error.
Definition core.h:144
#define VOLK_NORESULT
No result yielded.
Definition core.h:100
#define VOLK_END
Loop end.
Definition core.h:107
#define VOLK_OK
Generic success return code.
Definition core.h:83
#define VOLK_PARSE_ERR
Codec parser error.
Definition core.h:126
int VOLK_rc
Definition core.h:79
struct hashmap VOLK_TermSet
a set of unique terms.
Definition term.h:124
VOLK_rc VOLK_term_set_next(VOLK_TermSet *ts, size_t *i, VOLK_Term **term)
Iterate through a term set.
Definition term.c:592
void VOLK_link_map_iter_free(VOLK_LinkMapIterator *it)
Free a link map iterator.
Definition term.c:707
VOLK_LinkMapIterator * VOLK_link_map_iter_new(const VOLK_LinkMap *lmap)
Create a new iterator to loop through a link map.
Definition term.c:695
VOLK_Term * VOLK_default_datatype
Default literal data type URI.
Definition term.c:60
#define VOLK_RDF_TYPE
Definition term.h:17
void VOLK_term_set_free(VOLK_TermSet *ts)
Free a term set.
Definition term.c:604
VOLK_rc VOLK_link_map_next(VOLK_LinkMapIterator *it, VOLK_Term **lt, VOLK_TermSet **ts)
Iterate through a link map.
Definition term.c:711
void VOLK_link_map_free(VOLK_LinkMap *lm)
Free a link map.
Definition term.c:637
char VOLK_LangTag[8]
Language tag, currently restricted to 7 characters.
Definition term.h:28
@ VOLK_LINK_OUTBOUND
Outbound link (po).
Definition term.h:96
@ VOLK_TERM_IRIREF
IRI reference.
Definition term.h:35
@ VOLK_TERM_LT_LITERAL
Language-tagged string literal.
Definition term.h:37
@ VOLK_TERM_LITERAL
Literal without language tag.
Definition term.h:36
@ VOLK_TERM_BNODE
Blank node.
Definition term.h:38
VOLK_rc VOLK_ttl_parse_doc(FILE *fh, const char *sh, VOLK_Graph **gr_p, size_t *ct, char **err_p)
Parse an RDF document in Turtle format.
TTL codec iterator.
Definition codec_ttl.c:13
char * s_str
Serialized subject block (output).
Definition codec_ttl.c:19
const VOLK_Codec * codec
Codec that generated this iterator.
Definition codec_ttl.c:14
const VOLK_Graph * gr
Graph being encoded.
Definition codec_ttl.c:15
char * p_str
Serialized predicate block.
Definition codec_ttl.c:20
VOLK_rc rc
Internal return code.
Definition codec_ttl.c:18
VOLK_TermSet * subjects
All subjects in the graph.
Definition codec_ttl.c:16
char * o_str
Serialized object block.
Definition codec_ttl.c:21
size_t s_cur
Term set cursor.
Definition codec_ttl.c:17
RDF term.
Definition term.h:62
char * data
URI, literal value, or BNode label.
Definition term.h:63
struct term_t * datatype
Data type IRI for VOLK_TERM_LITERAL.
Definition term.h:65
VOLK_TermType type
Term type.
Definition term.h:70
VOLK_LangTag lang
Lang tag for VOLK_TERM_LT_LITERAL.
Definition term.h:66