Volksdata 1.0b7
RDF library
Loading...
Searching...
No Matches
term.c
Go to the documentation of this file.
1#include "volksdata/term.h"
2
3#define MAX_VALID_TERM_TYPE VOLK_TERM_BNODE /* For type validation. */
4
5
6/*
7 * Data structures.
8 */
9
11typedef struct keyed_term {
14} KeyedTerm;
15
16
22typedef struct link {
25} Link;
26
27
31 size_t i;
32 size_t j;
33 const Link * link;
34};
35
36
37/*
38 * A link map is thus nested:
39 *
40 * - A link map contains a hash map of Link instances (link).
41 * - It also contains the single term that the other terms are related to
42 * (linked_t).
43 * - Each Link contains a KeyedTerm (term) and a TermSet (tset).
44 * - Each term set is a hash map of KeyedTerm instances.
45 * - Each KeyedTerm contains a Term and its hash.
46 */
47typedef struct link_map {
50 struct hashmap *links;
52
53
54/*
55 * External variables.
56 */
57
62
63
64/*
65 * Static variables.
66 */
67
68// Characters not allowed in a URI string.
69static const char *invalid_uri_chars = "<>\" {}|\\^`";
70
72static const VOLK_TermType MIN_VALID_TYPE = VOLK_TERM_IRIREF;
74static const VOLK_TermType MAX_VALID_TYPE = VOLK_TERM_BNODE;
75
76/*
77 * Static prototypes.
78 */
79
80static VOLK_rc
81term_init (
82 VOLK_Term *term, VOLK_TermType type, const char *data, void *metadata);
83
84
85/*
86 * Term set callbacks.
87 */
88
89static uint64_t
90tset_hash_fn (
91 const void *item, uint64_t _unused, uint64_t _unused2)
92{
93 (void) _unused;
94 (void) _unused2;
95
96 return ((const KeyedTerm *) item)->key;
97}
98
99
100static int
101tset_cmp_fn (const void *a, const void *b, void *_unused)
102{
103 (void) _unused;
104
105 return
106 ((const KeyedTerm *) a)->key -
107 ((const KeyedTerm *) b)->key;
108}
109
110
111static void
112tset_free_fn (void *item)
113{ VOLK_term_free (((KeyedTerm *) item)->term); }
114
115
116/*
117 * Link map callbacks.
118 */
119
120static uint64_t
121link_map_hash_fn (
122 const void *item, uint64_t _unused, uint64_t _unused2)
123{
124 (void) _unused;
125 (void) _unused2;
126
127 return ((const Link *)item)->term->key;
128}
129
130
131static int
132link_map_cmp_fn (const void *a, const void *b, void *_unused)
133{
134 (void) _unused;
135
136 return
137 ((const Link *)a)->term->key -
138 ((const Link *)b)->term->key;
139}
140
141
142static void
143link_map_free_fn (void *item)
144{
145 Link *link = item;
146 VOLK_term_free (link->term->term);
147 free (link->term);
148 VOLK_term_set_free (link->tset);
149}
150
151
152 /*
153 * Term API.
154 */
155
156VOLK_Term *
158 VOLK_TermType type, const char *data, void *metadata)
159{
160 VOLK_Term *term;
161 CALLOC_GUARD (term, NULL);
162
163 if (UNLIKELY (term_init (
164 term, type, data, metadata) != VOLK_OK)) {
165 free (term);
166 return NULL;
167 }
168
169 return term;
170}
171
172
173VOLK_Term *
175{
176 void *metadata = NULL;
177
178 if (src->type == VOLK_TERM_LITERAL)
179 metadata = (void *) src->datatype;
180 else if (src->type == VOLK_TERM_LT_LITERAL)
181 metadata = (void *) src->lang;
182
183 return VOLK_term_new (src->type, src->data, metadata);
184}
185
186
188VOLK_Term *
190{
191 if (UNLIKELY (!sterm)) return NULL;
192
193 VOLK_TermType type;
194 char *data;
195 void *metadata = NULL;
196
197 // Copy term type.
198 size_t cplen = sizeof (type);
199 char *cpcur = (char *)sterm->addr;
200 memcpy (&type, cpcur, cplen);
201
202 // Copy term data.
203 cpcur += cplen;
204 cplen = strlen (cpcur) + 1;
205 data = malloc (cplen);
206 NLNL (data);
207 memcpy (data, cpcur, cplen);
208
209 // If applicable, create term metadata.
210 cpcur += cplen;
211 if (type == VOLK_TERM_LITERAL) {
212 if (strlen(cpcur) > 0)
213 NLNL (metadata = (void *) VOLK_iriref_new (cpcur));
214 } else if (type == VOLK_TERM_LT_LITERAL) {
215 cplen = sizeof (VOLK_LangTag);
216 metadata = malloc (cplen);
217 NLNL (metadata);
218 memcpy (metadata, cpcur, cplen);
219 }
220
221 VOLK_Term *ret = VOLK_term_new (type, data, metadata);
222
223 free (data);
224 if (type == VOLK_TERM_LT_LITERAL) free (metadata);
225
226 return ret;
227}
228
229
230VOLK_Term *
231VOLK_iriref_new_abs (const VOLK_Term *root, const VOLK_Term *iri)
232{
233 if (iri->type != VOLK_TERM_IRIREF) {
234 log_error ("Provided path is not an IRI.");
235 return NULL;
236 }
237 if (root->type != VOLK_TERM_IRIREF) {
238 log_error ("Provided root is not an IRI.");
239 return NULL;
240 }
241
242 char
243 *data,
244 *pfx = VOLK_iriref_prefix (iri);
245
246 if (strlen (pfx) > 0) data = iri->data;
247
248 else if (iri->data[0] == '/') {
249 free (pfx);
250
251 pfx = VOLK_iriref_prefix (root);
252 data = malloc (strlen (iri->data) + strlen (pfx) + 1);
253 if (!data) return NULL;
254
255 sprintf (data, "%s%s", pfx, iri->data);
256
257 } else {
258 data = malloc (strlen (iri->data) + strlen (root->data) + 1);
259 if (!data) return NULL;
260
261 sprintf (data, "%s%s", root->data, iri->data);
262 }
263 free (pfx);
264
265 VOLK_Term *ret = VOLK_iriref_new (data);
266 if (data != iri->data) free (data);
267
268 return ret;
269}
270
271
272VOLK_Term *
273VOLK_iriref_new_rel (const VOLK_Term *root, const VOLK_Term *iri)
274{
275 if (iri->type != VOLK_TERM_IRIREF) {
276 log_error ("Provided path is not an IRI.");
277 return NULL;
278 }
279 if (root->type != VOLK_TERM_IRIREF) {
280 log_error ("Provided root is not an IRI.");
281 return NULL;
282 }
283
284 size_t offset = (
285 strstr (iri->data, root->data) == iri->data ?
286 strlen (root->data) : 0);
287
288 return VOLK_iriref_new (iri->data + offset);
289}
290
291
294{
295 /* The serialized data are a byte string (unsigned char *) formatted in
296 * the following way:
297 *
298 * - (unsigned char) term->type
299 * - (char *) NUL-delimited term->data
300 * - (char *) serialized metadata as byte string
301 *
302 * All fields are cast to uchar. The first field is fixed, the
303 * second and third are NUL-delimited, hence all fields are easily
304 * identifiable.
305 *
306 * Metadata are:
307 *
308 * - For VOLK_TERM_IRIREF, no data. IRI info is calculated on demand.
309 * - For VOLK_TERM_LITERAL, a `char` (`\1` or `\2`) indicating if a
310 * language tag is present, followed by the fully-qualified data type URI
311 * or the language tag, as a `NUL`-delimited string. For a `xsd:string`
312 * literal with no language, it is a zero-length string.
313 * - For VOLK_TERM_BNODE, no data. Skolemization ID is calculated on
314 * deserialization.
315 *
316 * In serializing a term, the fact that two terms of different types may
317 * be semantically identical must be taken into account. Specifically, a
318 * VOLK_TERM_LT_LITERAL with no language tag is identical to a
319 * VOLK_TERM_LITERAL of xsd:string type, made up of the same string. Such
320 * terms must have identical serializations.
321 */
322
323 if (UNLIKELY (!term)) return NULL;
324
325 VOLK_Buffer *sterm;
326 CALLOC_GUARD (sterm, NULL);
327
328 sterm->size = sizeof(VOLK_TermType) + strlen(term->data) + 1;
329 NLNL (sterm->addr = malloc (sterm->size));
330
331 // Copy term type.
332 size_t offset = 0;
333 size_t cplen = sizeof(term->type);
334 memcpy (sterm->addr, &term->type, cplen);
335
336 // Copy term data.
337 offset += cplen;
338 cplen = strlen (term->data) + 1;
339 memcpy (sterm->addr + offset, term->data, cplen);
340
341 // If applicable, copy literal metadata.
342 offset += cplen;
343 // Copy data type URI string or lang tag.
344 if (term->type == VOLK_TERM_LITERAL) {
345 // Non-language-tagged term.
346 // Don't store default datatype (xsd:string).
347 if (term->datatype == VOLK_default_datatype) {
348 NLNL (sterm->addr = realloc (sterm->addr, ++sterm->size));
349 sterm->addr[offset] = '\0';
350 }
351 else {
352 cplen = strlen (term->datatype->data) + 1;
353 sterm->size += cplen;
354 NLNL (sterm->addr = realloc (sterm->addr, sterm->size));
355 memcpy (
356 sterm->addr + offset, term->datatype->data, cplen);
357 }
358 } else if (term->type == VOLK_TERM_LT_LITERAL) {
359 // Language-tagged term.
360 cplen = sizeof (VOLK_LangTag);
361 sterm->size += cplen;
362 NLNL (sterm->addr = realloc (sterm->addr, sterm->size));
363 memcpy (sterm->addr + offset, &term->lang, sizeof (VOLK_LangTag));
364 }
365
366 return sterm;
367}
368
369
372{
373 VOLK_Buffer *buf;
374
375 if (UNLIKELY (!term)) buf = BUF_DUMMY;
376 else buf = VOLK_term_serialize (term);
377
378 VOLK_Key key = VOLK_buffer_hash (buf);
379
380 VOLK_buffer_free (buf);
381
382 return key;
383}
384
385
386void
388{
389 if (UNLIKELY (!term)) return;
390
391 free (term->data);
392 /*
393 if (
394 term->type == VOLK_TERM_LITERAL &&
395 term->datatype != VOLK_default_datatype)
396 free (term->datatype);
397 */
398 free (term);
399}
400
401
402char *
404{
405 if (iri->type != VOLK_TERM_IRIREF) {
406 log_error ("Term is not a IRI ref type.");
407 return NULL;
408 }
409
410 // if (iri->iri_info->prefix.size == 0) return NULL;
411 VOLK_IRIInfo iri_info;
412 RCNL (VOLK_parse_iri (iri->data, &iri_info));
413
414 return strndup (
415 iri->data + iri_info.prefix.offset, iri_info.prefix.size);
416}
417
418
419char *
421{
422 if (iri->type != VOLK_TERM_IRIREF) {
423 log_error ("Term is not a IRI ref type.");
424 return NULL;
425 }
426
427 // if (iri->iri_info->path.size == 0) return NULL;
428 VOLK_IRIInfo iri_info;
429 RCNL (VOLK_parse_iri (iri->data, &iri_info));
430
431 return strndup (iri->data + iri_info.path.offset, iri_info.path.size);
432}
433
434
435char *
437{
438 if (iri->type != VOLK_TERM_IRIREF) {
439 log_error ("Term is not a IRI ref type.");
440 return NULL;
441 }
442
443 // if (iri->iri_info->frag.size == 0) return NULL;
444 VOLK_IRIInfo iri_info;
445 RCNL (VOLK_parse_iri (iri->data, &iri_info));
446
447 return strndup (iri->data + iri_info.frag.offset, iri_info.frag.size);
448}
449
450
451/*
452 * Triple API.
453 */
454
457{
458 VOLK_Triple *spo = malloc (sizeof (*spo));
459 if (!spo) return NULL;
460
461 if (UNLIKELY (VOLK_triple_init (spo, s, p, o))) {
462 free (spo);
463 return NULL;
464 }
465
466 return spo;
467}
468
469
472{
473 VOLK_Triple *spo = malloc (sizeof (*spo));
474 if (!spo) return NULL;
475
476 spo->s = VOLK_term_new_from_buffer (sspo->s);
477 spo->p = VOLK_term_new_from_buffer (sspo->p);
478 spo->o = VOLK_term_new_from_buffer (sspo->o);
479
480 return spo;
481}
482
483
486{
487 VOLK_BufferTriple *sspo = malloc (sizeof (*sspo));
488 if (!sspo) return NULL;
489
490 sspo->s = VOLK_term_serialize (spo->s);
491 sspo->p = VOLK_term_serialize (spo->p);
492 sspo->o = VOLK_term_serialize (spo->o);
493
494 return sspo;
495}
496
497
500{
501 /* FIXME TRP_DUMMY is a problem here.
502 if (! VOLK_IS_IRI (s) && s->type != VOLK_TERM_BNODE) {
503 log_error ("Subject is not of a valid term type: %d", s->type);
504 return VOLK_VALUE_ERR;
505 }
506 if (! VOLK_IS_IRI (p)) {
507 log_error ("Predicate is not of a valid term type: %d", p->type);
508 return VOLK_VALUE_ERR;
509 }
510 */
511
512 spo->s = s;
513 spo->p = p;
514 spo->o = o;
515
516 return VOLK_OK;
517}
518
519
520void
522{
523 if (UNLIKELY (!spo)) return;
524
525 VOLK_term_free (spo->s);
526 VOLK_term_free (spo->p);
527 VOLK_term_free (spo->o);
528}
529
530
531void
533{
534 if (UNLIKELY (!spo)) return;
535
536 VOLK_term_free (spo->s);
537 VOLK_term_free (spo->p);
538 VOLK_term_free (spo->o);
539
540 free (spo);
541}
542
543
544/*
545 * Multi-add functions.
546 */
547
550{
551 // Capacity of 4 is an arbitrary guess.
552 VOLK_TermSet *ts = hashmap_new (
553 sizeof (KeyedTerm), 4, VOLK_HASH_SEED, 0,
554 tset_hash_fn, tset_cmp_fn, tset_free_fn, NULL);
555 if (UNLIKELY (hashmap_oom (ts))) return NULL;
556
557 return ts;
558}
559
560
563{
564 VOLK_Hash key = VOLK_term_hash (term);
565 KeyedTerm entry_s = {.key=key, .term=term};
566
567 const KeyedTerm *ex = hashmap_get (ts, &entry_s);
568 if (ex) {
569 if (existing) *existing = ex->term;
570 return VOLK_NOACTION;
571 }
572
573 hashmap_set (ts, &entry_s);
574 if (hashmap_oom (ts)) return VOLK_MEM_ERR;
575
576 return VOLK_OK;
577}
578
579
580const VOLK_Term *
582{
583 const KeyedTerm *entry = hashmap_get (ts, &(KeyedTerm){.key=key});
584 if (entry) LOG_TRACE("ID found for key %lx: %s", key, entry->term->data);
585 else {LOG_TRACE("No ID found for key %lx.", key);}
586
587 return (entry) ? entry->term : NULL;
588}
589
590
593{
594 KeyedTerm *kt = NULL;
595 if (!hashmap_iter (ts, i, (void **)&kt)) return VOLK_END;
596
597 if (term) *term = kt->term;
598
599 return VOLK_OK;
600}
601
602
603void
605{
606 if (UNLIKELY (!ts)) return;
607 hashmap_free (ts);
608}
609
610
611size_t
613{ return hashmap_count (ts); }
614
615
617VOLK_link_map_new (const VOLK_Term *linked_term, VOLK_LinkType type)
618{
619 VOLK_LinkMap *lm;
620 MALLOC_GUARD (lm, NULL);
621 lm->type = type;
622 lm->links = hashmap_new (
623 sizeof (Link), 0, VOLK_HASH_SEED, 0,
624 link_map_hash_fn, link_map_cmp_fn, link_map_free_fn, NULL);
625 if (!linked_term) {
626 log_error ("term must not be NULL.");
627 free (lm);
628 return NULL;
629 }
630 lm->linked_t = VOLK_term_copy (linked_term);
631
632 return lm;
633}
634
635
636void
638{
639 hashmap_free (lm->links);
641 free (lm);
642}
643
644
647{ return map->type; }
648
649
650// TODO Memory error handling.
653 VOLK_LinkMap *lmap, VOLK_Term *term, VOLK_TermSet *tset)
654{
655 // Keyed term to look up the link term and insert it, if necessary.
656 KeyedTerm entry_s = {.key=VOLK_term_hash (term), .term=term};
657
658 const Link *ex = hashmap_get (lmap->links, &(Link){.term=&entry_s});
659 if (ex) {
660 // Add terms one by one to the existing term set.
661 LOG_TRACE(
662 "Linking term %s exists. Adding individual terms.",
663 ex->term->term->data);
664 size_t i = 0;
665 KeyedTerm *kt;
666 while (hashmap_iter (tset, &i, (void **)&kt)) {
667 LOG_TRACE(
668 "Adding term %s to link %s",
669 kt->term->data, ex->term->term->data);
670 if (hashmap_get (ex->tset, kt))
671 // Term already exist, free the new one and move on.
672 VOLK_term_free (kt->term);
673 else
674 // Insert KeyedTerm, the term set now owns the underlying term.
675 hashmap_set (ex->tset, kt);
676 }
677 // Free link term that hasn't been used.
678 VOLK_term_free (term);
679 } else {
680 // Add the new term and the termset wholesale.
681 LOG_TRACE("Adding new linking term %s.", term->data);
682 // Allocate inserted member on heap, it will be owned by the map.
683 KeyedTerm *ins;
685 memcpy (ins, &entry_s, sizeof (entry_s));
686 Link link = {.term=ins, .tset=tset};
687 hashmap_set (lmap->links, &link);
688 }
689
690 return VOLK_OK;
691}
692
693
694VOLK_LinkMapIterator *
696{
697 VOLK_LinkMapIterator *it;
698 CALLOC_GUARD (it, NULL);
699 it->map = lmap;
700
701 return it;
702}
703
704
705// This leaves the link and link map references intact.
706void
707VOLK_link_map_iter_free (VOLK_LinkMapIterator *it) { free (it); }
708
709
712 VOLK_LinkMapIterator *it, VOLK_Term **lt, VOLK_TermSet **ts)
713{
714 if (!hashmap_iter (it->map->links, &it->i, (void **)&it->link))
715 return VOLK_END;
716
717 *lt = it->link->term->term;
718 *ts = it->link->tset;
719
720 return VOLK_OK;
721}
722
723
724// TODO dismantle if the only triple generator is for the graph.
727 VOLK_LinkMapIterator *it, VOLK_Triple *spo)
728{
729 // Assign external (related) term.
730 if (it->map->type == VOLK_LINK_INBOUND)
731 spo->o = it->map->linked_t;
732 else if (it->map->type == VOLK_LINK_OUTBOUND)
733 spo->s = it->map->linked_t;
734 else spo->p = it->map->linked_t;
735
736 KeyedTerm *kt;
737
738 // If we are already handling a link, continue the internal loop.
739 if (it->link) goto int_loop;
740ext_loop:
741 // Advance external counter and start new internal loop.
742 it->j = 0;
743 if (!hashmap_iter (it->map->links, &it->i, (void **)&it->link))
744 return VOLK_END;
745int_loop:
746 // If end of the term set is reached, start with a new linking term.
747 if (!hashmap_iter (it->link->tset, &it->j, (void **)&kt)) goto ext_loop;
748
749 // Continue pulling from term set.
750 // Assign linking term.
751 if (it->map->type == VOLK_LINK_EDGE) spo->s = it->link->term->term;
752 else spo->p = it->link->term->term;
753
754 // Assign term in term set.
755 if (it->map->type == VOLK_LINK_INBOUND) spo->s = kt->term;
756 else spo->o = kt->term;
757
758 return VOLK_OK;
759}
760
761
762/*
763 * Static functions.
764 */
765
766static VOLK_rc
767term_init (
768 VOLK_Term *term, VOLK_TermType type,
769 const char *data, void *metadata)
770{
771 // Exit early if environment is not initialized.
772 // EXCEPT for IRIRef which is used inside of VOLK_init().
773 if (!VOLK_env_is_init && type != VOLK_TERM_IRIREF)
774 return VOLK_ENV_ERR;
775
776 // Undefined type. Make quick work of it.
777 if (type == VOLK_TERM_UNDEFINED) {
778 term->type = type;
779 if (data) {
780 term->data = malloc (strlen (data) + 1);
781 if (UNLIKELY (!term->data)) return VOLK_MEM_ERR;
782 strcpy (term->data, data);
783 }
784 return VOLK_OK;
785 }
786
787 if (type < MIN_VALID_TYPE || type > MAX_VALID_TYPE) {
788 log_error ("%d is not a valid term type.", type);
789 return VOLK_VALUE_ERR;
790 }
791
792 term->type = type;
793
794 if (data) {
795 // Validate IRI.
796 if (term->type == VOLK_TERM_IRIREF) {
797 char *fquri = (char *) data;
798
799 if (strpbrk (fquri, invalid_uri_chars) != NULL) {
800 log_warn (
801 "Characters %s are not valid in a URI. Got: %s\n",
802 invalid_uri_chars, fquri);
803#if 0
804 // TODO This causes W3C TTL test #29 to fail. Remove?
805 return VOLK_VALUE_ERR;
806#endif
807 }
808 }
809
810 term->data = strdup (data);
811
812 } else {
813 // No data. Make up a random UUID or URI if allowed.
814 if (type == VOLK_TERM_IRIREF || type == VOLK_TERM_BNODE) {
815 uuid_t uuid;
816 uuid_generate_random (uuid);
817
818 uuid_str_t uuid_str;
819 uuid_unparse_lower (uuid, uuid_str);
820
821 if (type == VOLK_TERM_IRIREF) {
822 term->data = malloc (UUID4_URN_SIZE);
823 snprintf (
824 term->data, UUID4_URN_SIZE, "urn:uuid:%s", uuid_str);
825 } else term->data = strdup (uuid_str);
826 } else {
827 log_error ("No data provided for term.");
828 return VOLK_VALUE_ERR;
829 }
830 }
831
832 if (term->type == VOLK_TERM_LT_LITERAL) {
833 if (!metadata) {
834 log_warn ("Lang tag is NULL. Creating a non-tagged literal.");
835 term->type = VOLK_TERM_LITERAL;
836 } else {
837 // FIXME metadata should be const all across.
838 char *lang_str = (char *) metadata;
839 LOG_TRACE("Lang string: '%s'", lang_str);
840 // Lang tags longer than 7 characters will be truncated.
841 strncpy(term->lang, lang_str, sizeof (term->lang) - 1);
842 if (strlen (term->lang) < 1) {
843 log_error ("Lang tag cannot be an empty string.");
844 return VOLK_VALUE_ERR;
845 }
846 term->lang[7] = '\0';
847 }
848 }
849
850 if (term->type == VOLK_TERM_LITERAL) {
851 term->datatype = metadata;
852 if (! term->datatype) term->datatype = VOLK_default_datatype;
853 LOG_TRACE("Storing data type: %s", term->datatype->data);
854
855 if (term->datatype->type != VOLK_TERM_IRIREF) {
856 log_error (
857 "Literal data type is not an IRI: %s",
858 term->datatype->data);
859
860 return VOLK_VALUE_ERR;
861 }
862
863 VOLK_Term *ex = NULL;
865 if (ex && ex != term->datatype) {
866 // Replace datatype handle with the one in term cache, and free
867 // the new one.
868 if (term->datatype != VOLK_default_datatype)
869 VOLK_term_free (term->datatype);
870 term->datatype = ex;
871 }
872
873 //LOG_TRACE("Datatype address: %p", term->datatype);
874 LOG_TRACE("Datatype hash: %lx", VOLK_term_hash (term->datatype));
875
876 } else if (term->type == VOLK_TERM_BNODE) {
877 // TODO This is not usable for global skolemization.
878 term->bnode_id = VOLK_HASH (
879 term->data, strlen (term->data) + 1, VOLK_HASH_SEED);
880 }
881
882 return VOLK_OK;
883}
884
885
887VOLK_parse_iri (char *iri_str, VOLK_IRIInfo *iri_info) {
888 char *cur = iri_str;
889 size_t iri_len = strlen (iri_str);
890 MatchCoord tmp = {}; // Temporary storage for capture groups
891
892 memset (iri_info, 0, sizeof (*iri_info));
893 //LOG_DEBUG("Parsing IRI: %s", iri_str);
894 // #2: ([^:/?#]+)
895 while (
896 *cur != ':' && *cur != '/' && *cur != '?'
897 && *cur != '#' && *cur != '\0') {
898 tmp.size++;
899 cur++;
900 }
901
902 // Non-capturing: (?([^:/?#]+):)?
903 if (tmp.size > 0 && *cur == ':') {
904 // Got capture groups #2 and #3. Store them.
905 iri_info->scheme.offset = 0;
906 iri_info->scheme.size = tmp.size;
907 cur++;
908 //LOG_DEBUG("Group #2: %lu, %lu", coord[2].offset, coord[2].size);
909 } else cur = iri_str; // Backtrack if no match.
910
911 // Non-capturing: (?//([^/?#]*))?
912 if (*cur == '/' && *(cur + 1) == '/') {
913 cur += 2;
914 tmp.offset = cur - iri_str;
915 tmp.size = 0;
916
917 // #3: ([^/?#]*)
918 while (*cur != '/' && *cur != '?' && *cur != '#' && *cur != '\0') {
919 tmp.size++;
920 cur++;
921 }
922 iri_info->auth.offset = tmp.offset;
923 iri_info->auth.size = tmp.size;
924 //LOG_DEBUG("Group #3: %lu, %lu", coord[3].offset, coord[3].size);
925 }
926
927 // Capture group 1.
928 iri_info->prefix.offset = 0;
929 iri_info->prefix.size = cur - iri_str;
930 //LOG_DEBUG("Group #1: %lu, %lu", coord[1].offset, coord[1].size);
931
932 tmp.offset = cur - iri_str;
933 tmp.size = 0;
934
935 iri_info->path.offset = tmp.offset;
936 iri_info->path.size = iri_len - tmp.offset;
937 //LOG_DEBUG("Group #4: %lu, %lu", coord[4].offset, coord[4].size);
938
939 // Non-capturing: (?[^?#]*)
940 while (*cur != '?' && *cur != '#' && *cur != '\0') {
941 tmp.size++;
942 cur++;
943 }
944
945 // Non-capturing: (?\?([^#]*))
946 if (*cur == '?') {
947 // 5: ([^#]*)
948 tmp.offset = ++cur - iri_str;
949 tmp.size = 0;
950 while (*cur != '#' && *cur != '\0') {
951 tmp.size++;
952 cur++;
953 }
954
955 if (tmp.size > 0) {
956 // Got capture group #5.
957 iri_info->query.offset = tmp.offset;
958 iri_info->query.size = tmp.size;
959 //LOG_DEBUG("Group #5: %lu, %lu", coord[5].offset, coord[5].size);
960 }
961 }
962
963 // Non-capturing: (?#(.*))?
964 if (*cur == '#') {
965 // #6: (.*)
966 iri_info->frag.offset = ++cur - iri_str;
967 iri_info->frag.size = iri_str + iri_len - cur;
968 //LOG_DEBUG("Group #6: %lu, %lu", coord[6].offset, coord[6].size);
969 }
970
971 /* TODO add error cases.
972 if (UNLIKELY (rc != VOLK_OK)) {
973 log_error ("Error matching URI pattern.");
974
975 return VOLK_VALUE_ERR;
976 }
977 */
978
979 return VOLK_OK;
980}
981
982/*
983 * Extern inline functions.
984 */
985
986VOLK_Key VOLK_term_hash (const VOLK_Term *term);
987VOLK_Term *VOLK_iriref_new (const char *data);
988VOLK_Term *VOLK_iriref_new_ns (const char *data);
989VOLK_Term *VOLK_literal_new (const char *data, VOLK_Term *datatype);
990VOLK_Term *VOLK_lt_literal_new (const char *data, char *lang);
991VOLK_Term *VOLK_bnode_new (const char *data);
992bool VOLK_term_equals (const VOLK_Term *term1, const VOLK_Term *term2);
#define UNLIKELY(x)
Definition core.h:39
VOLK_Key VOLK_buffer_hash(const VOLK_Buffer *buf)
Hash a buffer.
Definition buffer.h:175
VOLK_TriplePos
Triple position of s, p, o.
Definition buffer.h:19
#define BUF_DUMMY
Dummy buffer to be used with VOLK_buffer_init.
Definition buffer.h:154
void VOLK_buffer_free(VOLK_Buffer *buf)
Free a buffer.
Definition buffer.c:97
#define VOLK_HASH_SEED
Seed used for all hashing. Compile-time configurable.
Definition core.h:175
VOLK_Hash64 VOLK_Hash
Default hash data type.
Definition core.h:214
bool VOLK_env_is_init
Whether the environment is initialized.
Definition core.c:11
#define VOLK_HASH(...)
Default hashing function. Depends on architecture.
Definition core.h:186
#define MALLOC_GUARD(var, rc)
Allocate one pointer with malloc and return rc if it fails.
Definition core.h:375
#define RCNL(exp)
Return NULL if exp returns a nonzero value.
Definition core.h:345
#define LOG_TRACE(...)
Definition core.h:276
char * strdup(const char *src)
Replacement for GNU strdup.
Definition core.c:109
#define NLNL(exp)
Log error and return NULL if exp is NULL.
Definition core.h:367
#define CALLOC_GUARD(var, rc)
Allocate one pointer with calloc and return rc if it fails.
Definition core.h:381
char * strndup(const char *src, size_t max)
Replacement for GNU strndup.
Definition core.c:92
size_t VOLK_Key
Term key, i.e., hash of a serialized term.
Definition core.h:230
char uuid_str_t[UUIDSTR_SIZE]
UUID string type.
Definition core.h:239
#define VOLK_VALUE_ERR
An invalid input value was provided.
Definition core.h:129
#define VOLK_MEM_ERR
Memory allocation error.
Definition core.h:144
#define VOLK_END
Loop end.
Definition core.h:107
#define VOLK_OK
Generic success return code.
Definition core.h:83
#define VOLK_NOACTION
No action taken.
Definition core.h:93
int VOLK_rc
Definition core.h:79
#define VOLK_ENV_ERR
Error while handling environment setup; or environment not initialized.
Definition core.h:158
VOLK_Key VOLK_triple_hash(const VOLK_Triple *trp)
Hash a triple.
Definition term.h:513
VOLK_Term * VOLK_lt_literal_new(const char *data, char *lang)
Shortcut to create a language-tagged literal term.
Definition term.h:317
VOLK_rc VOLK_term_set_add(VOLK_TermSet *ts, VOLK_Term *term, VOLK_Term **existing)
Add term to a term set.
Definition term.c:562
VOLK_Triple * VOLK_triple_new_from_btriple(const VOLK_BufferTriple *sspo)
Definition term.c:471
struct hashmap VOLK_TermSet
A set of unique terms.
Definition term.h:124
VOLK_rc VOLK_term_set_next(VOLK_TermSet *ts, size_t *i, VOLK_Term **term)
Iterate through a term set.
Definition term.c:592
VOLK_Triple * VOLK_triple_new(VOLK_Term *s, VOLK_Term *p, VOLK_Term *o)
Create a new triple from three terms.
Definition term.c:456
char * VOLK_iriref_frag(const VOLK_Term *iri)
Get the fragment portion of a IRI ref.
Definition term.c:436
VOLK_Term * VOLK_iriref_new_rel(const VOLK_Term *root, const VOLK_Term *iri)
Create a new relative IRI from an absolute IRI and a web root IRI.
Definition term.c:273
VOLK_rc VOLK_parse_iri(char *iri_str, VOLK_IRIInfo *iri_info)
Scan an IRI string and parse IRI parts.
Definition term.c:887
void VOLK_link_map_iter_free(VOLK_LinkMapIterator *it)
Free a link map iterator.
Definition term.c:707
VOLK_LinkType
Link type.
Definition term.h:94
VOLK_Term * VOLK_iriref_new(const char *data)
Create an IRI reference.
Definition term.h:192
VOLK_TermType
Term type.
Definition term.h:31
bool VOLK_term_equals(const VOLK_Term *term1, const VOLK_Term *term2)
Compare two terms.
Definition term.h:377
size_t VOLK_term_set_size(VOLK_TermSet *ts)
Size of a term set.
Definition term.c:612
VOLK_Term * VOLK_iriref_new_abs(const VOLK_Term *root, const VOLK_Term *iri)
Create a new absolute IRI from a path relative to a root IRI.
Definition term.c:231
VOLK_Term * VOLK_literal_new(const char *data, VOLK_Term *datatype)
Shortcut to create a literal term.
Definition term.h:266
VOLK_LinkMapIterator * VOLK_link_map_iter_new(const VOLK_LinkMap *lmap)
Create a new iterator to loop through a link map.
Definition term.c:695
char * VOLK_iriref_prefix(const VOLK_Term *iri)
Get the prefix portion of a IRI ref.
Definition term.c:403
VOLK_LinkMap * VOLK_link_map_new(const VOLK_Term *linked_term, VOLK_LinkType type)
New link map.
Definition term.c:617
VOLK_Term * VOLK_default_datatype
Default literal data type URI.
Definition term.c:60
VOLK_rc VOLK_link_map_triples(VOLK_LinkMapIterator *it, VOLK_Triple *spo)
Iterate over a link map and generate triples.
Definition term.c:726
void VOLK_triple_done(VOLK_Triple *spo)
Free the internal pointers of a triple.
Definition term.c:521
VOLK_rc VOLK_triple_init(VOLK_Triple *spo, VOLK_Term *s, VOLK_Term *p, VOLK_Term *o)
Initialize internal term pointers in a heap-allocated triple.
Definition term.c:499
const VOLK_Term * VOLK_term_set_get(VOLK_TermSet *ts, VOLK_Key key)
Get a term from a term set.
Definition term.c:581
VOLK_Term * VOLK_bnode_new(const char *data)
Shortcut to create a blank node.
Definition term.h:331
VOLK_TermSet * VOLK_term_set_new()
Create a new term set.
Definition term.c:549
uint32_t VOLK_default_dtype_key
Compiled hash of default literal data type.
Definition term.c:58
void VOLK_term_set_free(VOLK_TermSet *ts)
Free a term set.
Definition term.c:604
VOLK_Key VOLK_term_hash(const VOLK_Term *term)
Hash a term.
Definition term.c:371
VOLK_rc VOLK_link_map_next(VOLK_LinkMapIterator *it, VOLK_Term **lt, VOLK_TermSet **ts)
Iterate through a link map.
Definition term.c:711
void VOLK_link_map_free(VOLK_LinkMap *lm)
Free a link map.
Definition term.c:637
VOLK_Term * VOLK_iriref_new_ns(const char *data)
Create an IRI reference from a namespace-prefixed string.
Definition term.h:205
#define UUID4_URN_SIZE
Definition term.h:14
char * VOLK_iriref_path(const VOLK_Term *iri)
Get the path portion of a IRI ref.
Definition term.c:420
char VOLK_LangTag[8]
Language tag, currently restricted to 7 characters.
Definition term.h:28
void VOLK_term_free(VOLK_Term *term)
Definition term.c:387
VOLK_Term * VOLK_triple_pos(const VOLK_Triple *trp, VOLK_TriplePos n)
Get triple by term position.
Definition term.h:499
VOLK_Buffer * VOLK_term_serialize(const VOLK_Term *term)
Serialize a term into a buffer.
Definition term.c:293
VOLK_Term * VOLK_default_ctx
Default context.
Definition term.c:59
VOLK_Term * VOLK_term_new_from_buffer(const VOLK_Buffer *sterm)
See notes in VOLK_term_serialize function body for format info.
Definition term.c:189
VOLK_rc VOLK_link_map_add(VOLK_LinkMap *lmap, VOLK_Term *term, VOLK_TermSet *tset)
Add a term - term set pair to a link map.
Definition term.c:652
VOLK_LinkType VOLK_link_map_type(const VOLK_LinkMap *map)
Return the link map type.
Definition term.c:646
VOLK_BufferTriple * VOLK_triple_serialize(const VOLK_Triple *spo)
Definition term.c:485
VOLK_Term * VOLK_term_new(VOLK_TermType type, const char *data, void *metadata)
Create a new term.
Definition term.c:157
VOLK_TermSet * VOLK_term_cache
Global term cache.
Definition term.c:61
void VOLK_triple_free(VOLK_Triple *spo)
Free a triple and all its internal pointers.
Definition term.c:532
VOLK_Term * VOLK_term_copy(const VOLK_Term *src)
Copy a term.
Definition term.c:174
@ VOLK_LINK_EDGE
Edge link (so).
Definition term.h:97
@ VOLK_LINK_INBOUND
Inbound link (sp).
Definition term.h:95
@ VOLK_LINK_OUTBOUND
Outbound link (po).
Definition term.h:96
@ VOLK_TERM_IRIREF
IRI reference.
Definition term.h:35
@ VOLK_TERM_UNDEFINED
Definition term.h:32
@ VOLK_TERM_LT_LITERAL
Language-tagged string literal.
Definition term.h:37
@ VOLK_TERM_LITERAL
Literal without language tag.
Definition term.h:36
@ VOLK_TERM_BNODE
Blank node.
Definition term.h:38
Key-term pair in term set.
Definition term.c:11
VOLK_Term * term
Term handle.
Definition term.c:13
VOLK_Key key
Key (hash) of the term.
Definition term.c:12
Match coordinates in IRI parsing results.
Definition term.h:42
unsigned int size
Length of match.
Definition term.h:44
unsigned int offset
Offset of match from start of string.
Definition term.h:43
Triple of byte buffers.
Definition buffer.h:60
VOLK_Buffer * o
Definition buffer.h:63
VOLK_Buffer * s
Definition buffer.h:61
VOLK_Buffer * p
Definition buffer.h:62
General-purpose data buffer.
Definition buffer.h:47
unsigned char * addr
Definition buffer.h:48
size_t size
Definition buffer.h:49
Matching sub-patterns for IRI parts.
Definition term.h:48
MatchCoord frag
Fragment (frag).
Definition term.h:55
MatchCoord auth
Authority (example.org).
Definition term.h:51
MatchCoord query
Query (query=blah).
Definition term.h:54
MatchCoord prefix
Prefix (http://example.org).
Definition term.h:49
MatchCoord path
Definition term.h:52
MatchCoord scheme
Scheme (http).
Definition term.h:50
VOLK_Term * linked_t
Linked term.
Definition term.c:49
VOLK_LinkType type
Link type.
Definition term.c:48
struct hashmap * links
Map of Link instances.
Definition term.c:50
RDF term.
Definition term.h:62
VOLK_Key bnode_id
BN ID for comparison & skolemization.
Definition term.h:67
char * data
URI, literal value, or BNode label.
Definition term.h:63
struct term_t * datatype
Data type IRI for VOLK_TERM_LITERAL.
Definition term.h:65
VOLK_TermType type
Term type.
Definition term.h:70
VOLK_LangTag lang
Lang tag for VOLK_TERM_LT_LITERAL.
Definition term.h:66
RDF triple.
Definition term.h:86
VOLK_Term * p
Predicate.
Definition term.h:88
VOLK_Term * s
Subject.
Definition term.h:87
VOLK_Term * o
Object.
Definition term.h:89