Volksdata 1.0b7
RDF library
Loading...
Searching...
No Matches
term.c
Go to the documentation of this file.
1#include "volksdata/term.h"
2
3#define MAX_VALID_TERM_TYPE VOLK_TERM_BNODE /* For type validation. */
4
5
6/*
7 * Data structures.
8 */
9
11typedef struct keyed_term {
14} KeyedTerm;
15
16
22typedef struct link {
25} Link;
26
27
31 size_t i;
32 size_t j;
33 const Link * link;
34};
35
36
37/*
38 * A link map is thus nested:
39 *
40 * - A link map contains a hash map of Link instances (link).
41 * - It also contains the single term that the other terms are related to
42 * (linked_t).
43 * - Each Link contains a KeyedTerm (term) and a TermSet (tset).
44 * - Each term set is a hash map of KeyedTerm instances.
45 * - Each KeyedTerm contains a Term and its hash.
46 */
47typedef struct link_map {
50 struct hashmap *links;
52
53
54/*
55 * External variables.
56 */
57
62
63
64/*
65 * Static variables.
66 */
67
68// Characters not allowed in a URI string.
69static const char *invalid_uri_chars = "<>\" {}|\\^`";
70
72static const VOLK_TermType MIN_VALID_TYPE = VOLK_TERM_IRIREF;
74static const VOLK_TermType MAX_VALID_TYPE = VOLK_TERM_BNODE;
75
76/*
77 * Static prototypes.
78 */
79
80static VOLK_rc
81term_init (
82 VOLK_Term *term, VOLK_TermType type, const char *data, void *metadata);
83
84
85/*
86 * Term set callbacks.
87 */
88
89static uint64_t
90tset_hash_fn (
91 const void *item, uint64_t _unused, uint64_t _unused2)
92{
93 (void) _unused;
94 (void) _unused2;
95
96 return ((const KeyedTerm *) item)->key;
97}
98
99
100static int
101tset_cmp_fn (const void *a, const void *b, void *_unused)
102{
103 (void) _unused;
104
105 return
106 ((const KeyedTerm *) a)->key -
107 ((const KeyedTerm *) b)->key;
108}
109
110
111static void
112tset_free_fn (void *item)
113{ VOLK_term_free (((KeyedTerm *) item)->term); }
114
115
116/*
117 * Link map callbacks.
118 */
119
120static uint64_t
121link_map_hash_fn (
122 const void *item, uint64_t _unused, uint64_t _unused2)
123{
124 (void) _unused;
125 (void) _unused2;
126
127 return ((const Link *)item)->term->key;
128}
129
130
131static int
132link_map_cmp_fn (const void *a, const void *b, void *_unused)
133{
134 (void) _unused;
135
136 return
137 ((const Link *)a)->term->key -
138 ((const Link *)b)->term->key;
139}
140
141
142static void
143link_map_free_fn (void *item)
144{
145 Link *link = item;
146 VOLK_term_free (link->term->term);
147 free (link->term);
148 VOLK_term_set_free (link->tset);
149}
150
151
152 /*
153 * Term API.
154 */
155
156VOLK_Term *
158 VOLK_TermType type, const char *data, void *metadata)
159{
160 VOLK_Term *term;
161 CALLOC_GUARD (term, NULL);
162
163 if (UNLIKELY (term_init (
164 term, type, data, metadata) != VOLK_OK)) {
165 free (term);
166 return NULL;
167 }
168
169 return term;
170}
171
172
173VOLK_Term *
175{
176 void *metadata = NULL;
177
178 if (src->type == VOLK_TERM_LITERAL)
179 metadata = (void *) src->datatype;
180 else if (src->type == VOLK_TERM_LT_LITERAL)
181 metadata = (void *) src->lang;
182
183 return VOLK_term_new (src->type, src->data, metadata);
184}
185
186
188VOLK_Term *
190{
191 if (UNLIKELY (!sterm)) return NULL;
192
193 VOLK_TermType type;
194 char *data;
195 void *metadata = NULL;
196
197 // Copy term type.
198 size_t cplen = sizeof (type);
199 char *cpcur = (char *)sterm->addr;
200 memcpy (&type, cpcur, cplen);
201
202 // Copy term data.
203 cpcur += cplen;
204 cplen = strlen (cpcur) + 1;
205 data = malloc (cplen);
206 NLNL (data);
207 memcpy (data, cpcur, cplen);
208
209 // If applicable, create term metadata.
210 cpcur += cplen;
211 if (type == VOLK_TERM_LITERAL) {
212 if (strlen(cpcur) > 0)
213 NLNL (metadata = (void *) VOLK_iriref_new (cpcur));
214 } else if (type == VOLK_TERM_LT_LITERAL) {
215 cplen = sizeof (VOLK_LangTag);
216 metadata = malloc (cplen);
217 NLNL (metadata);
218 memcpy (metadata, cpcur, cplen);
219 }
220
221 VOLK_Term *ret = VOLK_term_new (type, data, metadata);
222
223 free (data);
224 if (type == VOLK_TERM_LT_LITERAL) free (metadata);
225
226 return ret;
227}
228
229
230VOLK_Term *
231VOLK_iriref_new_abs (const VOLK_Term *root, const VOLK_Term *iri)
232{
233 if (iri->type != VOLK_TERM_IRIREF) {
234 log_error ("Provided path is not an IRI.");
235 return NULL;
236 }
237 if (root->type != VOLK_TERM_IRIREF) {
238 log_error ("Provided root is not an IRI.");
239 return NULL;
240 }
241
242 char
243 *data,
244 *pfx = VOLK_iriref_prefix (iri);
245
246 if (strlen (pfx) > 0) data = iri->data;
247
248 else if (iri->data[0] == '/') {
249 free (pfx);
250
251 pfx = VOLK_iriref_prefix (root);
252 data = malloc (strlen (iri->data) + strlen (pfx) + 1);
253 if (!data) return NULL;
254
255 sprintf (data, "%s%s", pfx, iri->data);
256
257 } else {
258 data = malloc (strlen (iri->data) + strlen (root->data) + 1);
259 if (!data) return NULL;
260
261 sprintf (data, "%s%s", root->data, iri->data);
262 }
263 free (pfx);
264
265 VOLK_Term *ret = VOLK_iriref_new (data);
266 if (data != iri->data) free (data);
267
268 return ret;
269}
270
271
272VOLK_Term *
273VOLK_iriref_new_rel (const VOLK_Term *root, const VOLK_Term *iri)
274{
275 if (iri->type != VOLK_TERM_IRIREF) {
276 log_error ("Provided path is not an IRI.");
277 return NULL;
278 }
279 if (root->type != VOLK_TERM_IRIREF) {
280 log_error ("Provided root is not an IRI.");
281 return NULL;
282 }
283
284 size_t offset = (
285 strstr (iri->data, root->data) == iri->data ?
286 strlen (root->data) : 0);
287
288 return VOLK_iriref_new (iri->data + offset);
289}
290
291
294{
295 /* The serialized data are a byte string (unsigned char *) formatted in
296 * the following way:
297 *
298 * - (unsigned char) term->type
299 * - (char *) NUL-delimited term->data
300 * - (char *) serialized metadata as byte string
301 *
302 * All fields are cast to uchar. The first field is fixed, the
303 * second and third are NUL-delimited, hence all fields are easily
304 * identifiable.
305 *
306 * Metadata are:
307 *
308 * - For VOLK_TERM_IRIREF, no data. IRI info is calculated on demand.
309 * - For VOLK_TERM_LITERAL, a `char` (`\1` or `\2`) indicating if a
310 * language tag is present, followed by the fully-qualified data type URI
311 * or the language tag, as a `NUL`-delimited string. For a `xsd:string`
312 * literal with no language, it is a zero-length string.
313 * - For VOLK_TERM_BNODE, no data. Skolemization ID is calculated on
314 * deserialization.
315 *
316 * In serializing a term, the fact that two terms of different types may
317 * be semantically identical must be taken into account. Specifically, a
318 * VOLK_TERM_LT_LITERAL with no language tag is identical to a
319 * VOLK_TERM_LITERAL of xsd:string type, made up of the same string. Such
320 * terms must have identical serializations.
321 */
322
323 if (UNLIKELY (!term)) return NULL;
324
325 VOLK_Buffer *sterm;
326 CALLOC_GUARD (sterm, NULL);
327
328 sterm->size = sizeof(VOLK_TermType) + strlen(term->data) + 1;
329 NLNL (sterm->addr = malloc (sterm->size));
330
331 // Copy term type.
332 size_t offset = 0;
333 size_t cplen = sizeof(term->type);
334 memcpy (sterm->addr, &term->type, cplen);
335
336 // Copy term data.
337 offset += cplen;
338 cplen = strlen (term->data) + 1;
339 memcpy (sterm->addr + offset, term->data, cplen);
340
341 // If applicable, copy literal metadata.
342 offset += cplen;
343 // Copy data type URI string or lang tag.
344 if (term->type == VOLK_TERM_LITERAL) {
345 // Non-language-tagged term.
346 // Don't store default datatype (xsd:string).
347 if (term->datatype == VOLK_default_datatype) {
348 NLNL (sterm->addr = realloc (sterm->addr, ++sterm->size));
349 sterm->addr[offset] = '\0';
350 }
351 else {
352 cplen = strlen (term->datatype->data) + 1;
353 sterm->size += cplen;
354 NLNL (sterm->addr = realloc (sterm->addr, sterm->size));
355 memcpy (
356 sterm->addr + offset, term->datatype->data, cplen);
357 }
358 } else if (term->type == VOLK_TERM_LT_LITERAL) {
359 // Language-tagged term.
360 cplen = sizeof (VOLK_LangTag);
361 sterm->size += cplen;
362 NLNL (sterm->addr = realloc (sterm->addr, sterm->size));
363 memcpy (sterm->addr + offset, &term->lang, sizeof (VOLK_LangTag));
364 }
365
366 return sterm;
367}
368
369
372{
373 VOLK_Buffer *buf;
374
375 if (UNLIKELY (!term)) buf = BUF_DUMMY;
376 else buf = VOLK_term_serialize (term);
377
378 VOLK_Key key = VOLK_buffer_hash (buf);
379
380 VOLK_buffer_free (buf);
381
382 return key;
383}
384
385
386void
388{
389 if (UNLIKELY (!term)) return;
390
391 free (term->data);
392 /*
393 if (
394 term->type == VOLK_TERM_LITERAL &&
395 term->datatype != VOLK_default_datatype)
396 free (term->datatype);
397 */
398 free (term);
399}
400
401
402char *
404{
405 if (iri->type != VOLK_TERM_IRIREF) {
406 log_error ("Term is not a IRI ref type.");
407 return NULL;
408 }
409
410 // if (iri->iri_info->prefix.size == 0) return NULL;
411 VOLK_IRIInfo iri_info;
412 RCNL (VOLK_parse_iri (iri->data, &iri_info));
413
414 return strndup (
415 iri->data + iri_info.prefix.offset, iri_info.prefix.size);
416}
417
418
419char *
421{
422 if (iri->type != VOLK_TERM_IRIREF) {
423 log_error ("Term is not a IRI ref type.");
424 return NULL;
425 }
426
427 // if (iri->iri_info->path.size == 0) return NULL;
428 VOLK_IRIInfo iri_info;
429 RCNL (VOLK_parse_iri (iri->data, &iri_info));
430
431 return strndup (iri->data + iri_info.path.offset, iri_info.path.size);
432}
433
434
435char *
437{
438 if (iri->type != VOLK_TERM_IRIREF) {
439 log_error ("Term is not a IRI ref type.");
440 return NULL;
441 }
442
443 // if (iri->iri_info->frag.size == 0) return NULL;
444 VOLK_IRIInfo iri_info;
445 RCNL (VOLK_parse_iri (iri->data, &iri_info));
446
447 return strndup (iri->data + iri_info.frag.offset, iri_info.frag.size);
448}
449
450
451/*
452 * Triple API.
453 */
454
457{
458 VOLK_Triple *spo = malloc (sizeof (*spo));
459 if (!spo) return NULL;
460
461 if (UNLIKELY (VOLK_triple_init (spo, s, p, o))) {
462 free (spo);
463 return NULL;
464 }
465
466 return spo;
467}
468
469
472{
473 VOLK_Triple *spo = malloc (sizeof (*spo));
474 if (!spo) return NULL;
475
476 spo->s = VOLK_term_new_from_buffer (sspo->s);
477 spo->p = VOLK_term_new_from_buffer (sspo->p);
478 spo->o = VOLK_term_new_from_buffer (sspo->o);
479
480 return spo;
481}
482
483
486{
487 VOLK_BufferTriple *sspo = malloc (sizeof (*sspo));
488 if (!sspo) return NULL;
489
490 sspo->s = VOLK_term_serialize (spo->s);
491 sspo->p = VOLK_term_serialize (spo->p);
492 sspo->o = VOLK_term_serialize (spo->o);
493
494 return sspo;
495}
496
497
500{
501 /* FIXME TRP_DUMMY is a problem here.
502 if (! VOLK_IS_IRI (s) && s->type != VOLK_TERM_BNODE) {
503 log_error ("Subject is not of a valid term type: %d", s->type);
504 return VOLK_VALUE_ERR;
505 }
506 if (! VOLK_IS_IRI (p)) {
507 log_error ("Predicate is not of a valid term type: %d", p->type);
508 return VOLK_VALUE_ERR;
509 }
510 */
511
512 spo->s = s;
513 spo->p = p;
514 spo->o = o;
515
516 return VOLK_OK;
517}
518
519
520void
522{
523 if (UNLIKELY (!spo)) return;
524
525 VOLK_term_free (spo->s);
526 VOLK_term_free (spo->p);
527 VOLK_term_free (spo->o);
528}
529
530
531void
533{
534 if (UNLIKELY (!spo)) return;
535
536 VOLK_term_free (spo->s);
537 VOLK_term_free (spo->p);
538 VOLK_term_free (spo->o);
539
540 free (spo);
541}
542
543
544/*
545 * Multi-add functions.
546 */
547
550{
551 // Capacity of 4 is an arbitrary guess.
552 VOLK_TermSet *ts = hashmap_new (
553 sizeof (KeyedTerm), 4, VOLK_HASH_SEED, 0,
554 tset_hash_fn, tset_cmp_fn, tset_free_fn, NULL);
555 if (UNLIKELY (hashmap_oom (ts))) return NULL;
556
557 return ts;
558}
559
560
563{
564 VOLK_Hash key = VOLK_term_hash (term);
565 KeyedTerm entry_s = {.key=key, .term=term};
566
567 const KeyedTerm *ex = hashmap_get (ts, &entry_s);
568 if (ex) {
569 if (existing) *existing = ex->term;
570 return VOLK_NOACTION;
571 }
572
573 hashmap_set (ts, &entry_s);
574 if (hashmap_oom (ts)) return VOLK_MEM_ERR;
575
576 return VOLK_OK;
577}
578
579
580const VOLK_Term *
582{
583 const KeyedTerm *entry = hashmap_get (ts, &(KeyedTerm){.key=key});
584 if (entry) log_trace ("ID found for key %lx: %s", key, entry->term->data);
585 else log_trace ("No ID found for key %lx.", key);
586
587 return (entry) ? entry->term : NULL;
588}
589
590
593{
594 KeyedTerm *kt = NULL;
595 if (!hashmap_iter (ts, i, (void **)&kt)) return VOLK_END;
596
597 if (term) *term = kt->term;
598
599 return VOLK_OK;
600}
601
602
603void
605{
606 if (UNLIKELY (!ts)) return;
607 hashmap_free (ts);
608}
609
610
611VOLK_Term *
613{
614 const KeyedTerm *kt = NULL;
615 size_t i = 0;
616 if (!hashmap_iter (ts, &i, (void **)&kt)) return NULL;
617
618 kt = hashmap_delete (ts, kt);
619
620 return VOLK_term_copy (kt->term);
621}
622
623
624size_t
626{ return hashmap_count (ts); }
627
628
630VOLK_link_map_new (const VOLK_Term *linked_term, VOLK_LinkType type)
631{
632 VOLK_LinkMap *lm;
633 MALLOC_GUARD (lm, NULL);
634 lm->type = type;
635 lm->links = hashmap_new (
636 sizeof (Link), 0, VOLK_HASH_SEED, 0,
637 link_map_hash_fn, link_map_cmp_fn, link_map_free_fn, NULL);
638 if (!linked_term) {
639 log_error ("term must not be NULL.");
640 free (lm);
641 return NULL;
642 }
643 lm->linked_t = VOLK_term_copy (linked_term);
644
645 return lm;
646}
647
648
649void
651{
652 hashmap_free (lm->links);
654 free (lm);
655}
656
657
660{ return map->type; }
661
662
663// TODO Memory error handling.
666 VOLK_LinkMap *lmap, VOLK_Term *term, VOLK_TermSet *tset)
667{
668 // Keyed term to look up the link term and insert it, if necessary.
669 KeyedTerm entry_s = {.key=VOLK_term_hash (term), .term=term};
670
671 const Link *ex = hashmap_get (lmap->links, &(Link){.term=&entry_s});
672 if (ex) {
673 // Add terms one by one to the existing term set.
674 log_trace (
675 "Linking term %s exists. Adding individual terms.",
676 ex->term->term->data);
677 size_t i = 0;
678 KeyedTerm *kt;
679 while (hashmap_iter (tset, &i, (void **)&kt)) {
680 log_trace (
681 "Adding term %s to link %s",
682 kt->term->data, ex->term->term->data);
683 if (hashmap_get (ex->tset, kt))
684 // Term already exist, free the new one and move on.
685 VOLK_term_free (kt->term);
686 else
687 // Insert KeyedTerm, the term set now owns the underlying term.
688 hashmap_set (ex->tset, kt);
689 }
690 // Free link term that hasn't been used.
691 VOLK_term_free (term);
692 } else {
693 // Add the new term and the termset wholesale.
694 log_trace ("Adding new linking term %s.", term->data);
695 // Allocate inserted member on heap, it will be owned by the map.
696 KeyedTerm *ins;
698 memcpy (ins, &entry_s, sizeof (entry_s));
699 Link link = {.term=ins, .tset=tset};
700 hashmap_set (lmap->links, &link);
701 }
702
703 return VOLK_OK;
704}
705
706
707VOLK_LinkMapIterator *
709{
710 VOLK_LinkMapIterator *it;
711 CALLOC_GUARD (it, NULL);
712 it->map = lmap;
713
714 return it;
715}
716
717
718// This leaves the link and link map references intact.
719void
720VOLK_link_map_iter_free (VOLK_LinkMapIterator *it) { free (it); }
721
722
725 VOLK_LinkMapIterator *it, VOLK_Term **lt, VOLK_TermSet **ts)
726{
727 if (!hashmap_iter (it->map->links, &it->i, (void **)&it->link))
728 return VOLK_END;
729
730 *lt = it->link->term->term;
731 *ts = it->link->tset;
732
733 return VOLK_OK;
734}
735
736
737// TODO dismantle if the only triple generator is for the graph.
740 VOLK_LinkMapIterator *it, VOLK_Triple *spo)
741{
742 // Assign external (related) term.
743 if (it->map->type == VOLK_LINK_INBOUND)
744 spo->o = it->map->linked_t;
745 else if (it->map->type == VOLK_LINK_OUTBOUND)
746 spo->s = it->map->linked_t;
747 else spo->p = it->map->linked_t;
748
749 KeyedTerm *kt;
750
751 // If we are already handling a link, continue the internal loop.
752 if (it->link) goto int_loop;
753ext_loop:
754 // Advance external counter and start new internal loop.
755 it->j = 0;
756 if (!hashmap_iter (it->map->links, &it->i, (void **)&it->link))
757 return VOLK_END;
758int_loop:
759 // If end of the term set is reached, start with a new linking term.
760 if (!hashmap_iter (it->link->tset, &it->j, (void **)&kt)) goto ext_loop;
761
762 // Continue pulling from term set.
763 // Assign linking term.
764 if (it->map->type == VOLK_LINK_EDGE) spo->s = it->link->term->term;
765 else spo->p = it->link->term->term;
766
767 // Assign term in term set.
768 if (it->map->type == VOLK_LINK_INBOUND) spo->s = kt->term;
769 else spo->o = kt->term;
770
771 return VOLK_OK;
772}
773
774
775/*
776 * Static functions.
777 */
778
779static VOLK_rc
780term_init (
781 VOLK_Term *term, VOLK_TermType type,
782 const char *data, void *metadata)
783{
784 // Exit early if environment is not initialized.
785 // EXCEPT for IRIRef which is used inside of VOLK_init().
786 if (!VOLK_env_is_init && type != VOLK_TERM_IRIREF)
787 return VOLK_ENV_ERR;
788
789 // Undefined type. Make quick work of it.
790 if (type == VOLK_TERM_UNDEFINED) {
791 term->type = type;
792 if (data) {
793 term->data = malloc (strlen (data) + 1);
794 if (UNLIKELY (!term->data)) return VOLK_MEM_ERR;
795 strcpy (term->data, data);
796 }
797 return VOLK_OK;
798 }
799
800 if (type < MIN_VALID_TYPE || type > MAX_VALID_TYPE) {
801 log_error ("%d is not a valid term type.", type);
802 return VOLK_VALUE_ERR;
803 }
804
805 term->type = type;
806
807 if (data) {
808 // Validate IRI.
809 if (term->type == VOLK_TERM_IRIREF) {
810 char *fquri = (char *) data;
811
812 if (strpbrk (fquri, invalid_uri_chars) != NULL) {
813 log_warn (
814 "Characters %s are not valid in a URI. Got: %s\n",
815 invalid_uri_chars, fquri);
816#if 0
817 // TODO This causes W3C TTL test #29 to fail. Remove?
818 return VOLK_VALUE_ERR;
819#endif
820 }
821 }
822
823 term->data = strdup (data);
824
825 } else {
826 // No data. Make up a random UUID or URI if allowed.
827 if (type == VOLK_TERM_IRIREF || type == VOLK_TERM_BNODE) {
828 uuid_t uuid;
829 uuid_generate_random (uuid);
830
831 uuid_str_t uuid_str;
832 uuid_unparse_lower (uuid, uuid_str);
833
834 if (type == VOLK_TERM_IRIREF) {
835 term->data = malloc (UUID4_URN_SIZE);
836 snprintf (
837 term->data, UUID4_URN_SIZE, "urn:uuid:%s", uuid_str);
838 } else term->data = strdup (uuid_str);
839 } else {
840 log_error ("No data provided for term.");
841 return VOLK_VALUE_ERR;
842 }
843 }
844
845 if (term->type == VOLK_TERM_LT_LITERAL) {
846 if (!metadata) {
847 log_warn ("Lang tag is NULL. Creating a non-tagged literal.");
848 term->type = VOLK_TERM_LITERAL;
849 } else {
850 // FIXME metadata should be const all across.
851 char *lang_str = (char *) metadata;
852 log_trace ("Lang string: '%s'", lang_str);
853 // Lang tags longer than 7 characters will be truncated.
854 strncpy(term->lang, lang_str, sizeof (term->lang) - 1);
855 if (strlen (term->lang) < 1) {
856 log_error ("Lang tag cannot be an empty string.");
857 return VOLK_VALUE_ERR;
858 }
859 term->lang[7] = '\0';
860 }
861 }
862
863 if (term->type == VOLK_TERM_LITERAL) {
864 term->datatype = metadata;
865 if (! term->datatype) term->datatype = VOLK_default_datatype;
866 log_trace ("Storing data type: %s", term->datatype->data);
867
868 if (term->datatype->type != VOLK_TERM_IRIREF) {
869 log_error (
870 "Literal data type is not an IRI: %s",
871 term->datatype->data);
872
873 return VOLK_VALUE_ERR;
874 }
875
876 VOLK_Term *ex = NULL;
878 if (ex && ex != term->datatype) {
879 // Replace datatype handle with the one in term cache, and free
880 // the new one.
881 if (term->datatype != VOLK_default_datatype)
882 VOLK_term_free (term->datatype);
883 term->datatype = ex;
884 }
885
886 //log_trace ("Datatype address: %p", term->datatype);
887 log_trace ("Datatype hash: %lx", VOLK_term_hash (term->datatype));
888
889 } else if (term->type == VOLK_TERM_BNODE) {
890 // TODO This is not usable for global skolemization.
891 term->bnode_id = VOLK_HASH (
892 term->data, strlen (term->data) + 1, VOLK_HASH_SEED);
893 }
894
895 return VOLK_OK;
896}
897
898
900VOLK_parse_iri (char *iri_str, VOLK_IRIInfo *iri_info) {
901 char *cur = iri_str;
902 size_t iri_len = strlen (iri_str);
903 MatchCoord tmp = {}; // Temporary storage for capture groups
904
905 memset (iri_info, 0, sizeof (*iri_info));
906 //log_debug ("Parsing IRI: %s", iri_str);
907 // #2: ([^:/?#]+)
908 while (
909 *cur != ':' && *cur != '/' && *cur != '?'
910 && *cur != '#' && *cur != '\0') {
911 tmp.size++;
912 cur++;
913 }
914
915 // Non-capturing: (?([^:/?#]+):)?
916 if (tmp.size > 0 && *cur == ':') {
917 // Got capture groups #2 and #3. Store them.
918 iri_info->scheme.offset = 0;
919 iri_info->scheme.size = tmp.size;
920 cur++;
921 //log_debug ("Group #2: %lu, %lu", coord[2].offset, coord[2].size);
922 } else cur = iri_str; // Backtrack if no match.
923
924 // Non-capturing: (?//([^/?#]*))?
925 if (*cur == '/' && *(cur + 1) == '/') {
926 cur += 2;
927 tmp.offset = cur - iri_str;
928 tmp.size = 0;
929
930 // #3: ([^/?#]*)
931 while (*cur != '/' && *cur != '?' && *cur != '#' && *cur != '\0') {
932 tmp.size++;
933 cur++;
934 }
935 iri_info->auth.offset = tmp.offset;
936 iri_info->auth.size = tmp.size;
937 //log_debug ("Group #3: %lu, %lu", coord[3].offset, coord[3].size);
938 }
939
940 // Capture group 1.
941 iri_info->prefix.offset = 0;
942 iri_info->prefix.size = cur - iri_str;
943 //log_debug ("Group #1: %lu, %lu", coord[1].offset, coord[1].size);
944
945 tmp.offset = cur - iri_str;
946 tmp.size = 0;
947
948 iri_info->path.offset = tmp.offset;
949 iri_info->path.size = iri_len - tmp.offset;
950 //log_debug ("Group #4: %lu, %lu", coord[4].offset, coord[4].size);
951
952 // Non-capturing: (?[^?#]*)
953 while (*cur != '?' && *cur != '#' && *cur != '\0') {
954 tmp.size++;
955 cur++;
956 }
957
958 // Non-capturing: (?\?([^#]*))
959 if (*cur == '?') {
960 // 5: ([^#]*)
961 tmp.offset = ++cur - iri_str;
962 tmp.size = 0;
963 while (*cur != '#' && *cur != '\0') {
964 tmp.size++;
965 cur++;
966 }
967
968 if (tmp.size > 0) {
969 // Got capture group #5.
970 iri_info->query.offset = tmp.offset;
971 iri_info->query.size = tmp.size;
972 //log_debug ("Group #5: %lu, %lu", coord[5].offset, coord[5].size);
973 }
974 }
975
976 // Non-capturing: (?#(.*))?
977 if (*cur == '#') {
978 // #6: (.*)
979 iri_info->frag.offset = ++cur - iri_str;
980 iri_info->frag.size = iri_str + iri_len - cur;
981 //log_debug ("Group #6: %lu, %lu", coord[6].offset, coord[6].size);
982 }
983
984 /* TODO add error cases.
985 if (UNLIKELY (rc != VOLK_OK)) {
986 log_error ("Error matching URI pattern.");
987
988 return VOLK_VALUE_ERR;
989 }
990 */
991
992 return VOLK_OK;
993}
994
995/*
996 * Extern inline functions.
997 */
998
999VOLK_Key VOLK_term_hash (const VOLK_Term *term);
1000VOLK_Term *VOLK_iriref_new (const char *data);
1001VOLK_Term *VOLK_iriref_new_ns (const char *data);
1002VOLK_Term *VOLK_literal_new (const char *data, VOLK_Term *datatype);
1003VOLK_Term *VOLK_lt_literal_new (const char *data, char *lang);
1004VOLK_Term *VOLK_bnode_new (const char *data);
1005bool VOLK_term_equals (const VOLK_Term *term1, const VOLK_Term *term2);
#define UNLIKELY(x)
Definition core.h:39
VOLK_Key VOLK_buffer_hash(const VOLK_Buffer *buf)
Hash a buffer.
Definition buffer.h:175
VOLK_TriplePos
Triple position of s, p, o.
Definition buffer.h:19
#define BUF_DUMMY
Dummy buffer to be used with VOLK_buffer_init.
Definition buffer.h:154
void VOLK_buffer_free(VOLK_Buffer *buf)
Free a buffer.
Definition buffer.c:97
#define VOLK_HASH_SEED
Seed used for all hashing. Compile-time configurable.
Definition core.h:175
VOLK_Hash64 VOLK_Hash
Default hash data type.
Definition core.h:214
bool VOLK_env_is_init
Whether the environment is initialized.
Definition core.c:11
#define VOLK_HASH(...)
Default hashing function. Depends on architecture.
Definition core.h:186
#define MALLOC_GUARD(var, rc)
Allocate one pointer with malloc and return rc if it fails.
Definition core.h:382
#define RCNL(exp)
Return NULL if exp returns a nonzero value.
Definition core.h:352
char * strdup(const char *src)
Replacement for GNU strdup.
Definition core.c:109
#define NLNL(exp)
Log error and return NULL if exp is NULL.
Definition core.h:374
#define CALLOC_GUARD(var, rc)
Allocate one pointer with calloc and return rc if it fails.
Definition core.h:388
char * strndup(const char *src, size_t max)
Replacement for GNU strndup.
Definition core.c:92
#define log_trace(...)
Definition core.h:275
size_t VOLK_Key
Term key, i.e., hash of a serialized term.
Definition core.h:230
char uuid_str_t[UUIDSTR_SIZE]
UUID string type.
Definition core.h:239
#define VOLK_VALUE_ERR
An invalid input value was provided.
Definition core.h:129
#define VOLK_MEM_ERR
Memory allocation error.
Definition core.h:144
#define VOLK_END
Loop end.
Definition core.h:107
#define VOLK_OK
Generic success return code.
Definition core.h:83
#define VOLK_NOACTION
No action taken.
Definition core.h:93
int VOLK_rc
Definition core.h:79
#define VOLK_ENV_ERR
Error while handling environment setup; or environment not initialized.
Definition core.h:158
VOLK_Key VOLK_triple_hash(const VOLK_Triple *trp)
Hash a triple.
Definition term.h:513
VOLK_Term * VOLK_lt_literal_new(const char *data, char *lang)
Shortcut to create a language-tagged literal term.
Definition term.h:317
VOLK_rc VOLK_term_set_add(VOLK_TermSet *ts, VOLK_Term *term, VOLK_Term **existing)
Add term to a term set.
Definition term.c:562
VOLK_Triple * VOLK_triple_new_from_btriple(const VOLK_BufferTriple *sspo)
Definition term.c:471
struct hashmap VOLK_TermSet
a set of unique terms.
Definition term.h:124
VOLK_rc VOLK_term_set_next(VOLK_TermSet *ts, size_t *i, VOLK_Term **term)
Iterate through a term set.
Definition term.c:592
VOLK_Triple * VOLK_triple_new(VOLK_Term *s, VOLK_Term *p, VOLK_Term *o)
Create a new triple from three terms.
Definition term.c:456
char * VOLK_iriref_frag(const VOLK_Term *iri)
Get the fragment portion of a IRI ref.
Definition term.c:436
VOLK_Term * VOLK_iriref_new_rel(const VOLK_Term *root, const VOLK_Term *iri)
Create a new relative IRI from an absolute IRI and a web root IRI.
Definition term.c:273
VOLK_rc VOLK_parse_iri(char *iri_str, VOLK_IRIInfo *iri_info)
scan an IRI string and parse IRI parts.
Definition term.c:900
void VOLK_link_map_iter_free(VOLK_LinkMapIterator *it)
Free a link map iterator.
Definition term.c:720
VOLK_LinkType
Link type.
Definition term.h:94
VOLK_Term * VOLK_iriref_new(const char *data)
Create an IRI reference.
Definition term.h:192
VOLK_TermType
Term type.
Definition term.h:31
bool VOLK_term_equals(const VOLK_Term *term1, const VOLK_Term *term2)
Compare two terms.
Definition term.h:377
size_t VOLK_term_set_size(VOLK_TermSet *ts)
Size of a term set.
Definition term.c:625
VOLK_Term * VOLK_iriref_new_abs(const VOLK_Term *root, const VOLK_Term *iri)
Create a new absolute IRI from a path relative to a root IRI.
Definition term.c:231
VOLK_Term * VOLK_literal_new(const char *data, VOLK_Term *datatype)
Shortcut to create a literal term.
Definition term.h:266
VOLK_LinkMapIterator * VOLK_link_map_iter_new(const VOLK_LinkMap *lmap)
Create a new iterator to loop through a link map.
Definition term.c:708
char * VOLK_iriref_prefix(const VOLK_Term *iri)
Get the prefix portion of a IRI ref.
Definition term.c:403
VOLK_LinkMap * VOLK_link_map_new(const VOLK_Term *linked_term, VOLK_LinkType type)
New link map.
Definition term.c:630
VOLK_Term * VOLK_default_datatype
Default literal data type URI.
Definition term.c:60
VOLK_rc VOLK_link_map_triples(VOLK_LinkMapIterator *it, VOLK_Triple *spo)
Iterate over a link map and generate triples.
Definition term.c:739
void VOLK_triple_done(VOLK_Triple *spo)
Free the internal pointers of a triple.
Definition term.c:521
VOLK_rc VOLK_triple_init(VOLK_Triple *spo, VOLK_Term *s, VOLK_Term *p, VOLK_Term *o)
Initialize internal term pointers in a heap-allocated triple.
Definition term.c:499
const VOLK_Term * VOLK_term_set_get(VOLK_TermSet *ts, VOLK_Key key)
Get a term from a term set.
Definition term.c:581
VOLK_Term * VOLK_bnode_new(const char *data)
Shortcut to create a blank node.
Definition term.h:331
VOLK_TermSet * VOLK_term_set_new()
Create a new term set.
Definition term.c:549
uint32_t VOLK_default_dtype_key
Compiled hash of default literal data type.
Definition term.c:58
void VOLK_term_set_free(VOLK_TermSet *ts)
Free a term set.
Definition term.c:604
VOLK_Key VOLK_term_hash(const VOLK_Term *term)
Hash a term.
Definition term.c:371
VOLK_rc VOLK_link_map_next(VOLK_LinkMapIterator *it, VOLK_Term **lt, VOLK_TermSet **ts)
Iterate through a link map.
Definition term.c:724
void VOLK_link_map_free(VOLK_LinkMap *lm)
Free a link map.
Definition term.c:650
VOLK_Term * VOLK_iriref_new_ns(const char *data)
Create an IRI reference from a namespace-prefixed string.
Definition term.h:205
#define UUID4_URN_SIZE
Definition term.h:14
char * VOLK_iriref_path(const VOLK_Term *iri)
Get the path portion of a IRI ref.
Definition term.c:420
char VOLK_LangTag[8]
Language tag, currently restricted to 7 characters.
Definition term.h:28
void VOLK_term_free(VOLK_Term *term)
Definition term.c:387
VOLK_Term * VOLK_triple_pos(const VOLK_Triple *trp, VOLK_TriplePos n)
Get triple by term position.
Definition term.h:499
VOLK_Buffer * VOLK_term_serialize(const VOLK_Term *term)
Serialize a term into a buffer.
Definition term.c:293
VOLK_Term * VOLK_default_ctx
Default context.
Definition term.c:59
VOLK_Term * VOLK_term_new_from_buffer(const VOLK_Buffer *sterm)
See notes in VOLK_term_serialize function body for format info.
Definition term.c:189
VOLK_rc VOLK_link_map_add(VOLK_LinkMap *lmap, VOLK_Term *term, VOLK_TermSet *tset)
Add a term - term set pair to a link map.
Definition term.c:665
VOLK_Term * VOLK_term_set_pop(VOLK_TermSet *ts)
Pop a term from a term set.
Definition term.c:612
VOLK_LinkType VOLK_link_map_type(const VOLK_LinkMap *map)
Return the link map type.
Definition term.c:659
VOLK_BufferTriple * VOLK_triple_serialize(const VOLK_Triple *spo)
Definition term.c:485
VOLK_Term * VOLK_term_new(VOLK_TermType type, const char *data, void *metadata)
Create a new term.
Definition term.c:157
VOLK_TermSet * VOLK_term_cache
Global term cache.
Definition term.c:61
void VOLK_triple_free(VOLK_Triple *spo)
Free a triple and all its internal pointers.
Definition term.c:532
VOLK_Term * VOLK_term_copy(const VOLK_Term *src)
Copy a term.
Definition term.c:174
@ VOLK_LINK_EDGE
Edge link (so).
Definition term.h:97
@ VOLK_LINK_INBOUND
Inbound link (sp).
Definition term.h:95
@ VOLK_LINK_OUTBOUND
Outbound link (po).
Definition term.h:96
@ VOLK_TERM_IRIREF
IRI reference.
Definition term.h:35
@ VOLK_TERM_UNDEFINED
Definition term.h:32
@ VOLK_TERM_LT_LITERAL
Language-tagged string literal.
Definition term.h:37
@ VOLK_TERM_LITERAL
Literal without language tag.
Definition term.h:36
@ VOLK_TERM_BNODE
Blank node.
Definition term.h:38
Key-term pair in term set.
Definition term.c:11
VOLK_Term * term
Term handle.
Definition term.c:13
VOLK_Key key
Key (hash) of the term.
Definition term.c:12
Match coordinates in IRI parsing results.
Definition term.h:42
unsigned int size
Length of match.
Definition term.h:44
unsigned int offset
Offset of match from start of string.
Definition term.h:43
Triple of byte buffers.
Definition buffer.h:60
VOLK_Buffer * o
Definition buffer.h:63
VOLK_Buffer * s
Definition buffer.h:61
VOLK_Buffer * p
Definition buffer.h:62
General-purpose data buffer.
Definition buffer.h:47
unsigned char * addr
Definition buffer.h:48
size_t size
Definition buffer.h:49
Matching sub-patterns for IRI parts.
Definition term.h:48
MatchCoord frag
Fragment (frag).
Definition term.h:55
MatchCoord auth
Authority (example.org).
Definition term.h:51
MatchCoord query
Query (query=blah).
Definition term.h:54
MatchCoord prefix
Prefix (http://example.org).
Definition term.h:49
MatchCoord path
Definition term.h:52
MatchCoord scheme
Scheme (http).
Definition term.h:50
VOLK_Term * linked_t
Linked term.
Definition term.c:49
VOLK_LinkType type
Link type.
Definition term.c:48
struct hashmap * links
Map of Link instances.
Definition term.c:50
RDF term.
Definition term.h:62
VOLK_Key bnode_id
BN ID for comparison & skolemization.
Definition term.h:67
char * data
URI, literal value, or BNode label.
Definition term.h:63
struct term_t * datatype
Data type IRI for VOLK_TERM_LITERAL.
Definition term.h:65
VOLK_TermType type
Term type.
Definition term.h:70
VOLK_LangTag lang
Lang tag for VOLK_TERM_LT_LITERAL.
Definition term.h:66
RDF triple.
Definition term.h:86
VOLK_Term * p
Predicate.
Definition term.h:88
VOLK_Term * s
Subject.
Definition term.h:87
VOLK_Term * o
Object.
Definition term.h:89