// Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

/**
 * Class that processes YAML mappings, sequences and scalars into nodes.
 * This can be used to add custom data types. A tutorial can be found
 * $(LINK2 https://dlang-community.github.io/D-YAML/, here).
 */
module dyaml.constructor;

import mir.timestamp;
import std.algorithm;
import std.array;
import std.base64;
import std.container;
import std.conv;
import std.exception;
import std.regex;
import std.string;
import std.utf;

import dyaml.node;
import dyaml.exception;
import dyaml.style;

package:

// Exception thrown at constructor errors.
class ConstructorException : YAMLException
{
    /// Construct a ConstructorException.
    ///
    /// The start and end marks are appended to the message as text.
    ///
    /// Params:  msg   = Error message.
    ///          start = Start position of the error context.
    ///          end   = End position of the error context.
    ///          file  = Source file reported for the exception (defaults to the call site).
    ///          line  = Source line reported for the exception (defaults to the call site).
    this(string msg, Mark start, Mark end, string file = __FILE__, size_t line = __LINE__)
        @safe pure nothrow
    {
        super(msg ~ "\nstart: " ~ start.toString() ~ "\nend: " ~ end.toString(),
              file, line);
    }
}

/** Constructs YAML values.
 *
 * Each YAML scalar, sequence or mapping has a tag specifying its data type.
 * The tag selects the function used to convert the raw value (a string for
 * scalars, nodes for sequences, pairs for mappings) into the value stored
 * in the resulting node.
 *
 * A standard tag applied to the wrong kind of value (for example a bool tag
 * on a sequence) is an error. A tag that is not one of the tags handled
 * below is not an error: the value is stored in the node unchanged, together
 * with its tag.
 */
/*
 * Construct a node.
 *
 * Params:  start = Start position of the node; stored in the resulting node.
 *          end   = End position of the node; only used for error reporting.
 *          tag   = Tag (data type) of the node.
 *          value = Value to construct node from (string, nodes or pairs).
 *
 * Returns: Constructed node.
 *
 * Throws:  ConstructorException if the value can not be converted to the
 *          type required by the tag.
 */
Node constructNode(T)(const Mark start, const Mark end, const string tag,
                      T value) @safe
    if((is(T : string) || is(T == Node[]) || is(T == Node.Pair[])))
{
    Node newNode;
    try
    {
        // Each scalar/collection-specific case is only compiled in for the
        // matching T; for any other T the case throws unconditionally.
        switch(tag)
        {
            case "tag:yaml.org,2002:null":
                newNode = Node(null, tag);
                break;
            case "tag:yaml.org,2002:bool":
                static if(is(T == string))
                {
                    newNode = Node(constructBool(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be bools");
            case "tag:yaml.org,2002:int":
                static if(is(T == string))
                {
                    newNode = Node(constructLong(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be ints");
            case "tag:yaml.org,2002:float":
                static if(is(T == string))
                {
                    newNode = Node(constructReal(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be floats");
            case "tag:yaml.org,2002:binary":
                static if(is(T == string))
                {
                    newNode = Node(constructBinary(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be binary data");
            case "tag:yaml.org,2002:timestamp":
                static if(is(T == string))
                {
                    newNode = Node(constructTimestamp(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be timestamps");
            case "tag:yaml.org,2002:str":
                static if(is(T == string))
                {
                    newNode = Node(constructString(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be strings");
            case "tag:yaml.org,2002:value":
                static if(is(T == string))
                {
                    newNode = Node(constructString(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be values");
            case "tag:yaml.org,2002:omap":
                static if(is(T == Node[]))
                {
                    newNode = Node(constructOrderedMap(value), tag);
                    break;
                }
                else throw new Exception("Only sequences can be ordered maps");
            case "tag:yaml.org,2002:pairs":
                static if(is(T == Node[]))
                {
                    newNode = Node(constructPairs(value), tag);
                    break;
                }
                else throw new Exception("Only sequences can be pairs");
            case "tag:yaml.org,2002:set":
                static if(is(T == Node.Pair[]))
                {
                    newNode = Node(constructSet(value), tag);
                    break;
                }
                else throw new Exception("Only mappings can be sets");
            case "tag:yaml.org,2002:seq":
                static if(is(T == Node[]))
                {
                    newNode = Node(constructSequence(value), tag);
                    break;
                }
                else throw new Exception("Only sequences can be sequences");
            case "tag:yaml.org,2002:map":
                static if(is(T == Node.Pair[]))
                {
                    newNode = Node(constructMap(value), tag);
                    break;
                }
                else throw new Exception("Only mappings can be maps");
            case "tag:yaml.org,2002:merge":
                newNode = Node(YAMLMerge(), tag);
                break;
            default:
                // Unknown tag: keep the raw value and remember the tag.
                newNode = Node(value, tag);
                break;
        }
    }
    catch(Exception e)
    {
        // Re-wrap any conversion failure with the position of the node.
        throw new ConstructorException("Error constructing " ~ typeid(T).toString()
                                       ~ ":\n" ~ e.msg, start, end);
    }

    newNode.startMark_ = start;

    return newNode;
}

private:
// Construct a boolean _node.
//
// Accepts "yes"/"true"/"on" and "no"/"false"/"off" in any letter case;
// throws an Exception for anything else.
bool constructBool(const string str) @safe
{
    string value = str.toLower();
    if(value.among!("yes", "true", "on")){return true;}
    if(value.among!("no", "false", "off")){return false;}
    throw new Exception("Unable to parse boolean value: " ~ value);
}

// Construct an integer (long) _node.
//
// Underscores are ignored. After an optional sign the value may be "0",
// binary ("0b..."), hexadecimal ("0x..."), octal (leading '0'),
// sexagesimal ("h:m:s") or plain decimal.
//
// Throws: Exception if the value is empty or can not be parsed.
long constructLong(const string str) @safe
{
    string value = str.replace("_", "");
    // Must be checked before looking at value[0] below.
    enforce(value != "", new Exception("Unable to parse integer value: " ~ str));

    const char c = value[0];
    const long sign = c != '-' ? 1 : -1;
    if(c == '-' || c == '+')
    {
        value = value[1 .. $];
    }

    // A lone sign is not a number.
    enforce(value != "", new Exception("Unable to parse integer value: " ~ str));

    long result;
    try
    {
        //Zero.
        if(value == "0")               {result = cast(long)0;}
        //Binary.
        else if(value.startsWith("0b")){result = sign * to!long(value[2 .. $], 2);}
        //Hexadecimal.
        else if(value.startsWith("0x")){result = sign * to!long(value[2 .. $], 16);}
        //Octal.
        else if(value[0] == '0')       {result = sign * to!long(value, 8);}
        //Sexagesimal.
        else if(value.canFind(":"))
        {
            long val;
            long base = 1;
            // Rightmost group is the least significant; each group to the
            // left is worth 60 times more.
            foreach_reverse(digit; value.split(":"))
            {
                val += to!long(digit) * base;
                base *= 60;
            }
            result = sign * val;
        }
        //Decimal.
        else{result = sign * to!long(value);}
    }
    catch(ConvException e)
    {
        throw new Exception("Unable to parse integer value: " ~ value);
    }

    return result;
}
@safe unittest
{
    string canonical   = "685230";
    string decimal     = "+685_230";
    string octal       = "02472256";
    string hexadecimal = "0x_0A_74_AE";
    string binary      = "0b1010_0111_0100_1010_1110";
    string sexagesimal = "190:20:30";

    assert(685230 == constructLong(canonical));
    assert(685230 == constructLong(decimal));
    assert(685230 == constructLong(octal));
    assert(685230 == constructLong(hexadecimal));
    assert(685230 == constructLong(binary));
    assert(685230 == constructLong(sexagesimal));

    // Radix-prefixed values wider than 32 bits fit in the long result.
    assert(4294967296L == constructLong("0x1_0000_0000"));
    assert(-4294967296L == constructLong("-0x1_0000_0000"));
    // Empty and sign-only scalars are rejected with an Exception.
    assertThrown!Exception(constructLong(""));
    assertThrown!Exception(constructLong("-"));
}

// Construct a floating point (double) _node.
//
// Underscores are ignored and the value is lower-cased. After an optional
// sign the value may be ".inf", ".nan", sexagesimal ("h:m:s.f") or a plain
// floating point number. The bare words "nan" and "inf" are rejected.
//
// Throws: Exception if the value is empty or can not be parsed.
double constructReal(const string str) @safe
{
    import mir.conv: to;
    string value = str.replace("_", "").toLower();
    // Must be checked before looking at value[0] below.
    enforce(value != "", new Exception("Unable to parse float value: " ~ value));

    const char c = value[0];
    const double sign = c != '-' ? 1.0 : -1.0;
    if(c == '-' || c == '+')
    {
        value = value[1 .. $];
    }

    // Reject a lone sign and the spellings without the leading dot.
    if (value == "" || value == "nan" || value == "inf" || value == "-inf")
        throw new Exception("Unable to parse float value: " ~ value);

    double result;
    try
    {
        //Infinity.
        if     (value == ".inf"){result = sign * double.infinity;}
        //Not a Number.
        else if(value == ".nan"){result = double.nan;}
        //Sexagesimal.
        else if(value.canFind(":"))
        {
            double val = 0.0;
            double base = 1.0;
            // Rightmost group is the least significant; each group to the
            // left is worth 60 times more.
            foreach_reverse(digit; value.split(":"))
            {
                val += to!double(digit) * base;
                base *= 60.0;
            }
            result = sign * val;
        }
        //Plain floating point.
        else{result = sign * to!double(value);}
    }
    catch(Exception e)
    {
        throw new Exception("Unable to parse float value: \"" ~ value ~ "\"");
    }

    return result;
}
@safe unittest
{
    bool eq(double a, double b, double epsilon = 0.2) @safe
    {
        return a >= (b - epsilon) && a <= (b + epsilon);
    }

    string canonical   = "6.8523015e+5";
    string exponential = "685.230_15e+03";
    string fixed       = "685_230.15";
    string sexagesimal = "190:20:30.15";
    string negativeInf = "-.inf";
    string NaN         = ".NaN";

    assert(eq(685230.15, constructReal(canonical)));
    assert(eq(685230.15, constructReal(exponential)));
    assert(eq(685230.15, constructReal(fixed)));
    assert(eq(685230.15, constructReal(sexagesimal)));
    assert(eq(-double.infinity, constructReal(negativeInf)));
    assert(to!string(constructReal(NaN)) == "nan");

    // Spellings without the leading dot, empty and sign-only scalars are rejected.
    assertThrown!Exception(constructReal("nan"));
    assertThrown!Exception(constructReal("inf"));
    assertThrown!Exception(constructReal(""));
    assertThrown!Exception(constructReal("-"));
}

// Construct a binary (base64) _node.
//
// Characters that occur in std.ascii.newline are removed before decoding.
//
// Throws: Exception if the value is not valid base64.
ubyte[] constructBinary(const string value) @safe
{
    import std.ascii : newline;
    import std.array : array;

    // For an unknown reason, this must be nested to work (compiler bug?).
    try
    {
        return Base64.decode(value.representation.filter!(c => !newline.canFind(c)).array);
    }
    catch(Base64Exception e)
    {
        throw new Exception("Unable to decode base64 value: " ~ e.msg);
    }
}

@safe unittest
{
    auto test = "The Answer: 42".representation;
    char[] buffer;
    buffer.length = 256;
    string input = Base64.encode(test, buffer).idup;
    const value = constructBinary(input);
    assert(value == test);
    assert(value == [84, 104, 101, 32, 65, 110, 115, 119, 101, 114, 58, 32, 52, 50]);
}

// Construct a timestamp _node.
//
// The value is parsed in three steps: a mandatory year-month-day part, an
// optional hour:minute:second part with optional fraction, and an optional
// timezone ("Z" or a signed hour[:minute] offset).
//
// Throws: Exception if the value can not be parsed.
Timestamp constructTimestamp(const string str) @safe
{
    import mir.conv: to;
    string value = str;

    auto YMDRegexp = regex("^([0-9][0-9][0-9][0-9])-([0-9][0-9]?)-([0-9][0-9]?)");
    auto HMSRegexp = regex("^[Tt \t]+([0-9][0-9]?):([0-9][0-9]):([0-9][0-9])(\\.[0-9]*)?");
    auto TZRegexp  = regex("^[ \t]*Z|([-+][0-9][0-9]?)(:[0-9][0-9])?");

    try
    {
        // First, get year, month and day.
        auto matches = match(value, YMDRegexp);

        enforce(!matches.empty,
                new Exception("Unable to parse timestamp value: " ~ value));

        auto captures = matches.front.captures;
        const year  = to!short(captures[1]);
        const month = to!ubyte(captures[2]);
        const day   = to!ubyte(captures[3]);

        // If available, get hour, minute, second and fraction, if present.
        value = matches.front.post;
        matches  = match(value, HMSRegexp);
        if(matches.empty)
            return Timestamp(year, month, day);

        captures = matches.front.captures;
        const hour            = to!byte(captures[1]);
        const minute          = to!byte(captures[2]);
        const second          = to!byte(captures[3]);
        Timestamp ret;
        // captures[4] is the fraction including its leading '.', so a
        // length of 0 or 1 means there are no fraction digits.
        if (captures[4].length <= 1)
        {
            ret = Timestamp(year, month, day, hour, minute, second);
        }
        else
        {
            // Negative count of fraction digits, passed to Timestamp
            // together with the digits themselves.
            long fraction = 1 - captures[4].length;
            auto fractionCoefficient = captures[4][1 .. $].to!ulong;
            ret = Timestamp(year, month, day, hour, minute, second, cast(byte) fraction, fractionCoefficient);
        }

        // If available, get timezone.
        value = matches.front.post;
        matches = match(value, TZRegexp);
        if(matches.empty || matches.front.captures[0] == "Z")
            // No timezone.
            return ret;

        // We have a timezone, so parse it.
        captures = matches.front.captures;
        int sign    = 1;
        int tzHours;
        if(!captures[1].empty)
        {
            if(captures[1][0] == '-') {sign = -1;}
            tzHours = to!ubyte(captures[1][1 .. $]);
        }
        const tzMinutes = (!captures[2].empty) ? to!ubyte(captures[2][1 .. $]) : 0;
        // Offset in minutes.
        const tzOffset  = sign * (60 * tzHours + tzMinutes);
        ret.offset = cast(short)tzOffset;
        ret.addMinutes(cast(short)-tzOffset);
        return ret;
    }
    catch(Exception e)
    {
        throw new Exception("Unable to parse timestamp value " ~ value ~ " : " ~ e.msg);
    }
}
@safe unittest
{
    string timestamp(string value)
    {
        return constructTimestamp(value).toISOString();
    }

    string canonical      = "2001-12-15T02:59:43.1Z";
    string iso8601        = "2001-12-14t21:59:43.10-05:00";
    string spaceSeparated = "2001-12-14 21:59:43.10 -5";
    string noTZ           = "2001-12-15 2:59:43.10";
    string noFraction     = "2001-12-15 2:59:43";
    string ymd            = "2002-12-14";

    assert(timestamp(canonical)      == "20011215T025943.1Z", timestamp(canonical));
    //avoiding float conversion errors
    assert(timestamp(iso8601)        == "20011214T215943.10-05", timestamp(iso8601));
    assert(timestamp(spaceSeparated) == "20011214T215943.10-05", timestamp(spaceSeparated));
    assert(timestamp(noTZ)           == "20011215T025943.10Z", timestamp(noTZ));
    assert(timestamp(noFraction)     == "20011215T025943Z", timestamp(noFraction));
    assert(timestamp(ymd)            == "20021214", timestamp(ymd));
}

// Construct a string _node.
string constructString(const string str) @safe
{
    // A string scalar is already in its final form.
    return str;
}

// Turn a sequence of one-entry mappings into a flat array of pairs.
//
// Params:  type  = Name of the type being built; only used in the error message.
//          nodes = Sequence elements; each must be a mapping with exactly one entry.
//
// Throws: Exception if an element is not a mapping or does not have exactly one entry.
Node.Pair[] getPairs(string type, const Node[] nodes) @safe
{
    Node.Pair[] collected;
    collected.reserve(nodes.length);
    foreach(element; nodes)
    {
        if(element.nodeID != NodeID.mapping || element.length != 1)
        {
            throw new Exception("While constructing " ~ type ~
                                ", expected a mapping with single element");
        }

        collected ~= element.as!(Node.Pair[]);
    }

    return collected;
}

// Build an ordered map _node: key:value pairs in sequence order, duplicate keys rejected.
//
// Throws: Exception on a malformed element or on a repeated key.
Node.Pair[] constructOrderedMap(const Node[] nodes) @safe
{
    auto entries = getPairs("ordered map", nodes);

    // Keys seen so far, used to detect duplicates.
    //TODO this should be replaced by something with deterministic memory allocation.
    auto seenKeys = redBlackTree!Node();
    foreach(ref entry; entries)
    {
        if(entry.key in seenKeys)
        {
            throw new Exception("Duplicate entry in an ordered map: "
                                ~ entry.key.debugString());
        }
        seenKeys.insert(entry.key);
    }
    return entries;
}
@safe unittest
{
    // Single-entry mappings whose key/value types swap between string and
    // integer on every other element.
    Node[] alternateTypes(uint length) @safe
    {
        Node[] mappings;
        foreach(long index; 0 .. length)
        {
            auto entry = (index % 2)
                ? Node.Pair(index.to!string, index)
                : Node.Pair(index, index.to!string);
            mappings ~= Node([entry]);
        }
        return mappings;
    }

    // Single-entry mappings that all have a string key and an integer value.
    Node[] sameType(uint length) @safe
    {
        Node[] mappings;
        foreach(long index; 0 .. length)
        {
            auto entry = Node.Pair(index.to!string, index);
            mappings ~= Node([entry]);
        }
        return mappings;
    }

    assertThrown(constructOrderedMap(alternateTypes(8) ~ alternateTypes(2)));
    assertNotThrown(constructOrderedMap(alternateTypes(8)));
    assertThrown(constructOrderedMap(sameType(64) ~ sameType(16)));
    assertThrown(constructOrderedMap(alternateTypes(64) ~ alternateTypes(16)));
    assertNotThrown(constructOrderedMap(sameType(64)));
    assertNotThrown(constructOrderedMap(alternateTypes(64)));
}

// Build a pairs _node: key:value pairs in sequence order, duplicates allowed.
Node.Pair[] constructPairs(const Node[] nodes) @safe
{
    return getPairs("pairs", nodes);
}

// Build a set _node from the keys of a mapping; a repeated key is an error.
Node[] constructSet(const Node.Pair[] pairs) @safe
{
    // In future, the map here should be replaced with something with deterministic
    // memory allocation if possible.
    // Keys seen so far, used to detect duplicates.
    ubyte[Node] seenKeys;
    Node[] members;
    members.reserve(pairs.length);
    foreach(entry; pairs)
    {
        if((entry.key in seenKeys) !is null)
        {
            throw new Exception("Duplicate entry in a set");
        }
        seenKeys[entry.key] = 0;
        members ~= entry.key;
    }

    return members;
}
@safe unittest
{
    // Pairs with distinct string keys "0" .. "length - 1" and null values.
    Node.Pair[] set(uint length) @safe
    {
        Node.Pair[] entries;
        foreach(long index; 0 .. length)
        {
            entries ~= Node.Pair(index.to!string, null);
        }

        return entries;
    }

    auto shortWithDuplicates    = set(8) ~ set(2);
    auto shortWithoutDuplicates = set(8);
    auto longWithDuplicates     = set(64) ~ set(4);
    auto longWithoutDuplicates  = set(64);

    // Compares the keys of a pair array with a node array, element by element.
    bool eq(Node.Pair[] a, Node[] b)
    {
        if(a.length != b.length){return false;}
        foreach(index; 0 .. a.length)
        {
            if(a[index].key != b[index])
            {
                return false;
            }
        }
        return true;
    }

    auto shortWithDuplicatesCopy    = shortWithDuplicates.dup;
    auto shortWithoutDuplicatesCopy = shortWithoutDuplicates.dup;
    auto longWithDuplicatesCopy     = longWithDuplicates.dup;
    auto longWithoutDuplicatesCopy  = longWithoutDuplicates.dup;

    assertThrown(constructSet(shortWithDuplicatesCopy));
    assertNotThrown(constructSet(shortWithoutDuplicatesCopy));
    assertThrown(constructSet(longWithDuplicatesCopy));
    assertNotThrown(constructSet(longWithoutDuplicatesCopy));
}

// Build a sequence (array) _node; the nodes are returned unchanged.
Node[] constructSequence(Node[] nodes) @safe
{
    return nodes;
}

// Build an unordered map _node; the pairs are returned unchanged once no
// key is found to repeat.
//
// Throws: Exception on a repeated key.
Node.Pair[] constructMap(Node.Pair[] pairs) @safe
{
    // Keys seen so far, used to detect duplicates.
    //TODO this should be replaced by something with deterministic memory allocation.
    auto seenKeys = redBlackTree!Node();
    foreach(ref entry; pairs)
    {
        if(entry.key in seenKeys)
        {
            throw new Exception("Duplicate entry in a map: " ~ entry.key.debugString());
        }
        seenKeys.insert(entry.key);
    }
    return pairs;
}