• Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

/disk4/html/www/moses/doxygen/mosesdecoder/phrase-extract/extract-mixed-syntax/pugixml.cpp

Go to the documentation of this file.
00001 
00014 #ifndef SOURCE_PUGIXML_CPP
00015 #define SOURCE_PUGIXML_CPP
00016 
00017 #include "pugixml.hpp"
00018 
00019 #include <cstdlib>
00020 #include <cstdio>
00021 #include <cstring>
00022 #include <cassert>
00023 #include <cwchar>
00024 
00025 #ifndef PUGIXML_NO_XPATH
00026 #       include <cmath>
00027 #       include <float.h>
00028 #       ifdef PUGIXML_NO_EXCEPTIONS
00029 #               include <csetjmp>
00030 #       endif
00031 #endif
00032 
00033 #ifndef PUGIXML_NO_STL
00034 #       include <istream>
00035 #       include <ostream>
00036 #       include <string>
00037 #endif
00038 
00039 // For placement new
00040 #include <new>
00041 
00042 #ifdef _MSC_VER
00043 #       pragma warning(push)
00044 #       pragma warning(disable: 4127) // conditional expression is constant
00045 #       pragma warning(disable: 4324) // structure was padded due to __declspec(align())
00046 #       pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
00047 #       pragma warning(disable: 4702) // unreachable code
00048 #       pragma warning(disable: 4996) // this function or variable may be unsafe
00049 #       pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
00050 #endif
00051 
00052 #ifdef __INTEL_COMPILER
00053 #       pragma warning(disable: 177) // function was declared but never referenced
00054 #       pragma warning(disable: 279) // controlling expression is constant
00055 #       pragma warning(disable: 1478 1786) // function was declared "deprecated"
00056 #       pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
00057 #endif
00058 
00059 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
00060 #       pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
00061 #endif
00062 
00063 #ifdef __BORLANDC__
00064 #       pragma option push
00065 #       pragma warn -8008 // condition is always false
00066 #       pragma warn -8066 // unreachable code
00067 #endif
00068 
00069 #ifdef __SNC__
00070 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
00071 #       pragma diag_suppress=178 // function was declared but never referenced
00072 #       pragma diag_suppress=237 // controlling expression is constant
00073 #endif
00074 
00075 // Inlining controls
00076 #if defined(_MSC_VER) && _MSC_VER >= 1300
00077 #       define PUGI__NO_INLINE __declspec(noinline)
00078 #elif defined(__GNUC__)
00079 #       define PUGI__NO_INLINE __attribute__((noinline))
00080 #else
00081 #       define PUGI__NO_INLINE
00082 #endif
00083 
00084 // Simple static assertion
00085 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
00086 
00087 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
00088 #ifdef __DMC__
00089 #       define PUGI__DMC_VOLATILE volatile
00090 #else
00091 #       define PUGI__DMC_VOLATILE
00092 #endif
00093 
00094 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
00095 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
00096 using std::memcpy;
00097 using std::memmove;
00098 #endif
00099 
00100 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
00101 #if defined(_MSC_VER) && !defined(__S3E__)
00102 #       define PUGI__MSVC_CRT_VERSION _MSC_VER
00103 #endif
00104 
00105 #ifdef PUGIXML_HEADER_ONLY
00106 #       define PUGI__NS_BEGIN namespace pugi { namespace impl {
00107 #       define PUGI__NS_END } }
00108 #       define PUGI__FN inline
00109 #       define PUGI__FN_NO_INLINE inline
00110 #else
00111 #       if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
00112 #               define PUGI__NS_BEGIN namespace pugi { namespace impl {
00113 #               define PUGI__NS_END } }
00114 #       else
00115 #               define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
00116 #               define PUGI__NS_END } } }
00117 #       endif
00118 #       define PUGI__FN
00119 #       define PUGI__FN_NO_INLINE PUGI__NO_INLINE
00120 #endif
00121 
00122 // uintptr_t
00123 #if !defined(_MSC_VER) || _MSC_VER >= 1600
00124 #       include <stdint.h>
00125 #else
00126 #       ifndef _UINTPTR_T_DEFINED
00127 // No native uintptr_t in MSVC6 and in some WinCE versions
00128 typedef size_t uintptr_t;
00129 #define _UINTPTR_T_DEFINED
00130 #       endif
00131 PUGI__NS_BEGIN
00132 typedef unsigned __int8 uint8_t;
00133 typedef unsigned __int16 uint16_t;
00134 typedef unsigned __int32 uint32_t;
00135 PUGI__NS_END
00136 #endif
00137 
00138 // Memory allocation
00139 PUGI__NS_BEGIN
00140 PUGI__FN void* default_allocate(size_t size)
00141 {
00142   return malloc(size);
00143 }
00144 
00145 PUGI__FN void default_deallocate(void* ptr)
00146 {
00147   free(ptr);
00148 }
00149 
00150 template <typename T>
00151 struct xml_memory_management_function_storage {
00152   static allocation_function allocate;
00153   static deallocation_function deallocate;
00154 };
00155 
00156 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
00157 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
00158 
00159 typedef xml_memory_management_function_storage<int> xml_memory;
00160 PUGI__NS_END
00161 
00162 // String utilities
00163 PUGI__NS_BEGIN
00164 // Get string length
00165 PUGI__FN size_t strlength(const char_t* s)
00166 {
00167   assert(s);
00168 
00169 #ifdef PUGIXML_WCHAR_MODE
00170   return wcslen(s);
00171 #else
00172   return strlen(s);
00173 #endif
00174 }
00175 
00176 // Compare two strings
00177 PUGI__FN bool strequal(const char_t* src, const char_t* dst)
00178 {
00179   assert(src && dst);
00180 
00181 #ifdef PUGIXML_WCHAR_MODE
00182   return wcscmp(src, dst) == 0;
00183 #else
00184   return strcmp(src, dst) == 0;
00185 #endif
00186 }
00187 
00188 // Compare lhs with [rhs_begin, rhs_end)
00189 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
00190 {
00191   for (size_t i = 0; i < count; ++i)
00192     if (lhs[i] != rhs[i])
00193       return false;
00194 
00195   return lhs[count] == 0;
00196 }
00197 
00198 #ifdef PUGIXML_WCHAR_MODE
00199 // Convert string to wide string, assuming all symbols are ASCII
00200 PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
00201 {
00202   for (const char* i = source; *i; ++i) *dest++ = *i;
00203   *dest = 0;
00204 }
00205 #endif
00206 PUGI__NS_END
00207 
00208 #if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
00209 // auto_ptr-like buffer holder for exception recovery
00210 PUGI__NS_BEGIN
// Scope guard for a raw buffer: unless release() was called, the destructor
// passes the held pointer to the supplied deleter.
struct buffer_holder {
  void* data;
  void (*deleter)(void*);

  // Takes the buffer pointer and the function used to free it.
  buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_) {
  }

  // Frees the buffer if one is still held.
  ~buffer_holder() {
    if (!data) return;

    deleter(data);
  }

  // Gives the buffer back to the caller; the holder no longer frees it.
  void* release() {
    void* owned = data;

    data = 0;

    return owned;
  }
};
00228 PUGI__NS_END
00229 #endif
00230 
00231 PUGI__NS_BEGIN
// Payload size of a regular memory page; can be overridden at build time
// through PUGIXML_MEMORY_PAGE_SIZE.
#ifdef PUGIXML_MEMORY_PAGE_SIZE
static const size_t xml_memory_page_size = PUGIXML_MEMORY_PAGE_SIZE;
#else
static const size_t xml_memory_page_size = 32768;
#endif

// Pages are aligned to this boundary, which leaves the low bits of a page
// address free for the flag/type masks below.
static const uintptr_t xml_memory_page_alignment = 32;

// Selects the page-address part of a header word.
static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);

// Header flag bits tested before deallocating a name / value string.
static const uintptr_t xml_memory_page_name_allocated_mask = 16;
static const uintptr_t xml_memory_page_value_allocated_mask = 8;

// Low header bits that hold the node type.
static const uintptr_t xml_memory_page_type_mask = 7;
00245 
struct xml_allocator;

// Header placed at the start of every memory page; the payload begins at `data`.
struct xml_memory_page {
  // Treats `memory` as a page header and zeroes all bookkeeping fields.
  // Returns 0 when `memory` is null.
  static xml_memory_page* construct(void* memory) {
    xml_memory_page* page = static_cast<xml_memory_page*>(memory);

    if (page) {
      page->allocator = 0;
      page->memory = 0;
      page->prev = 0;
      page->next = 0;
      page->busy_size = 0;
      page->freed_size = 0;
    }

    return page;
  }

  // allocator this page belongs to
  xml_allocator* allocator;

  // pointer to the raw block the page was carved from (the one handed to deallocate)
  void* memory;

  // neighbours in the page list
  xml_memory_page* prev;
  xml_memory_page* next;

  // bytes handed out from this page / bytes returned to it
  size_t busy_size;
  size_t freed_size;

  // first byte of the payload area
  char data[1];
};
00276 
// Bookkeeping block stored immediately in front of every string handed out by
// xml_allocator::allocate_string.
struct xml_memory_string_header {
  // distance of this header from the start of the owning page's data area
  uint16_t page_offset;

  // size of the whole allocation (header included); 0 means the string occupies the whole page
  uint16_t full_size;
};
00281 
00282 struct xml_allocator {
00283   xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) {
00284   }
00285 
00286   xml_memory_page* allocate_page(size_t data_size) {
00287     size_t size = offsetof(xml_memory_page, data) + data_size;
00288 
00289     // allocate block with some alignment, leaving memory for worst-case padding
00290     void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
00291     if (!memory) return 0;
00292 
00293     // align upwards to page boundary
00294     void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
00295 
00296     // prepare page structure
00297     xml_memory_page* page = xml_memory_page::construct(page_memory);
00298 
00299     page->memory = memory;
00300     page->allocator = _root->allocator;
00301 
00302     return page;
00303   }
00304 
00305   static void deallocate_page(xml_memory_page* page) {
00306     xml_memory::deallocate(page->memory);
00307   }
00308 
00309   void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
00310 
00311   void* allocate_memory(size_t size, xml_memory_page*& out_page) {
00312     if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
00313 
00314     void* buf = _root->data + _busy_size;
00315 
00316     _busy_size += size;
00317 
00318     out_page = _root;
00319 
00320     return buf;
00321   }
00322 
00323   void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) {
00324     if (page == _root) page->busy_size = _busy_size;
00325 
00326     assert(ptr >= page->data && ptr < page->data + page->busy_size);
00327     (void)!ptr;
00328 
00329     page->freed_size += size;
00330     assert(page->freed_size <= page->busy_size);
00331 
00332     if (page->freed_size == page->busy_size) {
00333       if (page->next == 0) {
00334         assert(_root == page);
00335 
00336         // top page freed, just reset sizes
00337         page->busy_size = page->freed_size = 0;
00338         _busy_size = 0;
00339       } else {
00340         assert(_root != page);
00341         assert(page->prev);
00342 
00343         // remove from the list
00344         page->prev->next = page->next;
00345         page->next->prev = page->prev;
00346 
00347         // deallocate
00348         deallocate_page(page);
00349       }
00350     }
00351   }
00352 
00353   char_t* allocate_string(size_t length) {
00354     // allocate memory for string and header block
00355     size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
00356 
00357     // round size up to pointer alignment boundary
00358     size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
00359 
00360     xml_memory_page* page;
00361     xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
00362 
00363     if (!header) return 0;
00364 
00365     // setup header
00366     ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
00367 
00368     assert(page_offset >= 0 && page_offset < (1 << 16));
00369     header->page_offset = static_cast<uint16_t>(page_offset);
00370 
00371     // full_size == 0 for large strings that occupy the whole page
00372     assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
00373     header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
00374 
00375     // round-trip through void* to avoid 'cast increases required alignment of target type' warning
00376     // header is guaranteed a pointer-sized alignment, which should be enough for char_t
00377     return static_cast<char_t*>(static_cast<void*>(header + 1));
00378   }
00379 
00380   void deallocate_string(char_t* string) {
00381     // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
00382     // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
00383 
00384     // get header
00385     xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
00386 
00387     // deallocate
00388     size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
00389     xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
00390 
00391     // if full_size == 0 then this string occupies the whole page
00392     size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
00393 
00394     deallocate_memory(header, full_size, page);
00395   }
00396 
00397   xml_memory_page* _root;
00398   size_t _busy_size;
00399 };
00400 
00401 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
00402 {
00403   const size_t large_allocation_threshold = xml_memory_page_size / 4;
00404 
00405   xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
00406   out_page = page;
00407 
00408   if (!page) return 0;
00409 
00410   if (size <= large_allocation_threshold) {
00411     _root->busy_size = _busy_size;
00412 
00413     // insert page at the end of linked list
00414     page->prev = _root;
00415     _root->next = page;
00416     _root = page;
00417 
00418     _busy_size = size;
00419   } else {
00420     // insert page before the end of linked list, so that it is deleted as soon as possible
00421     // the last page is not deleted even if it's empty (see deallocate_memory)
00422     assert(_root->prev);
00423 
00424     page->prev = _root->prev;
00425     page->next = _root;
00426 
00427     _root->prev->next = page;
00428     _root->prev = page;
00429   }
00430 
00431   // allocate inside page
00432   page->busy_size = size;
00433 
00434   return page->data;
00435 }
00436 PUGI__NS_END
00437 
00438 namespace pugi
00439 {
00441 struct xml_attribute_struct {
00443   xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) {
00444   }
00445 
00446   uintptr_t header;
00447 
00448   char_t* name; 
00449   char_t*       value;  
00450 
00451   xml_attribute_struct* prev_attribute_c;       
00452   xml_attribute_struct* next_attribute; 
00453 };
00454 
00456 struct xml_node_struct {
00459   xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) {
00460   }
00461 
00462   uintptr_t header;
00463 
00464   xml_node_struct*              parent;                                 
00465 
00466   char_t*                                       name;                                   
00467   char_t*                                       value;                                  
00468 
00469   xml_node_struct*              first_child;                    
00470 
00471   xml_node_struct*              prev_sibling_c;                 
00472   xml_node_struct*              next_sibling;                   
00473 
00474   xml_attribute_struct* first_attribute;                
00475 };
00476 }
00477 
00478 PUGI__NS_BEGIN
00479 struct xml_document_struct: public xml_node_struct, public xml_allocator {
00480   xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0) {
00481   }
00482 
00483   const char_t* buffer;
00484 };
00485 
00486 inline xml_allocator& get_allocator(const xml_node_struct* node)
00487 {
00488   assert(node);
00489 
00490   return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
00491 }
00492 PUGI__NS_END
00493 
00494 // Low-level DOM operations
00495 PUGI__NS_BEGIN
00496 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
00497 {
00498   xml_memory_page* page;
00499   void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
00500 
00501   return new (memory) xml_attribute_struct(page);
00502 }
00503 
00504 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
00505 {
00506   xml_memory_page* page;
00507   void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
00508 
00509   return new (memory) xml_node_struct(page, type);
00510 }
00511 
00512 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
00513 {
00514   uintptr_t header = a->header;
00515 
00516   if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
00517   if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
00518 
00519   alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
00520 }
00521 
00522 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
00523 {
00524   uintptr_t header = n->header;
00525 
00526   if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
00527   if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
00528 
00529   for (xml_attribute_struct* attr = n->first_attribute; attr; ) {
00530     xml_attribute_struct* next = attr->next_attribute;
00531 
00532     destroy_attribute(attr, alloc);
00533 
00534     attr = next;
00535   }
00536 
00537   for (xml_node_struct* child = n->first_child; child; ) {
00538     xml_node_struct* next = child->next_sibling;
00539 
00540     destroy_node(child, alloc);
00541 
00542     child = next;
00543   }
00544 
00545   alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
00546 }
00547 
00548 PUGI__FN_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
00549 {
00550   xml_node_struct* child = allocate_node(alloc, type);
00551   if (!child) return 0;
00552 
00553   child->parent = node;
00554 
00555   xml_node_struct* first_child = node->first_child;
00556 
00557   if (first_child) {
00558     xml_node_struct* last_child = first_child->prev_sibling_c;
00559 
00560     last_child->next_sibling = child;
00561     child->prev_sibling_c = last_child;
00562     first_child->prev_sibling_c = child;
00563   } else {
00564     node->first_child = child;
00565     child->prev_sibling_c = child;
00566   }
00567 
00568   return child;
00569 }
00570 
00571 PUGI__FN_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
00572 {
00573   xml_attribute_struct* a = allocate_attribute(alloc);
00574   if (!a) return 0;
00575 
00576   xml_attribute_struct* first_attribute = node->first_attribute;
00577 
00578   if (first_attribute) {
00579     xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;
00580 
00581     last_attribute->next_attribute = a;
00582     a->prev_attribute_c = last_attribute;
00583     first_attribute->prev_attribute_c = a;
00584   } else {
00585     node->first_attribute = a;
00586     a->prev_attribute_c = a;
00587   }
00588 
00589   return a;
00590 }
00591 PUGI__NS_END
00592 
00593 // Helper classes for code generation
00594 PUGI__NS_BEGIN
// Compile-time boolean tags: pass one of these as a template argument and
// read the choice back through ::value.

// Tag whose value is 0.
struct opt_false {
  enum { value = 0 };
};

// Tag whose value is 1.
struct opt_true {
  enum { value = 1 };
};
00602 PUGI__NS_END
00603 
00604 // Unicode utilities
00605 PUGI__NS_BEGIN
// Swaps the two bytes of a 16-bit value.
inline uint16_t endian_swap(uint16_t value)
{
  const int low_to_high = (value & 0xff) << 8;
  const int high_to_low = value >> 8;

  return static_cast<uint16_t>(low_to_high | high_to_low);
}
00610 
// Reverses the byte order of a 32-bit value.
inline uint32_t endian_swap(uint32_t value)
{
  const uint32_t byte0_up = (value & 0xff) << 24;
  const uint32_t byte1_up = (value & 0xff00) << 8;
  const uint32_t byte2_down = (value & 0xff0000) >> 8;
  const uint32_t byte3_down = value >> 24;

  return byte0_up | byte1_up | byte2_down | byte3_down;
}
00615 
// Decoder traits that count how many UTF-8 bytes the decoded code points need.
struct utf8_counter {
  typedef size_t value_type;

  // Code points below U+10000: 1, 2 or 3 bytes.
  static value_type low(value_type result, uint32_t ch) {
    size_t bytes = 3;              // U+0800..U+FFFF

    if (ch < 0x80) bytes = 1;      // U+0000..U+007F
    else if (ch < 0x800) bytes = 2; // U+0080..U+07FF

    return result + bytes;
  }

  // Code points U+10000..U+10FFFF: always 4 bytes.
  static value_type high(value_type result, uint32_t) {
    return result + 4;
  }
};
00633 
// Decoder traits that encode code points as UTF-8 into an output buffer.
// Each function returns the position just past the bytes it wrote.
struct utf8_writer {
  typedef uint8_t* value_type;

  // Code points below U+10000: writes 1, 2 or 3 bytes.
  static value_type low(value_type result, uint32_t ch) {
    if (ch < 0x80) {
      // U+0000..U+007F
      *result++ = static_cast<uint8_t>(ch);
    } else if (ch < 0x800) {
      // U+0080..U+07FF
      *result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
      *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
    } else {
      // U+0800..U+FFFF
      *result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
      *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
      *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
    }

    return result;
  }

  // Code points U+10000..U+10FFFF: writes 4 bytes.
  static value_type high(value_type result, uint32_t ch) {
    *result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
    *result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
    *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
    *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));

    return result;
  }

  // Picks low() or high() depending on the code point.
  static value_type any(value_type result, uint32_t ch) {
    if (ch < 0x10000) return low(result, ch);

    return high(result, ch);
  }
};
00671 
// Decoder traits that count how many UTF-16 code units the decoded code points need.
struct utf16_counter {
  typedef size_t value_type;

  // Code points below U+10000 take one code unit.
  static value_type low(value_type result, uint32_t) {
    return 1 + result;
  }

  // Code points from U+10000 up take two code units.
  static value_type high(value_type result, uint32_t) {
    return 2 + result;
  }
};
00683 
// Decoder traits that encode code points as UTF-16 into an output buffer.
// Each function returns the position just past the code units it wrote.
struct utf16_writer {
  typedef uint16_t* value_type;

  // Code points below U+10000 are stored as a single code unit.
  static value_type low(value_type result, uint32_t ch) {
    result[0] = static_cast<uint16_t>(ch);

    return result + 1;
  }

  // Code points from U+10000 up are split into a surrogate pair.
  static value_type high(value_type result, uint32_t ch) {
    const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);

    const uint32_t upper_bits = offset >> 10;
    const uint32_t lower_bits = offset & 0x3ff;

    result[0] = static_cast<uint16_t>(0xD800 + upper_bits);
    result[1] = static_cast<uint16_t>(0xDC00 + lower_bits);

    return result + 2;
  }

  // Picks low() or high() depending on the code point.
  static value_type any(value_type result, uint32_t ch) {
    if (ch < 0x10000) return low(result, ch);

    return high(result, ch);
  }
};
00707 
// Decoder traits that count UTF-32 code units: every code point takes exactly one.
struct utf32_counter {
  typedef size_t value_type;

  static value_type low(value_type result, uint32_t) {
    return 1 + result;
  }

  static value_type high(value_type result, uint32_t) {
    return 1 + result;
  }
};
00719 
// Decoder traits that store code points as UTF-32: each code point is written
// unchanged as one code unit, whatever its range.
struct utf32_writer {
  typedef uint32_t* value_type;

  static value_type low(value_type result, uint32_t ch) {
    return any(result, ch);
  }

  static value_type high(value_type result, uint32_t ch) {
    return any(result, ch);
  }

  // Writes the code point and returns the position just past it.
  static value_type any(value_type result, uint32_t ch) {
    result[0] = ch;

    return result + 1;
  }
};
00741 
// Decoder traits that store code points as single Latin-1 bytes; anything
// above 255 is replaced with '?'.
struct latin1_writer {
  typedef uint8_t* value_type;

  // Code points below U+10000: kept if they fit into one byte, '?' otherwise.
  static value_type low(value_type result, uint32_t ch) {
    if (ch > 255) {
      result[0] = static_cast<uint8_t>('?');
    } else {
      result[0] = static_cast<uint8_t>(ch);
    }

    return result + 1;
  }

  // Code points from U+10000 up never fit; always writes '?'.
  static value_type high(value_type result, uint32_t ch) {
    (void)ch;

    result[0] = '?';

    return result + 1;
  }
};
00759 
00760 template <size_t size> struct wchar_selector;
00761 
00762 template <> struct wchar_selector<2> {
00763   typedef uint16_t type;
00764   typedef utf16_counter counter;
00765   typedef utf16_writer writer;
00766 };
00767 
00768 template <> struct wchar_selector<4> {
00769   typedef uint32_t type;
00770   typedef utf32_counter counter;
00771   typedef utf32_writer writer;
00772 };
00773 
00774 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
00775 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
00776 
00777 template <typename Traits, typename opt_swap = opt_false> struct utf_decoder {
00778   static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result) {
00779     const uint8_t utf8_byte_mask = 0x3f;
00780 
00781     while (size) {
00782       uint8_t lead = *data;
00783 
00784       // 0xxxxxxx -> U+0000..U+007F
00785       if (lead < 0x80) {
00786         result = Traits::low(result, lead);
00787         data += 1;
00788         size -= 1;
00789 
00790         // process aligned single-byte (ascii) blocks
00791         if ((reinterpret_cast<uintptr_t>(data) & 3) == 0) {
00792           // round-trip through void* to silence 'cast increases required alignment of target type' warnings
00793           while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0) {
00794             result = Traits::low(result, data[0]);
00795             result = Traits::low(result, data[1]);
00796             result = Traits::low(result, data[2]);
00797             result = Traits::low(result, data[3]);
00798             data += 4;
00799             size -= 4;
00800           }
00801         }
00802       }
00803       // 110xxxxx -> U+0080..U+07FF
00804       else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) {
00805         result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
00806         data += 2;
00807         size -= 2;
00808       }
00809       // 1110xxxx -> U+0800-U+FFFF
00810       else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) {
00811         result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
00812         data += 3;
00813         size -= 3;
00814       }
00815       // 11110xxx -> U+10000..U+10FFFF
00816       else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) {
00817         result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
00818         data += 4;
00819         size -= 4;
00820       }
00821       // 10xxxxxx or 11111xxx -> invalid
00822       else {
00823         data += 1;
00824         size -= 1;
00825       }
00826     }
00827 
00828     return result;
00829   }
00830 
00831   static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) {
00832     const uint16_t* end = data + size;
00833 
00834     while (data < end) {
00835       uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
00836 
00837       // U+0000..U+D7FF
00838       if (lead < 0xD800) {
00839         result = Traits::low(result, lead);
00840         data += 1;
00841       }
00842       // U+E000..U+FFFF
00843       else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) {
00844         result = Traits::low(result, lead);
00845         data += 1;
00846       }
00847       // surrogate pair lead
00848       else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end) {
00849         uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
00850 
00851         if (static_cast<unsigned int>(next - 0xDC00) < 0x400) {
00852           result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
00853           data += 2;
00854         } else {
00855           data += 1;
00856         }
00857       } else {
00858         data += 1;
00859       }
00860     }
00861 
00862     return result;
00863   }
00864 
00865   static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) {
00866     const uint32_t* end = data + size;
00867 
00868     while (data < end) {
00869       uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
00870 
00871       // U+0000..U+FFFF
00872       if (lead < 0x10000) {
00873         result = Traits::low(result, lead);
00874         data += 1;
00875       }
00876       // U+10000..U+10FFFF
00877       else {
00878         result = Traits::high(result, lead);
00879         data += 1;
00880       }
00881     }
00882 
00883     return result;
00884   }
00885 
00886   static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result) {
00887     for (size_t i = 0; i < size; ++i) {
00888       result = Traits::low(result, data[i]);
00889     }
00890 
00891     return result;
00892   }
00893 
00894   static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result) {
00895     return decode_utf16_block(data, size, result);
00896   }
00897 
00898   static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result) {
00899     return decode_utf32_block(data, size, result);
00900   }
00901 
00902   static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result) {
00903     return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
00904   }
00905 };
00906 
00907 template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
00908 {
00909   for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]);
00910 }
00911 
00912 #ifdef PUGIXML_WCHAR_MODE
00913 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
00914 {
00915   for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
00916 }
00917 #endif
00918 PUGI__NS_END
00919 
00920 PUGI__NS_BEGIN
// Character class bits; each enumerator is a distinct bit so that several
// classes can be combined in one table entry.
enum chartype_t {
  ct_parse_pcdata  = 1 << 0, // \0, &, \r, <
  ct_parse_attr    = 1 << 1, // \0, &, \r, ', "
  ct_parse_attr_ws = 1 << 2, // \0, &, \r, ', ", \n, tab
  ct_space         = 1 << 3, // \r, \n, space, tab
  ct_parse_cdata   = 1 << 4, // \0, ], >, \r
  ct_parse_comment = 1 << 5, // \0, -, >, \r
  ct_symbol        = 1 << 6, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
  ct_start_symbol  = 1 << 7  // Any symbol > 127, a-z, A-Z, _, :
};
00931 
// Character classification table, indexed by character code (0-255).
// Each entry is the bitwise OR of the chartype_t flags that apply to that character, for example:
//   55  = ct_parse_pcdata | ct_parse_attr | ct_parse_attr_ws | ct_parse_cdata | ct_parse_comment (\0)
//   63  = the same flags plus ct_space (\r)
//   192 = ct_symbol | ct_start_symbol (letters, '_', ':' and every code >= 128)
// The table is read through the PUGI__IS_CHARTYPE macro.
00932 static const unsigned char chartype_table[256] = {
00933   55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
00934   0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
00935   8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
00936   64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
00937   0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
00938   192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
00939   0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
00940   192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127
00941 
      // all codes >= 128 are treated as name characters (ct_symbol | ct_start_symbol)
00942   192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
00943   192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
00944   192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
00945   192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
00946   192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
00947   192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
00948   192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
00949   192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
00950 };
00951 
// Extended character class flags; one bit per class so that classes can be OR-ed together.
// The comment on each flag lists the characters that belong to the class.
enum chartypex_t {
  ctx_special_pcdata = 1 << 0, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
  ctx_special_attr   = 1 << 1, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
  ctx_start_symbol   = 1 << 2, // Any symbol > 127, a-z, A-Z, _
  ctx_digit          = 1 << 3, // 0-9
  ctx_symbol         = 1 << 4  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
00959 
// Extended character classification table, indexed by character code (0-255).
// Each entry is the bitwise OR of the chartypex_t flags that apply to that character, for example:
//   3  = ctx_special_pcdata | ctx_special_attr
//   20 = ctx_start_symbol | ctx_symbol (letters, '_' and every code >= 128)
//   24 = ctx_digit | ctx_symbol (0-9)
// The table is read through the PUGI__IS_CHARTYPEX macro.
00960 static const unsigned char chartypex_table[256] = {
00961   3,  3,  3,  3,  3,  3,  3,  3,     3,  0,  2,  3,  3,  2,  3,  3,     // 0-15
00962   3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31
00963   0,  0,  2,  0,  0,  0,  3,  0,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47
00964   24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  3,  0,     // 48-63
00965 
00966   0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79
00967   20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95
00968   0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111
00969   20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127
00970 
00971   20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+
00972   20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
00973   20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
00974   20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
00975   20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
00976   20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
00977   20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
00978   20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
00979 };
00980 
// PUGI__IS_CHARTYPE_IMPL(c, ct, table) yields a non-zero value when the table entry for
// character c has any of the flag bits in ct set.
// In wchar mode, characters with code >= 128 all share the entry table[128]; codes below 128
// index the table directly. Note that this variant evaluates c more than once, so c must be
// free of side effects.
// In char mode, c is converted to unsigned char and used as the index.
00981 #ifdef PUGIXML_WCHAR_MODE
00982 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
00983 #else
00984 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
00985 #endif
00986 
// Convenience wrappers: PUGI__IS_CHARTYPE tests chartype_t flags against chartype_table,
// PUGI__IS_CHARTYPEX tests chartypex_t flags against chartypex_table.
00987 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
00988 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
00989 
00990 PUGI__FN bool is_little_endian()
00991 {
00992   unsigned int ui = 1;
00993 
00994   return *reinterpret_cast<unsigned char*>(&ui) == 1;
00995 }
00996 
00997 PUGI__FN xml_encoding get_wchar_encoding()
00998 {
00999   PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
01000 
01001   if (sizeof(wchar_t) == 2)
01002     return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
01003   else
01004     return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
01005 }
01006 
01007 PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
01008 {
01009   // look for BOM in first few bytes
01010   if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
01011   if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
01012   if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
01013   if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
01014   if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
01015 
01016   // look for <, <? or <?xm in various encodings
01017   if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
01018   if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
01019   if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
01020   if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
01021   if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
01022 
01023   // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
01024   if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
01025   if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
01026 
01027   // no known BOM detected, assume utf8
01028   return encoding_utf8;
01029 }
01030 
01031 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
01032 {
01033   // replace wchar encoding with utf implementation
01034   if (encoding == encoding_wchar) return get_wchar_encoding();
01035 
01036   // replace utf16 encoding with utf16 with specific endianness
01037   if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
01038 
01039   // replace utf32 encoding with utf32 with specific endianness
01040   if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
01041 
01042   // only do autodetection if no explicit encoding is requested
01043   if (encoding != encoding_auto) return encoding;
01044 
01045   // skip encoding autodetection if input buffer is too small
01046   if (size < 4) return encoding_utf8;
01047 
01048   // try to guess encoding (based on XML specification, Appendix F.1)
01049   const uint8_t* data = static_cast<const uint8_t*>(contents);
01050 
01051   PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
01052 
01053   return guess_buffer_encoding(d0, d1, d2, d3);
01054 }
01055 
01056 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
01057 {
01058   if (is_mutable) {
01059     out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
01060   } else {
01061     void* buffer = xml_memory::allocate(size > 0 ? size : 1);
01062     if (!buffer) return false;
01063 
01064     memcpy(buffer, contents, size);
01065 
01066     out_buffer = static_cast<char_t*>(buffer);
01067   }
01068 
01069   out_length = size / sizeof(char_t);
01070 
01071   return true;
01072 }
01073 
01074 #ifdef PUGIXML_WCHAR_MODE
01075 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
01076 {
01077   return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
01078          (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
01079 }
01080 
01081 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
01082 {
01083   const char_t* data = static_cast<const char_t*>(contents);
01084 
01085   if (is_mutable) {
01086     out_buffer = const_cast<char_t*>(data);
01087   } else {
01088     out_buffer = static_cast<char_t*>(xml_memory::allocate(size > 0 ? size : 1));
01089     if (!out_buffer) return false;
01090   }
01091 
01092   out_length = size / sizeof(char_t);
01093 
01094   convert_wchar_endian_swap(out_buffer, data, out_length);
01095 
01096   return true;
01097 }
01098 
01099 PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
01100 {
01101   const uint8_t* data = static_cast<const uint8_t*>(contents);
01102 
01103   // first pass: get length in wchar_t units
01104   out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
01105 
01106   // allocate buffer of suitable length
01107   out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
01108   if (!out_buffer) return false;
01109 
01110   // second pass: convert utf8 input to wchar_t
01111   wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
01112   wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin);
01113 
01114   assert(out_end == out_begin + out_length);
01115   (void)!out_end;
01116 
01117   return true;
01118 }
01119 
01120 template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
01121 {
01122   const uint16_t* data = static_cast<const uint16_t*>(contents);
01123   size_t length = size / sizeof(uint16_t);
01124 
01125   // first pass: get length in wchar_t units
01126   out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0);
01127 
01128   // allocate buffer of suitable length
01129   out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
01130   if (!out_buffer) return false;
01131 
01132   // second pass: convert utf16 input to wchar_t
01133   wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
01134   wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
01135 
01136   assert(out_end == out_begin + out_length);
01137   (void)!out_end;
01138 
01139   return true;
01140 }
01141 
01142 template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
01143 {
01144   const uint32_t* data = static_cast<const uint32_t*>(contents);
01145   size_t length = size / sizeof(uint32_t);
01146 
01147   // first pass: get length in wchar_t units
01148   out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, length, 0);
01149 
01150   // allocate buffer of suitable length
01151   out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
01152   if (!out_buffer) return false;
01153 
01154   // second pass: convert utf32 input to wchar_t
01155   wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
01156   wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
01157 
01158   assert(out_end == out_begin + out_length);
01159   (void)!out_end;
01160 
01161   return true;
01162 }
01163 
01164 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
01165 {
01166   const uint8_t* data = static_cast<const uint8_t*>(contents);
01167 
01168   // get length in wchar_t units
01169   out_length = size;
01170 
01171   // allocate buffer of suitable length
01172   out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
01173   if (!out_buffer) return false;
01174 
01175   // convert latin1 input to wchar_t
01176   wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
01177   wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_latin1_block(data, size, out_begin);
01178 
01179   assert(out_end == out_begin + out_length);
01180   (void)!out_end;
01181 
01182   return true;
01183 }
01184 
01185 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
01186 {
01187   // get native encoding
01188   xml_encoding wchar_encoding = get_wchar_encoding();
01189 
01190   // fast path: no conversion required
01191   if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
01192 
01193   // only endian-swapping is required
01194   if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
01195 
01196   // source encoding is utf8
01197   if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
01198 
01199   // source encoding is utf16
01200   if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
01201     xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
01202 
01203     return (native_encoding == encoding) ?
01204            convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
01205            convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
01206   }
01207 
01208   // source encoding is utf32
01209   if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
01210     xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
01211 
01212     return (native_encoding == encoding) ?
01213            convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
01214            convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
01215   }
01216 
01217   // source encoding is latin1
01218   if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);
01219 
01220   assert(!"Invalid encoding");
01221   return false;
01222 }
01223 #else
01224 template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
01225 {
01226   const uint16_t* data = static_cast<const uint16_t*>(contents);
01227   size_t length = size / sizeof(uint16_t);
01228 
01229   // first pass: get length in utf8 units
01230   out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
01231 
01232   // allocate buffer of suitable length
01233   out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
01234   if (!out_buffer) return false;
01235 
01236   // second pass: convert utf16 input to utf8
01237   uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
01238   uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
01239 
01240   assert(out_end == out_begin + out_length);
01241   (void)!out_end;
01242 
01243   return true;
01244 }
01245 
01246 template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
01247 {
01248   const uint32_t* data = static_cast<const uint32_t*>(contents);
01249   size_t length = size / sizeof(uint32_t);
01250 
01251   // first pass: get length in utf8 units
01252   out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
01253 
01254   // allocate buffer of suitable length
01255   out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
01256   if (!out_buffer) return false;
01257 
01258   // second pass: convert utf32 input to utf8
01259   uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
01260   uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
01261 
01262   assert(out_end == out_begin + out_length);
01263   (void)!out_end;
01264 
01265   return true;
01266 }
01267 
01268 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
01269 {
01270   for (size_t i = 0; i < size; ++i)
01271     if (data[i] > 127)
01272       return i;
01273 
01274   return size;
01275 }
01276 
01277 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
01278 {
01279   const uint8_t* data = static_cast<const uint8_t*>(contents);
01280 
01281   // get size of prefix that does not need utf8 conversion
01282   size_t prefix_length = get_latin1_7bit_prefix_length(data, size);
01283   assert(prefix_length <= size);
01284 
01285   const uint8_t* postfix = data + prefix_length;
01286   size_t postfix_length = size - prefix_length;
01287 
01288   // if no conversion is needed, just return the original buffer
01289   if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
01290 
01291   // first pass: get length in utf8 units
01292   out_length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
01293 
01294   // allocate buffer of suitable length
01295   out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
01296   if (!out_buffer) return false;
01297 
01298   // second pass: convert latin1 input to utf8
01299   memcpy(out_buffer, data, prefix_length);
01300 
01301   uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
01302   uint8_t* out_end = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, out_begin + prefix_length);
01303 
01304   assert(out_end == out_begin + out_length);
01305   (void)!out_end;
01306 
01307   return true;
01308 }
01309 
01310 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
01311 {
01312   // fast path: no conversion required
01313   if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
01314 
01315   // source encoding is utf16
01316   if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
01317     xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
01318 
01319     return (native_encoding == encoding) ?
01320            convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
01321            convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
01322   }
01323 
01324   // source encoding is utf32
01325   if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
01326     xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
01327 
01328     return (native_encoding == encoding) ?
01329            convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
01330            convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
01331   }
01332 
01333   // source encoding is latin1
01334   if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
01335 
01336   assert(!"Invalid encoding");
01337   return false;
01338 }
01339 #endif
01340 
01341 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
01342 {
01343   // get length in utf8 characters
01344   return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
01345 }
01346 
01347 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
01348 {
01349   // convert to utf8
01350   uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
01351   uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);
01352 
01353   assert(begin + size == end);
01354   (void)!end;
01355 
01356   // zero-terminate
01357   buffer[size] = 0;
01358 }
01359 
01360 #ifndef PUGIXML_NO_STL
01361 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
01362 {
01363   // first pass: get length in utf8 characters
01364   size_t size = as_utf8_begin(str, length);
01365 
01366   // allocate resulting string
01367   std::string result;
01368   result.resize(size);
01369 
01370   // second pass: convert to utf8
01371   if (size > 0) as_utf8_end(&result[0], size, str, length);
01372 
01373   return result;
01374 }
01375 
01376 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
01377 {
01378   const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
01379 
01380   // first pass: get length in wchar_t units
01381   size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
01382 
01383   // allocate resulting string
01384   std::basic_string<wchar_t> result;
01385   result.resize(length);
01386 
01387   // second pass: convert to wchar_t
01388   if (length > 0) {
01389     wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
01390     wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
01391 
01392     assert(begin + length == end);
01393     (void)!end;
01394   }
01395 
01396   return result;
01397 }
01398 #endif
01399 
01400 inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
01401 {
01402   assert(target);
01403   size_t target_length = strlength(target);
01404 
01405   // always reuse document buffer memory if possible
01406   if (!allocated) return target_length >= length;
01407 
01408   // reuse heap memory if waste is not too great
01409   const size_t reuse_threshold = 32;
01410 
01411   return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
01412 }
01413 
01414 PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
01415 {
01416   size_t source_length = strlength(source);
01417 
01418   if (source_length == 0) {
01419     // empty string and null pointer are equivalent, so just deallocate old memory
01420     xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
01421 
01422     if (header & header_mask) alloc->deallocate_string(dest);
01423 
01424     // mark the string as not allocated
01425     dest = 0;
01426     header &= ~header_mask;
01427 
01428     return true;
01429   } else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest)) {
01430     // we can reuse old buffer, so just copy the new data (including zero terminator)
01431     memcpy(dest, source, (source_length + 1) * sizeof(char_t));
01432 
01433     return true;
01434   } else {
01435     xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
01436 
01437     // allocate new buffer
01438     char_t* buf = alloc->allocate_string(source_length + 1);
01439     if (!buf) return false;
01440 
01441     // copy the string (including zero terminator)
01442     memcpy(buf, source, (source_length + 1) * sizeof(char_t));
01443 
01444     // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
01445     if (header & header_mask) alloc->deallocate_string(dest);
01446 
01447     // the string is now allocated, so set the flag
01448     dest = buf;
01449     header |= header_mask;
01450 
01451     return true;
01452   }
01453 }
01454 
01455 struct gap {
01456   char_t* end;
01457   size_t size;
01458 
01459   gap(): end(0), size(0) {
01460   }
01461 
01462   // Push new gap, move s count bytes further (skipping the gap).
01463   // Collapse previous gap.
01464   void push(char_t*& s, size_t count) {
01465     if (end) { // there was a gap already; collapse it
01466       // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
01467       assert(s >= end);
01468       memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
01469     }
01470 
01471     s += count; // end of current gap
01472 
01473     // "merge" two gaps
01474     end = s;
01475     size += count;
01476   }
01477 
01478   // Collapse all gaps, return past-the-end pointer
01479   char_t* flush(char_t* s) {
01480     if (end) {
01481       // Move [old_gap_end, current_pos) to [old_gap_start, ...)
01482       assert(s >= end);
01483       memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
01484 
01485       return s - size;
01486     } else return s;
01487   }
01488 };
01489 
// Expands one character or entity reference in place. s points at the '&' that starts it.
// Recognized forms are the numeric references &#<dec>; and &#x<hex>; and the named entities
// &amp; &apos; &gt; &lt; &quot;.
// On success the replacement is written at s, the remaining characters of the reference are
// registered with g as a gap to be collapsed later, and the position just past the ';' is
// returned. If the reference is not recognized, nothing is written and the position where
// matching stopped is returned.
01490 PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
01491 {
01492   char_t* stre = s + 1;
01493 
01494   switch (*stre) {
01495   case '#': {   // &#...
        // code point accumulated from the digits; it is not range-checked, so overlong
        // digit sequences wrap around
01496     unsigned int ucsc = 0;
01497 
01498     if (stre[1] == 'x') { // &#x... (hex code)
01499       stre += 2;
01500 
01501       char_t ch = *stre;
01502 
          // "&#x;" has no digits: leave it alone
01503       if (ch == ';') return stre;
01504 
01505       for (;;) {
            // unsigned comparison: characters below '0' wrap to large values, so this
            // accepts exactly '0'..'9'
01506         if (static_cast<unsigned int>(ch - '0') <= 9)
01507           ucsc = 16 * ucsc + (ch - '0');
            // OR-ing with ' ' (0x20) folds 'A'..'F' onto 'a'..'f'
01508         else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
01509           ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
01510         else if (ch == ';')
01511           break;
01512         else // cancel
01513           return stre;
01514 
01515         ch = *++stre;
01516       }
01517 
          // step over the ';'
01518       ++stre;
01519     } else {    // &#... (dec code)
01520       char_t ch = *++stre;
01521 
          // "&#;" has no digits: leave it alone
01522       if (ch == ';') return stre;
01523 
01524       for (;;) {
01525         if (static_cast<unsigned int>(ch - '0') <= 9)
01526           ucsc = 10 * ucsc + (ch - '0');
01527         else if (ch == ';')
01528           break;
01529         else // cancel
01530           return stre;
01531 
01532         ch = *++stre;
01533       }
01534 
          // step over the ';'
01535       ++stre;
01536     }
01537 
        // encode the code point at s using the writer of the active character mode;
        // s ends up just past the encoded character
01538 #ifdef PUGIXML_WCHAR_MODE
01539     s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
01540 #else
01541     s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
01542 #endif
01543 
        // what is left of the reference, [s, stre), is now a gap
01544     g.push(s, stre - s);
01545     return stre;
01546   }
01547 
01548   case 'a': {   // &a
01549     ++stre;
01550 
01551     if (*stre == 'm') { // &am
01552       if (*++stre == 'p' && *++stre == ';') { // &amp;
01553         *s++ = '&';
01554         ++stre;
01555 
01556         g.push(s, stre - s);
01557         return stre;
01558       }
01559     } else if (*stre == 'p') { // &ap
01560       if (*++stre == 'o' && *++stre == 's' && *++stre == ';') { // &apos;
01561         *s++ = '\'';
01562         ++stre;
01563 
01564         g.push(s, stre - s);
01565         return stre;
01566       }
01567     }
01568     break;
01569   }
01570 
01571   case 'g': { // &g
01572     if (*++stre == 't' && *++stre == ';') { // &gt;
01573       *s++ = '>';
01574       ++stre;
01575 
01576       g.push(s, stre - s);
01577       return stre;
01578     }
01579     break;
01580   }
01581 
01582   case 'l': { // &l
01583     if (*++stre == 't' && *++stre == ';') { // &lt;
01584       *s++ = '<';
01585       ++stre;
01586 
01587       g.push(s, stre - s);
01588       return stre;
01589     }
01590     break;
01591   }
01592 
01593   case 'q': { // &q
01594     if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') { // &quot;
01595       *s++ = '"';
01596       ++stre;
01597 
01598       g.push(s, stre - s);
01599       return stre;
01600     }
01601     break;
01602   }
01603 
01604   default:
01605     break;
01606   }
01607 
      // not a recognized reference: continue from where matching stopped
01608   return stre;
01609 }
01610 
01611 // Utility macro for last character handling
// ENDSWITH(c, e) is true if c equals e, or if c is the zero terminator and endch equals e.
// The macro reads a variable named endch from the scope in which it is expanded.
01612 #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
01613 
01614 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
01615 {
01616   gap g;
01617 
01618   while (true) {
01619     while (!PUGI__IS_CHARTYPE(*s, ct_parse_comment)) ++s;
01620 
01621     if (*s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
01622       *s++ = '\n'; // replace first one with 0x0a
01623 
01624       if (*s == '\n') g.push(s, 1);
01625     } else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) { // comment ends here
01626       *g.flush(s) = 0;
01627 
01628       return s + (s[2] == '>' ? 3 : 2);
01629     } else if (*s == 0) {
01630       return 0;
01631     } else ++s;
01632   }
01633 }
01634 
01635 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
01636 {
01637   gap g;
01638 
01639   while (true) {
01640     while (!PUGI__IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
01641 
01642     if (*s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
01643       *s++ = '\n'; // replace first one with 0x0a
01644 
01645       if (*s == '\n') g.push(s, 1);
01646     } else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) { // CDATA ends here
01647       *g.flush(s) = 0;
01648 
01649       return s + 1;
01650     } else if (*s == 0) {
01651       return 0;
01652     } else ++s;
01653   }
01654 }
01655 
01656 typedef char_t* (*strconv_pcdata_t)(char_t*);
01657 
01658 template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl {
01659   static char_t* parse(char_t* s) {
01660     gap g;
01661 
01662     while (true) {
01663       while (!PUGI__IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
01664 
01665       if (*s == '<') { // PCDATA ends here
01666         *g.flush(s) = 0;
01667 
01668         return s + 1;
01669       } else if (opt_eol::value && *s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
01670         *s++ = '\n'; // replace first one with 0x0a
01671 
01672         if (*s == '\n') g.push(s, 1);
01673       } else if (opt_escape::value && *s == '&') {
01674         s = strconv_escape(s, g);
01675       } else if (*s == 0) {
01676         return s;
01677       } else ++s;
01678     }
01679   }
01680 };
01681 
01682 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
01683 {
01684   PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20);
01685 
01686   switch ((optmask >> 4) & 3) { // get bitmask for flags (eol escapes)
01687   case 0:
01688     return strconv_pcdata_impl<opt_false, opt_false>::parse;
01689   case 1:
01690     return strconv_pcdata_impl<opt_false, opt_true>::parse;
01691   case 2:
01692     return strconv_pcdata_impl<opt_true, opt_false>::parse;
01693   case 3:
01694     return strconv_pcdata_impl<opt_true, opt_true>::parse;
01695   default:
01696     return 0; // should not get here
01697   }
01698 }
01699 
01700 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
01701 
01702 template <typename opt_escape> struct strconv_attribute_impl {
01703   static char_t* parse_wnorm(char_t* s, char_t end_quote) {
01704     gap g;
01705 
01706     // trim leading whitespaces
01707     if (PUGI__IS_CHARTYPE(*s, ct_space)) {
01708       char_t* str = s;
01709 
01710       do ++str;
01711       while (PUGI__IS_CHARTYPE(*str, ct_space));
01712 
01713       g.push(s, str - s);
01714     }
01715 
01716     while (true) {
01717       while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;
01718 
01719       if (*s == end_quote) {
01720         char_t* str = g.flush(s);
01721 
01722         do *str-- = 0;
01723         while (PUGI__IS_CHARTYPE(*str, ct_space));
01724 
01725         return s + 1;
01726       } else if (PUGI__IS_CHARTYPE(*s, ct_space)) {
01727         *s++ = ' ';
01728 
01729         if (PUGI__IS_CHARTYPE(*s, ct_space)) {
01730           char_t* str = s + 1;
01731           while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
01732 
01733           g.push(s, str - s);
01734         }
01735       } else if (opt_escape::value && *s == '&') {
01736         s = strconv_escape(s, g);
01737       } else if (!*s) {
01738         return 0;
01739       } else ++s;
01740     }
01741   }
01742 
01743   static char_t* parse_wconv(char_t* s, char_t end_quote) {
01744     gap g;
01745 
01746     while (true) {
01747       while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;
01748 
01749       if (*s == end_quote) {
01750         *g.flush(s) = 0;
01751 
01752         return s + 1;
01753       } else if (PUGI__IS_CHARTYPE(*s, ct_space)) {
01754         if (*s == '\r') {
01755           *s++ = ' ';
01756 
01757           if (*s == '\n') g.push(s, 1);
01758         } else *s++ = ' ';
01759       } else if (opt_escape::value && *s == '&') {
01760         s = strconv_escape(s, g);
01761       } else if (!*s) {
01762         return 0;
01763       } else ++s;
01764     }
01765   }
01766 
01767   static char_t* parse_eol(char_t* s, char_t end_quote) {
01768     gap g;
01769 
01770     while (true) {
01771       while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
01772 
01773       if (*s == end_quote) {
01774         *g.flush(s) = 0;
01775 
01776         return s + 1;
01777       } else if (*s == '\r') {
01778         *s++ = '\n';
01779 
01780         if (*s == '\n') g.push(s, 1);
01781       } else if (opt_escape::value && *s == '&') {
01782         s = strconv_escape(s, g);
01783       } else if (!*s) {
01784         return 0;
01785       } else ++s;
01786     }
01787   }
01788 
01789   static char_t* parse_simple(char_t* s, char_t end_quote) {
01790     gap g;
01791 
01792     while (true) {
01793       while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
01794 
01795       if (*s == end_quote) {
01796         *g.flush(s) = 0;
01797 
01798         return s + 1;
01799       } else if (opt_escape::value && *s == '&') {
01800         s = strconv_escape(s, g);
01801       } else if (!*s) {
01802         return 0;
01803       } else ++s;
01804     }
01805   }
01806 };
01807 
01808 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
01809 {
01810   PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
01811 
01812   switch ((optmask >> 4) & 15) { // get bitmask for flags (wconv wnorm eol escapes)
01813   case 0:
01814     return strconv_attribute_impl<opt_false>::parse_simple;
01815   case 1:
01816     return strconv_attribute_impl<opt_true>::parse_simple;
01817   case 2:
01818     return strconv_attribute_impl<opt_false>::parse_eol;
01819   case 3:
01820     return strconv_attribute_impl<opt_true>::parse_eol;
01821   case 4:
01822     return strconv_attribute_impl<opt_false>::parse_wconv;
01823   case 5:
01824     return strconv_attribute_impl<opt_true>::parse_wconv;
01825   case 6:
01826     return strconv_attribute_impl<opt_false>::parse_wconv;
01827   case 7:
01828     return strconv_attribute_impl<opt_true>::parse_wconv;
01829   case 8:
01830     return strconv_attribute_impl<opt_false>::parse_wnorm;
01831   case 9:
01832     return strconv_attribute_impl<opt_true>::parse_wnorm;
01833   case 10:
01834     return strconv_attribute_impl<opt_false>::parse_wnorm;
01835   case 11:
01836     return strconv_attribute_impl<opt_true>::parse_wnorm;
01837   case 12:
01838     return strconv_attribute_impl<opt_false>::parse_wnorm;
01839   case 13:
01840     return strconv_attribute_impl<opt_true>::parse_wnorm;
01841   case 14:
01842     return strconv_attribute_impl<opt_false>::parse_wnorm;
01843   case 15:
01844     return strconv_attribute_impl<opt_true>::parse_wnorm;
01845   default:
01846     return 0; // should not get here
01847   }
01848 }
01849 
01850 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
01851 {
01852   xml_parse_result result;
01853   result.status = status;
01854   result.offset = offset;
01855 
01856   return result;
01857 }
01858 
01859 struct xml_parser {
01860   xml_allocator alloc;
01861   char_t* error_offset;
01862   xml_parse_status error_status;
01863 
  // Parser utilities.
  // These macros are meant for use inside xml_parser member functions: they refer by name to
  // locals of the enclosing function (s = read position, cursor = current node, optmsk = option
  // mask, ch = saved character) and to the members alloc, error_offset and error_status.
  // Most of them expand to a brace block rather than a single statement, so "MACRO(); else ..."
  // after an unbraced if does not compile; wrap such uses in braces.

  // Advance s past whitespace.
#define PUGI__SKIPWS()                  { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
  // Non-zero if any of the bits in OPT is set in optmsk.
#define PUGI__OPTSET(OPT)                       ( optmsk & (OPT) )
  // Append a child of the given type to cursor and descend into it; fails with status_out_of_memory.
#define PUGI__PUSHNODE(TYPE)            { cursor = append_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
  // Move cursor back to its parent.
#define PUGI__POPNODE()                 { cursor = cursor->parent; }
  // Advance s until X becomes true or the null terminator is reached.
#define PUGI__SCANFOR(X)                        { while (*s != 0 && !(X)) ++s; }
  // Advance s while X is true (X itself must stop at the null terminator).
#define PUGI__SCANWHILE(X)              { while ((X)) ++s; }
  // Save the current character in ch, overwrite it with 0 and step over it.
#define PUGI__ENDSEG()                  { ch = *s; *s = 0; ++s; }
  // Record the error position/status and return a null pointer from the enclosing function.
#define PUGI__THROW_ERROR(err, m)       return error_offset = m, error_status = err, static_cast<char_t*>(0)
  // Fail with the given error if s is at the null terminator.
#define PUGI__CHECK_ERROR(err, m)       { if (*s == 0) PUGI__THROW_ERROR(err, m); }
01874 
01875   xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok) {
01876   }
01877 
01878   // DOCTYPE consists of nested sections of the following possible types:
01879   // <!-- ... -->, <? ... ?>, "...", '...'
01880   // <![...]]>
01881   // <!...>
01882   // First group can not contain nested groups
01883   // Second group can contain nested groups of the same type
01884   // Third group can contain all other groups
01885   char_t* parse_doctype_primitive(char_t* s) {
01886     if (*s == '"' || *s == '\'') {
01887       // quoted string
01888       char_t ch = *s++;
01889       PUGI__SCANFOR(*s == ch);
01890       if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
01891 
01892       s++;
01893     } else if (s[0] == '<' && s[1] == '?') {
01894       // <? ... ?>
01895       s += 2;
01896       PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
01897       if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
01898 
01899       s += 2;
01900     } else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') {
01901       s += 4;
01902       PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
01903       if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
01904 
01905       s += 4;
01906     } else PUGI__THROW_ERROR(status_bad_doctype, s);
01907 
01908     return s;
01909   }
01910 
01911   char_t* parse_doctype_ignore(char_t* s) {
01912     assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
01913     s++;
01914 
01915     while (*s) {
01916       if (s[0] == '<' && s[1] == '!' && s[2] == '[') {
01917         // nested ignore section
01918         s = parse_doctype_ignore(s);
01919         if (!s) return s;
01920       } else if (s[0] == ']' && s[1] == ']' && s[2] == '>') {
01921         // ignore section end
01922         s += 3;
01923 
01924         return s;
01925       } else s++;
01926     }
01927 
01928     PUGI__THROW_ERROR(status_bad_doctype, s);
01929   }
01930 
01931   char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel) {
01932     assert(s[0] == '<' && s[1] == '!');
01933     s++;
01934 
01935     while (*s) {
01936       if (s[0] == '<' && s[1] == '!' && s[2] != '-') {
01937         if (s[2] == '[') {
01938           // ignore
01939           s = parse_doctype_ignore(s);
01940           if (!s) return s;
01941         } else {
01942           // some control group
01943           s = parse_doctype_group(s, endch, false);
01944           if (!s) return s;
01945         }
01946       } else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') {
01947         // unknown tag (forbidden), or some primitive group
01948         s = parse_doctype_primitive(s);
01949         if (!s) return s;
01950       } else if (*s == '>') {
01951         s++;
01952 
01953         return s;
01954       } else s++;
01955     }
01956 
01957     if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
01958 
01959     return s;
01960   }
01961 
  // Parses a construct that starts with "<!": a comment (<!-- ... -->), a CDATA section
  // (<![CDATA[ ... ]]>) or a DOCTYPE declaration.
  //   s      - points at the '!' that follows '<'
  //   cursor - node that receives any node created here; it is taken by value, so pushes made
  //            in this function never change the caller's cursor
  //   optmsk - parse option mask (tested through PUGI__OPTSET)
  //   endch  - forwarded to ENDSWITH, strconv_comment/strconv_cdata and parse_doctype_group;
  //            presumably the character that was replaced by the terminating zero of the
  //            buffer (see the static parse() overload) - ENDSWITH itself is defined elsewhere
  // Returns the position just past the construct, or 0 after recording error_offset/error_status.
  char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) {
    // parse node contents, starting with exclamation mark
    ++s;

    if (*s == '-') { // '<!-...'
      ++s;

      if (*s == '-') { // '<!--...'
        ++s;

        if (PUGI__OPTSET(parse_comments)) {
          PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
          cursor->value = s; // Save the offset.
        }

        // End-of-line conversion is only needed when the comment text is actually kept.
        if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) {
          s = strconv_comment(s, endch);

          if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
        } else {
          // Scan for terminating '-->'.
          PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
          PUGI__CHECK_ERROR(status_bad_comment, s);

          if (PUGI__OPTSET(parse_comments))
            *s = 0; // Zero-terminate this segment at the first terminating '-'.

          // Step over the '\0->'; only two characters are skipped when s[2] is not a literal '>'
          // (i.e. the terminator was accepted through ENDSWITH).
          s += (s[2] == '>' ? 3 : 2);
        }
      } else PUGI__THROW_ERROR(status_bad_comment, s);
    } else if (*s == '[') {
      // '<![CDATA[...'
      if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') {
        ++s;

        if (PUGI__OPTSET(parse_cdata)) {
          PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
          cursor->value = s; // Save the offset.

          if (PUGI__OPTSET(parse_eol)) {
            s = strconv_cdata(s, endch);

            if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
          } else {
            // Scan for terminating ']]>'.
            PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
            PUGI__CHECK_ERROR(status_bad_cdata, s);

            *s++ = 0; // Zero-terminate this segment.
          }
        } else { // Flagged for discard, but we still have to scan for the terminator.
          // Scan for terminating ']]>'.
          PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
          PUGI__CHECK_ERROR(status_bad_cdata, s);

          ++s;
        }

        // Step over the last ']>'; only one character is skipped when s[1] is not a literal '>'.
        s += (s[1] == '>' ? 2 : 1);
      } else PUGI__THROW_ERROR(status_bad_cdata, s);
    } else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E')) {
      // Back up to the '<' of "<!DOCTYPE", which is what parse_doctype_group expects.
      s -= 2;

      // A DOCTYPE is rejected when the current node has a parent.
      if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);

      // First character after "<!DOCTYPE" (9 characters); start of the stored value.
      char_t* mark = s + 9;

      s = parse_doctype_group(s, endch, true);
      if (!s) return s;

      if (PUGI__OPTSET(parse_doctype)) {
        while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;

        PUGI__PUSHNODE(node_doctype);

        cursor->value = mark;

        // Terminate the value: overwrite the closing '>' or, when the group ran to the end
        // of the buffer, rewrite the null terminator s already points at.
        assert((s[0] == 0 && endch == '>') || s[-1] == '>');
        s[*s == 0 ? 0 : -1] = 0;

        PUGI__POPNODE();
      }
    } else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
    else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
    else PUGI__THROW_ERROR(status_unrecognized_tag, s);

    return s;
  }
02050 
  // Parses a construct that starts with "<?": a processing instruction or an XML declaration.
  //   s          - points at the '?' that follows '<'
  //   ref_cursor - current node; updated on successful return. For a declaration with
  //                content it is left pointing at the new node_declaration so that the
  //                caller parses the content as attributes
  //   optmsk     - parse option mask (parse_declaration / parse_pi decide whether a node is kept)
  //   endch      - forwarded to ENDSWITH (defined elsewhere in this file)
  // Returns the position to continue parsing from, or 0 after recording status_bad_pi
  // (or status_out_of_memory from PUGI__PUSHNODE); ref_cursor is not written on failure.
  char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) {
    // load into registers
    xml_node_struct* cursor = ref_cursor;
    char_t ch = 0;

    // parse node contents, starting with question mark
    ++s;

    // read PI target
    char_t* target = s;

    if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);

    PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
    PUGI__CHECK_ERROR(status_bad_pi, s);

    // determine node type; stricmp / strcasecmp is not portable
    // (| ' ' maps ASCII upper-case letters to lower case, so the target matches "xml" in any case;
    // target + 3 == s requires the target to be exactly three characters long)
    bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;

    if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) {
      if (declaration) {
        // disallow non top-level declarations
        if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);

        PUGI__PUSHNODE(node_declaration);
      } else {
        PUGI__PUSHNODE(node_pi);
      }

      cursor->name = target;

      // terminate the target name; ch receives the character that followed it
      PUGI__ENDSEG();

      // parse value/attributes
      if (ch == '?') {
        // empty node
        if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
        s += (*s == '>');

        PUGI__POPNODE();
      } else if (PUGI__IS_CHARTYPE(ch, ct_space)) {
        PUGI__SKIPWS();

        // scan for tag end
        char_t* value = s;

        PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
        PUGI__CHECK_ERROR(status_bad_pi, s);

        if (declaration) {
          // replace ending ? with / so that 'element' terminates properly
          *s = '/';

          // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
          s = value;
        } else {
          // store value and step over >
          cursor->value = value;
          PUGI__POPNODE();

          // overwrite the terminating '?' with 0 to end the value
          PUGI__ENDSEG();

          s += (*s == '>');
        }
      } else PUGI__THROW_ERROR(status_bad_pi, s);
    } else {
      // node is not wanted: just skip to the end of the construct
      // scan for tag end
      PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
      PUGI__CHECK_ERROR(status_bad_pi, s);

      s += (s[1] == '>' ? 2 : 1);
    }

    // store from registers
    ref_cursor = cursor;

    return s;
  }
02129 
  // Main parsing loop: walks the zero-terminated buffer starting at s and builds the tree
  // below xmldoc, storing names and values as pointers into the (modified in place) buffer.
  //   s      - start of the buffer
  //   xmldoc - root node; parsing starts with the cursor on it and must end there
  //   optmsk - parse option mask; also selects the attribute / PCDATA converters
  //   endch  - passed on to ENDSWITH and the helper parsers, and compared against expected
  //            closing characters when the null terminator is hit (the static parse()
  //            overload passes the character it replaced with that terminator)
  // Returns the final read position, or 0 after recording error_offset/error_status.
  // Control flow note: LOC_TAG is entered right after a '<' has been consumed; LOC_ATTRIBUTES
  // is the attribute loop, also entered by goto after parse_question created a declaration node.
  char_t* parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch) {
    strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
    strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);

    char_t ch = 0;
    xml_node_struct* cursor = xmldoc;
    char_t* mark = s;

    while (*s != 0) {
      if (*s == '<') {
        ++s;

LOC_TAG:
        if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) { // '<#...'
          PUGI__PUSHNODE(node_element); // Append a new node to the tree.

          cursor->name = s;

          PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
          PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

          if (ch == '>') {
            // end of tag
          } else if (PUGI__IS_CHARTYPE(ch, ct_space)) {
LOC_ATTRIBUTES:
            while (true) {
              PUGI__SKIPWS(); // Eat any whitespace.

              if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) { // <... #...
                xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
                if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);

                a->name = s; // Save the offset.

                PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
                PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance

                PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
                PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance

                // Whitespace is allowed between the attribute name and '='.
                if (PUGI__IS_CHARTYPE(ch, ct_space)) {
                  PUGI__SKIPWS(); // Eat any whitespace.
                  PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance

                  ch = *s;
                  ++s;
                }

                if (ch == '=') { // '<... #=...'
                  PUGI__SKIPWS(); // Eat any whitespace.

                  if (*s == '"' || *s == '\'') { // '<... #="...'
                    ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
                    ++s; // Step over the quote.
                    a->value = s; // Save the offset.

                    // Converts the value in place; returns the position past the closing quote or 0.
                    s = strconv_attribute(s, ch);

                    if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);

                    // After this line the loop continues from the start;
                    // Whitespaces, / and > are ok, symbols and EOF are wrong,
                    // everything else will be detected
                    if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
                  } else PUGI__THROW_ERROR(status_bad_attribute, s);
                } else PUGI__THROW_ERROR(status_bad_attribute, s);
              } else if (*s == '/') {
                ++s;

                if (*s == '>') {
                  // '/>' : the element has no content
                  PUGI__POPNODE();
                  s++;
                  break;
                } else if (*s == 0 && endch == '>') {
                  // '/' followed by the end of the buffer, with endch being '>'
                  PUGI__POPNODE();
                  break;
                } else PUGI__THROW_ERROR(status_bad_start_element, s);
              } else if (*s == '>') {
                ++s;

                break;
              } else if (*s == 0 && endch == '>') {
                break;
              } else PUGI__THROW_ERROR(status_bad_start_element, s);
            }

            // !!!
          } else if (ch == '/') { // '<#.../'
            if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);

            PUGI__POPNODE(); // Pop.

            s += (*s == '>');
          } else if (ch == 0) {
            // we stepped over null terminator, backtrack & handle closing tag
            --s;

            if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
          } else PUGI__THROW_ERROR(status_bad_start_element, s);
        } else if (*s == '/') {
          // '</...' : the name must match the name of the node the cursor is on
          ++s;

          char_t* name = cursor->name;
          if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);

          while (PUGI__IS_CHARTYPE(*s, ct_symbol)) {
            if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
          }

          // Characters left in the expected name: the closing tag is too short. If exactly one
          // character is missing, the buffer ended here and that character equals endch, the tag
          // is reported as malformed rather than mismatched.
          if (*name) {
            if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
            else PUGI__THROW_ERROR(status_end_element_mismatch, s);
          }

          PUGI__POPNODE(); // Pop.

          PUGI__SKIPWS();

          if (*s == 0) {
            if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
          } else {
            if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
            ++s;
          }
        } else if (*s == '?') { // '<?...'
          s = parse_question(s, cursor, optmsk, endch);
          if (!s) return s;

          // If the cursor was left on a declaration node, its content is parsed as attributes.
          assert(cursor);
          if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
        } else if (*s == '!') { // '<!...'
          s = parse_exclamation(s, cursor, optmsk, endch);
          if (!s) return s;
        } else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
        else PUGI__THROW_ERROR(status_unrecognized_tag, s);
      } else {
        mark = s; // Save this offset while searching for a terminator.

        PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.

        if (*s == '<') {
          // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
          assert(mark != s);

          // Whitespace-only text is dropped unless one of the whitespace PCDATA options is set;
          // with parse_ws_pcdata_single it is kept only when followed by a closing tag and the
          // current node has no children yet.
          if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single)) {
            continue;
          } else if (PUGI__OPTSET(parse_ws_pcdata_single)) {
            if (s[1] != '/' || cursor->first_child) continue;
          }
        }

        // Rewind so that the text node includes the leading whitespace.
        s = mark;

        if (cursor->parent) {
          PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
          cursor->value = s; // Save the offset.

          // Converts the text in place; returns the position past '<' or the null terminator.
          s = strconv_pcdata(s);

          PUGI__POPNODE(); // Pop since this is a standalone.

          if (!*s) break;
        } else {
          // Text at document level (cursor has no parent) is skipped without creating a node.
          PUGI__SCANFOR(*s == '<'); // '...<'
          if (!*s) break;

          ++s;
        }

        // We're after '<'
        goto LOC_TAG;
      }
    }

    // check that last tag is closed
    if (cursor != xmldoc) PUGI__THROW_ERROR(status_end_element_mismatch, s);

    return s;
  }
02309 
02310   static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* root, unsigned int optmsk) {
02311     xml_document_struct* xmldoc = static_cast<xml_document_struct*>(root);
02312 
02313     // store buffer for offset_debug
02314     xmldoc->buffer = buffer;
02315 
02316     // early-out for empty documents
02317     if (length == 0) return make_parse_result(status_ok);
02318 
02319     // create parser on stack
02320     xml_parser parser(*xmldoc);
02321 
02322     // save last character and make buffer zero-terminated (speeds up parsing)
02323     char_t endch = buffer[length - 1];
02324     buffer[length - 1] = 0;
02325 
02326     // perform actual parsing
02327     parser.parse(buffer, xmldoc, optmsk, endch);
02328 
02329     xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
02330     assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
02331 
02332     // update allocator state
02333     *static_cast<xml_allocator*>(xmldoc) = parser.alloc;
02334 
02335     // since we removed last character, we have to handle the only possible false positive
02336     if (result && endch == '<') {
02337       // there's no possible well-formed document with < at the end
02338       return make_parse_result(status_unrecognized_tag, length);
02339     }
02340 
02341     return result;
02342   }
02343 };
02344 
02345 // Output facilities
02346 PUGI__FN xml_encoding get_write_native_encoding()
02347 {
02348 #ifdef PUGIXML_WCHAR_MODE
02349   return get_wchar_encoding();
02350 #else
02351   return encoding_utf8;
02352 #endif
02353 }
02354 
02355 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
02356 {
02357   // replace wchar encoding with utf implementation
02358   if (encoding == encoding_wchar) return get_wchar_encoding();
02359 
02360   // replace utf16 encoding with utf16 with specific endianness
02361   if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
02362 
02363   // replace utf32 encoding with utf32 with specific endianness
02364   if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
02365 
02366   // only do autodetection if no explicit encoding is requested
02367   if (encoding != encoding_auto) return encoding;
02368 
02369   // assume utf8 encoding
02370   return encoding_utf8;
02371 }
02372 
02373 #ifdef PUGIXML_WCHAR_MODE
02374 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
02375 {
02376   assert(length > 0);
02377 
02378   // discard last character if it's the lead of a surrogate pair
02379   return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
02380 }
02381 
02382 PUGI__FN size_t convert_buffer(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
02383 {
02384   // only endian-swapping is required
02385   if (need_endian_swap_utf(encoding, get_wchar_encoding())) {
02386     convert_wchar_endian_swap(r_char, data, length);
02387 
02388     return length * sizeof(char_t);
02389   }
02390 
02391   // convert to utf8
02392   if (encoding == encoding_utf8) {
02393     uint8_t* dest = r_u8;
02394     uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest);
02395 
02396     return static_cast<size_t>(end - dest);
02397   }
02398 
02399   // convert to utf16
02400   if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
02401     uint16_t* dest = r_u16;
02402 
02403     // convert to native utf16
02404     uint16_t* end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, dest);
02405 
02406     // swap if necessary
02407     xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
02408 
02409     if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
02410 
02411     return static_cast<size_t>(end - dest) * sizeof(uint16_t);
02412   }
02413 
02414   // convert to utf32
02415   if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
02416     uint32_t* dest = r_u32;
02417 
02418     // convert to native utf32
02419     uint32_t* end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, dest);
02420 
02421     // swap if necessary
02422     xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
02423 
02424     if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
02425 
02426     return static_cast<size_t>(end - dest) * sizeof(uint32_t);
02427   }
02428 
02429   // convert to latin1
02430   if (encoding == encoding_latin1) {
02431     uint8_t* dest = r_u8;
02432     uint8_t* end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, dest);
02433 
02434     return static_cast<size_t>(end - dest);
02435   }
02436 
02437   assert(!"Invalid encoding");
02438   return 0;
02439 }
02440 #else
02441 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
02442 {
02443   assert(length > 4);
02444 
02445   for (size_t i = 1; i <= 4; ++i) {
02446     uint8_t ch = static_cast<uint8_t>(data[length - i]);
02447 
02448     // either a standalone character or a leading one
02449     if ((ch & 0xc0) != 0x80) return length - i;
02450   }
02451 
02452   // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
02453   return length;
02454 }
02455 
02456 PUGI__FN size_t convert_buffer(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
02457 {
02458   if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
02459     uint16_t* dest = r_u16;
02460 
02461     // convert to native utf16
02462     uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
02463 
02464     // swap if necessary
02465     xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
02466 
02467     if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
02468 
02469     return static_cast<size_t>(end - dest) * sizeof(uint16_t);
02470   }
02471 
02472   if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
02473     uint32_t* dest = r_u32;
02474 
02475     // convert to native utf32
02476     uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
02477 
02478     // swap if necessary
02479     xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
02480 
02481     if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
02482 
02483     return static_cast<size_t>(end - dest) * sizeof(uint32_t);
02484   }
02485 
02486   if (encoding == encoding_latin1) {
02487     uint8_t* dest = r_u8;
02488     uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
02489 
02490     return static_cast<size_t>(end - dest);
02491   }
02492 
02493   assert(!"Invalid encoding");
02494   return 0;
02495 }
02496 #endif
02497 
02498 class xml_buffered_writer
02499 {
02500   xml_buffered_writer(const xml_buffered_writer&);
02501   xml_buffered_writer& operator=(const xml_buffered_writer&);
02502 
02503 public:
02504   xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) {
02505     PUGI__STATIC_ASSERT(bufcapacity >= 8);
02506   }
02507 
02508   ~xml_buffered_writer() {
02509     flush();
02510   }
02511 
02512   void flush() {
02513     flush(buffer, bufsize);
02514     bufsize = 0;
02515   }
02516 
02517   void flush(const char_t* data, size_t size) {
02518     if (size == 0) return;
02519 
02520     // fast path, just write data
02521     if (encoding == get_write_native_encoding())
02522       writer.write(data, size * sizeof(char_t));
02523     else {
02524       // convert chunk
02525       size_t result = convert_buffer(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
02526       assert(result <= sizeof(scratch));
02527 
02528       // write data
02529       writer.write(scratch.data_u8, result);
02530     }
02531   }
02532 
02533   void write(const char_t* data, size_t length) {
02534     if (bufsize + length > bufcapacity) {
02535       // flush the remaining buffer contents
02536       flush();
02537 
02538       // handle large chunks
02539       if (length > bufcapacity) {
02540         if (encoding == get_write_native_encoding()) {
02541           // fast path, can just write data chunk
02542           writer.write(data, length * sizeof(char_t));
02543           return;
02544         }
02545 
02546         // need to convert in suitable chunks
02547         while (length > bufcapacity) {
02548           // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
02549           // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
02550           size_t chunk_size = get_valid_length(data, bufcapacity);
02551 
02552           // convert chunk and write
02553           flush(data, chunk_size);
02554 
02555           // iterate
02556           data += chunk_size;
02557           length -= chunk_size;
02558         }
02559 
02560         // small tail is copied below
02561         bufsize = 0;
02562       }
02563     }
02564 
02565     memcpy(buffer + bufsize, data, length * sizeof(char_t));
02566     bufsize += length;
02567   }
02568 
02569   void write(const char_t* data) {
02570     write(data, strlength(data));
02571   }
02572 
02573   void write(char_t d0) {
02574     if (bufsize + 1 > bufcapacity) flush();
02575 
02576     buffer[bufsize + 0] = d0;
02577     bufsize += 1;
02578   }
02579 
02580   void write(char_t d0, char_t d1) {
02581     if (bufsize + 2 > bufcapacity) flush();
02582 
02583     buffer[bufsize + 0] = d0;
02584     buffer[bufsize + 1] = d1;
02585     bufsize += 2;
02586   }
02587 
02588   void write(char_t d0, char_t d1, char_t d2) {
02589     if (bufsize + 3 > bufcapacity) flush();
02590 
02591     buffer[bufsize + 0] = d0;
02592     buffer[bufsize + 1] = d1;
02593     buffer[bufsize + 2] = d2;
02594     bufsize += 3;
02595   }
02596 
02597   void write(char_t d0, char_t d1, char_t d2, char_t d3) {
02598     if (bufsize + 4 > bufcapacity) flush();
02599 
02600     buffer[bufsize + 0] = d0;
02601     buffer[bufsize + 1] = d1;
02602     buffer[bufsize + 2] = d2;
02603     buffer[bufsize + 3] = d3;
02604     bufsize += 4;
02605   }
02606 
02607   void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) {
02608     if (bufsize + 5 > bufcapacity) flush();
02609 
02610     buffer[bufsize + 0] = d0;
02611     buffer[bufsize + 1] = d1;
02612     buffer[bufsize + 2] = d2;
02613     buffer[bufsize + 3] = d3;
02614     buffer[bufsize + 4] = d4;
02615     bufsize += 5;
02616   }
02617 
02618   void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) {
02619     if (bufsize + 6 > bufcapacity) flush();
02620 
02621     buffer[bufsize + 0] = d0;
02622     buffer[bufsize + 1] = d1;
02623     buffer[bufsize + 2] = d2;
02624     buffer[bufsize + 3] = d3;
02625     buffer[bufsize + 4] = d4;
02626     buffer[bufsize + 5] = d5;
02627     bufsize += 6;
02628   }
02629 
  // Worst-case growth when the buffered text is re-encoded for output:
  // utf8 maximum expansion: x4 (-> utf32)
  // utf16 maximum expansion: x2 (-> utf32)
  // utf32 maximum expansion: x1
  //
  // bufcapacitybytes is the byte budget for the writer's storage; it can be overridden
  // at build time with PUGIXML_MEMORY_OUTPUT_STACK and defaults to 10240.
  // bufcapacity is the number of characters the buffer holds: each character costs
  // sizeof(char_t) bytes in `buffer` plus 4 bytes in the scratch union.
  enum {
    bufcapacitybytes =
#ifdef PUGIXML_MEMORY_OUTPUT_STACK
    PUGIXML_MEMORY_OUTPUT_STACK
#else
    10240
#endif
    ,
    bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
  };
02643 
  // Pending characters in the native char_t representation; bufsize of them are valid.
  char_t buffer[bufcapacity];

  // Conversion target used by flush(data, size) when the output encoding is not native.
  // All views alias the same storage; the u8/u16/u32 views are each 4 * bufcapacity bytes.
  union {
    uint8_t data_u8[4 * bufcapacity];
    uint16_t data_u16[2 * bufcapacity];
    uint32_t data_u32[bufcapacity];
    char_t data_char[bufcapacity];
  } scratch;

  // Destination that receives the encoded bytes.
  xml_writer& writer;
  // Number of characters currently stored in `buffer`.
  size_t bufsize;
  // Output encoding, as resolved by get_write_encoding() in the constructor.
  xml_encoding encoding;
02656 };
02657 
02658 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
02659 {
02660   while (*s) {
02661     const char_t* prev = s;
02662 
02663     // While *s is a usual symbol
02664     while (!PUGI__IS_CHARTYPEX(*s, type)) ++s;
02665 
02666     writer.write(prev, static_cast<size_t>(s - prev));
02667 
02668     switch (*s) {
02669     case 0:
02670       break;
02671     case '&':
02672       writer.write('&', 'a', 'm', 'p', ';');
02673       ++s;
02674       break;
02675     case '<':
02676       writer.write('&', 'l', 't', ';');
02677       ++s;
02678       break;
02679     case '>':
02680       writer.write('&', 'g', 't', ';');
02681       ++s;
02682       break;
02683     case '"':
02684       writer.write('&', 'q', 'u', 'o', 't', ';');
02685       ++s;
02686       break;
02687     default: { // s is not a usual symbol
02688       unsigned int ch = static_cast<unsigned int>(*s++);
02689       assert(ch < 32);
02690 
02691       writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
02692     }
02693     }
02694   }
02695 }
02696 
02697 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
02698 {
02699   if (flags & format_no_escapes)
02700     writer.write(s);
02701   else
02702     text_output_escaped(writer, s, type);
02703 }
02704 
02705 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
02706 {
02707   do {
02708     writer.write('<', '!', '[', 'C', 'D');
02709     writer.write('A', 'T', 'A', '[');
02710 
02711     const char_t* prev = s;
02712 
02713     // look for ]]> sequence - we can't output it as is since it terminates CDATA
02714     while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
02715 
02716     // skip ]] if we stopped at ]]>, > will go to the next CDATA section
02717     if (*s) s += 2;
02718 
02719     writer.write(prev, static_cast<size_t>(s - prev));
02720 
02721     writer.write(']', ']', '>');
02722   } while (*s);
02723 }
02724 
02725 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node& node, unsigned int flags)
02726 {
02727   const char_t* default_name = PUGIXML_TEXT(":anonymous");
02728 
02729   for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute()) {
02730     writer.write(' ');
02731     writer.write(a.name()[0] ? a.name() : default_name);
02732     writer.write('=', '"');
02733 
02734     text_output(writer, a.value(), ctx_special_attr, flags);
02735 
02736     writer.write('"');
02737   }
02738 }
02739 
02740 PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
02741 {
02742   const char_t* default_name = PUGIXML_TEXT(":anonymous");
02743 
02744   if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
02745     for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
02746 
02747   switch (node.type()) {
02748   case node_document: {
02749     for (xml_node n = node.first_child(); n; n = n.next_sibling())
02750       node_output(writer, n, indent, flags, depth);
02751     break;
02752   }
02753 
02754   case node_element: {
02755     const char_t* name = node.name()[0] ? node.name() : default_name;
02756 
02757     writer.write('<');
02758     writer.write(name);
02759 
02760     node_output_attributes(writer, node, flags);
02761 
02762     if (flags & format_raw) {
02763       if (!node.first_child())
02764         writer.write(' ', '/', '>');
02765       else {
02766         writer.write('>');
02767 
02768         for (xml_node n = node.first_child(); n; n = n.next_sibling())
02769           node_output(writer, n, indent, flags, depth + 1);
02770 
02771         writer.write('<', '/');
02772         writer.write(name);
02773         writer.write('>');
02774       }
02775     } else if (!node.first_child())
02776       writer.write(' ', '/', '>', '\n');
02777     else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata)) {
02778       writer.write('>');
02779 
02780       if (node.first_child().type() == node_pcdata)
02781         text_output(writer, node.first_child().value(), ctx_special_pcdata, flags);
02782       else
02783         text_output_cdata(writer, node.first_child().value());
02784 
02785       writer.write('<', '/');
02786       writer.write(name);
02787       writer.write('>', '\n');
02788     } else {
02789       writer.write('>', '\n');
02790 
02791       for (xml_node n = node.first_child(); n; n = n.next_sibling())
02792         node_output(writer, n, indent, flags, depth + 1);
02793 
02794       if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
02795         for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
02796 
02797       writer.write('<', '/');
02798       writer.write(name);
02799       writer.write('>', '\n');
02800     }
02801 
02802     break;
02803   }
02804 
02805   case node_pcdata:
02806     text_output(writer, node.value(), ctx_special_pcdata, flags);
02807     if ((flags & format_raw) == 0) writer.write('\n');
02808     break;
02809 
02810   case node_cdata:
02811     text_output_cdata(writer, node.value());
02812     if ((flags & format_raw) == 0) writer.write('\n');
02813     break;
02814 
02815   case node_comment:
02816     writer.write('<', '!', '-', '-');
02817     writer.write(node.value());
02818     writer.write('-', '-', '>');
02819     if ((flags & format_raw) == 0) writer.write('\n');
02820     break;
02821 
02822   case node_pi:
02823   case node_declaration:
02824     writer.write('<', '?');
02825     writer.write(node.name()[0] ? node.name() : default_name);
02826 
02827     if (node.type() == node_declaration) {
02828       node_output_attributes(writer, node, flags);
02829     } else if (node.value()[0]) {
02830       writer.write(' ');
02831       writer.write(node.value());
02832     }
02833 
02834     writer.write('?', '>');
02835     if ((flags & format_raw) == 0) writer.write('\n');
02836     break;
02837 
02838   case node_doctype:
02839     writer.write('<', '!', 'D', 'O', 'C');
02840     writer.write('T', 'Y', 'P', 'E');
02841 
02842     if (node.value()[0]) {
02843       writer.write(' ');
02844       writer.write(node.value());
02845     }
02846 
02847     writer.write('>');
02848     if ((flags & format_raw) == 0) writer.write('\n');
02849     break;
02850 
02851   default:
02852     assert(!"Invalid node type");
02853   }
02854 }
02855 
02856 inline bool has_declaration(const xml_node& node)
02857 {
02858   for (xml_node child = node.first_child(); child; child = child.next_sibling()) {
02859     xml_node_type type = child.type();
02860 
02861     if (type == node_declaration) return true;
02862     if (type == node_element) return false;
02863   }
02864 
02865   return false;
02866 }
02867 
02868 inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
02869 {
02870   if (parent != node_document && parent != node_element) return false;
02871   if (child == node_document || child == node_null) return false;
02872   if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
02873 
02874   return true;
02875 }
02876 
02877 PUGI__FN void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
02878 {
02879   assert(dest.type() == source.type());
02880 
02881   switch (source.type()) {
02882   case node_element: {
02883     dest.set_name(source.name());
02884 
02885     for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
02886       dest.append_attribute(a.name()).set_value(a.value());
02887 
02888     for (xml_node c = source.first_child(); c; c = c.next_sibling()) {
02889       if (c == skip) continue;
02890 
02891       xml_node cc = dest.append_child(c.type());
02892       assert(cc);
02893 
02894       recursive_copy_skip(cc, c, skip);
02895     }
02896 
02897     break;
02898   }
02899 
02900   case node_pcdata:
02901   case node_cdata:
02902   case node_comment:
02903   case node_doctype:
02904     dest.set_value(source.value());
02905     break;
02906 
02907   case node_pi:
02908     dest.set_name(source.name());
02909     dest.set_value(source.value());
02910     break;
02911 
02912   case node_declaration: {
02913     dest.set_name(source.name());
02914 
02915     for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
02916       dest.append_attribute(a.name()).set_value(a.value());
02917 
02918     break;
02919   }
02920 
02921   default:
02922     assert(!"Invalid node type");
02923   }
02924 }
02925 
02926 inline bool is_text_node(xml_node_struct* node)
02927 {
02928   xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);
02929 
02930   return type == node_pcdata || type == node_cdata;
02931 }
02932 
02933 // get value with conversion functions
02934 PUGI__FN int get_value_int(const char_t* value, int def)
02935 {
02936   if (!value) return def;
02937 
02938 #ifdef PUGIXML_WCHAR_MODE
02939   return static_cast<int>(wcstol(value, 0, 10));
02940 #else
02941   return static_cast<int>(strtol(value, 0, 10));
02942 #endif
02943 }
02944 
02945 PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)
02946 {
02947   if (!value) return def;
02948 
02949 #ifdef PUGIXML_WCHAR_MODE
02950   return static_cast<unsigned int>(wcstoul(value, 0, 10));
02951 #else
02952   return static_cast<unsigned int>(strtoul(value, 0, 10));
02953 #endif
02954 }
02955 
02956 PUGI__FN double get_value_double(const char_t* value, double def)
02957 {
02958   if (!value) return def;
02959 
02960 #ifdef PUGIXML_WCHAR_MODE
02961   return wcstod(value, 0);
02962 #else
02963   return strtod(value, 0);
02964 #endif
02965 }
02966 
02967 PUGI__FN float get_value_float(const char_t* value, float def)
02968 {
02969   if (!value) return def;
02970 
02971 #ifdef PUGIXML_WCHAR_MODE
02972   return static_cast<float>(wcstod(value, 0));
02973 #else
02974   return static_cast<float>(strtod(value, 0));
02975 #endif
02976 }
02977 
02978 PUGI__FN bool get_value_bool(const char_t* value, bool def)
02979 {
02980   if (!value) return def;
02981 
02982   // only look at first char
02983   char_t first = *value;
02984 
02985   // 1*, t* (true), T* (True), y* (yes), Y* (YES)
02986   return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
02987 }
02988 
02989 // set value with conversion functions
02990 PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
02991 {
02992 #ifdef PUGIXML_WCHAR_MODE
02993   char_t wbuf[128];
02994   impl::widen_ascii(wbuf, buf);
02995 
02996   return strcpy_insitu(dest, header, header_mask, wbuf);
02997 #else
02998   return strcpy_insitu(dest, header, header_mask, buf);
02999 #endif
03000 }
03001 
03002 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)
03003 {
03004   char buf[128];
03005   sprintf(buf, "%d", value);
03006 
03007   return set_value_buffer(dest, header, header_mask, buf);
03008 }
03009 
03010 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)
03011 {
03012   char buf[128];
03013   sprintf(buf, "%u", value);
03014 
03015   return set_value_buffer(dest, header, header_mask, buf);
03016 }
03017 
03018 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)
03019 {
03020   char buf[128];
03021   sprintf(buf, "%g", value);
03022 
03023   return set_value_buffer(dest, header, header_mask, buf);
03024 }
03025 
03026 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)
03027 {
03028   return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
03029 }
03030 
03031 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
03032 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
03033 {
03034 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
03035   // there are 64-bit versions of fseek/ftell, let's use them
03036   typedef __int64 length_type;
03037 
03038   _fseeki64(file, 0, SEEK_END);
03039   length_type length = _ftelli64(file);
03040   _fseeki64(file, 0, SEEK_SET);
03041 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
03042   // there are 64-bit versions of fseek/ftell, let's use them
03043   typedef off64_t length_type;
03044 
03045   fseeko64(file, 0, SEEK_END);
03046   length_type length = ftello64(file);
03047   fseeko64(file, 0, SEEK_SET);
03048 #else
03049   // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
03050   typedef long length_type;
03051 
03052   fseek(file, 0, SEEK_END);
03053   length_type length = ftell(file);
03054   fseek(file, 0, SEEK_SET);
03055 #endif
03056 
03057   // check for I/O errors
03058   if (length < 0) return status_io_error;
03059 
03060   // check for overflow
03061   size_t result = static_cast<size_t>(length);
03062 
03063   if (static_cast<length_type>(result) != length) return status_out_of_memory;
03064 
03065   // finalize
03066   out_result = result;
03067 
03068   return status_ok;
03069 }
03070 
03071 PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
03072 {
03073   if (!file) return make_parse_result(status_file_not_found);
03074 
03075   // get file size (can result in I/O errors)
03076   size_t size = 0;
03077   xml_parse_status size_status = get_file_size(file, size);
03078 
03079   if (size_status != status_ok) {
03080     fclose(file);
03081     return make_parse_result(size_status);
03082   }
03083 
03084   // allocate buffer for the whole file
03085   char* contents = static_cast<char*>(xml_memory::allocate(size > 0 ? size : 1));
03086 
03087   if (!contents) {
03088     fclose(file);
03089     return make_parse_result(status_out_of_memory);
03090   }
03091 
03092   // read file in memory
03093   size_t read_size = fread(contents, 1, size, file);
03094   fclose(file);
03095 
03096   if (read_size != size) {
03097     xml_memory::deallocate(contents);
03098     return make_parse_result(status_io_error);
03099   }
03100 
03101   return doc.load_buffer_inplace_own(contents, size, options, encoding);
03102 }
03103 
03104 #ifndef PUGIXML_NO_STL
03105 template <typename T> struct xml_stream_chunk {
03106   static xml_stream_chunk* create() {
03107     void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
03108 
03109     return new (memory) xml_stream_chunk();
03110   }
03111 
03112   static void destroy(void* ptr) {
03113     xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr);
03114 
03115     // free chunk chain
03116     while (chunk) {
03117       xml_stream_chunk* next = chunk->next;
03118       xml_memory::deallocate(chunk);
03119       chunk = next;
03120     }
03121   }
03122 
03123   xml_stream_chunk(): next(0), size(0) {
03124   }
03125 
03126   xml_stream_chunk* next;
03127   size_t size;
03128 
03129   T data[xml_memory_page_size / sizeof(T)];
03130 };
03131 
03132 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
03133 {
03134   buffer_holder chunks(0, xml_stream_chunk<T>::destroy);
03135 
03136   // read file to a chunk list
03137   size_t total = 0;
03138   xml_stream_chunk<T>* last = 0;
03139 
03140   while (!stream.eof()) {
03141     // allocate new chunk
03142     xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
03143     if (!chunk) return status_out_of_memory;
03144 
03145     // append chunk to list
03146     if (last) last = last->next = chunk;
03147     else chunks.data = last = chunk;
03148 
03149     // read data to chunk
03150     stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
03151     chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
03152 
03153     // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
03154     if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
03155 
03156     // guard against huge files (chunk size is small enough to make this overflow check work)
03157     if (total + chunk->size < total) return status_out_of_memory;
03158     total += chunk->size;
03159   }
03160 
03161   // copy chunk list to a contiguous buffer
03162   char* buffer = static_cast<char*>(xml_memory::allocate(total));
03163   if (!buffer) return status_out_of_memory;
03164 
03165   char* write = buffer;
03166 
03167   for (xml_stream_chunk<T>* chunk = static_cast<xml_stream_chunk<T>*>(chunks.data); chunk; chunk = chunk->next) {
03168     assert(write + chunk->size <= buffer + total);
03169     memcpy(write, chunk->data, chunk->size);
03170     write += chunk->size;
03171   }
03172 
03173   assert(write == buffer + total);
03174 
03175   // return buffer
03176   *out_buffer = buffer;
03177   *out_size = total;
03178 
03179   return status_ok;
03180 }
03181 
03182 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
03183 {
03184   // get length of remaining data in stream
03185   typename std::basic_istream<T>::pos_type pos = stream.tellg();
03186   stream.seekg(0, std::ios::end);
03187   std::streamoff length = stream.tellg() - pos;
03188   stream.seekg(pos);
03189 
03190   if (stream.fail() || pos < 0) return status_io_error;
03191 
03192   // guard against huge files
03193   size_t read_length = static_cast<size_t>(length);
03194 
03195   if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
03196 
03197   // read stream data into memory (guard against stream exceptions with buffer holder)
03198   buffer_holder buffer(xml_memory::allocate((read_length > 0 ? read_length : 1) * sizeof(T)), xml_memory::deallocate);
03199   if (!buffer.data) return status_out_of_memory;
03200 
03201   stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
03202 
03203   // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
03204   if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
03205 
03206   // return buffer
03207   size_t actual_length = static_cast<size_t>(stream.gcount());
03208   assert(actual_length <= read_length);
03209 
03210   *out_buffer = buffer.release();
03211   *out_size = actual_length * sizeof(T);
03212 
03213   return status_ok;
03214 }
03215 
03216 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
03217 {
03218   void* buffer = 0;
03219   size_t size = 0;
03220 
03221   // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
03222   xml_parse_status status = (stream.tellg() < 0) ? load_stream_data_noseek(stream, &buffer, &size) : load_stream_data_seek(stream, &buffer, &size);
03223   if (status != status_ok) return make_parse_result(status);
03224 
03225   return doc.load_buffer_inplace_own(buffer, size, options, encoding);
03226 }
03227 #endif
03228 
03229 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && !defined(__STRICT_ANSI__))
03230 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
03231 {
03232   return _wfopen(path, mode);
03233 }
03234 #else
03235 PUGI__FN char* convert_path_heap(const wchar_t* str)
03236 {
03237   assert(str);
03238 
03239   // first pass: get length in utf8 characters
03240   size_t length = wcslen(str);
03241   size_t size = as_utf8_begin(str, length);
03242 
03243   // allocate resulting string
03244   char* result = static_cast<char*>(xml_memory::allocate(size + 1));
03245   if (!result) return 0;
03246 
03247   // second pass: convert to utf8
03248   as_utf8_end(result, size, str, length);
03249 
03250   return result;
03251 }
03252 
03253 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
03254 {
03255   // there is no standard function to open wide paths, so our best bet is to try utf8 path
03256   char* path_utf8 = convert_path_heap(path);
03257   if (!path_utf8) return 0;
03258 
03259   // convert mode to ASCII (we mirror _wfopen interface)
03260   char mode_ascii[4] = {0};
03261   for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
03262 
03263   // try to open the utf8 path
03264   FILE* result = fopen(path_utf8, mode_ascii);
03265 
03266   // free dummy buffer
03267   xml_memory::deallocate(path_utf8);
03268 
03269   return result;
03270 }
03271 #endif
03272 
03273 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
03274 {
03275   if (!file) return false;
03276 
03277   xml_writer_file writer(file);
03278   doc.save(writer, indent, flags, encoding);
03279 
03280   int result = ferror(file);
03281 
03282   fclose(file);
03283 
03284   return result == 0;
03285 }
03286 PUGI__NS_END
03287 
03288 namespace pugi
03289 {
03290 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
03291 {
03292 }
03293 
03294 PUGI__FN void xml_writer_file::write(const void* data, size_t size)
03295 {
03296   size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
03297   (void)!result; // unfortunately we can't do proper error handling here
03298 }
03299 
03300 #ifndef PUGIXML_NO_STL
03301 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
03302 {
03303 }
03304 
03305 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
03306 {
03307 }
03308 
03309 PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
03310 {
03311   if (narrow_stream) {
03312     assert(!wide_stream);
03313     narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
03314   } else {
03315     assert(wide_stream);
03316     assert(size % sizeof(wchar_t) == 0);
03317 
03318     wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
03319   }
03320 }
03321 #endif
03322 
03323 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
03324 {
03325 }
03326 
03327 PUGI__FN xml_tree_walker::~xml_tree_walker()
03328 {
03329 }
03330 
03331 PUGI__FN int xml_tree_walker::depth() const
03332 {
03333   return _depth;
03334 }
03335 
03336 PUGI__FN bool xml_tree_walker::begin(xml_node&)
03337 {
03338   return true;
03339 }
03340 
03341 PUGI__FN bool xml_tree_walker::end(xml_node&)
03342 {
03343   return true;
03344 }
03345 
03346 PUGI__FN xml_attribute::xml_attribute(): _attr(0)
03347 {
03348 }
03349 
03350 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
03351 {
03352 }
03353 
03354 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
03355 {
03356 }
03357 
03358 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
03359 {
03360   return _attr ? unspecified_bool_xml_attribute : 0;
03361 }
03362 
03363 PUGI__FN bool xml_attribute::operator!() const
03364 {
03365   return !_attr;
03366 }
03367 
03368 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
03369 {
03370   return (_attr == r._attr);
03371 }
03372 
03373 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
03374 {
03375   return (_attr != r._attr);
03376 }
03377 
03378 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
03379 {
03380   return (_attr < r._attr);
03381 }
03382 
03383 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
03384 {
03385   return (_attr > r._attr);
03386 }
03387 
03388 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
03389 {
03390   return (_attr <= r._attr);
03391 }
03392 
03393 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
03394 {
03395   return (_attr >= r._attr);
03396 }
03397 
03398 PUGI__FN xml_attribute xml_attribute::next_attribute() const
03399 {
03400   return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
03401 }
03402 
03403 PUGI__FN xml_attribute xml_attribute::previous_attribute() const
03404 {
03405   return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
03406 }
03407 
03408 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
03409 {
03410   return (_attr && _attr->value) ? _attr->value : def;
03411 }
03412 
03413 PUGI__FN int xml_attribute::as_int(int def) const
03414 {
03415   return impl::get_value_int(_attr ? _attr->value : 0, def);
03416 }
03417 
03418 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
03419 {
03420   return impl::get_value_uint(_attr ? _attr->value : 0, def);
03421 }
03422 
03423 PUGI__FN double xml_attribute::as_double(double def) const
03424 {
03425   return impl::get_value_double(_attr ? _attr->value : 0, def);
03426 }
03427 
03428 PUGI__FN float xml_attribute::as_float(float def) const
03429 {
03430   return impl::get_value_float(_attr ? _attr->value : 0, def);
03431 }
03432 
03433 PUGI__FN bool xml_attribute::as_bool(bool def) const
03434 {
03435   return impl::get_value_bool(_attr ? _attr->value : 0, def);
03436 }
03437 
03438 PUGI__FN bool xml_attribute::empty() const
03439 {
03440   return !_attr;
03441 }
03442 
03443 PUGI__FN const char_t* xml_attribute::name() const
03444 {
03445   return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");
03446 }
03447 
03448 PUGI__FN const char_t* xml_attribute::value() const
03449 {
03450   return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");
03451 }
03452 
03453 PUGI__FN size_t xml_attribute::hash_value() const
03454 {
03455   return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
03456 }
03457 
03458 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
03459 {
03460   return _attr;
03461 }
03462 
03463 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
03464 {
03465   set_value(rhs);
03466   return *this;
03467 }
03468 
03469 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
03470 {
03471   set_value(rhs);
03472   return *this;
03473 }
03474 
03475 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
03476 {
03477   set_value(rhs);
03478   return *this;
03479 }
03480 
03481 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
03482 {
03483   set_value(rhs);
03484   return *this;
03485 }
03486 
03487 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
03488 {
03489   set_value(rhs);
03490   return *this;
03491 }
03492 
03493 PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
03494 {
03495   if (!_attr) return false;
03496 
03497   return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs);
03498 }
03499 
03500 PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
03501 {
03502   if (!_attr) return false;
03503 
03504   return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
03505 }
03506 
03507 PUGI__FN bool xml_attribute::set_value(int rhs)
03508 {
03509   if (!_attr) return false;
03510 
03511   return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
03512 }
03513 
03514 PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
03515 {
03516   if (!_attr) return false;
03517 
03518   return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
03519 }
03520 
03521 PUGI__FN bool xml_attribute::set_value(double rhs)
03522 {
03523   if (!_attr) return false;
03524 
03525   return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
03526 }
03527 
03528 PUGI__FN bool xml_attribute::set_value(bool rhs)
03529 {
03530   if (!_attr) return false;
03531 
03532   return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
03533 }
03534 
03535 #ifdef __BORLANDC__
03536 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
03537 {
03538   return (bool)lhs && rhs;
03539 }
03540 
03541 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
03542 {
03543   return (bool)lhs || rhs;
03544 }
03545 #endif
03546 
03547 PUGI__FN xml_node::xml_node(): _root(0)
03548 {
03549 }
03550 
03551 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
03552 {
03553 }
03554 
03555 PUGI__FN static void unspecified_bool_xml_node(xml_node***)
03556 {
03557 }
03558 
03559 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
03560 {
03561   return _root ? unspecified_bool_xml_node : 0;
03562 }
03563 
03564 PUGI__FN bool xml_node::operator!() const
03565 {
03566   return !_root;
03567 }
03568 
03569 PUGI__FN xml_node::iterator xml_node::begin() const
03570 {
03571   return iterator(_root ? _root->first_child : 0, _root);
03572 }
03573 
03574 PUGI__FN xml_node::iterator xml_node::end() const
03575 {
03576   return iterator(0, _root);
03577 }
03578 
03579 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
03580 {
03581   return attribute_iterator(_root ? _root->first_attribute : 0, _root);
03582 }
03583 
03584 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
03585 {
03586   return attribute_iterator(0, _root);
03587 }
03588 
03589 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
03590 {
03591   return xml_object_range<xml_node_iterator>(begin(), end());
03592 }
03593 
03594 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
03595 {
03596   return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_), name_), xml_named_node_iterator());
03597 }
03598 
03599 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
03600 {
03601   return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
03602 }
03603 
03604 PUGI__FN bool xml_node::operator==(const xml_node& r) const
03605 {
03606   return (_root == r._root);
03607 }
03608 
03609 PUGI__FN bool xml_node::operator!=(const xml_node& r) const
03610 {
03611   return (_root != r._root);
03612 }
03613 
03614 PUGI__FN bool xml_node::operator<(const xml_node& r) const
03615 {
03616   return (_root < r._root);
03617 }
03618 
03619 PUGI__FN bool xml_node::operator>(const xml_node& r) const
03620 {
03621   return (_root > r._root);
03622 }
03623 
03624 PUGI__FN bool xml_node::operator<=(const xml_node& r) const
03625 {
03626   return (_root <= r._root);
03627 }
03628 
03629 PUGI__FN bool xml_node::operator>=(const xml_node& r) const
03630 {
03631   return (_root >= r._root);
03632 }
03633 
03634 PUGI__FN bool xml_node::empty() const
03635 {
03636   return !_root;
03637 }
03638 
03639 PUGI__FN const char_t* xml_node::name() const
03640 {
03641   return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");
03642 }
03643 
03644 PUGI__FN xml_node_type xml_node::type() const
03645 {
03646   return _root ? static_cast<xml_node_type>((_root->header & impl::xml_memory_page_type_mask) + 1) : node_null;
03647 }
03648 
03649 PUGI__FN const char_t* xml_node::value() const
03650 {
03651   return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");
03652 }
03653 
03654 PUGI__FN xml_node xml_node::child(const char_t* name_) const
03655 {
03656   if (!_root) return xml_node();
03657 
03658   for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
03659     if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
03660 
03661   return xml_node();
03662 }
03663 
03664 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
03665 {
03666   if (!_root) return xml_attribute();
03667 
03668   for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
03669     if (i->name && impl::strequal(name_, i->name))
03670       return xml_attribute(i);
03671 
03672   return xml_attribute();
03673 }
03674 
03675 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
03676 {
03677   if (!_root) return xml_node();
03678 
03679   for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
03680     if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
03681 
03682   return xml_node();
03683 }
03684 
03685 PUGI__FN xml_node xml_node::next_sibling() const
03686 {
03687   if (!_root) return xml_node();
03688 
03689   if (_root->next_sibling) return xml_node(_root->next_sibling);
03690   else return xml_node();
03691 }
03692 
03693 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
03694 {
03695   if (!_root) return xml_node();
03696 
03697   for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
03698     if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
03699 
03700   return xml_node();
03701 }
03702 
03703 PUGI__FN xml_node xml_node::previous_sibling() const
03704 {
03705   if (!_root) return xml_node();
03706 
03707   if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
03708   else return xml_node();
03709 }
03710 
03711 PUGI__FN xml_node xml_node::parent() const
03712 {
03713   return _root ? xml_node(_root->parent) : xml_node();
03714 }
03715 
03716 PUGI__FN xml_node xml_node::root() const
03717 {
03718   if (!_root) return xml_node();
03719 
03720   impl::xml_memory_page* page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
03721 
03722   return xml_node(static_cast<impl::xml_document_struct*>(page->allocator));
03723 }
03724 
03725 PUGI__FN xml_text xml_node::text() const
03726 {
03727   return xml_text(_root);
03728 }
03729 
03730 PUGI__FN const char_t* xml_node::child_value() const
03731 {
03732   if (!_root) return PUGIXML_TEXT("");
03733 
03734   for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
03735     if (i->value && impl::is_text_node(i))
03736       return i->value;
03737 
03738   return PUGIXML_TEXT("");
03739 }
03740 
03741 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
03742 {
03743   return child(name_).child_value();
03744 }
03745 
03746 PUGI__FN xml_attribute xml_node::first_attribute() const
03747 {
03748   return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
03749 }
03750 
03751 PUGI__FN xml_attribute xml_node::last_attribute() const
03752 {
03753   return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
03754 }
03755 
03756 PUGI__FN xml_node xml_node::first_child() const
03757 {
03758   return _root ? xml_node(_root->first_child) : xml_node();
03759 }
03760 
03761 PUGI__FN xml_node xml_node::last_child() const
03762 {
03763   return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
03764 }
03765 
03766 PUGI__FN bool xml_node::set_name(const char_t* rhs)
03767 {
03768   switch (type()) {
03769   case node_pi:
03770   case node_declaration:
03771   case node_element:
03772     return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs);
03773 
03774   default:
03775     return false;
03776   }
03777 }
03778 
03779 PUGI__FN bool xml_node::set_value(const char_t* rhs)
03780 {
03781   switch (type()) {
03782   case node_pi:
03783   case node_cdata:
03784   case node_pcdata:
03785   case node_comment:
03786   case node_doctype:
03787     return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs);
03788 
03789   default:
03790     return false;
03791   }
03792 }
03793 
03794 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
03795 {
03796   if (type() != node_element && type() != node_declaration) return xml_attribute();
03797 
03798   xml_attribute a(impl::append_attribute_ll(_root, impl::get_allocator(_root)));
03799   a.set_name(name_);
03800 
03801   return a;
03802 }
03803 
03804 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
03805 {
03806   if (type() != node_element && type() != node_declaration) return xml_attribute();
03807 
03808   xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
03809   if (!a) return xml_attribute();
03810 
03811   a.set_name(name_);
03812 
03813   xml_attribute_struct* head = _root->first_attribute;
03814 
03815   if (head) {
03816     a._attr->prev_attribute_c = head->prev_attribute_c;
03817     head->prev_attribute_c = a._attr;
03818   } else
03819     a._attr->prev_attribute_c = a._attr;
03820 
03821   a._attr->next_attribute = head;
03822   _root->first_attribute = a._attr;
03823 
03824   return a;
03825 }
03826 
03827 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
03828 {
03829   if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
03830 
03831   // check that attribute belongs to *this
03832   xml_attribute_struct* cur = attr._attr;
03833 
03834   while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
03835 
03836   if (cur != _root->first_attribute) return xml_attribute();
03837 
03838   xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
03839   if (!a) return xml_attribute();
03840 
03841   a.set_name(name_);
03842 
03843   if (attr._attr->prev_attribute_c->next_attribute)
03844     attr._attr->prev_attribute_c->next_attribute = a._attr;
03845   else
03846     _root->first_attribute = a._attr;
03847 
03848   a._attr->prev_attribute_c = attr._attr->prev_attribute_c;
03849   a._attr->next_attribute = attr._attr;
03850   attr._attr->prev_attribute_c = a._attr;
03851 
03852   return a;
03853 }
03854 
03855 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
03856 {
03857   if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
03858 
03859   // check that attribute belongs to *this
03860   xml_attribute_struct* cur = attr._attr;
03861 
03862   while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
03863 
03864   if (cur != _root->first_attribute) return xml_attribute();
03865 
03866   xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
03867   if (!a) return xml_attribute();
03868 
03869   a.set_name(name_);
03870 
03871   if (attr._attr->next_attribute)
03872     attr._attr->next_attribute->prev_attribute_c = a._attr;
03873   else
03874     _root->first_attribute->prev_attribute_c = a._attr;
03875 
03876   a._attr->next_attribute = attr._attr->next_attribute;
03877   a._attr->prev_attribute_c = attr._attr;
03878   attr._attr->next_attribute = a._attr;
03879 
03880   return a;
03881 }
03882 
03883 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
03884 {
03885   if (!proto) return xml_attribute();
03886 
03887   xml_attribute result = append_attribute(proto.name());
03888   result.set_value(proto.value());
03889 
03890   return result;
03891 }
03892 
03893 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
03894 {
03895   if (!proto) return xml_attribute();
03896 
03897   xml_attribute result = prepend_attribute(proto.name());
03898   result.set_value(proto.value());
03899 
03900   return result;
03901 }
03902 
03903 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
03904 {
03905   if (!proto) return xml_attribute();
03906 
03907   xml_attribute result = insert_attribute_after(proto.name(), attr);
03908   result.set_value(proto.value());
03909 
03910   return result;
03911 }
03912 
03913 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
03914 {
03915   if (!proto) return xml_attribute();
03916 
03917   xml_attribute result = insert_attribute_before(proto.name(), attr);
03918   result.set_value(proto.value());
03919 
03920   return result;
03921 }
03922 
03923 PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
03924 {
03925   if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
03926 
03927   xml_node n(impl::append_node(_root, impl::get_allocator(_root), type_));
03928 
03929   if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
03930 
03931   return n;
03932 }
03933 
03934 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
03935 {
03936   if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
03937 
03938   xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
03939   if (!n) return xml_node();
03940 
03941   n._root->parent = _root;
03942 
03943   xml_node_struct* head = _root->first_child;
03944 
03945   if (head) {
03946     n._root->prev_sibling_c = head->prev_sibling_c;
03947     head->prev_sibling_c = n._root;
03948   } else
03949     n._root->prev_sibling_c = n._root;
03950 
03951   n._root->next_sibling = head;
03952   _root->first_child = n._root;
03953 
03954   if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
03955 
03956   return n;
03957 }
03958 
03959 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
03960 {
03961   if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
03962   if (!node._root || node._root->parent != _root) return xml_node();
03963 
03964   xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
03965   if (!n) return xml_node();
03966 
03967   n._root->parent = _root;
03968 
03969   if (node._root->prev_sibling_c->next_sibling)
03970     node._root->prev_sibling_c->next_sibling = n._root;
03971   else
03972     _root->first_child = n._root;
03973 
03974   n._root->prev_sibling_c = node._root->prev_sibling_c;
03975   n._root->next_sibling = node._root;
03976   node._root->prev_sibling_c = n._root;
03977 
03978   if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
03979 
03980   return n;
03981 }
03982 
03983 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
03984 {
03985   if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
03986   if (!node._root || node._root->parent != _root) return xml_node();
03987 
03988   xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
03989   if (!n) return xml_node();
03990 
03991   n._root->parent = _root;
03992 
03993   if (node._root->next_sibling)
03994     node._root->next_sibling->prev_sibling_c = n._root;
03995   else
03996     _root->first_child->prev_sibling_c = n._root;
03997 
03998   n._root->next_sibling = node._root->next_sibling;
03999   n._root->prev_sibling_c = node._root;
04000   node._root->next_sibling = n._root;
04001 
04002   if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
04003 
04004   return n;
04005 }
04006 
04007 PUGI__FN xml_node xml_node::append_child(const char_t* name_)
04008 {
04009   xml_node result = append_child(node_element);
04010 
04011   result.set_name(name_);
04012 
04013   return result;
04014 }
04015 
04016 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
04017 {
04018   xml_node result = prepend_child(node_element);
04019 
04020   result.set_name(name_);
04021 
04022   return result;
04023 }
04024 
04025 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
04026 {
04027   xml_node result = insert_child_after(node_element, node);
04028 
04029   result.set_name(name_);
04030 
04031   return result;
04032 }
04033 
04034 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
04035 {
04036   xml_node result = insert_child_before(node_element, node);
04037 
04038   result.set_name(name_);
04039 
04040   return result;
04041 }
04042 
04043 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
04044 {
04045   xml_node result = append_child(proto.type());
04046 
04047   if (result) impl::recursive_copy_skip(result, proto, result);
04048 
04049   return result;
04050 }
04051 
04052 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
04053 {
04054   xml_node result = prepend_child(proto.type());
04055 
04056   if (result) impl::recursive_copy_skip(result, proto, result);
04057 
04058   return result;
04059 }
04060 
04061 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
04062 {
04063   xml_node result = insert_child_after(proto.type(), node);
04064 
04065   if (result) impl::recursive_copy_skip(result, proto, result);
04066 
04067   return result;
04068 }
04069 
04070 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
04071 {
04072   xml_node result = insert_child_before(proto.type(), node);
04073 
04074   if (result) impl::recursive_copy_skip(result, proto, result);
04075 
04076   return result;
04077 }
04078 
04079 PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
04080 {
04081   return remove_attribute(attribute(name_));
04082 }
04083 
04084 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
04085 {
04086   if (!_root || !a._attr) return false;
04087 
04088   // check that attribute belongs to *this
04089   xml_attribute_struct* attr = a._attr;
04090 
04091   while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;
04092 
04093   if (attr != _root->first_attribute) return false;
04094 
04095   if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;
04096   else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;
04097 
04098   if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;
04099   else _root->first_attribute = a._attr->next_attribute;
04100 
04101   impl::destroy_attribute(a._attr, impl::get_allocator(_root));
04102 
04103   return true;
04104 }
04105 
04106 PUGI__FN bool xml_node::remove_child(const char_t* name_)
04107 {
04108   return remove_child(child(name_));
04109 }
04110 
04111 PUGI__FN bool xml_node::remove_child(const xml_node& n)
04112 {
04113   if (!_root || !n._root || n._root->parent != _root) return false;
04114 
04115   if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c;
04116   else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c;
04117 
04118   if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling;
04119   else _root->first_child = n._root->next_sibling;
04120 
04121   impl::destroy_node(n._root, impl::get_allocator(_root));
04122 
04123   return true;
04124 }
04125 
04126 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
04127 {
04128   if (!_root) return xml_node();
04129 
04130   for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
04131     if (i->name && impl::strequal(name_, i->name)) {
04132       for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
04133         if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
04134           return xml_node(i);
04135     }
04136 
04137   return xml_node();
04138 }
04139 
04140 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
04141 {
04142   if (!_root) return xml_node();
04143 
04144   for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
04145     for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
04146       if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
04147         return xml_node(i);
04148 
04149   return xml_node();
04150 }
04151 
04152 #ifndef PUGIXML_NO_STL
04153 PUGI__FN string_t xml_node::path(char_t delimiter) const
04154 {
04155   xml_node cursor = *this; // Make a copy.
04156 
04157   string_t result = cursor.name();
04158 
04159   while (cursor.parent()) {
04160     cursor = cursor.parent();
04161 
04162     string_t temp = cursor.name();
04163     temp += delimiter;
04164     temp += result;
04165     result.swap(temp);
04166   }
04167 
04168   return result;
04169 }
04170 #endif
04171 
04172 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
04173 {
04174   xml_node found = *this; // Current search context.
04175 
04176   if (!_root || !path_ || !path_[0]) return found;
04177 
04178   if (path_[0] == delimiter) {
04179     // Absolute path; e.g. '/foo/bar'
04180     found = found.root();
04181     ++path_;
04182   }
04183 
04184   const char_t* path_segment = path_;
04185 
04186   while (*path_segment == delimiter) ++path_segment;
04187 
04188   const char_t* path_segment_end = path_segment;
04189 
04190   while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
04191 
04192   if (path_segment == path_segment_end) return found;
04193 
04194   const char_t* next_segment = path_segment_end;
04195 
04196   while (*next_segment == delimiter) ++next_segment;
04197 
04198   if (*path_segment == '.' && path_segment + 1 == path_segment_end)
04199     return found.first_element_by_path(next_segment, delimiter);
04200   else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
04201     return found.parent().first_element_by_path(next_segment, delimiter);
04202   else {
04203     for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) {
04204       if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) {
04205         xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
04206 
04207         if (subsearch) return subsearch;
04208       }
04209     }
04210 
04211     return xml_node();
04212   }
04213 }
04214 
04215 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
04216 {
04217   walker._depth = -1;
04218 
04219   xml_node arg_begin = *this;
04220   if (!walker.begin(arg_begin)) return false;
04221 
04222   xml_node cur = first_child();
04223 
04224   if (cur) {
04225     ++walker._depth;
04226 
04227     do {
04228       xml_node arg_for_each = cur;
04229       if (!walker.for_each(arg_for_each))
04230         return false;
04231 
04232       if (cur.first_child()) {
04233         ++walker._depth;
04234         cur = cur.first_child();
04235       } else if (cur.next_sibling())
04236         cur = cur.next_sibling();
04237       else {
04238         // Borland C++ workaround
04239         while (!cur.next_sibling() && cur != *this && !cur.parent().empty()) {
04240           --walker._depth;
04241           cur = cur.parent();
04242         }
04243 
04244         if (cur != *this)
04245           cur = cur.next_sibling();
04246       }
04247     } while (cur && cur != *this);
04248   }
04249 
04250   assert(walker._depth == -1);
04251 
04252   xml_node arg_end = *this;
04253   return walker.end(arg_end);
04254 }
04255 
04256 PUGI__FN size_t xml_node::hash_value() const
04257 {
04258   return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
04259 }
04260 
04261 PUGI__FN xml_node_struct* xml_node::internal_object() const
04262 {
04263   return _root;
04264 }
04265 
04266 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
04267 {
04268   if (!_root) return;
04269 
04270   impl::xml_buffered_writer buffered_writer(writer, encoding);
04271 
04272   impl::node_output(buffered_writer, *this, indent, flags, depth);
04273 }
04274 
04275 #ifndef PUGIXML_NO_STL
04276 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
04277 {
04278   xml_writer_stream writer(stream);
04279 
04280   print(writer, indent, flags, encoding, depth);
04281 }
04282 
04283 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
04284 {
04285   xml_writer_stream writer(stream);
04286 
04287   print(writer, indent, flags, encoding_wchar, depth);
04288 }
04289 #endif
04290 
04291 PUGI__FN ptrdiff_t xml_node::offset_debug() const
04292 {
04293   xml_node_struct* r = root()._root;
04294 
04295   if (!r) return -1;
04296 
04297   const char_t* buffer = static_cast<impl::xml_document_struct*>(r)->buffer;
04298 
04299   if (!buffer) return -1;
04300 
04301   switch (type()) {
04302   case node_document:
04303     return 0;
04304 
04305   case node_element:
04306   case node_declaration:
04307   case node_pi:
04308     return (_root->header & impl::xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;
04309 
04310   case node_pcdata:
04311   case node_cdata:
04312   case node_comment:
04313   case node_doctype:
04314     return (_root->header & impl::xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;
04315 
04316   default:
04317     return -1;
04318   }
04319 }
04320 
04321 #ifdef __BORLANDC__
04322 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
04323 {
04324   return (bool)lhs && rhs;
04325 }
04326 
04327 PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
04328 {
04329   return (bool)lhs || rhs;
04330 }
04331 #endif
04332 
04333 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
04334 {
04335 }
04336 
04337 PUGI__FN xml_node_struct* xml_text::_data() const
04338 {
04339   if (!_root || impl::is_text_node(_root)) return _root;
04340 
04341   for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
04342     if (impl::is_text_node(node))
04343       return node;
04344 
04345   return 0;
04346 }
04347 
04348 PUGI__FN xml_node_struct* xml_text::_data_new()
04349 {
04350   xml_node_struct* d = _data();
04351   if (d) return d;
04352 
04353   return xml_node(_root).append_child(node_pcdata).internal_object();
04354 }
04355 
04356 PUGI__FN xml_text::xml_text(): _root(0)
04357 {
04358 }
04359 
04360 PUGI__FN static void unspecified_bool_xml_text(xml_text***)
04361 {
04362 }
04363 
04364 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
04365 {
04366   return _data() ? unspecified_bool_xml_text : 0;
04367 }
04368 
04369 PUGI__FN bool xml_text::operator!() const
04370 {
04371   return !_data();
04372 }
04373 
04374 PUGI__FN bool xml_text::empty() const
04375 {
04376   return _data() == 0;
04377 }
04378 
04379 PUGI__FN const char_t* xml_text::get() const
04380 {
04381   xml_node_struct* d = _data();
04382 
04383   return (d && d->value) ? d->value : PUGIXML_TEXT("");
04384 }
04385 
04386 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
04387 {
04388   xml_node_struct* d = _data();
04389 
04390   return (d && d->value) ? d->value : def;
04391 }
04392 
04393 PUGI__FN int xml_text::as_int(int def) const
04394 {
04395   xml_node_struct* d = _data();
04396 
04397   return impl::get_value_int(d ? d->value : 0, def);
04398 }
04399 
04400 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
04401 {
04402   xml_node_struct* d = _data();
04403 
04404   return impl::get_value_uint(d ? d->value : 0, def);
04405 }
04406 
04407 PUGI__FN double xml_text::as_double(double def) const
04408 {
04409   xml_node_struct* d = _data();
04410 
04411   return impl::get_value_double(d ? d->value : 0, def);
04412 }
04413 
04414 PUGI__FN float xml_text::as_float(float def) const
04415 {
04416   xml_node_struct* d = _data();
04417 
04418   return impl::get_value_float(d ? d->value : 0, def);
04419 }
04420 
04421 PUGI__FN bool xml_text::as_bool(bool def) const
04422 {
04423   xml_node_struct* d = _data();
04424 
04425   return impl::get_value_bool(d ? d->value : 0, def);
04426 }
04427 
04428 PUGI__FN bool xml_text::set(const char_t* rhs)
04429 {
04430   xml_node_struct* dn = _data_new();
04431 
04432   return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
04433 }
04434 
04435 PUGI__FN bool xml_text::set(int rhs)
04436 {
04437   xml_node_struct* dn = _data_new();
04438 
04439   return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
04440 }
04441 
04442 PUGI__FN bool xml_text::set(unsigned int rhs)
04443 {
04444   xml_node_struct* dn = _data_new();
04445 
04446   return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
04447 }
04448 
04449 PUGI__FN bool xml_text::set(double rhs)
04450 {
04451   xml_node_struct* dn = _data_new();
04452 
04453   return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
04454 }
04455 
04456 PUGI__FN bool xml_text::set(bool rhs)
04457 {
04458   xml_node_struct* dn = _data_new();
04459 
04460   return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
04461 }
04462 
04463 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
04464 {
04465   set(rhs);
04466   return *this;
04467 }
04468 
04469 PUGI__FN xml_text& xml_text::operator=(int rhs)
04470 {
04471   set(rhs);
04472   return *this;
04473 }
04474 
04475 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
04476 {
04477   set(rhs);
04478   return *this;
04479 }
04480 
04481 PUGI__FN xml_text& xml_text::operator=(double rhs)
04482 {
04483   set(rhs);
04484   return *this;
04485 }
04486 
04487 PUGI__FN xml_text& xml_text::operator=(bool rhs)
04488 {
04489   set(rhs);
04490   return *this;
04491 }
04492 
04493 PUGI__FN xml_node xml_text::data() const
04494 {
04495   return xml_node(_data());
04496 }
04497 
04498 #ifdef __BORLANDC__
04499 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
04500 {
04501   return (bool)lhs && rhs;
04502 }
04503 
04504 PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
04505 {
04506   return (bool)lhs || rhs;
04507 }
04508 #endif
04509 
04510 PUGI__FN xml_node_iterator::xml_node_iterator()
04511 {
04512 }
04513 
04514 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
04515 {
04516 }
04517 
04518 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
04519 {
04520 }
04521 
04522 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
04523 {
04524   return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
04525 }
04526 
04527 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
04528 {
04529   return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
04530 }
04531 
04532 PUGI__FN xml_node& xml_node_iterator::operator*() const
04533 {
04534   assert(_wrap._root);
04535   return _wrap;
04536 }
04537 
04538 PUGI__FN xml_node* xml_node_iterator::operator->() const
04539 {
04540   assert(_wrap._root);
04541   return const_cast<xml_node*>(&_wrap); // BCC32 workaround
04542 }
04543 
04544 PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
04545 {
04546   assert(_wrap._root);
04547   _wrap._root = _wrap._root->next_sibling;
04548   return *this;
04549 }
04550 
04551 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
04552 {
04553   xml_node_iterator temp = *this;
04554   ++*this;
04555   return temp;
04556 }
04557 
04558 PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
04559 {
04560   _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
04561   return *this;
04562 }
04563 
04564 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
04565 {
04566   xml_node_iterator temp = *this;
04567   --*this;
04568   return temp;
04569 }
04570 
04571 PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
04572 {
04573 }
04574 
04575 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
04576 {
04577 }
04578 
04579 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
04580 {
04581 }
04582 
04583 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
04584 {
04585   return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
04586 }
04587 
04588 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
04589 {
04590   return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
04591 }
04592 
04593 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
04594 {
04595   assert(_wrap._attr);
04596   return _wrap;
04597 }
04598 
04599 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
04600 {
04601   assert(_wrap._attr);
04602   return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
04603 }
04604 
04605 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
04606 {
04607   assert(_wrap._attr);
04608   _wrap._attr = _wrap._attr->next_attribute;
04609   return *this;
04610 }
04611 
04612 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
04613 {
04614   xml_attribute_iterator temp = *this;
04615   ++*this;
04616   return temp;
04617 }
04618 
04619 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
04620 {
04621   _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
04622   return *this;
04623 }
04624 
04625 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
04626 {
04627   xml_attribute_iterator temp = *this;
04628   --*this;
04629   return temp;
04630 }
04631 
04632 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
04633 {
04634 }
04635 
04636 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _node(node), _name(name)
04637 {
04638 }
04639 
04640 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
04641 {
04642   return _node == rhs._node;
04643 }
04644 
04645 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
04646 {
04647   return _node != rhs._node;
04648 }
04649 
04650 PUGI__FN xml_node& xml_named_node_iterator::operator*() const
04651 {
04652   assert(_node._root);
04653   return _node;
04654 }
04655 
04656 PUGI__FN xml_node* xml_named_node_iterator::operator->() const
04657 {
04658   assert(_node._root);
04659   return const_cast<xml_node*>(&_node); // BCC32 workaround
04660 }
04661 
04662 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
04663 {
04664   assert(_node._root);
04665   _node = _node.next_sibling(_name);
04666   return *this;
04667 }
04668 
04669 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
04670 {
04671   xml_named_node_iterator temp = *this;
04672   ++*this;
04673   return temp;
04674 }
04675 
04676 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
04677 {
04678 }
04679 
04680 PUGI__FN xml_parse_result::operator bool() const
04681 {
04682   return status == status_ok;
04683 }
04684 
04685 PUGI__FN const char* xml_parse_result::description() const
04686 {
04687   switch (status) {
04688   case status_ok:
04689     return "No error";
04690 
04691   case status_file_not_found:
04692     return "File was not found";
04693   case status_io_error:
04694     return "Error reading from file/stream";
04695   case status_out_of_memory:
04696     return "Could not allocate memory";
04697   case status_internal_error:
04698     return "Internal error occurred";
04699 
04700   case status_unrecognized_tag:
04701     return "Could not determine tag type";
04702 
04703   case status_bad_pi:
04704     return "Error parsing document declaration/processing instruction";
04705   case status_bad_comment:
04706     return "Error parsing comment";
04707   case status_bad_cdata:
04708     return "Error parsing CDATA section";
04709   case status_bad_doctype:
04710     return "Error parsing document type declaration";
04711   case status_bad_pcdata:
04712     return "Error parsing PCDATA section";
04713   case status_bad_start_element:
04714     return "Error parsing start element tag";
04715   case status_bad_attribute:
04716     return "Error parsing element attribute";
04717   case status_bad_end_element:
04718     return "Error parsing end element tag";
04719   case status_end_element_mismatch:
04720     return "Start-end tags mismatch";
04721 
04722   default:
04723     return "Unknown error";
04724   }
04725 }
04726 
04727 PUGI__FN xml_document::xml_document(): _buffer(0)
04728 {
04729   create();
04730 }
04731 
04732 PUGI__FN xml_document::~xml_document()
04733 {
04734   destroy();
04735 }
04736 
04737 PUGI__FN void xml_document::reset()
04738 {
04739   destroy();
04740   create();
04741 }
04742 
04743 PUGI__FN void xml_document::reset(const xml_document& proto)
04744 {
04745   reset();
04746 
04747   for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
04748     append_copy(cur);
04749 }
04750 
04751 PUGI__FN void xml_document::create()
04752 {
04753   // initialize sentinel page
04754   PUGI__STATIC_ASSERT(offsetof(impl::xml_memory_page, data) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment <= sizeof(_memory));
04755 
04756   // align upwards to page boundary
04757   void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
04758 
04759   // prepare page structure
04760   impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
04761 
04762   page->busy_size = impl::xml_memory_page_size;
04763 
04764   // allocate new root
04765   _root = new (page->data) impl::xml_document_struct(page);
04766   _root->prev_sibling_c = _root;
04767 
04768   // setup sentinel page
04769   page->allocator = static_cast<impl::xml_document_struct*>(_root);
04770 }
04771 
04772 PUGI__FN void xml_document::destroy()
04773 {
04774   // destroy static storage
04775   if (_buffer) {
04776     impl::xml_memory::deallocate(_buffer);
04777     _buffer = 0;
04778   }
04779 
04780   // destroy dynamic storage, leave sentinel page (it's in static memory)
04781   if (_root) {
04782     impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
04783     assert(root_page && !root_page->prev && !root_page->memory);
04784 
04785     // destroy all pages
04786     for (impl::xml_memory_page* page = root_page->next; page; ) {
04787       impl::xml_memory_page* next = page->next;
04788 
04789       impl::xml_allocator::deallocate_page(page);
04790 
04791       page = next;
04792     }
04793 
04794     // cleanup root page
04795     root_page->allocator = 0;
04796     root_page->next = 0;
04797     root_page->busy_size = root_page->freed_size = 0;
04798 
04799     _root = 0;
04800   }
04801 }
04802 
04803 #ifndef PUGIXML_NO_STL
04804 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
04805 {
04806   reset();
04807 
04808   return impl::load_stream_impl(*this, stream, options, encoding);
04809 }
04810 
04811 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
04812 {
04813   reset();
04814 
04815   return impl::load_stream_impl(*this, stream, options, encoding_wchar);
04816 }
04817 #endif
04818 
04819 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
04820 {
04821   // Force native encoding (skip autodetection)
04822 #ifdef PUGIXML_WCHAR_MODE
04823   xml_encoding encoding = encoding_wchar;
04824 #else
04825   xml_encoding encoding = encoding_utf8;
04826 #endif
04827 
04828   return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
04829 }
04830 
04831 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
04832 {
04833   reset();
04834 
04835   FILE* file = fopen(path_, "rb");
04836 
04837   return impl::load_file_impl(*this, file, options, encoding);
04838 }
04839 
04840 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
04841 {
04842   reset();
04843 
04844   FILE* file = impl::open_file_wide(path_, L"rb");
04845 
04846   return impl::load_file_impl(*this, file, options, encoding);
04847 }
04848 
04849 PUGI__FN xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own)
04850 {
04851   reset();
04852 
04853   // check input buffer
04854   assert(contents || size == 0);
04855 
04856   // get actual encoding
04857   xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
04858 
04859   // get private buffer
04860   char_t* buffer = 0;
04861   size_t length = 0;
04862 
04863   if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
04864 
04865   // delete original buffer if we performed a conversion
04866   if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
04867 
04868   // parse
04869   xml_parse_result res = impl::xml_parser::parse(buffer, length, _root, options);
04870 
04871   // remember encoding
04872   res.encoding = buffer_encoding;
04873 
04874   // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself
04875   if (own || buffer != contents) _buffer = buffer;
04876 
04877   return res;
04878 }
04879 
04880 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
04881 {
04882   return load_buffer_impl(const_cast<void*>(contents), size, options, encoding, false, false);
04883 }
04884 
04885 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
04886 {
04887   return load_buffer_impl(contents, size, options, encoding, true, false);
04888 }
04889 
04890 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
04891 {
04892   return load_buffer_impl(contents, size, options, encoding, true, true);
04893 }
04894 
04895 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
04896 {
04897   impl::xml_buffered_writer buffered_writer(writer, encoding);
04898 
04899   if ((flags & format_write_bom) && encoding != encoding_latin1) {
04900     // BOM always represents the codepoint U+FEFF, so just write it in native encoding
04901 #ifdef PUGIXML_WCHAR_MODE
04902     unsigned int bom = 0xfeff;
04903     buffered_writer.write(static_cast<wchar_t>(bom));
04904 #else
04905     buffered_writer.write('\xef', '\xbb', '\xbf');
04906 #endif
04907   }
04908 
04909   if (!(flags & format_no_declaration) && !impl::has_declaration(*this)) {
04910     buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\""));
04911     if (encoding == encoding_latin1) buffered_writer.write(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
04912     buffered_writer.write('?', '>');
04913     if (!(flags & format_raw)) buffered_writer.write('\n');
04914   }
04915 
04916   impl::node_output(buffered_writer, *this, indent, flags, 0);
04917 }
04918 
04919 #ifndef PUGIXML_NO_STL
04920 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
04921 {
04922   xml_writer_stream writer(stream);
04923 
04924   save(writer, indent, flags, encoding);
04925 }
04926 
04927 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
04928 {
04929   xml_writer_stream writer(stream);
04930 
04931   save(writer, indent, flags, encoding_wchar);
04932 }
04933 #endif
04934 
04935 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
04936 {
04937   FILE* file = fopen(path_, (flags & format_save_file_text) ? "w" : "wb");
04938   return impl::save_file_impl(*this, file, indent, flags, encoding);
04939 }
04940 
04941 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
04942 {
04943   FILE* file = impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb");
04944   return impl::save_file_impl(*this, file, indent, flags, encoding);
04945 }
04946 
04947 PUGI__FN xml_node xml_document::document_element() const
04948 {
04949   for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
04950     if ((i->header & impl::xml_memory_page_type_mask) + 1 == node_element)
04951       return xml_node(i);
04952 
04953   return xml_node();
04954 }
04955 
04956 #ifndef PUGIXML_NO_STL
04957 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
04958 {
04959   assert(str);
04960 
04961   return impl::as_utf8_impl(str, wcslen(str));
04962 }
04963 
04964 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
04965 {
04966   return impl::as_utf8_impl(str.c_str(), str.size());
04967 }
04968 
04969 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
04970 {
04971   assert(str);
04972 
04973   return impl::as_wide_impl(str, strlen(str));
04974 }
04975 
04976 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
04977 {
04978   return impl::as_wide_impl(str.c_str(), str.size());
04979 }
04980 #endif
04981 
04982 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
04983 {
04984   impl::xml_memory::allocate = allocate;
04985   impl::xml_memory::deallocate = deallocate;
04986 }
04987 
04988 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
04989 {
04990   return impl::xml_memory::allocate;
04991 }
04992 
04993 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
04994 {
04995   return impl::xml_memory::deallocate;
04996 }
04997 }
04998 
04999 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
05000 namespace std
05001 {
05002 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
05003 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
05004 {
05005   return std::bidirectional_iterator_tag();
05006 }
05007 
05008 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
05009 {
05010   return std::bidirectional_iterator_tag();
05011 }
05012 
05013 PUGI__FN std::forward_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
05014 {
05015   return std::forward_iterator_tag();
05016 }
05017 }
05018 #endif
05019 
05020 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
05021 namespace std
05022 {
05023 // Workarounds for (non-standard) iterator category detection
05024 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
05025 {
05026   return std::bidirectional_iterator_tag();
05027 }
05028 
05029 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
05030 {
05031   return std::bidirectional_iterator_tag();
05032 }
05033 
05034 PUGI__FN std::forward_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
05035 {
05036   return std::forward_iterator_tag();
05037 }
05038 }
05039 #endif
05040 
05041 #ifndef PUGIXML_NO_XPATH
05042 
05043 // STL replacements
05044 PUGI__NS_BEGIN
// Function object wrapping operator== for two values of the same type.
struct equal_to {
  template <typename T> bool operator()(const T& lhs, const T& rhs) const {
    const bool same = (lhs == rhs);
    return same;
  }
};
05050 
// Function object wrapping operator!= for two values of the same type.
struct not_equal_to {
  template <typename T> bool operator()(const T& lhs, const T& rhs) const {
    const bool different = (lhs != rhs);
    return different;
  }
};
05056 
// Function object wrapping operator< for two values of the same type.
struct less {
  template <typename T> bool operator()(const T& lhs, const T& rhs) const {
    const bool before = (lhs < rhs);
    return before;
  }
};
05062 
// Function object wrapping operator<= for two values of the same type.
struct less_equal {
  template <typename T> bool operator()(const T& lhs, const T& rhs) const {
    const bool not_after = (lhs <= rhs);
    return not_after;
  }
};
05068 
// Exchanges two values through a temporary copy.
template <typename T> void swap(T& lhs, T& rhs)
{
  T saved(lhs);

  lhs = rhs;
  rhs = saved;
}
05075 
// Returns an iterator to the first element of [begin, end) that no other element precedes
// according to pred; pred(a, b) must return true when a orders before b.
// For an empty range, end is returned.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
  // an empty range has no minimum; bail out before stepping past end
  if (begin == end) return end;

  I result = begin;

  for (I it = begin + 1; it != end; ++it)
    if (pred(*it, *result))
      result = it;

  return result;
}
05086 
// Reverses [begin, end) in place by swapping elements from both ends towards the middle.
// Requires random access iterators.
template <typename I> void reverse(I begin, I end)
{
  // compare the remaining distance rather than forming begin + 1,
  // which would point past the end of an empty range
  while (end - begin > 1) swap(*begin++, *--end);
}
05091 
// Collapses each run of consecutive equal elements in [begin, end) to its first element.
// Returns the past-the-end iterator of the compacted range. Requires random access iterators.
template <typename I> I unique(I begin, I end)
{
  // fast skip head: nothing needs to move while neighbours differ
  // (the distance check avoids forming begin + 1 past the end of an empty range)
  while (end - begin > 1 && *begin != *(begin + 1)) begin++;

  if (begin == end) return begin;

  // last written element
  I write = begin++;

  // merge unique elements
  while (begin != end) {
    if (*begin != *write)
      *++write = *begin++;
    else
      begin++;
  }

  // past-the-end (write points to live element)
  return write + 1;
}
05113 
// Copies [begin, end) so that the copy ends at target, working from the last element to the first.
template <typename I> void copy_backwards(I begin, I end, I target)
{
  while (end != begin) {
    --target;
    --end;

    *target = *end;
  }
}
05118 
// Sorts the non-empty range [begin, end) by insertion using pred as the ordering.
// The unnamed T* argument only serves to deduce the element type.
template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
{
  assert(begin != end);

  I cursor = begin + 1;

  while (cursor != end) {
    T value = *cursor;

    if (pred(value, *begin)) {
      // new smallest element: shift the whole sorted prefix right by one
      copy_backwards(begin, cursor, cursor + 1);
      *begin = value;
    } else {
      // slide the gap left until the element before it no longer orders after value;
      // *begin stops the scan because value does not order before it
      I gap = cursor;

      while (pred(value, *(gap - 1))) {
        *gap = *(gap - 1);
        gap--;
      }

      *gap = value;
    }

    ++cursor;
  }
}
05144 
05145 // std variant for elements with ==
template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
{
  // [eq_first, eq_last) is the run of elements equal to the pivot found at middle
  I eq_first = middle;
  I eq_last = middle + 1;

  // grow the run over neighbours that already equal the pivot
  while (eq_first != begin && *(eq_first - 1) == *eq_first) --eq_first;
  while (eq_last != end && *eq_last == *eq_first) ++eq_last;

  // left_end / right_begin bound the parts that have not been examined yet
  I left_end = eq_first;
  I right_begin = eq_last;

  for (;;) {
    // scan right part: stop at an element that has to move to the left part
    for (; right_begin != end; ++right_begin)
      if (!pred(*eq_first, *right_begin)) {
        if (*right_begin == *eq_first) swap(*right_begin, *eq_last++);
        else break;
      }

    // scan left part: stop at an element that has to move to the right part
    for (; left_end != begin; --left_end)
      if (!pred(*(left_end - 1), *eq_first)) {
        if (*eq_first == *(left_end - 1)) swap(*(left_end - 1), *--eq_first);
        else break;
      }

    // both scans ran to completion: report the equal run and finish
    if (right_begin == end && left_end == begin) {
      *out_eqbeg = eq_first;
      *out_eqend = eq_last;
      return;
    }

    // otherwise move the misplaced element(s), shifting the equal run when one side is exhausted
    if (right_begin == end) {
      if (--left_end != --eq_first) swap(*left_end, *eq_first);
      swap(*eq_first, *--eq_last);
    } else if (left_end == begin) {
      if (eq_last != right_begin) swap(*eq_first, *eq_last);
      ++eq_last;
      swap(*right_begin++, *eq_first++);
    } else swap(*right_begin++, *--left_end);
  }
}
05190 
// Orders the three referenced elements by pred so that the median ends up at middle.
template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
{
  // order first/middle
  if (pred(*middle, *first)) {
    swap(*middle, *first);
  }

  // move the largest of the three to last
  if (pred(*last, *middle)) {
    swap(*last, *middle);
  }

  // the previous swap may have broken the first/middle order
  if (pred(*middle, *first)) {
    swap(*middle, *first);
  }
}
05197 
// Moves a pivot candidate to middle: median of three for short spans, median of nine otherwise.
template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
{
  if (last - first <= 40) {
    // short span: three samples are enough
    median3(first, middle, last, pred);
    return;
  }

  // long span: take the median of three medians sampled at the front, centre and back
  size_t step = (last - first + 1) / 8;

  median3(first, first + step, first + 2 * step, pred);
  median3(middle - step, middle, middle + step, pred);
  median3(last - 2 * step, last - step, last, pred);
  median3(first + step, middle, last - step, pred);
}
05213 
// Sorts [begin, end) by pred: three-way partitioning for large ranges, insertion sort for the rest.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
  // keep partitioning while the range holds more than 32 elements
  while (end - begin > 32) {
    // choose a pivot and move it to the centre
    I pivot = begin + (end - begin) / 2;
    median(begin, pivot, end - 1, pred);

    // split into less-than, equal and greater-than parts
    I equal_first, equal_last;
    partition(begin, pivot, end, pred, &equal_first, &equal_last);

    const bool left_is_larger = (equal_first - begin > end - equal_last);

    // recurse into the smaller part and keep looping on the larger one
    if (left_is_larger) {
      sort(equal_last, end, pred);
      end = equal_first;
    } else {
      sort(begin, equal_first, pred);
      begin = equal_last;
    }
  }

  // finish whatever is left with insertion sort
  if (begin != end) insertion_sort(begin, end, pred, &*begin);
}
05239 PUGI__NS_END
05240 
05241 // Allocator used for AST and evaluation stacks
05242 PUGI__NS_BEGIN
// A single block in the allocator's linked list of blocks.
struct xpath_memory_block {
  // next block in the list (null for the last one)
  xpath_memory_block* next;

  // payload; the size can be overridden at build time and defaults to 4096 bytes
#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
  char data[PUGIXML_MEMORY_XPATH_PAGE_SIZE];
#else
  char data[4096];
#endif
};
05254 
05255 class xpath_allocator
05256 {
05257   xpath_memory_block* _root;
05258   size_t _root_size;
05259 
05260 public:
05261 #ifdef PUGIXML_NO_EXCEPTIONS
05262   jmp_buf* error_handler;
05263 #endif
05264 
05265   xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) {
05266 #ifdef PUGIXML_NO_EXCEPTIONS
05267     error_handler = 0;
05268 #endif
05269   }
05270 
05271   void* allocate_nothrow(size_t size) {
05272     const size_t block_capacity = sizeof(_root->data);
05273 
05274     // align size so that we're able to store pointers in subsequent blocks
05275     size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
05276 
05277     if (_root_size + size <= block_capacity) {
05278       void* buf = _root->data + _root_size;
05279       _root_size += size;
05280       return buf;
05281     } else {
05282       size_t block_data_size = (size > block_capacity) ? size : block_capacity;
05283       size_t block_size = block_data_size + offsetof(xpath_memory_block, data);
05284 
05285       xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
05286       if (!block) return 0;
05287 
05288       block->next = _root;
05289 
05290       _root = block;
05291       _root_size = size;
05292 
05293       return block->data;
05294     }
05295   }
05296 
05297   void* allocate(size_t size) {
05298     void* result = allocate_nothrow(size);
05299 
05300     if (!result) {
05301 #ifdef PUGIXML_NO_EXCEPTIONS
05302       assert(error_handler);
05303       longjmp(*error_handler, 1);
05304 #else
05305       throw std::bad_alloc();
05306 #endif
05307     }
05308 
05309     return result;
05310   }
05311 
05312   void* reallocate(void* ptr, size_t old_size, size_t new_size) {
05313     // align size so that we're able to store pointers in subsequent blocks
05314     old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
05315     new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
05316 
05317     // we can only reallocate the last object
05318     assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
05319 
05320     // adjust root size so that we have not allocated the object at all
05321     bool only_object = (_root_size == old_size);
05322 
05323     if (ptr) _root_size -= old_size;
05324 
05325     // allocate a new version (this will obviously reuse the memory if possible)
05326     void* result = allocate(new_size);
05327     assert(result);
05328 
05329     // we have a new block
05330     if (result != ptr && ptr) {
05331       // copy old data
05332       assert(new_size > old_size);
05333       memcpy(result, ptr, old_size);
05334 
05335       // free the previous page if it had no other objects
05336       if (only_object) {
05337         assert(_root->data == result);
05338         assert(_root->next);
05339 
05340         xpath_memory_block* next = _root->next->next;
05341 
05342         if (next) {
05343           // deallocate the whole page, unless it was the first one
05344           xml_memory::deallocate(_root->next);
05345           _root->next = next;
05346         }
05347       }
05348     }
05349 
05350     return result;
05351   }
05352 
05353   void revert(const xpath_allocator& state) {
05354     // free all new pages
05355     xpath_memory_block* cur = _root;
05356 
05357     while (cur != state._root) {
05358       xpath_memory_block* next = cur->next;
05359 
05360       xml_memory::deallocate(cur);
05361 
05362       cur = next;
05363     }
05364 
05365     // restore state
05366     _root = state._root;
05367     _root_size = state._root_size;
05368   }
05369 
05370   void release() {
05371     xpath_memory_block* cur = _root;
05372     assert(cur);
05373 
05374     while (cur->next) {
05375       xpath_memory_block* next = cur->next;
05376 
05377       xml_memory::deallocate(cur);
05378 
05379       cur = next;
05380     }
05381   }
05382 };
05383 
05384 struct xpath_allocator_capture {
05385   xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) {
05386   }
05387 
05388   ~xpath_allocator_capture() {
05389     _target->revert(_state);
05390   }
05391 
05392   xpath_allocator* _target;
05393   xpath_allocator _state;
05394 };
05395 
05396 struct xpath_stack {
05397   xpath_allocator* result;
05398   xpath_allocator* temp;
05399 };
05400 
05401 struct xpath_stack_data {
05402   xpath_memory_block blocks[2];
05403   xpath_allocator result;
05404   xpath_allocator temp;
05405   xpath_stack stack;
05406 
05407 #ifdef PUGIXML_NO_EXCEPTIONS
05408   jmp_buf error_handler;
05409 #endif
05410 
05411   xpath_stack_data(): result(blocks + 0), temp(blocks + 1) {
05412     blocks[0].next = blocks[1].next = 0;
05413 
05414     stack.result = &result;
05415     stack.temp = &temp;
05416 
05417 #ifdef PUGIXML_NO_EXCEPTIONS
05418     result.error_handler = temp.error_handler = &error_handler;
05419 #endif
05420   }
05421 
05422   ~xpath_stack_data() {
05423     result.release();
05424     temp.release();
05425   }
05426 };
05427 PUGI__NS_END
05428 
05429 // String class
05430 PUGI__NS_BEGIN
05431 class xpath_string
05432 {
05433   const char_t* _buffer;
05434   bool _uses_heap;
05435 
05436   static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) {
05437     char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
05438     assert(result);
05439 
05440     memcpy(result, string, length * sizeof(char_t));
05441     result[length] = 0;
05442 
05443     return result;
05444   }
05445 
05446   static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc) {
05447     return duplicate_string(string, strlength(string), alloc);
05448   }
05449 
05450 public:
05451   xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false) {
05452   }
05453 
05454   explicit xpath_string(const char_t* str, xpath_allocator* alloc) {
05455     bool empty_ = (*str == 0);
05456 
05457     _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(str, alloc);
05458     _uses_heap = !empty_;
05459   }
05460 
05461   explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap) {
05462   }
05463 
05464   xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc) {
05465     assert(begin <= end);
05466 
05467     bool empty_ = (begin == end);
05468 
05469     _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
05470     _uses_heap = !empty_;
05471   }
05472 
05473   void append(const xpath_string& o, xpath_allocator* alloc) {
05474     // skip empty sources
05475     if (!*o._buffer) return;
05476 
05477     // fast append for constant empty target and constant source
05478     if (!*_buffer && !_uses_heap && !o._uses_heap) {
05479       _buffer = o._buffer;
05480     } else {
05481       // need to make heap copy
05482       size_t target_length = strlength(_buffer);
05483       size_t source_length = strlength(o._buffer);
05484       size_t result_length = target_length + source_length;
05485 
05486       // allocate new buffer
05487       char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
05488       assert(result);
05489 
05490       // append first string to the new buffer in case there was no reallocation
05491       if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
05492 
05493       // append second string to the new buffer
05494       memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
05495       result[result_length] = 0;
05496 
05497       // finalize
05498       _buffer = result;
05499       _uses_heap = true;
05500     }
05501   }
05502 
05503   const char_t* c_str() const {
05504     return _buffer;
05505   }
05506 
05507   size_t length() const {
05508     return strlength(_buffer);
05509   }
05510 
05511   char_t* data(xpath_allocator* alloc) {
05512     // make private heap copy
05513     if (!_uses_heap) {
05514       _buffer = duplicate_string(_buffer, alloc);
05515       _uses_heap = true;
05516     }
05517 
05518     return const_cast<char_t*>(_buffer);
05519   }
05520 
05521   bool empty() const {
05522     return *_buffer == 0;
05523   }
05524 
05525   bool operator==(const xpath_string& o) const {
05526     return strequal(_buffer, o._buffer);
05527   }
05528 
05529   bool operator!=(const xpath_string& o) const {
05530     return !strequal(_buffer, o._buffer);
05531   }
05532 
05533   bool uses_heap() const {
05534     return _uses_heap;
05535   }
05536 };
05537 
05538 PUGI__FN xpath_string xpath_string_const(const char_t* str)
05539 {
05540   return xpath_string(str, false);
05541 }
05542 PUGI__NS_END
05543 
05544 PUGI__NS_BEGIN
05545 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
05546 {
05547   while (*pattern && *string == *pattern) {
05548     string++;
05549     pattern++;
05550   }
05551 
05552   return *pattern == 0;
05553 }
05554 
05555 PUGI__FN const char_t* find_char(const char_t* s, char_t c)
05556 {
05557 #ifdef PUGIXML_WCHAR_MODE
05558   return wcschr(s, c);
05559 #else
05560   return strchr(s, c);
05561 #endif
05562 }
05563 
05564 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
05565 {
05566 #ifdef PUGIXML_WCHAR_MODE
05567   // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
05568   return (*p == 0) ? s : wcsstr(s, p);
05569 #else
05570   return strstr(s, p);
05571 #endif
05572 }
05573 
05574 // Converts symbol to lower case, if it is an ASCII one
05575 PUGI__FN char_t tolower_ascii(char_t ch)
05576 {
05577   return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
05578 }
05579 
05580 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
05581 {
05582   if (na.attribute())
05583     return xpath_string_const(na.attribute().value());
05584   else {
05585     const xml_node& n = na.node();
05586 
05587     switch (n.type()) {
05588     case node_pcdata:
05589     case node_cdata:
05590     case node_comment:
05591     case node_pi:
05592       return xpath_string_const(n.value());
05593 
05594     case node_document:
05595     case node_element: {
05596       xpath_string result;
05597 
05598       xml_node cur = n.first_child();
05599 
05600       while (cur && cur != n) {
05601         if (cur.type() == node_pcdata || cur.type() == node_cdata)
05602           result.append(xpath_string_const(cur.value()), alloc);
05603 
05604         if (cur.first_child())
05605           cur = cur.first_child();
05606         else if (cur.next_sibling())
05607           cur = cur.next_sibling();
05608         else {
05609           while (!cur.next_sibling() && cur != n)
05610             cur = cur.parent();
05611 
05612           if (cur != n) cur = cur.next_sibling();
05613         }
05614       }
05615 
05616       return result;
05617     }
05618 
05619     default:
05620       return xpath_string();
05621     }
05622   }
05623 }
05624 
05625 PUGI__FN unsigned int node_height(xml_node n)
05626 {
05627   unsigned int result = 0;
05628 
05629   while (n) {
05630     ++result;
05631     n = n.parent();
05632   }
05633 
05634   return result;
05635 }
05636 
05637 PUGI__FN bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
05638 {
05639   // normalize heights
05640   for (unsigned int i = rh; i < lh; i++) ln = ln.parent();
05641   for (unsigned int j = lh; j < rh; j++) rn = rn.parent();
05642 
05643   // one node is the ancestor of the other
05644   if (ln == rn) return lh < rh;
05645 
05646   // find common ancestor
05647   while (ln.parent() != rn.parent()) {
05648     ln = ln.parent();
05649     rn = rn.parent();
05650   }
05651 
05652   // there is no common ancestor (the shared parent is null), nodes are from different documents
05653   if (!ln.parent()) return ln < rn;
05654 
05655   // determine sibling order
05656   for (; ln; ln = ln.next_sibling())
05657     if (ln == rn)
05658       return true;
05659 
05660   return false;
05661 }
05662 
05663 PUGI__FN bool node_is_ancestor(xml_node parent, xml_node node)
05664 {
05665   while (node && node != parent) node = node.parent();
05666 
05667   return parent && node == parent;
05668 }
05669 
05670 PUGI__FN const void* document_order(const xpath_node& xnode)
05671 {
05672   xml_node_struct* node = xnode.node().internal_object();
05673 
05674   if (node) {
05675     if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name;
05676     if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value;
05677     return 0;
05678   }
05679 
05680   xml_attribute_struct* attr = xnode.attribute().internal_object();
05681 
05682   if (attr) {
05683     if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name;
05684     if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value;
05685     return 0;
05686   }
05687 
05688   return 0;
05689 }
05690 
05691 struct document_order_comparator {
05692   bool operator()(const xpath_node& lhs, const xpath_node& rhs) const {
05693     // optimized document order based check
05694     const void* lo = document_order(lhs);
05695     const void* ro = document_order(rhs);
05696 
05697     if (lo && ro) return lo < ro;
05698 
05699     // slow comparison
05700     xml_node ln = lhs.node(), rn = rhs.node();
05701 
05702     // compare attributes
05703     if (lhs.attribute() && rhs.attribute()) {
05704       // shared parent
05705       if (lhs.parent() == rhs.parent()) {
05706         // determine sibling order
05707         for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
05708           if (a == rhs.attribute())
05709             return true;
05710 
05711         return false;
05712       }
05713 
05714       // compare attribute parents
05715       ln = lhs.parent();
05716       rn = rhs.parent();
05717     } else if (lhs.attribute()) {
05718       // attributes go after the parent element
05719       if (lhs.parent() == rhs.node()) return false;
05720 
05721       ln = lhs.parent();
05722     } else if (rhs.attribute()) {
05723       // attributes go after the parent element
05724       if (rhs.parent() == lhs.node()) return true;
05725 
05726       rn = rhs.parent();
05727     }
05728 
05729     if (ln == rn) return false;
05730 
05731     unsigned int lh = node_height(ln);
05732     unsigned int rh = node_height(rn);
05733 
05734     return node_is_before(ln, lh, rn, rh);
05735   }
05736 };
05737 
05738 struct duplicate_comparator {
05739   bool operator()(const xpath_node& lhs, const xpath_node& rhs) const {
05740     if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
05741     else return rhs.attribute() ? false : lhs.node() < rhs.node();
05742   }
05743 };
05744 
05745 PUGI__FN double gen_nan()
05746 {
05747 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
05748   union {
05749     float f;
05750     uint32_t i;
05751   } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
05752   u[0].i = 0x7fc00000;
05753   return u[0].f;
05754 #else
05755   // fallback
05756   const volatile double zero = 0.0;
05757   return zero / zero;
05758 #endif
05759 }
05760 
05761 PUGI__FN bool is_nan(double value)
05762 {
05763 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
05764   return !!_isnan(value);
05765 #elif defined(fpclassify) && defined(FP_NAN)
05766   return fpclassify(value) == FP_NAN;
05767 #else
05768   // fallback
05769   const volatile double v = value;
05770   return v != v;
05771 #endif
05772 }
05773 
05774 PUGI__FN const char_t* convert_number_to_string_special(double value)
05775 {
05776 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
05777   if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
05778   if (_isnan(value)) return PUGIXML_TEXT("NaN");
05779   return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
05780 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
05781   switch (fpclassify(value)) {
05782   case FP_NAN:
05783     return PUGIXML_TEXT("NaN");
05784 
05785   case FP_INFINITE:
05786     return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
05787 
05788   case FP_ZERO:
05789     return PUGIXML_TEXT("0");
05790 
05791   default:
05792     return 0;
05793   }
05794 #else
05795   // fallback
05796   const volatile double v = value;
05797 
05798   if (v == 0) return PUGIXML_TEXT("0");
05799   if (v != v) return PUGIXML_TEXT("NaN");
05800   if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
05801   return 0;
05802 #endif
05803 }
05804 
05805 PUGI__FN bool convert_number_to_boolean(double value)
05806 {
05807   return (value != 0 && !is_nan(value));
05808 }
05809 
05810 PUGI__FN void truncate_zeros(char* begin, char* end)
05811 {
05812   while (begin != end && end[-1] == '0') end--;
05813 
05814   *end = 0;
05815 }
05816 
05817 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
05818 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
05819 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
05820 {
05821   // get base values
05822   int sign, exponent;
05823   _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
05824 
05825   // truncate redundant zeros
05826   truncate_zeros(buffer, buffer + strlen(buffer));
05827 
05828   // fill results
05829   *out_mantissa = buffer;
05830   *out_exponent = exponent;
05831 }
05832 #else
05833 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
05834 {
05835   // get a scientific notation value with IEEE DBL_DIG decimals
05836   sprintf(buffer, "%.*e", DBL_DIG, value);
05837   assert(strlen(buffer) < buffer_size);
05838   (void)!buffer_size;
05839 
05840   // get the exponent (possibly negative)
05841   char* exponent_string = strchr(buffer, 'e');
05842   assert(exponent_string);
05843 
05844   int exponent = atoi(exponent_string + 1);
05845 
05846   // extract mantissa string: skip sign
05847   char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
05848   assert(mantissa[0] != '0' && mantissa[1] == '.');
05849 
05850   // divide mantissa by 10 to eliminate integer part
05851   mantissa[1] = mantissa[0];
05852   mantissa++;
05853   exponent++;
05854 
05855   // remove extra mantissa digits and zero-terminate mantissa
05856   truncate_zeros(mantissa, exponent_string);
05857 
05858   // fill results
05859   *out_mantissa = mantissa;
05860   *out_exponent = exponent;
05861 }
05862 #endif
05863 
05864 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
05865 {
05866   // try special number conversion
05867   const char_t* special = convert_number_to_string_special(value);
05868   if (special) return xpath_string_const(special);
05869 
05870   // get mantissa + exponent form
05871   char mantissa_buffer[64];
05872 
05873   char* mantissa;
05874   int exponent;
05875   convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
05876 
05877   // make the number!
05878   char_t result[512];
05879   char_t* s = result;
05880 
05881   // sign
05882   if (value < 0) *s++ = '-';
05883 
05884   // integer part
05885   if (exponent <= 0) {
05886     *s++ = '0';
05887   } else {
05888     while (exponent > 0) {
05889       assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
05890       *s++ = *mantissa ? *mantissa++ : '0';
05891       exponent--;
05892     }
05893   }
05894 
05895   // fractional part
05896   if (*mantissa) {
05897     // decimal point
05898     *s++ = '.';
05899 
05900     // extra zeroes from negative exponent
05901     while (exponent < 0) {
05902       *s++ = '0';
05903       exponent++;
05904     }
05905 
05906     // extra mantissa digits
05907     while (*mantissa) {
05908       assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
05909       *s++ = *mantissa++;
05910     }
05911   }
05912 
05913   // zero-terminate
05914   assert(s < result + sizeof(result) / sizeof(result[0]));
05915   *s = 0;
05916 
05917   return xpath_string(result, alloc);
05918 }
05919 
05920 PUGI__FN bool check_string_to_number_format(const char_t* string)
05921 {
05922   // parse leading whitespace
05923   while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
05924 
05925   // parse sign
05926   if (*string == '-') ++string;
05927 
05928   if (!*string) return false;
05929 
05930   // if there is no integer part, there should be a decimal part with at least one digit
05931   if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
05932 
05933   // parse integer part
05934   while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
05935 
05936   // parse decimal part
05937   if (*string == '.') {
05938     ++string;
05939 
05940     while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
05941   }
05942 
05943   // parse trailing whitespace
05944   while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
05945 
05946   return *string == 0;
05947 }
05948 
05949 PUGI__FN double convert_string_to_number(const char_t* string)
05950 {
05951   // check string format
05952   if (!check_string_to_number_format(string)) return gen_nan();
05953 
05954   // parse string
05955 #ifdef PUGIXML_WCHAR_MODE
05956   return wcstod(string, 0);
05957 #else
05958   return atof(string);
05959 #endif
05960 }
05961 
05962 PUGI__FN bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result)
05963 {
05964   char_t buffer[32];
05965 
05966   size_t length = static_cast<size_t>(end - begin);
05967   char_t* scratch = buffer;
05968 
05969   if (length >= sizeof(buffer) / sizeof(buffer[0])) {
05970     // need to make dummy on-heap copy
05971     scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
05972     if (!scratch) return false;
05973   }
05974 
05975   // copy string to zero-terminated buffer and perform conversion
05976   memcpy(scratch, begin, length * sizeof(char_t));
05977   scratch[length] = 0;
05978 
05979   *out_result = convert_string_to_number(scratch);
05980 
05981   // free dummy buffer
05982   if (scratch != buffer) xml_memory::deallocate(scratch);
05983 
05984   return true;
05985 }
05986 
05987 PUGI__FN double round_nearest(double value)
05988 {
05989   return floor(value + 0.5);
05990 }
05991 
05992 PUGI__FN double round_nearest_nzero(double value)
05993 {
05994   // same as round_nearest, but returns -0 for [-0.5, -0]
05995   // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
05996   return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
05997 }
05998 
05999 PUGI__FN const char_t* qualified_name(const xpath_node& node)
06000 {
06001   return node.attribute() ? node.attribute().name() : node.node().name();
06002 }
06003 
06004 PUGI__FN const char_t* local_name(const xpath_node& node)
06005 {
06006   const char_t* name = qualified_name(node);
06007   const char_t* p = find_char(name, ':');
06008 
06009   return p ? p + 1 : name;
06010 }
06011 
06012 struct namespace_uri_predicate {
06013   const char_t* prefix;
06014   size_t prefix_length;
06015 
06016   namespace_uri_predicate(const char_t* name) {
06017     const char_t* pos = find_char(name, ':');
06018 
06019     prefix = pos ? name : 0;
06020     prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
06021   }
06022 
06023   bool operator()(const xml_attribute& a) const {
06024     const char_t* name = a.name();
06025 
06026     if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
06027 
06028     return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
06029   }
06030 };
06031 
06032 PUGI__FN const char_t* namespace_uri(const xml_node& node)
06033 {
06034   namespace_uri_predicate pred = node.name();
06035 
06036   xml_node p = node;
06037 
06038   while (p) {
06039     xml_attribute a = p.find_attribute(pred);
06040 
06041     if (a) return a.value();
06042 
06043     p = p.parent();
06044   }
06045 
06046   return PUGIXML_TEXT("");
06047 }
06048 
06049 PUGI__FN const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
06050 {
06051   namespace_uri_predicate pred = attr.name();
06052 
06053   // Default namespace does not apply to attributes
06054   if (!pred.prefix) return PUGIXML_TEXT("");
06055 
06056   xml_node p = parent;
06057 
06058   while (p) {
06059     xml_attribute a = p.find_attribute(pred);
06060 
06061     if (a) return a.value();
06062 
06063     p = p.parent();
06064   }
06065 
06066   return PUGIXML_TEXT("");
06067 }
06068 
06069 PUGI__FN const char_t* namespace_uri(const xpath_node& node)
06070 {
06071   return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
06072 }
06073 
06074 PUGI__FN void normalize_space(char_t* buffer)
06075 {
06076   char_t* write = buffer;
06077 
06078   for (char_t* it = buffer; *it; ) {
06079     char_t ch = *it++;
06080 
06081     if (PUGI__IS_CHARTYPE(ch, ct_space)) {
06082       // replace whitespace sequence with single space
06083       while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
06084 
06085       // avoid leading spaces
06086       if (write != buffer) *write++ = ' ';
06087     } else *write++ = ch;
06088   }
06089 
06090   // remove trailing space
06091   if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
06092 
06093   // zero-terminate
06094   *write = 0;
06095 }
06096 
06097 PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)
06098 {
06099   size_t to_length = strlength(to);
06100 
06101   char_t* write = buffer;
06102 
06103   while (*buffer) {
06104     PUGI__DMC_VOLATILE char_t ch = *buffer++;
06105 
06106     const char_t* pos = find_char(from, ch);
06107 
06108     if (!pos)
06109       *write++ = ch; // do not process
06110     else if (static_cast<size_t>(pos - from) < to_length)
06111       *write++ = to[pos - from]; // replace
06112   }
06113 
06114   // zero-terminate
06115   *write = 0;
06116 }
06117 
06118 struct xpath_variable_boolean: xpath_variable {
06119   xpath_variable_boolean(): value(false) {
06120   }
06121 
06122   bool value;
06123   char_t name[1];
06124 };
06125 
06126 struct xpath_variable_number: xpath_variable {
06127   xpath_variable_number(): value(0) {
06128   }
06129 
06130   double value;
06131   char_t name[1];
06132 };
06133 
06134 struct xpath_variable_string: xpath_variable {
06135   xpath_variable_string(): value(0) {
06136   }
06137 
06138   ~xpath_variable_string() {
06139     if (value) xml_memory::deallocate(value);
06140   }
06141 
06142   char_t* value;
06143   char_t name[1];
06144 };
06145 
06146 struct xpath_variable_node_set: xpath_variable {
06147   xpath_node_set value;
06148   char_t name[1];
06149 };
06150 
// File-local, default-constructed (hence empty) node set.
// NOTE(review): its users are outside this part of the file; presumably it is handed out
// where a node-set value is requested but none is available - confirm at the use sites.
06151 static const xpath_node_set dummy_node_set;
06152 
06153 PUGI__FN unsigned int hash_string(const char_t* str)
06154 {
06155   // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
06156   unsigned int result = 0;
06157 
06158   while (*str) {
06159     result += static_cast<unsigned int>(*str++);
06160     result += result << 10;
06161     result ^= result >> 6;
06162   }
06163 
06164   result += result << 3;
06165   result ^= result >> 11;
06166   result += result << 15;
06167 
06168   return result;
06169 }
06170 
06171 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
06172 {
06173   size_t length = strlength(name);
06174   if (length == 0) return 0; // empty variable names are invalid
06175 
06176   // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
06177   void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
06178   if (!memory) return 0;
06179 
06180   T* result = new (memory) T();
06181 
06182   memcpy(result->name, name, (length + 1) * sizeof(char_t));
06183 
06184   return result;
06185 }
06186 
06187 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
06188 {
06189   switch (type) {
06190   case xpath_type_node_set:
06191     return new_xpath_variable<xpath_variable_node_set>(name);
06192 
06193   case xpath_type_number:
06194     return new_xpath_variable<xpath_variable_number>(name);
06195 
06196   case xpath_type_string:
06197     return new_xpath_variable<xpath_variable_string>(name);
06198 
06199   case xpath_type_boolean:
06200     return new_xpath_variable<xpath_variable_boolean>(name);
06201 
06202   default:
06203     return 0;
06204   }
06205 }
06206 
06207 template <typename T> PUGI__FN void delete_xpath_variable(T* var)
06208 {
06209   var->~T();
06210   xml_memory::deallocate(var);
06211 }
06212 
06213 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
06214 {
06215   switch (type) {
06216   case xpath_type_node_set:
06217     delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
06218     break;
06219 
06220   case xpath_type_number:
06221     delete_xpath_variable(static_cast<xpath_variable_number*>(var));
06222     break;
06223 
06224   case xpath_type_string:
06225     delete_xpath_variable(static_cast<xpath_variable_string*>(var));
06226     break;
06227 
06228   case xpath_type_boolean:
06229     delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
06230     break;
06231 
06232   default:
06233     assert(!"Invalid variable type");
06234   }
06235 }
06236 
06237 PUGI__FN xpath_variable* get_variable(xpath_variable_set* set, const char_t* begin, const char_t* end)
06238 {
06239   char_t buffer[32];
06240 
06241   size_t length = static_cast<size_t>(end - begin);
06242   char_t* scratch = buffer;
06243 
06244   if (length >= sizeof(buffer) / sizeof(buffer[0])) {
06245     // need to make dummy on-heap copy
06246     scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
06247     if (!scratch) return 0;
06248   }
06249 
06250   // copy string to zero-terminated buffer and perform lookup
06251   memcpy(scratch, begin, length * sizeof(char_t));
06252   scratch[length] = 0;
06253 
06254   xpath_variable* result = set->get(scratch);
06255 
06256   // free dummy buffer
06257   if (scratch != buffer) xml_memory::deallocate(scratch);
06258 
06259   return result;
06260 }
06261 PUGI__NS_END
06262 
06263 // Internal node set class
06264 PUGI__NS_BEGIN
06265 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
06266 {
06267   xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
06268 
06269   if (type == xpath_node_set::type_unsorted) {
06270     sort(begin, end, document_order_comparator());
06271 
06272     type = xpath_node_set::type_sorted;
06273   }
06274 
06275   if (type != order) reverse(begin, end);
06276 
06277   return order;
06278 }
06279 
06280 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
06281 {
06282   if (begin == end) return xpath_node();
06283 
06284   switch (type) {
06285   case xpath_node_set::type_sorted:
06286     return *begin;
06287 
06288   case xpath_node_set::type_sorted_reverse:
06289     return *(end - 1);
06290 
06291   case xpath_node_set::type_unsorted:
06292     return *min_element(begin, end, document_order_comparator());
06293 
06294   default:
06295     assert(!"Invalid node set type");
06296     return xpath_node();
06297   }
06298 }
06299 
06300 class xpath_node_set_raw
06301 {
06302   xpath_node_set::type_t _type;
06303 
06304   xpath_node* _begin;
06305   xpath_node* _end;
06306   xpath_node* _eos;
06307 
06308 public:
06309   xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) {
06310   }
06311 
06312   xpath_node* begin() const {
06313     return _begin;
06314   }
06315 
06316   xpath_node* end() const {
06317     return _end;
06318   }
06319 
06320   bool empty() const {
06321     return _begin == _end;
06322   }
06323 
06324   size_t size() const {
06325     return static_cast<size_t>(_end - _begin);
06326   }
06327 
06328   xpath_node first() const {
06329     return xpath_first(_begin, _end, _type);
06330   }
06331 
06332   void push_back(const xpath_node& node, xpath_allocator* alloc) {
06333     if (_end == _eos) {
06334       size_t capacity = static_cast<size_t>(_eos - _begin);
06335 
06336       // get new capacity (1.5x rule)
06337       size_t new_capacity = capacity + capacity / 2 + 1;
06338 
06339       // reallocate the old array or allocate a new one
06340       xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
06341       assert(data);
06342 
06343       // finalize
06344       _begin = data;
06345       _end = data + capacity;
06346       _eos = data + new_capacity;
06347     }
06348 
06349     *_end++ = node;
06350   }
06351 
06352   void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) {
06353     size_t size_ = static_cast<size_t>(_end - _begin);
06354     size_t capacity = static_cast<size_t>(_eos - _begin);
06355     size_t count = static_cast<size_t>(end_ - begin_);
06356 
06357     if (size_ + count > capacity) {
06358       // reallocate the old array or allocate a new one
06359       xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
06360       assert(data);
06361 
06362       // finalize
06363       _begin = data;
06364       _end = data + size_;
06365       _eos = data + size_ + count;
06366     }
06367 
06368     memcpy(_end, begin_, count * sizeof(xpath_node));
06369     _end += count;
06370   }
06371 
06372   void sort_do() {
06373     _type = xpath_sort(_begin, _end, _type, false);
06374   }
06375 
06376   void truncate(xpath_node* pos) {
06377     assert(_begin <= pos && pos <= _end);
06378 
06379     _end = pos;
06380   }
06381 
06382   void remove_duplicates() {
06383     if (_type == xpath_node_set::type_unsorted)
06384       sort(_begin, _end, duplicate_comparator());
06385 
06386     _end = unique(_begin, _end);
06387   }
06388 
06389   xpath_node_set::type_t type() const {
06390     return _type;
06391   }
06392 
06393   void set_type(xpath_node_set::type_t value) {
06394     _type = value;
06395   }
06396 };
06397 PUGI__NS_END
06398 
06399 PUGI__NS_BEGIN
06400 struct xpath_context {
06401   xpath_node n;
06402   size_t position, size;
06403 
06404   xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) {
06405   }
06406 };
06407 
// Token kinds produced by the XPath lexer.
enum lexeme_t {
  lex_none = 0,            // unrecognized input

  // comparison operators
  lex_equal,               // =
  lex_not_equal,           // !=
  lex_less,                // <
  lex_greater,             // >
  lex_less_or_equal,       // <=
  lex_greater_or_equal,    // >=

  // arithmetic and set operators
  lex_plus,                // +
  lex_minus,               // -
  lex_multiply,            // *
  lex_union,               // |

  lex_var_ref,             // $name or $prefix:name
  lex_open_brace,          // (
  lex_close_brace,         // )
  lex_quoted_string,       // "..." or '...'
  lex_number,              // digits with an optional fraction, or .digits
  lex_slash,               // /
  lex_double_slash,        // //
  lex_open_square_brace,   // [
  lex_close_square_brace,  // ]
  lex_string,              // name, prefix:name or prefix:*
  lex_comma,               // ,
  lex_axis_attribute,      // @
  lex_dot,                 // .
  lex_double_dot,          // ..
  lex_double_colon,        // ::
  lex_eof                  // end of the query string
};
06437 
06438 struct xpath_lexer_string {
06439   const char_t* begin;
06440   const char_t* end;
06441 
06442   xpath_lexer_string(): begin(0), end(0) {
06443   }
06444 
06445   bool operator==(const char_t* other) const {
06446     size_t length = static_cast<size_t>(end - begin);
06447 
06448     return strequalrange(other, begin, length);
06449   }
06450 };
06451 
06452 class xpath_lexer
06453 {
06454   const char_t* _cur;
06455   const char_t* _cur_lexeme_pos;
06456   xpath_lexer_string _cur_lexeme_contents;
06457 
06458   lexeme_t _cur_lexeme;
06459 
06460 public:
06461   explicit xpath_lexer(const char_t* query): _cur(query) {
06462     next();
06463   }
06464 
06465   const char_t* state() const {
06466     return _cur;
06467   }
06468 
06469   void next() {
06470     const char_t* cur = _cur;
06471 
06472     while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
06473 
06474     // save lexeme position for error reporting
06475     _cur_lexeme_pos = cur;
06476 
06477     switch (*cur) {
06478     case 0:
06479       _cur_lexeme = lex_eof;
06480       break;
06481 
06482     case '>':
06483       if (*(cur+1) == '=') {
06484         cur += 2;
06485         _cur_lexeme = lex_greater_or_equal;
06486       } else {
06487         cur += 1;
06488         _cur_lexeme = lex_greater;
06489       }
06490       break;
06491 
06492     case '<':
06493       if (*(cur+1) == '=') {
06494         cur += 2;
06495         _cur_lexeme = lex_less_or_equal;
06496       } else {
06497         cur += 1;
06498         _cur_lexeme = lex_less;
06499       }
06500       break;
06501 
06502     case '!':
06503       if (*(cur+1) == '=') {
06504         cur += 2;
06505         _cur_lexeme = lex_not_equal;
06506       } else {
06507         _cur_lexeme = lex_none;
06508       }
06509       break;
06510 
06511     case '=':
06512       cur += 1;
06513       _cur_lexeme = lex_equal;
06514 
06515       break;
06516 
06517     case '+':
06518       cur += 1;
06519       _cur_lexeme = lex_plus;
06520 
06521       break;
06522 
06523     case '-':
06524       cur += 1;
06525       _cur_lexeme = lex_minus;
06526 
06527       break;
06528 
06529     case '*':
06530       cur += 1;
06531       _cur_lexeme = lex_multiply;
06532 
06533       break;
06534 
06535     case '|':
06536       cur += 1;
06537       _cur_lexeme = lex_union;
06538 
06539       break;
06540 
06541     case '$':
06542       cur += 1;
06543 
06544       if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) {
06545         _cur_lexeme_contents.begin = cur;
06546 
06547         while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
06548 
06549         if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) { // qname
06550           cur++; // :
06551 
06552           while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
06553         }
06554 
06555         _cur_lexeme_contents.end = cur;
06556 
06557         _cur_lexeme = lex_var_ref;
06558       } else {
06559         _cur_lexeme = lex_none;
06560       }
06561 
06562       break;
06563 
06564     case '(':
06565       cur += 1;
06566       _cur_lexeme = lex_open_brace;
06567 
06568       break;
06569 
06570     case ')':
06571       cur += 1;
06572       _cur_lexeme = lex_close_brace;
06573 
06574       break;
06575 
06576     case '[':
06577       cur += 1;
06578       _cur_lexeme = lex_open_square_brace;
06579 
06580       break;
06581 
06582     case ']':
06583       cur += 1;
06584       _cur_lexeme = lex_close_square_brace;
06585 
06586       break;
06587 
06588     case ',':
06589       cur += 1;
06590       _cur_lexeme = lex_comma;
06591 
06592       break;
06593 
06594     case '/':
06595       if (*(cur+1) == '/') {
06596         cur += 2;
06597         _cur_lexeme = lex_double_slash;
06598       } else {
06599         cur += 1;
06600         _cur_lexeme = lex_slash;
06601       }
06602       break;
06603 
06604     case '.':
06605       if (*(cur+1) == '.') {
06606         cur += 2;
06607         _cur_lexeme = lex_double_dot;
06608       } else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) {
06609         _cur_lexeme_contents.begin = cur; // .
06610 
06611         ++cur;
06612 
06613         while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
06614 
06615         _cur_lexeme_contents.end = cur;
06616 
06617         _cur_lexeme = lex_number;
06618       } else {
06619         cur += 1;
06620         _cur_lexeme = lex_dot;
06621       }
06622       break;
06623 
06624     case '@':
06625       cur += 1;
06626       _cur_lexeme = lex_axis_attribute;
06627 
06628       break;
06629 
06630     case '"':
06631     case '\'': {
06632       char_t terminator = *cur;
06633 
06634       ++cur;
06635 
06636       _cur_lexeme_contents.begin = cur;
06637       while (*cur && *cur != terminator) cur++;
06638       _cur_lexeme_contents.end = cur;
06639 
06640       if (!*cur)
06641         _cur_lexeme = lex_none;
06642       else {
06643         cur += 1;
06644         _cur_lexeme = lex_quoted_string;
06645       }
06646 
06647       break;
06648     }
06649 
06650     case ':':
06651       if (*(cur+1) == ':') {
06652         cur += 2;
06653         _cur_lexeme = lex_double_colon;
06654       } else {
06655         _cur_lexeme = lex_none;
06656       }
06657       break;
06658 
06659     default:
06660       if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) {
06661         _cur_lexeme_contents.begin = cur;
06662 
06663         while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
06664 
06665         if (*cur == '.') {
06666           cur++;
06667 
06668           while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
06669         }
06670 
06671         _cur_lexeme_contents.end = cur;
06672 
06673         _cur_lexeme = lex_number;
06674       } else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) {
06675         _cur_lexeme_contents.begin = cur;
06676 
06677         while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
06678 
06679         if (cur[0] == ':') {
06680           if (cur[1] == '*') { // namespace test ncname:*
06681             cur += 2; // :*
06682           } else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) { // namespace test qname
06683             cur++; // :
06684 
06685             while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
06686           }
06687         }
06688 
06689         _cur_lexeme_contents.end = cur;
06690 
06691         _cur_lexeme = lex_string;
06692       } else {
06693         _cur_lexeme = lex_none;
06694       }
06695     }
06696 
06697     _cur = cur;
06698   }
06699 
06700   lexeme_t current() const {
06701     return _cur_lexeme;
06702   }
06703 
06704   const char_t* current_pos() const {
06705     return _cur_lexeme_pos;
06706   }
06707 
06708   const xpath_lexer_string& contents() const {
06709     assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
06710 
06711     return _cur_lexeme_contents;
06712   }
06713 };
06714 
// Kinds of nodes in the XPath abstract syntax tree.
// The trailing comments show the XPath construct each kind stands for, written in
// terms of the node's operands (left, right, third, siblings).
enum ast_type_t {
  // Logical and comparison operators
  ast_op_or,                   // left or right
  ast_op_and,                  // left and right
  ast_op_equal,                // left = right
  ast_op_not_equal,            // left != right
  ast_op_less,                 // left < right
  ast_op_greater,              // left > right
  ast_op_less_or_equal,        // left <= right
  ast_op_greater_or_equal,     // left >= right

  // Arithmetic operators
  ast_op_add,                  // left + right
  ast_op_subtract,             // left - right
  ast_op_multiply,             // left * right
  ast_op_divide,               // left div right
  ast_op_mod,                  // left mod right
  ast_op_negate,               // - left (unary minus)

  // Node set operators, predicates and filters
  ast_op_union,                // left | right
  ast_predicate,               // apply predicate to set; next points to next predicate
  ast_filter,                  // select * from left where right
  ast_filter_posinv,           // select * from left where right; proximity position invariant

  // Leaf values
  ast_string_constant,         // string constant
  ast_number_constant,         // number constant
  ast_variable,                // variable

  // Node set functions
  ast_func_last,               // last()
  ast_func_position,           // position()
  ast_func_count,              // count(left)
  ast_func_id,                 // id(left)
  ast_func_local_name_0,       // local-name()
  ast_func_local_name_1,       // local-name(left)
  ast_func_namespace_uri_0,    // namespace-uri()
  ast_func_namespace_uri_1,    // namespace-uri(left)
  ast_func_name_0,             // name()
  ast_func_name_1,             // name(left)

  // String functions
  ast_func_string_0,           // string()
  ast_func_string_1,           // string(left)
  ast_func_concat,             // concat(left, right, siblings)
  ast_func_starts_with,        // starts-with(left, right)
  ast_func_contains,           // contains(left, right)
  ast_func_substring_before,   // substring-before(left, right)
  ast_func_substring_after,    // substring-after(left, right)
  ast_func_substring_2,        // substring(left, right)
  ast_func_substring_3,        // substring(left, right, third)
  ast_func_string_length_0,    // string-length()
  ast_func_string_length_1,    // string-length(left)
  ast_func_normalize_space_0,  // normalize-space()
  ast_func_normalize_space_1,  // normalize-space(left)
  ast_func_translate,          // translate(left, right, third)

  // Boolean functions
  ast_func_boolean,            // boolean(left)
  ast_func_not,                // not(left)
  ast_func_true,               // true()
  ast_func_false,              // false()
  ast_func_lang,               // lang(left)

  // Number functions
  ast_func_number_0,           // number()
  ast_func_number_1,           // number(left)
  ast_func_sum,                // sum(left)
  ast_func_floor,              // floor(left)
  ast_func_ceiling,            // ceiling(left)
  ast_func_round,              // round(left)

  // Location steps
  ast_step,                    // process set left with step
  ast_step_root                // select root node
};
06775 
// XPath location step axes. The enumerators follow the alphabetical order of the
// axis names (ancestor, ancestor-or-self, ..., self).
enum axis_t {
  axis_ancestor,            // ancestor::
  axis_ancestor_or_self,    // ancestor-or-self::
  axis_attribute,           // attribute::
  axis_child,               // child::
  axis_descendant,          // descendant::
  axis_descendant_or_self,  // descendant-or-self::
  axis_following,           // following::
  axis_following_sibling,   // following-sibling::
  axis_namespace,           // namespace::
  axis_parent,              // parent::
  axis_preceding,           // preceding::
  axis_preceding_sibling,   // preceding-sibling::
  axis_self                 // self::
};
06791 
// Node tests that a location step can apply to candidate nodes.
enum nodetest_t {
  nodetest_none,             // no node test
  nodetest_name,             // match by exact name
  nodetest_type_node,        // match any node
  nodetest_type_comment,     // match comment nodes
  nodetest_type_pi,          // match processing instruction nodes
  nodetest_type_text,        // match text nodes (PCDATA and CDATA)
  nodetest_pi,               // match processing instructions with a given name
  nodetest_all,              // match every node of the principal type
  nodetest_all_in_namespace  // match names that start with a given prefix
};
06803 
06804 template <axis_t N> struct axis_to_type {
06805   static const axis_t axis;
06806 };
06807 
06808 template <axis_t N> const axis_t axis_to_type<N>::axis = N;
06809 
06810 class xpath_ast_node
06811 {
06812 private:
06813   // node type
06814   char _type;
06815   char _rettype;
06816 
06817   // for ast_step / ast_predicate
06818   char _axis;
06819   char _test;
06820 
06821   // tree node structure
06822   xpath_ast_node* _left;
06823   xpath_ast_node* _right;
06824   xpath_ast_node* _next;
06825 
06826   union {
06827     // value for ast_string_constant
06828     const char_t* string;
06829     // value for ast_number_constant
06830     double number;
06831     // variable for ast_variable
06832     xpath_variable* variable;
06833     // node test for ast_step (node name/namespace/node type/pi target)
06834     const char_t* nodetest;
06835   } _data;
06836 
06837   xpath_ast_node(const xpath_ast_node&);
06838   xpath_ast_node& operator=(const xpath_ast_node&);
06839 
06840   template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) {
06841     xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
06842 
06843     if (lt != xpath_type_node_set && rt != xpath_type_node_set) {
06844       if (lt == xpath_type_boolean || rt == xpath_type_boolean)
06845         return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
06846       else if (lt == xpath_type_number || rt == xpath_type_number)
06847         return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
06848       else if (lt == xpath_type_string || rt == xpath_type_string) {
06849         xpath_allocator_capture cr(stack.result);
06850 
06851         xpath_string ls = lhs->eval_string(c, stack);
06852         xpath_string rs = rhs->eval_string(c, stack);
06853 
06854         return comp(ls, rs);
06855       }
06856     } else if (lt == xpath_type_node_set && rt == xpath_type_node_set) {
06857       xpath_allocator_capture cr(stack.result);
06858 
06859       xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
06860       xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
06861 
06862       for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
06863         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
06864           xpath_allocator_capture cri(stack.result);
06865 
06866           if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
06867             return true;
06868         }
06869 
06870       return false;
06871     } else {
06872       if (lt == xpath_type_node_set) {
06873         swap(lhs, rhs);
06874         swap(lt, rt);
06875       }
06876 
06877       if (lt == xpath_type_boolean)
06878         return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
06879       else if (lt == xpath_type_number) {
06880         xpath_allocator_capture cr(stack.result);
06881 
06882         double l = lhs->eval_number(c, stack);
06883         xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
06884 
06885         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
06886           xpath_allocator_capture cri(stack.result);
06887 
06888           if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
06889             return true;
06890         }
06891 
06892         return false;
06893       } else if (lt == xpath_type_string) {
06894         xpath_allocator_capture cr(stack.result);
06895 
06896         xpath_string l = lhs->eval_string(c, stack);
06897         xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
06898 
06899         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
06900           xpath_allocator_capture cri(stack.result);
06901 
06902           if (comp(l, string_value(*ri, stack.result)))
06903             return true;
06904         }
06905 
06906         return false;
06907       }
06908     }
06909 
06910     assert(!"Wrong types");
06911     return false;
06912   }
06913 
06914   template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) {
06915     xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
06916 
06917     if (lt != xpath_type_node_set && rt != xpath_type_node_set)
06918       return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
06919     else if (lt == xpath_type_node_set && rt == xpath_type_node_set) {
06920       xpath_allocator_capture cr(stack.result);
06921 
06922       xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
06923       xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
06924 
06925       for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) {
06926         xpath_allocator_capture cri(stack.result);
06927 
06928         double l = convert_string_to_number(string_value(*li, stack.result).c_str());
06929 
06930         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
06931           xpath_allocator_capture crii(stack.result);
06932 
06933           if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
06934             return true;
06935         }
06936       }
06937 
06938       return false;
06939     } else if (lt != xpath_type_node_set && rt == xpath_type_node_set) {
06940       xpath_allocator_capture cr(stack.result);
06941 
06942       double l = lhs->eval_number(c, stack);
06943       xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
06944 
06945       for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
06946         xpath_allocator_capture cri(stack.result);
06947 
06948         if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
06949           return true;
06950       }
06951 
06952       return false;
06953     } else if (lt == xpath_type_node_set && rt != xpath_type_node_set) {
06954       xpath_allocator_capture cr(stack.result);
06955 
06956       xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
06957       double r = rhs->eval_number(c, stack);
06958 
06959       for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) {
06960         xpath_allocator_capture cri(stack.result);
06961 
06962         if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
06963           return true;
06964       }
06965 
06966       return false;
06967     } else {
06968       assert(!"Wrong types");
06969       return false;
06970     }
06971   }
06972 
06973   void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) {
06974     assert(ns.size() >= first);
06975 
06976     size_t i = 1;
06977     size_t size = ns.size() - first;
06978 
06979     xpath_node* last = ns.begin() + first;
06980 
06981     // remove_if... or well, sort of
06982     for (xpath_node* it = last; it != ns.end(); ++it, ++i) {
06983       xpath_context c(*it, i, size);
06984 
06985       if (expr->rettype() == xpath_type_number) {
06986         if (expr->eval_number(c, stack) == i)
06987           *last++ = *it;
06988       } else if (expr->eval_boolean(c, stack))
06989         *last++ = *it;
06990     }
06991 
06992     ns.truncate(last);
06993   }
06994 
06995   void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack) {
06996     if (ns.size() == first) return;
06997 
06998     for (xpath_ast_node* pred = _right; pred; pred = pred->_next) {
06999       apply_predicate(ns, first, pred->_left, stack);
07000     }
07001   }
07002 
07003   void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc) {
07004     if (!a) return;
07005 
07006     const char_t* name = a.name();
07007 
07008     // There are no attribute nodes corresponding to attributes that declare namespaces
07009     // That is, "xmlns:..." or "xmlns"
07010     if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
07011 
07012     switch (_test) {
07013     case nodetest_name:
07014       if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc);
07015       break;
07016 
07017     case nodetest_type_node:
07018     case nodetest_all:
07019       ns.push_back(xpath_node(a, parent), alloc);
07020       break;
07021 
07022     case nodetest_all_in_namespace:
07023       if (starts_with(name, _data.nodetest))
07024         ns.push_back(xpath_node(a, parent), alloc);
07025       break;
07026 
07027     default:
07028       ;
07029     }
07030   }
07031 
07032   void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc) {
07033     if (!n) return;
07034 
07035     switch (_test) {
07036     case nodetest_name:
07037       if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);
07038       break;
07039 
07040     case nodetest_type_node:
07041       ns.push_back(n, alloc);
07042       break;
07043 
07044     case nodetest_type_comment:
07045       if (n.type() == node_comment)
07046         ns.push_back(n, alloc);
07047       break;
07048 
07049     case nodetest_type_text:
07050       if (n.type() == node_pcdata || n.type() == node_cdata)
07051         ns.push_back(n, alloc);
07052       break;
07053 
07054     case nodetest_type_pi:
07055       if (n.type() == node_pi)
07056         ns.push_back(n, alloc);
07057       break;
07058 
07059     case nodetest_pi:
07060       if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
07061         ns.push_back(n, alloc);
07062       break;
07063 
07064     case nodetest_all:
07065       if (n.type() == node_element)
07066         ns.push_back(n, alloc);
07067       break;
07068 
07069     case nodetest_all_in_namespace:
07070       if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
07071         ns.push_back(n, alloc);
07072       break;
07073 
07074     default:
07075       assert(!"Unknown axis");
07076     }
07077   }
07078 
07079   template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T) {
07080     const axis_t axis = T::axis;
07081 
07082     switch (axis) {
07083     case axis_attribute: {
07084       for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
07085         step_push(ns, a, n, alloc);
07086 
07087       break;
07088     }
07089 
07090     case axis_child: {
07091       for (xml_node c = n.first_child(); c; c = c.next_sibling())
07092         step_push(ns, c, alloc);
07093 
07094       break;
07095     }
07096 
07097     case axis_descendant:
07098     case axis_descendant_or_self: {
07099       if (axis == axis_descendant_or_self)
07100         step_push(ns, n, alloc);
07101 
07102       xml_node cur = n.first_child();
07103 
07104       while (cur && cur != n) {
07105         step_push(ns, cur, alloc);
07106 
07107         if (cur.first_child())
07108           cur = cur.first_child();
07109         else if (cur.next_sibling())
07110           cur = cur.next_sibling();
07111         else {
07112           while (!cur.next_sibling() && cur != n)
07113             cur = cur.parent();
07114 
07115           if (cur != n) cur = cur.next_sibling();
07116         }
07117       }
07118 
07119       break;
07120     }
07121 
07122     case axis_following_sibling: {
07123       for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
07124         step_push(ns, c, alloc);
07125 
07126       break;
07127     }
07128 
07129     case axis_preceding_sibling: {
07130       for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
07131         step_push(ns, c, alloc);
07132 
07133       break;
07134     }
07135 
07136     case axis_following: {
07137       xml_node cur = n;
07138 
07139       // exit from this node so that we don't include descendants
07140       while (cur && !cur.next_sibling()) cur = cur.parent();
07141       cur = cur.next_sibling();
07142 
07143       for (;;) {
07144         step_push(ns, cur, alloc);
07145 
07146         if (cur.first_child())
07147           cur = cur.first_child();
07148         else if (cur.next_sibling())
07149           cur = cur.next_sibling();
07150         else {
07151           while (cur && !cur.next_sibling()) cur = cur.parent();
07152           cur = cur.next_sibling();
07153 
07154           if (!cur) break;
07155         }
07156       }
07157 
07158       break;
07159     }
07160 
07161     case axis_preceding: {
07162       xml_node cur = n;
07163 
07164       while (cur && !cur.previous_sibling()) cur = cur.parent();
07165       cur = cur.previous_sibling();
07166 
07167       for (;;) {
07168         if (cur.last_child())
07169           cur = cur.last_child();
07170         else {
07171           // leaf node, can't be ancestor
07172           step_push(ns, cur, alloc);
07173 
07174           if (cur.previous_sibling())
07175             cur = cur.previous_sibling();
07176           else {
07177             do {
07178               cur = cur.parent();
07179               if (!cur) break;
07180 
07181               if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
07182             } while (!cur.previous_sibling());
07183 
07184             cur = cur.previous_sibling();
07185 
07186             if (!cur) break;
07187           }
07188         }
07189       }
07190 
07191       break;
07192     }
07193 
07194     case axis_ancestor:
07195     case axis_ancestor_or_self: {
07196       if (axis == axis_ancestor_or_self)
07197         step_push(ns, n, alloc);
07198 
07199       xml_node cur = n.parent();
07200 
07201       while (cur) {
07202         step_push(ns, cur, alloc);
07203 
07204         cur = cur.parent();
07205       }
07206 
07207       break;
07208     }
07209 
07210     case axis_self: {
07211       step_push(ns, n, alloc);
07212 
07213       break;
07214     }
07215 
07216     case axis_parent: {
07217       if (n.parent()) step_push(ns, n.parent(), alloc);
07218 
07219       break;
07220     }
07221 
07222     default:
07223       assert(!"Unimplemented axis");
07224     }
07225   }
07226 
07227   template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v) {
07228     const axis_t axis = T::axis;
07229 
07230     switch (axis) {
07231     case axis_ancestor:
07232     case axis_ancestor_or_self: {
07233       if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
07234         step_push(ns, a, p, alloc);
07235 
07236       xml_node cur = p;
07237 
07238       while (cur) {
07239         step_push(ns, cur, alloc);
07240 
07241         cur = cur.parent();
07242       }
07243 
07244       break;
07245     }
07246 
07247     case axis_descendant_or_self:
07248     case axis_self: {
07249       if (_test == nodetest_type_node) // reject attributes based on principal node type test
07250         step_push(ns, a, p, alloc);
07251 
07252       break;
07253     }
07254 
07255     case axis_following: {
07256       xml_node cur = p;
07257 
07258       for (;;) {
07259         if (cur.first_child())
07260           cur = cur.first_child();
07261         else if (cur.next_sibling())
07262           cur = cur.next_sibling();
07263         else {
07264           while (cur && !cur.next_sibling()) cur = cur.parent();
07265           cur = cur.next_sibling();
07266 
07267           if (!cur) break;
07268         }
07269 
07270         step_push(ns, cur, alloc);
07271       }
07272 
07273       break;
07274     }
07275 
07276     case axis_parent: {
07277       st