// Markup.cpp: implementation of the CMarkup class.
/**
* @copyright
CMarkup Release 6.5 Lite
Copyright (C) 1999-2003 First Objective Software, Inc. All rights reserved
This entire notice must be retained in this source code
Redistributing this source code requires written permission
This software is provided "as is", with no warranty.
Latest fixes enhancements and documentation at www.firstobject.com
Retrieved from codeproject.com by GJP and PSM on 08/02/05
Posted license:
CMarkup Lite is free for compiling into your commercial, personal and
educational applications. Modify it as much as you like, but retain the
copyright notice in the source code remarks. Redistribution of the
modified or unmodified CMarkup Lite class source code is limited to your
own development team and it cannot be made publicly available or
distributable as part of any source code library or product, even if that
offering is free.
* @class CMarkup
*
* @brief This class provides methods to create and extract data and attributes from an XML tree.
* @author
* Modifications as noted by Lance Deaver
*
* $Id$
*/
#include "CMarkup.h"
// Added by Lance Deaver
#include "GATS_CMarkup_Exception.h"
//#include
#include
#include
// end of adds
using namespace std;
/**
 * Copy the document text, the parsed element index and the current
 * navigation position from another CMarkup object.
 *
 * The stored parse error text (m_csError) is not copied.
 *
 * @param[in] markup The object to copy from.
 */
void CMarkup::operator=( const CMarkup& markup )
{
    // Assigning an object to itself must be a no-op. Clearing m_aPos before
    // copying it would otherwise destroy the very index being copied.
    if ( this == &markup )
        return;

    // Navigation state
    m_iPosParent = markup.m_iPosParent;
    m_iPos = markup.m_iPos;
    m_iPosChild = markup.m_iPosChild;
    m_iPosFree = markup.m_iPosFree;
    m_nNodeType = markup.m_nNodeType;

    // Element index and document text; assignment replaces the old contents,
    // so no explicit clear() is needed beforehand
    m_aPos = markup.m_aPos;
    m_csDoc = markup.m_csDoc;
}
/**
 * Replace the document text and rebuild the element index by parsing it.
 *
 * @param[in] szDoc The new document text; a null pointer is treated as an
 *                  empty document.
 *
 * @return true if a root element was parsed successfully, false if the
 *         document is empty or the parse failed.
 */
bool CMarkup::SetDoc( LPCTSTR szDoc )
{
    // Start from a clean index and position
    m_iPosFree = 1;
    ResetPos();

    // Store the new text (null pointer means empty document)
    m_csDoc = szDoc ? szDoc : "";

    // Make sure the position array has at least one slot per 64 bytes of
    // document plus 8. This is a tight fit for small documents and keeps the
    // number of reallocations low for large ones. The array is never shrunk.
    int nMinSlots = m_csDoc.length() / 64 + 8;
    if ( m_aPos.size() < nMinSlots )
        m_aPos.resize( nMinSlots );

    // Parse from the virtual root at index 0
    bool bParsedOk = false;
    if ( ! m_csDoc.empty() )
    {
        m_aPos[0].Clear();
        const int iRoot = x_ParseElem( 0 );
        bParsedOk = ( iRoot > 0 );
        if ( bParsedOk )
            m_aPos[0].iElemChild = iRoot;
    }

    // Empty document or failed parse: discard whatever was indexed
    if ( ! bParsedOk )
    {
        m_aPos[0].Clear();
        m_iPosFree = 1;
    }

    ResetPos();
    return bParsedOk;
}
/**
 * @return true when the position array is non-empty and entry 0 has a child
 *         element linked to it, false otherwise.
 */
bool CMarkup::IsWellFormed()
{
    return m_aPos.size() && m_aPos[0].iElemChild;
}
/**
 * Search from the current main position, under the current parent, using
 * x_FindElem. The current position changes only when a match is found.
 *
 * @param[in] szName Name passed through to x_FindElem.
 *
 * @return true if an element was found and made current, false otherwise.
 */
bool CMarkup::FindElem( LPCTSTR szName )
{
    // Nothing to search when the position array is empty
    if ( ! m_aPos.size() )
        return false;

    const int iFound = x_FindElem( m_iPosParent, m_iPos, szName );
    if ( ! iFound )
        return false;

    // Make the match the main position, with no child position selected
    x_SetPos( m_aPos[iFound].iElemParent, iFound, 0 );
    return true;
}
/**
 * Search from the current child position, under the current main position,
 * using x_FindElem. The child position changes only when a match is found.
 *
 * @param[in] szName Name passed through to x_FindElem.
 *
 * @return true if a child element was found and made current, false otherwise.
 */
bool CMarkup::FindChildElem( LPCTSTR szName )
{
    // Shorthand: when there is no current main position, call FindElem()
    // first so that the search happens under the root element
    if ( ! m_iPos )
        FindElem();

    const int iChild = x_FindElem( m_iPos, m_iPosChild, szName );
    if ( ! iChild )
        return false;

    // Re-anchor the position on the child's parent and select the child
    const int iOwner = m_aPos[iChild].iElemParent;
    x_SetPos( m_aPos[iOwner].iElemParent, iOwner, iChild );
    return true;
}
/**
 * @return The tag name at the current main position, or an empty string
 *         when there is no current main position.
 */
string CMarkup::GetTagName() const
{
    if ( ! m_iPos )
        return string();
    return x_GetTagName( m_iPos );
}
/**
 * Descend one level: the current main position becomes the parent and the
 * current child position (possibly none) becomes the main position.
 *
 * Since release 6.3 this succeeds even when there is no child position; a
 * following FindElem() then finds the first element. The older shorthand of
 * implicitly calling FindChildElem() first was never part of EDOM, was
 * misleading, and was removed in 6.3. That change is NOT backwards
 * compatible.
 *
 * @return true if the position was moved, false when there is no main
 *         position or the current node is not an element.
 */
bool CMarkup::IntoElem()
{
    const bool bOnElement = m_iPos && m_nNodeType == MarkupNodeType::MNT_ELEMENT;
    if ( ! bOnElement )
        return false;

    x_SetPos( m_iPos, m_iPosChild, 0 );
    return true;
}
/**
 * Ascend one level: the current parent becomes the main position and the
 * former main position becomes the child position.
 *
 * @return true if the position was moved, false when there is no parent
 *         position to go out to.
 */
bool CMarkup::OutOfElem()
{
    if ( ! m_iPosParent )
        return false;

    x_SetPos( m_aPos[m_iPosParent].iElemParent, m_iPosParent, m_iPos );
    return true;
}
//////////////////////////////////////////////////////////////////////
// Private Methods
//////////////////////////////////////////////////////////////////////
/**
 * Reserve the next unused ElemPos slot in m_aPos, growing the array when it
 * is full.
 *
 * @return The index of the reserved slot; it is always a valid index into
 *         m_aPos when this function returns.
 */
int CMarkup::x_GetFreePos()
{
    if ( m_iPosFree >= static_cast<int>( m_aPos.size() ) )
    {
        // Grow by half. For a free index of 0 or 1 that adds nothing, so
        // fall back to growing by one slot to keep the returned index valid.
        int nNewSize = m_iPosFree + m_iPosFree / 2;
        if ( nNewSize <= m_iPosFree )
            nNewSize = m_iPosFree + 1;
        m_aPos.resize( nNewSize );
    }

    // Hand out the current free index and advance to the next one
    return m_iPosFree++;
}
/**
 * Give back the most recently reserved ElemPos slot by stepping the
 * free-slot index back by one, so the index returned by x_GetFreePos()
 * can be reused.
 *
 * @return Always 0, so a caller can write "return x_ReleasePos();" to
 *         release its slot and return zero in a single statement.
 */
int CMarkup::x_ReleasePos()
{
    m_iPosFree -= 1;
    return 0;
}
/**
 * Record a parse error in m_csError and release the ElemPos slot that the
 * failing parse step had reserved.
 *
 * @param[in] szError The error text. When szName is given this is used as a
 *                    printf-style format containing one %s.
 * @param[in] szName  Optional name substituted into szError; may be null.
 *
 * @return Always -1.
 */
int CMarkup::x_ParseError( LPCTSTR szError, LPCTSTR szName )
{
    if ( szName )
    {
        // std::string has no Format(), so format into a local buffer.
        // szName comes from the document being parsed and can be arbitrarily
        // long, so the write must be bounded; an over-long message is
        // truncated to fit instead of overrunning the stack buffer.
        char errorMessage[512];
        snprintf( errorMessage, sizeof(errorMessage), szError, szName );
        m_csError = errorMessage;
    }
    else
    {
        m_csError = szError;
    }

    x_ReleasePos();
    return -1;
}
/**
 * Parse the next child element of iPosParent and, recursively, everything
 * that element contains.
 *
 * Parsing starts at document offset m_aPos[iPosParent].nEndL. On the way,
 * m_aPos[iPosParent].nEndL is advanced to the '<' of whatever tag was found.
 *
 * @param[in] iPosParent Index in m_aPos of the element whose content is parsed.
 *
 * @return The index (> 0) of the newly parsed element on success;
 *         0 when the next tag starts with '/' (taken to be the parent's end tag);
 *         -1 on a parse error, in which case m_csError holds the message.
 */
int CMarkup::x_ParseElem( int iPosParent )
{
// This is called by SetDoc and by itself recursively (the original notes also
// name x_AddSubDoc, which is not defined in this part of the file)
// m_aPos[iPosParent].nEndL is where to start parsing for the child element
// In all cases we need to get a new ElemPos, but release it if unused
//
// NOTE: x_GetFreePos() may resize m_aPos, which is why every access below
// goes through m_aPos[index] instead of holding on to a reference
int iPos = x_GetFreePos();
m_aPos[iPos].nStartL = m_aPos[iPosParent].nEndL;
m_aPos[iPos].iElemParent = iPosParent;
m_aPos[iPos].iElemChild = 0;
m_aPos[iPos].iElemNext = 0;
// Start Tag
// A loop is used to skip over tags that are not elements, i.e. any tag whose
// first character after '<' is '?' or '!'
// The loop ends once an element name has been read into csName
TokenPos token( m_csDoc.c_str() );
token.nNext = m_aPos[iPosParent].nEndL;
string csName;
while ( csName.empty() )
{
// Look for left angle bracket of start tag
m_aPos[iPos].nStartL = token.nNext;
if ( ! x_FindChar( token.szDoc, m_aPos[iPos].nStartL, _T('<') ) )
return x_ParseError( _T("Element tag not found") );
// Set parent's End tag to start looking from here (or later)
m_aPos[iPosParent].nEndL = m_aPos[iPos].nStartL;
// Determine whether this is an element, or bypass other type of node
token.nNext = m_aPos[iPos].nStartL + 1;
if ( x_FindToken( token ) )
{
if ( token.bIsString )
return x_ParseError( _T("Tag starts with quote") );
_TCHAR cFirstChar = m_csDoc[token.nL];
if ( cFirstChar == _T('?') || cFirstChar == _T('!') )
{
// Rewind to the '<' so that x_ParseNode sees the node from its start
token.nNext = m_aPos[iPos].nStartL;
if ( ! x_ParseNode(token) )
return x_ParseError( _T("Invalid node") );
}
else if ( cFirstChar != _T('/') )
{
csName = x_GetToken( token );
// Look for end of tag; on success token.nNext is the index of the '>'
if ( ! x_FindChar(token.szDoc, token.nNext, _T('>')) )
return x_ParseError( _T("End of tag not found") );
}
else
return x_ReleasePos(); // probably end tag of parent
}
else
return x_ParseError( _T("Abrupt end within tag") );
}
// token.nNext was left on the '>' that closes the start tag
m_aPos[iPos].nStartR = token.nNext;
// Is ending mark within start tag, i.e. empty element?
if ( m_csDoc[m_aPos[iPos].nStartR-1] == _T('/') )
{
// Empty element
// Close tag left is set to ending mark, and right to open tag right
m_aPos[iPos].nEndL = m_aPos[iPos].nStartR-1;
m_aPos[iPos].nEndR = m_aPos[iPos].nStartR;
}
else // look for end tag
{
// Element probably has contents
// Determine where to start looking for left angle bracket of end tag
// This is done by recursively parsing the contents of this element
int iInner, iInnerPrev = 0;
m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + 1;
// Each successful recursive call yields one child; the loop stops on
// 0 (end tag reached) or -1 (error)
while ( (iInner = x_ParseElem( iPos )) > 0 )
{
// Set links to iInner: first child hangs off this element, later
// children are chained as siblings of the previous child
if ( iInnerPrev )
m_aPos[iInnerPrev].iElemNext = iInner;
else
m_aPos[iPos].iElemChild = iInner;
iInnerPrev = iInner;
// Set offset to reflect child
m_aPos[iPos].nEndL = m_aPos[iInner].nEndR + 1;
}
// Propagate a child's parse error. This element's own slot is not released
// here; SetDoc resets m_iPosFree when the parse fails
if ( iInner == -1 )
return -1;
// Look for left angle bracket of end tag
if ( ! x_FindChar( token.szDoc, m_aPos[iPos].nEndL, _T('<') ) )
return x_ParseError( _T("End tag of %s element not found"), csName.c_str() );
// Look through tokens of end tag:
// non-string token 1 must be '/', token 2 must match the element name,
// and a later '>' token ends the tag
token.nNext = m_aPos[iPos].nEndL + 1;
int nTokenCount = 0;
while ( x_FindToken( token ) )
{
++nTokenCount;
if ( ! token.bIsString )
{
// Is first token not an end slash mark?
if ( nTokenCount == 1 && m_csDoc[token.nL] != _T('/') )
return x_ParseError( _T("Expecting end tag of element %s"), csName.c_str() );
else if ( nTokenCount == 2 && ! token.Match(csName.c_str()) )
return x_ParseError( _T("End tag does not correspond to %s"), csName.c_str() );
// Else is it a right angle bracket?
else if ( m_csDoc[token.nL] == _T('>') )
break;
}
}
// Was a right angle bracket not found?
// (token.nL sitting on the terminating NUL means the document ended first)
if ( ! token.szDoc[token.nL] || nTokenCount < 2 )
return x_ParseError( _T("End tag not completed for element %s"), csName.c_str() );
m_aPos[iPos].nEndR = token.nL;
}
// Successfully parsed element (and contained elements)
return iPos;
}
/**
 * Scan forward from szDoc[nChar] for the character c, one character at a
 * time as measured by _tclen.
 *
 * @param[in]     szDoc NUL-terminated text to scan.
 * @param[in,out] nChar Start index on entry; on return the index of the
 *                      match, or of the terminating NUL if c was not found.
 * @param[in]     c     The character to look for.
 *
 * @return true if c was found before the end of the text, false otherwise.
 */
bool CMarkup::x_FindChar( LPCTSTR szDoc, int& nChar, _TCHAR c )
{
    // static function
    int iScan = nChar;
    while ( szDoc[iScan] && szDoc[iScan] != c )
        iScan += _tclen( &szDoc[iScan] );

    nChar = iScan;
    return szDoc[iScan] != 0;
}
/**
 * Skip over whitespace (space, tab, newline, carriage return) starting at
 * szDoc[nChar].
 *
 * @param[in]     szDoc NUL-terminated text to scan.
 * @param[in,out] nChar Start index on entry; on return the index of the
 *                      first non-whitespace character, or of the terminating
 *                      NUL if only whitespace remained.
 *
 * @return true if a non-whitespace character was found, false at end of text.
 */
bool CMarkup::x_FindAny( LPCTSTR szDoc, int& nChar )
{
    for ( ; szDoc[nChar]; ++nChar )
    {
        if ( ! _tcschr( _T(" \t\n\r"), szDoc[nChar] ) )
            return true;
    }
    return false;
}
/**
 * Starting at token.nNext, skip whitespace and locate the next token.
 *
 * A token is one of:
 *  - a quoted string (single or double quotes): nL/nR bracket the text
 *    between the quotes and bIsString is set;
 *  - a run of characters up to the next whitespace or special character;
 *  - a single special character (one of <>=\/?!).
 *
 * @param[in,out] token Scan state. On success nL and nR are the first and
 *                      last index of the token and nNext is one past it. At
 *                      end of document nL, nR and nNext all point at the end.
 *
 * @return true if a token was found, false at end of document.
 */
bool CMarkup::x_FindToken( CMarkup::TokenPos& token )
{
    LPCTSTR szText = token.szDoc;
    int iCursor = token.nNext;
    token.bIsString = false;

    // By-pass leading whitespace; bail out if nothing but whitespace is left
    if ( ! x_FindAny( szText, iCursor ) )
    {
        token.nNext = iCursor;
        token.nR = iCursor;
        token.nL = iCursor;
        return false;
    }

    const _TCHAR cLead = szText[iCursor];
    const bool bQuoted = ( cLead == _T('\"') || cLead == _T('\'') );
    if ( bQuoted )
    {
        token.bIsString = true;

        // Token text begins just after the opening quote
        ++iCursor;
        token.nL = iCursor;

        // Advance to the matching closing quote (or end of document)
        x_FindChar( token.szDoc, iCursor, cLead );
        token.nR = iCursor - 1;

        // Step over the closing quote unless the document ended first
        if ( szText[iCursor] )
            ++iCursor;
    }
    else
    {
        // Consume characters until whitespace or a special character
        token.nL = iCursor;
        while ( szText[iCursor] && ! _tcschr( _T(" \t\n\r<>=\\/?!"), szText[iCursor] ) )
            iCursor += _tclen( &szText[iCursor] );

        // Nothing consumed means the token is itself one special character
        if ( iCursor == token.nL )
            ++iCursor;
        token.nR = iCursor - 1;
    }

    // nNext points to one past the last character of the token
    token.nNext = iCursor;
    return true;
}
string CMarkup::x_GetToken( const CMarkup::TokenPos& token ) const
{
// The token contains indexes into the document identifying a small substring
// Build the substring from those indexes and return it
if ( token.nL > token.nR )
return _T("");
return m_csDoc.substr(token.nL,
token.nR - token.nL + ((token.nR comment
// dtd
// processing instruction
// cdata section
// element
//
if ( ! szDoc[token.nL+1] || ! szDoc[token.nL+2] )
return 0;
_TCHAR cFirstChar = szDoc[token.nL+1];
LPCTSTR szEndOfNode = NULL;
if ( cFirstChar == _T('?') )
{
nTypeFound = MarkupNodeType::MNT_PROCESSING_INSTRUCTION;
szEndOfNode = _T("?>");
}
else if ( cFirstChar == _T('!') )
{
_TCHAR cSecondChar = szDoc[token.nL+2];
if ( cSecondChar == _T('[') )
{
nTypeFound = MarkupNodeType::MNT_CDATA_SECTION;
szEndOfNode = _T("]]>");
}
else if ( cSecondChar == _T('-') )
{
nTypeFound = MarkupNodeType::MNT_COMMENT;
szEndOfNode = _T("-->");
}
else
{
// Document type requires tokenizing because of strings and brackets
nTypeFound = 0;
int nBrackets = 0;
while ( x_FindToken(token) )
{
if ( ! token.bIsString )
{
_TCHAR cChar = szDoc[token.nL];
if ( cChar == _T('[') )
++nBrackets;
else if ( cChar == _T(']') )
--nBrackets;
else if ( nBrackets == 0 && cChar == _T('>') )
{
nTypeFound = MarkupNodeType::MNT_DOCUMENT_TYPE;
break;
}
}
}
if ( ! nTypeFound )
return 0;
}
}
else if ( cFirstChar == _T('/') )
{
// End tag means no node found within parent element
return 0;
}
else
{
nTypeFound = MarkupNodeType::MNT_ELEMENT;
}
// Search for end of node if not found yet
if ( szEndOfNode )
{
LPCTSTR pEnd = _tcsstr( &szDoc[token.nNext], szEndOfNode );
if ( ! pEnd )
return 0; // not well-formed
token.nNext = (pEnd - szDoc) + _tcslen(szEndOfNode);
}
}
else if ( szDoc[token.nL] )
{
// It is text or whitespace because it did not start with <
nTypeFound = MarkupNodeType::MNT_WHITESPACE;
token.nNext = token.nL;
if ( x_FindAny(szDoc,token.nNext) )
{
if ( szDoc[token.nNext] != _T('<') )
{
nTypeFound = MarkupNodeType::MNT_TEXT;
x_FindChar( szDoc, token.nNext, _T('<') );
}
}
}
return nTypeFound;
}
/**
 * @param[in] iPos Index in m_aPos of the element.
 *
 * @return The first token after the element's opening '<', i.e. its tag
 *         name, or an empty string when iPos is 0 or no token is found.
 */
string CMarkup::x_GetTagName( int iPos ) const
{
    // Position the scan just past the '<' of the start tag
    TokenPos tagToken( m_csDoc.c_str() );
    tagToken.nNext = m_aPos[iPos].nStartL + 1;

    const bool bHaveName = iPos && x_FindToken( tagToken );
    return bHaveName ? x_GetToken( tagToken ) : string( _T("") );
}
/**
 * Walk the tokens of a tag looking for an attribute.
 *
 * @param[in,out] token    Scan state; token.nNext must be set to where
 *                         scanning starts. On success the token describes
 *                         the attribute name (when szAttrib is null or
 *                         empty) or the quoted attribute value (when a name
 *                         was given).
 * @param[in]     szAttrib Attribute name to look for; null or empty means
 *                         "the next attribute, whatever its name".
 *
 * @return true if the attribute was found, false if the tag ended first.
 */
bool CMarkup::x_FindAttrib( CMarkup::TokenPos& token, LPCTSTR szAttrib ) const
{
    // Token index at which the wanted attribute name was seen (0 = not yet)
    int iMatchedName = 0;

    for ( int iToken = 0; x_FindToken( token ); ++iToken )
    {
        if ( token.bIsString )
        {
            // The value is the quoted string two tokens after the name
            // (name, '=', value)
            if ( iMatchedName && iToken == iMatchedName + 2 )
                return true;
            continue;
        }

        const _TCHAR cLead = m_csDoc[token.nL];

        // '>', '/' or '?' ends the tag: attribute not found
        if ( cLead == _T('>') || cLead == _T('/') || cLead == _T('?') )
            break;

        // Skip equal signs, everything once the name has been matched, and
        // token 0, which is never treated as an attribute name
        if ( cLead == _T('=') || iMatchedName || ! iToken )
            continue;

        // Potential attribute name
        if ( ! szAttrib || ! szAttrib[0] )
            return true; // return with token at attrib name
        if ( token.Match( szAttrib ) )
            iMatchedName = iToken;
    }

    // Not found
    return false;
}
string CMarkup::x_GetAttrib( int iPos, LPCTSTR szAttrib ) const
{
// Return the value of the attrib
TokenPos token( m_csDoc.c_str() );
if ( iPos && m_nNodeType == MarkupNodeType::MNT_ELEMENT )
token.nNext = m_aPos[iPos].nStartL + 1;
else
return _T("");
if ( szAttrib && x_FindAttrib( token, szAttrib ) )
return x_TextFromDoc( token.nL, token.nR - ((token.nR"), nChar );
int nEndCDATA = m_csDoc.find( _T("]]>"), nChar );
if ( nEndCDATA != -1 && nEndCDATA < m_aPos[iPos].nEndL )
{
//return m_csDoc.Mid( nChar, nEndCDATA - nChar );
return m_csDoc.substr( nChar, nEndCDATA - nChar );
}
}
return x_TextFromDoc( m_aPos[iPos].nStartR+1, m_aPos[iPos].nEndL-1 );
}
return _T("");
}
string CMarkup::x_TextToDoc( LPCTSTR szText, bool bAttrib ) const
{
// Convert text as seen outside XML document to XML friendly
// replacing special characters with ampersand escape codes
// E.g. convert "6>7" to "6>7"
//
// < less than
// & ampersand
// > greater than
//
// and for attributes:
//
// ' apostrophe or single quote
// " double quote
//
static _TCHAR* szaReplace[] = { _T("<"),_T("&"),_T(">"),_T("'"),_T(""") };
const _TCHAR* pFind = bAttrib?_T("<&>\'\""):_T("<&>");
string csText;
const _TCHAR* pSource = szText;
int nDestSize = _tcslen(pSource);
nDestSize += nDestSize / 10 + 7;
//_TCHAR* pDest = csText.GetBuffer(nDestSize); //there are no GetBuffer and ReleaseBuffer
//methods for the STL string object
//so we must perform this feature here
_TCHAR *pDest = new _TCHAR[nDestSize];
_tcscpy(pDest,csText.c_str());
int nLen = 0;
_TCHAR cSource = *pSource;
_TCHAR* pFound;
while ( cSource )
{
if ( nLen > nDestSize - 6 )
{
//csText.ReleaseBuffer(nLen);
delete [] pDest;
nDestSize *= 2;
//pDest = csText.GetBuffer(nDestSize);
pDest = new _TCHAR[nDestSize];
}
if ( (pFound=_tcschr(pFind,cSource)) != NULL )
{
pFound = szaReplace[pFound-pFind];
_tcscpy(&pDest[nLen],pFound);
nLen += _tclen(pFound);
}
else
{
_tccpy( &pDest[nLen], pSource );
nLen += _tclen( pSource );
}
pSource += _tcslen( pSource );
cSource = *pSource;
}
//csText.ReleaseBuffer(nLen); //since there is no ReleaseBuffer method as part of
//the STL string here, we finish our manual hack below
csText.clear();
for (int i=0; i7"
// Conveniently the result is always the same or shorter in byte length
//
static _TCHAR* szaCode[] = { _T("lt;"),_T("amp;"),_T("gt;"),_T("apos;"),_T("quot;") };
static int anCodeLen[] = { 3,4,3,5,5 };
static _TCHAR* szSymbol = _T("<&>\'\"");
string csText;
const _TCHAR* pSource = m_csDoc.c_str();
int nDestSize = nRight - nLeft + 1;
//_TCHAR* pDest = csText.GetBuffer(nDestSize); //there are no GetBuffer and ReleaseBuffer
//methods for the STL string object
//so we must perform this feature here
_TCHAR *pDest = NULL;
pDest = new _TCHAR[nDestSize];
pDest[0] = '\0';
int nLen = 0;
int nCharLen;
int nChar = nLeft;
while ( nChar <= nRight )
{
if ( pSource[nChar] == _T('&') )
{
// Look for matching &code;
bool bCodeConverted = false;
for ( int nMatch = 0; nMatch < 5; ++nMatch )
{
if ( nChar <= nRight - anCodeLen[nMatch]
&& _tcsncmp(szaCode[nMatch],&pSource[nChar+1],anCodeLen[nMatch]) == 0 )
{
// Insert symbol and increment index past ampersand semi-colon
pDest[nLen++] = szSymbol[nMatch];
nChar += anCodeLen[nMatch] + 1;
bCodeConverted = true;
break;
}
}
// If the code is not converted, leave it as is
if ( ! bCodeConverted )
{
pDest[nLen++] = _T('&');
++nChar;
}
}
else // not &
{
nCharLen = _tclen(&pSource[nChar]);
_tccpy( &pDest[nLen], &pSource[nChar] );
nLen += nCharLen;
nChar += nCharLen;
}
}
//csText.ReleaseBuffer(nLen); //since there is no ReleaseBuffer method as part of
//the STL string here, we finish our manual hack below
csText.clear();
for (int i=0; ivalue or
//
int iPos = x_GetFreePos();
m_aPos[iPos].nStartL = nOffset;
// Set links
m_aPos[iPos].iElemParent = iPosParent;
m_aPos[iPos].iElemChild = 0;
m_aPos[iPos].iElemNext = 0;
if ( iPosBefore )
{
// Link in after iPosBefore
m_aPos[iPos].iElemNext = m_aPos[iPosBefore].iElemNext;
m_aPos[iPosBefore].iElemNext = iPos;
}
else
{
// First child
m_aPos[iPos].iElemNext = m_aPos[iPosParent].iElemChild;
m_aPos[iPosParent].iElemChild = iPos;
}
// Create string for insert
string csInsert;
int nLenName = _tcslen(szName);
int nLenValue = szValue? _tcslen(szValue) : 0;
if ( ! nLenValue )
{
// empty element
csInsert = _T("<");
csInsert += szName;
csInsert += _T("/>\r\n");
m_aPos[iPos].nStartR = m_aPos[iPos].nStartL + nLenName + 2;
m_aPos[iPos].nEndL = m_aPos[iPos].nStartR - 1;
m_aPos[iPos].nEndR = m_aPos[iPos].nEndL + 1;
}
else
{
// value
string csValue = x_TextToDoc( szValue );
nLenValue = csValue.length();
csInsert = _T("<");
csInsert += szName;
csInsert += _T(">");
csInsert += csValue;
csInsert += _T("");
csInsert += szName;
csInsert += _T(">\r\n");
m_aPos[iPos].nStartR = m_aPos[iPos].nStartL + nLenName + 1;
m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + nLenValue + 1;
m_aPos[iPos].nEndR = m_aPos[iPos].nEndL + nLenName + 2;
}
// Insert
int nReplace = 0, nLeft = m_aPos[iPos].nStartL;
if ( bEmptyParent )
{
string csParentTagName = x_GetTagName(iPosParent);
string csFormat;
csFormat = _T(">\r\n");
csFormat += csInsert;
csFormat += _T("");
csFormat += csParentTagName;
csInsert = csFormat;
nLeft = m_aPos[iPosParent].nStartR - 1;
nReplace = 1;
// x_Adjust is going to update all affected indexes by one amount
// This will satisfy all except the empty parent
// Here we pre-adjust for the empty parent
// The empty tag slash is removed
m_aPos[iPosParent].nStartR -= 1;
// For the newly created end tag, see the following example:
// (len 4) becomes (len 11)
// In x_Adjust everything will be adjusted 11 - 4 = 7
// But the nEndL of element A should only be adjusted 5
m_aPos[iPosParent].nEndL -= (csParentTagName.length() + 1);
}
else if ( m_aPos[iPosParent].nStartR + 1 == m_aPos[iPosParent].nEndL )
{
csInsert = _T("\r\n") + csInsert;
nLeft = m_aPos[iPosParent].nStartR + 1;
}
x_DocChange( nLeft, nReplace, csInsert );
x_Adjust( iPos, csInsert.length() - nReplace );
if ( bAddChild )
x_SetPos( m_iPosParent, iPosParent, iPos );
else
x_SetPos( iPosParent, iPos, 0 );
return true;
}
/**
 * This static member function reads an XML file and returns a pointer to a
 * new CMarkup object built from the file's contents. The caller owns the
 * returned object and is responsible for deleting it when finished with it.
 * (Adapted from one of Patrick's code fragments.)
 *
 * @author Lance Deaver
 *
 * @param[in] file The XML file name.
 *
 * @return A pointer to a newly allocated CMarkup object.
 *
 * @exception CanNotOpenFile   The file cannot be opened, or its size cannot
 *                             be determined.
 * @exception IllFormedXMLFile The XML file contains invalid syntax.
 *
 * @bug
 * None Known
 */
CMarkup* CMarkup::LoadXMLFile(const std::string& file)
{
    std::ifstream xmlFile;
    //open the file
    xmlFile.open(file.c_str() , std::ios_base::in );
    if(!(xmlFile.good()) ) {
        std::string errMsg = std::string("CMarkup::LoadXMLFile: Cannot open or access file: ") + file;
        THROW_GATS_EXCEPTION(CanNotOpenFile, errMsg );
    }

    // determine the file size in bytes
    xmlFile.seekg( 0, std::ios_base::end );
    const std::streamoff endOffset = xmlFile.tellg();
    // tellg() reports failure as -1; converting that to unsigned would ask
    // for an enormous buffer, so treat it as an inaccessible file instead
    if( endOffset < 0 ) {
        std::string errMsg = std::string("CMarkup::LoadXMLFile: Cannot open or access file: ") + file;
        THROW_GATS_EXCEPTION(CanNotOpenFile, errMsg );
    }
    const unsigned int fileSizeInBytes = static_cast<unsigned int>(endOffset);

    // reset file pointer to beginning of the file
    xmlFile.seekg(0,std::ios_base::beg);

    // Allocate memory for the file contents plus a terminating NUL.
    // The vector value-initializes every element to '\0', so the buffer is a
    // valid C string even if fewer bytes than expected are read.
    std::vector<char> theXMLFileContents(fileSizeInBytes+1);

    // Read in the contents of the file into the allocated memory
    xmlFile.read(&theXMLFileContents[0], fileSizeInBytes);

    // Close the file
    xmlFile.close();

    //create a new CMarkup object for this XML file; the auto_ptr deletes it
    //again if the exception below is thrown
    std::auto_ptr<CMarkup> cmarkup(new CMarkup(&theXMLFileContents[0] ) ) ;

    // check that is well formed
    if( ! cmarkup->IsWellFormed() ) {
        std::string errMsg = std::string("CMarkup::LoadXMLFile: Badly formed XML file: ") + file;
        THROW_GATS_EXCEPTION(IllFormedXMLFile, errMsg );
    }

    //release the new CMarkup object from the auto_ptr and return its raw pointer
    return cmarkup.release();
}
/**
 * The CMarkup class has a habit of leaving a NUL character on the end of a
 * std::string, which gets in the way of normal string handling. This static
 * function returns a cleaned-up copy of its argument: the text is cut at the
 * first NUL character, and leading and trailing whitespace is then removed.
 *
 * @author Lance Deaver
 *
 * @param[in] stringToTrim The string argument that will be cleaned up.
 *
 * @return The trimmed string.
 *
 * @bug
 * None Known
 */
std::string CMarkup::TrimString( const std::string& stringToTrim)
{
    // Rebuilding from c_str() stops at the first NUL, which drops that NUL
    // and anything stored after it
    const std::string upToFirstNul( stringToTrim.c_str() );
    return boost::algorithm::trim_copy( upToFirstNul );
}