Files
Jedi-Academy/tools/ModView/genericparser.cpp
2013-04-04 14:32:05 -07:00

832 lines
20 KiB
C++

// Main Parsing Class
//
#if !defined(GENERICPARSER_H_INC)
#include "GenericParser.h"
#endif
#if 0
//
//
// CParseGroup
//
//
/************************************************************************************************
 * CParseGroup
 *	copy constructor. recursively copies all subgroups and pairs.
 *
 * Input
 *	an existing parse group (may be NULL, in which case an empty, non-writeable, parentless
 *	group is produced)
 *
 * Output
 *	none
 *
 ************************************************************************************************/
CParseGroup::CParseGroup(CParseGroup *orig)
{
	if (orig == 0)
	{
		// nothing to copy from: still leave the scalar members in a defined state
		mWriteable = false;
		mParent = 0;
		return;
	}

	mWriteable = orig->mWriteable;
	mName = orig->mName;
	mParent = orig->mParent;

	// copy the key/value pairs
	for (TParsePairIter itp = orig->PairsBegin(); itp != orig->PairsEnd(); ++itp)
	{
		AddPair((*itp).first, (*itp).second);
	}

	// deep-copy each subgroup
	for (TParseGroupIter itg = orig->GroupsBegin(); itg != orig->GroupsEnd(); ++itg)
	{
		CParseGroup *newGroup = new CParseGroup((*itg).second);

		// the recursive copy above inherited the source subgroup's mParent; point it at
		// this copy so GetParent() on the new subgroup stays inside the new hierarchy
		newGroup->mParent = this;
		AddSubGroup(newGroup);
	}
}
/************************************************************************************************
 * ~CParseGroup
 *	destructor. hands off to Clean(), which deletes every subgroup and empties the
 *	in-order lists.
 *
 * inputs:
 *	none
 *
 * return:
 *	nothing
 ************************************************************************************************/
CParseGroup::~CParseGroup()
{
	this->Clean();
}
/************************************************************************************************
 * FindPairValue
 *	Look up a key in this group's pairs (CParseGroup::mPairs).
 *
 * inputs:
 *	key - the key to look up
 *	defaultVal - the string handed back when the key is not present
 *
 * return:
 *	a copy of the value stored under key, or a copy of defaultVal when there is no such key
 *
 ************************************************************************************************/
string CParseGroup::FindPairValue(string key, string defaultVal)
{
	const map<string, string>::iterator found = mPairs.find(key);

	return (found == mPairs.end()) ? defaultVal : found->second;
}
/************************************************************************************************
 * FindSubGroup
 *	search for a subgroup with a particular name. direct children are checked first; only
 *	if none of them matches (and recurse is set) are the children's own subgroups searched,
 *	depth-first in mSubGroups iteration order.
 *
 * Input
 *	name of subgroup, boolean indicating whether or not to search recursively
 *
 * Output
 *	pointer to the desired subgroup, NULL if not found
 *
 ************************************************************************************************/
CParseGroup *CParseGroup::FindSubGroup(string name, bool recurse/*false*/)
{
	TParseGroupIter it = mSubGroups.find(name);

	// direct child with that name wins
	if (it != mSubGroups.end())
	{
		return it->second;
	}

	// caller only wanted the top level searched
	if (!recurse)
	{
		return NULL;
	}

	// descend into each (non-NULL) child in turn
	for (it = mSubGroups.begin(); it != mSubGroups.end(); it++)
	{
		CParseGroup *child = it->second;
		if (!child)
		{
			continue;
		}

		CParseGroup *hit = child->FindSubGroup(name, recurse);
		if (hit)
		{
			return hit;
		}
	}

	return NULL;
}
/************************************************************************************************
 * FindSubGroupWithPair
 *	finds a direct subgroup whose pairs contain the provided key with exactly the provided
 *	value. only the immediate subgroups are examined (no recursion).
 *
 * inputs:
 *	strings key and value for locating the subgroup
 *
 * return:
 *	pointer to the first matching subgroup in mSubGroups iteration order, or NULL if none match
 ************************************************************************************************/
CParseGroup *CParseGroup::FindSubGroupWithPair(string key, string value)
{
	for (TParseGroupIter it = mSubGroups.begin(); it != mSubGroups.end(); it++)
	{
		CParseGroup *candidate = it->second;
		TParsePairIter match = candidate->mPairs.find(key);

		// the key has to exist in this subgroup AND carry the requested value
		if (match != candidate->mPairs.end() && value == match->second)
		{
			return candidate;
		}
	}

	// no subgroup holds that key/value pair
	return NULL;
}
/************************************************************************************************
 * DeleteSubGroup
 *	find the given subgroup in our multimap of subgroups and remove it. also, try to find
 *	it in our vector of ordered subgroups and remove it from there. the subgroup object is
 *	deleted (which in turn cleans up its own subgroups).
 *
 * Input
 *	parse group to be deleted (NULL is tolerated and simply reports failure)
 *
 * Output
 *	true if it was found and deleted, false otherwise. on false, delGroup is left untouched.
 *
 ************************************************************************************************/
bool CParseGroup::DeleteSubGroup(CParseGroup *delGroup)
{
	if (delGroup == 0)
	{
		return false;
	}

	// several subgroups may share one name, so locate the entry by pointer identity.
	// lower_bound() gives the FIRST entry for the name (find() is free to return any of
	// the duplicates), and the end() test comes before the dereference.
	TParseGroupIter itg = mSubGroups.lower_bound(delGroup->GetName());
	while ( (itg != mSubGroups.end()) && ((*itg).second != delGroup) )
	{
		++itg;
	}
	if (itg == mSubGroups.end())
	{
		// not one of ours
		return false;
	}

	// drop it from the ordered list as well, if it is in there
	for (vector<pair<string, CParseGroup*> >::iterator itv = mInOrderSubGroups.begin();
		 itv != mInOrderSubGroups.end(); ++itv)
	{
		if ((*itv).second == delGroup)
		{
			mInOrderSubGroups.erase(itv);
			break;
		}
	}

	// unlink first, then destroy, so the map never holds a dangling pointer
	mSubGroups.erase(itg);
	delete delGroup;
	return true;
}
/************************************************************************************************
* Clean
* cleans up our sub group list
*
* inputs:
* none
*
* return:
* nothing
************************************************************************************************/
void CParseGroup::Clean()
{
TParseGroupIter it;
for (it = mSubGroups.begin(); it != mSubGroups.end(); it++)
{
delete (CParseGroup*)(*it).second;
}
mSubGroups.clear();
mInOrderPairs.clear();
mInOrderSubGroups.clear();
}
/************************************************************************************************
* WriteLine
* writes a line of data to the provided output string
*
* inputs:
* a brace depth, allowing pretty formatted text, a line of text to be written, and an output string
*
* return:
* true if data is written successfully, false otherwise
************************************************************************************************/
bool CParseGroup::WriteLine(int depth, const string line, string &output)
{
for (int i = 0; i < depth; i++)
{
output += "\t";
}
output += line;
output += "\n";
return true;
}
/************************************************************************************************
 * WriteGroup
 *	writes this group as text: its name, an opening brace, each in-order pair as
 *	"key"<tab>"value", each in-order subgroup (preceded by a blank line, written recursively
 *	one level deeper), and finally the closing brace.
 *
 * inputs:
 *	name to print for the group, indentation depth, the destination string
 *
 * return:
 *	always true
 ************************************************************************************************/
bool CParseGroup::WriteGroup(string name, int depth, string &output)
{
	typedef vector< pair<string, string> >			TOrderedPairs;
	typedef vector< pair<string, CParseGroup*> >	TOrderedGroups;

	TOrderedPairs	&orderedPairs = GetInOrderPairs();
	TOrderedGroups	&orderedGroups = GetInOrderGroups();
	const int		innerDepth = depth + 1;

	// header: the group's name followed by the brace that opens its body
	WriteLine(depth, name, output);
	WriteLine(depth, "{", output);

	// body, part one: the key/value pairs
	for (TOrderedPairs::iterator itp = orderedPairs.begin(); itp != orderedPairs.end(); ++itp)
	{
		const string entry = "\"" + itp->first + "\"\t\"" + itp->second + "\"";
		WriteLine(innerDepth, entry, output);
	}

	// body, part two: the nested groups, each separated by an (indented) empty line
	for (TOrderedGroups::iterator itg = orderedGroups.begin(); itg != orderedGroups.end(); ++itg)
	{
		WriteLine(innerDepth, "", output);

		CParseGroup *child = itg->second;
		if (child)
		{
			child->WriteGroup(itg->first, innerDepth, output);
		}
	}

	// footer: the matching close brace
	WriteLine(depth, "}", output);
	return true;
}
//
//
// CGenericParser
//
//
/************************************************************************************************
 * CGenericParser
 *	constructor. sets the max token length to 1024 and starts with no current group, a zero
 *	line count, a zero brace depth and a non-writeable parser.
 *
 * input:
 *	none
 *
 * return:
 *	nothing
 ************************************************************************************************/
CGenericParser::CGenericParser():
mMaxTokenLength(1024)
{
	mLineCount = 0;
	mBraceDepth = 0;
	mCurGroup = NULL;

	// Write() tests mWriteable, and nothing but Parse() ever assigned it; give it a defined
	// value so calling Write() before Parse() doesn't read an uninitialized member
	mWriteable = false;
}
/************************************************************************************************
 * ~CGenericParser
 *	destructor. cleans the root group (mGroups), which deletes every CParseGroup hanging
 *	off of it.
 *
 * inputs:
 *	none
 *
 * return:
 *	nothing
 ************************************************************************************************/
CGenericParser::~CGenericParser()
{
	this->mGroups.Clean();
}
/************************************************************************************************
 * SkipWhiteSpace
 *	advances past any run of whitespace. every '\n' skipped bumps mLineCount and sets
 *	*newLine to true (*newLine is never reset to false here).
 *
 * input:
 *	pointer to NUL-terminated text data, pointer to a boolean which is set when a newline is
 *	encountered (may be NULL if the caller doesn't care)
 *
 * return:
 *	pointer to the first non-whitespace character, or NULL if data is NULL or only the
 *	terminating NUL remains.
 ************************************************************************************************/
char *CGenericParser::SkipWhiteSpace( char *data, bool *newLine )
{
	if (NULL == data)
	{
		return NULL;
	}

	// isspace() is only defined for unsigned char values (and EOF), so plain char has to be
	// converted first or bytes >= 0x80 invoke undefined behaviour where char is signed
	while (isspace(static_cast<unsigned char>(*data)))
	{
		if ('\n' == *data)
		{
			if (newLine)
			{
				*newLine = true;
			}
			mLineCount++;
		}
		data++;
	}

	if ('\0' == *data)
	{
		// reached the end of our data
		return NULL;
	}
	return data;
}
/************************************************************************************************
 * EatCurrentLine
 *	moves *data forward to the next '\n' or to the terminating NUL, whichever comes first
 *	(the '\n' itself is not consumed). braces passed along the way still adjust
 *	mBraceDepth: '{' increments it, '}' decrements it unless it is already zero.
 *
 * input:
 *	pointer to the text data pointer (updated in place)
 *
 * return:
 *	nothing
 ************************************************************************************************/
void CGenericParser::EatCurrentLine( char **data )
{
	char *cursor = *data;

	for ( ; (*cursor != '\0') && (*cursor != '\n'); ++cursor)
	{
		switch (*cursor)
		{
		case '{':
			mBraceDepth++;
			break;

		case '}':
			// a '}' at depth zero means too many closing braces; it is ignored
			if (mBraceDepth != 0)
			{
				mBraceDepth--;
			}
			break;

		default:
			break;
		}
	}

	*data = cursor;
}
/************************************************************************************************
 * GetToken
 *	parses a text string to procure the next viable (non-whitespace, non-comment) token.
 *	said token will be stored in CGenericParser::mToken. a token is either the contents of
 *	a double-quoted string (quotes stripped) or a run of non-whitespace characters. at most
 *	mMaxTokenLength characters are kept; the excess is consumed and discarded.
 *
 *	braces inside bare tokens and inside // comments adjust mBraceDepth; braces inside
 *	quoted strings and block comments do not.
 *
 *	note that an empty quoted string ("") yields an empty token, which is the same thing
 *	this function hands back at end-of-data.
 *
 * input:
 *	pointer to the character data pointer (advanced past the token; set to NULL once the
 *	data is exhausted), boolean stating whether or not the token may be looked for beyond a
 *	new line. if it may not and a new line is crossed, an empty token is returned.
 *
 * return:
 *	reference to the string containing the text value of the new token (mToken)
 *
 ************************************************************************************************/
string &CGenericParser::GetToken( char **dataPtr, const bool allowNewLines )
{
	int		len = 0;
	bool	newLine = false;
	char	*data = *dataPtr;

	// reset our current token
	mToken = "";

	// return an empty string if text data is NULL
	if ( !data )
	{
		*dataPtr = NULL;
		return mToken;
	}

	// loop through the text data, skip whitespace, skip comments, get to the
	//beginning of the next token
	while ( (data = SkipWhiteSpace( data, &newLine )) != NULL )
	{
		if ( newLine && !allowNewLines )
		{
			*dataPtr = data;
			return mToken;
		}

		// skip // comments
		if ( ('/' == data[0]) && ('/' == data[1]) )
		{
			EatCurrentLine(&data);
		}
		// skip /* */ comments
		else if ( ('/' == data[0]) && ('*' == data[1]) )
		{
			// step over the opener first, otherwise its '*' can pair with a following
			// '/' and "/*/" is mistaken for a complete comment
			data += 2;
			while ( *data && (('*' != data[0]) || ('/' != data[1])) )
			{
				data++;
			}
			if ( *data )
			{
				data += 2;
			}
		}
		else
		{
			break;
		}
	}

	// if SkipWhiteSpace returned NULL, we got to the end of our data without finding
	//another valid token
	if ( !data )
	{
		*dataPtr = NULL;
		return mToken;
	}

	//
	// at this point we're pointing at the next valid char in our data stream
	//

	// handle quoted strings
	if ('\"' == *data)
	{
		data++;
		while ( *data && ('\"' != *data) )
		{
			if (len < mMaxTokenLength)
			{
				mToken.append(1, *data);
				len++;
			}
			data++;
		}
		if ('\"' == *data)
		{
			// consume the closing quote
			data++;
		}
		// else: unterminated string. stay ON the terminating NUL so that the next call
		// sees end-of-data instead of reading beyond the buffer.
		*dataPtr = data;
		return mToken;
	}

	// ok, these next characters are a token
	do
	{
		if ('{' == *data)
		{
			mBraceDepth++;
		}
		else if ('}' == *data)
		{
			// a '}' at depth zero means too many closing braces; it is ignored
			if (0 != mBraceDepth)
			{
				mBraceDepth--;
			}
		}

		if (len < mMaxTokenLength)
		{
			mToken.append(1, *data);
			len++;
		}
		data++;

		// stop at whitespace OR at the terminating NUL (isspace('\0') is false, so the NUL
		// needs its own test). a '\n' ending the token is deliberately not counted here:
		// the cursor is left on it, so SkipWhiteSpace counts it on the next call.
	} while ( *data && !isspace(static_cast<unsigned char>(*data)) );

	*dataPtr = data;
	return mToken;
}
/************************************************************************************************
 * ReturnToBraceDepthZero
 *	error recovery. called when a parse error is encountered; discards tokens until the
 *	brace depth is back at zero (where the parser can hopefully start over) or until an
 *	empty token comes back.
 *
 * inputs:
 *	pointer to character data to be parsed
 *
 * return:
 *	nothing
 ************************************************************************************************/
void CGenericParser::ReturnToBraceDepthZero(char **dataPtr)
{
	// the mismatched '}' that got us here was already parsed (and already subtracted from the
	// brace depth), so add it back before we start counting
	mBraceDepth++;

	// eat tokens; the result of each read lands in mToken
	do
	{
		GetToken(dataPtr);
	} while ( (mBraceDepth != 0) && !mToken.empty() );
}
/************************************************************************************************
 * ParseGroup
 *	recursively parses a text string using a nested-block format. basically parses a pair of
 *	tokens at once, if possible:
 *		name {		starts a new group called name, which becomes the current group
 *		key value	adds a pair to the current group (or to the file-level group if none)
 *		}			closes the current group
 *
 * inputs:
 *	pointer to character data to be parsed
 *
 * return:
 *	true when a closing brace ended a group, false once an empty token is read (end of data)
 ************************************************************************************************/
bool CGenericParser::ParseGroup(char **dataPtr)
{
	string	tok1;
	string	tok2;

	while (1)
	{
		tok1 = GetToken(dataPtr);
		if (tok1.empty())
		{
			// reached end of data
			return false;
		}
		if ('}' == tok1[0])
		{
			// ending a ParseGroup. a '}' at file level has no group to close, so there is
			// no parent to step back to -- just skip the stray brace rather than
			// dereferencing a NULL mCurGroup.
			if (mCurGroup)
			{
				mCurGroup = mCurGroup->GetParent();
			}
			return true;
		}

		tok2 = GetToken(dataPtr);
		if (tok2.empty())
		{
			// empty group or a key-value pair with no value, just a key
		}
		else if ('{' == tok2[0])
		{
			// we're starting a new CParseGroup and its name is currently in tok1
			CParseGroup *newGroup = new CParseGroup(tok1, mCurGroup, mWriteable);

			if (NULL == mCurGroup)
			{
				// this is a top level group so add it to the parser's list. its contents
				// are picked up by the following passes of this loop.
				mGroups.AddSubGroup(newGroup);
				mCurGroup = newGroup;
			}
			else
			{
				// this is somebody's subgroup
				mCurGroup->AddSubGroup(newGroup);

				// recurse to parse the subgroup's contents. the result is not needed here:
				// if the data ran out, the next GetToken above returns an empty token.
				mCurGroup = newGroup;
				ParseGroup(dataPtr);
			}
		}
		else if ('}' == tok2[0])
		{
			// error. try to recover.
			ReturnToBraceDepthZero(dataPtr);
		}
		else
		{
			if (mCurGroup == 0)
			{
				// assume we're adding a key/value pair at the file level
				mGroups.AddPair(tok1, tok2);
			}
			else
			{
				// we're considering tok1 as a key and tok2 as a value
				mCurGroup->AddPair(tok1, tok2);
			}
		}
	}
}
/************************************************************************************************
 * Parse
 *	parses text data for CParseGroup's until end-of-file is reached. before anything is
 *	parsed (or cleaned), the data is checked for an equal number of '{' and '}' characters
 *	-- every occurrence counts, including those in comments and quoted strings.
 *
 * inputs:
 *	pointer to character data to be parsed, boolean indicating if we should wipe our
 *	stored data before parsing more, boolean indicating if we should make this data
 *	writeable
 *
 * return:
 *	true if the data was parsed, false if dataPtr is NULL or the brace counts don't match
 *	(in which case the stored data is left untouched)
 ************************************************************************************************/
bool CGenericParser::Parse(char **dataPtr, bool cleanFirst/*true*/, bool writeable /*false*/)
{
	if (NULL == dataPtr)
	{
		return false;
	}

	// a quick test to find hand-edit errors that always seem to manifest as perplexing bugs
	// before we find them. done as one read-only pass over the caller's buffer; NULL data
	// has no braces to mismatch and is handled by GetToken as end-of-data.
	if (*dataPtr)
	{
		int iOpeningBraces = 0;
		int iClosingBraces = 0;

		for (const char *p = *dataPtr; *p; ++p)
		{
			if ('{' == *p)
			{
				iOpeningBraces++;
			}
			else if ('}' == *p)
			{
				iClosingBraces++;
			}
		}

		if (iOpeningBraces != iClosingBraces)
		{
			// maybe print something here, but in any case...
			//
			return false;
		}
	}

	if (cleanFirst)
	{
		// if this is a new stream of data, init some stuff
		mGroups.Clean();
	}

	mWriteable = writeable;
	mGroups.SetWriteable(mWriteable);

	// keep going until ParseGroup reports end of data
	while (ParseGroup(dataPtr))
	{
		;
	}
	return true;
}
/************************************************************************************************
 * Write
 *	appends everything stored under the root group to an output string: first the
 *	file-level pairs as "key"<tab>"value" lines, then each top-level group (preceded by a
 *	blank line) via CParseGroup::WriteGroup at depth zero.
 *
 * Input
 *	output string
 *
 * Output
 *	false (with nothing written) if the parser isn't writeable, true otherwise
 *
 ************************************************************************************************/
bool CGenericParser::Write(string &output)
{
	typedef vector< pair<string, string> >			TOrderedPairs;
	typedef vector< pair<string, CParseGroup*> >	TOrderedGroups;

	TOrderedPairs	&topPairs = mGroups.GetInOrderPairs();
	TOrderedGroups	&topGroups = mGroups.GetInOrderGroups();

	if (!mWriteable)
	{
		return false;
	}

	// file-level key/value pairs come first
	for (TOrderedPairs::iterator itp = topPairs.begin(); itp != topPairs.end(); ++itp)
	{
		const string entry = "\"" + itp->first + "\"\t\"" + itp->second + "\"";
		mGroups.WriteLine(0, entry, output);
	}

	// then every top-level group, each one recursing through its own subgroups
	for (TOrderedGroups::iterator itg = topGroups.begin(); itg != topGroups.end(); ++itg)
	{
		mGroups.WriteLine(0, "", output);

		CParseGroup *group = itg->second;
		if (group)
		{
			group->WriteGroup(itg->first, 0, output);
		}
	}

	return true;
}
/************************************************************************************************
 * AddParseGroup
 *	if you want to be able to write out a parsegroup hierarchy, you may want to create
 *	some of it yourself rather than just rely on what you've read in. that's what this
 *	fn is for. the new group is allocated on the heap and added to groupParent's subgroups
 *	before it is returned.
 *
 * Input
 *	name of the new parse group, parent of your new parse group (a reference, so it can't
 *	be NULL), whether or not your new parse group will be writeable (defaults to true)
 *
 * Output
 *	pointer to new parse group
 *
 ************************************************************************************************/
CParseGroup *CGenericParser::AddParseGroup(string groupName, CParseGroup &groupParent, bool writeable /*true*/)
{
	CParseGroup *const	parent = &groupParent;
	CParseGroup			*created = new CParseGroup(groupName, parent, writeable);

	// hook the new group into its parent's list of subgroups
	parent->AddSubGroup(created);

	return created;
}
/************************************************************************************************
 * DeleteParseGroup
 *	deletes a parse group by asking its parent to remove it (CParseGroup::DeleteSubGroup)
 *
 * Input
 *	parent of group to be deleted, pointer to parse group to be deleted. either may be NULL,
 *	in which case nothing is deleted.
 *
 * Output
 *	true if it was found and deleted, false otherwise
 *
 ************************************************************************************************/
bool CGenericParser::DeleteParseGroup(CParseGroup *parentGroup, CParseGroup *delGroup)
{
	// DeleteSubGroup already copes with a NULL delGroup; give a NULL parent the same
	// treatment instead of calling through it
	if (NULL == parentGroup)
	{
		return false;
	}
	return parentGroup->DeleteSubGroup(delGroup);
}
/************************************************************************************************
* <function>
*
* inputs:
*
* return:
*
************************************************************************************************/
#endif