// Read the documentation to learn more about C++ code generator // versioning. // %X% %Q% %Z% %W% #include #include #include #include #include #include // Expression #include #include #include #include // Parameterized Class atLocation // Class Expression::ExpressionError Expression::ExpressionError::ExpressionError (const string& errMessage) : YellowAlert("\nParse Error: ") { *IosHolder::errHolder() << errMessage << '\n'; } // Class Expression::TokenType Expression::TokenType::TokenType (Token type, int loc, int seq, string charToken) : type(type), location(loc), sequenceNo(seq), tokenString(charToken) { } int Expression::TokenType::operator==(const Expression::TokenType &right) const { if (type != right.type) return false; if (location != right.location) return false; if (sequenceNo != right.sequenceNo) return false; return true; } int Expression::TokenType::operator!=(const Expression::TokenType &right) const { return !operator==(right); } // Class Expression::GEError Expression::GEError::GEError (const string& errMessage) : ExpressionError(errMessage) { } // Class Expression::GEFatal Expression::GEFatal::GEFatal (const string& errMessage) : ExpressionError(errMessage) { } // Class Expression::Word Expression::Word::Word() : location(0), sequence(-1), content("") { } Expression::Word::Word(const Expression::Word &right) : location(right.location), sequence(right.sequence), content(right.content) { } Expression::Word::Word (string str, int loc) : location(loc), sequence(-1), content(str) { } Expression::Word & Expression::Word::operator=(const Expression::Word &right) { if ( this != &right ) { Word tmp(right); std::swap(location,tmp.location); std::swap(sequence,tmp.sequence); std::swap(content,tmp.content); } return *this; } // Class Expression::RangeError Expression::RangeError::RangeError (const string& diag) : ExpressionError(" internal array out of range ") { *IosHolder::errHolder() << diag << '\n'; } // Class Expression::EmptyExpression Expression::EmptyExpression::EmptyExpression() : ExpressionError(" string is empty ") { } // Class Expression Expression::Expression(const Expression &right) : m_parseString(right.m_parseString), m_group(right.m_group), m_location(right.m_location), m_validChar(right.m_validChar), m_delim(right.m_delim), m_base(right.m_base), m_lcurl(right.m_lcurl), m_rcurl(right.m_rcurl), m_wordExp(right.m_wordExp), m_tokenList(right.m_tokenList), m_plus(right.m_plus), m_star(right.m_star), m_rbrace(right.m_rbrace), m_lbrace(right.m_lbrace) { } Expression::Expression (const string& s, const string& base) : m_parseString(s), m_group(0), m_location(0), m_validChar("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$"), m_delim(" \t()+*"), m_base(""), m_lcurl(), m_rcurl(), m_wordExp(), m_tokenList(), m_plus(), m_star(), m_rbrace(), m_lbrace() { // If base string is not empty, assume we are creating an // object for a sub-expression, and therefore it comes from // a string that has already been validated. if (base.empty()) { // Replace redundant parentheses with *'s or blanks // as needed. This may throw. standardizeInputStyle(); m_base = m_parseString; } else m_base = base; init(); } Expression::Expression (const string& s, const string& validWord, const string& wordDelim, const string& base) : m_parseString(s), m_group(0), m_location(0), m_validChar(validWord), m_delim(wordDelim), m_base(""), m_lcurl(), m_rcurl(), m_wordExp(), m_tokenList(), m_plus(), m_star(), m_rbrace(), m_lbrace() { // If base string is not empty, assume we are creating an // object for a sub-expression, and therefore it comes from // a string that has already been validated. if (base.empty()) { // Replace redundant parentheses with *'s or blanks // as needed. This may throw. standardizeInputStyle(); m_base = m_parseString; } else m_base = base; } Expression::~Expression() { } Expression & Expression::operator=(const Expression &right) { if (this != &right) { copy(right); } return *this; } void Expression::analyze (XSModExpTree& expTree) { // Get location of all TOP level component groups, not nested. std::vector groupLocs = findGroups(m_parseString); string::size_type startPos = 0; IntegerArray groupBoundary; for (int iGroup=0; iGroup<(int)groupLocs.size(); ++iGroup) { string compGroupString(m_parseString.substr(startPos, groupLocs[iGroup] - startPos)); // makeSubExpressions is a recursive function that will // create Expression objs for any nested component groups, // and add them to the expTree in post-order. makeSubExpressions(compGroupString, m_base, expTree); groupBoundary.push_back((int)expTree.size()-1); startPos = groupLocs[iGroup] + 1; } expTree.insertRoot(groupBoundary); // Set m_group to be the same for every sub-Exp in a top level // component group, enforcing the CONSTRAINT that only top level // component groups will have m_group = m_location. ExpTreeIt itExp = expTree.begin(); ExpTreeIt itEnd = expTree.end(); //Sequence numbers are 1-based. int wordStart=1; for (int iGroup=0; iGroup<(int)groupBoundary.size(); ++iGroup) { int groupNum = groupBoundary[iGroup]; while (itExp != itEnd && expTree.position(itExp) <= groupNum) { itExp->m_group = groupNum; if (expTree.position(itExp) == groupNum) sequenceSubExpressions(itExp, wordStart); ++itExp; } } XSstream* xscout = dynamic_cast(IosHolder::outHolder()); int savConVerbose = 10; int savLogVerbose = 10; if (xscout) { savConVerbose = xscout->conVerbose(); savLogVerbose = xscout->logVerbose(); XSstream::verbose(*xscout,30,30); } { ExpTreeIt etEnd(expTree.end()); for (ExpTreeIt et = expTree.begin(); et != etEnd; ++et) { *IosHolder::outHolder() << "Subexpressions: \n"; *IosHolder::outHolder() << *et << std::endl; } } if (xscout) { XSstream::verbose(*xscout, savConVerbose, savLogVerbose); } } void Expression::check () { // This is intended for doing another layer of checking beyond what // was performed in constructor via standardizeInputStyle. Unlike // there, these rules need not apply to subclasses. // 1. Checks that no '-' or '/' exists anywhere except between // curly brackets (where they could be part of a file path and name). // 2. Expression may not lead off with '+' or '*' (not allowing // a unary '+' in this context). // 3. '+' and '*' operators must followed by a word. // 4. There must not be 2 consecutive word tokens. const string notAllowed("-/"); string::size_type pos = 0; while (pos != string::npos) { pos = m_parseString.find_first_of(notAllowed, pos); if (pos != string::npos) { if (!isBetweenCurls(pos)) { throw ExpressionError("Simple expression cannot contain operator /,-"); } ++pos; } } if (nTokens() && (token(0) == Plus || token(0) == Star)) { string msg("Expression may not begin with a binary operator."); throw ExpressionError(msg); } for (int t = 0; t < nTokens(); t++) { Token currentTok = token(t); Token nextTok = nextToken(t); if (currentTok == WordExp && nextTok == WordExp) { string msg = "Syntax using consecutive string literals is no longer supported.\n"; msg += " For multiplication use '*' or '()' operators.\n"; msg += " For table models use '{}', eg. atable{file.mod}."; throw ExpressionError(msg); } if ( (currentTok == Plus) || (currentTok == Star)) { if ( nextTok != WordExp && nextTok != Lbrace) { string msg = "Binary operator must be followed by string literal"; throw ExpressionError(msg); } } } } void Expression::insertWordbySequence (int index, const string& newWord, char op) { // find the insert point. by the time this is called we must know that the // word of the right index lies in this expression. std::vector::iterator iw (m_wordExp.begin()); std::vector::iterator iwEnd (m_wordExp.end()); while ( iw != iwEnd && iw->sequence != index ) ++iw; //int NT = m_tokenList.size(); std::vector::iterator it (m_tokenList.begin()); std::vector::iterator itEnd (m_tokenList.end()); string newInsert(newWord); string opStr(" "); opStr[1] = op; int insertLocation(0); bool previousOperationWasCombine (false); // insertion before the end if (iw != iwEnd ) { insertLocation = iw->location; while (it != itEnd && it->location != insertLocation) ++it; while (it != m_tokenList.begin() && !previousOperationWasCombine ) { --it; if ( it->type == Plus || it->type == Lbrace || it->type == Rbrace ) break; previousOperationWasCombine = ( it->type == Star); } newInsert = newWord + opStr; } // insertion at the end. else { insertLocation = m_parseString.size(); newInsert = opStr + newWord; } int firstWord(m_wordExp[0].sequence); switch ( op ) { case '+': { // case where we have something like A*B -> A(+B) if ( previousOperationWasCombine ) { m_parseString = starToParen(m_parseString,insertLocation); // find locations of words in the new string. std::vector oldWords(m_wordExp); findWordLocation(); for (size_t j = 0; j < oldWords.size(); ++j) { m_wordExp[j].sequence = oldWords[j].sequence; } iw = m_wordExp.begin(); while ( iw != iwEnd && iw->sequence != index ) ++iw; insertLocation = iw->location; } m_parseString.insert(insertLocation,newInsert); init(); // problem if the expression contains a nest. sequenceWords(firstWord); break; } default: { m_parseString.insert(insertLocation,newInsert); init(); sequenceWords(firstWord); break; } } } std::vector< int > Expression::locateChars (const char c) { std::vector value (0); int loc = m_parseString.find_first_of(c); while ( loc >= 0 ) { value.push_back(loc); loc = m_parseString.find_first_of(c,value.back()+1); } return value; } void Expression::findWordLocation () { int loc = m_parseString.find_first_of(validChar()); m_wordExp.clear(); while (loc >= 0) { int endstr = parseString().find_first_of(delim(),loc); endstr = (endstr > 0) ? endstr : size(); m_wordExp.push_back(Word(m_parseString.substr(loc,endstr-loc),loc)); loc = parseString().find_first_of(validChar(),endstr); } return; } std::vector Expression::findGroups (const string& inString) { // Intended to be of use PRIOR to creating sub-Expression object from // inString. Therefore cannot use any Token location information. // Fill groupLocs with positions = 1 AFTER the end of a top level // (meaning non-nested) component group. Essentially this means the // position of every top level '+', with the last entry being // inString.length(). std::vector groupLocs; const string searchChars("+("); string::size_type pos = 0; while (pos != string::npos) { pos = inString.find_first_of(searchChars, pos); if (pos != string::npos) { if (inString[pos] == '+') { groupLocs.push_back(pos); ++pos; } else { pos = findMatchingRightParen(inString, pos); } } } groupLocs.push_back(inString.length()); return groupLocs; } void Expression::makeTokenList () { static const size_t NPOS (std::string::npos); m_tokenList.clear(); size_t wc = 0; size_t lc = 0; size_t rc = 0; size_t pc = 0; size_t sc = 0; std::vector test(5); std::vector::const_iterator j; do { std::fill(test.begin(),test.end(),NPOS); if (m_wordExp.size() > 0 && wc < m_wordExp.size()) test[0] = m_wordExp[wc].location; if (m_lbrace.size() > 0 && lc < m_lbrace.size()) test[1] = m_lbrace[lc]; if (m_rbrace.size() > 0 && rc < m_rbrace.size()) test[2] = m_rbrace[rc]; if (m_plus.size() > 0 && pc < m_plus.size()) test[3] = m_plus[pc]; if (m_star.size() > 0 && sc < m_star.size()) test[4] = m_star[sc]; j = std::min_element(test.begin(),test.end()); std::vector::difference_type k = j - test.begin(); if ( *j < size()) { switch (k) { case 0: m_tokenList.push_back(TokenType(WordExp,m_wordExp[wc].location,wc+1,"$W")); ++wc; break; case 1: m_tokenList.push_back(TokenType(Lbrace,m_lbrace[lc],lc+1,"(")); ++lc; break; case 2: m_tokenList.push_back(TokenType(Rbrace,m_rbrace[rc],rc+1,")")); ++rc; break; case 3: m_tokenList.push_back(TokenType(Plus,m_plus[pc],pc+1,"+")); ++pc; break; case 4: m_tokenList.push_back(TokenType(Star,m_star[sc],sc+1,"*")); ++sc; break; } } } while ( *j < size() ); } void Expression::failCheck () { // failCheck is called after subexpression processing. // the somewhat more generous check() function is called when an // expression is initially parsed. if (m_lbrace.size() != m_rbrace.size()) { throw RedAlert(" internal error in parentheses"); } for (int t = 0; t < nTokens(); t++) { if ( (token(t) == Plus) || (token(t) == Star) ) { if ( nextToken(t) != WordExp && nextToken(t) != Lbrace) { throw RedAlert(" internal error: unary operator must be followed by string literal"); } } } } void Expression::init () { if (m_parseString.empty()) { m_lbrace.clear(); m_rbrace.clear(); m_star.clear(); m_plus.clear(); m_lcurl.clear(); m_rcurl.clear(); clearWordList(); clearTokenList(); } else { // find string locations of delimiters setLbrace(locateChars('(')); setRbrace(locateChars(')')); setStar(locateChars('*')); setPlus(locateChars('+')); m_lcurl = locateChars('{'); m_rcurl = locateChars('}'); // Locations of word strings, delimited by (,),* and +, and the words // themselves. // For these purposes, curly-brackets are considered part of a word // since they are not delimiters. Therefore they are NOT made into // their own Token objects. findWordLocation(); // the ordered list of tokens in the expression. makeTokenList(); } } void Expression::copy (const Expression& right) { Expression tmp(right); Swap(tmp); } int Expression::tokenByLocation (int i) const throw (RedAlert) { atLocation findToken; std::vector::const_iterator x = find_if(m_tokenList.begin(),m_tokenList.end(),bind2nd(findToken,i)); // find_if returns an input iterator, but since tokenList // is a vector, its iterators are random access type so // operations such as it1 +/- it2 defined. if ( x == m_tokenList.end() ) throw RedAlert("Misindex token operation"); return x - m_tokenList.begin(); } int Expression::findEndofNest (int start) const { // return the token number of the end of the nest that begins at this token. string::size_type lParPos = static_cast(tokenList(start).location); string::size_type rParPos = findMatchingRightParen(m_parseString, lParPos); return tokenByLocation(static_cast(rParPos)); } void Expression::parseString (const char* cs) { m_parseString = string(cs); } Expression* Expression::clone () const { return new Expression(*this); } void Expression::Swap (Expression& right) { std::swap(m_parseString,right.m_parseString); std::swap(m_validChar,right.m_validChar); std::swap(m_delim,right.m_delim); std::swap(m_group,right.m_group); std::swap(m_location,right.m_location); std::swap(m_rbrace,right.m_rbrace); std::swap(m_base,right.m_base); std::swap(m_lbrace,right.m_lbrace); std::swap(m_star,right.m_star); std::swap(m_plus,right.m_plus); std::swap(m_wordExp,right.m_wordExp); std::swap(m_tokenList,right.m_tokenList); std::swap(m_lcurl,right.m_lcurl); std::swap(m_rcurl,right.m_rcurl); } void Expression::sequenceWords (int start, int firstWord) { if ( firstWord < static_cast(m_wordExp.size())) { std::vector::iterator w = m_wordExp.begin() + firstWord; std::vector::iterator wEnd = m_wordExp.end(); // add 1-based sequence number for strings in expression int i (start); while ( w != wEnd ) { Word& current = (*w); if (current.content.substr(0,6) != "$GROUP") current.sequence = i; else (current.sequence = -1); ++w, ++i; } } } const std::string& Expression::words (size_t index) const { return m_wordExp[index].content; } int Expression::wordLocation (size_t index) const { return m_wordExp[index].location; } int Expression::wordNumber (size_t index) const { return m_wordExp[index].sequence; } void Expression::setWordNumber (int index, int seq) { m_wordExp[index].sequence = seq; } void Expression::setWord (const string& str, size_t index) { if ( index < m_wordExp.size()) { m_wordExp[index].content = str; } else if (index == m_wordExp.size()) { // Assume we are putting back a new word in a place that // has just been deleted (ie. an editmod exchange). // Don't need to know new location and sequence values // for this word. Presumably an update call will handle that. Word newWord; newWord.content = str; m_wordExp.push_back(newWord); } else throw RangeError(""); } void Expression::update () { // rewrite the strings. string updatedExpression(""); string SPACE(" "); int NT = nTokens(); int wc (0); for (int t = 0; t < NT; ++t) { Token current = token(t); bool pm (current == Plus || current == Minus); if (current != WordExp) { if ( pm ) updatedExpression += SPACE; updatedExpression += m_tokenList[t].tokenString; if ( pm ) updatedExpression += SPACE; } else { updatedExpression += words(wc); ++wc; } } reinit(updatedExpression); } void Expression::deleteWordbySequence (int seq) { int NW = m_wordExp.size(); int NT = m_tokenList.size(); int j (0); Word currentWord; std::vector::iterator start = m_tokenList.begin(); do { currentWord = m_wordExp[j]; if ( currentWord.sequence == seq ) { int loc (currentWord.location); int k(0); bool foundToken(false); do { if ( foundToken = (m_tokenList[k].location == loc)) { Token previous (previousToken(k)); Token next (nextToken(k)); bool erasePrevious(false); bool eraseNext(false); int b(0),e(0); if ( isArithmeticOperator(previous)) { //isMultiplication now identifies Lbrace as multiplicative if ( (isAddition(previous) || previous == Null) && isMultiplication(next)) { eraseNext = true; b = k; e = next == Star ? k + 2 : k + 1; } else { erasePrevious = true; b = k - 1; e = k + 1; } m_tokenList.erase(start + b, start + e); } else if ( (previous == Lbrace || previous == Null) && isArithmeticOperator(next)) { eraseNext = true; b = k; e = k + 2; m_tokenList.erase(start + b, start + e); } else { m_tokenList.erase(start + k); } std::vector::iterator startWord = m_wordExp.begin()+j; std::vector::iterator endWord = m_wordExp.end(); for (std::vector::iterator is = startWord + 1 ; is != endWord; ++is) { is->sequence -= 1; } m_wordExp.erase(startWord); } ++k; } while (k != NT && !foundToken); } ++j; } while ( j != NW && !(currentWord.sequence == seq)); } bool Expression::isArithmeticOperator (const Token& token) { return ( token == Plus || token == Minus || token == Star || token == Slash ); } bool Expression::isAddition (const Token& token) { return ( token == Plus ); } bool Expression::isMultiplication (const Token& token) { return ( token == Star || token == Lbrace); } void Expression::clearWordList () { m_wordExp.clear(); } void Expression::clearTokenList () { m_tokenList.clear(); } void Expression::reinit (const string& newParseString) { if (!newParseString.empty()) { m_parseString = newParseString; standardizeInputStyle(); if (m_parseString.empty()) throw EmptyExpression(); std::vector oldWordSequence(m_wordExp); // recompute all of the locations. Doesn't (deliberately!) fix the // word sequencing since this might be a subexpression, so reimpose // the sequencing from *this. this only ensures that the words keep // the same sequence numbers as they had before the update. The entire // expression will need to be reindexed in practically all cases. init(); int l = 0; for ( size_t k = 0; k < m_wordExp.size(); ++k) { if ( m_wordExp[k].content == oldWordSequence[l].content ) { m_wordExp[k].sequence = oldWordSequence[l].sequence; ++l; } } } else { m_parseString = ""; init(); } } string Expression::starToParen (const string& input, int location) { // ASSUMES that it is dealing with an input // string that has already been fully verified and used // during the construction of a model. Furthermore, it assumes // that 'location' points to the start of an AddComponent string. // It only places parentheses around that particular AddComponent, // and then only as necessary. const string delim(")(+*/-"); string output = input; string::size_type acLoc = static_cast(location); string::size_type rdLoc = input.find_first_of(delim, acLoc); string::size_type ldLoc = input.find_last_of(delim, acLoc); // The rule to follow: if a '*' is the first delimiter to the // left OR right of addComp, it is removed and replaced by // a set of parentheses immediately around addComp if(ldLoc != string::npos || rdLoc != string::npos) { if(ldLoc != string::npos && output[ldLoc] == '*') output[ldLoc] = '('; else output.insert(acLoc, 1, '('); if(rdLoc != string::npos) if(output[ldLoc] == '*') output[rdLoc] = ')'; else output.insert(rdLoc, 1, ')'); else output += ')'; } return output; } bool Expression::operator == (const Expression& right) const { bool equal = true; if(nTokens() == right.nTokens()) { int nNumTokens = nTokens(); for(int i = 0; i < nNumTokens && equal; ++i) { const TokenType & tCurToken = tokenList(i), & tNewToken = right.tokenList(i); string strCurTokenString = "", strNewTokenString = ""; if(tCurToken.type == WordExp) strCurTokenString = getWordFromLocation(tCurToken.location); else strCurTokenString = tCurToken.tokenString; if(tNewToken.type == WordExp) strNewTokenString = right.getWordFromLocation(tNewToken.location); else strNewTokenString = tNewToken.tokenString; if(strNewTokenString != strCurTokenString) equal = false; } } else equal = false; return equal; } string Expression::getWordFromLocation (const int& location) const { std::vector::const_iterator i_wordsBeg = m_wordExp.begin(), i_wordsEnd = m_wordExp.end(); bool done = false; while(i_wordsBeg != i_wordsEnd && !done) if(i_wordsBeg->location == location) done = true; else ++i_wordsBeg; return done ? i_wordsBeg->content : string(""); } string::size_type Expression::findMatchingRightParen (const string& inString, string::size_type pos) { // ASSUMES "pos" is the location of a '(' in inString. // Returns the location of its matching ')' OR // string::npos if no match is found. int count = 0; string::size_type i = pos; do { switch(inString[i]) { case '(': ++count; break; case ')': --count; break; } } while(count && ++i < inString.length()); if (i == inString.length()) i = string::npos; return i; } void Expression::checkBalance () { // Simple left/right parentheses balance check, not checking // any sort of context. // Also check curly brackets for table model notation, // which is more strict since no nesting is allowed. int count = 0; int curlCount = 0; string::size_type sz = m_parseString.length(); for (string::size_type i=0; i 1) throw ExpressionError("Cannot nest '{}' brackets."); } else if (c == '}') { --curlCount; if (curlCount < 0) throw ExpressionError("Unbalanced '{}' brackets."); } } if (count) throw ExpressionError("Unbalanced parentheses."); if (curlCount) throw ExpressionError("Unbalbanced '{}' brackets."); } void Expression::standardizeInputStyle () { // Purpose is to get rid of at an early stage any redundant // parentheses either through removal or replacement with '*'. // This way, the functions that do the more complicated context // checking can expect the input to be of a more uniform style. // None of the functions this calls should modify the length // of m_parseString, nor do they make no assumptions regarding // the location of whitespace. if (m_parseString.size()) { // The checkBalance function will THROW if it finds a // parentheses or curly-bracket mismatch. checkBalance(); removeStarParenCombo(); // Calling the recursive killRedundantParen from the top level killRedundantParen(0, m_parseString.size()-1); } } void Expression::removeStarParenCombo () { // Look for any cases of "*(" and replace with just "(". // Do same for ")*". const string WS(" \t\n"); string::size_type loc=m_parseString.find_first_not_of(WS); string::size_type prevLoc = 0; std::vector toErase; char prevChar(0); while (loc != string::npos && loc < m_parseString.size()-1) { if (m_parseString[loc] == '*' && prevChar == ')') { toErase.push_back(loc); } else if (m_parseString[loc] == '(' && prevChar == '*') { toErase.push_back(prevLoc); } prevChar = m_parseString[loc]; prevLoc = loc; loc = m_parseString.find_first_not_of(WS, loc+1); } for (size_t i=0; i (leftPos+1)) { killRedundantParen(leftPos+1, rightPos-1); // OK, we now know our pre and post operator types, and we // know that any nested parentheses have already been // converted to their standardized form. So let's apply // the redundancy rules. // First find the lowest precedence between these pars, // not counting anything between nested subpars. 3 // possibilities: low ("+-"), high ("*/()"), or none // (only words in between). int insidePrecedence = findPrecedenceLevel(leftPos+1, rightPos-1); int prePrecedence = determinePrecedence(preType, true); int postPrecedence = determinePrecedence(postType, false); bool isRedundant = false; if (preType == '-' || preType == '/') { // non-associative pre-operator if (insidePrecedence > prePrecedence && insidePrecedence >= postPrecedence) isRedundant = true; } else { if (insidePrecedence >= prePrecedence && insidePrecedence >= postPrecedence) isRedundant = true; } if (isRedundant) { if (preType == 'w' && insidePrecedence > 1) m_parseString[leftPos] = '*'; else m_parseString[leftPos] = ' '; if (postType == 'w' && insidePrecedence > 1) m_parseString[rightPos] = '*'; else m_parseString[rightPos] = ' '; } } else { // This must be the case of "()". There's no // earthly reason for this to exist, so simply remove. m_parseString[leftPos] = (preType == 'w') ? '*' : ' '; m_parseString[rightPos] = (postType == 'w') ? '*' : ' '; } leftPos = m_parseString.find('(', rightPos+1); } } char Expression::findPreParenType (const string::size_type leftParenPos) const { // Parsing helper function used very early on in Expression // construction, and therefore can't make use of Token object // information. // ASSUMES ONLY that leftParenPos is the location of a '(' in // m_parseString. const string WS(" \t"); const string allowedOPS("*/+-()"); char preType = ' '; if (leftParenPos > 0) { string::size_type prePos = m_parseString.find_last_not_of(WS, leftParenPos - 1); if (prePos != string::npos) { preType = m_parseString[prePos]; // If not one of the allowed ops, assume it is // the last character in a word. if (allowedOPS.find(preType) == string::npos) preType = 'w'; } } return preType; } char Expression::findPostParenType (const string::size_type rightParenPos) const { // Parsing helper function used very early on in Expression // construction, and therefore can't make use of Token object // information. // ASSUMES ONLY that rightParenPos is the location of a ')' in // m_parseString. char postType = ' '; const string WS(" \t"); const string allowedOPS("*/+-()"); if (rightParenPos < m_parseString.length()-1) { string::size_type postPos = m_parseString.find_first_not_of(WS, rightParenPos + 1); if (postPos != string::npos) { postType = m_parseString[postPos]; // If not one of the allowed ops, assume it is // the first character in a word. if (allowedOPS.find(postType) == string::npos) postType = 'w'; } } return postType; } int Expression::findPrecedenceLevel (string::size_type startPos, string::size_type endPos) const { // Private helper function used during standardizeInput routines. // ASSUMES parentheses balance between startPos and endPos, and that // startPos and endPos represent the entire range between a pair of // outer parentheses. Also assumes any nested parentheses have already // been checked for redundancy. // Looks for the lowest precedence operator between startPos and endPos // inclusive, ignoring operators inside nested paretheses. (It assumes // startPos and endPos are the entire range within a pair of outer // parentheses.) In this context, we're determining if an operator makes // its enclosing parentheses "necessary". The lower the operator precedence, // the more necessary the parentheses. Therefore, if only a word ( or // consecutive words -- table models) exists, give it "very high" precedence // since it's not making the parentheses necessary. // Start with a default higher than anything determinePrecedence gives out. int level = 3; bool nestFound = false; string nonNestedParts; while (startPos != string::npos && startPos <= endPos) { string::size_type nestedLeft = m_parseString.find('(', startPos); string::size_type nestedRight = string::npos; if (nestedLeft != string::npos) { nestFound = true; nestedRight = findMatchingRightParen(m_parseString, nestedLeft); // begin next search 1 after ')' ++nestedRight; } string::size_type len = std::min(nestedLeft, endPos+1) - startPos; nonNestedParts += m_parseString.substr(startPos, len); startPos = nestedRight; } if (nonNestedParts.find_first_of("+-") != string::npos) level = 1; else if (nestFound || nonNestedParts.find_first_of("*/") != string::npos) level = 2; return level; } int Expression::determinePrecedence (char typeCode, bool isPre) { // Private function used during standardizeInput routines. int level = 0; switch (typeCode) { case 'w': // When pre or post operator is a word, treat as if '*'. level = 2; break; case '*': case '/': level = 2; break; case '(': level = isPre ? 0 : 2; break; case ')': level = isPre ? 2 : 0; break; case '+': case '-': level = 1; break; case ' ': level = 0; break; default: break; } return level; } bool Expression::isBetweenCurls (string::size_type pos) const { // ASSUMES curly braces have already been checked for balance, // so that m_lcurl.size() = m_rcurl.size(). If pos = npos // that's OK, this will simply return false. bool between = false; if (pos != string::npos) { int iPos = static_cast(pos); for (size_t i=0; i m_lcurl[i] && iPos < m_rcurl[i]) { between = true; break; } } } return between; } void Expression::makeSubExpressions (const string& inString, const string& base, XSModExpTree& expTree) { // ASSUMES inString corresponds to a single component group, // meaning it has no non-nested '+'. // Uses recursion to perform post-order traversal of sub-Expressions // contained in inString. std::vector nestedLocs = findNests(inString); size_t nNests = nestedLocs.size()/2; string subExpStr(inString); int offset = 0; IntegerArray subNodes; for (size_t i=0; i(expTree.size()-1); subNodes.push_back(locNum); string groupHolder(makeGroupString(locNum)); // Need to replace the nested part of subExpStr and keep track // of the size adjustment this causes. Otherwise the positions // stored in nestedLocs would become meaningless after the // first substitution. if (-1*offset > (int)nestPos) throw RedAlert("Position error during $GROUP substitution in expression string."); subExpStr.replace(static_cast(nestPos+offset), nestN, groupHolder); offset += (int)groupHolder.size() - (int)nestN; } Expression subExpression(subExpStr, base); subExpression.m_location = static_cast(expTree.size()); // Don't need to set m_group here. This is best done at the top // of the expression tree in the analyze() function. expTree.insert(subExpression, subNodes); } std::vector Expression::findNests (const string& inString) { // ASSUMES inString corresponds to a single component group which // may or may not contain ()'s. If it does, this looks for any // ()'s 1 AND ONLY 1 level further down, and stores the starting // pos and nChars of the nested group. // Naturally assumes parentheses are balanced by this point. // Example: If inString = wa(pha(ga+wa(grad+bbody))+po+wa(ga+grad)) // insidePar = pha(ga+wa(grad+bbody))+po+wa(ga+grad) // groupLocs = [22, 25, 36] (pos relative to insidePar) // return nestLocs = [3, 22, 29, 11] (pos relative to inString) std::vector nestLocs; string::size_type leftParen = inString.find('('); if (leftParen != string::npos) { string::size_type rightParen = findMatchingRightParen(inString, leftParen); string insidePar(inString.substr(leftParen+1, rightParen-leftParen-1)); std::vector groupLocs = findGroups(insidePar); string::size_type start = 0; for (size_t i=0; i& expWords = itExp->m_wordExp; for (size_t i=0; i(position) >= pow(10.0,nDigits)) { string msg("Number of nested expressions exceeds current implementation's limit."); throw ExpressionError(msg); } char* rString = new char[7 + nDigits]; sprintf(rString, "%s%.2d", flag, position); string groupString(rString); delete [] rString; return groupString; } std::pair Expression::findWordDifference (const Expression& oldExp, const Expression& newExp) { std::pair diffIndices(0,0); // This ASSUMES the old and new nWords DO NOT differ by more than 1, // else it wouldn't get in here. // Return values should refer to the 1-based word position of the // first detected difference. If no ambiguity, the 2 values will be // the same. Else, they will be the start and end pos (inclusive) // of consecutive repeated words. If two expressions are of // different size, values refer to positions in the larger. If no // difference found return (0,0). const int nOldWords = oldExp.nWords(); const int nNewWords = newExp.nWords(); const std::vector& larger = (nOldWords >= nNewWords) ? oldExp.wordExp() : newExp.wordExp(); const std::vector& smaller = (nOldWords >= nNewWords) ? newExp.wordExp() : oldExp.wordExp(); const size_t nWords = smaller.size(); Word prevWord; size_t start=0, stop=0; bool diffFound = false; for (size_t i=0; !diffFound && i nWords) { // Remember, we're ASSUMING larger only has 1 more element. if (larger[nWords].content != prevWord.content) start = nWords+1; stop = nWords+1; diffFound = true; } } else { // A difference was found, keep checking in larger for trailing // repeated words. for (size_t i=stop; i(start); diffIndices.second = static_cast(stop); } return diffIndices; } bool Expression::testEditOperation (const Expression& oldExp, const Expression& newExp, const int compIdx) { // ASSUME sizeDiff has already been verified to be -1, 0, or +1 // by this point. Expression oldExpression(oldExp); Expression newExpression(newExp); const int sizeDiff = newExpression.nWords() - oldExpression.nWords(); switch (sizeDiff) { case -1: oldExpression.deleteWordbySequence(compIdx); oldExpression.update(); break; case 0: oldExpression.deleteWordbySequence(compIdx); oldExpression.update(); newExpression.deleteWordbySequence(compIdx); newExpression.update(); break; case 1: newExpression.deleteWordbySequence(compIdx); newExpression.update(); break; default: throw RedAlert("Edit expression size diff not equal to -1, 0, or 1."); } return (oldExpression == newExpression); } void Expression::replaceWordBySequence (int sequence, const string& newWord) { // ONLY replaces a word, makes no changes to operators on either side. bool isFound = false; std::vector::iterator itWord = m_wordExp.begin(); std::vector::iterator itWordEnd = m_wordExp.end(); while (itWord != itWordEnd && !isFound) { if (itWord->sequence == sequence) isFound = true; else ++itWord; } if (!isFound) { std::ostringstream oss; oss << "Error searching for sequence number " << sequence << " in Expression::replaceWordBySequence\n"; throw RedAlert(oss.str()); } itWord->content = newWord; update(); } // Additional Declarations std::ostream & operator<< (std::ostream& s, const Expression& right) { using namespace std; ostream_iterator n(s,""); ostream_iterator dash(s,":"); ostream_iterator strdash(s,":"); s << "Substring " << right.m_parseString << " From base string: " << right.m_base << endl; s << "Open (: "; copy(right.lbrace().begin(),right.lbrace().end(),dash); s << endl; s << "Close): "; copy(right.rbrace().begin(),right.rbrace().end(),dash); s << endl; s << "Plus +: "; copy(right.plus().begin(),right.plus().end(),dash); s << endl; s << "Star *: "; copy(right.star().begin(),right.star().end(),dash); s << endl; s << "Word : "; copy(right.m_wordExp.begin(),right.m_wordExp.end(),strdash); s << endl; s << "Word Index :"; for (std::vector::const_iterator j = right.m_wordExp.begin(); j != right.m_wordExp.end(); ++j) { s << j->sequence << ":"; } s << endl; copy(right.m_wordExp.begin(),right.m_wordExp.end(),strdash); s << endl; s << "Group, Location: " << right.group() << " " << right.location() << endl; return s; } std::ostream & operator<< (std::ostream& s, const Expression::TokenType& right) { s << right.type << ':' << right.location << ':' << right.sequenceNo; return s; } std::ostream & operator<< (std::ostream& s, const Expression::Word& right) { s << right.content << '[' << right.sequence << ']'; return s; }