Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

One vector for regexes and matchers #133

Draft
wants to merge 11 commits into
base: master
Choose a base branch
from
157 changes: 83 additions & 74 deletions Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,59 @@ namespace fs = std::filesystem;
namespace commonItems
{
std::string getNextLexeme(std::istream& theStream);


// Compiles `keyword` into the pattern that this entry tests lexemes against.
RegisteredRegexBase::RegisteredRegexBase(const std::string& keyword): regex(keyword)
{
}


// Pairs a compiled pattern with a handler that takes the lexeme and the stream.
RegisteredRegex::RegisteredRegex(const std::string& keyword, const parsingFunction& function):
	RegisteredRegexBase(keyword), function(function)
{
}

// Tests the whole lexeme against this entry's pattern. On a match the handler is
// invoked with the lexeme and the stream. Returns whether the pattern matched.
bool RegisteredRegex::match(const std::string& lexeme, std::istream& theStream)
{
	const bool isMatch = std::regex_match(lexeme, regex);
	if (isMatch)
		function(lexeme, theStream);
	return isMatch;
}

// Tests `strippedLexeme` against this entry's pattern. On a match the handler is
// still invoked with the original `lexeme`, not the stripped text.
// Returns whether the pattern matched.
bool RegisteredRegex::matchStripped(const std::string& lexeme, const std::string& strippedLexeme, std::istream& theStream)
{
	const bool isMatch = std::regex_match(strippedLexeme, regex);
	if (isMatch)
		function(lexeme, theStream);
	return isMatch;
}


// Pairs a compiled pattern with a handler that takes only the stream.
RegisteredRegexStreamOnly::RegisteredRegexStreamOnly(const std::string& keyword, const parsingFunctionStreamOnly& function):
	RegisteredRegexBase(keyword), function(function)
{
}

// Tests the whole lexeme against this entry's pattern. On a match the handler is
// invoked with the stream only. Returns whether the pattern matched.
bool RegisteredRegexStreamOnly::match(const std::string& lexeme, std::istream& theStream)
{
	const bool isMatch = std::regex_match(lexeme, regex);
	if (isMatch)
		function(theStream);
	return isMatch;
}

// Tests `strippedLexeme` against this entry's pattern. On a match the handler is
// invoked with the stream only; `lexeme` is not used by this overload.
// Returns whether the pattern matched.
bool RegisteredRegexStreamOnly::matchStripped(const std::string& lexeme, const std::string& strippedLexeme, std::istream& theStream)
{
	const bool isMatch = std::regex_match(strippedLexeme, regex);
	if (isMatch)
		function(theStream);
	return isMatch;
}
} // namespace commonItems


Expand All @@ -27,19 +80,25 @@ void commonItems::absorbBOM(std::istream& theStream)

// Registers a handler for `keyword` that receives both the matched lexeme and the stream.
// NOTE(review): this span reads like a rendered diff - the map insert and the vector
// emplace_back below look like the before/after versions of a single statement; confirm
// which one is meant to remain.
// NOTE(review): RegisteredRegex compiles `keyword` with std::regex and tests it with
// std::regex_match, so regex metacharacters in a keyword are treated as pattern syntax
// rather than literal text - confirm callers only pass plain words, or escape the keyword.
void commonItems::parser::registerKeyword(const std::string& keyword, const parsingFunction& function)
{
registeredKeywordStrings.insert(std::make_pair(keyword, function));
registeredRegexes.emplace_back(std::make_unique<RegisteredRegex>(keyword, function));
}


// Registers a handler for `keyword` that receives only the stream.
// NOTE(review): this span reads like a rendered diff - the map insert and the vector
// emplace_back below look like the before/after versions of a single statement; confirm
// which one is meant to remain.
// NOTE(review): RegisteredRegexStreamOnly compiles `keyword` with std::regex and tests it
// with std::regex_match, so regex metacharacters in a keyword are treated as pattern syntax
// rather than literal text - confirm callers only pass plain words, or escape the keyword.
void commonItems::parser::registerKeyword(const std::string& keyword, const parsingFunctionStreamOnly& function)
{
registeredKeywordStringsStreamOnly.insert(std::make_pair(keyword, function));
registeredRegexes.emplace_back(std::make_unique<RegisteredRegexStreamOnly>(keyword, function));
}


// Registers a regex pattern (passed in `keyword`) whose handler receives both the
// matched lexeme and the stream.
// NOTE(review): this span reads like a rendered diff - the generatedRegexes line and the
// registeredRegexes line below look like the before/after versions of a single statement;
// confirm which one is meant to remain.
void commonItems::parser::registerRegex(const std::string& keyword, const parsingFunction& function)
{
generatedRegexes.emplace_back(std::regex(keyword), function);
registeredRegexes.emplace_back(std::make_unique<RegisteredRegex>(keyword, function));
}


// Registers a regex pattern (passed in `keyword`) whose handler needs only the stream.
// The new entry is appended after all previously registered entries.
void commonItems::parser::registerRegex(const std::string& keyword, const parsingFunctionStreamOnly& function)
{
	registeredRegexes.push_back(std::make_unique<RegisteredRegexStreamOnly>(keyword, function));
}


Expand Down Expand Up @@ -115,9 +174,7 @@ void commonItems::parser::parseFile(std::string_view filename)

// Drops every registered handler. Each container is swapped with a freshly constructed
// empty temporary, so the old contents are destroyed together with that temporary.
// NOTE(review): this span reads like a rendered diff - the first three swaps refer to the
// map/vector members and the last one to the unified registeredRegexes vector; confirm
// which set of members actually exists before relying on this body.
void commonItems::parser::clearRegisteredKeywords() noexcept
{
std::map<std::string, parsingFunction>().swap(registeredKeywordStrings);
std::map<std::string, parsingFunctionStreamOnly>().swap(registeredKeywordStringsStreamOnly);
std::vector<std::pair<std::regex, parsingFunction>>().swap(generatedRegexes);
std::vector<std::unique_ptr<RegisteredRegexBase>>().swap(registeredRegexes);
}


Expand All @@ -137,11 +194,27 @@ std::optional<std::string> commonItems::parser::getNextToken(std::istream& theSt
const auto strippedLexeme = remQuotes(toReturn);
const auto isLexemeQuoted = (strippedLexeme.size() < toReturn.size());

auto matched = tryToMatchAgainstKeywords(toReturn, strippedLexeme, isLexemeQuoted, theStream);
bool matched = false;
for (const auto& registered: registeredRegexes)
{
if (registered->match(toReturn, theStream))
{
matched = true;
break;
}
}
if (!matched && isLexemeQuoted)
{
for (const auto& registered: registeredRegexes)
{
if (registered->matchStripped(toReturn, strippedLexeme, theStream))
{
matched = true;
break;
}
}
}

if (!matched)
matched = tryToMatchAgainstRegexes(toReturn, strippedLexeme, isLexemeQuoted, theStream);

if (!matched)
gotToken = true;
}
Expand All @@ -152,70 +225,6 @@ std::optional<std::string> commonItems::parser::getNextToken(std::istream& theSt
}


// Looks the lexeme up in the exact-keyword maps and runs the first handler found.
// Lookup order:
//   1. stream-only handlers, keyed by the raw lexeme
//   2. lexeme-aware handlers, keyed by the raw lexeme
//   3. stream-only handlers, keyed by strippedLexeme (only when isLexemeQuoted)
//   4. lexeme-aware handlers, keyed by strippedLexeme (only when isLexemeQuoted)
// Lexeme-aware handlers always receive the raw lexeme (`toReturn`), even when the
// lookup succeeded on the stripped text. Returns true when a handler was invoked.
inline bool commonItems::parser::tryToMatchAgainstKeywords(const std::string& toReturn,
	const std::string& strippedLexeme,
	bool isLexemeQuoted,
	std::istream& theStream)
{
	const auto rawStreamOnly = registeredKeywordStringsStreamOnly.find(toReturn);
	if (rawStreamOnly != registeredKeywordStringsStreamOnly.end())
	{
		rawStreamOnly->second(theStream);
		return true;
	}

	const auto rawWithLexeme = registeredKeywordStrings.find(toReturn);
	if (rawWithLexeme != registeredKeywordStrings.end())
	{
		rawWithLexeme->second(toReturn, theStream);
		return true;
	}

	// The stripped text is only consulted for quoted lexemes.
	if (!isLexemeQuoted)
		return false;

	const auto strippedStreamOnly = registeredKeywordStringsStreamOnly.find(strippedLexeme);
	if (strippedStreamOnly != registeredKeywordStringsStreamOnly.end())
	{
		strippedStreamOnly->second(theStream);
		return true;
	}

	const auto strippedWithLexeme = registeredKeywordStrings.find(strippedLexeme);
	if (strippedWithLexeme != registeredKeywordStrings.end())
	{
		strippedWithLexeme->second(toReturn, theStream);
		return true;
	}

	return false;
}

// Tries every registered regex, in registration order, against the raw lexeme and
// runs the handler of the first pattern that matches the whole lexeme. When nothing
// matched and the lexeme is quoted, the same pass is repeated against strippedLexeme.
// Handlers always receive the raw lexeme (`toReturn`), even when the match was made
// on the stripped text. Returns true when a handler was invoked.
inline bool commonItems::parser::tryToMatchAgainstRegexes(const std::string& toReturn,
	const std::string& strippedLexeme,
	bool isLexemeQuoted,
	std::istream& theStream)
{
	// No capture groups are consumed here, so the match-results-free overload of
	// std::regex_match is used; the binding is named `handler` so it does not shadow
	// the parsingFunction type.
	for (const auto& [pattern, handler]: generatedRegexes)
	{
		if (std::regex_match(toReturn, pattern))
		{
			handler(toReturn, theStream);
			return true;
		}
	}

	// The stripped text is only consulted for quoted lexemes.
	if (!isLexemeQuoted)
		return false;

	for (const auto& [pattern, handler]: generatedRegexes)
	{
		if (std::regex_match(strippedLexeme, pattern))
		{
			handler(toReturn, theStream);
			return true;
		}
	}
	return false;
}


std::optional<std::string> commonItems::parser::getNextTokenWithoutMatching(std::istream& theStream)
{
theStream >> std::noskipws;
Expand Down
47 changes: 35 additions & 12 deletions Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,39 @@ typedef std::function<void(std::istream&)> parsingFunctionStreamOnly;
void absorbBOM(std::istream& theStream);


// Abstract entry in the parser's list of registered patterns. Owns the compiled
// regex; derived classes own the handler and decide which arguments it receives.
class RegisteredRegexBase
{
  protected:
	std::regex regex; // pattern compiled from the constructor's keyword

  public:
	// Compiles `keyword` into the pattern. Marked explicit so a plain string never
	// converts implicitly into a registration entry.
	explicit RegisteredRegexBase(const std::string& keyword);
	virtual ~RegisteredRegexBase() = default;

	// Tests `lexeme` against the pattern and, on a match, runs the handler.
	// Returns true when the handler ran.
	virtual bool match(const std::string& lexeme, std::istream& theStream) = 0;

	// Tests `strippedLexeme` against the pattern and, on a match, runs the handler;
	// `lexeme` is the original text that lexeme-aware handlers receive.
	// Returns true when the handler ran.
	virtual bool matchStripped(const std::string& lexeme, const std::string& strippedLexeme, std::istream& theStream) = 0;
};


class RegisteredRegex: public RegisteredRegexBase
{
private:
parsingFunction function;
public:
RegisteredRegex(const std::string& keyword, const parsingFunction& function);
bool match(const std::string& lexeme, std::istream& theStream);
bool matchStripped(const std::string& lexeme, const std::string& strippedLexeme, std::istream& theStream);
};

class RegisteredRegexStreamOnly: public RegisteredRegexBase
{
private:
parsingFunctionStreamOnly function;
public:
RegisteredRegexStreamOnly(const std::string& keyword, const parsingFunctionStreamOnly& function);
bool match(const std::string& lexeme, std::istream& theStream);
bool matchStripped(const std::string& lexeme, const std::string& strippedLexeme, std::istream& theStream);
};


class parser
{
public:
Expand All @@ -32,6 +65,7 @@ class parser

void registerKeyword(const std::string& keyword, const parsingFunctionStreamOnly& function);
void registerKeyword(const std::string& keyword, const parsingFunction& function); // for the few keywords that need to be returned
void registerRegex(const std::string& keyword, const parsingFunctionStreamOnly& function);
void registerRegex(const std::string& keyword, const parsingFunction& function);

void clearRegisteredKeywords() noexcept;
Expand All @@ -44,18 +78,7 @@ class parser


private:
inline bool tryToMatchAgainstKeywords(const std::string& toReturn,
const std::string& strippedLexeme,
bool isLexemeQuoted,
std::istream& theStream);
inline bool tryToMatchAgainstRegexes(const std::string& toReturn,
const std::string& strippedLexeme,
bool isLexemeQuoted,
std::istream& theStream);

std::map<std::string, parsingFunctionStreamOnly> registeredKeywordStringsStreamOnly;
std::map<std::string, parsingFunction> registeredKeywordStrings;
std::vector<std::pair<std::regex, parsingFunction>> generatedRegexes;
std::vector<std::unique_ptr<RegisteredRegexBase>> registeredRegexes;
};

} // namespace commonItems
Expand Down
8 changes: 4 additions & 4 deletions ParserHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace commonItems
std::string getNextLexeme(std::istream& theStream);


void ignoreItem(const std::string& unused, std::istream& theStream)
void ignoreItem(std::istream& theStream)
{
auto next = getNextLexeme(theStream);
if (next == "=")
Expand Down Expand Up @@ -59,7 +59,7 @@ void ignoreItem(const std::string& unused, std::istream& theStream)
}


void ignoreObject(const std::string& unused, std::istream& theStream)
void ignoreObject(std::istream& theStream)
{
auto braceDepth = 0;
while (true)
Expand All @@ -86,7 +86,7 @@ void ignoreObject(const std::string& unused, std::istream& theStream)
}


void ignoreString(const std::string& unused, std::istream& theStream)
void ignoreString(std::istream& theStream)
{
singleString ignore(theStream);
}
Expand Down Expand Up @@ -519,7 +519,7 @@ stringsOfItems::stringsOfItems(std::istream& theStream)
stringsOfItemNames::stringsOfItemNames(std::istream& theStream)
{
registerRegex(catchallRegex, [this](const std::string& itemName, std::istream& theStream) {
ignoreItem(itemName, theStream);
ignoreItem(theStream);
theStrings.push_back(itemName);
});

Expand Down
6 changes: 3 additions & 3 deletions ParserHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
namespace commonItems
{

void ignoreItem(const std::string& unused, std::istream& theStream);
void ignoreObject(const std::string& unused, std::istream& theStream);
void ignoreString(const std::string& unused, std::istream& theStream);
void ignoreItem(std::istream& theStream);
void ignoreObject(std::istream& theStream);
void ignoreString(std::istream& theStream);


/*function template only enabled for integer types
Expand Down
4 changes: 3 additions & 1 deletion tests/CommonItemsTests.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@
<CopyFileToFolders Include="TestFiles\version.txt">
<Filter>TestFiles</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="TestFiles\emptyVersion.txt" />
<CopyFileToFolders Include="TestFiles\emptyVersion.txt">
<Filter>TestFiles</Filter>
</CopyFileToFolders>
</ItemGroup>
</Project>
Loading