ã¯ã¼ããªã¹ãããæ£è¦è¡¨ç¾ã®ãã¿ã¼ã³ãçæ
ç°å¢
Visual C++ 2019
æ¦è¦
ã¯ã¼ããªã¹ãï¼é åï¼ããæ£è¦è¡¨ç¾ã®ãã¿ã¼ã³ãçæãã¾ãã
PerlãRubyã«ã¯ãããã©ãc++ãè¦ã¤ãããªãã£ãã®ã§åèãåºã«ä½æãã¦ã¿ã¾ããã
ä¸ã®ã³ã¼ãã®ã¯ã¼ããªã¹ãã®çµæã 㨠(?:tange(?:rine|lo)|pomelo|orange|l(?:ime|emon)|grape(?:fruit)?|citron)
ãçæããã¾ãã
ã³ã¼ã
#pragma once class RegexpTrie { public: static void Initialize(void); static void Terminate(void); static const char* ToRegexPattern(const char** ppWords, size_t size); };
#define _CRT_SECURE_NO_WARNINGS #include "RegexpTrie.h" #include <string> #include <map> #include <vector> #include <algorithm> struct TrieNode { std::map<char, TrieNode> nodes; }; static TrieNode g_head; static char* g_pPattern = nullptr; static size_t g_patternLen = 0; void Add(std::string word) { auto pTrie = &g_head.nodes; for (auto i = 0; i < word.length(); i++) { auto chr = word[i]; if (pTrie->find(chr) == pTrie->end()) { (*pTrie)[chr].nodes.clear(); } pTrie = &(*pTrie)[chr].nodes; } (*pTrie)['*'].nodes.clear(); } void JoinString(std::vector<std::string>& sequences, std::string& outStr, std::string delimiter) { outStr = ""; for (auto it = sequences.begin(); it != sequences.end(); it++) { auto str = *it; if (str == "*") break; if (it != sequences.begin()) outStr += delimiter; outStr += str; } } void ToRegixp(std::map<char, TrieNode>* pTrie, std::string& pattern) { if (pTrie->size() == 0) return; else if (pTrie->size() == 1) { auto it = pTrie->begin(); auto pair = *it; auto key = pair.first; auto value = pair.second; if (key == '*') { return; } else { pattern += key; ToRegixp(&(*pTrie)[key].nodes, pattern); return; } } else { std::vector<std::string> sequences; for (auto it = pTrie->begin(); it != pTrie->end(); it++) { auto pair = *it; auto key = pair.first; auto value = pair.second; std::string str = ""; str += key; ToRegixp(&(*pTrie)[key].nodes, str); sequences.push_back(str); } std::sort(sequences.rbegin(), sequences.rend()); std::string result = ""; if (sequences.size() == 1) { result = sequences[0]; if (sequences[0].size() > 1) { result = "(?:" + result + ")"; } } else { std::string join = ""; JoinString(sequences, join, ""); if (sequences.size() == join.size()) result = "[" + join + "]"; else { std::string joinOr = ""; JoinString(sequences, joinOr, "|"); result = "(?:" + joinOr + ")"; } if (pTrie->find('*') != pTrie->end()) result = result + "?"; } pattern += result; } } static bool g_initialized = false; void RegexpTrie::Initialize(void) { if (g_initialized == true) return; g_initialized = true; g_pPattern = nullptr; g_patternLen = 0; } void RegexpTrie::Terminate(void) { free(g_pPattern); g_pPattern = nullptr; g_initialized = false; } const char* RegexpTrie::ToRegexPattern(const char** ppWords, size_t size) { g_head.nodes.clear(); for (auto i = 0; i < size; i++) { Add(ppWords[i]); } std::string pattern = ""; ToRegixp(&g_head.nodes, pattern); if (g_patternLen < pattern.size()) { free(g_pPattern); g_pPattern = (char*)malloc(pattern.size() + 1); if (g_pPattern != nullptr) { g_patternLen = pattern.size(); g_pPattern[g_patternLen] = '\0'; } } if (g_pPattern == nullptr) return ""; strncpy(g_pPattern, pattern.c_str(), g_patternLen); return g_pPattern; }
#include "RegexpTrie.h" #include <stdio.h> #include <regex> int main() { RegexpTrie::Initialize(); const char* pWords[] = { "lemon", "lime", "pomelo", "orange", "citron", "grapefruit", "grape", "tangerine", "tangelo", }; const size_t wordNum = sizeof(pWords) / sizeof(char*); auto pPattern = RegexpTrie::ToRegexPattern(pWords, wordNum); printf("%s\n", pPattern); auto regex = std::regex(pPattern); const char* pAddWords[] = { "olemon", "lemon0", "doraemon", }; const size_t addWordNum = sizeof(pAddWords) / sizeof(char*); auto ppSearchWords = new char* [wordNum + addWordNum]; memcpy(ppSearchWords, pWords, sizeof(char*) * wordNum); memcpy(&ppSearchWords[wordNum], pAddWords, sizeof(char*) * addWordNum); const size_t searchNum = wordNum + addWordNum; for (int i = 0; i < searchNum; i++) { std::cmatch cmatch; auto res = std::regex_search(ppSearchWords[i], cmatch, regex); printf("%s:%d match:%s prefix:%s sufix:%s\n", ppSearchWords[i], res, cmatch.str().c_str(), cmatch.prefix().str().c_str(), cmatch.suffix().str().c_str()); } delete[] ppSearchWords; RegexpTrie::Terminate(); }
åè
https://github.com/s9e/RegexpBuilder
https://github.com/ermanh/trieregex