Last active
May 21, 2021 10:09
-
-
Save phith0n/e31ba266ec6fff45bc8b316b1101b723 to your computer and use it in GitHub Desktop.
Revisions
-
phith0n revised this gist
May 21, 2021 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -85,6 +85,6 @@ def is_w(self, ch: str): if __name__ == '__main__': # output: \w+\s\w+\s\w+\s\w+,\s\w'\w\s\d+\s\w+\s\w+ regexp = ReGenerate().generate('My name is Bob, I\'m 25 years old') print(regexp) -
phith0n created this gist
May 21, 2021 .There are no files selected for viewing
"""Derive a simple regular expression that matches a sample string.

Each character of the sample is classified as a digit (``\\d``), a word
character (``\\w``), whitespace (``\\s``) or "other" (emitted escaped).
Runs of two or more characters of the same class collapse to ``<class>+``.
"""
import re
from enum import Enum

# Compiled once; each is applied to a single character at a time.
_DIGIT_RE = re.compile(r'\d')
_SPACE_RE = re.compile(r'\s')
_WORD_RE = re.compile(r'\w')


class State(Enum):
    """Scanner states used by :class:`ReGenerate`.

    ``X`` means exactly one character of class X has been seen so far;
    ``XRest`` means two or more have been seen and the pending fragment
    already ends in ``+``.
    """

    Initial = 0
    D = 10
    DRest = 11
    S = 20
    SRest = 21
    W = 30
    WRest = 31
    Other = 100


class ReGenerate(object):
    """Small state machine that turns a sample string into a regex."""

    def __init__(self):
        # Finished pattern pieces, in input order.
        self.tokens = []
        self.current_state: State = State.Initial
        # Pattern piece currently being built (e.g. r'\w' or r'\w+').
        self.fragment = ''

    def flush(self, ch: str):
        """Store the pending fragment and start a new one from *ch*.

        Digits are tested before word characters because ``\\w`` also
        matches digits. An empty *ch* matches no class, so it ends up in
        the ``Other`` branch with an empty fragment; ``generate`` relies
        on that to emit the final piece.
        """
        if self.fragment:
            self.tokens.append(self.fragment)
            self.fragment = ''

        if self.is_d(ch):
            self.fragment = r'\d'
            self.current_state = State.D
        elif self.is_w(ch):
            self.fragment = r'\w'
            self.current_state = State.W
        elif self.is_space(ch):
            self.fragment = r'\s'
            self.current_state = State.S
        else:
            self.fragment = re.escape(ch)
            self.current_state = State.Other

    def generate(self, data: str):
        """Return a regex pattern string describing *data*.

        The instance is reset on entry, so it can be reused for several
        inputs without results from earlier calls leaking into later ones.
        An empty *data* yields an empty pattern.
        """
        # Reset so repeated calls on one instance are independent.
        self.tokens = []
        self.fragment = ''
        self.current_state = State.Initial

        # One character seen: predicate for "same class" and the state to
        # move to once a second character of that class arrives.
        single = {
            State.D: (self.is_d, State.DRest),
            State.W: (self.is_w, State.WRest),
            State.S: (self.is_space, State.SRest),
        }
        # Two or more seen: stay put while the class keeps matching.
        repeated = {
            State.DRest: self.is_d,
            State.WRest: self.is_w,
            State.SRest: self.is_space,
        }

        for ch in data:
            state = self.current_state
            if state in single:
                same_class, rest_state = single[state]
                if same_class(ch):
                    self.fragment += r'+'
                    self.current_state = rest_state
                else:
                    self.flush(ch)
            elif state in repeated:
                if not repeated[state](ch):
                    self.flush(ch)
            else:
                # Initial and Other: every character starts a new fragment.
                self.flush(ch)

        # Push out whatever fragment is still pending.
        self.flush('')
        return ''.join(self.tokens)

    def is_d(self, ch: str):
        """Return a match object if *ch* is a digit, else ``None``."""
        return _DIGIT_RE.match(ch)

    def is_space(self, ch: str):
        """Return a match object if *ch* is whitespace, else ``None``."""
        return _SPACE_RE.match(ch)

    def is_w(self, ch: str):
        """Return a match object if *ch* is a word character, else ``None``."""
        return _WORD_RE.match(ch)


if __name__ == '__main__':
    # output: \w+\s\w+\s\w+\s\w+,\s\w\s\w+\s\d+\s\w+'\w\s\w+
    regexp = ReGenerate().generate('My name is Bob, I am 25 year\'s old')
    print(regexp)