Skip to content

Instantly share code, notes, and snippets.

@phith0n
Last active May 21, 2021 10:09
Show Gist options
  • Save phith0n/e31ba266ec6fff45bc8b316b1101b723 to your computer and use it in GitHub Desktop.
Save phith0n/e31ba266ec6fff45bc8b316b1101b723 to your computer and use it in GitHub Desktop.

Revisions

  1. phith0n revised this gist May 21, 2021. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions regenerate.py
    Original file line number Diff line number Diff line change
    @@ -85,6 +85,6 @@ def is_w(self, ch: str):


    # Diff hunk of the script entry point. The first comment/call pair below is
    # the version that was removed (it is identical to the entry point of the
    # originally created file); the second pair is the version that replaced it.
    if __name__ == '__main__':
    # output: \w+\s\w+\s\w+\s\w+,\s\w\s\w+\s\d+\s\w+'\w\s\w+
    regexp = ReGenerate().generate('My name is Bob, I am 25 year\'s old')
    # output: \w+\s\w+\s\w+\s\w+,\s\w'\w\s\d+\s\w+\s\w+
    regexp = ReGenerate().generate('My name is Bob, I\'m 25 years old')
    # Print the generated pattern.
    print(regexp)
  2. phith0n created this gist May 21, 2021.
    90 changes: 90 additions & 0 deletions regenerate.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,90 @@
    import re
    from enum import Enum


    class State(Enum):
        """States of the character scanner used by ``ReGenerate``.

        Each character class has two states: the plain one is entered on the
        first character of a run, the ``*Rest`` one once the run contains a
        second character of the same class.
        """

        # No character has been consumed yet.
        Initial = 0

        # Digit run: first digit / further digits.
        D = 10
        DRest = 11

        # Whitespace run: first whitespace character / further ones.
        S = 20
        SRest = 21

        # Word-character run: first word character / further ones.
        W = 30
        WRest = 31

        # Any character that is not a digit, word character or whitespace.
        Other = 100


    class ReGenerate(object):
        r"""Build a regular-expression pattern that describes a sample string.

        The sample is scanned one character at a time. A character starts a
        ``\d``, ``\w`` or ``\s`` fragment depending on its class; a second
        consecutive character of the same class turns the fragment into
        ``\d+`` / ``\w+`` / ``\s+``. Every other character becomes its own
        ``re.escape``-d literal fragment.

        When a fragment is started, the digit test runs before the word test,
        because ``\w`` also matches digits. Once inside a ``\w`` run, digits
        extend that run.
        """

        def __init__(self):
            # Finished pattern fragments, in input order.
            self.tokens = []
            # Scanner state; see the State enum.
            self.current_state: State = State.Initial
            # Fragment currently being built (not yet appended to tokens).
            self.fragment = ''

        def flush(self, ch: str):
            """Close the pending fragment and start a new one for ``ch``.

            :param ch: the character that starts the next fragment. Passing
                ``''`` only closes the pending fragment: no class test matches
                an empty string and ``re.escape('')`` is empty, so the new
                fragment stays empty.
            """
            if self.fragment:
                self.tokens.append(self.fragment)
                self.fragment = ''

            # Digits are tested first: r'\w' would match them as well.
            if self.is_d(ch):
                self.fragment = r'\d'
                self.current_state = State.D
            elif self.is_w(ch):
                self.fragment = r'\w'
                self.current_state = State.W
            elif self.is_space(ch):
                self.fragment = r'\s'
                self.current_state = State.S
            else:
                self.fragment = re.escape(ch)
                self.current_state = State.Other

        def generate(self, data: str):
            """Return a regex pattern string describing ``data``.

            The scanner is reset on every call, so one instance can be reused
            and each call returns the pattern for its own input only.

            :param data: the sample text; an empty string yields ``''``.
            :return: the concatenated pattern fragments.
            """
            # Start from a clean slate; otherwise fragments of an earlier call
            # would be prepended to this call's result.
            self.tokens = []
            self.fragment = ''
            self.current_state = State.Initial

            # State after exactly one character of a class:
            # (class test, state to enter when the run continues).
            run_start = {
                State.D: (self.is_d, State.DRest),
                State.W: (self.is_w, State.WRest),
                State.S: (self.is_space, State.SRest),
            }
            # State after two or more characters of a class: class test only.
            run_rest = {
                State.DRest: self.is_d,
                State.WRest: self.is_w,
                State.SRest: self.is_space,
            }

            for ch in data:
                state = self.current_state
                if state in run_start:
                    same_class, rest_state = run_start[state]
                    if same_class(ch):
                        # Second character of the run: add the quantifier once.
                        self.current_state = rest_state
                        self.fragment += r'+'
                        continue
                elif state in run_rest and run_rest[state](ch):
                    # The run goes on; the fragment already ends with '+'.
                    continue
                # Initial/Other state, or the character ends the current run.
                self.flush(ch)

            # Emit whatever fragment is still pending.
            self.flush('')
            return ''.join(self.tokens)

        def is_d(self, ch: str):
            """Return a match object if ``ch`` starts with a digit, else None."""
            return re.match(r'[\d]', ch)

        def is_space(self, ch: str):
            """Return a match object if ``ch`` starts with whitespace, else None."""
            return re.match(r'[\s]', ch)

        def is_w(self, ch: str):
            """Return a match object if ``ch`` starts with a word character, else None."""
            return re.match(r'[\w]', ch)


    if __name__ == '__main__':
        # Demo run; prints: \w+\s\w+\s\w+\s\w+,\s\w\s\w+\s\d+\s\w+'\w\s\w+
        sample = "My name is Bob, I am 25 year's old"
        regexp = ReGenerate().generate(sample)
        print(regexp)