Skip to content

Commit

Permalink
Support multiple regexp matches per line
Browse files Browse the repository at this point in the history
By using the match group's indices, this also avoids replacing unrelated
chunks of the line.
  • Loading branch information
jplitza committed Jan 11, 2022
1 parent abdef4c commit 02ff47c
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 17 deletions.
47 changes: 30 additions & 17 deletions anonip.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,35 @@ def process_ip(self, ip):
)
return trunc_ip

def process_regex_match(self, match):
    """
    Anonymize the capture groups of a single regex match.

    Every non-empty capture group is handed to ``self.extract_ip``. When an
    IP is found, the group's text is replaced by the result of
    ``self.process_ip``; otherwise it is replaced by ``self.replace`` if
    that is set, or kept unchanged. Text of the match that lies outside of
    the capture groups is copied through as is.

    The signature fits the callable form of ``re.sub``'s replacement
    argument.

    :param match: re.Match
    :return: str
    """
    whole = match.group(0)
    # Group positions refer to the searched string, whereas ``whole`` only
    # holds the matched part, so every position is shifted by this offset.
    offset = match.start(0)

    pieces = []
    cursor = 0  # end of the previously rewritten group, relative to ``whole``

    for index in range(1, len(match.groups()) + 1):
        captured = match.group(index)
        if not captured:
            # The group is empty or did not take part in the match.
            continue

        _ip_str, ip = self.extract_ip(captured)
        if ip:
            replacement = self.process_ip(ip)
        else:
            replacement = self.replace or captured

        begin, end = match.span(index)
        # Keep the text between the previous group and this one.
        pieces.append(whole[cursor:begin - offset])
        pieces.append(str(replacement))
        cursor = end - offset

    # Whatever follows the last rewritten group.
    pieces.append(whole[cursor:])
    return "".join(pieces)

def process_line_regex(self, line):
"""
This function processes a single line based on the provided regex.
Expand All @@ -189,23 +218,7 @@ def process_line_regex(self, line):
:param line: str
:return: str
"""
match = re.match(self.regex, line)
if not match:
logger.debug("Regex did not match!")
return line
groups = match.groups()

for m in set(groups):
if not m:
continue
ip_str, ip = self.extract_ip(m)
if ip:
trunc_ip = self.process_ip(ip)
line = line.replace(ip_str, str(trunc_ip))
elif self.replace:
line = line.replace(m, self.replace)

return line
return re.sub(self.regex, self.process_regex_match, line)

def process_line_column(self, line):
"""
Expand Down
6 changes: 6 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ def test_column(line, columns, expected):
'3.3.0.0 - - [20/May/2015:21:05:01 +0000] "GET /723.3.3.357 HTTP/1.1" 200 13358 "-" "useragent"',
None,
),
(
'3.3.3.3 - - [20/May/2015:21:05:01 +0000] "GET /723.3.3.357 HTTP/1.1" 200 13358 "-" "useragent [ip:1.2.3.4]"',
re.compile(r"\b([0-9a-fA-F][0-9a-fA-F:\.]*|::[0-9a-fA-F:\.]+)\b"),
'3.3.0.0 - - [20/May/2015:21:05:01 +0000] "GET /723.3.3.357 HTTP/1.1" 200 13358 "-" "useragent [ip:1.2.0.0]"',
None,
),
(
"blabla/ 3.3.3.3 /blublu",
re.compile(r"^blabla/ ([^,]+) /blublu"),
Expand Down

0 comments on commit 02ff47c

Please sign in to comment.