summary | refs | log | tree | commit | diff | stats
path: root/apps/patchwork
diff options
context:
space:
mode:
author: Jeremy Kerr <jk@ozlabs.org> 2008-09-16 17:34:55 +1000
committer: Jeremy Kerr <jk@ozlabs.org> 2008-09-16 17:34:55 +1000
commit: 96467db48884d72bc04fc23c8f957190fa004779 (patch)
tree: 06586446333bfbf6e23ec54bc930f20b5597943f /apps/patchwork
parent: 6034b55eee81c82058615f2a349d6c64cfb61e08 (diff)
download: patchwork-96467db48884d72bc04fc23c8f957190fa004779.tar.bz2
download: patchwork-96467db48884d72bc04fc23c8f957190fa004779.tar.xz
Improve [PATCH] subject parsing
We'd like to leave some fragments of [PATCH]-style headers in the patch subject, so add functionality to do this in clean_subject.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Diffstat (limited to 'apps/patchwork')
-rwxr-xr-x apps/patchwork/bin/parsemail.py 93
1 files changed, 89 insertions, 4 deletions
diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index 15b62d2..2310ae8 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -199,15 +199,100 @@ def find_patch_for_comment(mail):
return None
+split_re = re.compile('[,\s]+')
+
+def split_prefixes(prefix):
+ """ Turn a prefix string into a list of prefix tokens
+
+ >>> split_prefixes('PATCH')
+ ['PATCH']
+ >>> split_prefixes('PATCH,RFC')
+ ['PATCH', 'RFC']
+ >>> split_prefixes('')
+ []
+ >>> split_prefixes('PATCH,')
+ ['PATCH']
+ >>> split_prefixes('PATCH ')
+ ['PATCH']
+ >>> split_prefixes('PATCH,RFC')
+ ['PATCH', 'RFC']
+ >>> split_prefixes('PATCH 1/2')
+ ['PATCH', '1/2']
+ """
+ matches = split_re.split(prefix)
+ return [ s for s in matches if s != '' ]
+
re_re = re.compile('^(re|fwd?)[:\s]\s*', re.I)
-prefix_re = re.compile('^\[.*\]\s*')
+prefix_re = re.compile('^\[([^\]]*)\]\s*(.*)$')
whitespace_re = re.compile('\s+')
-def clean_subject(subject):
+def clean_subject(subject, drop_prefixes = None):
+ """ Clean a Subject: header from an incoming patch.
+
+ Removes Re: and Fwd: strings, as well as [PATCH]-style prefixes. By
+ default, only [PATCH] is removed, and we keep any other bracketed data
+ in the subject. If drop_prefixes is provided, remove those too,
+ comparing case-insensitively.
+
+ >>> clean_subject('meep')
+ 'meep'
+ >>> clean_subject('Re: meep')
+ 'meep'
+ >>> clean_subject('[PATCH] meep')
+ 'meep'
+ >>> clean_subject('[PATCH RFC] meep')
+ '[RFC] meep'
+ >>> clean_subject('[PATCH,RFC] meep')
+ '[RFC] meep'
+ >>> clean_subject('[PATCH,1/2] meep')
+ '[1/2] meep'
+ >>> clean_subject('[PATCH RFC 1/2] meep')
+ '[RFC,1/2] meep'
+ >>> clean_subject('[PATCH] [RFC] meep')
+ '[RFC] meep'
+ >>> clean_subject('[PATCH] [RFC,1/2] meep')
+ '[RFC,1/2] meep'
+ >>> clean_subject('[PATCH] [RFC] [1/2] meep')
+ '[RFC,1/2] meep'
+ >>> clean_subject('[PATCH] rewrite [a-z] regexes')
+ 'rewrite [a-z] regexes'
+ >>> clean_subject('[PATCH] [RFC] rewrite [a-z] regexes')
+ '[RFC] rewrite [a-z] regexes'
+ >>> clean_subject('[foo] [bar] meep', ['foo'])
+ '[bar] meep'
+ >>> clean_subject('[FOO] [bar] meep', ['foo'])
+ '[bar] meep'
+ """
+
+ if drop_prefixes is None:
+ drop_prefixes = []
+ else:
+ drop_prefixes = [ s.lower() for s in drop_prefixes ]
+
+ drop_prefixes.append('patch')
+
+ # remove Re:, Fwd:, etc
subject = re_re.sub(' ', subject)
- subject = prefix_re.sub('', subject)
+
+ prefixes = []
+
+ match = prefix_re.match(subject)
+
+ while match:
+ prefix_str = match.group(1)
+ prefixes += [ p for p in split_prefixes(prefix_str) \
+ if p.lower() not in drop_prefixes]
+
+ subject = match.group(2)
+ match = prefix_re.match(subject)
+
subject = whitespace_re.sub(' ', subject)
- return subject.strip()
+
+ subject = subject.strip()
+ if prefixes:
+ subject = '[%s] %s' % (','.join(prefixes), subject)
+
+ return subject
sig_re = re.compile('^(-{2,3} ?|_+)\n.*', re.S | re.M)
def clean_content(str):