diff --git a/backport-weed-out-more-backtracking-string-regexes.patch b/backport-weed-out-more-backtracking-string-regexes.patch new file mode 100644 index 0000000000000000000000000000000000000000..73eb324323338463ecc2cd0066498ba8af309c6f --- /dev/null +++ b/backport-weed-out-more-backtracking-string-regexes.patch @@ -0,0 +1,812 @@ +From 681487f82f55fba66f01f9913e4ff103e5b2ef4c Mon Sep 17 00:00:00 2001 +From: Georg Brandl +Date: Fri, 25 Dec 2020 13:16:56 +0100 +Subject: [PATCH] all: weed out more backtracking string regexes + +Conflict:do not change test +Reference:https://github.com/pygments/pygments/commit/681487f82f55fba66f01f9913e4ff103e5b2ef4c + +--- + pygments/lexers/actionscript.py | 4 ++-- + pygments/lexers/ambient.py | 2 +- + pygments/lexers/boa.py | 6 ++--- + pygments/lexers/configs.py | 2 +- + pygments/lexers/d.py | 4 ++-- + pygments/lexers/dotnet.py | 8 +++---- + pygments/lexers/dsls.py | 4 ++-- + pygments/lexers/go.py | 2 +- + pygments/lexers/graphics.py | 4 ++-- + pygments/lexers/haxe.py | 4 ++-- + pygments/lexers/iolang.py | 2 +- + pygments/lexers/jvm.py | 16 +++++++------- + pygments/lexers/lisp.py | 12 +++++----- + pygments/lexers/make.py | 4 ++-- + pygments/lexers/parsers.py | 48 ++++++++++++++++++++-------------------- + pygments/lexers/php.py | 4 ++-- + pygments/lexers/prolog.py | 4 ++-- + pygments/lexers/ruby.py | 34 ++++++++++++++-------------- + pygments/lexers/scripting.py | 8 +++---- + pygments/lexers/supercollider.py | 4 ++-- + pygments/lexers/templates.py | 24 ++++++++++---------- + pygments/lexers/textedit.py | 4 ++-- + pygments/lexers/urbi.py | 4 ++-- + pygments/lexers/webmisc.py | 4 ++-- + pygments/lexers/x10.py | 2 +- + 25 files changed, 107 insertions(+), 107 deletions(-) + +diff --git a/pygments/lexers/actionscript.py b/pygments/lexers/actionscript.py +index f4b4964..7992358 100644 +--- a/pygments/lexers/actionscript.py ++++ b/pygments/lexers/actionscript.py +@@ -37,7 +37,7 @@ class ActionScriptLexer(RegexLexer): + (r'\s+', Text), + 
(r'//.*?\n', Comment.Single), + (r'/\*.*?\*/', Comment.Multiline), +- (r'/(\\\\|\\/|[^/\n])*/[gim]*', String.Regex), ++ (r'/(\\\\|\\[^\\]|[^/\\\n])*/[gim]*', String.Regex), + (r'[~^*!%&<>|+=:;,/?\\-]+', Operator), + (r'[{}\[\]();.]+', Punctuation), + (words(( +@@ -144,7 +144,7 @@ class ActionScript3Lexer(RegexLexer): + bygroups(Keyword, Text, Keyword.Type, Text, Operator)), + (r'//.*?\n', Comment.Single), + (r'/\*.*?\*/', Comment.Multiline), +- (r'/(\\\\|\\/|[^\n])*/[gisx]*', String.Regex), ++ (r'/(\\\\|\\[^\\]|[^\\\n])*/[gisx]*', String.Regex), + (r'(\.)(' + identifier + r')', bygroups(Operator, Name.Attribute)), + (r'(case|default|for|each|in|while|do|break|return|continue|if|else|' + r'throw|try|catch|with|new|typeof|arguments|instanceof|this|' +diff --git a/pygments/lexers/ambient.py b/pygments/lexers/ambient.py +index 7d42d12..d2cc06a 100644 +--- a/pygments/lexers/ambient.py ++++ b/pygments/lexers/ambient.py +@@ -44,7 +44,7 @@ class AmbientTalkLexer(RegexLexer): + (builtin, Name.Builtin), + (r'(true|false|nil)\b', Keyword.Constant), + (r'(~|lobby|jlobby|/)\.', Keyword.Constant, 'namespace'), +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r'\|', Punctuation, 'arglist'), + (r'<:|[*^!%&<>+=,./?-]|:=', Operator), + (r"`[a-zA-Z_]\w*", String.Symbol), +diff --git a/pygments/lexers/boa.py b/pygments/lexers/boa.py +index a57c0e4..2425583 100644 +--- a/pygments/lexers/boa.py ++++ b/pygments/lexers/boa.py +@@ -92,9 +92,9 @@ class BoaLexer(RegexLexer): + (classes, Name.Classes), + (words(operators), Operator), + (r'[][(),;{}\\.]', Punctuation), +- (r'"(\\\\|\\"|[^"])*"', String), +- (r'`(\\\\|\\`|[^`])*`', String), +- (words(string_sep), String.Delimeter), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"`(\\\\|\\[^\\]|[^`\\])*`", String.Backtick), ++ (words(string_sep), String.Delimiter), + (r'[a-zA-Z_]+', Name.Variable), + (r'[0-9]+', Number.Integer), + (r'\s+?', Text), # Whitespace +diff --git a/pygments/lexers/configs.py 
b/pygments/lexers/configs.py +index 0911b6e..0bae4eb 100644 +--- a/pygments/lexers/configs.py ++++ b/pygments/lexers/configs.py +@@ -909,7 +909,7 @@ class TOMLLexer(RegexLexer): + (r'\s+', Text), + (r'#.*?$', Comment.Single), + # Basic string +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + # Literal string + (r'\'\'\'(.*)\'\'\'', String), + (r'\'[^\']*\'', String), +diff --git a/pygments/lexers/d.py b/pygments/lexers/d.py +index b14f7dc..f833e5e 100644 +--- a/pygments/lexers/d.py ++++ b/pygments/lexers/d.py +@@ -93,7 +93,7 @@ class DLexer(RegexLexer): + # -- AlternateWysiwygString + (r'`[^`]*`[cwd]?', String), + # -- DoubleQuotedString +- (r'"(\\\\|\\"|[^"])*"[cwd]?', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"[cwd]?', String), + # -- EscapeSequence + (r"\\(['\"?\\abfnrtv]|x[0-9a-fA-F]{2}|[0-7]{1,3}" + r"|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|&\w+;)", +@@ -224,7 +224,7 @@ class CrocLexer(RegexLexer): + (r'@`(``|[^`])*`', String), + (r"@'(''|[^'])*'", String), + # -- DoubleQuotedString +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + # Tokens + (r'(~=|\^=|%=|\*=|==|!=|>>>=|>>>|>>=|>>|>=|<=>|\?=|-\>' + r'|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.|/=)' +diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py +index 458a9eb..c4d2077 100644 +--- a/pygments/lexers/dotnet.py ++++ b/pygments/lexers/dotnet.py +@@ -88,7 +88,7 @@ class CSharpLexer(RegexLexer): + (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), + (r'[{}]', Punctuation), + (r'@"(""|[^"])*"', String), +- (r'"(\\\\|\\"|[^"\n])*["\n]', String), ++ (r'"(\\\\|\\[^\\]|[^"\\\n])*["\n]', String), + (r"'\\.'|'[^\\]'", String.Char), + (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?" 
+ r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number), +@@ -213,7 +213,7 @@ class NemerleLexer(RegexLexer): + (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), + (r'[{}]', Punctuation), + (r'@"(""|[^"])*"', String), +- (r'"(\\\\|\\"|[^"\n])*["\n]', String), ++ (r'"(\\\\|\\[^\\]|[^"\\\n])*["\n]', String), + (r"'\\.'|'[^\\]'", String.Char), + (r"0[xX][0-9a-fA-F]+[Ll]?", Number), + (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFLdD]?", Number), +@@ -315,8 +315,8 @@ class BooLexer(RegexLexer): + (r'\\\n', Text), + (r'\\', Text), + (r'(in|is|and|or|not)\b', Operator.Word), +- (r'/(\\\\|\\/|[^/\s])/', String.Regex), +- (r'@/(\\\\|\\/|[^/])*/', String.Regex), ++ (r'/(\\\\|\\[^\\]|[^/\\\s])/', String.Regex), ++ (r'@/(\\\\|\\[^\\]|[^/\\])*/', String.Regex), + (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator), + (r'(as|abstract|callable|constructor|destructor|do|import|' + r'enum|event|final|get|interface|internal|of|override|' +diff --git a/pygments/lexers/dsls.py b/pygments/lexers/dsls.py +index 0af3c6c..0e4ba40 100644 +--- a/pygments/lexers/dsls.py ++++ b/pygments/lexers/dsls.py +@@ -632,7 +632,7 @@ class AlloyLexer(RegexLexer): + (iden_rex, Name), + (r'[:,]', Punctuation), + (r'[0-9]+', Number.Integer), +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r'\n', Text), + ] + } +@@ -827,7 +827,7 @@ class FlatlineLexer(RegexLexer): + (r'0x-?[a-f\d]+', Number.Hex), + + # strings, symbols and characters +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r"\\(.|[a-z]+)", String.Char), + + # expression template placeholder +diff --git a/pygments/lexers/go.py b/pygments/lexers/go.py +index f6bb7fc..ebb34b5 100644 +--- a/pygments/lexers/go.py ++++ b/pygments/lexers/go.py +@@ -90,7 +90,7 @@ class GoLexer(RegexLexer): + # -- raw_string_lit + (r'`[^`]*`', String), + # -- interpreted_string_lit +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + # Tokens + (r'(<<=|>>=|<<|>>|<=|>=|&\^=|&\^|\+=|-=|\*=|/=|%=|&=|\|=|&&|\|\|' 
+ r'|<-|\+\+|--|==|!=|:=|\.\.\.|[+\-*/%&])', Operator), +diff --git a/pygments/lexers/graphics.py b/pygments/lexers/graphics.py +index b0b9145..61031a4 100644 +--- a/pygments/lexers/graphics.py ++++ b/pygments/lexers/graphics.py +@@ -425,7 +425,7 @@ class AsymptoteLexer(RegexLexer): + ], + 'statements': [ + # simple string (TeX friendly) +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + # C style string (with character escapes) + (r"'", String, 'string'), + (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), +@@ -775,7 +775,7 @@ class PovrayLexer(RegexLexer): + (r'[0-9]+\.[0-9]*', Number.Float), + (r'\.[0-9]+', Number.Float), + (r'[0-9]+', Number.Integer), +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r'\s+', Text), + ] + } +diff --git a/pygments/lexers/haxe.py b/pygments/lexers/haxe.py +index b357508..e85e61c 100644 +--- a/pygments/lexers/haxe.py ++++ b/pygments/lexers/haxe.py +@@ -467,7 +467,7 @@ class HaxeLexer(ExtendedRegexLexer): + (r'"', String.Double, ('#pop', 'expr-chain', 'string-double')), + + # EReg +- (r'~/(\\\\|\\/|[^/\n])*/[gimsu]*', String.Regex, ('#pop', 'expr-chain')), ++ (r'~/(\\\\|\\[^\\]|[^/\\\n])*/[gimsu]*', String.Regex, ('#pop', 'expr-chain')), + + # Array + (r'\[', Punctuation, ('#pop', 'expr-chain', 'array-decl')), +@@ -722,7 +722,7 @@ class HaxeLexer(ExtendedRegexLexer): + (r'"', String.Double, ('#pop', 'string-double')), + + # EReg +- (r'~/(\\\\|\\/|[^/\n])*/[gim]*', String.Regex, '#pop'), ++ (r'~/(\\\\|\\[^\\]|[^/\\\n])*/[gim]*', String.Regex, '#pop'), + + # Array + (r'\[', Operator, ('#pop', 'array-decl')), +diff --git a/pygments/lexers/iolang.py b/pygments/lexers/iolang.py +index f33c871..b108939 100644 +--- a/pygments/lexers/iolang.py ++++ b/pygments/lexers/iolang.py +@@ -37,7 +37,7 @@ class IoLexer(RegexLexer): + (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), + (r'/\+', Comment.Multiline, 'nestedcomment'), + # DoubleQuotedString +- (r'"(\\\\|\\"|[^"])*"', 
String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + # Operators + (r'::=|:=|=|\(|\)|;|,|\*|-|\+|>|<|@|!|/|\||\^|\.|%|&|\[|\]|\{|\}', + Operator), +diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py +index 7f6d166..f008c1d 100644 +--- a/pygments/lexers/jvm.py ++++ b/pygments/lexers/jvm.py +@@ -285,12 +285,12 @@ class ScalaLexer(RegexLexer): + (r'(import|package)(\s+)', bygroups(Keyword, Text), 'import'), + (r'(type)(\s+)', bygroups(Keyword, Text), 'type'), + (r'""".*?"""(?!")', String), +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char), + (u"'%s" % idrest, Text.Symbol), + (r'[fs]"""', String, 'interptriplestring'), # interpolated strings + (r'[fs]"', String, 'interpstring'), # interpolated strings +- (r'raw"(\\\\|\\"|[^"])*"', String), # raw strings ++ (r'raw"(\\\\|\\[^\\]|[^"\\])*"', String), # raw strings + # (ur'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator, + # Name.Attribute)), + (idrest, Name), +@@ -612,7 +612,7 @@ class IokeLexer(RegexLexer): + # Symbols + (r':[\w!:?]+', String.Symbol), + (r'[\w!:?]+:(?![\w!?])', String.Other), +- (r':"(\\\\|\\"|[^"])*"', String.Symbol), ++ (r':"(\\\\|\\[^\\]|[^"\\])*"', String.Symbol), + + # Documentation + (r'((?<=fn\()|(?<=fnx\()|(?<=method\()|(?<=macro\()|(?<=lecro\()' +@@ -830,7 +830,7 @@ class ClojureLexer(RegexLexer): + (r'0x-?[abcdef\d]+', Number.Hex), + + # strings, symbols and characters +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r"'" + valid_name, String.Symbol), + (r"\\(.|[a-z]+)", String.Char), + +@@ -973,7 +973,7 @@ class CeylonLexer(RegexLexer): + (r'(class|interface|object|alias)(\s+)', + bygroups(Keyword.Declaration, Text), 'class'), + (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r"'\\.'|'[^\\]'|'\\\{#[0-9a-fA-F]{4}\}'", String.Char), + (r'(\.)([a-z_]\w*)', + 
bygroups(Operator, Name.Attribute)), +@@ -1049,7 +1049,7 @@ class KotlinLexer(RegexLexer): + (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), + (r'[{}]', Punctuation), + (r'@"(""|[^"])*"', String), +- (r'"(\\\\|\\"|[^"\n])*["\n]', String), ++ (r'"(\\\\|\\[^\\]|[^"\\\n])*["\n]', String), + (r"'\\.'|'[^\\]'", String.Char), + (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFL]?|" + r"0[xX][0-9a-fA-F]+[Ll]?", Number), +@@ -1646,8 +1646,8 @@ class SarlLexer(RegexLexer): + (r'(agent|annotation|artifact|behavior|capacity|class|enum|event|interface|skill|space)(\s+)', bygroups(Keyword.Declaration, Text), + 'class'), + (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), +- (r'"(\\\\|\\"|[^"])*"', String), +- (r"'(\\\\|\\'|[^'])*'", String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + (r'[a-zA-Z_]\w*:', Name.Label), + (r'[a-zA-Z_$]\w*', Name), + (r'[~^*!%&\[\](){}<>\|+=:;,./?-]', Operator), +diff --git a/pygments/lexers/lisp.py b/pygments/lexers/lisp.py +index 601d5a5..9e9b9ee 100644 +--- a/pygments/lexers/lisp.py ++++ b/pygments/lexers/lisp.py +@@ -119,7 +119,7 @@ class SchemeLexer(RegexLexer): + # (r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number), + + # strings, symbols and characters +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r"'" + valid_name, String.Symbol), + (r"#\\([()/'\"._!§$%& ?=+-]|[a-zA-Z0-9]+)", String.Char), + +@@ -403,7 +403,7 @@ class HyLexer(RegexLexer): + (r'0[xX][a-fA-F0-9]+', Number.Hex), + + # strings, symbols and characters +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r"'" + valid_name, String.Symbol), + (r"\\(.|[a-z]+)", String.Char), + (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)), +@@ -1490,7 +1490,7 @@ class NewLispLexer(RegexLexer): + (r'\s+', Text), + + # strings, symbols and characters +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + + # braces + (r'\{', String, 
"bracestring"), +@@ -2385,7 +2385,7 @@ class CPSALexer(SchemeLexer): + # (r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number), + + # strings, symbols and characters +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r"'" + valid_name, String.Symbol), + (r"#\\([()/'\"._!§$%& ?=+-]|[a-zA-Z0-9]+)", String.Char), + +@@ -2598,7 +2598,7 @@ class XtlangLexer(RegexLexer): + (r'(#b|#o|#x)[\d.]+', Number), + + # strings +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + + # true/false constants + (r'(#t|#f)', Name.Constant), +@@ -2669,7 +2669,7 @@ class FennelLexer(RegexLexer): + (r'-?\d+\.\d+', Number.Float), + (r'-?\d+', Number.Integer), + +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r"'(\\\\|\\'|[^'])*'", String), + + # these are technically strings, but it's worth visually +diff --git a/pygments/lexers/make.py b/pygments/lexers/make.py +index f67f109..8b9477d 100644 +--- a/pygments/lexers/make.py ++++ b/pygments/lexers/make.py +@@ -93,8 +93,8 @@ class BaseMakefileLexer(RegexLexer): + (r'([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)', + bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))), + # strings +- (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double), +- (r"(?s)'(\\\\|\\.|[^'\\])*'", String.Single), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + # targets + (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text), + 'block-header'), +diff --git a/pygments/lexers/parsers.py b/pygments/lexers/parsers.py +index 8bcbfc5..bada329 100644 +--- a/pygments/lexers/parsers.py ++++ b/pygments/lexers/parsers.py +@@ -65,10 +65,10 @@ class RagelLexer(RegexLexer): + (r'[+-]?[0-9]+', Number.Integer), + ], + 'literals': [ +- (r'"(\\\\|\\"|[^"])*"', String), # double quote string +- (r"'(\\\\|\\'|[^'])*'", String), # single quote string +- (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals +- 
(r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), ++ (r'\[(\\\\|\\[^\\]|[^\\\]])*\]', String), # square bracket literals ++ (r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', String.Regex), # regular expressions + ], + 'identifiers': [ + (r'[a-zA-Z_]\w*', Name.Variable), +@@ -107,15 +107,15 @@ class RagelLexer(RegexLexer): + r'[^\\]\\[{}]', # allow escaped { or } + + # strings and comments may safely contain unsafe characters +- r'"(\\\\|\\"|[^"])*"', # double quote string +- r"'(\\\\|\\'|[^'])*'", # single quote string ++ r'"(\\\\|\\[^\\]|[^"\\])*"', ++ r"'(\\\\|\\[^\\]|[^'\\])*'", + r'//.*$\n?', # single line comment + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + r'\#.*$\n?', # ruby comment + + # regular expression: There's no reason for it to start + # with a * and this stops confusion with comments. +- r'/(?!\*)(\\\\|\\/|[^/])*/', ++ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', + + # / is safe now that we've handled regex and javadoc comments + r'/', +@@ -148,12 +148,12 @@ class RagelEmbeddedLexer(RegexLexer): + r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them + + # strings and comments may safely contain unsafe characters +- r'"(\\\\|\\"|[^"])*"', # double quote string +- r"'(\\\\|\\'|[^'])*'", # single quote string ++ r'"(\\\\|\\[^\\]|[^"\\])*"', ++ r"'(\\\\|\\[^\\]|[^'\\])*'", + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + r'//.*$\n?', # single line comment + r'\#.*$\n?', # ruby/ragel comment +- r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression ++ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', # regular expression + + # / is safe now that we've handled regex and javadoc comments + r'/', +@@ -183,7 +183,7 @@ class RagelEmbeddedLexer(RegexLexer): + + # specifically allow regex followed immediately by * + # so it doesn't get mistaken for a comment +- r'/(?!\*)(\\\\|\\/|[^/])*/\*', ++ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/\*', + + # allow / as long as it's 
not followed by another / or by a * + r'/(?=[^/*]|$)', +@@ -194,9 +194,9 @@ class RagelEmbeddedLexer(RegexLexer): + )) + r')+', + + # strings and comments may safely contain unsafe characters +- r'"(\\\\|\\"|[^"])*"', # double quote string +- r"'(\\\\|\\'|[^'])*'", # single quote string +- r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal ++ r'"(\\\\|\\[^\\]|[^"\\])*"', ++ r"'(\\\\|\\[^\\]|[^'\\])*'", ++ r"\[(\\\\|\\[^\\]|[^\]\\])*\]", # square bracket literal + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + r'//.*$\n?', # single line comment + r'\#.*$\n?', # ruby/ragel comment +@@ -422,8 +422,8 @@ class AntlrLexer(RegexLexer): + (r':', Punctuation), + + # literals +- (r"'(\\\\|\\'|[^'])*'", String), +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + (r'<<([^>]|>[^>])>>', String), + # identifiers + # Tokens start with capital letter. +@@ -462,14 +462,14 @@ class AntlrLexer(RegexLexer): + r'[^${}\'"/\\]+', # exclude unsafe characters + + # strings and comments may safely contain unsafe characters +- r'"(\\\\|\\"|[^"])*"', # double quote string +- r"'(\\\\|\\'|[^'])*'", # single quote string ++ r'"(\\\\|\\[^\\]|[^"\\])*"', ++ r"'(\\\\|\\[^\\]|[^'\\])*'", + r'//.*$\n?', # single line comment + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + + # regular expression: There's no reason for it to start + # with a * and this stops confusion with comments. 
+- r'/(?!\*)(\\\\|\\/|[^/])*/', ++ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', + + # backslashes are okay, as long as we are not backslashing a % + r'\\(?!%)', +@@ -489,14 +489,14 @@ class AntlrLexer(RegexLexer): + r'[^$\[\]\'"/]+', # exclude unsafe characters + + # strings and comments may safely contain unsafe characters +- r'"(\\\\|\\"|[^"])*"', # double quote string +- r"'(\\\\|\\'|[^'])*'", # single quote string ++ r'"(\\\\|\\[^\\]|[^"\\])*"', ++ r"'(\\\\|\\[^\\]|[^'\\])*'", + r'//.*$\n?', # single line comment + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + + # regular expression: There's no reason for it to start + # with a * and this stops confusion with comments. +- r'/(?!\*)(\\\\|\\/|[^/])*/', ++ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', + + # Now that we've handled regex and javadoc comments + # it's safe to let / through. +@@ -736,8 +736,8 @@ class TreetopBaseLexer(RegexLexer): + 'rule': [ + include('space'), + include('end'), +- (r'"(\\\\|\\"|[^"])*"', String.Double), +- (r"'(\\\\|\\'|[^'])*'", String.Single), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)), + (r'[A-Za-z_]\w*', Name), + (r'[()]', Punctuation), +diff --git a/pygments/lexers/php.py b/pygments/lexers/php.py +index bd4a237..8b49de7 100644 +--- a/pygments/lexers/php.py ++++ b/pygments/lexers/php.py +@@ -79,8 +79,8 @@ class ZephirLexer(RegexLexer): + (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'[0-9]+', Number.Integer), +- (r'"(\\\\|\\"|[^"])*"', String.Double), +- (r"'(\\\\|\\'|[^'])*'", String.Single), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + ] + } + +diff --git a/pygments/lexers/prolog.py b/pygments/lexers/prolog.py +index 7078362..ee5bf2e 100644 +--- a/pygments/lexers/prolog.py ++++ b/pygments/lexers/prolog.py +@@ -227,7 +227,7 @@ class LogtalkLexer(RegexLexer): + # 
Existential quantifier + (r'\^', Operator), + # Strings +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + # Punctuation + (r'[()\[\],.|]', Text), + # Atoms +@@ -277,7 +277,7 @@ class LogtalkLexer(RegexLexer): + (r"[a-z][a-zA-Z0-9_]*", Text), + (r"'", String, 'quoted_atom'), + # Strings +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + # End of entity-opening directive + (r'([)]\.)', Text, 'root'), + # Scope operator +diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py +index 8bcbde6..ac79a58 100644 +--- a/pygments/lexers/ruby.py ++++ b/pygments/lexers/ruby.py +@@ -108,7 +108,7 @@ class RubyLexer(ExtendedRegexLexer): + # easy ones + (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol), + (words(RUBY_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol), +- (r":'(\\\\|\\'|[^'])*'", String.Symbol), ++ (r":'(\\\\|\\[^\\]|[^'\\])*'", String.Symbol), + (r"'(\\\\|\\'|[^'])*'", String.Single), + (r':"', String.Symbol, 'simple-sym'), + (r'([a-zA-Z_]\w*)(:)(?!:)', +@@ -451,26 +451,26 @@ class FancyLexer(RegexLexer): + tokens = { + # copied from PerlLexer: + 'balanced-regex': [ +- (r'/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex, '#pop'), +- (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'), ++ (r'/(\\\\|\\[^\\]|[^/\\])*/[egimosx]*', String.Regex, '#pop'), ++ (r'!(\\\\|\\[^\\]|[^!\\])*![egimosx]*', String.Regex, '#pop'), + (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'), +- (r'\{(\\\\|\\\}|[^}])*\}[egimosx]*', String.Regex, '#pop'), +- (r'<(\\\\|\\>|[^>])*>[egimosx]*', String.Regex, '#pop'), +- (r'\[(\\\\|\\\]|[^\]])*\][egimosx]*', String.Regex, '#pop'), +- (r'\((\\\\|\\\)|[^)])*\)[egimosx]*', String.Regex, '#pop'), +- (r'@(\\\\|\\@|[^@])*@[egimosx]*', String.Regex, '#pop'), +- (r'%(\\\\|\\%|[^%])*%[egimosx]*', String.Regex, '#pop'), +- (r'\$(\\\\|\\\$|[^$])*\$[egimosx]*', String.Regex, '#pop'), ++ (r'\{(\\\\|\\[^\\]|[^}\\])*\}[egimosx]*', String.Regex, '#pop'), ++ (r'<(\\\\|\\[^\\]|[^>\\])*>[egimosx]*', 
String.Regex, '#pop'), ++ (r'\[(\\\\|\\[^\\]|[^\]\\])*\][egimosx]*', String.Regex, '#pop'), ++ (r'\((\\\\|\\[^\\]|[^)\\])*\)[egimosx]*', String.Regex, '#pop'), ++ (r'@(\\\\|\\[^\\]|[^@\\])*@[egimosx]*', String.Regex, '#pop'), ++ (r'%(\\\\|\\[^\\]|[^%\\])*%[egimosx]*', String.Regex, '#pop'), ++ (r'\$(\\\\|\\[^\\]|[^$\\])*\$[egimosx]*', String.Regex, '#pop'), + ], + 'root': [ + (r'\s+', Text), + + # balanced delimiters (copied from PerlLexer): +- (r's\{(\\\\|\\\}|[^}])*\}\s*', String.Regex, 'balanced-regex'), +- (r's<(\\\\|\\>|[^>])*>\s*', String.Regex, 'balanced-regex'), +- (r's\[(\\\\|\\\]|[^\]])*\]\s*', String.Regex, 'balanced-regex'), +- (r's\((\\\\|\\\)|[^)])*\)\s*', String.Regex, 'balanced-regex'), +- (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex), ++ (r's\{(\\\\|\\[^\\]|[^}\\])*\}\s*', String.Regex, 'balanced-regex'), ++ (r's<(\\\\|\\[^\\]|[^>\\])*>\s*', String.Regex, 'balanced-regex'), ++ (r's\[(\\\\|\\[^\\]|[^\]\\])*\]\s*', String.Regex, 'balanced-regex'), ++ (r's\((\\\\|\\[^\\]|[^)\\])*\)\s*', String.Regex, 'balanced-regex'), ++ (r'm?/(\\\\|\\[^\\]|[^///\n])*/[gcimosx]*', String.Regex), + (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'), + + # Comments +@@ -478,9 +478,9 @@ class FancyLexer(RegexLexer): + # Symbols + (r'\'([^\'\s\[\](){}]+|\[\])', String.Symbol), + # Multi-line DoubleQuotedString +- (r'"""(\\\\|\\"|[^"])*"""', String), ++ (r'"""(\\\\|\\[^\\]|[^\\])*?"""', String), + # DoubleQuotedString +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + # keywords + (r'(def|class|try|catch|finally|retry|return|return_local|match|' + r'case|->|=>)\b', Keyword), +diff --git a/pygments/lexers/scripting.py b/pygments/lexers/scripting.py +index a340f8e..90701e7 100644 +--- a/pygments/lexers/scripting.py ++++ b/pygments/lexers/scripting.py +@@ -284,7 +284,7 @@ class ChaiscriptLexer(RegexLexer): + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'[0-9]+', Number.Integer), + (r'"', String.Double, 'dqstring'), +- (r"'(\\\\|\\'|[^'])*'", 
String.Single), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + ], + 'dqstring': [ + (r'\$\{[^"}]+?\}', String.Interpol), +@@ -690,7 +690,7 @@ class AppleScriptLexer(RegexLexer): + (r'\b(%s)s?\b' % '|'.join(StudioClasses), Name.Builtin), + (r'\b(%s)\b' % '|'.join(StudioCommands), Name.Builtin), + (r'\b(%s)\b' % '|'.join(References), Name.Builtin), +- (r'"(\\\\|\\"|[^"])*"', String.Double), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r'\b(%s)\b' % Identifiers, Name.Variable), + (r'[-+]?(\d+\.\d*|\d*\.\d+)(E[-+][0-9]+)?', Number.Float), + (r'[-+]?\d+', Number.Integer), +@@ -834,7 +834,7 @@ class MOOCodeLexer(RegexLexer): + # Numbers + (r'(0|[1-9][0-9_]*)', Number.Integer), + # Strings +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + # exceptions + (r'(E_PERM|E_DIV)', Name.Exception), + # db-refs +@@ -925,7 +925,7 @@ class HybrisLexer(RegexLexer): + 'Runnable', 'CGI', 'ClientSocket', 'Socket', 'ServerSocket', + 'File', 'Console', 'Directory', 'Exception'), suffix=r'\b'), + Keyword.Type), +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), + (r'(\.)([a-zA-Z_]\w*)', + bygroups(Operator, Name.Attribute)), +diff --git a/pygments/lexers/supercollider.py b/pygments/lexers/supercollider.py +index d0d033a..ed4218a 100644 +--- a/pygments/lexers/supercollider.py ++++ b/pygments/lexers/supercollider.py +@@ -84,7 +84,7 @@ class SuperColliderLexer(RegexLexer): + (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'[0-9]+', Number.Integer), +- (r'"(\\\\|\\"|[^"])*"', String.Double), +- (r"'(\\\\|\\'|[^'])*'", String.Single), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + ] + } +diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py +index d909662..6cfed37 100644 +--- a/pygments/lexers/templates.py ++++ b/pygments/lexers/templates.py +@@ -179,8 +179,8 
@@ class SmartyLexer(RegexLexer): + (r'(true|false|null)\b', Keyword.Constant), + (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" + r"0[xX][0-9a-fA-F]+[Ll]?", Number), +- (r'"(\\\\|\\"|[^"])*"', String.Double), +- (r"'(\\\\|\\'|[^'])*'", String.Single), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + (r'[a-zA-Z_]\w*', Name.Attribute) + ] + } +@@ -252,8 +252,8 @@ class VelocityLexer(RegexLexer): + (r'\$!?\{?', Punctuation, 'variable'), + (r'\s+', Text), + (r'[,:]', Punctuation), +- (r'"(\\\\|\\"|[^"])*"', String.Double), +- (r"'(\\\\|\\'|[^'])*'", String.Single), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + (r"0[xX][0-9a-fA-F]+[Ll]?", Number), + (r"\b[0-9]+\b", Number), + (r'(true|false|null)\b', Keyword.Constant), +@@ -373,8 +373,8 @@ class DjangoLexer(RegexLexer): + (r'(loop|block|super|forloop)\b', Name.Builtin), + (r'[a-zA-Z_][\w-]*', Name.Variable), + (r'\.\w+', Name.Variable), +- (r':?"(\\\\|\\"|[^"])*"', String.Double), +- (r":?'(\\\\|\\'|[^'])*'", String.Single), ++ (r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + (r'([{}()\[\]+\-*/%,:~]|[><=]=?|!=)', Operator), + (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" + r"0[xX][0-9a-fA-F]+[Ll]?", Number), +@@ -1852,8 +1852,8 @@ class HandlebarsLexer(RegexLexer): + include('variable'), + + # borrowed from DjangoLexer +- (r':?"(\\\\|\\"|[^"])*"', String.Double), +- (r":?'(\\\\|\\'|[^'])*'", String.Single), ++ (r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" + r"0[xX][0-9a-fA-F]+[Ll]?", Number), + ] +@@ -2165,8 +2165,8 @@ class TwigLexer(RegexLexer): + (_ident_inner, Name.Variable), + (r'\.' 
+ _ident_inner, Name.Variable), + (r'\.[0-9]+', Number), +- (r':?"(\\\\|\\"|[^"])*"', String.Double), +- (r":?'(\\\\|\\'|[^'])*'", String.Single), ++ (r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + (r'([{}()\[\]+\-*/,:~%]|\.\.|\?|:|\*\*|\/\/|!=|[><=]=?)', Operator), + (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" + r"0[xX][0-9a-fA-F]+[Ll]?", Number), +@@ -2245,8 +2245,8 @@ class Angular2Lexer(RegexLexer): + + # Literals + (r':?(true|false)', String.Boolean), +- (r':?"(\\\\|\\"|[^"])*"', String.Double), +- (r":?'(\\\\|\\'|[^'])*'", String.Single), ++ (r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" + r"0[xX][0-9a-fA-F]+[Ll]?", Number), + +diff --git a/pygments/lexers/textedit.py b/pygments/lexers/textedit.py +index 3c6fb57..04c8015 100644 +--- a/pygments/lexers/textedit.py ++++ b/pygments/lexers/textedit.py +@@ -69,8 +69,8 @@ class AwkLexer(RegexLexer): + (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'[0-9]+', Number.Integer), +- (r'"(\\\\|\\"|[^"])*"', String.Double), +- (r"'(\\\\|\\'|[^'])*'", String.Single), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + ] + } + +diff --git a/pygments/lexers/urbi.py b/pygments/lexers/urbi.py +index 72349cb..28b358d 100644 +--- a/pygments/lexers/urbi.py ++++ b/pygments/lexers/urbi.py +@@ -117,11 +117,11 @@ class UrbiscriptLexer(ExtendedRegexLexer): + ], + 'string.double': [ + (r'((?:\\\\|\\"|[^"])*?)(\\B\((\d+)\)\()', blob_callback), +- (r'(\\\\|\\"|[^"])*?"', String.Double, '#pop'), ++ (r'(\\\\|\\[^\\]|[^"\\])*?"', String.Double, '#pop'), + ], + 'string.single': [ + (r"((?:\\\\|\\'|[^'])*?)(\\B\((\d+)\)\()", blob_callback), +- (r"(\\\\|\\'|[^'])*?'", String.Single, '#pop'), ++ (r"(\\\\|\\[^\\]|[^'\\])*?'", String.Single, '#pop'), + ], + # from 
http://pygments.org/docs/lexerdevelopment/#changing-states + 'comment': [ +diff --git a/pygments/lexers/webmisc.py b/pygments/lexers/webmisc.py +index b39334b..a1a73f4 100644 +--- a/pygments/lexers/webmisc.py ++++ b/pygments/lexers/webmisc.py +@@ -855,8 +855,8 @@ class QmlLexer(RegexLexer): + (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'[0-9]+', Number.Integer), +- (r'"(\\\\|\\"|[^"])*"', String.Double), +- (r"'(\\\\|\\'|[^'])*'", String.Single), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), ++ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + ] + } + +diff --git a/pygments/lexers/x10.py b/pygments/lexers/x10.py +index eac87b1..317c66c 100644 +--- a/pygments/lexers/x10.py ++++ b/pygments/lexers/x10.py +@@ -62,7 +62,7 @@ class X10Lexer(RegexLexer): + (r'\b(%s)\b' % '|'.join(types), Keyword.Type), + (r'\b(%s)\b' % '|'.join(values), Keyword.Constant), + (r'\b(%s)\b' % '|'.join(modifiers), Keyword.Declaration), +- (r'"(\\\\|\\"|[^"])*"', String), ++ (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char), + (r'.', Text) + ], +-- +1.8.3.1 + diff --git a/python-pygments.spec b/python-pygments.spec index 7fa23d7c22a668fd450696780e70c505a804818a..24dcd724967c41a9eece3ade767440f80d276a34 100644 --- a/python-pygments.spec +++ b/python-pygments.spec @@ -16,7 +16,7 @@ need to prettify source code. 
Highlights are: \ Name: python-pygments Summary: Syntax highlighting engine written in Python Version: 2.5.2 -Release: 3 +Release: 4 License: BSD URL: http://pygments.org/ Source0: https://pypi.org/packages/source/P/Pygments/Pygments-%{version}.tar.gz @@ -24,6 +24,7 @@ BuildArch: noarch Patch6000: backport-CVE-2021-20270.patch Patch6001: backport-CVE-2021-27291.patch +Patch6002: backport-weed-out-more-backtracking-string-regexes.patch %description %{_description} @@ -84,6 +85,9 @@ cp -r doc/docs doc/reST %lang(en) %{_mandir}/man1/pygmentize.1* %changelog +* Sun Sep 26 2021 huangduirong - 2.5.2-4 +- fix infinite loop in get_tokens_unprocessed + * Mon Apr 12 2021 shixuantong - 2.5.2-3 - fix CVE-2021-20270 CVE-2021-27291