""" ldap.schema.tokenizer - Low-level parsing functions for schema element strings See https://www.python-ldap.org/ for details. """ import re TOKENS_FINDALL = re.compile( r"(\()" # opening parenthesis r"|" # or r"(\))" # closing parenthesis r"|" # or r"([^'$()\s]+)" # string of length >= 1 without '$() or whitespace r"|" # or r"('.*?'(?!\w))" # any string or empty string surrounded by single quotes # except if right quote is succeeded by alphanumeric char r"|" # or r"([^\s]+?)", # residue, all non-whitespace strings ).findall def split_tokens(s): """ Returns list of syntax elements with quotes and spaces stripped. """ parts = [] parens = 0 for opar, cpar, unquoted, quoted, residue in TOKENS_FINDALL(s): if unquoted: parts.append(unquoted) elif quoted: parts.append(quoted[1:-1]) elif opar: parens += 1 parts.append(opar) elif cpar: parens -= 1 parts.append(cpar) elif residue == '$': if not parens: raise ValueError("'$' outside parenthesis in %r" % (s)) else: raise ValueError(residue, s) if parens: raise ValueError("Unbalanced parenthesis in %r" % (s)) return parts def extract_tokens(l,known_tokens): """ Returns dictionary of known tokens with all values """ assert l[0].strip()=="(" and l[-1].strip()==")",ValueError(l) result = {} result.update(known_tokens) i = 0 l_len = len(l) while i