Development of an internal social media platform with personalised dashboards for students
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ldif.py 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652
  1. """
  2. ldif - generate and parse LDIF data (see RFC 2849)
  3. See https://www.python-ldap.org/ for details.
  4. """
  5. from __future__ import unicode_literals
  6. __version__ = '3.1.0'
  7. __all__ = [
  8. # constants
  9. 'ldif_pattern',
  10. # functions
  11. 'CreateLDIF','ParseLDIF',
  12. # classes
  13. 'LDIFWriter',
  14. 'LDIFParser',
  15. 'LDIFRecordList',
  16. 'LDIFCopy',
  17. ]
  18. import re
  19. from base64 import b64encode, b64decode
  20. from io import StringIO
  21. import warnings
  22. from ldap.compat import urlparse, urlopen
  23. attrtype_pattern = r'[\w;.-]+(;[\w_-]+)*'
  24. attrvalue_pattern = r'(([^,]|\\,)+|".*?")'
  25. attrtypeandvalue_pattern = attrtype_pattern + r'[ ]*=[ ]*' + attrvalue_pattern
  26. rdn_pattern = attrtypeandvalue_pattern + r'([ ]*\+[ ]*' + attrtypeandvalue_pattern + r')*[ ]*'
  27. dn_pattern = rdn_pattern + r'([ ]*,[ ]*' + rdn_pattern + r')*[ ]*'
  28. dn_regex = re.compile('^%s$' % dn_pattern)
  29. ldif_pattern = '^((dn(:|::) %(dn_pattern)s)|(%(attrtype_pattern)s(:|::) .*)$)+' % vars()
  30. MOD_OP_INTEGER = {
  31. 'add':0, # ldap.MOD_ADD
  32. 'delete':1, # ldap.MOD_DELETE
  33. 'replace':2, # ldap.MOD_REPLACE
  34. 'increment':3, # ldap.MOD_INCREMENT
  35. }
  36. MOD_OP_STR = {
  37. 0:'add',1:'delete',2:'replace',3:'increment'
  38. }
  39. CHANGE_TYPES = ['add','delete','modify','modrdn']
  40. valid_changetype_dict = {}
  41. for c in CHANGE_TYPES:
  42. valid_changetype_dict[c]=None
  43. def is_dn(s):
  44. """
  45. returns 1 if s is a LDAP DN
  46. """
  47. if s=='':
  48. return 1
  49. rm = dn_regex.match(s)
  50. return rm!=None and rm.group(0)==s
  51. SAFE_STRING_PATTERN = b'(^(\000|\n|\r| |:|<)|[\000\n\r\200-\377]+|[ ]+$)'
  52. safe_string_re = re.compile(SAFE_STRING_PATTERN)
  53. def list_dict(l):
  54. """
  55. return a dictionary with all items of l being the keys of the dictionary
  56. """
  57. return {i: None for i in l}
  58. class LDIFWriter:
  59. """
  60. Write LDIF entry or change records to file object
  61. Copy LDIF input to a file output object containing all data retrieved
  62. via URLs
  63. """
  64. def __init__(self,output_file,base64_attrs=None,cols=76,line_sep='\n'):
  65. """
  66. output_file
  67. file object for output; should be opened in *text* mode
  68. base64_attrs
  69. list of attribute types to be base64-encoded in any case
  70. cols
  71. Specifies how many columns a line may have before it's
  72. folded into many lines.
  73. line_sep
  74. String used as line separator
  75. """
  76. self._output_file = output_file
  77. self._base64_attrs = list_dict([a.lower() for a in (base64_attrs or [])])
  78. self._cols = cols
  79. self._last_line_sep = line_sep
  80. self.records_written = 0
  81. def _unfold_lines(self,line):
  82. """
  83. Write string line as one or more folded lines
  84. """
  85. # Check maximum line length
  86. line_len = len(line)
  87. if line_len<=self._cols:
  88. self._output_file.write(line)
  89. self._output_file.write(self._last_line_sep)
  90. else:
  91. # Fold line
  92. pos = self._cols
  93. self._output_file.write(line[0:min(line_len,self._cols)])
  94. self._output_file.write(self._last_line_sep)
  95. while pos<line_len:
  96. self._output_file.write(' ')
  97. self._output_file.write(line[pos:min(line_len,pos+self._cols-1)])
  98. self._output_file.write(self._last_line_sep)
  99. pos = pos+self._cols-1
  100. return # _unfold_lines()
  101. def _needs_base64_encoding(self,attr_type,attr_value):
  102. """
  103. returns 1 if attr_value has to be base-64 encoded because
  104. of special chars or because attr_type is in self._base64_attrs
  105. """
  106. return attr_type.lower() in self._base64_attrs or \
  107. not safe_string_re.search(attr_value) is None
  108. def _unparseAttrTypeandValue(self,attr_type,attr_value):
  109. """
  110. Write a single attribute type/value pair
  111. attr_type
  112. attribute type (text)
  113. attr_value
  114. attribute value (bytes)
  115. """
  116. if self._needs_base64_encoding(attr_type,attr_value):
  117. # Encode with base64
  118. encoded = b64encode(attr_value).decode('ascii')
  119. encoded = encoded.replace('\n','')
  120. self._unfold_lines(':: '.join([attr_type, encoded]))
  121. else:
  122. self._unfold_lines(': '.join([attr_type, attr_value.decode('ascii')]))
  123. return # _unparseAttrTypeandValue()
  124. def _unparseEntryRecord(self,entry):
  125. """
  126. entry
  127. dictionary holding an entry
  128. """
  129. for attr_type, values in sorted(entry.items()):
  130. for attr_value in values:
  131. self._unparseAttrTypeandValue(attr_type,attr_value)
  132. def _unparseChangeRecord(self,modlist):
  133. """
  134. modlist
  135. list of additions (2-tuple) or modifications (3-tuple)
  136. """
  137. mod_len = len(modlist[0])
  138. if mod_len==2:
  139. changetype = 'add'
  140. elif mod_len==3:
  141. changetype = 'modify'
  142. else:
  143. raise ValueError("modlist item of wrong length: %d" % (mod_len))
  144. self._unparseAttrTypeandValue('changetype',changetype.encode('ascii'))
  145. for mod in modlist:
  146. if mod_len==2:
  147. mod_type,mod_vals = mod
  148. elif mod_len==3:
  149. mod_op,mod_type,mod_vals = mod
  150. self._unparseAttrTypeandValue(MOD_OP_STR[mod_op],
  151. mod_type.encode('ascii'))
  152. else:
  153. raise ValueError("Subsequent modlist item of wrong length")
  154. if mod_vals:
  155. for mod_val in mod_vals:
  156. self._unparseAttrTypeandValue(mod_type,mod_val)
  157. if mod_len==3:
  158. self._output_file.write('-'+self._last_line_sep)
  159. def unparse(self,dn,record):
  160. """
  161. dn
  162. string-representation of distinguished name
  163. record
  164. Either a dictionary holding the LDAP entry {attrtype:record}
  165. or a list with a modify list like for LDAPObject.modify().
  166. """
  167. # Start with line containing the distinguished name
  168. dn = dn.encode('utf-8')
  169. self._unparseAttrTypeandValue('dn', dn)
  170. # Dispatch to record type specific writers
  171. if isinstance(record,dict):
  172. self._unparseEntryRecord(record)
  173. elif isinstance(record,list):
  174. self._unparseChangeRecord(record)
  175. else:
  176. raise ValueError('Argument record must be dictionary or list instead of %s' % (repr(record)))
  177. # Write empty line separating the records
  178. self._output_file.write(self._last_line_sep)
  179. # Count records written
  180. self.records_written = self.records_written+1
  181. return # unparse()
  182. def CreateLDIF(dn,record,base64_attrs=None,cols=76):
  183. """
  184. Create LDIF single formatted record including trailing empty line.
  185. This is a compatibility function.
  186. dn
  187. string-representation of distinguished name
  188. record
  189. Either a dictionary holding the LDAP entry {attrtype:record}
  190. or a list with a modify list like for LDAPObject.modify().
  191. base64_attrs
  192. list of attribute types to be base64-encoded in any case
  193. cols
  194. Specifies how many columns a line may have before it's
  195. folded into many lines.
  196. """
  197. warnings.warn(
  198. 'ldif.CreateLDIF() is deprecated. Use LDIFWriter.unparse() instead. It '
  199. 'will be removed in python-ldap 3.1',
  200. category=DeprecationWarning,
  201. stacklevel=2,
  202. )
  203. f = StringIO()
  204. ldif_writer = LDIFWriter(f,base64_attrs,cols,'\n')
  205. ldif_writer.unparse(dn,record)
  206. s = f.getvalue()
  207. f.close()
  208. return s
  209. class LDIFParser:
  210. """
  211. Base class for a LDIF parser. Applications should sub-class this
  212. class and override method handle() to implement something meaningful.
  213. Public class attributes:
  214. records_read
  215. Counter for records processed so far
  216. """
  217. def __init__(
  218. self,
  219. input_file,
  220. ignored_attr_types=None,
  221. max_entries=0,
  222. process_url_schemes=None,
  223. line_sep='\n'
  224. ):
  225. """
  226. Parameters:
  227. input_file
  228. File-object to read the LDIF input from
  229. ignored_attr_types
  230. Attributes with these attribute type names will be ignored.
  231. max_entries
  232. If non-zero specifies the maximum number of entries to be
  233. read from f.
  234. process_url_schemes
  235. List containing strings with URLs schemes to process with urllib.
  236. An empty list turns off all URL processing and the attribute
  237. is ignored completely.
  238. line_sep
  239. String used as line separator
  240. """
  241. self._input_file = input_file
  242. # Detect whether the file is open in text or bytes mode.
  243. self._file_sends_bytes = isinstance(self._input_file.read(0), bytes)
  244. self._max_entries = max_entries
  245. self._process_url_schemes = list_dict([s.lower() for s in (process_url_schemes or [])])
  246. self._ignored_attr_types = list_dict([a.lower() for a in (ignored_attr_types or [])])
  247. self._last_line_sep = line_sep
  248. self.version = None
  249. # Initialize counters
  250. self.line_counter = 0
  251. self.byte_counter = 0
  252. self.records_read = 0
  253. self.changetype_counter = {}.fromkeys(CHANGE_TYPES,0)
  254. # Store some symbols for better performance
  255. self._b64decode = b64decode
  256. # Read very first line
  257. try:
  258. self._last_line = self._readline()
  259. except EOFError:
  260. self._last_line = ''
  261. def handle(self,dn,entry):
  262. """
  263. Process a single content LDIF record. This method should be
  264. implemented by applications using LDIFParser.
  265. """
  266. pass
  267. def _readline(self):
  268. s = self._input_file.readline()
  269. if self._file_sends_bytes:
  270. # The RFC does not allow UTF-8 values; we support it as a
  271. # non-official, backwards compatibility layer
  272. s = s.decode('utf-8')
  273. self.line_counter = self.line_counter + 1
  274. self.byte_counter = self.byte_counter + len(s)
  275. if not s:
  276. return None
  277. elif s[-2:]=='\r\n':
  278. return s[:-2]
  279. elif s[-1:]=='\n':
  280. return s[:-1]
  281. else:
  282. return s
  283. def _unfold_lines(self):
  284. """
  285. Unfold several folded lines with trailing space into one line
  286. """
  287. if self._last_line is None:
  288. raise EOFError('EOF reached after %d lines (%d bytes)' % (
  289. self.line_counter,
  290. self.byte_counter,
  291. ))
  292. unfolded_lines = [ self._last_line ]
  293. next_line = self._readline()
  294. while next_line and next_line[0]==' ':
  295. unfolded_lines.append(next_line[1:])
  296. next_line = self._readline()
  297. self._last_line = next_line
  298. return ''.join(unfolded_lines)
  299. def _next_key_and_value(self):
  300. """
  301. Parse a single attribute type and value pair from one or
  302. more lines of LDIF data
  303. Returns attr_type (text) and attr_value (bytes)
  304. """
  305. # Reading new attribute line
  306. unfolded_line = self._unfold_lines()
  307. # Ignore comments which can also be folded
  308. while unfolded_line and unfolded_line[0]=='#':
  309. unfolded_line = self._unfold_lines()
  310. if not unfolded_line:
  311. return None,None
  312. if unfolded_line=='-':
  313. return '-',None
  314. try:
  315. colon_pos = unfolded_line.index(':')
  316. except ValueError as e:
  317. raise ValueError('no value-spec in %s' % (repr(unfolded_line)))
  318. attr_type = unfolded_line[0:colon_pos]
  319. # if needed attribute value is BASE64 decoded
  320. value_spec = unfolded_line[colon_pos:colon_pos+2]
  321. if value_spec==': ':
  322. attr_value = unfolded_line[colon_pos+2:].lstrip()
  323. # All values should be valid ascii; we support UTF-8 as a
  324. # non-official, backwards compatibility layer.
  325. attr_value = attr_value.encode('utf-8')
  326. elif value_spec=='::':
  327. # attribute value needs base64-decoding
  328. # base64 makes sens only for ascii
  329. attr_value = unfolded_line[colon_pos+2:]
  330. attr_value = attr_value.encode('ascii')
  331. attr_value = self._b64decode(attr_value)
  332. elif value_spec==':<':
  333. # fetch attribute value from URL
  334. url = unfolded_line[colon_pos+2:].strip()
  335. attr_value = None
  336. if self._process_url_schemes:
  337. u = urlparse(url)
  338. if u[0] in self._process_url_schemes:
  339. attr_value = urlopen(url).read()
  340. else:
  341. # All values should be valid ascii; we support UTF-8 as a
  342. # non-official, backwards compatibility layer.
  343. attr_value = unfolded_line[colon_pos+1:].encode('utf-8')
  344. return attr_type,attr_value
  345. def _consume_empty_lines(self):
  346. """
  347. Consume empty lines until first non-empty line.
  348. Must only be used between full records!
  349. Returns non-empty key-value-tuple.
  350. """
  351. # Local symbol for better performance
  352. next_key_and_value = self._next_key_and_value
  353. # Consume empty lines
  354. try:
  355. k,v = next_key_and_value()
  356. while k is None and v is None:
  357. k,v = next_key_and_value()
  358. except EOFError:
  359. k,v = None,None
  360. return k,v
  361. def parse_entry_records(self):
  362. """
  363. Continuously read and parse LDIF entry records
  364. """
  365. # Local symbol for better performance
  366. next_key_and_value = self._next_key_and_value
  367. try:
  368. # Consume empty lines
  369. k,v = self._consume_empty_lines()
  370. # Consume 'version' line
  371. if k=='version':
  372. self.version = int(v.decode('ascii'))
  373. k,v = self._consume_empty_lines()
  374. except EOFError:
  375. return
  376. # Loop for processing whole records
  377. while k!=None and \
  378. (not self._max_entries or self.records_read<self._max_entries):
  379. # Consume first line which must start with "dn: "
  380. if k!='dn':
  381. raise ValueError('Line %d: First line of record does not start with "dn:": %s' % (self.line_counter,repr(k)))
  382. # Value of a 'dn' field *has* to be valid UTF-8
  383. # k is text, v is bytes.
  384. v = v.decode('utf-8')
  385. if not is_dn(v):
  386. raise ValueError('Line %d: Not a valid string-representation for dn: %s.' % (self.line_counter,repr(v)))
  387. dn = v
  388. entry = {}
  389. # Consume second line of record
  390. k,v = next_key_and_value()
  391. # Loop for reading the attributes
  392. while k!=None:
  393. # Add the attribute to the entry if not ignored attribute
  394. if not k.lower() in self._ignored_attr_types:
  395. try:
  396. entry[k].append(v)
  397. except KeyError:
  398. entry[k]=[v]
  399. # Read the next line within the record
  400. try:
  401. k,v = next_key_and_value()
  402. except EOFError:
  403. k,v = None,None
  404. # handle record
  405. self.handle(dn,entry)
  406. self.records_read = self.records_read + 1
  407. # Consume empty separator line(s)
  408. k,v = self._consume_empty_lines()
  409. return # parse_entry_records()
  410. def parse(self):
  411. """
  412. Invokes LDIFParser.parse_entry_records() for backward compatibility
  413. """
  414. return self.parse_entry_records() # parse()
  415. def handle_modify(self,dn,modops,controls=None):
  416. """
  417. Process a single LDIF record representing a single modify operation.
  418. This method should be implemented by applications using LDIFParser.
  419. """
  420. controls = [] or None
  421. pass
  422. def parse_change_records(self):
  423. # Local symbol for better performance
  424. next_key_and_value = self._next_key_and_value
  425. # Consume empty lines
  426. k,v = self._consume_empty_lines()
  427. # Consume 'version' line
  428. if k=='version':
  429. self.version = int(v)
  430. k,v = self._consume_empty_lines()
  431. # Loop for processing whole records
  432. while k!=None and \
  433. (not self._max_entries or self.records_read<self._max_entries):
  434. # Consume first line which must start with "dn: "
  435. if k!='dn':
  436. raise ValueError('Line %d: First line of record does not start with "dn:": %s' % (self.line_counter,repr(k)))
  437. # Value of a 'dn' field *has* to be valid UTF-8
  438. # k is text, v is bytes.
  439. v = v.decode('utf-8')
  440. if not is_dn(v):
  441. raise ValueError('Line %d: Not a valid string-representation for dn: %s.' % (self.line_counter,repr(v)))
  442. dn = v
  443. # Consume second line of record
  444. k,v = next_key_and_value()
  445. # Read "control:" lines
  446. controls = []
  447. while k!=None and k=='control':
  448. # v is still bytes, spec says it should be valid utf-8; decode it.
  449. v = v.decode('utf-8')
  450. try:
  451. control_type,criticality,control_value = v.split(' ',2)
  452. except ValueError:
  453. control_value = None
  454. control_type,criticality = v.split(' ',1)
  455. controls.append((control_type,criticality,control_value))
  456. k,v = next_key_and_value()
  457. # Determine changetype first
  458. changetype = None
  459. # Consume changetype line of record
  460. if k=='changetype':
  461. # v is still bytes, spec says it should be valid utf-8; decode it.
  462. v = v.decode('utf-8')
  463. if not v in valid_changetype_dict:
  464. raise ValueError('Invalid changetype: %s' % repr(v))
  465. changetype = v
  466. k,v = next_key_and_value()
  467. if changetype=='modify':
  468. # From here we assume a change record is read with changetype: modify
  469. modops = []
  470. try:
  471. # Loop for reading the list of modifications
  472. while k!=None:
  473. # Extract attribute mod-operation (add, delete, replace)
  474. try:
  475. modop = MOD_OP_INTEGER[k]
  476. except KeyError:
  477. raise ValueError('Line %d: Invalid mod-op string: %s' % (self.line_counter,repr(k)))
  478. # we now have the attribute name to be modified
  479. # v is still bytes, spec says it should be valid utf-8; decode it.
  480. v = v.decode('utf-8')
  481. modattr = v
  482. modvalues = []
  483. try:
  484. k,v = next_key_and_value()
  485. except EOFError:
  486. k,v = None,None
  487. while k==modattr:
  488. modvalues.append(v)
  489. try:
  490. k,v = next_key_and_value()
  491. except EOFError:
  492. k,v = None,None
  493. modops.append((modop,modattr,modvalues or None))
  494. k,v = next_key_and_value()
  495. if k=='-':
  496. # Consume next line
  497. k,v = next_key_and_value()
  498. except EOFError:
  499. k,v = None,None
  500. if modops:
  501. # append entry to result list
  502. self.handle_modify(dn,modops,controls)
  503. else:
  504. # Consume the unhandled change record
  505. while k!=None:
  506. k,v = next_key_and_value()
  507. # Consume empty separator line(s)
  508. k,v = self._consume_empty_lines()
  509. # Increment record counters
  510. try:
  511. self.changetype_counter[changetype] = self.changetype_counter[changetype] + 1
  512. except KeyError:
  513. self.changetype_counter[changetype] = 1
  514. self.records_read = self.records_read + 1
  515. return # parse_change_records()
  516. class LDIFRecordList(LDIFParser):
  517. """
  518. Collect all records of a LDIF file. It can be a memory hog!
  519. Records are stored in :attr:`.all_records` as a single list
  520. of 2-tuples (dn, entry), after calling :meth:`.parse`.
  521. """
  522. def __init__(
  523. self,
  524. input_file,
  525. ignored_attr_types=None,max_entries=0,process_url_schemes=None
  526. ):
  527. LDIFParser.__init__(self,input_file,ignored_attr_types,max_entries,process_url_schemes)
  528. #: List storing parsed records.
  529. self.all_records = []
  530. self.all_modify_changes = []
  531. def handle(self,dn,entry):
  532. """
  533. Append a single record to the list of all records (:attr:`.all_records`).
  534. """
  535. self.all_records.append((dn,entry))
  536. def handle_modify(self,dn,modops,controls=None):
  537. """
  538. Process a single LDIF record representing a single modify operation.
  539. This method should be implemented by applications using LDIFParser.
  540. """
  541. controls = [] or None
  542. self.all_modify_changes.append((dn,modops,controls))
  543. class LDIFCopy(LDIFParser):
  544. """
  545. Copy LDIF input to LDIF output containing all data retrieved
  546. via URLs
  547. """
  548. def __init__(
  549. self,
  550. input_file,output_file,
  551. ignored_attr_types=None,max_entries=0,process_url_schemes=None,
  552. base64_attrs=None,cols=76,line_sep='\n'
  553. ):
  554. """
  555. See LDIFParser.__init__() and LDIFWriter.__init__()
  556. """
  557. LDIFParser.__init__(self,input_file,ignored_attr_types,max_entries,process_url_schemes)
  558. self._output_ldif = LDIFWriter(output_file,base64_attrs,cols,line_sep)
  559. def handle(self,dn,entry):
  560. """
  561. Write single LDIF record to output file.
  562. """
  563. self._output_ldif.unparse(dn,entry)
  564. def ParseLDIF(f,ignore_attrs=None,maxentries=0):
  565. """
  566. Parse LDIF records read from file.
  567. This is a compatibility function.
  568. """
  569. warnings.warn(
  570. 'ldif.ParseLDIF() is deprecated. Use LDIFRecordList.parse() instead. It '
  571. 'will be removed in python-ldap 3.1',
  572. category=DeprecationWarning,
  573. stacklevel=2,
  574. )
  575. ldif_parser = LDIFRecordList(
  576. f,ignored_attr_types=ignore_attrs,max_entries=maxentries,process_url_schemes=0
  577. )
  578. ldif_parser.parse()
  579. return ldif_parser.all_records