7 Commits

Author SHA1 Message Date
36be259177 documentation 2025-12-18 09:40:26 -08:00
4b73dc7730 removed breakpoints 2025-12-17 19:49:02 -08:00
3cf31fa462 autopep 2025-12-17 19:42:32 -08:00
33bd5a0001 autopep 2025-12-17 19:39:38 -08:00
ebdc73198c implementation of tolerant parsing 2025-12-17 19:38:53 -08:00
cf1b3fb42c Leaving this for now 2025-12-17 15:05:40 -08:00
6041d4158e Added problem edl 2025-12-17 14:21:51 -08:00
9 changed files with 137 additions and 18 deletions

View File

@@ -14,6 +14,8 @@ The `pycmx` package parses a CMX 3600 EDL and its most common variations.
read. Event number field and source name field sizes are determined read. Event number field and source name field sizes are determined
dynamically for each statement for a high level of compliance at the expense dynamically for each statement for a high level of compliance at the expense
of strictness. of strictness.
* A more relaxed "tolerant" mode allows parsing of an EDL file where columns
use non-standard widths.
* Preserves relationship between events and individual edits/clips. * Preserves relationship between events and individual edits/clips.
* Remark or comment fields with common recognized forms are read and * Remark or comment fields with common recognized forms are read and
available to the client, including clip name and source file data. available to the client, including clip name and source file data.

View File

@@ -1,7 +1,7 @@
.. pycmx documentation master file, created by .. pycmx documentation master file, created by
sphinx-quickstart on Wed Dec 26 21:51:43 2018. sphinx-quickstart on Wed Dec 26 21:51:43 2018.
You can adapt this file completely to your liking, but it should at least You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive. contain the root `toctree` directive.
pycmx - A CMX EDL Parser in Python pycmx - A CMX EDL Parser in Python
==================================== ====================================
@@ -16,6 +16,8 @@ The `pycmx` package parses a CMX 3600 EDL and its most common variations.
read. Event number field and source name field sizes are determined read. Event number field and source name field sizes are determined
dynamically for each statement for a high level of compliance at the expense dynamically for each statement for a high level of compliance at the expense
of strictness. of strictness.
* A more relaxed "tolerant" mode allows parsing of an EDL file where columns
use non-standard widths.
* Preserves relationship between events and individual edits/clips. * Preserves relationship between events and individual edits/clips.
* Remark or comment fields with common recognized forms are read and * Remark or comment fields with common recognized forms are read and
available to the client, including clip name and source file data. available to the client, including clip name and source file data.

View File

@@ -15,7 +15,7 @@ class EditList:
:func:`~pycmx.parse_cmx3600()`. :func:`~pycmx.parse_cmx3600()`.
""" """
def __init__(self, statements): def __init__(self, statements: list):
self.title_statement: StmtTitle = statements[0] self.title_statement: StmtTitle = statements[0]
self.event_statements = statements[1:] self.event_statements = statements[1:]

View File

@@ -70,7 +70,6 @@ class Event:
the_zip.append(trans_names) the_zip.append(trans_names)
except IndexError: except IndexError:
the_zip.append([None] * len(edits_audio)) the_zip.append([None] * len(edits_audio))
return [Edit(edit_statement=e1[0], return [Edit(edit_statement=e1[0],
audio_ext_statement=e1[1], audio_ext_statement=e1[1],
clip_name_statement=n1, clip_name_statement=n1,
@@ -105,11 +104,16 @@ class Event:
def _statements_with_audio_ext(self) -> Generator[ def _statements_with_audio_ext(self) -> Generator[
Tuple[StmtEvent, Optional[StmtAudioExt]], None, None]: Tuple[StmtEvent, Optional[StmtAudioExt]], None, None]:
for (s1, s2) in zip(self.statements, self.statements[1:]):
if type(s1) is StmtEvent and type(s2) is StmtAudioExt: if len(self.statements) == 1 and type(self.statements[0]) is StmtEvent:
yield (s1, s2) yield (self.statements[0], None)
elif type(s1) is StmtEvent:
yield (s1, None) else:
for (s1, s2) in zip(self.statements, self.statements[1:]):
if type(s1) is StmtEvent and type(s2) is StmtAudioExt:
yield (s1, s2)
elif type(s1) is StmtEvent:
yield (s1, None)
def _asc_sop_statement(self) -> Optional[StmtCdlSop]: def _asc_sop_statement(self) -> Optional[StmtCdlSop]:
return next((s for s in self.statements if type(s) is StmtCdlSop), return next((s for s in self.statements if type(s) is StmtCdlSop),

View File

@@ -7,12 +7,14 @@ from .parse_cmx_statements import (parse_cmx3600_statements)
from .edit_list import EditList from .edit_list import EditList
def parse_cmx3600(f: TextIO) -> EditList: def parse_cmx3600(f: TextIO, tolerant: bool = False) -> EditList:
""" """
Parse a CMX 3600 EDL. Parse a CMX 3600 EDL.
:param TextIO f: a file-like object, an opened CMX 3600 .EDL file. :param TextIO f: a file-like object, an opened CMX 3600 .EDL file.
:param bool tolerant: If `True`, a relaxed event line parsing method will
be used, in the case the default method fails.
:returns: An :class:`pycmx.edit_list.EditList`. :returns: An :class:`pycmx.edit_list.EditList`.
""" """
statements = parse_cmx3600_statements(f) statements = parse_cmx3600_statements(f, tolerant)
return EditList(statements) return EditList(statements)

View File

@@ -13,12 +13,13 @@ from .statements import (StmtCdlSat, StmtCdlSop, StmtCorruptRemark, StmtFrmc,
from .util import collimate from .util import collimate
def parse_cmx3600_statements(file: TextIO) -> List[object]: def parse_cmx3600_statements(file: TextIO,
tolerant: bool = False) -> List[object]:
""" """
Return a list of every statement in the file argument. Return a list of every statement in the file argument.
""" """
lines = file.readlines() lines = file.readlines()
return [_parse_cmx3600_line(line.strip(), line_number) return [_parse_cmx3600_line(line.strip(), line_number, tolerant)
for (line_number, line) in enumerate(lines)] for (line_number, line) in enumerate(lines)]
@@ -38,7 +39,8 @@ def _edl_column_widths(event_field_length, source_field_length) -> List[int]:
# 8,8,1,4,2,1,4,13,3,1,1] # 8,8,1,4,2,1,4,13,3,1,1]
def _parse_cmx3600_line(line: str, line_number: int) -> object: def _parse_cmx3600_line(line: str, line_number: int,
tolerant: bool = False) -> object:
""" """
Parses a single CMX EDL line. Parses a single CMX EDL line.
@@ -54,9 +56,20 @@ def _parse_cmx3600_line(line: str, line_number: int) -> object:
return _parse_fcm(line, line_number) return _parse_fcm(line, line_number)
if line_matcher is not None: if line_matcher is not None:
event_field_len = len(line_matcher.group(1)) event_field_len = len(line_matcher.group(1))
source_field_len = len(line) - (event_field_len + 65) source_field_len = len(line) - (event_field_len + 65)
return _parse_columns_for_standard_form(line, event_field_len,
source_field_len, line_number) try:
return _parse_columns_for_standard_form(line, event_field_len,
source_field_len,
line_number)
except EventFormError:
if tolerant:
return _parse_columns_tolerant(line, line_number)
else:
return StmtUnrecognized(line, line_number)
if line.startswith("AUD"): if line.startswith("AUD"):
return _parse_extended_audio_channels(line, line_number) return _parse_extended_audio_channels(line, line_number)
if line.startswith("*"): if line.startswith("*"):
@@ -186,6 +199,10 @@ def _parse_split(line: str, line_number):
# return StmtMotionMemory(source="", fps="") # return StmtMotionMemory(source="", fps="")
# #
class EventFormError(RuntimeError):
    """
    Raised when an event line cannot be read using the standard-form
    column layout.
    """
def _parse_unrecognized(line, line_number): def _parse_unrecognized(line, line_number):
return StmtUnrecognized(content=line, line_number=line_number) return StmtUnrecognized(content=line, line_number=line_number)
@@ -193,17 +210,24 @@ def _parse_unrecognized(line, line_number):
def _parse_columns_for_standard_form(line: str, event_field_length: int, def _parse_columns_for_standard_form(line: str, event_field_length: int,
source_field_length: int, source_field_length: int,
line_number: int): line_number: int):
# breakpoint()
col_widths = _edl_column_widths(event_field_length, source_field_length) col_widths = _edl_column_widths(event_field_length, source_field_length)
if sum(col_widths) > len(line): if sum(col_widths) > len(line):
return StmtUnrecognized(content=line, line_number=line_number) raise EventFormError()
column_strings = collimate(line, col_widths) column_strings = collimate(line, col_widths)
channels = column_strings[4].strip()
trans = column_strings[6].strip()
if len(channels) == 0 or len(trans) == 0:
raise EventFormError()
return StmtEvent(event=column_strings[0], return StmtEvent(event=column_strings[0],
source=column_strings[2].strip(), source=column_strings[2].strip(),
channels=column_strings[4].strip(), channels=channels,
trans=column_strings[6].strip(), trans=trans,
trans_op=column_strings[8].strip(), trans_op=column_strings[8].strip(),
source_in=column_strings[10].strip(), source_in=column_strings[10].strip(),
source_out=column_strings[12].strip(), source_out=column_strings[12].strip(),
@@ -213,6 +237,26 @@ def _parse_columns_for_standard_form(line: str, event_field_length: int,
source_field_size=source_field_length) source_field_size=source_field_length)
# Whitespace-delimited event line recognised by the tolerant parser. Compiled
# once at import time rather than on every call to the parser.
#
# Groups: 1 event number, 2 source name, 3 channels, 4 transition,
# 5 transition operand (may be empty), 6-9 source in/out and record in/out.
# The `.` between timecode digit pairs matches any single separator
# character. The source group is lazy and at least 8 characters long, so for
# shorter names it can include surrounding padding.
_TOLERANT_EVENT_PATTERN = re.compile(
    r'^\s*(\d+)\s+(.{8,128}?)\s+'
    r'(V|A|A2|AA|NONE|AA/V|A2/V|B)\s+'
    r'(C|D|W|KB|K|KO)\s+(\d*)\s+(\d\d.\d\d.\d\d.\d\d)\s'
    r'(\d\d.\d\d.\d\d.\d\d)\s(\d\d.\d\d.\d\d.\d\d)\s'
    r'(\d\d.\d\d.\d\d.\d\d)'
)


def _parse_columns_tolerant(line: str, line_number: int):
    """
    Parse an event line by pattern matching instead of fixed column widths.

    :param str line: the text of the EDL line.
    :param int line_number: the index of the line within the file.
    :returns: a :class:`StmtEvent` if the line matches the tolerant event
        pattern, otherwise a :class:`StmtUnrecognized` wrapping the line.
    """
    match = _TOLERANT_EVENT_PATTERN.match(line)
    if match is None:
        return StmtUnrecognized(line, line_number)

    (event, source, channels, trans, trans_op,
     source_in, source_out, record_in, record_out) = match.groups()

    # NOTE(review): the standard-form parser passes the raw event column
    # string here, while this path passes an int -- confirm which type
    # StmtEvent expects.
    return StmtEvent(event=int(event),
                     # Strip padding captured by the lazy source group so the
                     # name matches what the standard-form parser yields.
                     source=source.strip(),
                     channels=channels,
                     trans=trans,
                     trans_op=trans_op,
                     source_in=source_in,
                     source_out=source_out,
                     record_in=record_in,
                     record_out=record_out,
                     line_number=line_number,
                     # Width of the source field as matched, padding included.
                     source_field_size=len(source))
def _parse_source_umid_statement(line, line_number): def _parse_source_umid_statement(line, line_number):
# trimmed = line[3:].strip() # trimmed = line[3:].strip()
# return StmtSourceUMID(name=None, umid=None, line_number=line_number) # return StmtSourceUMID(name=None, umid=None, line_number=line_number)

View File

@@ -1 +1,2 @@
from . import test_parse from . import test_parse
from . import test_issue_19

View File

@@ -0,0 +1,39 @@
TITLE: Final Master Generated by LTedlMixer...
0001 Z125C001_220217_ROLX V C 15:51:58:10 15:52:02:16 00:00:00:00 00:00:04:06
0002 B505C014_230224_RNBP V C 20:19:58:21 20:20:00:21 00:00:04:06 00:00:06:06
0003 B505C014_230224_RNBP V C 20:19:59:21 20:20:01:22 00:00:06:06 00:00:08:07
0004 B505C014_230224_RNBP V C 20:20:01:23 20:20:02:01 00:00:08:07 00:00:08:09
0005 B505C014_230224_RNBP V C 20:20:02:01 20:20:06:10 00:00:08:09 00:00:12:18
0006 B505C011_230224_RNBP V C 19:44:21:04 19:44:27:08 00:00:12:18 00:00:18:22
0007 B505C016_230224_RNBP V C 20:24:54:14 20:24:58:19 00:00:18:22 00:00:23:03
0008 Y022C029_211201_YNJI V C 12:42:37:04 12:42:39:14 00:00:23:03 00:00:25:13
0009 A054C025_211022_R24B V C 12:30:50:11 12:30:54:16 00:00:25:13 00:00:29:18
0010 Z040C026_211206_ROLX V C 14:42:25:21 14:42:28:17 00:00:29:18 00:00:32:14
0011 J001_C002_20211007_R V C 12:38:48:18 12:38:51:13 00:00:32:14 00:00:35:09
0012 C006C005_211007_RO2A V C 11:49:08:02 11:49:15:13 00:00:35:09 00:00:42:20
0013 A021C020_211007_R24B V C 18:14:52:00 18:14:58:22 00:00:42:20 00:00:49:18
0014 A023C013_211008_R24B V C 11:12:57:23 11:12:59:18 00:00:49:18 00:00:51:13
0015 U001C010_211029_R268 V C 04:38:42:21 04:38:49:21 00:00:51:13 00:00:58:13
0016 A021C009_211007_R24B V C 17:06:12:10 17:06:19:14 00:00:58:13 00:01:05:17
0017 A055C008_211022_R24B V C 15:16:03:10 15:16:05:09 00:01:05:17 00:01:07:16
0018 A055C008_211022_R24B V C 15:16:05:09 15:16:12:20 00:01:07:16 00:01:15:03
0019 A055C008_211022_R24B V C 15:16:12:20 15:16:14:19 00:01:15:03 00:01:17:02
0020 A056C011_211022_R24B V C 17:40:13:01 17:40:16:04 00:01:17:02 00:01:20:05
0021 A024C011_211008_R24B V C 17:32:07:02 17:32:10:01 00:01:20:05 00:01:23:04
0022 B070C001_211203_RP40 V C 17:39:20:20 17:39:22:14 00:01:23:04 00:01:24:22
0023 A055C019_211022_R24B V C 16:31:05:06 16:31:12:10 00:01:24:22 00:01:32:02
0024 A248C012_220224_R1Y2 V C 16:01:35:08 16:01:40:04 00:01:32:02 00:01:36:22
0025 A127C005_211206_R24B V C 10:58:23:06 10:58:24:09 00:01:36:22 00:01:38:01
0026 A040C006_211015_R24B V C 13:00:09:04 13:00:23:17 00:01:38:01 00:01:52:14
0027 A041C006_211015_R24B V C 16:10:32:08 16:10:35:12 00:01:52:14 00:01:55:18
0028 A040C006_211015_R24B V C 13:00:34:13 13:00:37:03 00:01:55:18 00:01:58:08
0029 A041C005_211015_R24B V C 15:57:22:05 15:57:27:11 00:01:58:08 00:02:03:14
0030 A040C008_211015_R24B V C 13:09:51:18 13:09:55:07 00:02:03:14 00:02:07:03
0031 A040C016_211015_R24B V C 14:09:15:11 14:09:20:02 00:02:07:03 00:02:11:18
0032 Z089C007_220122_ROLX V C 17:03:34:23 17:03:59:15 00:02:11:18 00:02:36:10
0033 A507C008_230227_RNHZ V C 09:55:35:10 09:55:41:02 00:02:36:10 00:02:42:02
0034 B049C021_211111_RP40 V C 17:38:55:11 17:38:56:17 00:02:42:02 00:02:43:08
0035 Z036C012_211202_ROLX V C 17:30:23:12 17:30:25:05 00:02:43:08 00:02:45:01
0036 A157C023_220112_R24B V C 14:13:18:04 14:13:20:06 00:02:45:01 00:02:47:03
0037 A095C014_211110_R24B V C 19:34:35:16 19:34:37:10 00:02:47:03 00:02:48:21
0038 Z089C010_220122_ROLX V C 17:28:55:21 17:28:58:09 00:02:48:21 00:02:51:09

25
tests/test_issue_19.py Normal file
View File

@@ -0,0 +1,25 @@
from unittest import TestCase
from pycmx import parse_cmx3600
class Issue19Test(TestCase):
    """
    Regression test for issue 19: an EDL that is parsed with the tolerant
    event line parser enabled.
    """

    def setUp(self):
        # Path is relative to the working directory the tests are run from.
        self.f = open("tests/edls/ISSUE_19_unusual01.edl")

    def test_parse(self):
        edl = parse_cmx3600(self.f, tolerant=True)

        first_event = None
        for event in edl.events:
            self.assertIsNotNone(event.edits)
            if event.number == 1:
                first_event = event
                break

        # Without this check the test would pass vacuously if no events (or
        # no event numbered 1) were parsed from the file.
        self.assertIsNotNone(first_event,
                             "event 1 was not parsed from the EDL")

        self.assertEqual(len(first_event.edits), 1)
        edit = first_event.edits[0]
        self.assertEqual(edit.source, "Z125C001_220217_ROLX")
        self.assertEqual(edit.channels.v, True)
        self.assertEqual(edit.transition.kind, "C")
        self.assertEqual(edit.transition.operand, "")
        self.assertEqual(edit.source_in, "15:51:58:10")
        self.assertEqual(edit.record_out, "00:00:04:06")

    def tearDown(self):
        self.f.close()