Merge pull request #20 from iluvcapra/19-unusual-edl

Tolerant Parsing Mode
This commit is contained in:
Jamie Hardt
2025-12-18 10:06:07 -08:00
committed by GitHub
9 changed files with 137 additions and 18 deletions

View File

@@ -14,6 +14,8 @@ The `pycmx` package parses a CMX 3600 EDL and its most most common variations.
read. Event number field and source name field sizes are determined
dynamically for each statement for a high level of compliance at the expense
of strictness.
* A more relaxed "tolerant" mode allows parsing of an EDL file where columns
use non-standard widths.
* Preserves relationship between events and individual edits/clips.
* Remark or comment fields with common recognized forms are read and
available to the client, including clip name and source file data.

View File

@@ -16,6 +16,8 @@ The `pycmx` package parses a CMX 3600 EDL and its most most common variations.
read. Event number field and source name field sizes are determined
dynamically for each statement for a high level of compliance at the expense
of strictness.
* A more relaxed "tolerant" mode allows parsing of an EDL file where columns
use non-standard widths.
* Preserves relationship between events and individual edits/clips.
* Remark or comment fields with common recognized forms are read and
available to the client, including clip name and source file data.

View File

@@ -15,7 +15,7 @@ class EditList:
:func:`~pycmx.parse_cmx3600()`.
"""
def __init__(self, statements):
def __init__(self, statements: list):
self.title_statement: StmtTitle = statements[0]
self.event_statements = statements[1:]

View File

@@ -69,7 +69,6 @@ class Event:
the_zip.append(trans_names)
except IndexError:
the_zip.append([None] * len(edits_audio))
return [Edit(edit_statement=e1[0],
audio_ext_statement=e1[1],
clip_name_statement=n1,
@@ -104,6 +103,11 @@ class Event:
def _statements_with_audio_ext(self) -> Generator[
Tuple[StmtEvent, Optional[StmtAudioExt]], None, None]:
if len(self.statements) == 1 and type(self.statements[0]) is StmtEvent:
yield (self.statements[0], None)
else:
for (s1, s2) in zip(self.statements, self.statements[1:]):
if type(s1) is StmtEvent and type(s2) is StmtAudioExt:
yield (s1, s2)

View File

@@ -7,12 +7,14 @@ from .parse_cmx_statements import (parse_cmx3600_statements)
from .edit_list import EditList
def parse_cmx3600(f: TextIO) -> EditList:
def parse_cmx3600(f: TextIO, tolerant: bool = False) -> EditList:
"""
Parse a CMX 3600 EDL.
:param TextIO f: a file-like object, an opened CMX 3600 .EDL file.
:param bool tolerant: If `True`, a relaxed event line parsing method will
be used, in the case the default method fails.
:returns: An :class:`pycmx.edit_list.EditList`.
"""
statements = parse_cmx3600_statements(f)
statements = parse_cmx3600_statements(f, tolerant)
return EditList(statements)

View File

@@ -13,12 +13,13 @@ from .statements import (StmtCdlSat, StmtCdlSop, StmtCorruptRemark, StmtFrmc,
from .util import collimate
def parse_cmx3600_statements(file: TextIO) -> List[object]:
def parse_cmx3600_statements(file: TextIO,
tolerant: bool = False) -> List[object]:
"""
Return a list of every statement in the file argument.
"""
lines = file.readlines()
return [_parse_cmx3600_line(line.strip(), line_number)
return [_parse_cmx3600_line(line.strip(), line_number, tolerant)
for (line_number, line) in enumerate(lines)]
@@ -38,7 +39,8 @@ def _edl_column_widths(event_field_length, source_field_length) -> List[int]:
# 8,8,1,4,2,1,4,13,3,1,1]
def _parse_cmx3600_line(line: str, line_number: int) -> object:
def _parse_cmx3600_line(line: str, line_number: int,
tolerant: bool = False) -> object:
"""
Parses a single CMX EDL line.
@@ -54,9 +56,20 @@ def _parse_cmx3600_line(line: str, line_number: int) -> object:
return _parse_fcm(line, line_number)
if line_matcher is not None:
event_field_len = len(line_matcher.group(1))
source_field_len = len(line) - (event_field_len + 65)
try:
return _parse_columns_for_standard_form(line, event_field_len,
source_field_len, line_number)
source_field_len,
line_number)
except EventFormError:
if tolerant:
return _parse_columns_tolerant(line, line_number)
else:
return StmtUnrecognized(line, line_number)
if line.startswith("AUD"):
return _parse_extended_audio_channels(line, line_number)
if line.startswith("*"):
@@ -190,6 +203,10 @@ def _parse_split(line: str, line_number):
# return StmtMotionMemory(source="", fps="")
#
class EventFormError(RuntimeError):
    """
    Signals that an event line does not fit the standard column form.

    Raised by the standard-form column parser when the line is shorter than
    the computed column widths, or when the channels or transition column is
    empty.
    """
def _parse_unrecognized(line, line_number):
    """
    Wrap `line` in a :class:`StmtUnrecognized` tagged with `line_number`.
    """
    unrecognized = StmtUnrecognized(content=line, line_number=line_number)
    return unrecognized
@@ -197,17 +214,24 @@ def _parse_unrecognized(line, line_number):
def _parse_columns_for_standard_form(line: str, event_field_length: int,
source_field_length: int,
line_number: int):
# breakpoint()
col_widths = _edl_column_widths(event_field_length, source_field_length)
if sum(col_widths) > len(line):
return StmtUnrecognized(content=line, line_number=line_number)
raise EventFormError()
column_strings = collimate(line, col_widths)
channels = column_strings[4].strip()
trans = column_strings[6].strip()
if len(channels) == 0 or len(trans) == 0:
raise EventFormError()
return StmtEvent(event=column_strings[0],
source=column_strings[2].strip(),
channels=column_strings[4].strip(),
trans=column_strings[6].strip(),
channels=channels,
trans=trans,
trans_op=column_strings[8].strip(),
source_in=column_strings[10].strip(),
source_out=column_strings[12].strip(),
@@ -217,6 +241,26 @@ def _parse_columns_for_standard_form(line: str, event_field_length: int,
source_field_size=source_field_length)
def _parse_columns_tolerant(line: str, line_number: int):
    """
    Parse an event line by whitespace-separated fields instead of fixed
    column widths.

    The line must consist of, in order: an event number (digits), a source
    name of 8 to 128 characters, a channels code, a transition code, an
    optional numeric transition operand and four timecodes (source in,
    source out, record in, record out).

    :param str line: the event line to parse.
    :param int line_number: line number stored on the returned statement.
    :returns: A `StmtEvent` if the line matches, with `event` converted to
        `int` and `source_field_size` set to the length of the matched
        source name; otherwise a `StmtUnrecognized` wrapping the line.
    """
    pattern = re.compile(r'^\s*(\d+)\s+(.{8,128}?)\s+'
                         r'(V|A|A2|AA|NONE|AA/V|A2/V|B)\s+'
                         r'(C|D|W|KB|K|KO)\s+(\d*)\s+(\d\d.\d\d.\d\d.\d\d)\s'
                         r'(\d\d.\d\d.\d\d.\d\d)\s(\d\d.\d\d.\d\d.\d\d)\s'
                         r'(\d\d.\d\d.\d\d.\d\d)'
                         )

    found = pattern.match(line)
    if found is None:
        return StmtUnrecognized(line, line_number)

    # The pattern has exactly nine capture groups and none is optional, so
    # every name below is bound to a string (the operand may be empty).
    (event, source, channels, trans, trans_op,
     source_in, source_out, record_in, record_out) = found.groups()

    return StmtEvent(event=int(event),
                     source=source,
                     channels=channels,
                     trans=trans,
                     trans_op=trans_op,
                     source_in=source_in,
                     source_out=source_out,
                     record_in=record_in,
                     record_out=record_out,
                     line_number=line_number,
                     source_field_size=len(source))
def _parse_source_umid_statement(line, line_number):
# trimmed = line[3:].strip()
# return StmtSourceUMID(name=None, umid=None, line_number=line_number)

View File

@@ -1 +1,2 @@
from . import test_parse
from . import test_issue_19

View File

@@ -0,0 +1,39 @@
TITLE: Final Master Generated by LTedlMixer...
0001 Z125C001_220217_ROLX V C 15:51:58:10 15:52:02:16 00:00:00:00 00:00:04:06
0002 B505C014_230224_RNBP V C 20:19:58:21 20:20:00:21 00:00:04:06 00:00:06:06
0003 B505C014_230224_RNBP V C 20:19:59:21 20:20:01:22 00:00:06:06 00:00:08:07
0004 B505C014_230224_RNBP V C 20:20:01:23 20:20:02:01 00:00:08:07 00:00:08:09
0005 B505C014_230224_RNBP V C 20:20:02:01 20:20:06:10 00:00:08:09 00:00:12:18
0006 B505C011_230224_RNBP V C 19:44:21:04 19:44:27:08 00:00:12:18 00:00:18:22
0007 B505C016_230224_RNBP V C 20:24:54:14 20:24:58:19 00:00:18:22 00:00:23:03
0008 Y022C029_211201_YNJI V C 12:42:37:04 12:42:39:14 00:00:23:03 00:00:25:13
0009 A054C025_211022_R24B V C 12:30:50:11 12:30:54:16 00:00:25:13 00:00:29:18
0010 Z040C026_211206_ROLX V C 14:42:25:21 14:42:28:17 00:00:29:18 00:00:32:14
0011 J001_C002_20211007_R V C 12:38:48:18 12:38:51:13 00:00:32:14 00:00:35:09
0012 C006C005_211007_RO2A V C 11:49:08:02 11:49:15:13 00:00:35:09 00:00:42:20
0013 A021C020_211007_R24B V C 18:14:52:00 18:14:58:22 00:00:42:20 00:00:49:18
0014 A023C013_211008_R24B V C 11:12:57:23 11:12:59:18 00:00:49:18 00:00:51:13
0015 U001C010_211029_R268 V C 04:38:42:21 04:38:49:21 00:00:51:13 00:00:58:13
0016 A021C009_211007_R24B V C 17:06:12:10 17:06:19:14 00:00:58:13 00:01:05:17
0017 A055C008_211022_R24B V C 15:16:03:10 15:16:05:09 00:01:05:17 00:01:07:16
0018 A055C008_211022_R24B V C 15:16:05:09 15:16:12:20 00:01:07:16 00:01:15:03
0019 A055C008_211022_R24B V C 15:16:12:20 15:16:14:19 00:01:15:03 00:01:17:02
0020 A056C011_211022_R24B V C 17:40:13:01 17:40:16:04 00:01:17:02 00:01:20:05
0021 A024C011_211008_R24B V C 17:32:07:02 17:32:10:01 00:01:20:05 00:01:23:04
0022 B070C001_211203_RP40 V C 17:39:20:20 17:39:22:14 00:01:23:04 00:01:24:22
0023 A055C019_211022_R24B V C 16:31:05:06 16:31:12:10 00:01:24:22 00:01:32:02
0024 A248C012_220224_R1Y2 V C 16:01:35:08 16:01:40:04 00:01:32:02 00:01:36:22
0025 A127C005_211206_R24B V C 10:58:23:06 10:58:24:09 00:01:36:22 00:01:38:01
0026 A040C006_211015_R24B V C 13:00:09:04 13:00:23:17 00:01:38:01 00:01:52:14
0027 A041C006_211015_R24B V C 16:10:32:08 16:10:35:12 00:01:52:14 00:01:55:18
0028 A040C006_211015_R24B V C 13:00:34:13 13:00:37:03 00:01:55:18 00:01:58:08
0029 A041C005_211015_R24B V C 15:57:22:05 15:57:27:11 00:01:58:08 00:02:03:14
0030 A040C008_211015_R24B V C 13:09:51:18 13:09:55:07 00:02:03:14 00:02:07:03
0031 A040C016_211015_R24B V C 14:09:15:11 14:09:20:02 00:02:07:03 00:02:11:18
0032 Z089C007_220122_ROLX V C 17:03:34:23 17:03:59:15 00:02:11:18 00:02:36:10
0033 A507C008_230227_RNHZ V C 09:55:35:10 09:55:41:02 00:02:36:10 00:02:42:02
0034 B049C021_211111_RP40 V C 17:38:55:11 17:38:56:17 00:02:42:02 00:02:43:08
0035 Z036C012_211202_ROLX V C 17:30:23:12 17:30:25:05 00:02:43:08 00:02:45:01
0036 A157C023_220112_R24B V C 14:13:18:04 14:13:20:06 00:02:45:01 00:02:47:03
0037 A095C014_211110_R24B V C 19:34:35:16 19:34:37:10 00:02:47:03 00:02:48:21
0038 Z089C010_220122_ROLX V C 17:28:55:21 17:28:58:09 00:02:48:21 00:02:51:09

25
tests/test_issue_19.py Normal file
View File

@@ -0,0 +1,25 @@
from unittest import TestCase
from pycmx import parse_cmx3600
class Issue19Test(TestCase):
# Regression test for issue 19: parses the ISSUE_19_unusual01.edl fixture
# with tolerant=True and checks the fields read for event number 1.
def setUp(self):
# The path is relative to the current working directory; presumably the
# test run is expected to start from the repository root -- confirm.
self.f = open("tests/edls/ISSUE_19_unusual01.edl")
def test_parse(self):
edl = parse_cmx3600(self.f, tolerant=True)
for event in edl.events:
self.assertIsNotNone(event.edits)
if event.number == 1:
# Expected values below are those of the fixture's "0001" event line.
self.assertEqual(len(event.edits), 1)
self.assertEqual(event.edits[0].source, "Z125C001_220217_ROLX")
self.assertEqual(event.edits[0].channels.v, True)
self.assertEqual(event.edits[0].transition.kind, "C")
# The fixture line has no transition operand, so it reads back as "".
self.assertEqual(event.edits[0].transition.operand, "")
self.assertEqual(event.edits[0].source_in, "15:51:58:10")
self.assertEqual(event.edits[0].record_out, "00:00:04:06")
break
def tearDown(self):
# Closes the fixture file opened in setUp.
self.f.close()