To parse the input file recursively, I would create a class that represents a tag. Each tag can have children. Every child starts out as a plain string, added either manually with tag.children.append("value") or by calling tag.add_tag(tag.name, "value").
class Tag:
    """Node of a tag tree built from ``value tag`` pairs.

    A tag renders as ``<name>children</name>``; a tag without children
    renders as its bare name, and the parentless root renders only its
    children.
    """

    def __init__(self, name, parent=None):
        """Create a tag called *name*, optionally attached below *parent*."""
        self.name = name
        # Ordered children: plain strings (leaf values) or nested Tag objects.
        self.children = []
        # Set to False by add_tag() on tags it creates provisionally; only
        # such tags are eligible to be moved below another tag later.
        self.has_root = True
        self.parent = parent

    def __str__(self):
        """Compose the string for this tag (recursively)."""
        if not self.children:
            return self.name
        children_str = ' '.join(str(child) for child in self.children)
        if not self.parent:
            # The root is only a container and emits no markup of its own.
            return children_str
        return '<%s>%s</%s>' % (self.name, children_str, self.name)

    @classmethod
    def from_file(cls, file):
        """Create the root tag from *file*.

        Each non-blank line must hold exactly two whitespace-separated
        fields, ``value tag``. Blank lines are skipped.

        Raises:
            ValueError: if a non-blank line does not have exactly two fields.
        """
        obj = cls('root')
        with open(file) as in_file:
            for line_no, line in enumerate(in_file, 1):
                # split() without arguments tolerates tabs and repeated
                # spaces, and yields [] for blank lines.
                fields = line.split()
                if not fields:
                    continue
                if len(fields) != 2:
                    raise ValueError(
                        "line %d: expected 'value tag', got %r"
                        % (line_no, line))
                value, tag = fields
                obj.add_tag(tag, value)
        return obj

    def search_tag(self, tag):
        """Search for *tag* in this tag and, depth-first, in its children.

        Side effect: a string child equal to *tag* (ignoring surrounding
        whitespace) is replaced in place by a new Tag of that name, which
        is then returned.

        Returns:
            The matching Tag, or None when nothing matches.
        """
        if self.name == tag:
            return self
        for i, child in enumerate(self.children):
            if isinstance(child, Tag):
                # The recursive call checks the child's own name first.
                result = child.search_tag(tag)
                if result is not None:
                    return result
            elif isinstance(child, str):
                # Only string needles can match a string child; this guard
                # avoids calling .strip() on a Tag passed as *tag*.
                if isinstance(tag, str) and child.strip() == tag.strip():
                    self.children[i] = Tag(tag, self)
                    return self.children[i]
        return None

    def add_tag(self, tag, value):
        """Add a (value, tag) pair to the tree below this tag.

        First the value is looked up in the tree. If it is found as a
        provisionally placed tag (``has_root`` is False), that tag is
        detached from its parent so it can be moved to the new location.

        Then *tag* is looked up. When found, the value is appended to it.
        Otherwise a new Tag is created below this tag, the value is added
        to it, and its ``has_root`` flag is set to False so the new tag
        can be moved later.
        """
        value_tag = self.search_tag(value)
        if value_tag and not value_tag.has_root:
            print("Found value: %s" % value)
            if value_tag.parent:
                i = value_tag.parent.children.index(value_tag)
                # From here on *value* is the detached Tag, not the string.
                value = value_tag.parent.children.pop(i)
                value.has_root = True
        else:
            print("not %s" % value)

        found = self.search_tag(tag)
        if found:
            found.children.append(value)
            if isinstance(value, Tag):
                # Keep the back-reference in sync with the new location.
                value.parent = found
        else:
            # No existing tag of that name: create one provisionally here.
            tag_obj = Tag(tag, self)
            self.children.append(tag_obj)
            tag_obj.add_tag(tag, value)
            tag_obj.has_root = False
# Build the tag tree from the input file named 'final' and print its markup.
tags = Tag.from_file(file='final')
print(str(tags))
I know that in this example the speed tag is not added twice; I hope that's OK.
Sorry for the long code.