Module gedcom.element.element
Base GEDCOM element
Expand source code
# -*- coding: utf-8 -*-
# Python GEDCOM Parser
#
# Copyright (C) 2018 Damon Brodie (damon.brodie at gmail.com)
# Copyright (C) 2018-2019 Nicklas Reincke (contact at reynke.com)
# Copyright (C) 2016 Andreas Oberritter
# Copyright (C) 2012 Madeleine Price Ball
# Copyright (C) 2005 Daniel Zappala (zappala at cs.byu.edu)
# Copyright (C) 2005 Brigham Young University
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Further information about the license: http://www.gnu.org/licenses/gpl-2.0.html
"""
Base GEDCOM element
"""
from sys import version_info
from gedcom.helpers import deprecated
import gedcom.tags
class Element(object):
"""GEDCOM element
Each line in a GEDCOM file is an element with the format
`level [pointer] tag [value]`
where `level` and `tag` are required, and `pointer` and `value` are
optional. Elements are arranged hierarchically according to their
level, and elements with a level of zero are at the top level.
Elements with a level greater than zero are children of their
parent.
A pointer has the format `@pname@`, where `pname` is any sequence of
characters and numbers. The pointer identifies the object being
pointed to, so that any pointer included as the value of any
element points back to the original object. For example, an
element may have a `FAMS` tag whose value is `@F1@`, meaning that this
element points to the family record in which the associated person
is a spouse. Likewise, an element with a tag of `FAMC` has a value
that points to a family record in which the associated person is a
child.
See a GEDCOM file for examples of tags and their values.
Tags available to an element are seen here: `gedcom.tags`
"""
def __init__(self, level, pointer, tag, value, crlf="\n", multi_line=True):
# basic element info
self.__level = level
self.__pointer = pointer
self.__tag = tag
self.__value = value
self.__crlf = crlf
# structuring
self.__children = []
self.__parent = None
if multi_line:
self.set_multi_line_value(value)
def get_level(self):
"""Returns the level of this element from within the GEDCOM file
:rtype: int
"""
return self.__level
def get_pointer(self):
"""Returns the pointer of this element from within the GEDCOM file
:rtype: str
"""
return self.__pointer
def get_tag(self):
"""Returns the tag of this element from within the GEDCOM file
:rtype: str
"""
return self.__tag
def get_value(self):
"""Return the value of this element from within the GEDCOM file
:rtype: str
"""
return self.__value
def set_value(self, value):
"""Sets the value of this element
:type value: str
"""
self.__value = value
def get_multi_line_value(self):
"""Returns the value of this element including concatenations or continuations
:rtype: str
"""
result = self.get_value()
last_crlf = self.__crlf
for element in self.get_child_elements():
tag = element.get_tag()
if tag == gedcom.tags.GEDCOM_TAG_CONCATENATION:
result += element.get_value()
last_crlf = element.__crlf
elif tag == gedcom.tags.GEDCOM_TAG_CONTINUED:
result += last_crlf + element.get_value()
last_crlf = element.__crlf
return result
def __available_characters(self):
"""Get the number of available characters of the elements original string
:rtype: int
"""
element_characters = len(self.to_gedcom_string())
return 0 if element_characters > 255 else 255 - element_characters
def __line_length(self, line):
"""@TODO Write docs.
:type line: str
:rtype: int
"""
total_characters = len(line)
available_characters = self.__available_characters()
if total_characters <= available_characters:
return total_characters
spaces = 0
while spaces < available_characters and line[available_characters - spaces - 1] == ' ':
spaces += 1
if spaces == available_characters:
return available_characters
return available_characters - spaces
def __set_bounded_value(self, value):
"""@TODO Write docs.
:type value: str
:rtype: int
"""
line_length = self.__line_length(value)
self.set_value(value[:line_length])
return line_length
def __add_bounded_child(self, tag, value):
"""@TODO Write docs.
:type tag: str
:type value: str
:rtype: int
"""
child = self.new_child_element(tag)
return child.__set_bounded_value(value)
def __add_concatenation(self, string):
"""@TODO Write docs.
:rtype: str
"""
index = 0
size = len(string)
while index < size:
index += self.__add_bounded_child(gedcom.tags.GEDCOM_TAG_CONCATENATION, string[index:])
def set_multi_line_value(self, value):
"""Sets the value of this element, adding concatenation and continuation lines when necessary
:type value: str
"""
self.set_value('')
self.get_child_elements()[:] = [child for child in self.get_child_elements() if
child.get_tag() not in (gedcom.tags.GEDCOM_TAG_CONCATENATION, gedcom.tags.GEDCOM_TAG_CONTINUED)]
lines = value.splitlines()
if lines:
line = lines.pop(0)
n = self.__set_bounded_value(line)
self.__add_concatenation(line[n:])
for line in lines:
n = self.__add_bounded_child(gedcom.tags.GEDCOM_TAG_CONTINUED, line)
self.__add_concatenation(line[n:])
def get_child_elements(self):
"""Returns the direct child elements of this element
:rtype: list of Element
"""
return self.__children
def new_child_element(self, tag, pointer="", value=""):
"""Creates and returns a new child element of this element
:type tag: str
:type pointer: str
:type value: str
:rtype: Element
"""
from gedcom.element.family import FamilyElement
from gedcom.element.file import FileElement
from gedcom.element.individual import IndividualElement
from gedcom.element.object import ObjectElement
# Differentiate between the type of the new child element
if tag == gedcom.tags.GEDCOM_TAG_FAMILY:
child_element = FamilyElement(self.get_level() + 1, pointer, tag, value, self.__crlf)
elif tag == gedcom.tags.GEDCOM_TAG_FILE:
child_element = FileElement(self.get_level() + 1, pointer, tag, value, self.__crlf)
elif tag == gedcom.tags.GEDCOM_TAG_INDIVIDUAL:
child_element = IndividualElement(self.get_level() + 1, pointer, tag, value, self.__crlf)
elif tag == gedcom.tags.GEDCOM_TAG_OBJECT:
child_element = ObjectElement(self.get_level() + 1, pointer, tag, value, self.__crlf)
else:
child_element = Element(self.get_level() + 1, pointer, tag, value, self.__crlf)
self.add_child_element(child_element)
return child_element
def add_child_element(self, element):
"""Adds a child element to this element
:type element: Element
"""
self.get_child_elements().append(element)
element.set_parent_element(self)
return element
def get_parent_element(self):
"""Returns the parent element of this element
:rtype: Element
"""
return self.__parent
def set_parent_element(self, element):
"""Adds a parent element to this element
There's usually no need to call this method manually,
`add_child_element()` calls it automatically.
:type element: Element
"""
self.__parent = element
@deprecated
def get_individual(self):
"""Returns this element and all of its sub-elements represented as a GEDCOM string
::deprecated:: As of version 1.0.0 use `to_gedcom_string()` method instead
:rtype: str
"""
return self.to_gedcom_string(True)
def to_gedcom_string(self, recursive=False):
"""Formats this element and optionally all of its sub-elements into a GEDCOM string
:type recursive: bool
:rtype: str
"""
result = str(self.get_level())
if self.get_pointer() != "":
result += ' ' + self.get_pointer()
result += ' ' + self.get_tag()
if self.get_value() != "":
result += ' ' + self.get_value()
result += self.__crlf
if self.get_level() < 0:
result = ''
if recursive:
for child_element in self.get_child_elements():
result += child_element.to_gedcom_string(True)
return result
def __str__(self):
""":rtype: str"""
if version_info[0] >= 3:
return self.to_gedcom_string()
return self.to_gedcom_string().encode('utf-8-sig')
Classes
class Element (level, pointer, tag, value, crlf='\n', multi_line=True)
-
GEDCOM element
Each line in a GEDCOM file is an element with the format
level [pointer] tag [value]
where
level
andtag
are required, andpointer
andvalue
are optional. Elements are arranged hierarchically according to their level, and elements with a level of zero are at the top level. Elements with a level greater than zero are children of their parent.A pointer has the format
@pname@
, wherepname
is any sequence of characters and numbers. The pointer identifies the object being pointed to, so that any pointer included as the value of any element points back to the original object. For example, an element may have aFAMS
tag whose value is@F1@
, meaning that this element points to the family record in which the associated person is a spouse. Likewise, an element with a tag ofFAMC
has a value that points to a family record in which the associated person is a child.See a GEDCOM file for examples of tags and their values.
Tags available to an element are seen here:
gedcom.tags
Expand source code
class Element(object): """GEDCOM element Each line in a GEDCOM file is an element with the format `level [pointer] tag [value]` where `level` and `tag` are required, and `pointer` and `value` are optional. Elements are arranged hierarchically according to their level, and elements with a level of zero are at the top level. Elements with a level greater than zero are children of their parent. A pointer has the format `@pname@`, where `pname` is any sequence of characters and numbers. The pointer identifies the object being pointed to, so that any pointer included as the value of any element points back to the original object. For example, an element may have a `FAMS` tag whose value is `@F1@`, meaning that this element points to the family record in which the associated person is a spouse. Likewise, an element with a tag of `FAMC` has a value that points to a family record in which the associated person is a child. See a GEDCOM file for examples of tags and their values. Tags available to an element are seen here: `gedcom.tags` """ def __init__(self, level, pointer, tag, value, crlf="\n", multi_line=True): # basic element info self.__level = level self.__pointer = pointer self.__tag = tag self.__value = value self.__crlf = crlf # structuring self.__children = [] self.__parent = None if multi_line: self.set_multi_line_value(value) def get_level(self): """Returns the level of this element from within the GEDCOM file :rtype: int """ return self.__level def get_pointer(self): """Returns the pointer of this element from within the GEDCOM file :rtype: str """ return self.__pointer def get_tag(self): """Returns the tag of this element from within the GEDCOM file :rtype: str """ return self.__tag def get_value(self): """Return the value of this element from within the GEDCOM file :rtype: str """ return self.__value def set_value(self, value): """Sets the value of this element :type value: str """ self.__value = value def get_multi_line_value(self): """Returns the value of this element including concatenations or continuations :rtype: str """ result = self.get_value() last_crlf = self.__crlf for element in self.get_child_elements(): tag = element.get_tag() if tag == gedcom.tags.GEDCOM_TAG_CONCATENATION: result += element.get_value() last_crlf = element.__crlf elif tag == gedcom.tags.GEDCOM_TAG_CONTINUED: result += last_crlf + element.get_value() last_crlf = element.__crlf return result def __available_characters(self): """Get the number of available characters of the elements original string :rtype: int """ element_characters = len(self.to_gedcom_string()) return 0 if element_characters > 255 else 255 - element_characters def __line_length(self, line): """@TODO Write docs. :type line: str :rtype: int """ total_characters = len(line) available_characters = self.__available_characters() if total_characters <= available_characters: return total_characters spaces = 0 while spaces < available_characters and line[available_characters - spaces - 1] == ' ': spaces += 1 if spaces == available_characters: return available_characters return available_characters - spaces def __set_bounded_value(self, value): """@TODO Write docs. :type value: str :rtype: int """ line_length = self.__line_length(value) self.set_value(value[:line_length]) return line_length def __add_bounded_child(self, tag, value): """@TODO Write docs. :type tag: str :type value: str :rtype: int """ child = self.new_child_element(tag) return child.__set_bounded_value(value) def __add_concatenation(self, string): """@TODO Write docs. :rtype: str """ index = 0 size = len(string) while index < size: index += self.__add_bounded_child(gedcom.tags.GEDCOM_TAG_CONCATENATION, string[index:]) def set_multi_line_value(self, value): """Sets the value of this element, adding concatenation and continuation lines when necessary :type value: str """ self.set_value('') self.get_child_elements()[:] = [child for child in self.get_child_elements() if child.get_tag() not in (gedcom.tags.GEDCOM_TAG_CONCATENATION, gedcom.tags.GEDCOM_TAG_CONTINUED)] lines = value.splitlines() if lines: line = lines.pop(0) n = self.__set_bounded_value(line) self.__add_concatenation(line[n:]) for line in lines: n = self.__add_bounded_child(gedcom.tags.GEDCOM_TAG_CONTINUED, line) self.__add_concatenation(line[n:]) def get_child_elements(self): """Returns the direct child elements of this element :rtype: list of Element """ return self.__children def new_child_element(self, tag, pointer="", value=""): """Creates and returns a new child element of this element :type tag: str :type pointer: str :type value: str :rtype: Element """ from gedcom.element.family import FamilyElement from gedcom.element.file import FileElement from gedcom.element.individual import IndividualElement from gedcom.element.object import ObjectElement # Differentiate between the type of the new child element if tag == gedcom.tags.GEDCOM_TAG_FAMILY: child_element = FamilyElement(self.get_level() + 1, pointer, tag, value, self.__crlf) elif tag == gedcom.tags.GEDCOM_TAG_FILE: child_element = FileElement(self.get_level() + 1, pointer, tag, value, self.__crlf) elif tag == gedcom.tags.GEDCOM_TAG_INDIVIDUAL: child_element = IndividualElement(self.get_level() + 1, pointer, tag, value, self.__crlf) elif tag == gedcom.tags.GEDCOM_TAG_OBJECT: child_element = ObjectElement(self.get_level() + 1, pointer, tag, value, self.__crlf) else: child_element = Element(self.get_level() + 1, pointer, tag, value, self.__crlf) self.add_child_element(child_element) return child_element def add_child_element(self, element): """Adds a child element to this element :type element: Element """ self.get_child_elements().append(element) element.set_parent_element(self) return element def get_parent_element(self): """Returns the parent element of this element :rtype: Element """ return self.__parent def set_parent_element(self, element): """Adds a parent element to this element There's usually no need to call this method manually, `add_child_element()` calls it automatically. :type element: Element """ self.__parent = element @deprecated def get_individual(self): """Returns this element and all of its sub-elements represented as a GEDCOM string ::deprecated:: As of version 1.0.0 use `to_gedcom_string()` method instead :rtype: str """ return self.to_gedcom_string(True) def to_gedcom_string(self, recursive=False): """Formats this element and optionally all of its sub-elements into a GEDCOM string :type recursive: bool :rtype: str """ result = str(self.get_level()) if self.get_pointer() != "": result += ' ' + self.get_pointer() result += ' ' + self.get_tag() if self.get_value() != "": result += ' ' + self.get_value() result += self.__crlf if self.get_level() < 0: result = '' if recursive: for child_element in self.get_child_elements(): result += child_element.to_gedcom_string(True) return result def __str__(self): """:rtype: str""" if version_info[0] >= 3: return self.to_gedcom_string() return self.to_gedcom_string().encode('utf-8-sig')
Subclasses
Methods
def add_child_element(self, element)
-
Adds a child element to this element
:type element: Element
Expand source code
def add_child_element(self, element): """Adds a child element to this element :type element: Element """ self.get_child_elements().append(element) element.set_parent_element(self) return element
def get_child_elements(self)
-
Returns the direct child elements of this element :rtype: list of Element
Expand source code
def get_child_elements(self): """Returns the direct child elements of this element :rtype: list of Element """ return self.__children
def get_individual(self)
-
Returns this element and all of its sub-elements represented as a GEDCOM string ::deprecated:: As of version 1.0.0 use
to_gedcom_string()
method instead :rtype: strExpand source code
@deprecated def get_individual(self): """Returns this element and all of its sub-elements represented as a GEDCOM string ::deprecated:: As of version 1.0.0 use `to_gedcom_string()` method instead :rtype: str """ return self.to_gedcom_string(True)
def get_level(self)
-
Returns the level of this element from within the GEDCOM file :rtype: int
Expand source code
def get_level(self): """Returns the level of this element from within the GEDCOM file :rtype: int """ return self.__level
def get_multi_line_value(self)
-
Returns the value of this element including concatenations or continuations :rtype: str
Expand source code
def get_multi_line_value(self): """Returns the value of this element including concatenations or continuations :rtype: str """ result = self.get_value() last_crlf = self.__crlf for element in self.get_child_elements(): tag = element.get_tag() if tag == gedcom.tags.GEDCOM_TAG_CONCATENATION: result += element.get_value() last_crlf = element.__crlf elif tag == gedcom.tags.GEDCOM_TAG_CONTINUED: result += last_crlf + element.get_value() last_crlf = element.__crlf return result
def get_parent_element(self)
-
Returns the parent element of this element :rtype: Element
Expand source code
def get_parent_element(self): """Returns the parent element of this element :rtype: Element """ return self.__parent
def get_pointer(self)
-
Returns the pointer of this element from within the GEDCOM file :rtype: str
Expand source code
def get_pointer(self): """Returns the pointer of this element from within the GEDCOM file :rtype: str """ return self.__pointer
def get_tag(self)
-
Returns the tag of this element from within the GEDCOM file :rtype: str
Expand source code
def get_tag(self): """Returns the tag of this element from within the GEDCOM file :rtype: str """ return self.__tag
def get_value(self)
-
Return the value of this element from within the GEDCOM file :rtype: str
Expand source code
def get_value(self): """Return the value of this element from within the GEDCOM file :rtype: str """ return self.__value
def new_child_element(self, tag, pointer='', value='')
-
Creates and returns a new child element of this element
:type tag: str :type pointer: str :type value: str :rtype: Element
Expand source code
def new_child_element(self, tag, pointer="", value=""): """Creates and returns a new child element of this element :type tag: str :type pointer: str :type value: str :rtype: Element """ from gedcom.element.family import FamilyElement from gedcom.element.file import FileElement from gedcom.element.individual import IndividualElement from gedcom.element.object import ObjectElement # Differentiate between the type of the new child element if tag == gedcom.tags.GEDCOM_TAG_FAMILY: child_element = FamilyElement(self.get_level() + 1, pointer, tag, value, self.__crlf) elif tag == gedcom.tags.GEDCOM_TAG_FILE: child_element = FileElement(self.get_level() + 1, pointer, tag, value, self.__crlf) elif tag == gedcom.tags.GEDCOM_TAG_INDIVIDUAL: child_element = IndividualElement(self.get_level() + 1, pointer, tag, value, self.__crlf) elif tag == gedcom.tags.GEDCOM_TAG_OBJECT: child_element = ObjectElement(self.get_level() + 1, pointer, tag, value, self.__crlf) else: child_element = Element(self.get_level() + 1, pointer, tag, value, self.__crlf) self.add_child_element(child_element) return child_element
def set_multi_line_value(self, value)
-
Sets the value of this element, adding concatenation and continuation lines when necessary :type value: str
Expand source code
def set_multi_line_value(self, value): """Sets the value of this element, adding concatenation and continuation lines when necessary :type value: str """ self.set_value('') self.get_child_elements()[:] = [child for child in self.get_child_elements() if child.get_tag() not in (gedcom.tags.GEDCOM_TAG_CONCATENATION, gedcom.tags.GEDCOM_TAG_CONTINUED)] lines = value.splitlines() if lines: line = lines.pop(0) n = self.__set_bounded_value(line) self.__add_concatenation(line[n:]) for line in lines: n = self.__add_bounded_child(gedcom.tags.GEDCOM_TAG_CONTINUED, line) self.__add_concatenation(line[n:])
def set_parent_element(self, element)
-
Adds a parent element to this element
There's usually no need to call this method manually,
add_child_element()
calls it automatically.:type element: Element
Expand source code
def set_parent_element(self, element): """Adds a parent element to this element There's usually no need to call this method manually, `add_child_element()` calls it automatically. :type element: Element """ self.__parent = element
def set_value(self, value)
-
Sets the value of this element :type value: str
Expand source code
def set_value(self, value): """Sets the value of this element :type value: str """ self.__value = value
def to_gedcom_string(self, recursive=False)
-
Formats this element and optionally all of its sub-elements into a GEDCOM string :type recursive: bool :rtype: str
Expand source code
def to_gedcom_string(self, recursive=False): """Formats this element and optionally all of its sub-elements into a GEDCOM string :type recursive: bool :rtype: str """ result = str(self.get_level()) if self.get_pointer() != "": result += ' ' + self.get_pointer() result += ' ' + self.get_tag() if self.get_value() != "": result += ' ' + self.get_value() result += self.__crlf if self.get_level() < 0: result = '' if recursive: for child_element in self.get_child_elements(): result += child_element.to_gedcom_string(True) return result