Guitar-Sheet-Parser/lib/dataStructures.py
2021-07-09 21:05:31 +02:00

476 lines
19 KiB
Python

#!/usr/bin/env python3
##
# @file dataStructures.py
#
# @brief This file contains the internal data structures required for each tablature file
#
# @section description Description
# -
#
# @section notes Notes
#
# @section todo TODO
# - Move helper functions like stripEmptyLines to a separate file
# - Move read functions to separate input functions (also to support more types of inputs)
import re
import lib.config
from PIL import ImageFont
# Paper dimensions in millimetres, keyed by 'width' and 'height'
A4 = dict(width=210, height=297)
# NOTE(review): 210 x 148 describes A5 in landscape orientation (portrait A5 is 148 x 210) - confirm this is intended
A5 = dict(width=210, height=148)
"""!@brief Removes empty lines and makes sure every line ends with \r\n
@param inputString raw txt input
@return string of parsed input
"""
def stripEmptyLines(inputString):
    """!@brief Removes empty lines and terminates every remaining line with CRLF
        Lines that contain only whitespace count as empty and are dropped.
        A carriage return that is already present at the end of a line (input
        that still uses CRLF line endings) is replaced instead of doubled.
        @param inputString raw txt input
        @return string of the remaining lines, each terminated by CRLF
    """
    keptLines = []
    for line in inputString.split("\n"):
        if line.strip() == "":
            continue
        # Strip a leftover CR first, otherwise CRLF input ends up as CR CR LF
        keptLines.append(line.rstrip("\r") + "\r\n")
    return "".join(keptLines)
"""!@brief Opens a .txt file and loads it's contents into buffer
@param inputFile path to .txt file
@return .txt file raw contents
"""
def readSourceFile(inputFile):
    """!@brief Reads a text file completely into memory
        @param inputFile path to .txt file
        @return the file's entire contents as one string
    """
    # Text mode, no explicit encoding: the platform default encoding is used
    sourceHandle = open(inputFile, 'r')
    with sourceHandle:
        rawContents = sourceHandle.read()
    return rawContents
"""!@brief Returns whether the string is a line of lyrics or a line of tablature data
@param inputString single line of text
@return True if it is tablature data, False if it is lyric data
"""
def isTablatureData(inputString):
    """!@brief Guesses whether a line holds tablature (chord) data or lyrics
        The guess is made by these checks, in this order:
        1. an empty line is never tablature
        2. a '/' or '#' anywhere marks a tablature line
        3. a letter outside the chord alphabet (a-h, x, m, j, n) marks a lyric line
        4. a digit marks a tablature line
        5. a '.' marks a lyric line
        6. anything else is assumed to be tablature
        @param inputString single line of text
        @return True if it is tablature data, False if it is lyric data or empty
    """
    if not inputString:
        # Always answer with a real bool so callers can compare results with ==
        return False
    # Assume tablature line if any character {/, #}
    tablatureSpecificCharacters = "/#"
    if any(char in tablatureSpecificCharacters for char in inputString):
        return True
    # Assume LYRIC line if any TEXT character OTHER THAN {a, b, c, d, e, f, g, h, x, m, j, n}
    chordLetters = frozenset("abcdefghxmjn")
    for char in inputString:
        if char.isalpha() and char.lower() not in chordLetters:
            return False
    # Assume tablature line if any digit
    if any(char.isdigit() for char in inputString):
        return True
    # Assume LYRIC line if any character {.}
    if "." in inputString:
        return False
    # Unable to tell the difference, so assume it is a tablature line
    return True
"""!@brief Class containing Section specific data
"""
class Section:
    def __init__(self):
        # List of lines of lyrics strings
        self.lyrics = []
        # List of lines of tablature strings
        self.tablatures = []
        # section type string
        self.header = ""
        # string of tablature and lyric data
        self.rawData = ""
        # Flag for successfully parsed
        self.isParsed = False
        # Expected dimensions of this section, -1 as long as they are not calculated
        self.expectedWidth = -1
        self.expectedHeight = -1

    """!@brief Calculates dimensions of rendered text
        The height is the header height plus the height of every tablature/lyric
        line pair; the width is that of the widest of all measured lines.
        The result is stored in expectedWidth and expectedHeight.
        @param fontTablature font used to measure the header and the tablature lines
        @param fontLyrics font used to measure the lyric lines
        @return None
    """
    def calculateSectionDimensions(self, fontTablature, fontLyrics):
        # NOTE(review): FreeTypeFont.getsize is not available in Pillow 10 and later - confirm the Pillow version this project pins
        # consider section title
        maxWidth, heightSum = fontTablature.getsize(self.header)
        # Lines are measured pairwise: tablatures[i] belongs to lyrics[i]
        for lyricLine, tablatureLine in zip(self.lyrics, self.tablatures):
            lyricTextWidth, lyricTextHeight = fontLyrics.getsize(lyricLine)
            tablatureTextWidth, tablatureTextHeight = fontTablature.getsize(tablatureLine)
            heightSum += lyricTextHeight + tablatureTextHeight
            maxWidth = max(maxWidth, lyricTextWidth, tablatureTextWidth)
        self.expectedWidth = maxWidth
        self.expectedHeight = heightSum

    """!@brief Converts raw buffered data into separate Lyric and tablature lines
        Every tablature line is paired with the lyric line that follows it.
        Where the input has two lines of the same type in a row, or starts with
        a lyric line, an empty line of the other type is inserted so that
        tablatures[i] and lyrics[i] always belong together.
        Sets isParsed to True on success.
        @param lineClassifier optional callable that receives one line and returns
               a truthy value for tablature lines; defaults to isTablatureData
        @return None
    """
    def initSections(self, lineClassifier=None):
        if lineClassifier is None:
            lineClassifier = isTablatureData
        # None until the first non-empty line has been handled
        prevWasTablature = None
        # Input sections may have tablature-only or lyric-only sections
        # So we have to insert empty lines if we have subsequent tablature or lyric lines
        for line in self.rawData.split('\r\n'):
            if not line:
                continue
            # Determine lyric or tablature line
            currentIsTablature = bool(lineClassifier(line))
            if prevWasTablature is None:
                if currentIsTablature:
                    self.tablatures.append(line)
                else:
                    # A leading lyric line has no tablature above it; without this
                    # placeholder every later tablature line shifts up by one lyric
                    self.tablatures.append("")
                    self.lyrics.append(line)
            # We want alternating lines, so if the prev is of the same type
            # we need to insert an empty line of the other type
            elif currentIsTablature == prevWasTablature:
                if currentIsTablature:
                    # the previous tablature line gets an empty lyric line
                    self.tablatures.append(line)
                    self.lyrics.append("")
                else:
                    # the current lyric line gets an empty tablature line
                    self.lyrics.append(line)
                    self.tablatures.append("")
            elif currentIsTablature:
                self.tablatures.append(line)
            else:
                self.lyrics.append(line)
            # move on to next line, save current type
            prevWasTablature = currentIsTablature
        # Simple check to see if it probably exported correctly
        if abs(len(self.lyrics) - len(self.tablatures)) > 1:
            print("Unable to parse section {}, since there is a mismatch between the amount of lyrics ({}) and tablature ({}) lines.".format(self.header, len(self.lyrics), len(self.tablatures)))
            return
        # Add a trailing empty line if necessary
        elif len(self.lyrics) > len(self.tablatures):
            self.tablatures.append("")
        elif len(self.lyrics) < len(self.tablatures):
            self.lyrics.append("")
        self.isParsed = True
"""!@brief Class containing Sections which fit on 1 page
"""
class Page:
    def __init__(self):
        # Height taken up by the content of this page; -1 as long as it is not set
        self.totalHeight = -1
        # The sections placed on this page
        self.sections = []
"""!@brief Class containing Song specific data
"""
class Song:
    def __init__(self):
        # Src file
        self.inputFile = ""
        # Path to folder
        self.outputLocation = ""
        self.fileExtension = ""
        # Title - based on input file
        self.title = ""
        # List of Section objects
        self.sections = []
        # Meta info: the text before the first section
        self.metadata = ""
        self.metadataWidth = -1
        self.metadataHeight = -1
        # String of entire input
        self.rawData = ""
        # List of pages, which contain sections which fit on a page
        self.pages = []
        # Flag for successfully parsed
        self.isParsed = False

        # Everything below comes from the 'output' part of the configuration
        outputSettings = lib.config.config['output']

        def readColour(settingName):
            # Colours are stored as comma separated integers
            return tuple(int(channel) for channel in outputSettings[settingName].split(','))

        self.topMargin = int(outputSettings['topMargin'])
        self.fontColour = readColour('fontColour')
        self.backgroundColour = readColour('backgroundColour')
        self.metadataColour = readColour('metadataColour')
        self.ppi = int(outputSettings['imageppi'])
        # 0.03937 inch per millimetre, so ppi * millimetres * 0.03937 gives pixels
        self.imageWidth = int(self.ppi * A4['width'] * 0.03937)
        self.imageHeight = int(self.ppi * A4['height'] * 0.03937)
        # With a PPI of 72, a font size of 14-18 is a good starting point (PPI / 4 or 4.5)
        # Since font size is then shrunk and grown to fit whitespace we do not need to be as accurate
        # PPI of 144 -> fontSize of 32
        self.fontSize = int(self.ppi / 4.5)
        self.leftMargin = int(outputSettings['leftMargin'])
        self.rightMargin = int(outputSettings['rightMargin'])
        self.fontFamilyLyrics = outputSettings['lyricfontfamily']
        self.fontLyrics = ImageFont.truetype(self.fontFamilyLyrics, self.fontSize)
        # The misspelled key is the name that is looked up in the configuration
        self.fontFamilyTablature = outputSettings['tablaturefontfamliy']
        self.fontTablature = ImageFont.truetype(self.fontFamilyTablature, self.fontSize)
        # NOTE(review): the 'metaFontWeight' setting is used as a font size here - confirm the naming
        self.metadataFontsize = int(outputSettings['metaFontWeight'])
        self.metadataFontFamily = outputSettings['metafontfamily']
        self.fontMetadata = ImageFont.truetype(self.metadataFontFamily, self.metadataFontsize)

    """!@brief Calculates dimensions of metadata
        Measures every non-empty metadata line with the metadata font and stores
        the widest line in metadataWidth and topMargin plus the summed line
        heights in metadataHeight.
        @return None
    """
    def calculateMetadataDimensions(self):
        # NOTE(review): FreeTypeFont.getsize is not available in Pillow 10 and later - confirm the Pillow version this project pins
        widestLine = 0
        # metadata starts topMargin removed from top
        runningHeight = self.topMargin
        for rawLine in self.metadata.split('\n'):
            text = rawLine.rstrip()
            if text:
                lineWidth, lineHeight = self.fontMetadata.getsize(text)
                widestLine = max(widestLine, lineWidth)
                runningHeight += lineHeight
        self.metadataWidth = widestLine
        self.metadataHeight = runningHeight

    """!@brief Resizes all sections by a specified amount
        Also recalculates all section sizes afterwards
        @param mutator amount of fontSize to add/dec from current font size
        @return None
    """
    def resizeAllSections(self, mutator):
        self.fontSize = self.fontSize + mutator
        # Both fonts share one size and have to be reloaded for the new size
        self.fontLyrics = ImageFont.truetype(self.fontFamilyLyrics, self.fontSize)
        self.fontTablature = ImageFont.truetype(self.fontFamilyTablature, self.fontSize)
        self.prerenderSections()

    """!@brief Resizes metadata and recalcs its size
        @param mutator amount of fontSize to add/dec from current font size
        @return None
    """
    def resizeMetadata(self, mutator):
        self.metadataFontsize = self.metadataFontsize + mutator
        self.fontMetadata = ImageFont.truetype(self.metadataFontFamily, self.metadataFontsize)
        self.calculateMetadataDimensions()

    """!@brief Calculates the expected dimensions of the metadata and of all sections
        @return None
    """
    def prerenderSections(self):
        self.calculateMetadataDimensions()
        for songSection in self.sections:
            songSection.calculateSectionDimensions(self.fontTablature, self.fontLyrics)

    """!@brief Shrinks the fonts until nothing overflows on the width of the page
        First the section fonts are decreased one step at a time, then the metadata font.
        @return None
    """
    def fitSectionsByWidth(self):
        self.prerenderSections()
        # NOTE(review): neither loop has a lower bound on the font size - confirm this cannot reach zero
        while True:
            if self.checkOverflowX():
                break
            self.resizeAllSections(-1)
        while True:
            if self.checkOverflowMetadata():
                break
            self.resizeMetadata(-1)

    """!@brief Checks whether we are overflowing on the width of the page
        Reports the first section that is too wide.
        @return True if everything OK, False if overflowing
    """
    def checkOverflowX(self):
        usableWidth = self.imageWidth - self.leftMargin - self.rightMargin
        for songSection in self.sections:
            if songSection.expectedWidth > usableWidth:
                print("There is an overflow on width: this section has a width of {}, but we have {} ({}-{}-{}) amount of space".format(songSection.expectedWidth, usableWidth, self.imageWidth, self.leftMargin, self.rightMargin))
                return False
        return True

    """!@brief Checks whether the metadata is overflowing on the width of the page
        @return True if everything OK, False if overflowing
    """
    def checkOverflowMetadata(self):
        usableWidth = self.imageWidth - self.leftMargin - self.rightMargin
        return not (self.metadataWidth > usableWidth)

    """!@brief Increases the font size as far as possible without creating more pages
        The font grows one step at a time until a page is added or a section
        overflows on the width; the last step is then undone.
        @return None
    """
    def increaseWhileSameAmountOfPages(self):
        targetPageAmount = len(self.pages)
        originalFontsize = self.fontSize
        # Increase fontSize as long as we do not add a page
        while True:
            self.resizeAllSections(1)
            self.sectionsToPages()
            if len(self.pages) > targetPageAmount or not self.checkOverflowX():
                break
        # Now undo latest increase to go back to target page amount
        self.resizeAllSections(-1)
        self.sectionsToPages()
        resultingPageAmount = len(self.pages)
        if targetPageAmount != resultingPageAmount:
            print("Oops! While resizing up we changed the amount of pages from {} to {}".format(targetPageAmount, resultingPageAmount))
        if self.fontSize != originalFontsize:
            print("Managed to change the font size from {} to {}".format(originalFontsize, self.fontSize))

    """!@brief Tries to fill in the whitespace on the current render
        It will compare the size of existing whitespace with the size of the first section on the next page
        While the amount we are short is within X% of the current image height, resize down
        @return True if we should resize down, False if we are fine
    """
    def canFillWhitespace(self):
        if not len(self.pages):
            return False
        # Stop resizing if we are creating too much whitespace on the width
        usableWidth = self.imageWidth - self.leftMargin - self.rightMargin
        # NOTE(review): the start value is the image height although widths are compared - confirm this is only meant as a large start value
        smallestWhitespace = self.imageHeight
        biggestWhitespace = -1
        for page in self.pages:
            for songSection in page.sections:
                whitespaceOnWidth = usableWidth - songSection.expectedWidth
                smallestWhitespace = min(smallestWhitespace, whitespaceOnWidth)
                biggestWhitespace = max(biggestWhitespace, whitespaceOnWidth)
        # Sections vary in width, some are very small to begin with
        # Since (almost empty) lines will result in large whitespace sizes, we are less strict on checking that
        biggestShare = biggestWhitespace / self.imageWidth
        if biggestShare > 0.9:
            print("Stopping resizing down, since the smallest section has {}% whitespace on the width of the image".format(biggestShare * 100))
            return False
        # But the largest section on the page should be able to fit at least half of the available page
        smallestShare = smallestWhitespace / self.imageWidth
        if smallestShare > 0.4:
            print("Stopping resizing down, since we largest section has {}% whitespace on the width of the image".format(smallestShare * 100))
            return False
        # Compare the room left on every page with the first section of the page after it
        for curPage, nextPage in zip(self.pages, self.pages[1:]):
            whitespace = self.imageHeight - curPage.totalHeight
            amountWeAreShort = nextPage.sections[0].expectedHeight - whitespace
            if amountWeAreShort / self.imageHeight < 0.15:
                return True
        return False

    """!@brief Fits current sections into pages
        Rebuilds self.pages from scratch: sections are added to a page until
        the next one would not fit on the height, then a new page is started.
        @return None
    """
    def sectionsToPages(self):
        self.prerenderSections()
        self.pages = []
        # First page contains metadata, with a margin above and below it
        usedHeight = self.topMargin + self.metadataHeight + self.topMargin
        openPage = Page()
        # Now fit all sections
        for songSection in self.sections:
            if -1 in (songSection.expectedHeight, songSection.expectedWidth):
                print("Warning: this file was not processed correctly. The expected dimensions are not set")
            # See if the section would fit on the current page - if it does not, we have a filled page
            if usedHeight + songSection.expectedHeight > self.imageHeight:
                openPage.totalHeight = usedHeight
                self.pages.append(openPage)
                usedHeight = self.topMargin
                openPage = Page()
            # Add section header size and size of lines of data
            # NOTE(review): expectedHeight is calculated by the section itself; verify it does not already contain the header height
            headerWidth, headerHeight = self.fontTablature.getsize(songSection.header)
            # Last term is the margin between each section
            usedHeight = usedHeight + headerHeight + songSection.expectedHeight + self.topMargin
            openPage.sections.append(songSection)
        # No more sections left, so the current buffered image is ready to be written to file
        openPage.totalHeight = usedHeight
        self.pages.append(openPage)

    """!@brief Parses self.rawData into Section objects and metadata
        Assumes the raw data is preprocessed, so it parses it using set rules instead of guessing line attributes
        Not implemented yet.
        @return None
    """
    def initPreprocessed(self):
        pass

    """!@brief Reads the input file and parses it into metadata and Section objects
        Everything in front of the first '[' is kept as metadata. After that every
        section runs from its '[<sectionName>]' header line up to the next '['.
        Sets isParsed to True only if every section could be parsed.
        @return None
    """
    def initSections(self):
        # Get raw data
        self.rawData = readSourceFile(self.inputFile)
        # Clean up input
        remaining = stripEmptyLines(self.rawData)
        # Start with metadata
        metadataText, openingBracket, afterBracket = remaining.partition("[")
        if not openingBracket:
            print("Cannot parse input file, since it is not delimited by '[<sectionName>]' entries")
            return
        self.metadata = metadataText
        # We are now at the start of the first section, at the '[' character
        remaining = openingBracket + afterBracket
        while remaining:
            thisSection = Section()
            # Get header on the first line
            headerText, headerEnd, afterHeader = remaining.partition("]\r\n")
            if not headerEnd:
                print("Cannot parse input file, delimiter did not match '[<sectionName>]'")
                return
            # The header keeps its closing bracket and line ending
            thisSection.header = headerText + headerEnd
            # Find next section
            # NOTE(review): any '[' inside the section text is treated as the start of the next section
            sectionText, openingBracket, afterBracket = afterHeader.partition("[")
            # If there is no next section the whole remaining buffer belongs to this one
            thisSection.rawData = sectionText
            remaining = openingBracket + afterBracket
            # Finally parse section data
            thisSection.initSections()
            if not thisSection.isParsed:
                print("Aborting parse due to section not being parseable.")
                return
            self.sections.append(thisSection)
        self.isParsed = True