Shamusworld >> Repos - ardour-manual/blob - build.py

   1 #!/usr/bin/python3
   2 #
   3 # Script to take the master document and ancillary files and create the
   4 # finished manual/website.
   5 #
   6 # by James Hammons
   7 # (C) 2017 Underground Software
   8 #
   9
  10 # Remnants (could go into the master document as the first header)
  11
  12 #bootstrap_path: /bootstrap-2.2.2
  13 #page_title: The Ardour Manual
  14
  15 import os
  16 import re
  17 import shutil
  18 import argparse
  19
  20
  21 # Global vars
  22 # This matches all *non* letter/number, ' ', '.', '-', and '_' chars
  23 cleanString = re.compile(r'[^a-zA-Z0-9 \._-]+')
  24 # This matches new 'unbreakable' links, up to the closing quote or anchor
  25 findLinks = re.compile(r'@@[^#"]*')
  26
  27 #
  28 # Create an all lowercase filename without special characters and with spaces
  29 # replaced with dashes.
  30 #
  31 def MakeFilename(s):
  32         global cleanString
  33         # Clean up the file name, removing all non letter/number or " .-_" chars.
  34         # Also, convert to lower case and replace all spaces with dashes.
  35         fn = cleanString.sub('', s).lower().replace(' ', '-')
  36         # Double dashes can creep in from the above replacement, so we check for
  37         # that here.
  38         fn = fn.replace('--', '-')
  39
  40         return fn
  41
  42
  43 #
  44 # Parse headers into a dictionary
  45 #
  46 def ParseHeader(fileObj):
  47         header = {}
  48
  49         while (True):
  50                 hdrLine = fileObj.readline().rstrip('\r\n')
  51
  52                 # Break out of the loop if we hit the end of header marker
  53                 if hdrLine.startswith('---'):
  54                         break
  55
  56                 # Check to see that we have a well-formed header construct
  57                 match = re.findall(': ', hdrLine)
  58
  59                 if match:
  60                         # Parse out foo: bar pairs & put into header dictionary
  61                         a = re.split(': ', hdrLine, 1)
  62                         header[a[0]] = a[1]
  63
  64         return header
  65
  66
  67 #
  68 # Turn a "part" name into an int
  69 #
  70 def PartToLevel(s):
  71         level = -1
  72
  73         if s == 'part':
  74                 level = 0
  75         elif s == 'chapter':
  76                 level = 1
  77         elif s == 'subchapter':
  78                 level = 2
  79         elif s == 'section':
  80                 level = 3
  81         elif s == 'subsection':
  82                 level = 4
  83
  84         return level
  85
  86 #
  87 # Converts a integer to a roman number
  88 #
  89 def num2roman(num):
  90         num_map = [(1000, 'M'), (900, 'CM'), (500, 'D'), (400, 'CD'), (100, 'C'), (90, 'XC'), (50, 'L'), (40, 'XL'), (10, 'X'), (9, 'IX'), (5, 'V'), (4, 'IV'), (1, 'I')]
  91         roman = ''
  92
  93         while num > 0:
  94                 for i, r in num_map:
  95                         while num >= i:
  96                                 roman += r
  97                                 num -= i
  98
  99         return roman
 100
 101 #
 102 # Capture the master document's structure (and content, if any) in a list
 103 #
 104 def GetFileStructure():
 105         fs = []
 106         fnames = [None]*6
 107         content = ''
 108         grab = False
 109         mf = open('master-doc.txt')
 110
 111         for ln in mf:
 112                 if ln.startswith('---'):
 113                         # First, stuff any content that we may have read into the current
 114                         # header's dictionary
 115                         if grab:
 116                                 fs[-1]['content'] = content
 117                                 grab = False
 118                                 content = ''
 119
 120                         # Then, get the new header and do things to it
 121                         hdr = ParseHeader(mf)
 122                         level = PartToLevel(hdr['part'])
 123                         hdr['level'] = level
 124                         fnames[level] = MakeFilename(hdr['title'])
 125
 126                         # Ickyness--user specified URIs
 127                         if 'uri' in hdr:
 128                                 hdr['filename'] = hdr['uri']
 129                         else:
 130                                 fullName = ''
 131
 132                                 for i in range(level + 1):
 133                                         fullName = fullName + fnames[i] + '/'
 134
 135                                 # Strip trailing '/' on filename
 136                                 hdr['filename'] = fullName[:-1]
 137
 138                         fs.append(hdr)
 139
 140                         if ('include' not in hdr) and (level > 0):
 141                                 grab = True
 142                 else:
 143                         if grab:
 144                                 content = content + ln
 145
 146         # Catch the last file, since it would be missed above
 147         if grab:
 148                 fs[-1]['content'] = content
 149
 150         mf.close()
 151         return fs
 152
 153
 154 #
 155 # Determine if a particular node has child nodes
 156 #
 157 def HaveChildren(fs, pos):
 158         # If we're at the end of the list, there can be no children
 159         if pos == len(fs) - 1:
 160                 return False
 161
 162         # If the next node is at a lower level than the current node, we have
 163         # children.
 164         if fs[pos]['level'] < fs[pos + 1]['level']:
 165                 return True
 166
 167         # Otherwise, no children at this node.
 168         return False
 169
 170
 171 #
 172 # Get the children at this level, and return them in a list
 173 #
 174 def GetChildren(fs, pos):
 175         children = []
 176         pos = pos + 1
 177         childLevel =  fs[pos]['level']
 178
 179         while fs[pos]['level'] >= childLevel:
 180                 if fs[pos]['level'] == childLevel:
 181                         children.append(pos)
 182
 183                 pos = pos + 1
 184
 185                 # Sanity check
 186                 if pos == len(fs):
 187                         break
 188
 189         return children
 190
 191
 192 #
 193 # Make an array of children attached to each node in the file structure
 194 # (It's a quasi-tree structure, and can be traversed as such.)
 195 #
 196 def FindChildren(fs):
 197         childArray = []
 198
 199         for i in range(len(fs)):
 200                 if HaveChildren(fs, i):
 201                         childArray.append(GetChildren(fs, i))
 202                 else:
 203                         childArray.append([])
 204
 205         return childArray
 206
 207
 208 #
 209 # Make an array of the top level nodes in the file structure
 210 #
 211 def FindTopLevelNodes(fs):
 212         level0 = []
 213
 214         for i in range(len(fs)):
 215                 if fs[i]['level'] == 0:
 216                         level0.append(i)
 217
 218         return level0
 219
 220
 221 #
 222 # Find all header links and create a dictionary out of them
 223 #
 224 def FindInternalLinks(fs):
 225         linkDict = {}
 226
 227         for hdr in fs:
 228                 if 'link' in hdr:
 229                         linkDict['@@' + hdr['link']] = '/' + hdr['filename'] + '/'
 230
 231         return linkDict
 232
 233
 234 #
 235 # Internal links are of the form '@@link-name', which are references to the
 236 # 'link:' field in the part header. We have to find all occurances and replace
 237 # them with the appropriate link.
 238 #
 239 def FixInternalLinks(links, content, title):
 240         global findLinks
 241         match = findLinks.findall(content)
 242         missing = []
 243
 244         if len(match) > 0:
 245                 for s in match:
 246                         if s in links:
 247                                 content = content.replace(s, links[s])
 248                         else:
 249                                 missing.append(s)
 250
 251         # Report missing link targets to the user (if any)
 252         if len(missing) > 0:
 253                 print('\nMissing link target' + ('s' if len(missing) > 1 else '') + ' in "' + title + '":')
 254
 255                 for s in missing:
 256                         print('  ' + s)
 257
 258                 print()
 259
 260         return content
 261
 262
 263 #
 264 # Recursively build a list of links based on the location of the page we're
 265 # looking at currently
 266 #
 267 def BuildList(lst, fs, pagePos, cList):
 268         content = '\n\n<dl>\n'
 269
 270         for i in range(len(lst)):
 271                 curPos = lst[i]
 272                 nextPos = lst[i + 1] if i + 1 < len(lst)  else len(fs)
 273
 274                 active = ' class=active' if curPos == pagePos else ''
 275                 menuTitle = fs[curPos]['menu_title'] if 'menu_title' in fs[curPos] else fs[curPos]['title']
 276                 content = content + '<dt' + active + '><a href="/' + fs[curPos]['filename'] + '/">' + menuTitle + '</a></dt><dd' + active + '>'
 277
 278                 # If the current page is our page, and it has children, enumerate them
 279                 if curPos == pagePos:
 280                         if len(cList[curPos]) > 0:
 281                                 content = content + BuildList(cList[curPos], fs, -1, cList)
 282
 283                 # Otherwise, if our page lies between the current one and the next,
 284                 # build a list of links from those nodes one level down.
 285                 elif (pagePos > curPos) and (pagePos < nextPos):
 286                         content = content + BuildList(cList[curPos], fs, pagePos, cList)
 287
 288                 content = content + '</dd>\n'
 289
 290         content = content + '</dl>\n'
 291
 292         return content
 293
 294 #
 295 # Create link sidebar given a position in the list.
 296 #
 297 def CreateLinkSidebar(fs, pos, childList):
 298
 299         # Build the list recursively from the top level nodes
 300         content = BuildList(FindTopLevelNodes(fs), fs, pos, childList)
 301         # Shove the TOC link in the top...
 302         content = content[:7] + '<dt><a href="/toc/">Table of Contents</a></dt><dd></dd>\n' + content[7:]
 303
 304         return content
 305
 306
 307 # Preliminaries
 308
 309 # We have command line arguments now, so deal with them
 310 parser = argparse.ArgumentParser(description='A build script for the Ardour Manual')
 311 parser.add_argument('-v', '--verbose', action='store_true', help='Display the high-level structure of the manual')
 312 parser.add_argument('-q', '--quiet', action='store_true', help='Suppress all output (overrides -v)')
 313 parser.add_argument('-d', '--devmode', action='store_true', help='Add content to pages to help developers debug them')
 314 args = parser.parse_args()
 315 verbose = args.verbose
 316 quiet = args.quiet
 317 devmode = args.devmode
 318
 319 if quiet:
 320         verbose = False
 321
 322 level = 0
 323 fileCount = 0
 324 levelNums = [0]*6
 325 lastFile = ''
 326 page = ''
 327 toc = ''
 328 pageNumber = 0
 329
 330 siteDir = './website/'
 331
 332 if not quiet and devmode:
 333         print('Devmode active: scribbling extra junk to the manual...')
 334
 335 if os.access(siteDir, os.F_OK):
 336         if not quiet:
 337                 print('Removing stale HTML data...')
 338
 339         shutil.rmtree(siteDir)
 340
 341 shutil.copytree('./source', siteDir)
 342
 343
 344 # Read the template, and fix the stuff that's fixed for all pages
 345 temp = open('page-template.txt')
 346 template = temp.read()
 347 temp.close()
 348
 349 template = template.replace('{{page.bootstrap_path}}', '/bootstrap-2.2.2')
 350 template = template.replace('{{page.page_title}}', 'The Ardour Manual')
 351
 352
 353 # Parse out the master docuemnt's structure into a dictionary list
 354 fileStruct = GetFileStructure()
 355
 356 # Build a quasi-tree structure listing children at level + 1 for each node
 357 nodeChildren = FindChildren(fileStruct)
 358
 359 # Create a dictionary for translation of internal links to real links
 360 links = FindInternalLinks(fileStruct)
 361
 362 if not quiet:
 363         print('Found ' + str(len(links)) + ' internal link target', end='')
 364         print('.') if len(links) == 1 else print('s.')
 365
 366 if not quiet:
 367         master = open('master-doc.txt')
 368         firstLine = master.readline().rstrip('\r\n')
 369         master.close()
 370
 371         if firstLine == '<!-- exploded -->':
 372                 print('Parsing exploded file...')
 373         elif firstLine == '<!-- imploded -->':
 374                 print('Parsing imploded file...')
 375         else:
 376                 print('Parsing unknown type...')
 377
 378 # Here we go!
 379
 380 for header in fileStruct:
 381         fileCount = fileCount + 1
 382         content = ''
 383         more = ''
 384
 385         lastLevel = level
 386         level = header['level']
 387
 388         # Handle Part/Chapter/subchapter/section/subsection numbering
 389         if level == 0:
 390                 levelNums[2] = 0
 391         elif level == 1:
 392                 levelNums[2] = 0
 393         elif level == 2:
 394                 levelNums[3] = 0
 395         elif level == 3:
 396                 levelNums[4] = 0
 397
 398         levelNums[level] = levelNums[level] + 1;
 399
 400         # This is totally unnecessary, but nice; besides which, you can capture
 401         # the output to a file to look at later if you like :-)
 402         if verbose:
 403                 for i in range(level):
 404                         print('\t', end='')
 405
 406                 if (level == 0):
 407                         print('\nPart ' + num2roman(levelNums[0]) + ': ', end='')
 408                 elif (level == 1):
 409                         print('\n\tChapter ' + str(levelNums[1]) + ': ', end='')
 410
 411                 print(header['title'])
 412
 413         # Handle TOC scriblings...
 414         if level == 0:
 415                 toc = toc + '<h2>Part ' + num2roman(levelNums[level]) + ': ' + header['title'] + '</h2>\n';
 416         elif level == 1:
 417                 toc = toc + '  <p id=chapter>Ch. ' + str(levelNums[level]) + ':&nbsp;&nbsp;<a href="/' + header['filename'] + '/">' + header['title'] + '</a></p>\n'
 418         elif level == 2:
 419                 toc = toc + '    <a id=subchapter href="/' + header['filename'] + '/">' + header['title'] + '</a><br>\n'
 420         elif level == 3:
 421                 toc = toc + '      <a id=subchapter href="/' + header['filename'] + '/">' + header['title'] + '</a><br>\n'
 422         elif level == 4:
 423                 toc = toc + '      <a id=subchapter href="/' + header['filename'] + '/">' + header['title'] + '</a><br>\n'
 424
 425         # Make the 'this thing contains...' stuff
 426         if HaveChildren(fileStruct, pageNumber):
 427                 pages = GetChildren(fileStruct, pageNumber)
 428
 429                 for pg in pages:
 430                         more = more + '<li>' + '<a href="/' + fileStruct[pg]['filename'] + '/">' + fileStruct[pg]['title'] + '</a>' + '</li>\n'
 431
 432                 more = '<div id=subtopics>\n' + '<h2>This section contains the following topics:</h2>\n' + '<ul>\n' + more + '</ul>\n' + '</div>\n'
 433
 434         # Make the 'Previous' & 'Next' content
 435         nLink = ''
 436         pLink = ''
 437
 438         if pageNumber > 0:
 439                 pLink = '<li><a title="' + fileStruct[pageNumber - 1]['title'] + '" href="/' + fileStruct[pageNumber - 1]['filename'] + '/" class="previous"> &lt; Previous </a></li>'
 440
 441         if pageNumber < len(fileStruct) - 1:
 442                 nLink = '<li><a title="' + fileStruct[pageNumber + 1]['title'] + '" href="/' + fileStruct[pageNumber + 1]['filename'] + '/" class="next"> Next &gt; </a></li>'
 443
 444         prevnext = '<ul class=pager>' + pLink + nLink + '</ul>'
 445
 446         # Create the link sidebar
 447         sidebar = CreateLinkSidebar(fileStruct, pageNumber, nodeChildren)
 448
 449         # Parts DO NOT have any content, they are ONLY an organizing construct!
 450         # Chapters, subchapters, sections & subsections can all have content,
 451         # but the basic fundamental organizing unit WRT content is still the
 452         # chapter.
 453         if level > 0:
 454                 if 'include' in header:
 455                         srcFile = open('include/' + header['include'])
 456                         content = srcFile.read()
 457                         srcFile.close()
 458
 459                         # Get rid of any extant header in the include file
 460                         # (once this is accepted, we can nuke this bit, as content files
 461                         # will not have any headers or footers in them)
 462                         content = re.sub('---.*\n(.*\n)*---.*\n', '', content)
 463                         content = content.replace('{% children %}', '')
 464
 465                 else:
 466                         if 'content' in header:
 467                                 content = header['content']
 468                         else:
 469                                 content = '[something went wrong]'
 470
 471         # Fix up any internal links
 472         content = FixInternalLinks(links, content, header['title'])
 473
 474         # Add header information to the page if in dev mode
 475         if devmode and 'link' in header:
 476                 content = '<h1>link: ' + header['link'] + '</h2>\n<br><br>\n' + content
 477
 478         # Set up the actual page from the template
 479         if 'style' not in header:
 480                 page = re.sub("{% if page.style %}.*\n.*\n{% endif %}.*\n", "", template)
 481         else:
 482                 page = template.replace('{{page.style}}', header['style'])
 483                 page = page.replace('{% if page.style %}', '')
 484                 page = page.replace('{% endif %}', '')
 485
 486         page = page.replace('{{ page.title }}', header['title'])
 487         page = page.replace('{% tree %}', sidebar)
 488         page = page.replace('{% prevnext %}', prevnext)
 489         page = page.replace('{{ content }}', content + more)
 490
 491         # Create the directory for the index.html file to go into (we use makedirs,
 492         # because we have to in order to accomodate the 'uri' keyword)
 493         os.makedirs(siteDir + header['filename'], 0o775, exist_ok=True)
 494
 495         # Finally, write the file!
 496         destFile = open(siteDir + header['filename'] + '/index.html', 'w')
 497         destFile.write(page)
 498         destFile.close()
 499
 500         # Save filename for next header...
 501         lastFile = header['filename']
 502         pageNumber = pageNumber + 1
 503
 504 # Finally, create the TOC
 505 sidebar = CreateLinkSidebar(fileStruct, -1, nodeChildren)
 506
 507 page = re.sub("{% if page.style %}.*\n.*\n{% endif %}.*\n", "", template)
 508 page = page.replace('{{ page.title }}', 'Ardour Table of Contents')
 509 page = page.replace('{% tree %}', sidebar)
 510 page = page.replace('{{ content }}', toc)
 511 page = page.replace('{% prevnext %}', '')
 512
 513 os.mkdir(siteDir + 'toc', 0o775)
 514 tocFile = open(siteDir + 'toc/index.html', 'w')
 515 tocFile.write(page)
 516 tocFile.close()
 517
 518 if not quiet:
 519         print('Processed ' + str(fileCount) + ' files.')