3 # Script to take the master document and ancillary files and create the
4 # finished manual/website.
7 # (C) 2020 Underground Software
9 # Contributors: Ed Ward
12 # Remnants (could go into the master document as the first header)
21 global_bootstrap_path = '/bootstrap-3.3.7'
22 global_page_title = 'The Ardour Manual'
23 global_site_dir = './website/'
24 global_manual_url = 'http://manual.ardour.org'
25 global_githuburl = 'https://github.com/Ardour/manual/edit/master/include/'
26 global_screen_template = 'page-template.html'
27 global_onepage_template = 'onepage-template.html'
28 global_pdf_template = 'pdf-template.html'
29 global_master_doc = 'master-doc.txt'
30 global_pdflink = '<button class="btn btn-default" type="button" onclick="window.location.href=\'/manual.pdf\'"><span class="glyphicon glyphicon-book" aria-hidden="true"></span></button>'
31 from datetime import datetime
32 global_today = datetime.today().strftime('%Y-%m-%d')
34 # This matches all *non* letter/number, ' ', '.', '-', and '_' chars
35 cleanString = re.compile(r'[^a-zA-Z0-9 \._-]+')
36 # This matches new 'unbreakable' links, up to the closing quote or anchor
37 findLinks = re.compile(r'"@@[^#"]*[#"]')
40 # Create an all lowercase filename without special characters and with spaces
41 # replaced with dashes.
45 # Clean up the file name, removing all non letter/number or " .-_" chars.
46 # Also, convert to lower case and replace all spaces with dashes.
47 fn = cleanString.sub('', s).lower().replace(' ', '-')
48 # Double dashes can creep in from the above replacement, so we check for
50 fn = fn.replace('--', '-')
56 # Parse headers into a dictionary
58 def ParseHeader(fileObj):
62 hdrLine = fileObj.readline().rstrip('\r\n')
64 # Break out of the loop if we hit the end of header marker
65 if hdrLine.startswith('---'):
68 # Check to see that we have a well-formed header construct
69 match = re.findall(': ', hdrLine)
72 # Parse out foo: bar pairs & put into header dictionary
73 a = re.split(': ', hdrLine, 1)
80 # Turn a "part" name into an int
83 lvl = {'part': 0, 'chapter': 1, 'subchapter': 2, 'section': 3, 'subsection': 4 }
92 # Converts an integer to a Roman numeral
95 num_map = [(1000, 'M'), (900, 'CM'), (500, 'D'), (400, 'CD'), (100, 'C'), (90, 'XC'), (50, 'L'), (40, 'XL'), (10, 'X'), (9, 'IX'), (5, 'V'), (4, 'IV'), (1, 'I')]
107 # Capture the master document's structure (and content, if any) in a list
109 def GetFileStructure():
114 mf = open(global_master_doc)
117 if ln.startswith('---'):
118 # First, stuff any content that we may have read into the current
119 # header's dictionary
121 fs[-1]['content'] = content
125 # Then, get the new header and do things to it
126 hdr = ParseHeader(mf)
127 level = PartToLevel(hdr['part'])
129 fnames[level] = MakeFilename(hdr['title'])
131 # Ickyness--user specified URIs
133 hdr['filename'] = hdr['uri']
137 for i in range(level + 1):
138 fullName = fullName + fnames[i] + '/'
140 # Strip trailing '/' on filename
141 hdr['filename'] = fullName[:-1]
145 if ('include' not in hdr) and (level > 0):
149 content = content + ln
151 # Catch the last file, since it would be missed above
153 fs[-1]['content'] = content
160 # Determine if a particular node has child nodes
162 def HaveChildren(fs, pos):
163 # If we're at the end of the list, there can be no children
164 if pos == len(fs) - 1:
167 # If the next node is at a deeper level (a larger 'level' value) than the current node, we have
169 if fs[pos]['level'] < fs[pos + 1]['level']:
172 # Otherwise, no children at this node.
177 # Get the children at this level, and return them in a list
179 def GetChildren(fs, pos):
182 childLevel = fs[pos]['level']
184 while fs[pos]['level'] >= childLevel:
185 if fs[pos]['level'] == childLevel:
198 # Get the parent at this level
200 def GetParent(fs, pos):
201 thisLevel = fs[pos]['level']
204 while pos >= 0 and fs[pos]['level'] >= thisLevel:
211 # Change the hierarchy of titles: <h1>-><hn>, <h2>-><hn+1>, so that the
212 # highest hierarchy level is maxlevel
214 def remapheader(txt, maxlevel):
216 # find the highest hierarchy level in the content
217 while maxlvl < 7 and txt.find('<h' + str(maxlvl)) == -1:
219 # if there is a hierarchy, remap it so that the highest level is maxlevel
221 for i in range(6, maxlvl-1, -1):
222 txt = txt.replace('<h' + str(i), '<h' + str(i + maxlevel - maxlvl))
223 txt = txt.replace('</h' + str(i), '</h' + str(i + maxlevel - maxlvl))
228 # Creates the BreadCrumbs
230 def GetBreadCrumbs(fs, pos):
231 breadcrumbs = '<li class="active">'+ fs[pos]['title'] + '</li>'
234 pos = GetParent(fs, pos)
237 breadcrumbs='<li><a href="/' + fs[pos]['filename'] + '/">'+ fs[pos]['title'] + '</a></li>'+ breadcrumbs
239 breadcrumbs = '<ul class="breadcrumb"><li><a href="/toc/index.html">Home</a></li>' + breadcrumbs + '</ul>'
244 # Make an array of children attached to each node in the file structure
245 # (It's a quasi-tree structure, and can be traversed as such.)
247 def FindChildren(fs):
250 for i in range(len(fs)):
251 if HaveChildren(fs, i):
252 childArray.append(GetChildren(fs, i))
254 childArray.append([])
260 # Make an array of the top level nodes in the file structure
262 def FindTopLevelNodes(fs):
265 for i in range(len(fs)):
266 if fs[i]['level'] == 0:
273 # Find all header links and create a dictionary out of them
275 def FindInternalLinks(fs):
280 linkDict['"@@' + hdr['link'] + '"'] = '"/' + hdr['filename'] + '/"'
281 linkDict['"@@' + hdr['link'] + '#'] = '"/' + hdr['filename'] + '/index.html#'
287 # Same as above, but create anchors (for the one-page version)
289 def FindInternalAnchors(fs):
294 linkDict['"@@' + hdr['link'] + '"'] = '"#' + hdr['link'] + '"'
295 linkDict['"@@' + hdr['link'] + '#'] = '"#' + hdr['link'] + '"'
301 # Internal links are of the form '@@link-name', which are references to the
302 # 'link:' field in the part header. We have to find all occurrences and replace
303 # them with the appropriate link.
305 def FixInternalLinks(links, content, title):
307 match = findLinks.findall(content)
313 content = content.replace(s, links[s])
317 # Report missing link targets to the user (if any)
319 print('\nMissing link target' + ('s' if len(missing) > 1 else '') + ' in "' + title + '":')
330 # Recursively build a list of links based on the location of the page we're
331 # looking at currently
333 def BuildList(lst, fs, pagePos, cList):
336 for i in range(len(lst)):
338 nextPos = lst[i + 1] if i + 1 < len(lst) else len(fs)
340 active = ' class=active' if curPos == pagePos else ''
341 menuTitle = fs[curPos]['menu_title'] if 'menu_title' in fs[curPos] else fs[curPos]['title']
342 content = content + '\t<li' + active + '><a href="/' + fs[curPos]['filename'] + '/">' + menuTitle + '</a></li>\n'
344 # If the current page is our page, and it has children, enumerate them
345 if curPos == pagePos:
346 if len(cList[curPos]) > 0:
347 content = content + BuildList(cList[curPos], fs, -1, cList)
349 # Otherwise, if our page lies between the current one and the next,
350 # build a list of links from those nodes one level down.
351 elif (pagePos > curPos) and (pagePos < nextPos):
352 content = content + BuildList(cList[curPos], fs, pagePos, cList)
354 content = content + '</ul>\n'
360 # Builds the sidebar for the one-page version
362 def BuildOnePageSidebar(fs):
364 content = '\n\n<ul class="toc">\n'
368 for i in range(len(fs)):
369 # Handle Part/Chapter/subchapter/section/subsection numbering
370 level = fs[i]['level']
375 levelNums[level] = levelNums[level] + 1;
379 while j > 0: #level 0 is the part number which is not shown
380 txtlevel = str(levelNums[j]) + '.' + txtlevel
383 if len(txtlevel) > 0:
384 txtlevel = txtlevel[:-1] + ' - '
387 anchor = fs[i]['link']
389 anchor = fs[i]['filename']
392 content = content + '<ul class="toc">\n'
396 content = content + '</ul>\n'
399 content = content + '<li><a href="#' + anchor + '">' + txtlevel + fs[i]['title'] + '</a></li>\n'
401 content = content + '</ul>\n'
407 # Create link sidebar given a position in the list.
409 def CreateLinkSidebar(fs, pos, childList):
411 # Build the list recursively from the top level nodes
412 content = BuildList(FindTopLevelNodes(fs), fs, pos, childList)
413 # Shove the TOC link and one file link at the top...
414 active = ' class=active' if pos < 0 else ''
415 content = content.replace('<ul>', '<ul><li' + active + '><a href="/toc/">Table of Contents</a></li>\n', 1)
422 # We have command line arguments now, so deal with them
423 parser = argparse.ArgumentParser(description='A build script for the Ardour Manual')
424 parser.add_argument('-v', '--verbose', action='store_true', help='Display the high-level structure of the manual')
425 parser.add_argument('-q', '--quiet', action='store_true', help='Suppress all output (overrides -v)')
426 parser.add_argument('-d', '--devmode', action='store_true', help='Add content to pages to help developers debug them')
427 parser.add_argument('-p', '--pdf', action='store_true', help='Automatically generate PDF from content')
428 args = parser.parse_args()
429 verbose = args.verbose
430 noisy = not args.quiet
431 devmode = args.devmode
434 # --quiet overrides --verbose, so tell it to shut up if user did both
448 if noisy and devmode:
449 print('Devmode active: scribbling extra junk to the manual...')
451 if os.access(global_site_dir, os.F_OK):
453 print('Removing stale HTML data...')
455 shutil.rmtree(global_site_dir)
457 shutil.copytree('./source', global_site_dir)
459 # Read the template, and fix the stuff that's fixed for all pages
460 temp = open(global_screen_template)
461 template = temp.read()
463 template = template.replace('{{page.bootstrap_path}}', global_bootstrap_path)
464 template = template.replace('{{page.page_title}}', global_page_title)
466 template = template.replace('{{page.page_pdflink}}', global_pdflink)
468 template = template.replace('{{page.page_pdflink}}', '')
471 # Same as above, but for the "One-Page" version
472 temp = open(global_onepage_template)
473 onepage = temp.read()
475 onepage = onepage.replace('{{page.bootstrap_path}}', global_bootstrap_path)
476 onepage = onepage.replace('{{page.page_title}}', global_page_title)
479 # Same as above, but for the PDF version
480 temp = open(global_pdf_template)
481 pdfpage = temp.read()
483 pdfpage = pdfpage.replace('{{page.page_title}}', global_page_title)
485 # Parse out the master document's structure into a dictionary list
486 fileStruct = GetFileStructure()
488 # Build a quasi-tree structure listing children at level + 1 for each node
489 nodeChildren = FindChildren(fileStruct)
491 # Create a dictionary for translation of internal links to real links
492 links = FindInternalLinks(fileStruct)
493 oplinks = FindInternalAnchors(fileStruct)
496 print('Found ' + str(len(links)) + ' internal link target', end='')
497 print('.') if len(links) == 1 else print('s.')
500 master = open(global_master_doc)
501 firstLine = master.readline().rstrip('\r\n')
504 if firstLine == '<!-- exploded -->':
505 print('Parsing exploded file...')
506 elif firstLine == '<!-- imploded -->':
507 print('Parsing imploded file...')
509 print('Parsing unknown type...')
513 for header in fileStruct:
514 fileCount = fileCount + 1
519 level = header['level']
521 # Handle Part/Chapter/subchapter/section/subsection numbering
525 levelNums[level] = levelNums[level] + 1;
527 # This is totally unnecessary, but nice; besides which, you can capture
528 # the output to a file to look at later if you like :-)
530 for i in range(level):
534 print('\nPart ' + num2roman(levelNums[0]) + ': ', end='')
536 print('\n\tChapter ' + str(levelNums[1]) + ': ', end='')
538 print(header['title'])
540 # Handle TOC scribblings...
542 toc = toc + '<h2>Part ' + num2roman(levelNums[level]) + ': ' + header['title'] + '</h2>\n';
544 toc = toc + '\t<p class="chapter">Ch. ' + str(levelNums[level]) + ': <a href="/' + header['filename'] + '/">' + header['title'] + '</a></p>\n'
546 toc = toc + '\t\t<p class="subchapter"><a href="/' + header['filename'] + '/">' + header['title'] + '</a></p>\n'
548 toc = toc + '<p class="section"><a href="/' + header['filename'] + '/">' + header['title'] + '</a></p>\n'
550 toc = toc + '<p class="subsection"><a href="/' + header['filename'] + '/">' + header['title'] + '</a></p>\n'
552 # Make the 'this thing contains...' stuff
553 if HaveChildren(fileStruct, pageNumber):
554 pages = GetChildren(fileStruct, pageNumber)
557 more = more + '<li>' + '<a href="/' + fileStruct[pg]['filename'] + '/">' + fileStruct[pg]['title'] + '</a>' + '</li>\n'
559 more = '<div id=subtopics>\n' + '<h2>This section contains the following topics:</h2>\n' + '<ul>\n' + more + '</ul>\n' + '</div>\n'
561 parent = GetParent(fileStruct, pageNumber)
563 # Make the 'Previous', 'Up' & 'Next' content
569 pLink = '<li class="previous"><a title="' + fileStruct[pageNumber - 1]['title'] + '" href="/' + fileStruct[pageNumber - 1]['filename'] + '/" class="previous"> ← Previous </a></li>'
571 if pageNumber < len(fileStruct) - 1:
572 nLink = '<li class="next"><a title="' + fileStruct[pageNumber + 1]['title'] + '" href="/' + fileStruct[pageNumber + 1]['filename'] + '/" class="next"> Next → </a></li>'
575 uLink = '<li><a title="' + fileStruct[parent]['title'] + '" href="/' + fileStruct[parent]['filename'] + '/" class="active"> ↑ Up </a></li>'
577 uLink = '<li><a title="Ardour Table of Contents" href="/toc/index.html" class="active"> ↑ Up </a></li>'
579 prevnext = '<ul class="pager">' + pLink + uLink + nLink + '</ul>'
581 # Make the BreadCrumbs
582 breadcrumbs = GetBreadCrumbs(fileStruct, pageNumber)
584 # Create the link sidebar
585 sidebar = CreateLinkSidebar(fileStruct, pageNumber, nodeChildren)
587 # Parts DO NOT have any content, they are ONLY an organizing construct!
588 # Chapters, subchapters, sections & subsections can all have content,
589 # but the basic fundamental organizing unit WRT content is still the
594 if 'include' in header:
595 srcFile = open('include/' + header['include'])
596 githubedit = '<span style="float:right;"><a title="Edit in GitHub" href="' + global_githuburl + header['include'] + '"><img src="/images/github.png" alt="Edit in GitHub"/></a></span>'
597 content = srcFile.read()
600 # Get rid of any extant header in the include file
601 # (once this is accepted, we can nuke this bit, as content files
602 # will not have any headers or footers in them)
603 content = re.sub('---.*\n(.*\n)*---.*\n', '', content)
604 content = content.replace('{% children %}', '')
607 if 'content' in header:
608 content = header['content']
610 content = '[something went wrong]'
612 # Add header information to the page if in dev mode
614 devnote ='<aside style="background-color:indigo; color:white;">'
616 if 'filename' in header:
617 devnote = devnote + 'filename: ' + header['filename'] + '<br>'
619 if 'include' in header:
620 devnote = devnote + 'include: ' + header['include'] + '<br>'
623 devnote = devnote + 'link: ' + header['link'] + '<br>'
625 content = devnote + '</aside>' + content
627 # ----- One page and PDF version -----
629 # Fix up any internal links
630 opcontent = FixInternalLinks(oplinks, content, header['title'])
631 opcontent = remapheader(opcontent, level+2)
633 # Create "one page" header
634 oph = '<h' + str(level+1) + ' class="clear" id="' + header[('link' if 'link' in header else 'filename')] +'">' + header['title'] + '</h' + str(level+1) + '>\n';
636 # Set up the actual page from the template
637 onepage = onepage.replace('{{ content }}', oph + '\n' + opcontent + '\n{{ content }}')
640 if not 'pdf-exclude' in header:
641 pdfpage = pdfpage.replace('{{ content }}', oph + '\n' + opcontent + '\n{{ content }}')
643 pdfpage = pdfpage.replace('{{ content }}', oph + '\n' + 'Please refer to the <a href="' + global_manual_url + '/' + header['filename'] + '/">online manual</a>.\n{{ content }}')
645 # ----- Normal version -----
647 # Fix up any internal links
648 content = FixInternalLinks(links, content, header['title'])
650 # Set up the actual page from the template
651 if 'style' not in header:
652 page = re.sub("{% if page.style %}.*\n.*\n{% endif %}.*\n", "", template)
654 page = template.replace('{{page.style}}', header['style'])
655 page = page.replace('{% if page.style %}', '')
656 page = page.replace('{% endif %}', '')
658 page = page.replace('{{ page.title }}', header['title'])
659 page = page.replace('{% tree %}', sidebar)
660 page = page.replace('{% prevnext %}', prevnext)
661 page = page.replace('{% githubedit %}', githubedit)
662 page = page.replace('{% breadcrumbs %}', breadcrumbs)
663 page = page.replace('{{ content }}', content + more)
665 # Create the directory for the index.html file to go into (we use makedirs,
666 # because we have to in order to accommodate the 'uri' keyword)
667 os.makedirs(global_site_dir + header['filename'], 0o775, exist_ok=True)
669 # Finally, write the file!
670 destFile = open(global_site_dir + header['filename'] + '/index.html', 'w')
674 # Save filename for next header...
675 lastFile = header['filename']
676 pageNumber = pageNumber + 1
678 # Finally, create the TOC
679 sidebar = CreateLinkSidebar(fileStruct, -1, nodeChildren)
681 page = re.sub("{% if page.style %}.*\n.*\n{% endif %}.*\n", "", template)
682 page = page.replace('{{ page.title }}', 'Ardour Table of Contents')
683 page = page.replace('{% tree %}', sidebar)
684 page = page.replace('{{ content }}', toc)
685 page = page.replace('{% prevnext %}', '')
686 page = page.replace('{% githubedit %}', '')
687 page = page.replace('{% breadcrumbs %}', '')
689 os.mkdir(global_site_dir + 'toc', 0o775)
690 tocFile = open(global_site_dir + 'toc/index.html', 'w')
694 # Create the one-page version of the documentation
695 onepageFile = open(global_site_dir + 'ardourmanual.html', 'w')
696 opsidebar = BuildOnePageSidebar(fileStruct) # create the link sidebar
697 onepage = onepage.replace('{% tree %}', opsidebar)
698 onepage = onepage.replace('{{ content }}', '') # cleans up the last spaceholder
699 onepageFile.write(onepage)
704 print('Generating the PDF...')
706 logger = logging.getLogger('weasyprint')
707 logger.addHandler(logging.StreamHandler())
709 # Create the PDF version of the documentation
710 pdfpage = pdfpage.replace('{% tree %}', opsidebar) # create the TOC
711 pdfpage = pdfpage.replace('{{ content }}', '') # cleans up the last spaceholder
712 pdfpage = pdfpage.replace('{{ today }}', global_today)
713 pdfpage = pdfpage.replace('src="/images/', 'src="images/') # makes images links relative
714 pdfpage = pdfpage.replace('url(\'/images/', 'url(\'images/') # CSS images links relative
715 # Write it to disk (optional, can be removed)
716 pdfpageFile = open(global_site_dir + 'pdf.html', 'w')
717 pdfpageFile.write(pdfpage)
720 # Generating the actual PDF with weasyprint (https://weasyprint.org/)
721 from weasyprint import HTML
722 from weasyprint.text.fonts import FontConfiguration
724 html_font_config = FontConfiguration()
725 doc = HTML(string = pdfpage, base_url = global_site_dir)
726 doc.write_pdf(global_site_dir + 'manual.pdf', font_config = html_font_config)
729 print('Processed ' + str(fileCount) + ' files.')