Shamusworld >> Repos - ardour-manual/blob - import.rb

   1 require 'nokogiri'
   2 require 'fileutils'
   3 require 'open-uri'
   4
   5 URL = 'http://ardour.org/book/export/html/5848'
   6 FILENAME = 'drupal-export.html'
   7
   8 WRITE = true
   9 DOWNLOAD_FILES = false
  10 GET_ARDOUR_ORG_IMAGES = false
  11 HANDLE_OTHER_IMAGES = false
  12
  13 OUTPUT_DIR = '_manual'
  14
  15 FILES_DIR = 'source'
  16
  17 SLUG_MAPPINGS = {
  18     'working_with_sessions' => 'sessions',
  19     'export_stem' => 'export',
  20     'track_groups' => 'track_bus_groups',
  21     'vst_support' => 'windows_vst',
  22     'kbd_default' => 'default_bindings',
  23     'midistep_entry' => 'midi_step_entry',
  24     'midi_stepentry' => 'midi_step_entry'
  25 }
  26
  27 MISSING_SLUGS = %w(
  28     range_selection
  29     track_templates
  30     track_template
  31     color_dialog
  32     region_layering
  33     round_robin_inputs
  34     mcp_osx
  35     mcp_new_device
  36 )
  37
  38 FILES_MAPPINGS = {
  39     '/files/a3_mnemonic_cheatsheet.pdf' => '/files/ardour-2.8.3-bindings-x.pdf',
  40     '/files/a3_mnemonic_cheat_sheet_osx.pdf' => '/files/ardour-2.8.3-bindings-osx-a4.pdf'
  41 }
  42
  43 LINK_SLUG_TO_NODE_ID = {}
  44
  45 def link_slug_to_node_id(slug)
  46
  47     slug = SLUG_MAPPINGS[slug] || slug
  48
  49     return nil if MISSING_SLUGS.include? slug
  50
  51     LINK_SLUG_TO_NODE_ID[slug] ||= begin
  52         filename = "tmp/slug-to-node/#{slug}"
  53
  54         if File.exists? filename
  55             File.read(filename).to_i
  56         else
  57             url = "http://ardour.org/manual/#{slug}"
  58             puts "opening #{url}"
  59             node_id = Nokogiri(open(url)).at('#content .node')['id'].sub(/^node\-/,'').to_i
  60             File.open(filename,'w+') { |f| f << node_id }
  61             node_id
  62         end
  63     end
  64 end
  65
  66
  67 def register_node(node_id, path)
  68     filename = "tmp/node-to-path/#{node_id}"
  69     File.open(filename,'w+') { |f| f << path } unless File.exists? filename
  70 end
  71
  72 def node_id_to_path!(node_id)
  73     filename = "tmp/node-to-path/#{node_id}"
  74     return '' unless File.exists? filename
  75     #raise "no path for node-id #{node_id}" unless File.exists? filename
  76     File.read(filename)
  77 end
  78
  79 def process(html, level = 1, path = [], numbered_path = [])
  80     html.search("div.section-#{level}").each_with_index do |child, i|
  81
  82         title = child.at('h1.book-heading').inner_text
  83
  84         node_id = child['id'].sub(/^node\-/,'')
  85
  86
  87         slug = title.downcase.gsub(' ','-').gsub(/[^a-z0-9\-]/, '')
  88
  89         root = slug == 'the-ardour3-manual'
  90
  91         if root
  92
  93             # top level
  94
  95             this_path = []
  96             this_numbered_path = []
  97         else
  98             numbered_slug = "%02d_%s" % [i + 1, slug, node_id]
  99
 100             this_path = path + [slug]
 101             this_numbered_path = numbered_path + [numbered_slug]
 102         end
 103
 104         register_node node_id, this_path.join('/')
 105
 106         indent = ' ' * level * 3
 107
 108         has_children = child.search("div.section-#{level + 1}").length > 0 #&& possible_children.any? { |child| child.search('div').length > 0 }
 109
 110         output_dir = "#{OUTPUT_DIR}/#{this_numbered_path.join('/')}"
 111
 112         output_file = case
 113         when root
 114             "#{OUTPUT_DIR}/blah.html"
 115         #when has_children
 116         #    "#{output_dir}/index.html"
 117         else
 118             "#{output_dir}.html"
 119         end
 120
 121         content = child.dup
 122
 123         content.search('h1.book-heading').remove
 124         content.search("div.section-#{level + 1}").remove
 125
 126         if heading = content.at('h2') and heading.inner_text == title
 127             heading.remove
 128         end
 129
 130         #puts "processing links in [#{this_path.join('/')}]"
 131
 132         content.search('a').each do |a|
 133             href = a['href']
 134             case href
 135             when /^\/manual\/(.*)/
 136                 slug = $1
 137                 if node_id = link_slug_to_node_id(slug)
 138                     link_path = node_id_to_path! node_id
 139                     #puts " link slug [#{slug}] -> #{node_id} -> #{link_path}"
 140                     a['href'] = "/#{link_path}"
 141                 else
 142                     a['href'] = "/missing"
 143                 end
 144
 145             when /^(\/files\/.*)/
 146
 147                 if DOWNLOAD_FILES
 148                     file_path = $1
 149
 150
 151                     if FILES_MAPPINGS[file_path]
 152                         file_path = FILES_MAPPINGS[file_path]
 153                         a['href'] = file_path
 154                     end
 155
 156                     puts "downloading [#{file_path}] (for #{this_path.join('/')})"
 157
 158                     filename = "#{FILES_DIR}/#{file_path}"
 159                     FileUtils.mkdir_p File.dirname(filename)
 160                     File.open(filename,'w+') { |f| f << open("http://ardour.org/#{file_path}").read }
 161                 end
 162             end
 163         end
 164
 165         content.search('img').each do |img|
 166
 167             src = img['src']
 168
 169             case src
 170             when /^\//
 171                 if GET_ARDOUR_ORG_IMAGES
 172                     url = "http://ardour.org#{src}"
 173                     puts "getting #{url}"
 174                     img_path = "#{FILES_DIR}#{src}"
 175                     FileUtils.mkdir_p File.dirname(img_path)
 176                     File.open(img_path, 'w+') { |f| f << open(url).read }
 177                 end
 178             when /^http/
 179                 new_src = '/' + src.sub(/^http:\/\/[^\/]+\//,'')
 180                 img['src'] = new_src
 181
 182                 if HANDLE_OTHER_IMAGES
 183                     puts "new_src: #{new_src}"
 184                     img_path = "#{FILES_DIR}#{new_src}"
 185                     FileUtils.mkdir_p File.dirname(img_path)
 186                     puts "getting #{src}"
 187                     File.open(img_path, 'w+') { |f| f << open(src).read }
 188                 end
 189             end
 190
 191         end
 192
 193         if WRITE
 194             FileUtils.mkdir_p output_dir if has_children
 195             File.open(output_file, 'w:UTF-8') do |f|
 196                 f << <<-HTML
 197 ---
 198 layout: default
 199 title: #{title}
 200 ---
 201
 202 #{content.inner_html}
 203                 HTML
 204
 205                 if has_children
 206                     f << <<-HTML
 207 {% children %}
 208                     HTML
 209                 end
 210
 211
 212             end
 213         end
 214
 215         process(child, level + 1, this_path, this_numbered_path)
 216     end
 217 end
 218
 219
 220 unless File.exists?(FILENAME)
 221     puts "downloading #{URL} to #{FILENAME}"
 222     File.open(FILENAME,'w+') { |f| f << open(URL).read }
 223 end
 224
 225 FileUtils.mkdir_p('tmp/node-to-path')
 226 FileUtils.mkdir_p('tmp/slug-to-node')
 227
 228 process Nokogiri(File.read(FILENAME))
 229