# frozen_string_literal: true

# import.rb -- reconstructed from the deleted-file diff of import.rb
# (blob b3e58d7) in the ardour-manual-diverged repository.
#
# Reads an HTML book export (downloaded from URL into FILENAME on first run),
# walks its nested `div.section-N` elements and writes one HTML file per
# section below OUTPUT_DIR, each prefixed with a YAML front-matter header.
# Links of the form `/manual/<slug>` are rewritten to the path of the section
# they refer to; lookups are cached on disk under `tmp/`.

require 'nokogiri'
require 'fileutils'
require 'open-uri'

# Location of the HTML export and the local file it is cached in.
URL = 'http://ardour.org/book/export/html/5848'
FILENAME = 'drupal-export.html'

# Feature switches: write output pages / download linked files / fetch images.
WRITE = true
DOWNLOAD_FILES = false
GET_ARDOUR_ORG_IMAGES = false
HANDLE_OTHER_IMAGES = false

# Root directory for the generated pages.
OUTPUT_DIR = '_manual'

# Root directory for downloaded files and images.
FILES_DIR = 'source'

# Link slugs that must be replaced by another slug before being resolved.
SLUG_MAPPINGS = {
  'working_with_sessions' => 'sessions',
  'export_stem' => 'export',
  'track_groups' => 'track_bus_groups',
  'vst_support' => 'windows_vst',
  'kbd_default' => 'default_bindings',
  'midistep_entry' => 'midi_step_entry',
  'midi_stepentry' => 'midi_step_entry'
}.freeze

# Link slugs that are never resolved; links to them point at "/missing".
MISSING_SLUGS = %w[
  range_selection
  track_templates
  track_template
  color_dialog
  region_layering
  round_robin_inputs
  mcp_osx
  mcp_new_device
].freeze

# Replacement paths for specific `/files/...` links.
FILES_MAPPINGS = {
  '/files/a3_mnemonic_cheatsheet.pdf' => '/files/ardour-2.8.3-bindings-x.pdf',
  '/files/a3_mnemonic_cheat_sheet_osx.pdf' => '/files/ardour-2.8.3-bindings-osx-a4.pdf'
}.freeze

# In-memory cache of slug => node id (deliberately mutable, hence not frozen).
LINK_SLUG_TO_NODE_ID = {}

# Downloads +url+ and stores the raw bytes at +destination+, creating the
# destination's parent directory first.
#
# @param url [String] absolute URL to fetch
# @param destination [String] local path to write
# @return [void]
def fetch_to_file(url, destination)
  FileUtils.mkdir_p File.dirname(destination)
  # Binary write: the payload may be a PDF or an image, not just text.
  File.binwrite(destination, URI.open(url, &:read))
end

# Resolves a manual link slug to a numeric node id.
#
# The slug is first translated through SLUG_MAPPINGS. Results are cached both
# in LINK_SLUG_TO_NODE_ID and in `tmp/slug-to-node/<slug>`; only on a cache
# miss is the page `http://ardour.org/manual/<slug>` fetched and the id read
# from its `#content .node` element.
#
# @param slug [String] slug taken from a `/manual/<slug>` link
# @return [Integer, nil] the node id, or nil when the slug is in MISSING_SLUGS
# @raise [RuntimeError] when the fetched page has no `#content .node` element
def link_slug_to_node_id(slug)
  slug = SLUG_MAPPINGS[slug] || slug

  return nil if MISSING_SLUGS.include?(slug)

  LINK_SLUG_TO_NODE_ID[slug] ||= begin
    filename = "tmp/slug-to-node/#{slug}"

    if File.exist?(filename)
      File.read(filename).to_i
    else
      url = "http://ardour.org/manual/#{slug}"
      puts "opening #{url}"
      node = URI.open(url) { |io| Nokogiri(io) }.at('#content .node')
      raise "no '#content .node' element found at #{url}" unless node

      node_id = node['id'].sub(/^node-/, '').to_i
      File.write(filename, node_id.to_s)
      node_id
    end
  end
end

# Records the output path of a node in `tmp/node-to-path/<node_id>`.
# An existing record is never overwritten.
#
# @param node_id [String, Integer] node id (used as the file name)
# @param path [String] slash-separated section path
# @return [void]
def register_node(node_id, path)
  filename = "tmp/node-to-path/#{node_id}"
  File.write(filename, path) unless File.exist?(filename)
end

# Looks up the path previously stored by #register_node.
#
# @param node_id [String, Integer] node id to look up
# @return [String] the stored path, or '' when the node was never registered
def node_id_to_path!(node_id)
  filename = "tmp/node-to-path/#{node_id}"
  return '' unless File.exist?(filename)

  File.read(filename)
end

# Rewrites the `href` of every `a` element inside +content+.
#
# `/manual/<slug>` links become `/<section path>` (or `/missing` when the slug
# cannot be resolved). `/files/...` links are only touched when DOWNLOAD_FILES
# is enabled: they are remapped through FILES_MAPPINGS and downloaded below
# FILES_DIR.
#
# @param content [Nokogiri::XML::Node] section markup, modified in place
# @param page_path [String] path of the page being processed (log output only)
# @return [void]
def rewrite_links(content, page_path)
  content.search('a').each do |a|
    case a['href']
    when %r{^/manual/(.*)}
      target_id = link_slug_to_node_id(Regexp.last_match(1))
      a['href'] = target_id ? "/#{node_id_to_path!(target_id)}" : '/missing'
    when %r{^(/files/.*)}
      # NOTE(review): the FILES_MAPPINGS href rewrite only happens when
      # downloading is enabled, as in the original -- confirm this is intended.
      next unless DOWNLOAD_FILES

      file_path = Regexp.last_match(1)
      if FILES_MAPPINGS[file_path]
        file_path = FILES_MAPPINGS[file_path]
        a['href'] = file_path
      end

      puts "downloading [#{file_path}] (for #{page_path})"
      # file_path already starts with '/', so no extra separator in the URL.
      fetch_to_file("http://ardour.org#{file_path}", File.join(FILES_DIR, file_path))
    end
  end
end

# Processes every `img` element inside +content+.
#
# Site-relative sources are optionally downloaded (GET_ARDOUR_ORG_IMAGES).
# Absolute http(s) sources are rewritten to a site-relative path and optionally
# downloaded to the matching location (HANDLE_OTHER_IMAGES).
#
# @param content [Nokogiri::XML::Node] section markup, modified in place
# @return [void]
def rewrite_images(content)
  content.search('img').each do |img|
    src = img['src']

    case src
    when %r{^/}
      next unless GET_ARDOUR_ORG_IMAGES

      url = "http://ardour.org#{src}"
      puts "getting #{url}"
      fetch_to_file(url, "#{FILES_DIR}#{src}")
    when /^http/
      # Strip scheme and host; accept https as well as http so that secure
      # URLs do not end up as "/https://...".
      new_src = "/#{src.sub(%r{^https?://[^/]+/}, '')}"
      img['src'] = new_src

      next unless HANDLE_OTHER_IMAGES

      puts "new_src: #{new_src}"
      puts "getting #{src}"
      fetch_to_file(src, "#{FILES_DIR}#{new_src}")
    end
  end
end

# Writes one output page: front matter, the section body and, for sections
# with sub-sections, a trailing `{% children %}` tag.
#
# @param output_file [String] path of the file to write
# @param title [String] page title for the front matter
# @param body [String] HTML body of the page
# @param has_children [Boolean] whether to append the children tag
# @return [void]
def write_page(output_file, title, body, has_children)
  FileUtils.mkdir_p File.dirname(output_file)
  File.open(output_file, 'w:UTF-8') do |f|
    f << <<~HTML
      ---
      layout: default
      title: #{title}
      ---

      #{body}
    HTML

    f << "{% children %}\n" if has_children
  end
end

# Recursively converts the `div.section-<level>` elements found in +html+ into
# output pages.
#
# For each section the title is read from `h1.book-heading`, a slug is derived
# from it, the node is registered via #register_node, links and images are
# rewritten, and (when WRITE is set) the page is written. The section whose
# slug is 'the-ardour3-manual' is treated as the root: it contributes no path
# component and is written to `<OUTPUT_DIR>/blah.html`.
#
# @param html [Nokogiri::XML::Node] document or section to search
# @param level [Integer] nesting level of the sections to look for
# @param path [Array<String>] slugs of the enclosing sections
# @param numbered_path [Array<String>] numbered slugs of the enclosing sections
# @return [void]
def process(html, level = 1, path = [], numbered_path = [])
  html.search("div.section-#{level}").each_with_index do |child, i|
    title = child.at('h1.book-heading').inner_text
    node_id = child['id'].sub(/^node-/, '')
    slug = title.downcase.gsub(' ', '-').gsub(/[^a-z0-9\-]/, '')

    root = slug == 'the-ardour3-manual'

    if root
      this_path = []
      this_numbered_path = []
    else
      this_path = path + [slug]
      this_numbered_path = numbered_path + [format('%02d_%s', i + 1, slug)]
    end

    page_path = this_path.join('/')
    register_node node_id, page_path

    has_children = !child.search("div.section-#{level + 1}").empty?

    output_dir = "#{OUTPUT_DIR}/#{this_numbered_path.join('/')}"
    output_file = root ? "#{OUTPUT_DIR}/blah.html" : "#{output_dir}.html"

    # Work on a copy so that removing sub-sections does not affect recursion.
    content = child.dup
    content.search('h1.book-heading').remove
    content.search("div.section-#{level + 1}").remove

    # Drop a leading h2 that merely repeats the page title.
    heading = content.at('h2')
    heading.remove if heading && heading.inner_text == title

    rewrite_links(content, page_path)
    rewrite_images(content)

    if WRITE
      FileUtils.mkdir_p output_dir if has_children
      write_page(output_file, title, content.inner_html, has_children)
    end

    process(child, level + 1, this_path, this_numbered_path)
  end
end

unless File.exist?(FILENAME)
  puts "downloading #{URL} to #{FILENAME}"
  fetch_to_file(URL, FILENAME)
end

FileUtils.mkdir_p('tmp/node-to-path')
FileUtils.mkdir_p('tmp/slug-to-node')

process Nokogiri(File.read(FILENAME))