X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=import.rb;fp=import.rb;h=b3e58d7dbe9157ea7e91e2d967bc777a43b987d9;hb=8dda4023a5e675351ea43924f8e477af05eadccc;hp=0000000000000000000000000000000000000000;hpb=ae6efdc4a0dd1825a30a1b95b70d6a5f799713a9;p=ardour-manual-diverged

diff --git a/import.rb b/import.rb
new file mode 100644
index 0000000..b3e58d7
--- /dev/null
+++ b/import.rb
@@ -0,0 +1,229 @@
+require 'nokogiri'
+require 'fileutils'
+require 'open-uri'
+
+# Drupal book export that is imported, and the local file it is cached in.
+URL = 'http://ardour.org/book/export/html/5848'.freeze
+FILENAME = 'drupal-export.html'.freeze
+# Switches: write pages / mirror linked /files/ / mirror site-relative images / mirror absolute-URL images.
+WRITE = true
+DOWNLOAD_FILES = false
+GET_ARDOUR_ORG_IMAGES = false
+HANDLE_OTHER_IMAGES = false
+# Root directories for the generated pages and for mirrored files and images.
+OUTPUT_DIR = '_manual'.freeze
+FILES_DIR = 'source'.freeze
+# Link slugs that are replaced by another slug before they are looked up.
+SLUG_MAPPINGS = {
+    'working_with_sessions' => 'sessions',
+    'export_stem' => 'export',
+    'track_groups' => 'track_bus_groups',
+    'vst_support' => 'windows_vst',
+    'kbd_default' => 'default_bindings',
+    'midistep_entry' => 'midi_step_entry',
+    'midi_stepentry' => 'midi_step_entry'
+}.freeze
+# Link slugs that are never looked up; links to them end up pointing at /missing.
+MISSING_SLUGS = %w(
+    range_selection
+    track_templates
+    track_template
+    color_dialog
+    region_layering
+    round_robin_inputs
+    mcp_osx
+    mcp_new_device
+).freeze
+# /files/ hrefs that are swapped for a different file (only applied when DOWNLOAD_FILES is set).
+FILES_MAPPINGS = {
+    '/files/a3_mnemonic_cheatsheet.pdf' => '/files/ardour-2.8.3-bindings-x.pdf',
+    '/files/a3_mnemonic_cheat_sheet_osx.pdf' => '/files/ardour-2.8.3-bindings-osx-a4.pdf'
+}.freeze
+# In-memory slug => node id cache filled by link_slug_to_node_id; deliberately left unfrozen.
+LINK_SLUG_TO_NODE_ID = {}
+
+# Maps a /manual/<slug> link slug to its numeric Drupal node id; nil for slugs listed in MISSING_SLUGS.
+# Ids are cached in LINK_SLUG_TO_NODE_ID and tmp/slug-to-node/<slug>; on a miss the live page is fetched.
+def link_slug_to_node_id(slug)
+    slug = SLUG_MAPPINGS[slug] || slug
+    return nil if MISSING_SLUGS.include? slug
+    LINK_SLUG_TO_NODE_ID[slug] ||= begin
+        filename = "tmp/slug-to-node/#{slug}"
+        if File.exist? filename
+            File.read(filename).to_i
+        else
+            url = "http://ardour.org/manual/#{slug}"
+            puts "opening #{url}"
+            node = Nokogiri(URI.open(url)).at('#content .node') # URI.open: Kernel#open stopped fetching URLs in Ruby 3.0
+            raise "no '#content .node' element found at #{url}" unless node
+            node_id = node['id'].sub(/^node\-/,'').to_i
+            File.open(filename,'w+') { |f| f << node_id }
+            node_id
+        end
+    end
+end
+
+
+def register_node(node_id, path) # Stores path in tmp/node-to-path/<node_id>; an existing entry is never overwritten.
+    filename = "tmp/node-to-path/#{node_id}"
+    File.open(filename,'w+') { |f| f << path } unless File.exist? filename # File.exists? was removed in Ruby 3.2
+end
+
+# Returns the path stored for node_id under tmp/node-to-path/, or '' when nothing is stored yet.
+# Best effort on purpose: an unknown node is not treated as an error (raising here was left disabled).
+def node_id_to_path!(node_id)
+    filename = "tmp/node-to-path/#{node_id}"
+    File.exist?(filename) ? File.read(filename) : ''
+end
+
+# Converts every div.section-<level> found under html into a page below OUTPUT_DIR, recursively.
+#
+# For each section this registers node id => path, rewrites /manual/ links, optionally mirrors
+# linked files and images (DOWNLOAD_FILES, GET_ARDOUR_ORG_IMAGES, HANDLE_OTHER_IMAGES), writes
+# the page when WRITE is set, and then processes the section's own div.section-<level + 1> children.
+#
+# html          - Nokogiri document or node to search.
+# level         - nesting depth, used to build the section-<level> CSS class.
+# path          - slugs of the ancestor sections (used for link targets).
+# numbered_path - the same ancestors with their "NN_" prefixes (used for output file names).
+#
+# A link to a node that has not been registered yet is rewritten to "/"; the registrations in
+# tmp/node-to-path survive between runs, so a second run can fill those in.
+def process(html, level = 1, path = [], numbered_path = [])
+    html.search("div.section-#{level}").each_with_index do |child, i|
+        title = child.at('h1.book-heading').inner_text
+        node_id = child['id'].sub(/^node\-/,'')
+
+        # Slug: lower-cased title with spaces turned into dashes and all other characters dropped.
+        slug = title.downcase.gsub(' ','-').gsub(/[^a-z0-9\-]/, '')
+
+        # The book's root section contributes no path segment of its own.
+        root = slug == 'the-ardour3-manual'
+
+        if root
+            this_path = []
+            this_numbered_path = []
+        else
+            # Prefix with the 1-based position among the sections found at this level.
+            numbered_slug = "%02d_%s" % [i + 1, slug]
+            this_path = path + [slug]
+            this_numbered_path = numbered_path + [numbered_slug]
+        end
+
+        # Record the unnumbered path so that links to this node can be rewritten.
+        register_node node_id, this_path.join('/')
+
+        has_children = child.search("div.section-#{level + 1}").length > 0
+
+        # A section with children gets a directory next to its page, named like the page minus ".html".
+        output_dir = "#{OUTPUT_DIR}/#{this_numbered_path.join('/')}"
+        output_file = root ? "#{OUTPUT_DIR}/blah.html" : "#{output_dir}.html"
+
+        # Edit a copy: the nested sections removed here are still needed for the recursion below.
+        content = child.dup
+        content.search('h1.book-heading').remove
+        content.search("div.section-#{level + 1}").remove
+
+        # Drop the first h2 when it merely repeats the page title.
+        heading = content.at('h2')
+        heading.remove if heading && heading.inner_text == title
+
+        # Point links into the old manual at the new paths and optionally mirror linked files.
+        content.search('a').each do |a|
+            case a['href']
+            when /^\/manual\/(.*)/
+                # Separate names on purpose: assigning to slug/node_id here would overwrite the section's own.
+                link_slug = $1
+                if (target_id = link_slug_to_node_id(link_slug))
+                    a['href'] = "/#{node_id_to_path! target_id}"
+                else
+                    a['href'] = "/missing"
+                end
+
+            when /^(\/files\/.*)/
+                if DOWNLOAD_FILES
+                    file_path = $1
+
+                    if FILES_MAPPINGS[file_path]
+                        file_path = FILES_MAPPINGS[file_path]
+                        a['href'] = file_path
+                    end
+
+                    puts "downloading [#{file_path}] (for #{this_path.join('/')})"
+
+                    # file_path already starts with "/"; binary mode keeps non-text downloads intact.
+                    filename = "#{FILES_DIR}#{file_path}"
+                    FileUtils.mkdir_p File.dirname(filename)
+                    File.open(filename,'wb') { |f| f << URI.open("http://ardour.org#{file_path}").read }
+                end
+            end
+        end
+
+        content.search('img').each do |img|
+            src = img['src']
+
+            case src
+            when /^\//
+                # Site-relative image: src stays untouched, the file is optionally mirrored.
+                if GET_ARDOUR_ORG_IMAGES
+                    url = "http://ardour.org#{src}"
+                    puts "getting #{url}"
+                    img_path = "#{FILES_DIR}#{src}"
+                    FileUtils.mkdir_p File.dirname(img_path)
+                    File.open(img_path, 'wb') { |f| f << URI.open(url).read }
+                end
+
+            when /^http/
+                # Absolute URL: strip scheme and host (http or https) to get a site-relative src.
+                new_src = '/' + src.sub(/^https?:\/\/[^\/]+\//,'')
+                img['src'] = new_src
+
+                if HANDLE_OTHER_IMAGES
+                    puts "new_src: #{new_src}"
+                    img_path = "#{FILES_DIR}#{new_src}"
+                    FileUtils.mkdir_p File.dirname(img_path)
+                    puts "getting #{src}"
+                    File.open(img_path, 'wb') { |f| f << URI.open(src).read }
+                end
+            end
+        end
+
+        if WRITE
+            FileUtils.mkdir_p output_dir if has_children
+
+            File.open(output_file, 'w:UTF-8') do |f|
+                # Front matter, then the section's remaining HTML.
+                f << <<-HTML
+---
+layout: default
+title: #{title}
+---                        
+
+#{content.inner_html}
+                HTML
+
+                # NOTE(review): presumably a site-generator tag that lists the child pages - confirm.
+                if has_children
+                    f << <<-HTML
+{% children %}
+                    HTML
+                end
+            end
+        end
+
+        # Descend into this section's children, extending both paths.
+        process(child, level + 1, this_path, this_numbered_path)
+    end
+end
+
+
+# Fetch the Drupal export once; later runs reuse the local copy.
+unless File.exist?(FILENAME)
+    puts "downloading #{URL} to #{FILENAME}"
+    File.open(FILENAME,'w+') { |f| f << URI.open(URL).read }
+end
+# On-disk caches used by link_slug_to_node_id, register_node and node_id_to_path!.
+%w(tmp/node-to-path tmp/slug-to-node).each { |dir| FileUtils.mkdir_p(dir) }
+
+process Nokogiri(File.read(FILENAME))
+