]> Shamusworld >> Repos - ardour-manual/blob - import.rb
Documenting the Transport Bar and modifying the CSS for prettier checkboxes
[ardour-manual] / import.rb
require 'fileutils'
require 'open-uri'
require 'yaml'

require 'nokogiri'
4
# Drupal "book export" page that contains the whole manual as one HTML document.
URL = 'http://ardour.org/book/export/html/5848'.freeze
# Local cache of the export; it is downloaded only when this file is absent.
FILENAME = 'drupal-export.html'.freeze

# Switches controlling which side effects the import performs.
WRITE = true                  # write the generated pages under OUTPUT_DIR
DOWNLOAD_FILES = false        # fetch documents linked under /files/
GET_ARDOUR_ORG_IMAGES = false # fetch images referenced with a site-relative src
HANDLE_OTHER_IMAGES = false   # fetch images referenced with an absolute http URL

# Directory the generated pages are written to.
OUTPUT_DIR = '_manual'.freeze

# Directory downloaded files and images are stored under.
FILES_DIR = 'source'.freeze

# Link slugs that are rewritten to another slug before being resolved.
SLUG_MAPPINGS = {
    'working_with_sessions' => 'sessions',
    'export_stem' => 'export',
    'track_groups' => 'track_bus_groups',
    'vst_support' => 'windows_vst',
    'kbd_default' => 'default_bindings',
    'midistep_entry' => 'midi_step_entry',
    'midi_stepentry' => 'midi_step_entry'
}.freeze

# Link slugs that are never resolved; links to them are pointed at /missing.
MISSING_SLUGS = %w[
    range_selection
    track_templates
    track_template
    color_dialog
    region_layering
    round_robin_inputs
    mcp_osx
    mcp_new_device
].freeze

# /files/ paths that are replaced by another path before downloading.
FILES_MAPPINGS = {
    '/files/a3_mnemonic_cheatsheet.pdf' => '/files/ardour-2.8.3-bindings-x.pdf',
    '/files/a3_mnemonic_cheat_sheet_osx.pdf' => '/files/ardour-2.8.3-bindings-osx-a4.pdf'
}.freeze

# In-process memo of slug => node id lookups. Deliberately NOT frozen:
# link_slug_to_node_id fills it in as slugs are resolved.
LINK_SLUG_TO_NODE_ID = {}
44
# Resolves the slug of a /manual/<slug> link to a Drupal node id.
#
# The slug is first translated through SLUG_MAPPINGS. Results are memoized in
# LINK_SLUG_TO_NODE_ID and cached on disk in tmp/slug-to-node/<slug>; only when
# neither has the slug is the page fetched from ardour.org and the id read from
# the id attribute ("node-<id>") of its '#content .node' element.
#
# @param slug [String] slug as it appears in the link
# @return [Integer, nil] the node id, or nil when the slug is in MISSING_SLUGS
# @raise [RuntimeError] if the fetched page has no '#content .node' element with an id
def link_slug_to_node_id(slug)
    slug = SLUG_MAPPINGS[slug] || slug

    return nil if MISSING_SLUGS.include? slug

    LINK_SLUG_TO_NODE_ID[slug] ||= begin
        filename = "tmp/slug-to-node/#{slug}"

        # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
        if File.exist? filename
            File.read(filename).to_i
        else
            url = "http://ardour.org/manual/#{slug}"
            puts "opening #{url}"
            # URI#open (from open-uri) instead of Kernel#open, which no longer
            # opens URLs on Ruby 3.0 and later.
            node = Nokogiri(URI.parse(url).open).at('#content .node')
            raise "no '#content .node' element with an id at #{url}" unless node && node['id']

            node_id = node['id'].sub(/^node-/, '').to_i
            File.open(filename, 'w+') { |f| f << node_id }
            node_id
        end
    end
end
65
66
# Records the page path of a node in tmp/node-to-path/<node_id>.
#
# An existing record is left untouched, so the first path registered for a
# node id is the one that is kept.
#
# @param node_id [String, Integer] node id, used as the file name
# @param path [String] slash-joined page path written to the file
def register_node(node_id, path)
    filename = "tmp/node-to-path/#{node_id}"
    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    File.open(filename, 'w+') { |f| f << path } unless File.exist? filename
end
71
# Looks up the page path recorded for a node in tmp/node-to-path/<node_id>.
#
# @param node_id [String, Integer] node id to look up
# @return [String] the recorded path, or '' when nothing is recorded for the
#   node (a stricter variant that raised instead was disabled by the author)
def node_id_to_path!(node_id)
    filename = "tmp/node-to-path/#{node_id}"
    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    return '' unless File.exist? filename
    #raise "no path for node-id #{node_id}" unless File.exist? filename
    File.read(filename)
end
78
# Downloads +url+ and stores the response body at +local_path+, creating the
# parent directories first.
def fetch_to_file(url, local_path)
    FileUtils.mkdir_p File.dirname(local_path)
    # Binary mode: the payloads are PDFs and images, which text mode can
    # corrupt through newline translation on some platforms.
    # URI#read (open-uri) replaces Kernel#open, which no longer opens URLs on
    # Ruby 3.0 and later.
    File.open(local_path, 'wb') { |f| f << URI.parse(url).read }
end

# Rewrites the links of one page in place: /manual/<slug> links are pointed at
# the page path of the node they resolve to (or /missing), and, when
# DOWNLOAD_FILES is set, /files/ links are remapped and their targets fetched.
def rewrite_links(content, page_path)
    content.search('a').each do |a|
        case a['href']
        when %r{^/manual/(.*)}
            target_id = link_slug_to_node_id(Regexp.last_match(1))
            a['href'] = target_id ? "/#{node_id_to_path!(target_id)}" : '/missing'
        when %r{^(/files/.*)}
            next unless DOWNLOAD_FILES

            file_path = Regexp.last_match(1)

            if FILES_MAPPINGS[file_path]
                file_path = FILES_MAPPINGS[file_path]
                a['href'] = file_path
            end

            puts "downloading [#{file_path}] (for #{page_path})"

            # file_path already starts with '/', so it is appended directly.
            fetch_to_file("http://ardour.org#{file_path}", "#{FILES_DIR}#{file_path}")
        end
    end
end

# Makes the image sources of one page site-relative in place and, depending on
# GET_ARDOUR_ORG_IMAGES / HANDLE_OTHER_IMAGES, fetches the images into FILES_DIR.
def localize_images(content)
    content.search('img').each do |img|
        src = img['src']

        case src
        when %r{^/}
            if GET_ARDOUR_ORG_IMAGES
                url = "http://ardour.org#{src}"
                puts "getting #{url}"
                fetch_to_file(url, "#{FILES_DIR}#{src}")
            end
        when /^http/
            # Strip scheme and host. https is handled too, because the branch
            # condition above also matches https URLs.
            new_src = '/' + src.sub(%r{^https?://[^/]+/}, '')
            img['src'] = new_src

            if HANDLE_OTHER_IMAGES
                puts "new_src: #{new_src}"
                puts "getting #{src}"
                fetch_to_file(src, "#{FILES_DIR}#{new_src}")
            end
        end
    end
end

# Builds the front matter block for a page. The title goes through the YAML
# emitter so that titles containing ':' , quotes or similar are quoted
# correctly; line_width: -1 keeps long titles on a single line.
def page_front_matter(title)
    { 'layout' => 'default', 'title' => title }.to_yaml(line_width: -1) + "---\n"
end

# Recursively converts the sections of the Drupal book export into pages.
#
# For every div.section-<level> found under +html+ this derives a slug from the
# section's h1.book-heading, registers the node's page path, rewrites links and
# image sources in a copy of the section (with nested sections removed), writes
# the page when WRITE is set, and then recurses into the section for the next
# level. The section whose slug is 'the-ardour3-manual' is treated as the root:
# it contributes no path component and is written to OUTPUT_DIR/blah.html.
#
# @param html [#search] parsed document or section node to scan
# @param level [Integer] nesting level, selects the div.section-<level> class
# @param path [Array<String>] slugs of the enclosing sections
# @param numbered_path [Array<String>] "NN_slug" directory names of the enclosing sections
def process(html, level = 1, path = [], numbered_path = [])
    html.search("div.section-#{level}").each_with_index do |child, i|
        title = child.at('h1.book-heading').inner_text
        node_id = child['id'].sub(/^node-/, '')
        slug = title.downcase.gsub(' ', '-').gsub(/[^a-z0-9\-]/, '')

        root = slug == 'the-ardour3-manual'

        if root
            # top level
            this_path = []
            this_numbered_path = []
        else
            # Position among its siblings keeps the output sorted like the book.
            numbered_slug = format('%02d_%s', i + 1, slug)

            this_path = path + [slug]
            this_numbered_path = numbered_path + [numbered_slug]
        end

        page_path = this_path.join('/')
        register_node node_id, page_path

        has_children = child.search("div.section-#{level + 1}").length > 0

        output_dir = "#{OUTPUT_DIR}/#{this_numbered_path.join('/')}"
        output_file = root ? "#{OUTPUT_DIR}/blah.html" : "#{output_dir}.html"

        # Work on a copy so that the nested sections stay available in +child+
        # for the recursive call below.
        content = child.dup
        content.search('h1.book-heading').remove
        content.search("div.section-#{level + 1}").remove

        # Drop a leading h2 that merely repeats the page title.
        heading = content.at('h2')
        heading.remove if heading && heading.inner_text == title

        rewrite_links(content, page_path)
        localize_images(content)

        if WRITE
            FileUtils.mkdir_p File.dirname(output_file)
            FileUtils.mkdir_p output_dir if has_children
            File.open(output_file, 'w:UTF-8') do |f|
                f << page_front_matter(title)
                f << "\n#{content.inner_html}\n"
                f << "{% children %}\n" if has_children
            end
        end

        process(child, level + 1, this_path, this_numbered_path)
    end
end
218
219
# Fetch the Drupal export once; later runs reuse the local copy.
# File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
unless File.exist?(FILENAME)
    puts "downloading #{URL} to #{FILENAME}"
    # URI#read (open-uri) replaces Kernel#open, which no longer opens URLs on
    # Ruby 3.0 and later.
    File.open(FILENAME, 'w+') { |f| f << URI.parse(URL).read }
end

# Cache directories used by register_node / node_id_to_path! and
# link_slug_to_node_id.
FileUtils.mkdir_p('tmp/node-to-path')
FileUtils.mkdir_p('tmp/slug-to-node')

process Nokogiri(File.read(FILENAME))
229