Skip to content
Extraits de code Groupes Projets
Sélectionner une révision Git
1 résultat Searching

mpvwidget.hh

Blame
  • get_kaamelott.rb 1,85 Kio
    #!/usr/bin/env ruby
    
    # ./get_kaamelott.rb > Data/kaamelott_citations.dat
    
    require 'nokogiri'
    require 'open-uri'
    
    $base = 'https://fr.wikiquote.org'
    
    # Prepares scraped text for embedding between double quotes in the
    # hand-written JSON output produced by this script.
    #
    # content - the String to sanitize.
    #
    # Returns a new String in which:
    #   * every backslash is doubled (otherwise a lone backslash in the scraped
    #     text would start a bogus escape sequence in the JSON string),
    #   * every newline is replaced by a single space,
    #   * every double quote is preceded by a backslash,
    #   * every typographic apostrophe (’) is replaced by a plain ASCII one.
    def convert content
    	content
    		# Must run first, so the backslashes added for quotes below are not doubled.
    		# Block form: the replacement is taken literally, no backreference parsing.
    		.gsub('\\') { '\\\\' }
    		.gsub("\n", ' ')
    		.gsub('"', '\\"')
    		.gsub('’', "'")
    end
    
    # Downloads the page linked from +node+ and prints every quote found on it
    # as a fragment of the JSON-like output.
    #
    # node - an element responding to #xpath; the href of the first <a> found
    #        below it is appended to $base to build the URL to fetch.
    #
    # For each "citation"/"ref" element under div#mw-content-text of the fetched
    # page:
    #   * a <span> opens a new object keyed by the (converted) quote text,
    #     preceded by a closing "}," for the previous quote if there was one;
    #   * a <div> is parsed as the reference line and printed as the
    #     acteur / livre / episode fields (empty when the pattern does not match).
    # The object of the last quote is left open; closing it is up to the caller.
    #
    # Raises RuntimeError when a selected element is neither a <span> nor a <div>.
    def puts_new_page node
    	link = node.xpath('.//a').attr('href')
    	# Kernel#open stopped fetching URLs in Ruby 3.0 (deprecated in 2.7), so
    	# go through URI.open explicitly; the block form also closes the download
    	# once Nokogiri has parsed it.
    	new_doc = URI.open($base + link) { | page | Nokogiri::HTML(page) }
    	new_span = true
    
    	# The block variable is named +item+ so it does not shadow the +node+ parameter.
    	new_doc.xpath('//div[@id="mw-content-text"]//*[@class = "citation" or @class = "ref"]').each do | item |
    		case item.name
    		when 'span'
    			# Close the previous quote's object before opening the next one.
    			puts '		},' unless new_span
    			puts "		\"#{convert item.content}\": {"
    			new_span = false
    		when 'div'
    			# Captures: $1 actor, $2 book (roman numeral), $3 optional episode
    			# number, $4 episode title (surrounding double quotes dropped).
    			# On a failed match all of them are nil and print as empty strings.
    			item.content =~ /^([^,]+),\s+\w+,\s+Livre\s+([IVX]+),[^\d\w]*(?:(\d+)[^:]*:\s+)?"?([^,"]+)"?,/
    			puts "			\"acteur\": \"#{$1}\","
    			puts "			\"livre\": \"#{$2}\","
    			puts "			\"episode numero\": \"#{$3}\","
    			puts "			\"episode titre\": \"#{$4}\""
    		else
    			raise "Shit! Got this: #{item}"
    		end
    	end
    end
    
    
    # Output state for the hand-written JSON-like dump:
    #   new_a    - true until the first category link has been printed, so no
    #              closing braces are emitted before the very first category.
    #   new_span - true when the next quote is the first one of its category,
    #              so no "}," separator is emitted in front of it.
    new_a = true
    new_span = false
    
    puts '{'
    
    # Kernel#open stopped fetching URLs in Ruby 3.0 (deprecated in 2.7), so go
    # through URI.open explicitly; the block form also closes the download once
    # Nokogiri has parsed it.
    doc = URI.open($base + '/wiki/Kaamelott') { | page | Nokogiri::HTML(page) }
    doc.xpath('//div[@id="mw-content-text"]//*[@class = "extiw" or @class = "citation" or @class = "ref" or self::dl]').each do | node |
    	case node.name
    	when 'a'
    		# A link starts a new category: close the previous quote and category first.
    		puts '		}' unless new_a
    		puts "	}," unless new_a
    		puts "	\"#{convert node.content}\": {"
    		new_a = false
    		new_span = true
    	when 'dl'
    		# The quotes of this entry live on a separate page: follow the link.
    		puts_new_page node
    		# Stop after the entry whose first link text is 'Yvain'.
    		break if node.xpath('.//a')[0].content == 'Yvain'
    	when 'span'
    		# A quote: close the previous quote's object unless it is the first one.
    		puts '		},' unless new_span
    		puts "		\"#{convert node.content}\": {"
    		new_span = false
    	when 'div'
    		# Debugging aid: puts node.content
    		# Captures: $1 actor, $2 book (roman numeral), $3 episode number,
    		# $4 episode title. On a failed match all of them are nil and print
    		# as empty strings.
    		node.content =~ /^([^,]+),\s+\w+,\s+Livre\s+([IVX]+),\s*(?:ép.\s*)?(\d+)[^:]*:\s*([^,]+),/
    		puts "			\"acteur\": \"#{$1}\","
    		puts "			\"livre\": \"#{$2}\","
    		puts "			\"episode numero\": \"#{$3}\","
    		puts "			\"episode titre\": \"#{$4}\""
    	else
    		raise "Shit! Got this: #{node}"
    	end
    end
    
    # Close the last quote and the last category.
    puts '		}'
    puts '	}'
    # NOTE(review): the '{' printed at the top has no matching '}' among the
    # statements visible here - confirm whether it is emitted further down,
    # otherwise the output is not a complete object.