# Reposted from: http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/227040
require 'net/http'
# Example target: the page at http://finance.yahoo.com/q?s=IBM
http = Net::HTTP.new('finance.yahoo.com', 80)
# Net::HTTP#get returns a single response object on Ruby 1.9+; the old
# `resp, page = http.get(...)` destructuring leaves `page` nil there.
# Read the body from the response explicitly so it works on every version.
resp = http.get('/q?s=IBM')
page = resp.body
# Extracts the inner content of every <tag ...>...</tag> element in +data+.
#
# Matching is regex-based, case-insensitive, and spans newlines. The capture
# is non-greedy, so an element is cut at the first closing tag that follows it
# (nested elements of the same tag are therefore not handled).
#
# data - String of HTML to search.
# tag  - tag name; it is interpolated into the pattern as-is (not escaped).
#
# Returns a flat Array of Strings: String#scan yields one single-element array
# per match and flatten collapses them into a one-dimensional array. The
# result is empty when nothing matches.
def parse_html(data, tag)
  # \b keeps a short name such as "tr" from also matching "<track>";
  # [^>]* skips any attributes; \s* tolerates "</td >".
  data.scan(%r{<(?:#{tag})\b[^>]*>(.*?)</(?:#{tag})\s*>}im).flatten
end
# Build a three-level structure: one entry per table, each holding one entry
# per row, each holding that row's cell texts with any inner markup removed.
table_data = parse_html(page, "table")
output = table_data.map do |table|
  parse_html(table, "tr").map do |row|
    parse_html(row, "td").map { |cell| cell.gsub(%r{<.*?>}, "") }
  end
end
# Prints a nested array as an indented, brace-delimited outline.
#
# Each non-empty item is written as "[index] {", then its content, then "}".
# Items whose class is exactly Array are printed recursively one tab deeper;
# anything else is printed as a single line. Items whose size is 0 are
# skipped, but they still consume an index.
#
# array - the collection to print.
# tab   - current indentation depth, in tab characters (default 0).
def parse_nested_array(array, tab = 0)
  array.each_with_index do |item, index|
    next unless item.size > 0

    indent = "\t" * tab
    puts "#{indent}[#{index}] {"
    if item.instance_of?(Array)
      parse_nested_array(item, tab + 1)
    else
      puts "#{indent}\t#{item}"
    end
    puts "#{indent}}"
  end
end
# Print the collected table -> row -> cell structure to standard output.
parse_nested_array(output)