#!/usr/bin/env ruby

require 'yaml'
require 'set'
require 'net/http'
require 'rss/maker'

# Configuration
#
# package_filename - YAML cache holding the package names seen so far
# rss_filename     - the RSS feed this script generates
# rss_version      - RSS dialect handed to RSS::Maker
# amount_items     - how many of the most recent packages appear in the feed

package_filename = "packages.yaml"
rss_filename     = "pypi-rb.xml"
rss_version      = "2.0"
amount_items     = 20

# Don't change anything below here.

# Both files are kept in the directory that contains this script,
# regardless of the current working directory.
path = File.expand_path(File.dirname(__FILE__))

package_file, rss_file = [package_filename, rss_filename].map do |name|
    "#{path}/#{name}"
end

# Load the Set of package names recorded by the previous run.
# The cache is a YAML-serialized Set written by this script itself.
# If the file is missing (first run) or cannot be read, we start with
# an empty Set so every package currently listed counts as new.

begin

    # File.read closes the handle for us. Psych 4+ (Ruby 3.1+) refuses to
    # instantiate a Set from plain YAML.load, so the class is permitted
    # explicitly instead of silently losing the history on every run.
    loaded = YAML.safe_load(File.read(package_file), permitted_classes: [Set])

    # An empty or foreign file does not deserialize to a Set;
    # treat that the same as having no history.
    old_packages = loaded.is_a?(Set) ? loaded : Set.new

rescue Errno::ENOENT

    # Expected on the very first run: no cache has been written yet.
    old_packages = Set.new

rescue StandardError => e

    # Keep the original best-effort behaviour, but say why the history was dropped.
    warn "Could not load the package list (#{e.class}: #{e.message}); starting from scratch."
    old_packages = Set.new

end

# Fetch the HTML index page from the Python Package Index;
# the package names are extracted from its body further below.

host = "pypi.python.org"
url  = "/pypi?:action=index"
http = Net::HTTP.new(host, 80)

response = http.get(url)

# Anything other than a 2xx answer (redirect, error page) does not carry
# the package index. Stop here instead of treating that body as a listing.
unless response.is_a?(Net::HTTPSuccess)
    abort "Could not fetch http://#{host}#{url}: #{response.code} #{response.message}"
end

content = response.body

curr_packages = Set.new

# Walk the HTML line by line and collect the package name from the first
# link on each line whose target starts with /pypi/. The capture group
# takes the text between "/pypi/" and the last "/" of that link target.

package_re = /<a href="\/pypi\/(.*)\/.*">/

# String#each no longer exists since Ruby 1.9; each_line is the
# line-wise iterator that replaces it.
content.each_line do |line|
    found_package = line[package_re, 1]
    curr_packages.add(found_package) if found_package
end

# The set difference leaves only the packages that were not known
# on the previous run.

new_packages = curr_packages - old_packages

# Known packages first, newly discovered ones appended at the end,
# so the tail of the list always holds the most recent additions.
list_new_packages = new_packages.to_a
list_old_packages = old_packages.to_a.concat(list_new_packages)

# Only the last amount_items packages go into the RSS feed, newest first.
# Array#last also copes with a list shorter than amount_items, where the
# negative-range slice used before returned nil and crashed on reverse!.

list_rss_packages = list_old_packages.last(amount_items).reverse

# If there are no new packages, we don't touch anything.
# A Set object is always truthy in Ruby, even when empty, so emptiness
# has to be tested explicitly.

unless new_packages.empty?

    # Build the feed: one channel plus one item per selected package.
    content = RSS::Maker.make(rss_version) do |m|
        m.channel.title = "New Python Packages @ http://pypi.python.org/pypi"
        m.channel.link  = "http://pypi.python.org/pypi?:action=index"
        m.channel.description = "The latest modules at the Python Package Index"

        list_rss_packages.each do |package|
            i = m.items.new_item
            i.title = package
            i.link = "http://pypi.python.org/pypi/#{package}/"
        end
    end

    File.open(rss_file, "w") do |f|
        f.write(content)
    end

    # We save the list as a serialized object,
    # so the next script run can detect only new packages.

    old_packages = list_old_packages.to_set

    # The block form closes the file even if the write raises.
    File.open(package_file, "w") do |f|
        f.write(YAML.dump(old_packages))
    end

end

