-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrssToPages.py
executable file
·124 lines (94 loc) · 2.92 KB
/
rssToPages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/python
# encoding: utf-8
#
# Very simple Python program to publish the latest RSS entry of a feed
# on a Facebook Page. It lists the configured blogs and lets the user
# select one of them.
#
# It reads a configuration file (~/.rssBlogs) with one section per blog
# (Blog1, Blog2, ...), each containing:
# - The RSS feed of the blog (rssFeed)
# - The Twitter account where the news will be published
# - The Facebook page where the news will be published (pageFB)
# It also reads a second configuration file (~/.rssFacebook) with a
# "Facebook" section containing:
# - The oauth access token (oauth_access_token)
#
#
#
#
#
import ConfigParser, os
import feedparser
import facebook
import re, sys
from bs4 import BeautifulSoup
from bs4 import NavigableString
from bs4 import Tag
# Python 2 idiom: sys.setdefaultencoding is removed from the sys module at
# interpreter start-up, so the module is reloaded to make it callable again.
# UTF-8 then becomes the default for implicit str <-> unicode conversions,
# which the string concatenations and prints below rely on.
reload(sys)
sys.setdefaultencoding("UTF-8")
config = ConfigParser.ConfigParser()
config.read([os.path.expanduser('~/.rssBlogs')])
pageFB = config.get("Blog"+str(i), "pageFB")
print "Configured blogs:"
i=1
for section in config.sections():
print str(i), ')', section, config.get(section, "rssFeed")
i = i + 1
if (int(i)>1):
i = raw_input ('Select one: ')
else:
i = 1
print "You have chosen ", config.get("Blog"+str(i), "rssFeed")
rssFeed = config.get("Blog"+str(i), "rssFeed")
# Download and parse the feed of the selected blog.
feed = feedparser.parse(rssFeed)
if not feed.entries:
    # Nothing to publish; avoid an IndexError on the entry lookup below.
    sys.exit("The feed has no entries: " + rssFeed)

# It will publish the last added item (the first entry of the feed).
# A dedicated name is used so that `i`, which holds the selected blog
# number, is still valid for the "Blog<N>" lookups that follow.
lastEntry = feed.entries[0]

# Title and summary may contain markup, so both go through BeautifulSoup;
# `soup` ends up holding the parsed summary for the following steps.
soup = BeautifulSoup(lastEntry.title)
theTitle = soup.get_text()
theLink = lastEntry.link
soup = BeautifulSoup(lastEntry.summary)

# Optional per-blog regular expression: links whose URL matches it are
# left out of the numbered link list. Empty means "keep every link".
if config.has_option("Blog"+str(i), "linksToAvoid"):
    linksToAvoid = config.get("Blog"+str(i), "linksToAvoid")
else:
    linksToAvoid = ""
# Now the links
j = 0
linksTxt = ""
for link in soup("a"):
if not isinstance(link.contents[0], Tag):
# We want to avoid embdeded tags (mainly <img ... )
print linksToAvoid
print re.escape(linksToAvoid)
print str(link['href'])
print re.search(linksToAvoid, link['href'])
if ((linksToAvoid =="")
or (not re.search(linksToAvoid, link['href']))):
link.append(" ["+str(j)+"]")
linksTxt = linksTxt + "["+str(j)+"] " + link.contents[0] + "\n"
linksTxt = linksTxt + " " + link['href'] + "\n"
j = j + 1
#theSummary = theTitle+"\n"
theSummary = soup.get_text()
theSummary = theSummary+"\n\n"
if linksTxt != "":
theSummary = theSummary + linksTxt
theSummary = theSummary+"\n\n"
print theSummary.encode('utf-8')
# Use the first image of the summary, if any, as the picture of the post;
# an empty string means the entry has no image.
pageImage = soup.findAll("img")
imageLink = pageImage[0]["src"] if pageImage else ""
config.read([os.path.expanduser('~/.rssFacebook')])
oauth_access_token= config.get("Facebook", "oauth_access_token")
graph = facebook.GraphAPI(oauth_access_token)
pages = graph.get_connections("me", "accounts")
for i in range(len(pages['data'])):
if (pages['data'][i]['name'] == pageFB):
print "Writing in... ", pages['data'][i]['name']
graph2 = facebook.GraphAPI(pages['data'][i]['access_token'])
graph2.put_object(pages['data'][i]['id'],
"feed", message = theSummary, link=theLink,
picture = imageLink,
name=theTitle, caption='',
description=theSummary.encode('utf-8'))