1 """WordPress to Zinnia command module"""
2 import sys
3 from datetime import datetime
4 from optparse import make_option
5 from xml.etree import ElementTree as ET
6
7 from django.utils.html import strip_tags
8 from django.db.utils import IntegrityError
9 from django.utils.encoding import smart_str
10 from django.contrib.auth.models import User
11 from django.contrib.sites.models import Site
12 from django.utils.text import truncate_words
13 from django.db.models.signals import post_save
14 from django.template.defaultfilters import slugify
15 from django.contrib.comments.models import Comment
16 from django.core.management.base import CommandError
17 from django.core.management.base import LabelCommand
18
19 from tagging.models import Tag
20
21 from zinnia import __version__
22 from zinnia.models import Entry
23 from zinnia.models import Category
24 from zinnia.managers import DRAFT, HIDDEN, PUBLISHED
25
26
"""Management command importing a WordPress blog into Zinnia
from a WordPress eXtended RSS (WXR) export file."""
# Command metadata attributes.
help = 'Import a Wordpress blog into Zinnia.'
label = 'WXR file'
args = 'wordpress.xml'

# Switches of this command, appended to the LabelCommand ones.
option_list = LabelCommand.option_list + (
    make_option(
        '--noautoexcerpt',
        action='store_false',
        dest='auto_excerpt',
        default=True,
        help='Do NOT generate an excerpt if not present.',
    ),
    make_option(
        '--author',
        dest='author',
        default='',
        help='All imported entries belong to specified author',
    ),
)

# Current site, added to the sites of every imported entry.
SITE = Site.objects.get_current()

# Maps a WordPress post status to the Zinnia entry status to store.
REVERSE_STATUS = {
    'pending': DRAFT,
    'draft': DRAFT,
    'auto-draft': DRAFT,
    'inherit': DRAFT,
    'publish': PUBLISHED,
    'future': PUBLISHED,
    'trash': HIDDEN,
    'private': PUBLISHED,
}
50
def __init__(self):
    """Init the Command, add the custom output styles and
    disconnect the ping receivers attached to Entry saves."""
    super(Command, self).__init__()

    # Expose existing style entries under names matching their use here.
    self.style.TITLE = self.style.SQL_FIELD
    self.style.STEP = self.style.SQL_COLTYPE
    self.style.ITEM = self.style.HTTP_INFO

    # Drop the post_save receivers registered for Entry under these
    # dispatch uids, so saving imported entries does not invoke them.
    for dispatch_uid in ('zinnia.entry.post_save.ping_directories',
                         'zinnia.entry.post_save.ping_external_urls'):
        post_save.disconnect(sender=Entry, dispatch_uid=dispatch_uid)
62
def write_out(self, message, verbosity_level=1):
    """Write ``message`` to stdout and flush it, but only when the
    command verbosity is non-zero and at least ``verbosity_level``."""
    if not self.verbosity or self.verbosity < verbosity_level:
        return
    stream = sys.stdout
    stream.write(smart_str(message))
    stream.flush()
68
def handle_label(self, wxr_file, **options):
    """Run the complete import for one WXR file.

    ``wxr_file`` is passed to ``ElementTree.parse``. The options read
    are ``verbosity`` (default 1), ``auto_excerpt`` (default True) and
    ``author``; a non-empty ``author`` is resolved to a User by username.

    Raises CommandError when ``author`` does not match any username.
    """
    self.verbosity = int(options.get('verbosity', 1))
    self.auto_excerpt = options.get('auto_excerpt', True)
    self.default_author = options.get('author')
    if self.default_author:
        # Swap the username given on the command line for the User itself.
        try:
            self.default_author = User.objects.get(
                username=self.default_author)
        except User.DoesNotExist:
            raise CommandError('Invalid username for default author')

    self.write_out(self.style.TITLE(
        'Starting migration from Wordpress to Zinnia %s:\n' % __version__))

    tree = ET.parse(wxr_file)

    # Authors and categories are stored on the instance first because
    # the entry import looks them up through self.authors and
    # self.categories.
    self.authors = self.import_authors(tree)

    self.categories = self.import_categories(
        tree.findall('channel/{http://wordpress.org/export/1.0/}category'))

    self.import_tags(
        tree.findall('channel/{http://wordpress.org/export/1.0/}tag'))

    self.import_entries(tree.findall('channel/item'))
92
def import_authors(self, tree):
    """Collect the creators of all the posts found in ``tree`` and
    return a dict mapping each creator name to a user.

    When a default author is set, every creator maps to it; otherwise
    each creator is resolved with ``migrate_author``.
    """
    self.write_out(self.style.STEP('- Importing authors\n'))

    post_authors = set()
    for item in tree.findall('channel/item'):
        # findtext() returns None when the node is missing, so an item
        # without a post_type is skipped instead of raising.
        post_type = item.findtext(
            '{http://wordpress.org/export/1.0/}post_type')
        if post_type == 'post':
            # Keep the raw creator text: the same value is used as the
            # lookup key when the entries are imported.
            post_authors.add(item.find(
                '{http://purl.org/dc/elements/1.1/}creator').text)

    self.write_out('%i authors found.\n' % len(post_authors))

    default_author = self.default_author
    authors = {}
    for post_author in post_authors:
        if default_author:
            authors[post_author] = default_author
        else:
            authors[post_author] = self.migrate_author(post_author)
    return authors
114
def migrate_author(self, author_name):
    """Ask interactively how to migrate the WordPress author
    ``author_name`` and return the matching User.

    Choice 1 returns an existing user picked by username. Choice 2
    asks for an email and creates a user named ``author_name``; if
    that creation raises IntegrityError, the user already carrying
    that username is returned instead.
    """
    select_action_text = (
        "The author '%s' need to be migrated to an User:\n"
        "1. Use an existing user ?\n"
        "2. Create a new user ?\n"
        "Please select a choice: " % author_name)
    while True:
        selection = raw_input(smart_str(select_action_text))
        # Compare whole answers: the substring test "selection in '12'"
        # also accepted '' and '12', which then fell into the
        # user-creation branch.
        if selection in ('1', '2'):
            break

    if selection == '1':
        users = User.objects.all()
        usernames = [user.username for user in users]
        # The prompt does not change between attempts, build it once.
        select_user_text = (
            "1. Select your user, by typing one of theses usernames:\n"
            "[%s]\n"
            "Please select a choice: " % ', '.join(usernames))
        while True:
            # Encoded with smart_str like the first prompt.
            user_selected = raw_input(smart_str(select_user_text))
            if user_selected in usernames:
                break
        return users.get(username=user_selected)

    create_user_text = ("2. Please type the email of the '%s' user: "
                        % author_name)
    author_mail = raw_input(smart_str(create_user_text))
    try:
        return User.objects.create_user(author_name, author_mail)
    except IntegrityError:
        return User.objects.get(username=author_name)
143
def import_categories(self, category_nodes):
    """Import all the categories from the 'wp:category' nodes and
    return them in a dict keyed by category title.

    The categories listed in the 'item' nodes are not necessarily all
    the categories, hence this dedicated pass. The returned dict lets
    later steps look categories up without further queries.
    """
    self.write_out(self.style.STEP('- Importing categories\n'))

    wp_ns = '{http://wordpress.org/export/1.0/}'
    categories = {}
    for category_node in category_nodes:
        title = category_node.find(wp_ns + 'cat_name').text[:255]
        slug = category_node.find(wp_ns + 'category_nicename').text[:255]
        # findtext() gives None for a missing node and '' for an empty
        # one; both mean "no parent", so neither case raises.
        parent = (category_node.findtext(wp_ns + 'category_parent')
                  or '')[:255] or None
        self.write_out('> %s... ' % title)
        # The parent is resolved among the categories imported so far;
        # an unknown parent value resolves to None.
        category, _ = Category.objects.get_or_create(
            title=title, slug=slug, parent=categories.get(parent))
        categories[title] = category
        self.write_out(self.style.ITEM('OK\n'))
    return categories
165
177
def get_entry_tags(self, categories):
    """Return the list of an entry's tags, using the nicename of
    each tag for url compatibility.

    ``categories`` are the 'category' nodes of an item. Only nodes
    whose 'domain' attribute is 'tag' and whose 'nicename' attribute
    is non-empty are kept.
    """
    return [node.attrib['nicename']
            for node in categories
            if node.attrib.get('domain', 'category') == 'tag'
            and node.attrib.get('nicename')]
187
def get_entry_categories(self, category_nodes):
    """Return the imported categories referenced by an entry.

    Each node whose 'domain' attribute is 'category' is looked up by
    its text in ``self.categories``; the other nodes are ignored.
    """
    imported = self.categories
    return [imported[node.text]
            for node in category_nodes
            if node.attrib.get('domain') == 'category']
197
def import_entry(self, title, content, item_node):
    """Get or create the entry titled ``title`` from a WordPress item
    and return it.

    The image, the related entries and end_publication are not
    imported. creation_date and start_publication share the same
    value, read from the item's post_date. When the item has no
    excerpt, one is generated from the content if auto_excerpt is
    enabled, otherwise the excerpt is left empty.
    """
    wp_ns = '{http://wordpress.org/export/1.0/}'

    def wp_text(tag):
        # Text of the item's child node ``tag`` in the WordPress namespace.
        return item_node.find(wp_ns + tag).text

    creation_date = datetime.strptime(wp_text('post_date'),
                                      '%Y-%m-%d %H:%M:%S')

    excerpt = item_node.find(
        '{http://wordpress.org/export/1.0/excerpt/}encoded').text
    if not excerpt:
        excerpt = (truncate_words(strip_tags(content), 50)
                   if self.auto_excerpt else '')

    status = wp_text('status')
    category_nodes = item_node.findall('category')

    entry_dict = {
        'content': content,
        'excerpt': excerpt,
        # The post id is only read when the title slugifies to nothing.
        'slug': slugify(title)[:255] or 'post-%s' % wp_text('post_id'),
        'tags': ', '.join(self.get_entry_tags(category_nodes)),
        'status': self.REVERSE_STATUS[status],
        'comment_enabled': wp_text('comment_status') == 'open',
        'pingback_enabled': wp_text('ping_status') == 'open',
        'featured': wp_text('is_sticky') == '1',
        'password': wp_text('post_password') or '',
        'login_required': status == 'private',
        'creation_date': creation_date,
        'last_update': datetime.now(),
        'start_publication': creation_date,
    }

    # The values above are only used when the entry has to be created.
    entry, _ = Entry.objects.get_or_create(title=title,
                                           defaults=entry_dict)

    entry.categories.add(*self.get_entry_categories(category_nodes))
    creator = item_node.find(
        '{http://purl.org/dc/elements/1.1/}creator').text
    entry.authors.add(self.authors[creator])
    entry.sites.add(self.SITE)

    return entry
242
def import_entries(self, items):
    """Loop over ``items`` and import the ones that are entries.

    An item is imported when its 'post_type' is 'post' and it has both
    a title and some content; its comment nodes are then handed to
    ``import_comments``. Any other item is reported as skipped at
    verbosity level 2.
    """
    self.write_out(self.style.STEP('- Importing entries\n'))

    wp_ns = '{http://wordpress.org/export/1.0/}'
    for item_node in items:
        title = (item_node.find('title').text or '')[:255]
        post_type = item_node.find(wp_ns + 'post_type').text
        content = item_node.find(
            '{http://purl.org/rss/1.0/modules/content/}encoded').text

        if post_type == 'post' and content and title:
            self.write_out('> %s... ' % title)
            entry = self.import_entry(title, content, item_node)
            self.write_out(self.style.ITEM('OK\n'))
            # No trailing slash on the path: ElementTree 1.3+ reads
            # 'comment/' as 'comment/*', which selects the children of
            # the comments rather than the comment nodes themselves.
            self.import_comments(
                entry, item_node.findall(wp_ns + 'comment'))
        else:
            self.write_out('> %s... ' % title, 2)
            self.write_out(self.style.NOTICE('SKIPPED (not a post)\n'), 2)
263
319