# custom importer for www.sfn.org, feel free to borrow ideas

require 'mysql2'
require File.expand_path(File.dirname(__FILE__) + "/base.rb")

# Imports users, categories, topics and replies from the "sfn" MySQL database.
#
# The heavy lifting (batches, create_users, create_categories, create_posts and
# the *_from_imported_* lookups) is inherited from ImportScripts::Base, which is
# defined in base.rb and not visible here; this class only supplies the SQL and
# the row -> attribute-hash mapping.
class ImportScripts::Sfn < ImportScripts::Base

  # Number of rows fetched per SQL page (used as LIMIT in every paged query).
  BATCH_SIZE = 1000

  def initialize
    super
  end

  # Runs the import steps in dependency order: users and categories first,
  # then topics (which reference both), then replies (which reference topics).
  def execute
    import_users
    import_categories
    import_topics
    import_posts
  end

  # Pages through community members that have an e-mail address longer than
  # 5 characters and hands each page to create_users.
  def import_users
    puts "", "importing users..."

    user_count = mysql_query <<-SQL
      SELECT COUNT(DISTINCT cm.ContactKey) AS "count"
        FROM CommunityMember cm
        LEFT JOIN EgroupSubscription es ON es.ContactKey = cm.ContactKey
       WHERE LENGTH(COALESCE(es.EmailAddr_, "")) > 5
    SQL

    user_count = user_count.first["count"]

    batches(BATCH_SIZE) do |offset|
      # ORDER BY uses the unquoted aliases: a double-quoted "created_at" is a
      # string literal here (these queries rely on "..." being strings), which
      # would leave the row order undefined and make LIMIT/OFFSET paging
      # unreliable. `id` is added as a tie-breaker for equal timestamps.
      users = mysql_query <<-SQL
          SELECT cm.ContactKey AS "id",
                 cm.InvitedOn AS "created_at",
                 es.EmailAddr_ AS "email",
                 es.FullName_ AS "name",
                 c.Bio AS "bio",
                 c.ProfileImage AS "avatar"
            FROM CommunityMember cm
            LEFT JOIN EgroupSubscription es ON es.ContactKey = cm.ContactKey
            LEFT JOIN Contact c ON c.ContactKey = cm.ContactKey
           WHERE LENGTH(COALESCE(es.EmailAddr_, "")) > 5
        GROUP BY cm.ContactKey
        ORDER BY created_at, id
           LIMIT #{BATCH_SIZE}
          OFFSET #{offset}
      SQL

      break if users.size < 1

      create_users(users, total: user_count, offset: offset) do |user|
        {
          id: user["id"],
          name: user["name"],
          email: user["email"],
          bio_raw: user["bio"],
          created_at: user["created_at"],
          # Passed to create_users under :post_create_action; it receives the
          # newly created user and attaches the stored profile image, if any.
          post_create_action: proc do |newuser|
            next if user["avatar"].blank?

            avatar = Tempfile.new("sfn-avatar")
            begin
              avatar.write(user["avatar"].encode("ASCII-8BIT").force_encoding("UTF-8"))
              avatar.rewind

              upload = Upload.create_for(newuser.id, avatar, "avatar.jpg", avatar.size)
              if upload.persisted?
                newuser.create_user_avatar
                newuser.user_avatar.update(custom_upload_id: upload.id)
                newuser.update(uploaded_avatar_id: upload.id)
              end
            ensure
              # Always remove the temp file, even when the upload raised.
              # Closing stays best-effort: a failure here must not abort the import.
              begin
                avatar.close!
              rescue StandardError
                nil
              end
            end
          end
        }
      end
    end
  end

  # Names of the categories created by import_categories.
  NEW_CATEGORIES = [
    "Abstract Topic Matching Forum",
    "Animals in Research",
    "Brain Awareness and Teaching",
    "Career Advice",
    "Career Paths",
    "Diversity",
    "Early Career Policy Advocates",
    "LATP Associates",
    "LATP Fellows",
    "Mid and Advanced Career",
    "Neurobiology of Disease Workshop",
    "Neuroscience 2015",
    "Neuroscience Scholars Program",
    "NSP Associates",
    "NSP Fellows",
    "Outreach",
    "Postdocs and Early Career",
    "Program Committee",
    "Program Development",
    "Roommate Matching Forum",
    "Scientific Research",
    "Students",
  ].freeze

  # EgroupKey => New Category Name
  # Several source eGroups are merged into the same new category; topics whose
  # EgroupKey is not listed here are skipped by import_topics.
  CATEGORY_MAPPING = {
    "{DE10E4F4-621A-48BF-9B45-05D9F774A590}" => "Abstract Topic Matching Forum",
    "{3FFC1217-1576-4D38-BB81-D6CADC7FB793}" => "Animals in Research",
    "{9362BB21-BF6C-4E55-A3E0-18CD5D9F3323}" => "Brain Awareness and Teaching",
    "{3AC01B09-A21F-4166-95DA-0E585E271075}" => "Brain Awareness and Teaching",
    "{C249728D-8C9E-4138-AA49-D02467C28EAD}" => "Career Advice",
    "{01570B85-0124-478F-A8B9-B028BD1B1F2F}" => "Career Paths",
    "{2A430528-278A-46CD-BE1A-07CFA1122919}" => "Diversity",
    "{2F211345-3C19-43C9-90B5-27BA9FCD4DB0}" => "Diversity",
    "{8092297D-8DF4-404A-8BEB-4D5D0DC6A191}" => "Early Career Policy Advocates",
    "{8CB58762-D562-448C-9AF1-8DAE6C482C9B}" => "LATP Associates",
    "{CDF80A92-925A-46DD-A867-8558FA72D016}" => "LATP Fellows",
    "{E71E237B-7C23-4596-AECA-655BD8ED50DB}" => "Mid and Advanced Career",
    "{1D674C38-17CB-4C48-826A-D465AC3F8948}" => "Neurobiology of Disease Workshop",
    "{3D4F885B-0037-403B-83DD-62FAA8E81DF1}" => "Neuroscience 2015",
    "{9ACC3B40-E4A3-4FFD-AADC-C8403EB6231D}" => "Neuroscience 2015",
    "{9FC30FFB-E450-4361-8844-0266C3D96868}" => "Neuroscience Scholars Program",
    "{3E78123E-87CE-435E-B4B7-7DAB1A21C541}" => "NSP Associates",
    "{12D889D3-5CFD-49D5-93E4-32AAB2CFFCDA}" => "NSP Fellows",
    "{FA86D79E-170E-4F53-8F1C-942CB3FFB19E}" => "Outreach",
    "{D7041C64-3D32-4010-B3D8-71858323CB4A}" => "Outreach",
    "{69B76913-4E23-4C80-A11E-9CDB4130722E}" => "Outreach",
    "{774878EA-96AD-49F5-9D29-105AEA488007}" => "Outreach",
    "{E6349704-FD01-41B1-9C59-68E928DD4318}" => "Postdocs and Early Career",
    "{31CF5944-2567-4E79-9730-18EEC23E5B52}" => "Postdocs and Early Career",
    "{5625C403-AFAE-4323-A470-33FC32B12B53}" => "Program Committee",
    "{8415D871-54F5-4128-B099-E5A376A6B41B}" => "Program Development",
    "{B4DF2044-47AB-4329-8BF7-0D832CAB402C}" => "Roommate Matching Forum",
    "{6A3A12B9-5C72-472F-97AC-F34983674960}" => "Scientific Research",
    "{2CF635E9-4866-451C-A4F2-E2A8A80FED54}" => "Scientific Research",
    "{CF2DDCCE-737F-499D-AFE4-E5C36F195C8B}" => "Scientific Research",
    "{282B48D7-AC1D-453E-9806-3C6CE6830EF9}" => "Scientific Research",
    "{6D750CAF-E96F-4AD1-A45B-7B74FDFF0B40}" => "Scientific Research",
    "{10AF5D45-BEB3-4F07-BE77-0BAB6910DE10}" => "Scientific Research",
    "{18D7F624-26D1-44B9-BF33-AB5C5A2AB2BF}" => "Scientific Research",
    "{6016FF4F-D834-4888-BA03-F9FE8CB1D4CC}" => "Scientific Research",
    "{B0290A37-EA39-4CB8-B6CB-3E0B7EF6D036}" => "Scientific Research",
    "{97CC60D0-B93A-43FF-BB48-366FAAEE2BAC}" => "Scientific Research",
    "{8FC9B57B-2755-4FC5-90E8-CCDB56CF2F66}" => "Scientific Research",
    "{57C8BF37-357E-4FE6-952D-906248642792}" => "Scientific Research",
    "{7B2A3B63-BC2C-4219-830C-BA1DECB33337}" => "Scientific Research",
    "{0ED1D205-0E48-48D2-B82B-3CE80C6C553F}" => "Scientific Research",
    "{10355962-D172-4294-AA8E-1BC381B67971}" => "Scientific Research",
    "{C84B0222-5232-4B94-9FB8-DDF802241171}" => "Scientific Research",
    "{9143F984-0D67-46CB-AAAF-7FE3B6335E07}" => "Scientific Research",
    "{1392DC10-37A0-46A6-9979-4568D0224C5F}" => "Scientific Research",
    "{E4891409-0F4F-4151-B550-ECE53655E231}" => "Scientific Research",
    "{9613BAC2-229B-4563-9E1C-35C31CDDCE2F}" => "Students",
  }.freeze

  # Creates one category per entry of NEW_CATEGORIES. The category name doubles
  # as its import id, which is what import_topics later looks up.
  def import_categories
    puts "", "importing categories..."

    create_categories(NEW_CATEGORIES) do |category|
      { id: category, name: category }
    end
  end

  # Imports approved root messages (ParentId_ = 0) as topics. Messages whose
  # EgroupKey has no entry in CATEGORY_MAPPING are skipped.
  def import_topics
    puts "", "importing topics..."

    topic_count = mysql_query <<-SQL
      SELECT COUNT(MessageID_) AS "count"
        FROM EgroupMessages
       WHERE ParentId_ = 0
         AND ApprovedRejectedPendingInd = "Approved"
    SQL

    topic_count = topic_count.first["count"]

    batches(BATCH_SIZE) do |offset|
      # Unquoted aliases in ORDER BY: a double-quoted "created_at" would be a
      # constant string, leaving page contents undefined across batches.
      topics = mysql_query <<-SQL
          SELECT MessageID_ AS "id",
                 EgroupKey AS "category_id",
                 ContactKey AS "user_id",
                 HdrSubject_ AS "title",
                 Body_ AS "raw",
                 CreatStamp_ AS "created_at"
            FROM EgroupMessages
           WHERE ParentId_ = 0
             AND ApprovedRejectedPendingInd = "Approved"
        ORDER BY created_at, id
           LIMIT #{BATCH_SIZE}
          OFFSET #{offset}
      SQL

      break if topics.size < 1

      create_posts(topics, total: topic_count, offset: offset) do |topic|
        next unless category_id = CATEGORY_MAPPING[topic["category_id"]]

        {
          id: topic["id"],
          category: category_id_from_imported_category_id(category_id),
          # Fall back to the system user when the author was not imported.
          user_id: user_id_from_imported_user_id(topic["user_id"]) || Discourse::SYSTEM_USER_ID,
          title: topic["title"][0..250],
          raw: cleanup_raw(topic["raw"]),
          created_at: topic["created_at"],
        }
      end
    end
  end

  # Imports approved non-root messages (ParentId_ > 0) as replies. A reply is
  # skipped when its parent message cannot be resolved to an imported topic.
  def import_posts
    puts "", "importing posts..."

    posts_count = mysql_query <<-SQL
      SELECT COUNT(MessageID_) AS "count"
        FROM EgroupMessages
       WHERE ParentId_ > 0
         AND ApprovedRejectedPendingInd = "Approved"
    SQL

    posts_count = posts_count.first["count"]

    batches(BATCH_SIZE) do |offset|
      # Unquoted aliases in ORDER BY, see import_topics for the rationale.
      posts = mysql_query <<-SQL
          SELECT MessageID_ AS "id",
                 ContactKey AS "user_id",
                 ParentID_ AS "topic_id",
                 Body_ AS "raw",
                 CreatStamp_ AS "created_at"
            FROM EgroupMessages
           WHERE ParentId_ > 0
             AND ApprovedRejectedPendingInd = "Approved"
        ORDER BY created_at, id
           LIMIT #{BATCH_SIZE}
          OFFSET #{offset}
      SQL

      break if posts.size < 1

      create_posts(posts, total: posts_count, offset: offset) do |post|
        next unless parent = topic_lookup_from_imported_post_id(post["topic_id"])

        {
          id: post["id"],
          topic_id: parent[:topic_id],
          # Fall back to the system user when the author was not imported.
          user_id: user_id_from_imported_user_id(post["user_id"]) || Discourse::SYSTEM_USER_ID,
          raw: cleanup_raw(post["raw"]),
          created_at: post["created_at"],
        }
      end
    end
  end

  # Normalizes a message body before it is stored as a post.
  #
  # raw - the message body; nil is treated as an empty string.
  #
  # Returns a new String; the argument is not modified.
  def cleanup_raw(raw)
    text = raw.to_s

    # fix some html: turn <br>, <br/> and <br /> into newlines
    # NOTE(review): the pattern was empty in this copy of the file (an empty
    # regex matches between every character); restored as a <br> matcher —
    # confirm against the upstream script.
    text = text.gsub(/<br\s*\/?>/i, "\n")

    # remove "This message has been cross posted to the following eGroups: ..."
    text = text.gsub(/^This message has been cross posted to the following eGroups: .+\n-{3,}/i, "")

    # remove signatures (everything from the first run of 3+ dashes onwards)
    text = text.gsub(/-{3,}.+/m, "")

    # strip leading/trailing whitespaces
    text.strip
  end

  # Runs +sql+ against the local "sfn" database and returns the Mysql2 result.
  # The client connection is created on first use and reused afterwards.
  def mysql_query(sql)
    @client ||= Mysql2::Client.new(username: "root", database: "sfn")
    @client.query(sql)
  end

end

ImportScripts::Sfn.new.perform