// import.go
//
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
package main

import (
	"archive/zip"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"html"
	"io/ioutil"
	"log"
	"os"
	"regexp"
	"sort"
	"strings"
	"time"

	"humungus.tedunangst.com/r/webs/junk"
)
34
35func importMain(username, flavor, source string) {
36 switch flavor {
37 case "mastodon":
38 importMastodon(username, source)
39 case "honk":
40 importHonk(username, source)
41 case "twitter":
42 importTwitter(username, source)
43 case "instagram":
44 importInstagram(username, source)
45 default:
46 elog.Fatal("unknown source flavor")
47 }
48}
49
50type ActivityObject struct {
51 AttributedTo string
52 Summary string
53 Content string
54 Source struct {
55 MediaType string
56 Content string
57 }
58 InReplyTo string
59 Conversation string
60 Context string
61 Published time.Time
62 Tag []struct {
63 Type string
64 Name string
65 }
66 Attachment []struct {
67 Type string
68 MediaType string
69 Url string
70 Name string
71 }
72}
73
74type PlainActivityObject ActivityObject
75
76func (obj *ActivityObject) UnmarshalJSON(b []byte) error {
77 p := (*PlainActivityObject)(obj)
78 json.Unmarshal(b, p)
79 return nil
80}
81
82func importMastodon(username, source string) {
83 user, err := butwhatabout(username)
84 if err != nil {
85 elog.Fatal(err)
86 }
87
88 outbox := source + "/outbox.json"
89 if _, err := os.Stat(outbox); err == nil {
90 importActivities(user, outbox, source)
91 } else {
92 ilog.Printf("skipping outbox.json!")
93 }
94 if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
95 importMastotooters(user, source)
96 } else {
97 ilog.Printf("skipping following_accounts.csv!")
98 }
99}
100
101func importHonk(username, source string) {
102 user, err := butwhatabout(username)
103 if err != nil {
104 elog.Fatal(err)
105 }
106
107 outbox := source + "/outbox.json"
108 if _, err := os.Stat(outbox); err == nil {
109 importActivities(user, outbox, source)
110 } else {
111 ilog.Printf("skipping outbox.json!")
112 }
113}
114
115func importActivities(user *WhatAbout, filename, source string) {
116 type Activity struct {
117 Id string
118 Type string
119 To interface{}
120 Cc []string
121 Object ActivityObject
122 }
123 var outbox struct {
124 OrderedItems []Activity
125 }
126 ilog.Println("Importing honks...")
127 fd, err := os.Open(filename)
128 if err != nil {
129 elog.Fatal(err)
130 }
131 dec := json.NewDecoder(fd)
132 err = dec.Decode(&outbox)
133 if err != nil {
134 elog.Fatalf("error parsing json: %s", err)
135 }
136 fd.Close()
137
138 havetoot := func(xid string) bool {
139 var id int64
140 row := stmtFindXonk.QueryRow(user.ID, xid)
141 err := row.Scan(&id)
142 if err == nil {
143 return true
144 }
145 return false
146 }
147
148 re_tootid := regexp.MustCompile("[^/]+$")
149 items := outbox.OrderedItems
150 for i, j := 0, len(items)-1; i < j; i, j = i+1, j-1 {
151 items[i], items[j] = items[j], items[i]
152 }
153 for _, item := range items {
154 toot := item
155 if toot.Type != "Create" {
156 continue
157 }
158 if strings.HasSuffix(toot.Id, "/activity") {
159 toot.Id = strings.TrimSuffix(toot.Id, "/activity")
160 }
161 tootid := re_tootid.FindString(toot.Id)
162 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
163 if havetoot(xid) {
164 continue
165 }
166
167 convoy := toot.Object.Context
168 if convoy == "" {
169 convoy = toot.Object.Conversation
170 }
171 var audience []string
172 to, ok := toot.To.(string)
173 if ok {
174 audience = append(audience, to)
175 } else {
176 for _, t := range toot.To.([]interface{}) {
177 audience = append(audience, t.(string))
178 }
179 }
180 content := toot.Object.Content
181 format := "html"
182 if toot.Object.Source.MediaType == "text/markdown" {
183 content = toot.Object.Source.Content
184 format = "markdown"
185 }
186 audience = append(audience, toot.Cc...)
187 honk := Honk{
188 UserID: user.ID,
189 What: "honk",
190 Honker: user.URL,
191 XID: xid,
192 RID: toot.Object.InReplyTo,
193 Date: toot.Object.Published,
194 URL: xid,
195 Audience: audience,
196 Noise: content,
197 Convoy: convoy,
198 Whofore: 2,
199 Format: format,
200 Precis: toot.Object.Summary,
201 }
202 if !loudandproud(honk.Audience) {
203 honk.Whofore = 3
204 }
205 for _, att := range toot.Object.Attachment {
206 var meta DonkMeta
207 switch att.Type {
208 case "Document":
209 fname := fmt.Sprintf("%s/%s", source, att.Url)
210 data, err := ioutil.ReadFile(fname)
211 if err != nil {
212 elog.Printf("error reading media for %s: %s", honk.XID, fname)
213 continue
214 }
215 u := xfiltrate()
216 name := att.Name
217 desc := name
218 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
219 fileid, err := savefile(name, desc, newurl, att.MediaType, true, data, &meta)
220 if err != nil {
221 elog.Printf("error saving media: %s", fname)
222 continue
223 }
224 donk := &Donk{
225 FileID: fileid,
226 }
227 honk.Donks = append(honk.Donks, donk)
228 }
229 }
230 for _, t := range toot.Object.Tag {
231 switch t.Type {
232 case "Hashtag":
233 honk.Onts = append(honk.Onts, t.Name)
234 }
235 }
236 savehonk(&honk)
237 }
238}
239
240func importMastotooters(user *WhatAbout, source string) {
241 ilog.Println("Importing honkers...")
242 fd, err := os.Open(source + "/following_accounts.csv")
243 if err != nil {
244 elog.Fatal(err)
245 }
246 r := csv.NewReader(fd)
247 data, err := r.ReadAll()
248 if err != nil {
249 elog.Fatal(err)
250 }
251 fd.Close()
252
253 var meta HonkerMeta
254 mj, _ := jsonify(&meta)
255
256 for i, d := range data {
257 if i == 0 {
258 continue
259 }
260 url := "@" + d[0]
261 name := ""
262 flavor := "peep"
263 combos := ""
264 _, _, err := savehonker(user, url, name, flavor, combos, mj)
265 if err != nil {
266 elog.Printf("trouble with a honker: %s", err)
267 }
268 }
269}
270
271func importTwitter(username, source string) {
272 user, err := butwhatabout(username)
273 if err != nil {
274 elog.Fatal(err)
275 }
276
277 type Tweet struct {
278 date time.Time
279 convoy string
280 Tweet struct {
281 CreatedAt string `json:"created_at"`
282 DisplayTextRange []string `json:"display_text_range"`
283 EditInfo struct {
284 Initial struct {
285 EditTweetIds []string `json:"editTweetIds"`
286 EditableUntil string `json:"editableUntil"`
287 EditsRemaining string `json:"editsRemaining"`
288 IsEditEligible bool `json:"isEditEligible"`
289 } `json:"initial"`
290 } `json:"edit_info"`
291 Entities struct {
292 Hashtags []struct {
293 Indices []string `json:"indices"`
294 Text string `json:"text"`
295 } `json:"hashtags"`
296 Media []struct {
297 DisplayURL string `json:"display_url"`
298 ExpandedURL string `json:"expanded_url"`
299 ID string `json:"id"`
300 IdStr string `json:"id_str"`
301 Indices []string `json:"indices"`
302 MediaURL string `json:"media_url"`
303 MediaUrlHttps string `json:"media_url_https"`
304 Sizes struct {
305 Large struct {
306 H string `json:"h"`
307 Resize string `json:"resize"`
308 W string `json:"w"`
309 } `json:"large"`
310 Medium struct {
311 H string `json:"h"`
312 Resize string `json:"resize"`
313 W string `json:"w"`
314 } `json:"medium"`
315 Small struct {
316 H string `json:"h"`
317 Resize string `json:"resize"`
318 W string `json:"w"`
319 } `json:"small"`
320 Thumb struct {
321 H string `json:"h"`
322 Resize string `json:"resize"`
323 W string `json:"w"`
324 } `json:"thumb"`
325 } `json:"sizes"`
326 Type string `json:"type"`
327 URL string `json:"url"`
328 } `json:"media"`
329 Symbols []interface{} `json:"symbols"`
330 Urls []struct {
331 DisplayURL string `json:"display_url"`
332 ExpandedURL string `json:"expanded_url"`
333 Indices []string `json:"indices"`
334 URL string `json:"url"`
335 } `json:"urls"`
336 UserMentions []interface{} `json:"user_mentions"`
337 } `json:"entities"`
338 ExtendedEntities struct {
339 Media []struct {
340 DisplayURL string `json:"display_url"`
341 ExpandedURL string `json:"expanded_url"`
342 ID string `json:"id"`
343 IdStr string `json:"id_str"`
344 Indices []string `json:"indices"`
345 MediaURL string `json:"media_url"`
346 MediaUrlHttps string `json:"media_url_https"`
347 Sizes struct {
348 Large struct {
349 H string `json:"h"`
350 Resize string `json:"resize"`
351 W string `json:"w"`
352 } `json:"large"`
353 Medium struct {
354 H string `json:"h"`
355 Resize string `json:"resize"`
356 W string `json:"w"`
357 } `json:"medium"`
358 Small struct {
359 H string `json:"h"`
360 Resize string `json:"resize"`
361 W string `json:"w"`
362 } `json:"small"`
363 Thumb struct {
364 H string `json:"h"`
365 Resize string `json:"resize"`
366 W string `json:"w"`
367 } `json:"thumb"`
368 } `json:"sizes"`
369 Type string `json:"type"`
370 URL string `json:"url"`
371 } `json:"media"`
372 } `json:"extended_entities"`
373 FavoriteCount string `json:"favorite_count"`
374 Favorited bool `json:"favorited"`
375 FullText string `json:"full_text"`
376 ID string `json:"id"`
377 IdStr string `json:"id_str"`
378 InReplyToScreenName string `json:"in_reply_to_screen_name"`
379 InReplyToStatusID string `json:"in_reply_to_status_id"`
380 InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
381 InReplyToUserID string `json:"in_reply_to_user_id"`
382 InReplyToUserIdStr string `json:"in_reply_to_user_id_str"`
383 Lang string `json:"lang"`
384 PossiblySensitive bool `json:"possibly_sensitive"`
385 RetweetCount string `json:"retweet_count"`
386 Retweeted bool `json:"retweeted"`
387 Source string `json:"source"`
388 Truncated bool `json:"truncated"`
389 } `json:"tweet"`
390 }
391
392 var tweets []*Tweet
393 fd, err := os.Open(source + "/tweet.js")
394 if err != nil {
395 elog.Fatal(err)
396 }
397 // skip past window.YTD.tweet.part0 =
398 fd.Seek(25, 0)
399 dec := json.NewDecoder(fd)
400 err = dec.Decode(&tweets)
401 if err != nil {
402 elog.Fatalf("error parsing json: %s", err)
403 }
404 fd.Close()
405 tweetmap := make(map[string]*Tweet)
406 for _, t := range tweets {
407 t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
408 tweetmap[t.Tweet.IdStr] = t
409 }
410 sort.Slice(tweets, func(i, j int) bool {
411 return tweets[i].date.Before(tweets[j].date)
412 })
413 havetwid := func(xid string) bool {
414 var id int64
415 row := stmtFindXonk.QueryRow(user.ID, xid)
416 err := row.Scan(&id)
417 if err == nil {
418 log.Printf("id = %v", id)
419 return true
420 }
421 return false
422 }
423 log.Printf("importing %v tweets", len(tweets))
424 for _, t := range tweets {
425 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
426 if havetwid(xid) {
427 continue
428 }
429
430 what := "honk"
431 noise := ""
432 if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
433 t.convoy = parent.convoy
434 } else {
435 t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
436 if t.Tweet.InReplyToScreenName != "" {
437 noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
438 t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
439 }
440 }
441 audience := []string{thewholeworld}
442 honk := Honk{
443 UserID: user.ID,
444 Username: user.Name,
445 What: what,
446 Honker: user.URL,
447 XID: xid,
448 Date: t.date,
449 Format: "markdown",
450 Audience: audience,
451 Convoy: t.convoy,
452 Public: true,
453 Whofore: 2,
454 }
455 noise += t.Tweet.FullText
456 // unbelievable
457 noise = html.UnescapeString(noise)
458 for _, r := range t.Tweet.Entities.Urls {
459 noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
460 }
461 for _, m := range t.Tweet.Entities.Media {
462 var meta DonkMeta
463 u := m.MediaURL
464 idx := strings.LastIndexByte(u, '/')
465 u = u[idx+1:]
466 fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.Tweet.IdStr, u)
467 data, err := ioutil.ReadFile(fname)
468 if err != nil {
469 elog.Printf("error reading media: %s", fname)
470 continue
471 }
472 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
473
474 fileid, err := savefile(u, u, newurl, "image/jpg", true, data, &meta)
475 if err != nil {
476 elog.Printf("error saving media: %s", fname)
477 continue
478 }
479 donk := &Donk{
480 FileID: fileid,
481 }
482 honk.Donks = append(honk.Donks, donk)
483 noise = strings.Replace(noise, m.URL, "", -1)
484 }
485 for _, ht := range t.Tweet.Entities.Hashtags {
486 honk.Onts = append(honk.Onts, "#"+ht.Text)
487 }
488 honk.Noise = noise
489 err := savehonk(&honk)
490 log.Printf("honk saved %v -> %v", xid, err)
491 }
492}
493
494func importInstagram(username, source string) {
495 user, err := butwhatabout(username)
496 if err != nil {
497 elog.Fatal(err)
498 }
499
500 type Gram struct {
501 Media []struct {
502 URI string
503 Creation int64 `json:"creation_timestamp"`
504 Title string
505 }
506 }
507
508 var grams []*Gram
509 fd, err := os.Open(source + "/content/posts_1.json")
510 if err != nil {
511 elog.Fatal(err)
512 }
513 dec := json.NewDecoder(fd)
514 err = dec.Decode(&grams)
515 if err != nil {
516 elog.Fatalf("error parsing json: %s", err)
517 }
518 fd.Close()
519 log.Printf("importing %d grams", len(grams))
520 sort.Slice(grams, func(i, j int) bool {
521 return grams[i].Media[0].Creation < grams[j].Media[0].Creation
522 })
523 for _, g0 := range grams {
524 g := g0.Media[0]
525 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
526 what := "honk"
527 noise := g.Title
528 convoy := "data:,acoustichonkytonk-" + xfiltrate()
529 date := time.Unix(g.Creation, 0)
530 audience := []string{thewholeworld}
531 honk := Honk{
532 UserID: user.ID,
533 Username: user.Name,
534 What: what,
535 Honker: user.URL,
536 XID: xid,
537 Date: date,
538 Format: "markdown",
539 Audience: audience,
540 Convoy: convoy,
541 Public: true,
542 Whofore: 2,
543 }
544 {
545 var meta DonkMeta
546 u := xfiltrate()
547 fname := fmt.Sprintf("%s/%s", source, g.URI)
548 data, err := ioutil.ReadFile(fname)
549 if err != nil {
550 elog.Printf("error reading media: %s", fname)
551 continue
552 }
553 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
554
555 fileid, err := savefile(u, u, newurl, "image/jpg", true, data, &meta)
556 if err != nil {
557 elog.Printf("error saving media: %s", fname)
558 continue
559 }
560 donk := &Donk{
561 FileID: fileid,
562 }
563 honk.Donks = append(honk.Donks, donk)
564 }
565 honk.Noise = noise
566 err := savehonk(&honk)
567 log.Printf("honk saved %v -> %v", xid, err)
568 }
569}
570
571func export(username, file string) {
572 user, err := butwhatabout(username)
573 if err != nil {
574 elog.Fatal(err)
575 }
576 fd, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
577 if err != nil {
578 elog.Fatal(err)
579 }
580 zd := zip.NewWriter(fd)
581 donks := make(map[string]bool)
582 {
583 w, err := zd.Create("outbox.json")
584 if err != nil {
585 elog.Fatal("error creating outbox.json", err)
586 }
587 var jonks []junk.Junk
588 rows, err := stmtUserHonks.Query(0, 3, user.Name, "0", 1234567)
589 honks := getsomehonks(rows, err)
590 for _, honk := range honks {
591 for _, donk := range honk.Donks {
592 donk.URL = "media/" + donk.XID
593 donks[donk.XID] = true
594 }
595 noise := honk.Noise
596 j, jo := jonkjonk(user, honk)
597 if honk.Format == "markdown" {
598 source := junk.New()
599 source["mediaType"] = "text/markdown"
600 source["content"] = noise
601 jo["source"] = source
602 }
603 jonks = append(jonks, j)
604 }
605 j := junk.New()
606 j["@context"] = itiswhatitis
607 j["id"] = user.URL + "/outbox"
608 j["attributedTo"] = user.URL
609 j["type"] = "OrderedCollection"
610 j["totalItems"] = len(jonks)
611 j["orderedItems"] = jonks
612 j.Write(w)
613 }
614 {
615 w, err := zd.Create("inbox.json")
616 if err != nil {
617 elog.Fatal("error creating inbox.json", err)
618 }
619 var jonks []junk.Junk
620 rows, err := stmtHonksForMe.Query(0, user.ID, "0", user.ID, 1234567)
621 honks := getsomehonks(rows, err)
622 for _, honk := range honks {
623 for _, donk := range honk.Donks {
624 donk.URL = "media/" + donk.XID
625 donks[donk.XID] = true
626 }
627 j, _ := jonkjonk(user, honk)
628 jonks = append(jonks, j)
629 }
630 j := junk.New()
631 j["@context"] = itiswhatitis
632 j["id"] = user.URL + "/inbox"
633 j["attributedTo"] = user.URL
634 j["type"] = "OrderedCollection"
635 j["totalItems"] = len(jonks)
636 j["orderedItems"] = jonks
637 j.Write(w)
638 }
639 zd.Create("media/")
640 for donk := range donks {
641 if donk == "" {
642 continue
643 }
644 var media string
645 var data []byte
646 w, err := zd.Create("media/" + donk)
647 if err != nil {
648 elog.Printf("error creating %s: %s", donk, err)
649 continue
650 }
651 row := stmtGetFileData.QueryRow(donk)
652 err = row.Scan(&media, &data)
653 if err != nil {
654 elog.Printf("error scanning file %s: %s", donk, err)
655 continue
656 }
657 w.Write(data)
658 }
659 zd.Close()
660 fd.Close()
661}