import.go
//
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

package main

import (
	"archive/zip"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"html"
	"io/ioutil"
	"log"
	"os"
	"regexp"
	"sort"
	"strings"
	"time"

	"humungus.tedunangst.com/r/webs/junk"
)

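// importMain imports a user's posts from an exported archive.
// flavor selects the importer: "mastodon", "honk", "twitter", or "instagram".
// source is the path to the unpacked export directory.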
func importMain(username, flavor, source string) {
	switch flavor {
	case "mastodon":
		importMastodon(username, source)
	case "honk":
		importHonk(username, source)
	case "twitter":
		importTwitter(username, source)
	case "instagram":
		importInstagram(username, source)
	default:
		elog.Fatal("unknown source flavor")
	}
}

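// ActivityObject holds the subset of ActivityPub object fields that the
// importers read from an exported outbox.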
type ActivityObject struct {
	AttributedTo string
	Summary      string
	Content      string
	Source       struct {
		MediaType string
		Content   string
	}
	InReplyTo    string
	Conversation string
	Context      string
	Published    time.Time
	Tag          []struct {
		Type string
		Name string
	}
	Attachment []struct {
		Type      string
		MediaType string
		Url       string
		Name      string
	}
}

type PlainActivityObject ActivityObject

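// UnmarshalJSON decodes through the PlainActivityObject alias so the decoder
// does not recurse back into this method. Parse errors are ignored, so one
// malformed object does not abort an entire import.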
func (obj *ActivityObject) UnmarshalJSON(b []byte) error {
	p := (*PlainActivityObject)(obj)
	json.Unmarshal(b, p)
	return nil
}

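// importMastodon reads a Mastodon export directory, importing posts from
// outbox.json and followed accounts from following_accounts.csv when present.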
func importMastodon(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	outbox := source + "/outbox.json"
	if _, err := os.Stat(outbox); err == nil {
		importActivities(user, outbox, source)
	} else {
		ilog.Printf("skipping outbox.json!")
	}
	if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
		importMastotooters(user, source)
	} else {
		ilog.Printf("skipping following_accounts.csv!")
	}
}

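// importHonk reads another honk instance's export, importing posts from
// outbox.json when present.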
func importHonk(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	outbox := source + "/outbox.json"
	if _, err := os.Stat(outbox); err == nil {
		importActivities(user, outbox, source)
	} else {
		ilog.Printf("skipping outbox.json!")
	}
}

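// importActivities reads an ActivityPub outbox file and saves each Create
// activity as a honk, skipping posts that are already in the database and
// copying referenced media out of the export directory.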
func importActivities(user *WhatAbout, filename, source string) {
	type Activity struct {
		Id     string
		Type   string
		To     interface{}
		Cc     []string
		Object ActivityObject
	}
	var outbox struct {
		OrderedItems []Activity
	}
	ilog.Println("Importing honks...")
	fd, err := os.Open(filename)
	if err != nil {
		elog.Fatal(err)
	}
	dec := json.NewDecoder(fd)
	err = dec.Decode(&outbox)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()

	havetoot := func(xid string) bool {
		var id int64
		row := stmtFindXonk.QueryRow(user.ID, xid)
		err := row.Scan(&id)
		if err == nil {
			return true
		}
		return false
	}

	re_tootid := regexp.MustCompile("[^/]+$")
	items := outbox.OrderedItems
	for i, j := 0, len(items)-1; i < j; i, j = i+1, j-1 {
		items[i], items[j] = items[j], items[i]
	}
	for _, item := range items {
		toot := item
		if toot.Type != "Create" {
			continue
		}
		if strings.HasSuffix(toot.Id, "/activity") {
			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
		}
		tootid := re_tootid.FindString(toot.Id)
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
		if havetoot(xid) {
			continue
		}

		convoy := toot.Object.Context
		if convoy == "" {
			convoy = toot.Object.Conversation
		}
		var audience []string
		to, ok := toot.To.(string)
		if ok {
			audience = append(audience, to)
		} else {
			for _, t := range toot.To.([]interface{}) {
				audience = append(audience, t.(string))
			}
		}
		content := toot.Object.Content
		format := "html"
		if toot.Object.Source.MediaType == "text/markdown" {
			content = toot.Object.Source.Content
			format = "markdown"
		}
		audience = append(audience, toot.Cc...)
		honk := Honk{
			UserID:   user.ID,
			What:     "honk",
			Honker:   user.URL,
			XID:      xid,
			RID:      toot.Object.InReplyTo,
			Date:     toot.Object.Published,
			URL:      xid,
			Audience: audience,
			Noise:    content,
			Convoy:   convoy,
			Whofore:  2,
			Format:   format,
			Precis:   toot.Object.Summary,
		}
		if !loudandproud(honk.Audience) {
			honk.Whofore = 3
		}
		for _, att := range toot.Object.Attachment {
			switch att.Type {
			case "Document":
				fname := fmt.Sprintf("%s/%s", source, att.Url)
				data, err := ioutil.ReadFile(fname)
				if err != nil {
					elog.Printf("error reading media for %s: %s", honk.XID, fname)
					continue
				}
				u := xfiltrate()
				name := att.Name
				desc := name
				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
				if err != nil {
					elog.Printf("error saving media: %s", fname)
					continue
				}
				donk := &Donk{
					FileID: fileid,
				}
				honk.Donks = append(honk.Donks, donk)
			}
		}
		for _, t := range toot.Object.Tag {
			switch t.Type {
			case "Hashtag":
				honk.Onts = append(honk.Onts, t.Name)
			}
		}
		savehonk(&honk)
	}
}

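// importMastotooters reads following_accounts.csv from a Mastodon export and
// adds each followed account as a honker, skipping the CSV header row.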
func importMastotooters(user *WhatAbout, source string) {
	ilog.Println("Importing honkers...")
	fd, err := os.Open(source + "/following_accounts.csv")
	if err != nil {
		elog.Fatal(err)
	}
	r := csv.NewReader(fd)
	data, err := r.ReadAll()
	if err != nil {
		elog.Fatal(err)
	}
	fd.Close()

	var meta HonkerMeta
	mj, _ := jsonify(&meta)

	for i, d := range data {
		if i == 0 {
			continue
		}
		url := "@" + d[0]
		name := ""
		flavor := "peep"
		combos := ""
		_, err := savehonker(user, url, name, flavor, combos, mj)
		if err != nil {
			elog.Printf("trouble with a honker: %s", err)
		}
	}
}

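// importTwitter reads tweets.js from a Twitter archive, converts each tweet
// into a honk, rewrites t.co links to their expanded URLs, and attaches any
// media found under tweets_media.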
func importTwitter(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	type Tweet struct {
		date   time.Time
		convoy string
		Tweet  struct {
			CreatedAt        string   `json:"created_at"`
			DisplayTextRange []string `json:"display_text_range"`
			EditInfo         struct {
				Initial struct {
					EditTweetIds   []string `json:"editTweetIds"`
					EditableUntil  string   `json:"editableUntil"`
					EditsRemaining string   `json:"editsRemaining"`
					IsEditEligible bool     `json:"isEditEligible"`
				} `json:"initial"`
			} `json:"edit_info"`
			Entities struct {
				Hashtags []struct {
					Indices []string `json:"indices"`
					Text    string   `json:"text"`
				} `json:"hashtags"`
				Media []struct {
					DisplayURL    string   `json:"display_url"`
					ExpandedURL   string   `json:"expanded_url"`
					ID            string   `json:"id"`
					IdStr         string   `json:"id_str"`
					Indices       []string `json:"indices"`
					MediaURL      string   `json:"media_url"`
					MediaUrlHttps string   `json:"media_url_https"`
					Sizes         struct {
						Large struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"large"`
						Medium struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"medium"`
						Small struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL  string `json:"url"`
				} `json:"media"`
				Symbols []interface{} `json:"symbols"`
				Urls    []struct {
					DisplayURL  string   `json:"display_url"`
					ExpandedURL string   `json:"expanded_url"`
					Indices     []string `json:"indices"`
					URL         string   `json:"url"`
				} `json:"urls"`
				UserMentions []interface{} `json:"user_mentions"`
			} `json:"entities"`
			ExtendedEntities struct {
				Media []struct {
					DisplayURL    string   `json:"display_url"`
					ExpandedURL   string   `json:"expanded_url"`
					ID            string   `json:"id"`
					IdStr         string   `json:"id_str"`
					Indices       []string `json:"indices"`
					MediaURL      string   `json:"media_url"`
					MediaUrlHttps string   `json:"media_url_https"`
					Sizes         struct {
						Large struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"large"`
						Medium struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"medium"`
						Small struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL  string `json:"url"`
				} `json:"media"`
			} `json:"extended_entities"`
			FavoriteCount        string `json:"favorite_count"`
			Favorited            bool   `json:"favorited"`
			FullText             string `json:"full_text"`
			ID                   string `json:"id"`
			IdStr                string `json:"id_str"`
			InReplyToScreenName  string `json:"in_reply_to_screen_name"`
			InReplyToStatusID    string `json:"in_reply_to_status_id"`
			InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
			InReplyToUserID      string `json:"in_reply_to_user_id"`
			InReplyToUserIdStr   string `json:"in_reply_to_user_id_str"`
			Lang                 string `json:"lang"`
			PossiblySensitive    bool   `json:"possibly_sensitive"`
			RetweetCount         string `json:"retweet_count"`
			Retweeted            bool   `json:"retweeted"`
			Source               string `json:"source"`
			Truncated            bool   `json:"truncated"`
		} `json:"tweet"`
	}

	var tweets []*Tweet
	fd, err := os.Open(source + "/tweets.js")
	if err != nil {
		elog.Fatal(err)
	}
	// skip past window.YTD.tweet.part0 =
	fd.Seek(25, 0)
	dec := json.NewDecoder(fd)
	err = dec.Decode(&tweets)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()
	tweetmap := make(map[string]*Tweet)
	for _, t := range tweets {
		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
		tweetmap[t.Tweet.IdStr] = t
	}
	sort.Slice(tweets, func(i, j int) bool {
		return tweets[i].date.Before(tweets[j].date)
	})
	havetwid := func(xid string) bool {
		var id int64
		row := stmtFindXonk.QueryRow(user.ID, xid)
		err := row.Scan(&id)
		if err == nil {
			log.Printf("id = %v", id)
			return true
		}
		return false
	}
	log.Printf("importing %v tweets", len(tweets))
	for _, t := range tweets {
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
		if havetwid(xid) {
			continue
		}

		what := "honk"
		noise := ""
		if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
			t.convoy = parent.convoy
		} else {
			t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
			if t.Tweet.InReplyToScreenName != "" {
				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
					t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
			}
		}
		audience := []string{thewholeworld}
		honk := Honk{
			UserID:   user.ID,
			Username: user.Name,
			What:     what,
			Honker:   user.URL,
			XID:      xid,
			Date:     t.date,
			Format:   "markdown",
			Audience: audience,
			Convoy:   t.convoy,
			Public:   true,
			Whofore:  2,
		}
		noise += t.Tweet.FullText
		// unbelievable
		noise = html.UnescapeString(noise)
		for _, r := range t.Tweet.Entities.Urls {
			noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
		}
		for _, m := range t.Tweet.Entities.Media {
			u := m.MediaURL
			idx := strings.LastIndexByte(u, '/')
			u = u[idx+1:]
			fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
			data, err := ioutil.ReadFile(fname)
			if err != nil {
				elog.Printf("error reading media: %s", fname)
				continue
			}
			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)

			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
			if err != nil {
				elog.Printf("error saving media: %s", fname)
				continue
			}
			donk := &Donk{
				FileID: fileid,
			}
			honk.Donks = append(honk.Donks, donk)
			noise = strings.Replace(noise, m.URL, "", -1)
		}
		for _, ht := range t.Tweet.Entities.Hashtags {
			honk.Onts = append(honk.Onts, "#"+ht.Text)
		}
		honk.Noise = noise
		err := savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}

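// importInstagram reads content/posts_1.json from an Instagram export and
// saves each post as a honk with its first media item attached.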
func importInstagram(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	type Gram struct {
		Media []struct {
			URI      string
			Creation int64 `json:"creation_timestamp"`
			Title    string
		}
	}

	var grams []*Gram
	fd, err := os.Open(source + "/content/posts_1.json")
	if err != nil {
		elog.Fatal(err)
	}
	dec := json.NewDecoder(fd)
	err = dec.Decode(&grams)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()
	log.Printf("importing %d grams", len(grams))
	sort.Slice(grams, func(i, j int) bool {
		return grams[i].Media[0].Creation < grams[j].Media[0].Creation
	})
	for _, g0 := range grams {
		g := g0.Media[0]
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
		what := "honk"
		noise := g.Title
		convoy := "data:,acoustichonkytonk-" + xfiltrate()
		date := time.Unix(g.Creation, 0)
		audience := []string{thewholeworld}
		honk := Honk{
			UserID:   user.ID,
			Username: user.Name,
			What:     what,
			Honker:   user.URL,
			XID:      xid,
			Date:     date,
			Format:   "markdown",
			Audience: audience,
			Convoy:   convoy,
			Public:   true,
			Whofore:  2,
		}
		{
			u := xfiltrate()
			fname := fmt.Sprintf("%s/%s", source, g.URI)
			data, err := ioutil.ReadFile(fname)
			if err != nil {
				elog.Printf("error reading media: %s", fname)
				continue
			}
			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)

			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
			if err != nil {
				elog.Printf("error saving media: %s", fname)
				continue
			}
			donk := &Donk{
				FileID: fileid,
			}
			honk.Donks = append(honk.Donks, donk)
		}
		honk.Noise = noise
		err := savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}

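// export writes the named user's data to a zip archive containing
// outbox.json, inbox.json, and the referenced media files.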
func export(username, file string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}
	fd, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
	if err != nil {
		elog.Fatal(err)
	}
	zd := zip.NewWriter(fd)
	donks := make(map[string]bool)
	{
		w, err := zd.Create("outbox.json")
		if err != nil {
			elog.Fatal("error creating outbox.json", err)
		}
		var jonks []junk.Junk
		rows, err := stmtUserHonks.Query(0, 3, user.Name, "0", 1234567)
		honks := getsomehonks(rows, err)
		for _, honk := range honks {
			for _, donk := range honk.Donks {
				donk.URL = "media/" + donk.XID
				donks[donk.XID] = true
			}
			noise := honk.Noise
			j, jo := jonkjonk(user, honk)
			if honk.Format == "markdown" {
				source := junk.New()
				source["mediaType"] = "text/markdown"
				source["content"] = noise
				jo["source"] = source
			}
			jonks = append(jonks, j)
		}
		j := junk.New()
		j["@context"] = itiswhatitis
		j["id"] = user.URL + "/outbox"
		j["attributedTo"] = user.URL
		j["type"] = "OrderedCollection"
		j["totalItems"] = len(jonks)
		j["orderedItems"] = jonks
		j.Write(w)
	}
	{
		w, err := zd.Create("inbox.json")
		if err != nil {
			elog.Fatal("error creating inbox.json", err)
		}
		var jonks []junk.Junk
		rows, err := stmtHonksForMe.Query(0, user.ID, "0", user.ID, 1234567)
		honks := getsomehonks(rows, err)
		for _, honk := range honks {
			for _, donk := range honk.Donks {
				donk.URL = "media/" + donk.XID
				donks[donk.XID] = true
			}
			j, _ := jonkjonk(user, honk)
			jonks = append(jonks, j)
		}
		j := junk.New()
		j["@context"] = itiswhatitis
		j["id"] = user.URL + "/inbox"
		j["attributedTo"] = user.URL
		j["type"] = "OrderedCollection"
		j["totalItems"] = len(jonks)
		j["orderedItems"] = jonks
		j.Write(w)
	}
	zd.Create("media/")
	for donk := range donks {
		if donk == "" {
			continue
		}
		var media string
		var data []byte
		w, err := zd.Create("media/" + donk)
		if err != nil {
			elog.Printf("error creating %s: %s", donk, err)
			continue
		}
		row := stmtGetFileData.QueryRow(donk)
		err = row.Scan(&media, &data)
		if err != nil {
			elog.Printf("error scanning file %s: %s", donk, err)
			continue
		}
		w.Write(data)
	}
	zd.Close()
	fd.Close()
}